Commit

Merge branch 'dev' into obj-version-data-migrate

shubham3121 authored Oct 30, 2023
2 parents f408f4c + 850e742 commit ced7ebe
Showing 14 changed files with 766 additions and 99 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/pr-tests-syft.yml
@@ -109,7 +109,9 @@ jobs:
     strategy:
       max-parallel: 99
       matrix:
-        os: [ubuntu-latest, macos-13, windows-latest]
+        # Disable on windows until its flakyness is reduced.
+        # os: [ubuntu-latest, macos-13, windows-latest]
+        os: [ubuntu-latest, macos-13]
         python-version: ["3.11"]
         deployment-type: ["python"]
         notebook-paths: ["tutorials"]
2 changes: 1 addition & 1 deletion docs/source/api_reference/syft.service.dataset.dataset.rst
@@ -36,7 +36,7 @@ syft.service.dataset.dataset
       CreateDataset
       Dataset
       DatasetUpdate
-      TupleDict
+      DictTuple



26 changes: 24 additions & 2 deletions notebooks/tutorials/data-owner/01-uploading-private-data.ipynb
@@ -48,7 +48,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "node = sy.orchestra.launch(name=\"private-data-example-domain-1\",port=8040, reset=True)"
+    "node = sy.orchestra.launch(name=\"private-data-example-domain-1\", port=\"auto\", reset=True)"
    ]
   },
   {
@@ -158,6 +158,16 @@
     "client.datasets"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "af495cad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client.datasets[\"my dataset\"]"
+   ]
+  },
   {
    "attachments": {},
    "cell_type": "markdown",
@@ -265,6 +275,18 @@
    "source": [
     "## High Side vs Low Side"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c13cdaa2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cleanup local domain server\n",
+    "if node.node_type.value == \"python\":\n",
+    "    node.land()"
+   ]
   }
  ],
  "metadata": {
@@ -283,7 +305,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.11.5"
   },
   "toc": {
    "base_numbering": 1,
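A minimal sketch of how the updated notebook cells fit together, assuming a local Python install of syft and the tutorial's default dev credentials; the dataset-upload cells in between are elided:

import syft as sy

# port="auto" lets orchestra pick a free port instead of pinning 8040,
# so parallel tutorial runs don't collide.
node = sy.orchestra.launch(name="private-data-example-domain-1", port="auto", reset=True)

client = node.login(email="info@openmined.org", password="changethis")

# The new cell looks a dataset up by name once it has been uploaded.
client.datasets["my dataset"]

# The new cleanup cell only lands nodes launched in-process.
if node.node_type.value == "python":
    node.land()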
151 changes: 151 additions & 0 deletions packages/grid/scripts/backup_node_credentials.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+
+# Define destination path on the host machine
+ROOT_DATA_PATH="$HOME/.syft/data"
+
+# Define the source path of the credentials.json file in the container
+SOURCE_PATH="/root/data/creds/credentials.json"
+
+
+calculate_checksum() {
+    # Calculate the checksum of a file
+    local file="$1"
+    local checksum
+    checksum=$(sha256sum "$file" | awk '{print $1}')
+    echo "$checksum"
+}
+
+docker_cp() {
+    # Exit in case of error
+    set -e
+
+    # Define the container name pattern
+    CONTAINER_NAME_PATTERN="backend-1"
+
+    # Get a list of container names that match the pattern
+    CONTAINER_NAMES=$(docker ps --filter "name=$CONTAINER_NAME_PATTERN" --format "{{.Names}}")
+
+    if [ -z "$CONTAINER_NAMES" ]; then
+        echo "No containers found with the name pattern: $CONTAINER_NAME_PATTERN"
+        exit 1
+    fi
+
+    for CONTAINER_NAME in $CONTAINER_NAMES; do
+
+
+        # Define the destination path on the host machine
+        DESTINATION_PATH="$ROOT_DATA_PATH/$CONTAINER_NAME"
+
+        # Calculate the checksum of the source file before copying
+        SOURCE_CHECKSUM=$(docker exec "$CONTAINER_NAME" sha256sum "$SOURCE_PATH" | awk '{print $1}')
+
+        # Check if the destination directory already exists
+        if [ ! -d "$DESTINATION_PATH" ]; then
+            mkdir -p "$DESTINATION_PATH"
+        fi
+
+        # Copy the credentials.json file from the container to the host, placing it in the destination folder
+        # Suppress output from docker cp command.
+        docker cp "${CONTAINER_NAME}:${SOURCE_PATH}" "$DESTINATION_PATH/credentials.json" >/dev/null
+
+        # Check if the copy was successful
+        if [ $? -eq 0 ]; then
+            # Calculate the checksum of the destination file
+            DESTINATION_CHECKSUM=$(calculate_checksum "$DESTINATION_PATH/credentials.json")
+
+            # Check if the source and destination checksums match
+            if [ "$SOURCE_CHECKSUM" = "$DESTINATION_CHECKSUM" ]; then
+                echo "Copied credentials.json from container $CONTAINER_NAME to $DESTINATION_PATH/credentials.json"
+            else
+                echo "Failed to copy credentials.json from $CONTAINER_NAME. Checksum mismatch."
+            fi
+        else
+            echo "Failed to copy credentials.json from $CONTAINER_NAME"
+        fi
+    done
+}
+
+
+k8s_cp() {
+    IMAGE_TAG="grid-backend"
+
+    # Get a list of available contexts
+    CONTEXTS=($(kubectl config get-contexts -o name))
+
+    for CONTEXT in "${CONTEXTS[@]}"; do
+        # Skip the "docker-desktop" context
+        if [ "$CONTEXT" = "docker-desktop" ]; then
+            continue
+        fi
+
+        # Set the context for kubectl
+        kubectl config use-context "$CONTEXT"
+
+        # Find all pods and namespaces with the specified image tag
+        PODS_AND_NAMESPACES=($(kubectl get pods --all-namespaces -o=jsonpath="{range .items[*]}{.metadata.name}{'\t'}{.metadata.namespace}{'\t'}{range .spec.containers[*]}{.image}{'\n'}{end}{end}" | grep "$IMAGE_TAG" | awk '{print $1, $2}'))
+
+        if [ ${#PODS_AND_NAMESPACES[@]} -eq 0 ]; then
+            echo "No pods found with image tag: $IMAGE_TAG in context $CONTEXT"
+        else
+            for ((i = 0; i < ${#PODS_AND_NAMESPACES[@]}; i += 2)); do
+                POD_NAME="${PODS_AND_NAMESPACES[i]}"
+                NAMESPACE="${PODS_AND_NAMESPACES[i + 1]}"
+
+                DESTINATION_FOLDER="$ROOT_DATA_PATH/$CONTEXT""_""$NAMESPACE"
+                mkdir -p $DESTINATION_FOLDER
+
+                # Calculate the checksum of the source file inside the pod
+                SOURCE_CHECKSUM=$(kubectl exec -n "$NAMESPACE" -it "$POD_NAME" -- sha256sum "$SOURCE_PATH" | awk '{print $1}')
+
+                # Copy the file (suppress error message from kubectl cp command: "tar: Removing leading `/' from member names")
+                kubectl cp "$NAMESPACE/$POD_NAME:$SOURCE_PATH" "$DESTINATION_FOLDER/credentials.json" &>/dev/null
+
+                # Check if the copy was successful
+                if [ $? -eq 0 ]; then
+                    # Calculate the checksum of the destination file
+                    DESTINATION_CHECKSUM=$(calculate_checksum "$DESTINATION_FOLDER/credentials.json")
+
+                    # Check if the checksums match
+                    if [ "$SOURCE_CHECKSUM" = "$DESTINATION_CHECKSUM" ]; then
+                        echo "Copied credentials.json from $POD_NAME in namespace $NAMESPACE in context $CONTEXT to $DESTINATION_FOLDER"
+                    else
+                        echo "Failed to copy credentials.json. Checksum mismatch."
+                    fi
+                else
+                    echo "Failed to copy credentials.json from $POD_NAME in namespace $NAMESPACE in context $CONTEXT."
+                fi
+            done
+        fi
+    done
+}
+
+
+# Check if the "--docker" flag is set
+if [[ "$1" == "--docker" ]]
+then
+    docker_cp
+
+# Check if the "--k8s" flag is set
+elif [[ "$1" == "--k8s" ]]
+then
+    k8s_cp
+
+# If no flag is set, prompt the user for their choice
+else
+    echo "Please select an option:"
+    echo "d for Docker"
+    echo "k for Kubernetes"
+    read -p "Enter your choice: " choice
+
+    case $choice in
+        d)
+            docker_cp
+            ;;
+        k)
+            k8s_cp
+            ;;
+        *)
+            echo "Invalid choice. Please select either 'd' for Docker or 'k' for Kubernetes."
+            ;;
+    esac
+fi
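For reference, a hedged Python equivalent of the script's verification step; sha256_of and verify_backup are hypothetical helpers, not part of the repo:

import hashlib
from pathlib import Path


def sha256_of(path: Path) -> str:
    # Stream the file in chunks, like sha256sum does.
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            digest.update(chunk)
    return digest.hexdigest()


def verify_backup(copied: Path, source_checksum: str) -> bool:
    # Mirrors the script's [ "$SOURCE_CHECKSUM" = "$DESTINATION_CHECKSUM" ] test.
    return sha256_of(copied) == source_checksum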
31 changes: 19 additions & 12 deletions packages/syft/src/syft/protocol/data_protocol.py
@@ -1,5 +1,7 @@
 # stdlib
 from collections import defaultdict
+from collections.abc import MutableMapping
+from collections.abc import MutableSequence
 import hashlib
 import json
 import os
@@ -21,6 +23,7 @@
 from ..service.response import SyftError
 from ..service.response import SyftException
 from ..service.response import SyftSuccess
+from ..types.dicttuple import DictTuple
 from ..types.syft_object import SyftBaseObject

 PROTOCOL_STATE_FILENAME = "protocol_version.json"
@@ -352,20 +355,25 @@ def check_or_stage_protocol() -> Result[SyftSuccess, SyftError]:

 def debox_arg_and_migrate(arg: Any, protocol_state: dict):
     """Debox the argument based on whether it is iterable or single entity."""
-    box_to_result_type = None
-
-    if type(arg) in OkErr:
-        box_to_result_type = type(arg)
-        arg = arg.value
+    constructor = None
+    extra_args = []

     single_entity = False
-    is_tuple = isinstance(arg, tuple)

-    if isinstance(arg, (list, tuple)):
+    if isinstance(arg, OkErr):
+        constructor = type(arg)
+        arg = arg.value
+
+    if isinstance(arg, MutableMapping):
+        iterable_keys = arg.keys()
+    elif isinstance(arg, MutableSequence):
+        iterable_keys = range(len(arg))
+    elif isinstance(arg, tuple):
         iterable_keys = range(len(arg))
-    elif isinstance(arg, dict):
-        iterable_keys = arg.keys()
+        constructor = type(arg)
+        if isinstance(arg, DictTuple):
+            extra_args.append(arg.keys())
+        arg = list(arg)
     else:
         iterable_keys = range(1)
         arg = [arg]
@@ -385,9 +393,8 @@ def debox_arg_and_migrate(arg: Any, protocol_state: dict):
         arg[key] = _object

     wrapped_arg = arg[0] if single_entity else arg
-    wrapped_arg = tuple(wrapped_arg) if is_tuple else wrapped_arg
-    if box_to_result_type is not None:
-        wrapped_arg = box_to_result_type(wrapped_arg)
+    if constructor is not None:
+        wrapped_arg = constructor(wrapped_arg, *extra_args)

     return wrapped_arg

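The rebox pattern above generalizes to any container that must survive migration: record the constructor and any extra constructor args (a DictTuple also needs its keys), migrate the elements as a plain list, then rebuild. A standalone toy sketch, with DictTupleLike standing in for syft's DictTuple:

class DictTupleLike(tuple):
    # Toy keyed tuple: iterates over values, carries keys separately.
    def __new__(cls, values, keys=()):
        obj = super().__new__(cls, values)
        obj._keys = tuple(keys)
        return obj

    def keys(self):
        return self._keys


def migrate_container(arg, migrate):
    constructor = None
    extra_args = []
    if isinstance(arg, tuple):
        constructor = type(arg)
        if isinstance(arg, DictTupleLike):
            extra_args.append(arg.keys())
        arg = list(arg)
    migrated = [migrate(item) for item in arg]
    return constructor(migrated, *extra_args) if constructor else migrated


dt = DictTupleLike([1, 2], keys=("a", "b"))
out = migrate_container(dt, lambda x: x + 1)
assert tuple(out) == (2, 3) and out.keys() == ("a", "b")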
2 changes: 1 addition & 1 deletion packages/syft/src/syft/protocol/protocol_version.json
@@ -345,7 +345,7 @@
     "DatasetPageView": {
       "1": {
         "version": 1,
-        "hash": "68c7a0c3e7796fdabb8f732c6d150ec4a8071ce78d69b30da18393afdcea1e59",
+        "hash": "6741bd16dc6089d9deea37b1bd4e895152d1a0c163b8bdfe45280b9bfc4a1354",
         "action": "add"
       }
     },
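The hash bump reflects that DatasetPageView's schema changed when TupleDict became DictTuple. Conceptually — the exact recipe lives in syft's data_protocol, and the field names below are illustrative — a protocol hash is a digest over a type's field layout, something like:

import hashlib
import json


def schema_hash(fields: dict) -> str:
    # Illustrative only: canonicalize the field layout, then digest it.
    canonical = json.dumps(fields, sort_keys=True)
    return hashlib.sha256(canonical.encode()).hexdigest()


schema_hash({"total": "int", "datasets": "DictTuple"})  # changes whenever a field's type changes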
17 changes: 13 additions & 4 deletions packages/syft/src/syft/serde/recursive_primitives.py
@@ -1,6 +1,7 @@
 # stdlib
 from collections import OrderedDict
 from collections import defaultdict
+from collections.abc import Iterable
 from collections.abc import Mapping
 from enum import Enum
 from enum import EnumMeta
@@ -71,23 +72,31 @@ def deserialize_iterable(iterable_type: type, blob: bytes) -> Collection:
     return iterable_type(values)


-def serialize_kv(map: Mapping) -> bytes:
+_KT = TypeVar("_KT")
+_VT = TypeVar("_VT")
+
+
+def _serialize_kv_pairs(size: int, kv_pairs: Iterable[tuple[_KT, _VT]]) -> bytes:
     # relative
     from .serialize import _serialize

     message = kv_iterable_schema.new_message()

-    message.init("keys", len(map))
-    message.init("values", len(map))
+    message.init("keys", size)
+    message.init("values", size)

-    for index, (k, v) in enumerate(map.items()):
+    for index, (k, v) in enumerate(kv_pairs):
         message.keys[index] = _serialize(k, to_bytes=True)
         serialized = _serialize(v, to_bytes=True)
         chunk_bytes(serialized, index, message.values)

     return message.to_bytes()


+def serialize_kv(map: Mapping) -> bytes:
+    return _serialize_kv_pairs(len(map), map.items())
+
+
 def get_deserialized_kv_pairs(blob: bytes) -> List[Any]:
     # relative
     from .deserialize import _deserialize
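The shape of this refactor: the capnp-backed writer now takes an explicit (size, pairs) stream, so Mapping callers and keyed-tuple callers share one code path. A toy model, with json standing in for the real capnp encoding:

import json
from collections.abc import Iterable, Mapping


def _serialize_kv_pairs(size: int, kv_pairs: Iterable) -> bytes:
    pairs = list(kv_pairs)
    assert len(pairs) == size  # mirrors message.init("keys", size)
    return json.dumps(pairs).encode()


def serialize_kv(mapping: Mapping) -> bytes:
    # The old entry point survives as a thin wrapper over the pair-based core.
    return _serialize_kv_pairs(len(mapping), mapping.items())


serialize_kv({"a": 1})                      # mapping path
_serialize_kv_pairs(2, zip("ab", (1, 2)))   # keyed-tuple path, as in third_party.py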
23 changes: 18 additions & 5 deletions packages/syft/src/syft/serde/third_party.py
@@ -27,12 +27,15 @@
 import zmq.green as zmq

 # relative
-from ..types.tupledict import TupleDict
+from ..types.dicttuple import DictTuple
+from ..types.dicttuple import _Meta as _DictTupleMetaClass
 from .deserialize import _deserialize as deserialize
+from .recursive_primitives import _serialize_kv_pairs
 from .recursive_primitives import deserialize_kv
+from .recursive_primitives import deserialize_type
 from .recursive_primitives import recursive_serde_register
 from .recursive_primitives import recursive_serde_register_type
-from .recursive_primitives import serialize_kv
+from .recursive_primitives import serialize_type
 from .serialize import _serialize as serialize

 recursive_serde_register(
@@ -128,10 +131,20 @@ def deserialize_series(blob: bytes) -> Series:
     deserialize=lambda x: Timestamp(deserialize(x, from_bytes=True)),
 )

+
+def _serialize_dicttuple(x: DictTuple) -> bytes:
+    return _serialize_kv_pairs(size=len(x), kv_pairs=zip(x.keys(), x))
+
+
+recursive_serde_register(
+    _DictTupleMetaClass,
+    serialize=serialize_type,
+    deserialize=deserialize_type,
+)
 recursive_serde_register(
-    TupleDict,
-    serialize=serialize_kv,
-    deserialize=functools.partial(deserialize_kv, TupleDict),
+    DictTuple,
+    serialize=_serialize_dicttuple,
+    deserialize=functools.partial(deserialize_kv, DictTuple),
 )

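Why zip(x.keys(), x): a DictTuple iterates over its values (it is a tuple), while a Mapping iterates over its keys, so the kv-pair stream has to be built explicitly. A minimal model, assuming syft's DictTuple behaves like a keyed tuple:

class MiniDictTuple(tuple):
    def __new__(cls, values, keys):
        obj = super().__new__(cls, values)
        obj._keys = tuple(keys)
        return obj

    def keys(self):
        return self._keys


dt = MiniDictTuple((10, 20), keys=("x", "y"))
assert list(dt) == [10, 20]                                 # tuple iteration yields values
assert list(zip(dt.keys(), dt)) == [("x", 10), ("y", 20)]   # pairs for _serialize_kv_pairs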