Update non-K8s docker images to Apache Spark 2.3.1
Update itests to check hostname against host prefix
Make requested changes

Closes #454
akchinSTC authored and kevin-bates committed Oct 8, 2018
1 parent 322bd13 commit a5d65c7
Showing 11 changed files with 42 additions and 32 deletions.
16 changes: 10 additions & 6 deletions Makefile
@@ -91,7 +91,7 @@ release: bdist sdist ## Make a wheel + source release on PyPI
 # Here for doc purposes
 docker-images: ## Build docker images
 enterprise-gateway-demo: ## Build elyra/enterprise-gateway-demo:dev docker image
-yarn-spark: ## Build elyra/yarn-spark:2.1.0 docker image
+yarn-spark: ## Build elyra/yarn-spark:2.3.1 docker image
 nb2kg: ## Build elyra/nb2kg:dev docker image
 kubernetes-images: ## Build kubernetes docker images
 enterprise-gateway: ## Build elyra/enterprise-gateway:dev docker image
@@ -114,7 +114,7 @@ docker-image-enterprise-gateway: $(WHEEL_FILE)
 clean-docker: ## Remove docker images
 clean-enterprise-gateway-demo: ## Remove elyra/enterprise-gateway-demo:dev docker image
 clean-nb2kg: ## Remove elyra/nb2kg:dev docker image
-clean-yarn-spark: ## Remove elyra/yarn-spark:2.1.0 docker image
+clean-yarn-spark: ## Remove elyra/yarn-spark:2.3.1 docker image
 clean-kubernetes: ## Remove kubernetes docker images
 clean-enterprise-gateway: ## Remove elyra/enterprise-gateway:dev docker image
 clean-kernel-py: ## Remove elyra/kernel-py:dev docker image
@@ -135,6 +135,10 @@ kubernetes-publish: ## Push kubernetes docker images to docker hub
 # itest configurable settings
 # indicates which host (gateway) to connect to...
 ITEST_HOST?=localhost:8888
+# indicates two things:
+# this prefix is used by itest to determine the hostname to test against; in addition,
+# if itests will be run locally with the docker-prep target, this will set the hostname within that container as well
+export ITEST_HOSTNAME_PREFIX?=itest
 # indicates the user to emulate. This equates to 'KERNEL_USERNAME'...
 ITEST_USER?=bob
 # indicates the other set of options to use. At this time, only the python notebooks succeed, so we're skipping R and Scala.
@@ -151,11 +155,11 @@ ifeq (1, $(PREP_DOCKER))
 	make docker-prep
 endif
 	($(SA) $(ENV) && GATEWAY_HOST=$(ITEST_HOST) KERNEL_USERNAME=$(ITEST_USER) nosetests -v enterprise_gateway.itests)
-	@echo "Run \`docker logs itest\` to see enterprise-gateway log."
+	@echo "Run \`docker logs $(ITEST_HOSTNAME_PREFIX)\` to see enterprise-gateway log."
 
 PREP_TIMEOUT?=60
 docker-prep:
-	@-docker rm -f itest >> /dev/null
+	@-docker rm -f $(ITEST_HOSTNAME_PREFIX) >> /dev/null
 	@echo "Starting enterprise-gateway container (run \`docker logs itest\` to see container log)..."
-	@-docker run -itd -p 8888:8888 -h itest --name itest -v `pwd`/enterprise_gateway/itests:/tmp/byok elyra/enterprise-gateway-demo:$(ENTERPRISE_GATEWAY_TAG) --elyra
-	@(r="1"; attempts=0; while [ "$$r" == "1" -a $$attempts -lt $(PREP_TIMEOUT) ]; do echo "Waiting for enterprise-gateway to start..."; sleep 2; ((attempts++)); docker logs itest |grep 'Jupyter Enterprise Gateway at http'; r=$$?; done; if [ $$attempts -ge $(PREP_TIMEOUT) ]; then echo "Wait for startup timed out!"; exit 1; fi;)
+	@-docker run -itd -p 8888:8888 -h itest --name itest -e ITEST_HOSTNAME_PREFIX -v `pwd`/enterprise_gateway/itests:/tmp/byok elyra/enterprise-gateway-demo:$(ENTERPRISE_GATEWAY_TAG) --elyra
+	@(r="1"; attempts=0; while [ "$$r" == "1" -a $$attempts -lt $(PREP_TIMEOUT) ]; do echo "Waiting for enterprise-gateway to start..."; sleep 2; ((attempts++)); docker logs $(ITEST_HOSTNAME_PREFIX) |grep 'Jupyter Enterprise Gateway at http'; r=$$?; done; if [ $$attempts -ge $(PREP_TIMEOUT) ]; then echo "Wait for startup timed out!"; exit 1; fi;)
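Note: the guarded loop in docker-prep polls the container log until the gateway announces it is listening, then gives up after PREP_TIMEOUT attempts. A rough Python rendering of that wait logic (a sketch only; the Makefile drives this via shell, and the container name assumes the default ITEST_HOSTNAME_PREFIX=itest):

    import subprocess
    import sys
    import time

    PREP_TIMEOUT = 60     # number of ~2-second polls, as in the Makefile
    CONTAINER = 'itest'   # default $(ITEST_HOSTNAME_PREFIX)

    for _ in range(PREP_TIMEOUT):
        print("Waiting for enterprise-gateway to start...")
        time.sleep(2)
        log = subprocess.run(['docker', 'logs', CONTAINER],
                             capture_output=True, text=True)
        if 'Jupyter Enterprise Gateway at http' in log.stdout + log.stderr:
            break
    else:
        sys.exit("Wait for startup timed out!")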
4 changes: 2 additions & 2 deletions enterprise_gateway/itests/test_authorization.py
@@ -28,7 +28,7 @@ def test_authorized_users(self):
             self.assertEquals(result, "The cow jumped over the moon.\n")
         finally:
             if kernel:
-                kernel.shutdown()
+                self.gateway_client.shutdown_kernel(kernel)
 
     def test_unauthorized_users(self):
         kernel = None
@@ -39,7 +39,7 @@ def test_unauthorized_users(self):
             self.assertRegexpMatches(be.args[0], "403")
         finally:
             if kernel:
-                kernel.shutdown()
+                self.gateway_client.shutdown_kernel(kernel)
 
 if __name__ == '__main__':
     unittest.main()
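The change above moves kernel teardown onto the gateway client rather than the kernel object itself. A minimal sketch of the resulting lifecycle pattern (the client object and start_kernel call are hypothetical stand-ins; only shutdown_kernel appears in this diff):

    # Hypothetical helper illustrating the try/finally lifecycle the tests use.
    def run_with_kernel(gateway_client, kernelspec):
        kernel = None
        try:
            kernel = gateway_client.start_kernel(kernelspec)  # assumed API
            return kernel.execute('print("The cow jumped over the moon.")')
        finally:
            # Shutdown goes through the client, not kernel.shutdown()
            if kernel:
                gateway_client.shutdown_kernel(kernel)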
8 changes: 4 additions & 4 deletions enterprise_gateway/itests/test_python_kernel.py
@@ -74,7 +74,7 @@ def test_get_application_id(self):
 
     def test_get_spark_version(self):
         result = self.kernel.execute("sc.version")
-        self.assertRegexpMatches(result, '2.1.*')
+        self.assertRegexpMatches(result, '2.3.*')
 
     def test_get_resource_manager(self):
         result = self.kernel.execute("sc.getConf().get('spark.master')")
@@ -84,9 +84,9 @@ def test_get_deploy_mode(self):
         result = self.kernel.execute("sc.getConf().get('spark.submit.deployMode')")
         self.assertRegexpMatches(result, '(cluster|client)')
 
-    def test_get_host_address(self):
-        result = self.kernel.execute("print(sc.getConf().get('spark.driver.host'))")
-        self.assertRegexpMatches(result, '\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
+    def test_get_hostname(self):
+        result = self.kernel.execute("import subprocess; subprocess.check_output(['hostname'])")
+        self.assertRegexpMatches(result, os.environ['ITEST_HOSTNAME_PREFIX'] + "*")
 
 
 class TestPythonKernelLocal(unittest.TestCase, PythonKernelBaseTestCase):
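A note on the new assertion: assertRegexpMatches performs an unanchored regex search, and the trailing "*" binds to the final character of the prefix rather than acting as a shell-style wildcard, so the effective check is that the prefix appears in the kernel's output. A standalone sketch with assumed values:

    import os
    import re

    os.environ.setdefault('ITEST_HOSTNAME_PREFIX', 'itest')  # Makefile default
    result = "itest\n"  # example output of `hostname` inside the container

    # Same shape as assertRegexpMatches(result, prefix + "*"): unanchored search
    assert re.search(os.environ['ITEST_HOSTNAME_PREFIX'] + "*", result)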
8 changes: 4 additions & 4 deletions enterprise_gateway/itests/test_r_kernel.py
@@ -73,7 +73,7 @@ def test_get_application_id(self):
 
     def test_get_spark_version(self):
         result = self.kernel.execute("sparkR.version()")
-        self.assertRegexpMatches(result, '2.1')
+        self.assertRegexpMatches(result, '2.3')
 
     def test_get_resource_manager(self):
         result = self.kernel.execute('unlist(sparkR.conf("spark.master"))')
@@ -83,9 +83,9 @@ def test_get_deploy_mode(self):
         result = self.kernel.execute('unlist(sparkR.conf("spark.submit.deployMode"))')
         self.assertRegexpMatches(result, '(cluster|client)')
 
-    def test_get_host_address(self):
-        result = self.kernel.execute('unlist(sparkR.conf("spark.driver.host"))')
-        self.assertRegexpMatches(result, '\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
+    def test_get_hostname(self):
+        result = self.kernel.execute('system("hostname", intern=TRUE)')
+        self.assertRegexpMatches(result, os.environ['ITEST_HOSTNAME_PREFIX'] + "*")
 
 
 class TestRKernelLocal(unittest.TestCase, RKernelBaseTestCase):
12 changes: 7 additions & 5 deletions enterprise_gateway/itests/test_scala_kernel.py
@@ -73,7 +73,7 @@ def test_get_application_id(self):
 
     def test_get_spark_version(self):
         result = self.kernel.execute("sc.version")
-        self.assertRegexpMatches(result, '2.1')
+        self.assertRegexpMatches(result, '2.3')
 
     def test_get_resource_manager(self):
         result = self.kernel.execute('sc.getConf.get("spark.master")')
@@ -83,13 +83,15 @@ def test_get_deploy_mode(self):
         result = self.kernel.execute('sc.getConf.get("spark.submit.deployMode")')
         self.assertRegexpMatches(result, '(cluster|client)')
 
-    def test_get_host_address(self):
-        result = self.kernel.execute('sc.getConf.get("spark.driver.host")')
-        self.assertRegexpMatches(result, '\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
+    def test_get_hostname(self):
+        result = self.kernel.execute('import java.net._; \
+            val localhost: InetAddress = InetAddress.getLocalHost; \
+            val localIpAddress: String = localhost.getHostName')
+        self.assertRegexpMatches(result, os.environ['ITEST_HOSTNAME_PREFIX'] + "*")
 
 
 class TestScalaKernelLocal(unittest.TestCase, ScalaKernelBaseTestCase):
-    KERNELSPEC = os.getenv("SCALA_KERNEL_LOCAL_NAME", "spark_2.1_scala")
+    KERNELSPEC = os.getenv("SCALA_KERNEL_LOCAL_NAME", "spark_2.3.1_scala")
 
     @classmethod
     def setUpClass(cls):
2 changes: 1 addition & 1 deletion etc/Makefile
@@ -66,7 +66,7 @@ kubernetes-images: $(KUBERNETES_IMAGES)
 
 kubernetes-publish: publish-enterprise-gateway publish-kernel-py publish-kernel-tf-py publish-kernel-tf-gpu-py publish-kernel-r publish-kernel-spark-r publish-kernel-scala
 
-clean-images: clean-enterprise-gateway clean-nb2kg clean-yarn-spark clean-enterprise-gateway clean-kubernetes
+clean-images: clean-enterprise-gateway-demo clean-nb2kg clean-yarn-spark clean-enterprise-gateway clean-kubernetes
 clean-kubernetes: clean-enterprise-gateway clean-kernel-py clean-kernel-tf-py clean-kernel-tf-gpu-py clean-kernel-r clean-kernel-r clean-kernel-scala
 
 # Extra dependencies for each docker image...
11 changes: 6 additions & 5 deletions etc/docker/enterprise-gateway-demo/Dockerfile
@@ -1,7 +1,8 @@
-FROM elyra/yarn-spark:2.1.0
+FROM elyra/yarn-spark:dev
 
 # Install Enterprise Gateway wheel and kernelspecs
 COPY jupyter_enterprise_gateway*.whl /tmp
 
 RUN pip install /tmp/jupyter_enterprise_gateway*.whl && \
+    pip install --upgrade ipykernel jupyter-client notebook && \
     rm -f /tmp/jupyter_enterprise_gateway*.whl
@@ -14,12 +15,12 @@ COPY start-enterprise-gateway.sh.template /usr/local/share/jupyter/
 # Copy toree jar from install to scala kernelspec lib directory
 # Add YARN_CONF_DIR to each env stanza, Add alternate-sigint to vanilla toree
 RUN mkdir -p /usr/hdp/current /tmp/byok/kernels && \
-    ln -s /usr/local/spark-2.1.0-bin-hadoop2.7 /usr/hdp/current/spark2-client && \
-    cp /usr/local/share/jupyter/kernels/spark_2.1_scala/lib/*.jar /usr/local/share/jupyter/kernels/spark_scala_yarn_cluster/lib && \
-    cp /usr/local/share/jupyter/kernels/spark_2.1_scala/lib/*.jar /usr/local/share/jupyter/kernels/spark_scala_yarn_client/lib && \
+    ln -s /usr/local/spark-2.3.1-bin-hadoop2.7 /usr/hdp/current/spark2-client && \
+    cp /usr/local/share/jupyter/kernels/spark_2.3.1_scala/lib/*.jar /usr/local/share/jupyter/kernels/spark_scala_yarn_cluster/lib && \
+    cp /usr/local/share/jupyter/kernels/spark_2.3.1_scala/lib/*.jar /usr/local/share/jupyter/kernels/spark_scala_yarn_client/lib && \
     cd /usr/local/share/jupyter/kernels && \
     for dir in spark_*; do cat $dir/kernel.json | sed s/'"env": {'/'"env": {| "YARN_CONF_DIR": "\/usr\/local\/hadoop\/etc\/hadoop",'/ | tr '|' '\n' > xkernel.json; mv xkernel.json $dir/kernel.json; done && \
-    cat spark_2.1_scala/kernel.json | sed s/'"__TOREE_OPTS__": "",'/'"__TOREE_OPTS__": "--alternate-sigint USR2",'/ | tr '|' '\n' > xkernel.json; mv xkernel.json spark_2.1_scala/kernel.json && \
+    cat spark_2.3.1_scala/kernel.json | sed s/'"__TOREE_OPTS__": "",'/'"__TOREE_OPTS__": "--alternate-sigint USR2",'/ | tr '|' '\n' > xkernel.json; mv xkernel.json spark_2.3.1_scala/kernel.json && \
     touch /usr/local/share/jupyter/enterprise-gateway.log && \
     chmod 0666 /usr/local/share/jupyter/enterprise-gateway.log
 
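The sed/tr pipeline in the RUN step above splices a YARN_CONF_DIR entry into the "env" stanza of every spark_* kernel.json. For readers untangling the quoting, a rough Python equivalent of that edit (a sketch only; the image performs this with sed at build time, and the paths are taken from the Dockerfile):

    import glob
    import json

    for path in glob.glob('/usr/local/share/jupyter/kernels/spark_*/kernel.json'):
        with open(path) as f:
            spec = json.load(f)
        # Same effect as the sed insertion into the "env" stanza
        spec.setdefault('env', {})['YARN_CONF_DIR'] = '/usr/local/hadoop/etc/hadoop'
        with open(path, 'w') as f:
            json.dump(spec, f, indent=2)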
6 changes: 3 additions & 3 deletions etc/docker/yarn-spark/Dockerfile
@@ -1,5 +1,5 @@
 # Use docker image with Spark 2.1 and Hadoop 2.7 (sequenceiq/hadoop-docker:2.7.1)
-FROM aghorbani/spark:2.1.0
+FROM akchin/spark:2.3.1
 
 ENV ANACONDA_HOME=/opt/anaconda2
 ENV PATH=$ANACONDA_HOME/bin:$PATH
@@ -31,7 +31,7 @@ RUN cd /tmp && \
     curl -O https://dist.apache.org/repos/dist/release/incubator/toree/0.2.0-incubating/toree-pip/toree-0.2.0.tar.gz && \
     pip install --upgrade setuptools --user python && \
     pip install /tmp/toree-0.2.0.tar.gz && \
-    jupyter toree install --spark_home=/usr/local/spark --kernel_name="Spark 2.1" --interpreters=Scala && \
+    jupyter toree install --spark_home=/usr/local/spark --kernel_name="Spark 2.3.1" --interpreters=Scala && \
     rm -f /tmp/toree-0.2.0.tar.gz
 
 # Install Anaconda R binaries, argparser and kernelspecs dir
@@ -74,6 +74,6 @@ ENTRYPOINT ["/etc/bootstrap-yarn-spark.sh"]
 CMD ["--help"]
 
 LABEL Hadoop.version="2.7.1"
-LABEL Spark.version="2.1.0"
+LABEL Spark.version="2.3.1"
 LABEL Anaconda.version="4.4.0"
 LABEL Anaconda.python.version="2.7.13"
2 changes: 1 addition & 1 deletion etc/kernel-launchers/R/scripts/launch_IRkernel.R
@@ -120,7 +120,7 @@ return_connection_info <- function(connection_file, response_addr){
     sendme <- read_json(connection_file)
     # Add launcher process id to returned info...
     sendme$pid <- Sys.getpid()
-    json <- toJSON(sendme, auto_unbox=TRUE)
+    json <- jsonlite::toJSON(sendme, auto_unbox=TRUE)
     message(paste("JSON Payload: ", json))
 
     fn <- basename(connection_file)
2 changes: 1 addition & 1 deletion etc/kernel-launchers/scala/toree-launcher/build.sbt
@@ -12,7 +12,7 @@ scalaVersion := "2.11.12"
 resolvers += "Typesafe Repo" at "http://repo.typesafe.com/typesafe/releases/"
 resolvers += "Sonatype Repository" at "http://oss.sonatype.org/content/repositories/releases"
 
-val sparkVersion = "2.1.1"
+val sparkVersion = "2.3.1"
 
 libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion % "provided"
 libraryDependencies += "com.typesafe.play" %% "play-json" % "2.3.10" // Apache v2
3 changes: 3 additions & 0 deletions etc/kernel-launchers/scala/toree-launcher/project/plugins.sbt
@@ -5,5 +5,8 @@
 
 logLevel := Level.Warn
 
+/*
+ * Following plugins have a dependency on sbt v0.13
+ */
 addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.5")
 addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0")
