
Commit

setup changes
vighnesh-wednesday committed Oct 21, 2024
1 parent 9c9251d commit fb4d6c9
Showing 5 changed files with 31 additions and 13 deletions.
8 changes: 5 additions & 3 deletions automation/glue_setup.sh
@@ -29,7 +29,7 @@ export SPARK_HOME=$(pwd)/spark

# Export Path
export PATH=$PATH:$SPARK_HOME/bin:$MAVEN_HOME/bin:$AWS_GLUE_HOME/bin
-export PYTHONPATH=$PROJECT_ROOT
+export PYTHONPATH=$PROJECT_ROOT:$AWS_GLUE_HOME/PyGlue.zip:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip:$SPARK_HOME/python/

# Download Glue ETL .jar files
cd $AWS_GLUE_HOME
@@ -39,11 +39,13 @@ mvn install dependency:copy-dependencies
cp $AWS_GLUE_HOME/jarsv1/AWSGlue*.jar $SPARK_HOME/jars/
cp $AWS_GLUE_HOME/jarsv1/aws*.jar $SPARK_HOME/jars/

echo "export AWS_GLUE_HOME=$AWS_GLUE_HOME
echo "
export AWS_GLUE_HOME=$AWS_GLUE_HOME
export MAVEN_HOME=$MAVEN_HOME
export SPARK_HOME=$SPARK_HOME
export PATH=$PATH:$SPARK_HOME/bin:$MAVEN_HOME/bin:$AWS_GLUE_HOME/bin
export PYTHONPATH=$PROJECT_ROOT" >> $SOURCE_FILE
export PYTHONPATH=$PROJECT_ROOT:$AWS_GLUE_HOME/PyGlue.zip:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip:$SPARK_HOME/python/
" >> $SOURCE_FILE


cd $PROJECT_ROOT
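For context (not part of the commit): the new PYTHONPATH entries are what make the Glue and Spark Python packages importable in a local shell. Below is a minimal sanity-check sketch, assuming automation/glue_setup.sh has been sourced; GlueContext and SparkContext are the standard aws-glue-libs/PySpark entry points, and the version check assumes the pyspark==3.1.1 pin added to requirements.txt further down.

# Sanity check (not from the repo): run after sourcing automation/glue_setup.sh.
import os

# Show what the script put on PYTHONPATH (PyGlue.zip, the py4j zip, $SPARK_HOME/python).
for entry in os.environ.get("PYTHONPATH", "").split(os.pathsep):
    print("PYTHONPATH entry:", entry)

# These imports only resolve when the zips exported above are on PYTHONPATH.
from pyspark.context import SparkContext
from awsglue.context import GlueContext

glue_context = GlueContext(SparkContext.getOrCreate())
print("Spark version:", glue_context.spark_session.version)  # expect 3.1.x locally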
4 changes: 2 additions & 2 deletions jobs/demo.py
@@ -2,8 +2,8 @@
from dotenv import load_dotenv
import app.environment as env

load_dotenv("../app/.custom_env") # Loading env for databricks
load_dotenv() # Loading env for glue
load_dotenv("../app/.custom_env") # Loading env for databricks
load_dotenv() # Loading env for glue

# COMMAND ----------

4 changes: 2 additions & 2 deletions jobs/main.py
@@ -9,8 +9,8 @@
import app.environment as env
import app.spark_wrapper as sw

load_dotenv("../app/.custom_env") # Loading env for databricks
load_dotenv() # Loading env for glue
load_dotenv("../app/.custom_env") # Loading env for databricks
load_dotenv() # Loading env for glue

# COMMAND ----------

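A brief aside (not from the repo): jobs/demo.py and jobs/main.py call load_dotenv twice because python-dotenv returns False when the given file is missing, so the same script works on Databricks (which uses app/.custom_env) and on Glue or locally (which uses a plain .env). A minimal sketch of the pattern:

# Dual-environment loading, as used in jobs/demo.py and jobs/main.py.
from dotenv import load_dotenv

loaded_databricks = load_dotenv("../app/.custom_env")  # file present on Databricks
loaded_glue = load_dotenv()  # default .env used on Glue / local runs

# Each call returns True only when it finds its file, so typically one of the
# two succeeds on any given platform and the other is a harmless no-op.
print("databricks env file loaded:", loaded_databricks)
print("glue env file loaded:", loaded_glue)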
1 change: 1 addition & 0 deletions requirements.txt
@@ -4,3 +4,4 @@ coverage
python-dotenv
kaggle~=1.5.16
pre-commit
+pyspark==3.1.1
27 changes: 21 additions & 6 deletions tests/test_spark_wrapper_failure.py
@@ -29,10 +29,14 @@ def test_value_counts_invalid_column(self):
        with self.assertRaises(U.AnalysisException) as context:
            value_counts(self.df, "nonexistent_column")

-        expected_error_message = re.compile("Column '.+' does not exist")
+        expected_error_message_1 = re.compile("Column '.+' does not exist")
+        expected_error_message_2 = re.compile("cannot resolve '.+' given input columns")
        actual_error_message = str(context.exception)

-        self.assertTrue(expected_error_message.search(actual_error_message))
+        self.assertTrue(
+            expected_error_message_1.search(actual_error_message)
+            or expected_error_message_2.search(actual_error_message)
+        )

    def test_create_frame_invalid_path(self):
        with self.assertRaises(U.AnalysisException) as context:
@@ -48,19 +48,30 @@ def test_make_window_invalid_window_spec(self):
window_spec = make_window("invalid_column", "date", -20, -1)
self.df.withColumn("literal_1", F.lit(1).over(window_spec))

expected_error_message = re.compile("Column '.+' does not exist")
expected_error_message_1 = re.compile("Column '.+' does not exist")
expected_error_message_2 = re.compile("cannot resolve '.+' given input columns")
actual_error_message = str(context.exception)

self.assertTrue(expected_error_message.search(actual_error_message))
self.assertTrue(
expected_error_message_1.search(actual_error_message)
or expected_error_message_2.search(actual_error_message)
)

def test_make_window_invalid_range(self):
with self.assertRaises(U.AnalysisException) as context:
window_spec = make_window("market", "date", 5, 2)
self.df.withColumn("literal_1", F.lit(1).over(window_spec))

expected_error_message = "The lower bound of a window frame must be less than or equal to the upper bound"
expected_error_message_1 = "The lower bound of a window frame must be less than or equal to the upper bound"
expected_error_message_2 = re.compile(
"The data type of the lower bound '.+' does not match the expected data type '.+'"
)
actual_error_message = str(context.exception)
self.assertTrue(expected_error_message in actual_error_message)

self.assertTrue(
expected_error_message_1 in actual_error_message
or expected_error_message_2.search(actual_error_message)
)

def test_rename_column_invalid_column(self):
with self.assertRaises(ValueError) as context:
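For context (not part of the commit): the tests previously matched only "Column '.+' does not exist"; Spark 3.1, the version now pinned in requirements.txt, words the same failure as "cannot resolve ... given input columns", so both patterns are accepted. A minimal standalone sketch, assuming only pyspark is installed:

# Illustrative only: reproduces the AnalysisException wording for an unknown column.
import re
import pyspark.sql.utils as U
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(1, "a")], ["id", "market"])

patterns = [
    re.compile("Column '.+' does not exist"),  # older Spark wording
    re.compile("cannot resolve '.+' given input columns"),  # Spark 3.1 wording
]

try:
    df.select("nonexistent_column").collect()
except U.AnalysisException as exc:
    message = str(exc)
    # One of the two version-specific patterns should match, mirroring the tests.
    assert any(p.search(message) for p in patterns), message
    print("matched:", message)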
