From 181112cef78931815a5967ca4e9d2819f5288aac Mon Sep 17 00:00:00 2001
From: Jillian Vogel <jill@opencraft.com>
Date: Thu, 27 Jun 2024 18:35:07 +0930
Subject: [PATCH 1/6] chore: initial commit

Files generated by `dbt init`
---
 README.md                              | 15 +++++++++++
 analyses/.gitkeep                      |  0
 dbt_project.yml                        | 36 ++++++++++++++++++++++++++
 macros/.gitkeep                        |  0
 models/example/my_first_dbt_model.sql  | 27 +++++++++++++++++++
 models/example/my_second_dbt_model.sql |  6 +++++
 models/example/schema.yml              | 21 +++++++++++++++
 seeds/.gitkeep                         |  0
 snapshots/.gitkeep                     |  0
 tests/.gitkeep                         |  0
 10 files changed, 105 insertions(+)
 create mode 100644 README.md
 create mode 100644 analyses/.gitkeep
 create mode 100644 dbt_project.yml
 create mode 100644 macros/.gitkeep
 create mode 100644 models/example/my_first_dbt_model.sql
 create mode 100644 models/example/my_second_dbt_model.sql
 create mode 100644 models/example/schema.yml
 create mode 100644 seeds/.gitkeep
 create mode 100644 snapshots/.gitkeep
 create mode 100644 tests/.gitkeep

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7874ac8
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+Welcome to your new dbt project!
+
+### Using the starter project
+
+Try running the following commands:
+- dbt run
+- dbt test
+
+
+### Resources:
+- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
+- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
+- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
+- Find [dbt events](https://events.getdbt.com) near you
+- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
diff --git a/analyses/.gitkeep b/analyses/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/dbt_project.yml b/dbt_project.yml
new file mode 100644
index 0000000..5f347b4
--- /dev/null
+++ b/dbt_project.yml
@@ -0,0 +1,36 @@
+
+# Name your project! Project names should contain only lowercase characters
+# and underscores. A good package name should reflect your organization's
+# name or the intended use of these models
+name: 'dbt_completion_aggregator'
+version: '1.0.0'
+
+# This setting configures which "profile" dbt uses for this project.
+profile: 'dbt_completion_aggregator'
+
+# These configurations specify where dbt should look for different types of files.
+# The `model-paths` config, for example, states that models in this project can be
+# found in the "models/" directory. You probably won't need to change these!
+model-paths: ["models"]
+analysis-paths: ["analyses"]
+test-paths: ["tests"]
+seed-paths: ["seeds"]
+macro-paths: ["macros"]
+snapshot-paths: ["snapshots"]
+
+clean-targets:         # directories to be removed by `dbt clean`
+  - "target"
+  - "dbt_packages"
+
+
+# Configuring models
+# Full documentation: https://docs.getdbt.com/docs/configuring-models
+
+# In this example config, we tell dbt to build all models in the example/
+# directory as views. These settings can be overridden in the individual model
+# files using the `{{ config(...) }}` macro.
+models:
+  dbt_completion_aggregator:
+    # Config indicated by + and applies to all files under models/example/
+    example:
+      +materialized: view
diff --git a/macros/.gitkeep b/macros/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/models/example/my_first_dbt_model.sql b/models/example/my_first_dbt_model.sql
new file mode 100644
index 0000000..f31a12d
--- /dev/null
+++ b/models/example/my_first_dbt_model.sql
@@ -0,0 +1,27 @@
+
+/*
+    Welcome to your first dbt model!
+    Did you know that you can also configure models directly within SQL files?
+    This will override configurations stated in dbt_project.yml
+
+    Try changing "table" to "view" below
+*/
+
+{{ config(materialized='table') }}
+
+with source_data as (
+
+    select 1 as id
+    union all
+    select null as id
+
+)
+
+select *
+from source_data
+
+/*
+    Uncomment the line below to remove records with null `id` values
+*/
+
+-- where id is not null
diff --git a/models/example/my_second_dbt_model.sql b/models/example/my_second_dbt_model.sql
new file mode 100644
index 0000000..c91f879
--- /dev/null
+++ b/models/example/my_second_dbt_model.sql
@@ -0,0 +1,6 @@
+
+-- Use the `ref` function to select from other models
+
+select *
+from {{ ref('my_first_dbt_model') }}
+where id = 1
diff --git a/models/example/schema.yml b/models/example/schema.yml
new file mode 100644
index 0000000..2a53081
--- /dev/null
+++ b/models/example/schema.yml
@@ -0,0 +1,21 @@
+
+version: 2
+
+models:
+  - name: my_first_dbt_model
+    description: "A starter dbt model"
+    columns:
+      - name: id
+        description: "The primary key for this table"
+        tests:
+          - unique
+          - not_null
+
+  - name: my_second_dbt_model
+    description: "A starter dbt model"
+    columns:
+      - name: id
+        description: "The primary key for this table"
+        tests:
+          - unique
+          - not_null
diff --git a/seeds/.gitkeep b/seeds/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/snapshots/.gitkeep b/snapshots/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/tests/.gitkeep b/tests/.gitkeep
new file mode 100644
index 0000000..e69de29

From e4c3862e4c3eaa954361f40b41cab6e5f07e4bc1 Mon Sep 17 00:00:00 2001
From: Jillian Vogel <jill@opencraft.com>
Date: Thu, 27 Jun 2024 18:16:13 +0930
Subject: [PATCH 2/6] feat: add aspects-dbt dependency

and use the aspects profile and target path.
---
 README.md       | 10 ++++------
 dbt_project.yml |  5 ++++-
 packages.yml    |  3 +++
 3 files changed, 11 insertions(+), 7 deletions(-)
 create mode 100644 packages.yml

diff --git a/README.md b/README.md
index 7874ac8..3e5d5cc 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,11 @@
-Welcome to your new dbt project!
+### DBT Completion Aggregator
 
-### Using the starter project
+Basic dbt package to transform completion and completion aggregator events.
 
-Try running the following commands:
-- dbt run
-- dbt test
+Extends [aspects-dbt](https://github.com/openedx/aspects-dbt).
 
+### DBT Resources:
 
-### Resources:
 - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
 - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
 - Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
diff --git a/dbt_project.yml b/dbt_project.yml
index 5f347b4..efe5c1c 100644
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -6,7 +6,10 @@ name: 'dbt_completion_aggregator'
 version: '1.0.0'
 
 # This setting configures which "profile" dbt uses for this project.
-profile: 'dbt_completion_aggregator'
+profile: 'aspects'
+
+# directory which will store compiled SQL files
+target-path: "target"
 
 # These configurations specify where dbt should look for different types of files.
 # The `model-paths` config, for example, states that models in this project can be
diff --git a/packages.yml b/packages.yml
new file mode 100644
index 0000000..f4aebaa
--- /dev/null
+++ b/packages.yml
@@ -0,0 +1,3 @@
+packages:
+  - git: "https://github.com/openedx/aspects-dbt.git"
+    revision: v3.29.0

From ebfc41db4c7f35839c0aecd90ad665c622ebb84b Mon Sep 17 00:00:00 2001
From: Jillian Vogel <jill@opencraft.com>
Date: Wed, 19 Jun 2024 10:31:40 +0930
Subject: [PATCH 3/6] feat: transforms aggregated "progress" events into a new
 fact

---
 dbt_project.yml                               | 14 +--
 .../aggregated_completion_events.sql          | 30 +++++++
 .../fact_aggregated_completions.sql           | 61 +++++++++++++
 models/completion_aggregator/schema.yml       | 89 +++++++++++++++++++
 models/example/my_first_dbt_model.sql         | 27 ------
 models/example/my_second_dbt_model.sql        |  6 --
 models/example/schema.yml                     | 21 -----
 tests/generic/test_is_completed.sql           | 27 ++++++
 8 files changed, 208 insertions(+), 67 deletions(-)
 create mode 100644 models/completion_aggregator/aggregated_completion_events.sql
 create mode 100644 models/completion_aggregator/fact_aggregated_completions.sql
 create mode 100644 models/completion_aggregator/schema.yml
 delete mode 100644 models/example/my_first_dbt_model.sql
 delete mode 100644 models/example/my_second_dbt_model.sql
 delete mode 100644 models/example/schema.yml
 create mode 100644 tests/generic/test_is_completed.sql

diff --git a/dbt_project.yml b/dbt_project.yml
index efe5c1c..4c39946 100644
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -4,6 +4,7 @@
 # name or the intended use of these models
 name: 'dbt_completion_aggregator'
 version: '1.0.0'
+config-version: 2
 
 # This setting configures which "profile" dbt uses for this project.
 profile: 'aspects'
@@ -24,16 +25,3 @@ snapshot-paths: ["snapshots"]
 clean-targets:         # directories to be removed by `dbt clean`
   - "target"
   - "dbt_packages"
-
-
-# Configuring models
-# Full documentation: https://docs.getdbt.com/docs/configuring-models
-
-# In this example config, we tell dbt to build all models in the example/
-# directory as views. These settings can be overridden in the individual model
-# files using the `{{ config(...) }}` macro.
-models:
-  dbt_completion_aggregator:
-    # Config indicated by + and applies to all files under models/example/
-    example:
-      +materialized: view
diff --git a/models/completion_aggregator/aggregated_completion_events.sql b/models/completion_aggregator/aggregated_completion_events.sql
new file mode 100644
index 0000000..48749c5
--- /dev/null
+++ b/models/completion_aggregator/aggregated_completion_events.sql
@@ -0,0 +1,30 @@
+{{
+    config(
+        materialized="materialized_view",
+        schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
+        engine=aspects.get_engine("ReplacingMergeTree()"),
+        primary_key="(org, course_key, verb_id)",
+        order_by="(org, course_key, verb_id, emission_time, actor_id, object_id, event_id)",
+        partition_by="(toYYYYMM(emission_time))",
+        ttl=env_var("ASPECTS_DATA_TTL_EXPRESSION", ""),
+    )
+}}
+
+-- Keep only xAPI "progressed" statements and surface their progress fields.
+select
+    event_id,
+    cast(emission_time as DateTime) as emission_time,
+    actor_id,
+    object_id,
+    course_key,
+    org,
+    verb_id,
+    -- cmi5 progress extension read from the statement result
+    JSON_VALUE(
+        event,
+        '$.result.extensions."https://w3id.org/xapi/cmi5/result/extensions/progress"'
+    ) as progress_percent,
+    -- xAPI result.completion, as extracted from the raw event JSON
+    JSON_VALUE(event, '$.result.completion') as completed
+from {{ ref("xapi_events_all_parsed") }}
+where verb_id = 'http://adlnet.gov/expapi/verbs/progressed'
diff --git a/models/completion_aggregator/fact_aggregated_completions.sql b/models/completion_aggregator/fact_aggregated_completions.sql
new file mode 100644
index 0000000..de2f2eb
--- /dev/null
+++ b/models/completion_aggregator/fact_aggregated_completions.sql
@@ -0,0 +1,61 @@
+with
+    completions as (
+        select
+            emission_time,
+            org,
+            course_key,
+            actor_id,
+            progress_percent,
+            if(
+                object_id like '%/course/%',
+                splitByString('/course/', object_id)[-1],
+                splitByString('/xblock/', object_id)[-1]
+            ) as entity_id,
+            cast(progress_percent as Float) / 100 as scaled_progress
+        from {{ ref("aggregated_completion_events") }}
+    )
+
+select
+    completions.emission_time as emission_time,
+    completions.org as org,
+    completions.course_key as course_key,
+    courses.course_name as course_name,
+    courses.course_run as course_run,
+    completions.entity_id as entity_id,
+    if(blocks.block_name != '', blocks.block_name, courses.course_name) as entity_name,
+    if(
+        blocks.block_name != '', blocks.display_name_with_location, null
+    ) as entity_name_with_location,
+    completions.actor_id as actor_id,
+    cast(completions.scaled_progress as Float) as scaled_progress,
+    case
+        -- Branches are evaluated top-down, so each one only needs its lower bound;
+        -- everything that matches none of them falls through to '0-9%'.
+        -- The CTE column is named explicitly: the bare name could also resolve to
+        -- the re-cast `scaled_progress` alias just above, and a Float re-cast of a
+        -- boundary value such as 0.9 may compare below the 0.9 literal.
+        when completions.scaled_progress >= 0.9 then '90-100%'
+        when completions.scaled_progress >= 0.8 then '80-89%'
+        when completions.scaled_progress >= 0.7 then '70-79%'
+        when completions.scaled_progress >= 0.6 then '60-69%'
+        when completions.scaled_progress >= 0.5 then '50-59%'
+        when completions.scaled_progress >= 0.4 then '40-49%'
+        when completions.scaled_progress >= 0.3 then '30-39%'
+        when completions.scaled_progress >= 0.2 then '20-29%'
+        when completions.scaled_progress >= 0.1 then '10-19%'
+        else '0-9%'
+    end as completion_bucket,
+    -- learner details come from a left join, so unmatched learners keep their row
+    users.username as username,
+    users.name as name,
+    users.email as email
+from completions
+-- only completions whose course_key has a course_names entry are kept
+inner join
+    {{ ref("course_names") }} courses on completions.course_key = courses.course_key
+-- block metadata is optional; entity_name falls back to the course name without it
+left join
+    {{ ref("course_block_names") }} blocks on completions.entity_id = blocks.location
+left outer join
+    {{ ref("dim_user_pii") }} users
+    on toUUID(completions.actor_id) = users.external_user_id
diff --git a/models/completion_aggregator/schema.yml b/models/completion_aggregator/schema.yml
new file mode 100644
index 0000000..e3ccff6
--- /dev/null
+++ b/models/completion_aggregator/schema.yml
@@ -0,0 +1,89 @@
+version: 2
+
+models:
+  - name: fact_aggregated_completions
+    database: "{{ env_var('DBT_PROFILE_TARGET_DATABASE', 'reporting') }}"
+    description: "One record per learner and entity (course or block), keeping the highest aggregated completion reached"
+    columns:
+      - name: emission_time
+        description: "Timestamp, to the second, of when this event was emitted"
+        data_type: DateTime
+      - name: org
+        data_type: String
+        description: "The organization that the course belongs to"
+      - name: course_key
+        data_type: String
+        description: "The course key for the course"
+      - name: course_name
+        data_type: String
+        description: "The name of the course"
+      - name: course_run
+        data_type: String
+        description: "The course run for the course"
+      - name: entity_id
+        description: "The block ID or course key for the entity the completion applies to"
+        data_type: String
+      - name: entity_name
+        data_type: String
+        description: "The name of the entity the completion applies to (course or block)"
+      - name: entity_name_with_location
+        data_type: Nullable(String)
+        description: "The entity's display name with section, subsection, and unit prepended to the name. This provides additional context when looking at block names and can help data consumers understand which block they are analyzing"
+      - name: actor_id
+        data_type: String
+        description: "The xAPI actor identifier"
+      - name: scaled_progress
+        description: "A ratio between 0 and 1, inclusive, of the learner's progress"
+        data_type: Float32
+      - name: completion_bucket
+        description: "A displayable value of progress sorted into 10% buckets. Useful for grouping progress together to show high-level learner performance"
+        data_type: String
+      - name: completed
+        description: "Flag indicating whether the object has been fully completed"
+        data_type: Bool
+        data_tests:
+          - is_completed:
+              progress_field: "scaled_progress"
+      - name: username
+        data_type: String
+        description: "The username of the learner"
+      - name: name
+        data_type: String
+        description: "The full name of the learner"
+      - name: email
+        data_type: String
+        description: "The email address of the learner"
+
+  - name: aggregated_completion_events
+    description: "A materialized view for xAPI events related to aggregated completions"
+    columns:
+      - name: event_id
+        data_type: uuid
+        description: "The unique identifier for the event"
+      - name: emission_time
+        data_type: datetime
+        description: "The time the event was emitted"
+      - name: actor_id
+        data_type: string
+        description: "The xAPI actor identifier"
+      - name: object_id
+        data_type: string
+        description: "The xAPI object identifier"
+      - name: course_key
+        data_type: string
+        description: "The course identifier"
+      - name: org
+        data_type: string
+        description: "The organization that the course belongs to"
+      - name: verb_id
+        data_type: string
+        description: "The xAPI verb identifier"
+      - name: progress_percent
+        data_type: string
+        description: "The percentage of the xAPI object completed"
+      - name: completed
+        description: "Flag indicating whether the object has been fully completed"
+        data_type: Bool
+        data_tests:
+          - is_completed:
+              progress_field: "progress_percent"
diff --git a/models/example/my_first_dbt_model.sql b/models/example/my_first_dbt_model.sql
deleted file mode 100644
index f31a12d..0000000
--- a/models/example/my_first_dbt_model.sql
+++ /dev/null
@@ -1,27 +0,0 @@
-
-/*
-    Welcome to your first dbt model!
-    Did you know that you can also configure models directly within SQL files?
-    This will override configurations stated in dbt_project.yml
-
-    Try changing "table" to "view" below
-*/
-
-{{ config(materialized='table') }}
-
-with source_data as (
-
-    select 1 as id
-    union all
-    select null as id
-
-)
-
-select *
-from source_data
-
-/*
-    Uncomment the line below to remove records with null `id` values
-*/
-
--- where id is not null
diff --git a/models/example/my_second_dbt_model.sql b/models/example/my_second_dbt_model.sql
deleted file mode 100644
index c91f879..0000000
--- a/models/example/my_second_dbt_model.sql
+++ /dev/null
@@ -1,6 +0,0 @@
-
--- Use the `ref` function to select from other models
-
-select *
-from {{ ref('my_first_dbt_model') }}
-where id = 1
diff --git a/models/example/schema.yml b/models/example/schema.yml
deleted file mode 100644
index 2a53081..0000000
--- a/models/example/schema.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-
-version: 2
-
-models:
-  - name: my_first_dbt_model
-    description: "A starter dbt model"
-    columns:
-      - name: id
-        description: "The primary key for this table"
-        tests:
-          - unique
-          - not_null
-
-  - name: my_second_dbt_model
-    description: "A starter dbt model"
-    columns:
-      - name: id
-        description: "The primary key for this table"
-        tests:
-          - unique
-          - not_null
diff --git a/tests/generic/test_is_completed.sql b/tests/generic/test_is_completed.sql
new file mode 100644
index 0000000..f8e3b23
--- /dev/null
+++ b/tests/generic/test_is_completed.sql
@@ -0,0 +1,27 @@
+{% test is_completed(model, progress_field, column_name="completed") %}
+{# Returns rows flagged as completed whose progress is still below 100 (0-100 scale). #}
+with validation as (
+    -- expose the two inputs under fixed names, whatever the model calls them
+    select
+        {{ column_name }} as completed,
+        {{ progress_field }} as progress
+
+    from {{ model }}
+
+),
+
+validation_errors as (
+
+    select
+        completed, progress
+
+    from validation
+    -- if this is true, then progress isn't being captured correctly;
+    -- toString/cast let the check accept both string and typed columns
+    where toString(completed) = 'true' and cast(progress as Float) < 100
+)
+
+select completed, progress
+from validation_errors
+
+{% endtest %}

From 5cf7331bd1df62959c7fe5ea3781e5134b24e37f Mon Sep 17 00:00:00 2001
From: Jillian Vogel <jill@opencraft.com>
Date: Tue, 25 Jun 2024 16:29:43 +0930
Subject: [PATCH 4/6] fix: remove schema from sql

We only need the database name.
---
 models/completion_aggregator/aggregated_completion_events.sql | 1 -
 1 file changed, 1 deletion(-)

diff --git a/models/completion_aggregator/aggregated_completion_events.sql b/models/completion_aggregator/aggregated_completion_events.sql
index 48749c5..4eafdff 100644
--- a/models/completion_aggregator/aggregated_completion_events.sql
+++ b/models/completion_aggregator/aggregated_completion_events.sql
@@ -1,7 +1,6 @@
 {{
     config(
         materialized="materialized_view",
-        schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
         engine=aspects.get_engine("ReplacingMergeTree()"),
         primary_key="(org, course_key, verb_id)",
         order_by="(org, course_key, verb_id, emission_time, actor_id, object_id, event_id)",

From 66e4d0f5269df94b3da862451c861d94ea1b15b2 Mon Sep 17 00:00:00 2001
From: Jillian Vogel <jill@opencraft.com>
Date: Tue, 25 Jun 2024 21:33:19 +0930
Subject: [PATCH 5/6] fix: view only the highest level of completion reached
 for each block + user

and adds a test
---
 .../completion_aggregator/fact_aggregated_completions.sql   | 6 +++++-
 tests/test_completion_uniqueness.sql                        | 5 +++++
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_completion_uniqueness.sql

diff --git a/models/completion_aggregator/fact_aggregated_completions.sql b/models/completion_aggregator/fact_aggregated_completions.sql
index de2f2eb..d2c1777 100644
--- a/models/completion_aggregator/fact_aggregated_completions.sql
+++ b/models/completion_aggregator/fact_aggregated_completions.sql
@@ -11,7 +11,10 @@ with
                 splitByString('/course/', object_id)[-1],
                 splitByString('/xblock/', object_id)[-1]
             ) as entity_id,
-            cast(progress_percent as Float) / 100 as scaled_progress
+            cast(progress_percent as Float) / 100 as scaled_progress,
+            row_number() over (
+                partition by org, entity_id, actor_id order by scaled_progress desc
+            ) as rn
         from {{ ref("aggregated_completion_events") }}
     )
 
@@ -59,3 +62,4 @@ left join
 left outer join
     {{ ref("dim_user_pii") }} users
     on toUUID(completions.actor_id) = users.external_user_id
+where rn = 1
diff --git a/tests/test_completion_uniqueness.sql b/tests/test_completion_uniqueness.sql
new file mode 100644
index 0000000..ca63736
--- /dev/null
+++ b/tests/test_completion_uniqueness.sql
@@ -0,0 +1,5 @@
+-- Singular test: fails if any (org, entity_id, actor_id) has more than one row.
+select org, entity_id, actor_id, count(*) as num_rows
+from {{ ref("fact_aggregated_completions") }}
+group by org, entity_id, actor_id
+having num_rows > 1

From 9e223e5828dad2f9d5fb01a91c37c7b45f780e70 Mon Sep 17 00:00:00 2001
From: Jillian Vogel <jill@opencraft.com>
Date: Thu, 27 Jun 2024 19:44:02 +0930
Subject: [PATCH 6/6] feat: add section_subsection_name

to use as label for subsection chart
---
 models/completion_aggregator/fact_aggregated_completions.sql | 1 +
 models/completion_aggregator/schema.yml                      | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/models/completion_aggregator/fact_aggregated_completions.sql b/models/completion_aggregator/fact_aggregated_completions.sql
index d2c1777..d1a2039 100644
--- a/models/completion_aggregator/fact_aggregated_completions.sql
+++ b/models/completion_aggregator/fact_aggregated_completions.sql
@@ -29,6 +29,7 @@ select
     if(
         blocks.block_name != '', blocks.display_name_with_location, null
     ) as entity_name_with_location,
+    blocks.display_name_with_location as section_subsection_name,
     completions.actor_id as actor_id,
     cast(completions.scaled_progress as Float) as scaled_progress,
     case
diff --git a/models/completion_aggregator/schema.yml b/models/completion_aggregator/schema.yml
index e3ccff6..a3eaa93 100644
--- a/models/completion_aggregator/schema.yml
+++ b/models/completion_aggregator/schema.yml
@@ -29,6 +29,9 @@ models:
       - name: entity_name_with_location
         data_type: Nullable(String)
         description: "The entity's display name with section, subsection, and unit prepended to the name. This provides additional context when looking at block names and can help data consumers understand which block they are analyzing"
+      - name: section_subsection_name
+        data_type: Nullable(String)
+        description: "The block's display name with section, subsection, and unit prepended; used as the label for the subsection chart"
       - name: actor_id
         data_type: String
         description: "The xAPI actor identifier"