feat: tc_to_fixture dumps to rc_variables.txt

OxfordRSE · Nov 18, 2024 · 0a486e4 · 0a486e4
1 parent ed2be7c
commit 0a486e4
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -29,6 +29,34 @@ mailgun_secret       = "your_mailgun_secret_value"
 
 The project converts True Colours data to REDCap data.
 REDCap doesn't allow direct database access, however, so we need to create fake instruments in REDCap to store the data.
+
+
+### The quick way
+
+The quick way to set up the REDCap project is still quite slow, but it's faster than the long way.
+
+1. Make sure you have all the questionnaires you need in your test dataset (`tests/fixtures/*.csv`).
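+2. Run `tests/fixtures/tc_to_fixture.py` from inside the `tests/fixtures` directory (the script reads its input and writes its output relative to the current working directory).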
+3. Open the `tests/fixtures/rc_variables.txt` file created by the script.
+
+```text
+###### instrument_name ######
+field_1_name
+field_2_name
+field_3_name
+...
+```
+
+Each instrument is listed in the format shown above.
+Each instrument must be created individually in REDCap.
+You should use `instrument_name` as the name of the instrument in REDCap, although this is not strictly necessary.
+The field names, however, must be copied exactly as they appear in the file.
+Most field names are prefixed with their instrument name (fields in the `private` instrument are the exception).
+
+When the data are exported from REDCap, the field names will help identify which instrument they belong to.
+
+### The long way
+
 Create a `private` instrument in REDCap with Text Box fields with these Variable Names:
 
 | Field Name         | True Colours `patient.csv` field | Contains personal information? |
@@ -45,16 +73,16 @@ Create a `private` instrument in REDCap with Text Box fields with these Variable
 This allows us to query REDCap for the `id` and link it to the internal `study_id`.
 This in turn allows us to identify whether a participant is already in the database.
 
-### Other questionnaires
+#### Other questionnaires
 
-#### We save **scores** only
+##### We save **scores** only
 
 The data in REDCap are the **scores** for items on questionnaires. 
 This means that reverse-coded items, etc. are already accounted for.
 
 To recover the actual answers that a participant entered, refer to the data dictionary for the scale of interest.
 
-#### Instrument structure
+##### Instrument structure
 
 The instruments only exist as a framework for holding data exported from True Colours. 
 This means that we need to provide a very specific structure:
@@ -67,8 +95,7 @@ This means that we need to provide a very specific structure:
 - Include `_score_` fields for any scores or subscale scores that are calculated in True Colours
   - E.g. `phq9_score_total_float`
 
-
-#### Using the REDCap data
+### Using the REDCap data
 
 REDCap records are always exported in `string` format. 
 This means that the data may have to be parsed to be useful.

diff --git a/tests/fixtures/tc_to_fixture.py b/tests/fixtures/tc_to_fixture.py
@@ -1,3 +1,6 @@
+from trd_cli.main_functions import compare_tc_to_rc
+
+
 def patient_to_fixture(patient_csv_data: dict) -> dict:
     """
     Return a dictionary of redacted patient data.
@@ -42,3 +45,29 @@ def tc_to_fixture(tc_data: dict) -> dict:
         json.dump(tc_to_fixture(parse_tc(".")), f, indent=4)
     with open("./tc_data_initial.json", "w+") as f:
         json.dump(parse_tc("./initial_tc_dump"), f, indent=4)
+
+    # Dump a list of the variables REDCap needs to have available to import data.
+    # This aids in setting up the REDCap project.
+    pp, rr = compare_tc_to_rc(parse_tc("."), redcap_id_data=list())
+    rc_variables_list = {}
+    for p in pp.values():
+        rc_variables_list["private"] = set(p["private"].keys())
+        rc_variables_list["info"] = set(p["info"].keys())
+        break
+    for r in rr:
+        if r["redcap_repeat_instrument"] not in rc_variables_list:
+            rc_variables_list[r["redcap_repeat_instrument"]] = set(r.keys())
+        else:
+            [rc_variables_list[r["redcap_repeat_instrument"]].add(k) for k in r.keys()]
+
+    redcap_vars = ["study_id", "redcap_repeat_instrument", "redcap_repeat_instance"]
+    for k, v in rc_variables_list.items():
+        rc_variables_list[k] = [x for x in v if x not in redcap_vars]
+    with open("./rc_variables.txt", "w+") as f:
+        f.flush()
+        for k in rc_variables_list.keys():
+            f.write(f"###### {k} ######\n")
+            vv = sorted(rc_variables_list[k])
+            for x in vv:
+                f.write(f"{x}\n")
+            f.write("\n")
diff --git a/trd_cli/main_functions.py b/trd_cli/main_functions.py
@@ -1,5 +1,5 @@
 import subprocess
-from typing import Tuple
+from typing import Tuple, List
 
 from trd_cli.conversions import QUESTIONNAIRES, extract_participant_info, questionnaire_to_rc_record, get_code_by_name
 from trd_cli.parse_tc import parse_tc
@@ -72,12 +72,12 @@ def get_true_colours_data(true_colours_secret, true_colours_url, true_colours_us
     return tc_data
 
 
-def compare_tc_to_rc(tc_data: dict, redcap_id_data: dict) -> Tuple[dict, list]:
+def compare_tc_to_rc(tc_data: dict, redcap_id_data: List[dict]) -> Tuple[dict, list]:
     """
     Compare the True Colours data to the REDCap data.
     
-    :param redcap_id_data: parsed data exported from REDCap
     :param tc_data: parsed data exported from True Colours
+    :param redcap_id_data: parsed data exported from REDCap
     :return: a tuple of new_participants, new_responses
         new_participants is a dictionary of participant_id: {private, info} where private and info are dictionaries
         of data to be uploaded to REDCap. These need a new study_id generated by REDCap to be added before upload.