diff --git a/.github/issue_template.yaml b/.github/issue_template.yaml new file mode 100644 index 0000000..864762d --- /dev/null +++ b/.github/issue_template.yaml @@ -0,0 +1,46 @@ +name: Bug Report +description: Report a bug +title: "[Bug]: " +labels: ["bug"] +assignees: + - danielrazavi + - rohanalexander +body: + - type: markdown + attributes: + value: | + Please fill out the sections below to help everyone identify and fix the bug + - type: textarea + id: description + attributes: + label: Describe your issue + placeholder: When I click here this happens + validations: + required: true + - type: textarea + id: steps + attributes: + label: Steps to reproduce + placeholder: | + 1. Go to page X + 2. Click here + 3. Click there + validations: + required: true + - type: textarea + id: expected + attributes: + label: What was the expected result? + placeholder: I expected this to happen + - type: textarea + id: screenshots + attributes: + label: Put here any screenshots or videos (optional) + - type: textarea + id: assignee + attributes: + label: Put here the code owner you'd like to review this issue. + - type: markdown + attributes: + value: | + Thanks for reporting this issue! We will get back to you as soon as possible. \ No newline at end of file diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..dbca4b5 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,14 @@ +## What changes are you trying to make? (e.g. Adding or removing code, refactoring existing code, adding reports) + +## What did you learn from the changes you have made? + +## Was there another approach you were thinking about making? If so, what approach(es) were you thinking of? + +## Were there any challenges? If so, what issue(s) did you face? How did you overcome it? + +## How were these changes tested? 
+ +## A reference to a related issue in your repository (if applicable) + +## Checklist +- [ ] I can confirm that my changes are working as intended diff --git a/.github/workflows/autograder.yml b/.github/workflows/autograder.yml new file mode 100644 index 0000000..565cd32 --- /dev/null +++ b/.github/workflows/autograder.yml @@ -0,0 +1,46 @@ +name: Assignment autograder +on: + pull_request: + branches: + - main + types: [opened, synchronize, reopened] +# The grading step posts its result as a PR comment using GITHUB_TOKEN, so grant that explicitly instead of relying on the repository default. +permissions: + contents: read + pull-requests: write +jobs: + autograder: + name: Assignment autograder + if: startsWith(github.head_ref, 'assignment') + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r 03_instructional_team/autograder/requirements.txt + - name: Setup working directory for script + run: | + UUID=$(uuidgen) + echo "UUID=$UUID" >> "$GITHUB_ENV" + mkdir "/tmp/$UUID" + cp "$GITHUB_WORKSPACE/02_activities/assignments/assignment.sh" "/tmp/$UUID" + - name: Run assignment script + run: | + bash -c "bash assignment.sh 2>&1 | tee /tmp/${{ env.UUID }}_output.txt" + working-directory: /tmp/${{ env.UUID }} + - name: Grade responses + id: grade + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO_OWNER: ${{ github.repository_owner }} + REPO_NAME: ${{ github.event.repository.name }} + PR_NUMBER: ${{ github.event.pull_request.number }} + REPO_BRANCH: ${{ github.event.pull_request.head.ref }} + WORKING_DIR: /tmp/${{ env.UUID }} + run: | + wget -O /tmp/autograder.py https://github.com/UofT-DSI/shell/raw/refs/heads/main/03_instructional_team/autograder/autograder.py + python /tmp/autograder.py diff --git a/.github/workflows/automatic_pr_comment.yaml b/.github/workflows/automatic_pr_comment.yaml new file mode 100644 index 0000000..0fb764c --- /dev/null +++ b/.github/workflows/automatic_pr_comment.yaml @@ -0,0 +1,25 @@ +name: UofT-DSI Main Repository PR Workflow + +on: + pull_request_target: + types: 
[opened, synchronize] + +jobs: + comment: + if: github.repository_owner == 'UofT-DSI' + runs-on: ubuntu-latest + steps: + - name: Comment on PR + uses: actions/github-script@v6 + with: + script: | + const issue_number = context.payload.pull_request.number; + const repo = context.repo; + const commentBody = `Hello, thank you for your contribution. If you are a participant, please close this pull request and open it in your own forked repository instead of here. Please read the instructions on your onboarding [Assignment Submission Guide](https://github.com/UofT-DSI/onboarding/blob/main/onboarding_documents/submissions.md) more carefully. If you are not a participant, please give us up to 72 hours to review your PR. Alternatively, you can reach out to us directly to expedite the review process.`; + // The job-level `if` already limits this to the UofT-DSI organization; await the request so a failed comment fails the step instead of being lost + await github.rest.issues.createComment({ + owner: repo.owner, + repo: repo.repo, + issue_number: issue_number, + body: commentBody + }); \ No newline at end of file diff --git a/.gitignore b/.gitignore index e43b0f9..a3442bc 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,18 @@ .DS_Store +.vscode/ +03_instructional_team/lessons/pdf/* +03_instructional_team/lessons/html/* + +# local node modules +node_modules/ +package-lock.json +package.json + +### Running the assignment script unzips a large number of files into the tree +# Ignore all files in shell/02_activities/assignments/ +02_activities/assignments/* + +# Un-ignore the specific files +!02_activities/assignments/assignment.sh +!02_activities/assignments/assignment_instructions.md + diff --git a/01_materials/slides/optional_unix_slides.pdf b/01_materials/slides/optional_unix_slides.pdf new file mode 100644 index 0000000..2cab6d0 Binary files /dev/null and b/01_materials/slides/optional_unix_slides.pdf differ diff --git a/01_materials/slides/unix_slides.pdf b/01_materials/slides/unix_slides.pdf new file mode 100644 index 0000000..ad8a49e Binary files 
/dev/null and b/01_materials/slides/unix_slides.pdf differ diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh new file mode 100644 index 0000000..310694c --- /dev/null +++ b/02_activities/assignments/assignment.sh @@ -0,0 +1,47 @@ +#!/bin/bash +set -x + +############################################ +# DSI CONSULTING INC. Project setup script # +############################################ +# This script creates standard analysis and output directories +# for a new project. It also creates a README file with the +# project name and a brief description of the project. +# Then it unzips the raw data provided by the client. + +mkdir analysis output +touch README.md +echo "# Project Name: DSI Consulting Inc." > README.md +touch analysis/main.py + +# download client data +curl -Lo rawdata.zip https://github.com/UofT-DSI/shell/raw/refs/heads/main/02_activities/assignments/rawdata.zip +unzip rawdata.zip + +########################################### +# Complete assignment here + +# 1. Create a directory named data +mkdir data +# 2. Move the ./rawdata directory to ./data/raw +mv ./rawdata ./data/raw +# 3. List the contents of the ./data/raw directory +ls ./data/raw +# 4. In ./data/processed, create the following directories: server_logs, user_logs, and event_logs +mkdir ./data/processed +mkdir ./data/processed/server_logs ./data/processed/user_logs ./data/processed/event_logs +# 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs +cp ./data/raw/*server*.log ./data/processed/server_logs +# 6. Repeat the above step for user logs and event logs +cp ./data/raw/*user*.log ./data/processed/user_logs +cp ./data/raw/*event*.log ./data/processed/event_logs +# 7. 
For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +rm -rf ./data/raw/*ipaddr* +rm -rf ./data/processed/user_logs/*ipaddr* +# 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed +ls ./data/processed/*/* > ./data/inventory.txt + + +########################################### + +echo "Project setup is complete!" diff --git a/02_activities/assignments/assignment_instructions.md b/02_activities/assignments/assignment_instructions.md new file mode 100644 index 0000000..9c44240 --- /dev/null +++ b/02_activities/assignments/assignment_instructions.md @@ -0,0 +1,93 @@ +# Shell / Git Assignment +You work in the data team at a consulting firm, and one of your team's products is helping companies optimize and manage their cloud hosting expenditures. + +Your team has an existing bash script that initializes an analysis directory for each new client. + +You've been asked to update this script to also automate the initial organization of data files provided by clients. + +## Instructions + +### Setup +1. **Forking the Repository**: If you have not already done so, fork this Shell learning module repository following these [instructions](https://github.com/UofT-DSI/onboarding/blob/main/onboarding_documents/submissions.md#setting-up). + - Forking creates a copy of the main repository in your GitHub account. This allows you to work on your version without affecting the original repository. +2. **Create a Branch for Your Work**: To keep your changes organized, create a new branch named `assignment`: + ```bash + git checkout -b assignment + ``` + +--- + +### Part 1: Update the Data Ingest Script +1. **Modify the Script**: Using the template in ``02_activities/assignments/assignment.sh``, fill in the correct commands as described by the comments. 
+ - It may help to paste your commands into the Terminal as you write your script (or vice versa) to test as you go. +2. **Test Your Script Locally**: Execute your script to ensure it works as expected. You may need to make additional tweaks and re-run it until you are satisfied with the results. +3. **Commit Your Changes**: As you complete parts of your script and confirm that they work correctly, commit your changes to ensure your progress is saved. + ```bash + git commit assignment.sh -m "describe your changes here" + ``` + +--- + +### Part 2: Merge in Updates from Your Coworkers +Your coworkers have made some other changes to the script. You'll need to incorporate their updates and resolve any conflicts. +1. **Merge the Updates**: Use the following command to pull in changes from the coworker's branch: + ```bash + git pull https://github.com/UofT-DSI/shell coworker-changes --no-rebase + ``` +2. **Resolve Merge Conflicts**: If there are any conflicts, use ```git status``` to see which files are affected, resolve the conflicts manually, and then mark them as resolved with ```git add```. +3. **Commit the Merge**: Once all conflicts are resolved, commit the merge. + +--- + +### Part 3: Test Your Script +1. **Re-test Your Script**: Make sure that your script still works after merging the updates. +2. **Optional Clean Test Setup**: If you'd like to test your script in a clean environment, follow these steps: + ```bash + mkdir assignment_test_clean # make an empty directory + ``` + ```bash + cp assignment.sh assignment_test_clean # copy your script into the empty directory + ``` + ```bash + cd assignment_test_clean # change your working directory to the new clean directory + ``` +3. **Run Your Script**: + ```bash + bash assignment.sh + ``` +4. **Verify the Output**: + - Check if the expected directories are created. + - Verify that files are moved or copied as expected. + - Ensure that files that should be deleted are no longer present. 
+ +--- + +### Submit Your Changes for Review +1. **Commit Your Final Changes**: Ensure all changes are committed in your `assignment` branch. You can verify this with: + ```bash + git status + ``` +2. **Enable GitHub Actions**: Click on the **Actions** tab in your repository and enable workflows if prompted. +3. **Create a Pull Request**: + - Open a pull request from your `assignment` branch to your repository's `main` branch. + - The autograder will run automatically and post your assignment grade as a comment. + +--- + +## Submission Information + +🚨 **Please review our [Assignment Submission Guide](https://github.com/UofT-DSI/onboarding/blob/main/onboarding_documents/submissions.md)** 🚨 for detailed instructions on how to format, branch, and submit your work. Following these guidelines is crucial for your submissions to be evaluated correctly. + +### Submission Parameters: +* Submission Due Date: `2025-01-15 - 23:59` +* The branch name for your repo should be: `assignment` +* What to submit for this assignment: + * One or more commits that update the `assignment.sh` script +* What the pull request link should look like for this assignment: `https://github.com/<your_github_username>/shell/pull/<pr_id>` + * Open a private window in your browser. Copy and paste the link to your pull request into the address bar. Make sure you can see your pull request properly. This helps the technical facilitator and learning support staff review your submission easily. + +Checklist: +- [ ] Create a branch called `assignment`. +- [ ] Ensure that your repository is public. +- [ ] Review [the PR description guidelines](https://github.com/UofT-DSI/onboarding/blob/main/onboarding_documents/submissions.md#guidelines-for-pull-request-descriptions) and adhere to them. +- [ ] Verify that your link is accessible in a private browser window. 
diff --git a/02_activities/assignments/rawdata.zip b/02_activities/assignments/rawdata.zip new file mode 100644 index 0000000..fe36b0f Binary files /dev/null and b/02_activities/assignments/rawdata.zip differ diff --git a/02_activities/homework/homework.sh b/02_activities/homework/homework.sh new file mode 100644 index 0000000..a432922 --- /dev/null +++ b/02_activities/homework/homework.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# On your terminal, input all the commands you have used to create the following: + +# 1. How would you create 5 directories? Feel free to use any name for your directories. + +# 2. How would you verify the creation of all 5 directories? + +# 3. In each directory, how would you create 5 .txt files and write "I love data" into each within the directories? + +# 4. How would you verify the presence of all 5 files? + +# 5. How would you append to one of the existing files " and machine learning!"? + +# 6. How would you verify that the text was indeed appended to the existing file? + +# 7. How would you delete all files except for the one with the appended text? + +# 8. How would you navigate back to the parent directory containing all the directories? + +# 9. How would you remove each directory along with its contents? + +# 10. How would you verify that all directories and files have been deleted? diff --git a/03_instructional_team/README.md b/03_instructional_team/README.md new file mode 100644 index 0000000..d16e11b --- /dev/null +++ b/03_instructional_team/README.md @@ -0,0 +1,54 @@ +# Instructional Team Playbook + +## How do you interact with the repo? +The Technical Facilitator will deliver the content in the `/01_materials/slides` directory. You are encouraged to live code with participants during live sessions. Please ensure that live coding files are uploaded to a new directory called `/live_code` under `/04_cohort_three` in this repository using a new branch. Please open a pull request for it to be merged. + +## How does the module flow? 
+The module is organized into 3 main directories: +1. Slides +2. Assignments +3. Homework + +The `/01_materials/slides` directory contains the live learning session slides. + +The `/02_activities/assignments` directory contains assignments participants should submit for evaluation as `complete` or `incomplete`. The assignments measure a participant's achievement of the learning outcomes, and help technical facilitators determine if a participant has successfully completed the learning module. + +The `/02_activities/homework` directory contains homework participants can complete to further develop and practice the skills covered in a learning module. Homework is optional, but participants are encouraged to complete as much as they can. + +### Week 1 + +#### Live Learning Session +The focus of the live learning session is to introduce participants to basic bash commands such as `cd` and `pwd`, as well as creating files using `touch` and directories using `mkdir`. The module also introduces intermediate commands like `rm`, how to recursively remove files from a nested directory, how to make copies of files using `cp`, and how to move files using `mv`. + +Note: If someone wants to open up the "manual", recommend they use the `man` command. + +#### Assignment +The assignment for this week is to have participants find the secret password by navigating directories and files. + +## How do you assign assignments? +Technical Facilitators are encouraged to introduce assignments as early as possible in the learning module. The Technical Facilitator should describe the assignment to participants and explain how the topics covered in the module will equip them with the knowledge and skills to complete the assignment. + +## How is an assignment expected to be completed and delivered? +Participants are expected to complete the assignment by the end of the first week. They will deliver the assignment by opening a pull request on their copied repo. 
The participants will also add a Learning Support Staff as a reviewer indicating they delivered a completed assignment, and it is ready to be evaluated as `complete` or `incomplete`. + +## What are the criteria for `complete` or `incomplete`? +An assignment is `complete` if all parts of the program are working and nothing in the code is broken; otherwise it is `incomplete`. For some assignments, a rubric is provided outlining the criteria needed to assess an assignment as incomplete. + +## How to evaluate? +If the participant's solution works, then their assignment should be assessed as `complete`! Technical Facilitators and the Learning Support Staff should focus on providing constructive feedback to participants who need to improve their code. If the solution does not work, then the assignment is `incomplete`. Facilitators should provide constructive feedback on their existing code, and guide them to get their solution working. + +## How will feedback be given? +Feedback should be given through the pull request a participant has made. Technical Facilitators are encouraged to allow participants to make revisions if needed. In order to maximize learning, feedback should be constructive and specific. + +
+ +## Definitions +**Live Learning Sessions**: A Live Learning Session is a synchronous period of time, lasting up to 2.5 hours, where the Technical Facilitator will facilitate and deliver the content and learning outcomes online through Zoom. Participants are encouraged to participate and ask questions as they learn. 10 minute breaks are encouraged once per hour. + +**Work Period**: A Work Period is an asynchronous period of time, lasting up to 3 hours. Participants will work on assignments and/or homework during this block of time. Learning Support Staff are to be present online through Zoom to assist participants and answer any questions they may have. As work periods are asynchronous and flexible, participants can choose to work on their own time. However, it is encouraged that they work during the block of time when a Learning Support Staff is present. + +**Assignments**: An Assignment is work assigned as part of the learning modules. They provide an opportunity for participants to integrate and synthesize what they have learned throughout the week to meet the set learning outcomes. + +## generate_slides.sh + +This script is designed to convert Markdown files located in a specified folder into slide presentations using Marp CLI, allowing for the generation of either HTML or PDF formats based on user input. It includes an option to apply a custom CSS theme to the slides by specifying a theme path. The script also provides a help function detailing its usage, options, and examples for convenience. It ensures the necessary directories exist, validates the presence of Marp CLI on the system, processes each Markdown file found in the specified directory, and outputs the generated slides into a designated output folder, displaying the status of each operation and a completion message. The only configuration needed is to set where the md files are and where you would like the pdf/html files to be placed. 
diff --git a/03_instructional_team/autograder/autograder.py b/03_instructional_team/autograder/autograder.py new file mode 100644 index 0000000..e528070 --- /dev/null +++ b/03_instructional_team/autograder/autograder.py @@ -0,0 +1,311 @@ +import pandas as pd +import os +import requests +import glob + +# get environment variables for output +github_step_output = os.environ['GITHUB_STEP_SUMMARY'] +github_token = os.environ["GITHUB_TOKEN"] +github_repo_owner = os.environ["REPO_OWNER"] +github_repo_name = os.environ["REPO_NAME"] +github_repo_branch = os.environ["REPO_BRANCH"] +github_pr_number = os.environ["PR_NUMBER"] +working_dir = os.environ["WORKING_DIR"] + +status_c = '✅' +status_i = '❌' + + +# functions +def is_commit_in_branch( + owner, + repo, + branch, + other_owner, + other_repo, + other_branch, +): + headers = {} + headers['Authorization'] = f'token {github_token}' + + # Step 1: Get the latest commit SHA of the other repository's branch + other_commit_url = f'https://api.github.com/repos/{other_owner}/{other_repo}/commits/{other_branch}' + response = requests.get(other_commit_url, headers=headers) + response.raise_for_status() + other_commit_sha = response.json()['sha'] + print(f'Commit SHA of {other_owner}/{other_repo}/{other_branch}: {other_commit_sha}') + + # Step 2: Check if this commit exists in the given repository + commit_in_repo_url = f'https://api.github.com/repos/{owner}/{repo}/commits/{other_commit_sha}' + response = requests.get(commit_in_repo_url, headers=headers) + if response.status_code == 404: + # Commit does not exist in the given repository + print(f'Commit {other_commit_sha} not found in {owner}/{repo}') + return False + response.raise_for_status() + + # Step 3: Compare the commit with the branch in the given repository + compare_url = f'https://api.github.com/repos/{owner}/{repo}/compare/main...{branch}' + response = requests.get(compare_url, headers=headers) + response.raise_for_status() + compare_data = response.json() + + commit_shas 
= [commit['sha'] for commit in compare_data['commits']] + + # If the status is 'ahead' or 'identical', the commit is in the history + print(f'Commit SHAs in {owner}/{repo}/{branch}: {commit_shas}') + return other_commit_sha in commit_shas + + +# score table +s = [] + +# load script output +with open(working_dir + '_output.txt', 'r') as f: + script_rslt = f.read() + +script_rslt = script_rslt.split('\n+') +script_rslt = [{ + 'command': x.split('\n')[0][1:].strip(), + 'output': x.split('\n')[1:] +} for x in script_rslt] + +qn = 0 + +############################################################################################################ +# Step 1: Check if 'data' directory exists +qn += 1 +if os.path.isdir(os.path.join(working_dir, 'data')): + s.append({'question': qn, 'status': 1}) +else: + s.append({ + 'question': qn, + 'status': 0, + 'comment': 'data directory does not exist' + }) + +############################################################################################################ +# Step 2: Check that 'rawdata' directory was moved to 'data/raw' +qn += 1 +if os.path.isdir(os.path.join(working_dir, 'data/raw')) and not os.path.exists( + os.path.join(working_dir, 'rawdata')): + s.append({'question': qn, 'status': 1}) +else: + s.append({ + 'question': qn, + 'status': 0, + 'comment': 'rawdata not moved to data/raw' + }) + +############################################################################################################ +# Step 4: Check that 'ls data/raw' command was run +qn += 1 +indx = [i for i, x in enumerate(script_rslt) if x['command'].startswith('ls')] +if len(indx) > 0: + if any(['data/raw' in script_rslt[i]['command'] for i in indx]): + s.append({'question': qn, 'status': 1}) + else: + s.append({ + 'question': qn, + 'status': 0, + 'comment': '`ls` command run on wrong directory' + }) +else: + s.append({'question': qn, 'status': 0, 'comment': '`ls` command not run'}) + 
+############################################################################################################ +# Step 5: Check that in 'data/processed', the directories server_logs, user_logs, and event_logs were created +qn += 1 +dirs = [ + 'data/processed/server_logs', 'data/processed/user_logs', + 'data/processed/event_logs' +] +if all([os.path.isdir(os.path.join(working_dir, d)) for d in dirs]): + s.append({'question': qn, 'status': 1}) +else: + missing_dirs = [ + d for d in dirs if not os.path.isdir(os.path.join(working_dir, d)) + ] + s.append({ + 'question': qn, + 'status': 0, + 'comment': f'Missing directories: {", ".join(missing_dirs)}' + }) + + +############################################################################################################ +# Step 6: Check that server log files were copied from 'data/raw' to 'data/processed/server_logs' +def check_logs(log_type): + raw_logs = glob.glob( + os.path.join(working_dir, f'data/raw/*{log_type}*.log')) + processed_logs = glob.glob( + os.path.join(working_dir, f'data/processed/{log_type}_logs/*')) + if len(raw_logs) == 0: + return {'status': 0, 'comment': f'No {log_type} log files in data/raw'} + else: + raw_log_files = [os.path.basename(f) for f in raw_logs] + processed_log_files = [os.path.basename(f) for f in processed_logs] + if all([f in processed_log_files for f in raw_log_files]): + return {'status': 1} + else: + return { + 'status': 0, + 'comment': f'Missing files in data/processed/{log_type}_logs' + } + + +# Check server logs +qn += 1 +result = check_logs('server') +if result['status'] == 1: + s.append({'question': qn, 'status': 1}) +else: + s.append({'question': qn, 'status': 0, 'comment': result['comment']}) + +############################################################################################################ +# Step 7: Check that user logs and event logs were copied appropriately +qn += 1 + +result_user = check_logs('user') +result_event = check_logs('event') + +if 
result_user['status'] == 1 and result_event['status'] == 1: + s.append({'question': qn, 'status': 1}) +else: + comments = [] + if result_user['status'] == 0: + comments.append(result_user['comment']) + if result_event['status'] == 0: + comments.append(result_event['comment']) + s.append({'question': qn, 'status': 0, 'comment': '; '.join(comments)}) + +############################################################################################################ +# Step 8: Check that files containing 'ipaddr' in the filename were removed from 'data/raw' and 'data/processed/user_logs' +qn += 1 + +ipaddr_files_raw = glob.glob(os.path.join(working_dir, 'data/raw/*ipaddr*')) +ipaddr_files_user_logs = glob.glob( + os.path.join(working_dir, 'data/processed/user_logs/*ipaddr*')) + +if not ipaddr_files_raw and not ipaddr_files_user_logs: + s.append({'question': qn, 'status': 1}) +else: + comments = [] + if ipaddr_files_raw: + comments.append( + 'One or more files with ipaddr in data/raw not removed.') + if ipaddr_files_user_logs: + comments.append( + 'One or more files with ipaddr in data/processed/user_logs not removed' + ) + s.append({'question': qn, 'status': 0, 'comment': '; '.join(comments)}) + +############################################################################################################ +# Step 9: Check that 'data/inventory.txt' was created and contains all files in 'data/processed' subfolders +qn += 1 + +if os.path.isfile(os.path.join(working_dir, 'data/inventory.txt')): + with open(os.path.join(working_dir, 'data/inventory.txt'), 'r') as f: + inventory_files = [line.strip() for line in f.readlines()] + + # Now, find all files in 'data/processed' and its subfolders + processed_files = [] + for root, dirs, files in os.walk( + os.path.join(working_dir, 'data/processed')): + # remove working_dir from start of root + root = root[len(working_dir) + 1:] + + for name in files: + processed_files.append(name) + + foldername_in_inventory = ['data/processed' in x 
for x in inventory_files] + files_in_inventory = [ + any([f in x for x in inventory_files]) for f in processed_files + ] + + if foldername_in_inventory and all(files_in_inventory): + s.append({'question': qn, 'status': 1}) + else: + s.append({ + 'question': + qn, + 'status': + 0, + 'comment': + 'data/inventory.txt does not contain all files in data/processed' + }) +else: + s.append({ + 'question': qn, + 'status': 0, + 'comment': 'data/inventory.txt does not exist' + }) + +############################################################################################################ +# Step 10: Check if the coworker's commit ID is in the commit history +qn += 1 + +try: + # Check if commit_id is in git rev-list HEAD using grep and wc -l + result = is_commit_in_branch( + github_repo_owner, + github_repo_name, + github_repo_branch, + 'UofT-DSI', + 'shell', + 'coworker-changes', + ) + if result: + s.append({'question': qn, 'status': 1}) + else: + s.append({ + 'question': + qn, + 'status': + 0, + 'comment': + f'`coworker-changes` branch not found in commit history' + }) + +except Exception as e: + s.append({ + 'question': qn, + 'status': 0, + 'comment': f'Error checking git commit history.' 
+ }) + print(f"Error checking git commit history: {e}") + +############################################################################################################ + +### Postprocessing ### +df = pd.DataFrame(s) +df.fillna('', inplace=True) + +# compute percentage correct +correct = df['status'].sum() +total = df.shape[0] + +# output the score table +df['status'] = df['status'].replace({1: status_c, 0: status_i}) +df.to_markdown(github_step_output, index=False) + +# also display markdown to console +render_md = df.to_markdown(index=False) +print(render_md) + +# create GitHub comment with markdown +headers = { + "Authorization": f"Bearer {github_token}", + "Accept": "application/vnd.github+json" +} +requests.post( + f"https://api.github.com/repos/{github_repo_owner}/{github_repo_name}/issues/{github_pr_number}/comments", + json={"body": "## Autograder results\n" + render_md}, + headers=headers) + +if correct == total: + print('All tests passed!') + exit(0) +else: + print(f'Only {correct}/{total} tests passed.') + exit(0) diff --git a/03_instructional_team/autograder/requirements.txt b/03_instructional_team/autograder/requirements.txt new file mode 100644 index 0000000..60edec3 --- /dev/null +++ b/03_instructional_team/autograder/requirements.txt @@ -0,0 +1,3 @@ +pandas +tabulate +requests diff --git a/03_instructional_team/generate_slides.sh b/03_instructional_team/generate_slides.sh new file mode 100755 index 0000000..12c0199 --- /dev/null +++ b/03_instructional_team/generate_slides.sh @@ -0,0 +1,182 @@ +#!/bin/bash + +# CONFIGURATION +folder_md="markdown_slides/" +folder_output="../01_materials/slides" # This will be used for both PDF and HTML + +# Clear the screen for the splash screen +clear + +# Display the splash screen +echo "============================================" +echo " Generating Slides" +echo "============================================" +echo "" +echo "Configuration:" +echo " - Markdown Folder: $folder_md" +echo " - Output Folder: 
$folder_output" +echo "" +echo "To change the configuration, please manually change them in the file." +echo "============================================" +echo " Processing..." +echo -e "============================================\n\n" + +# ---------- # +# Generate HTML or PDF versions of slides, based on input flag. +# Usage: +# To generate HTML: ./generate_slides.sh --html [--theme theme_path] +# To generate PDF: ./generate_slides.sh --pdf [--theme theme_path] +# For help: ./generate_slides.sh --help +# ---------- # + +function show_help() { + echo -e "\n\nUsage: $0 --html|--pdf [--theme theme_path] [--help]" + echo "" + echo "This script converts Markdown files to slide presentations using Marp CLI." + echo "It supports generating slides in HTML or PDF format and allows applying" + echo "a custom CSS theme." + echo "" + echo "Options:" + echo " --html Generate slides in HTML format. This option" + echo " processes all Markdown files in the 'markdown_slides'" + echo " directory, outputting HTML files." + echo "" + echo " --pdf Generate slides in PDF format. Similar to --html," + echo " but outputs PDF files instead." + echo "" + echo " --theme theme_path Apply a custom CSS theme to the slides. The" + echo " 'theme_path' should be the path to the CSS file." + echo " This is optional and can be used with either" + echo " --html or --pdf options." + echo "" + echo " --help Display this detailed help message and exit." + echo "" + echo "Examples:" + echo " Generate HTML slides with a custom theme:" + echo " $0 --html --theme /path/to/theme.css" + echo "" + echo " Generate PDF slides without a theme:" + echo " $0 --pdf" + echo "" + echo "Note:" + echo " Ensure Node.js is installed and npm is accessible in your system's PATH." 
+ echo " The script processes Markdown (.md) files located in the 'markdown_slides'" + echo -e " directory, preserving filenames but changing extensions to .html or .pdf.\n\n" +} + +if [ "$#" -lt 1 ]; then + echo "Error: the correct number of parameters isn't passed in." + show_help + exit 1 +fi + +# Check for Marp CLI installation +if ! command -v npx >/dev/null 2>&1; then + echo "- Error: npx is not installed. Please install Node.js to proceed." + exit 1 +fi + +if ! npx marp --version >/dev/null 2>&1; then + echo "- Marp CLI is not installed. Installing Marp CLI in this repository..." + npm install --no-save @marp-team/marp-cli +fi + +# Defaults +output_type="" +theme_path="" + +# Process command-line arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --html|--pdf) + output_type="$1" + shift # Remove argument name from processing + ;; + --theme) + if [ -n "$2" ] && [ "${2:0:1}" != "-" ]; then + theme_path="$2" + shift 2 # Remove both argument name and value from processing + else + echo "Error: '--theme' requires a non-empty option argument." + exit 1 + fi + ;; + --help) + show_help + exit 0 + ;; + *) + # Unknown option + echo "Error: Unknown option: $1" + show_help + exit 1 + ;; + esac +done + +# Validate output type +if [ -z "$output_type" ]; then + echo "Error: You must specify either --html or --pdf." + show_help + exit 1 +fi + +# Create the output folder if it does not exist +if [ ! -d "$folder_output" ]; then + echo "- Creating output folder: '$folder_output'" + mkdir -p "$folder_output" +else + echo "- Output folder '$folder_output' already exists." +fi + +echo "- Starting slides generation..." + +# Get list of Markdown files +markdown_files=$(find "$folder_md" -name "*.md") +if [ -z "$markdown_files" ]; then + echo "- No Markdown files found in '$folder_md'. Exiting." 
+ exit 1 +fi + +# Process each Markdown file +for markdown_file in $markdown_files; do + file_name=$(basename -- "$markdown_file") + base_name="${file_name%.md}" + echo "- Processing '$file_name'..." + output_file="$folder_output/$base_name" + + if [ "$output_type" = "--html" ]; then + # Generate HTML + output_file+=".html" + echo " - Generating HTML: $output_file" + npx marp "$markdown_file" --output "$output_file" --html --allow-local-files ${theme_path:+--theme-set $theme_path} # &> /dev/null + elif [ "$output_type" = "--pdf" ]; then + # Generate PDF + output_file+=".pdf" + echo " - Generating PDF: $output_file" + npx marp "$markdown_file" --output "$output_file" --pdf --allow-local-files --pdf-notes ${theme_path:+--theme-set $theme_path} # &> /dev/null + else + # nahhh + show_help + exit 1 +fi + + + if [ $? -eq 0 ]; then + echo " - Success: Generated '$output_file'" + else + echo " - Error: Failed to generate '$output_file'" + fi +done + +# Display the completion message with visual styling +echo -e "\n\n==================================================" +echo " Completion Status" +echo "==================================================" +echo "" +echo " - All files processed. Slides generation complete." +echo " - Generated files are located in: $folder_output" +echo "" +echo "==================================================" +echo " Done." 
+echo -e "==================================================\n\n" diff --git a/03_instructional_team/markdown_slides/images/1-terminal.png b/03_instructional_team/markdown_slides/images/1-terminal.png new file mode 100644 index 0000000..9489e29 Binary files /dev/null and b/03_instructional_team/markdown_slides/images/1-terminal.png differ diff --git a/03_instructional_team/markdown_slides/images/2-shell.png b/03_instructional_team/markdown_slides/images/2-shell.png new file mode 100644 index 0000000..21c3562 Binary files /dev/null and b/03_instructional_team/markdown_slides/images/2-shell.png differ diff --git a/03_instructional_team/markdown_slides/images/3-gitbash.png b/03_instructional_team/markdown_slides/images/3-gitbash.png new file mode 100644 index 0000000..9b56096 Binary files /dev/null and b/03_instructional_team/markdown_slides/images/3-gitbash.png differ diff --git a/03_instructional_team/markdown_slides/images/4-windows_shells.png b/03_instructional_team/markdown_slides/images/4-windows_shells.png new file mode 100644 index 0000000..5388aa5 Binary files /dev/null and b/03_instructional_team/markdown_slides/images/4-windows_shells.png differ diff --git a/03_instructional_team/markdown_slides/images/5-windows_terminals.png b/03_instructional_team/markdown_slides/images/5-windows_terminals.png new file mode 100644 index 0000000..9284555 Binary files /dev/null and b/03_instructional_team/markdown_slides/images/5-windows_terminals.png differ diff --git a/03_instructional_team/markdown_slides/images/terminology_explainer.afdesign b/03_instructional_team/markdown_slides/images/terminology_explainer.afdesign new file mode 100644 index 0000000..97d2dc9 Binary files /dev/null and b/03_instructional_team/markdown_slides/images/terminology_explainer.afdesign differ diff --git a/03_instructional_team/markdown_slides/images/terminology_explainer.afdesign~lock~ b/03_instructional_team/markdown_slides/images/terminology_explainer.afdesign~lock~ new file mode 100644 
index 0000000..cce471a Binary files /dev/null and b/03_instructional_team/markdown_slides/images/terminology_explainer.afdesign~lock~ differ diff --git a/slides/markdown/unix_slides.md b/03_instructional_team/markdown_slides/optional_unix_slides.md similarity index 79% rename from slides/markdown/unix_slides.md rename to 03_instructional_team/markdown_slides/optional_unix_slides.md index fa51580..c0411a2 100644 --- a/slides/markdown/unix_slides.md +++ b/03_instructional_team/markdown_slides/optional_unix_slides.md @@ -1,81 +1,19 @@ -# Unix Shell +--- +marp: true +theme: dsi_certificates_theme +_class: invert +paginate: true +--- +# Advanced Unix Shell ``` $ echo "Data Sciences Institute" -$ echo "by: Rachael Lam" ``` -## Unix - -### What is Unix? - -Unix was created in 1970 and since then has branched into -other versions including Linux. Linux was created from Unix -with very similar features, although there are some minor -differences in commands. - -Unix shells - more specifically bash - is a powerful tool for -quickly and easily navigating and manipulating files, scaling -automated tasks, accessing Git and processing data. - -### So what is the shell? - -The shell is any user interface/program that takes an input from -the user, translates it into instructions that the operating -system can understand, and conveys the output back to the -user. - -There are various types of user interfaces: - -- graphical user interfaces (GUI) -- touch screen interfaces -- command line interfaces (CLI) - -### And what is bash? - -We'll be focusing on command line interfaces (CLI), more -specifically bash, which stands for Bourne Again SHell. - -We'll also need a terminal emulator to interact with the shell. -This is most likely called terminal on our menu. - -### Let's get started! - -First, we'll open our terminal. 
As mentioned earlier, this is most -likely called terminal and can be found by searching our -computer, which on a Mac would be through cmd + space - -Let's take a look at the terminal. What do we notice? - -- last login -- name -- location -- shell - -### Looking at the Shell - -If we type echo $SHELL in our terminal, the output will tell us -what shell we are working with. Most often, our shell will -already be bash but in newer Macs, it could be zsh which is -almost identical to bash. We can also see where bash is -located by typing: - -- whereis bash -- whence bash -- which bash - -Let's start with a few commands and see what happens in our -terminal. -$ echo Rachael -$ date -$ cal -$ lksjfs - -- What happens when we type something that does not exist? -- What happens with errors? - +--- # Navigate Files / Directories +--- ## Files Knowing the different types of files available helps us better @@ -83,14 +21,12 @@ understand how to navigate and manipulate them. - Regular files are text files with readable characters. - Executable files are programs that are invoked as commands. -- Shell scripts are executable files that we can read whereas - bash is a non-human-readable executable file. +- Shell scripts are readable executable files, in contrast to bash, which is a non-human-readable executable +--- ## Directories -Directories are files that are like folders which contain other -files and directories (subdirectories), creating a hierarchical -structure. +Directories, similar to folders, contain files and subdirectories, forming a hierarchical structure. - We can think of the structure of directories as a tree with the top of the tree being the root. @@ -98,68 +34,10 @@ structure. listing the directory names in order from the root, separated by slashes, followed by the file's name. -Let's try three commands that help us navigate our system: - -1. First, let's run the code below and see what happens: - -``` -$ pwd -``` - -`pwd` prints our working directory. 
If we ever need to know -where we are, we can execute this command. - -2. Now, let's run the code below and see again what happens: - -``` -$ cd -``` - -By default, `cd` changes your working directory to your home -directory. You can also use `cd` to set your working directory by -including the desired pathname - -``` -$ cd Desktop -``` - -In the previous example, we were able to just state `Desktop` -because it is a directory in our working directory. If we changed -our working directory to `Desktop`, and then wanted to change -it again to a directory in `Desktop`, we could again just specify -the folder. -If we wanted to change the working directory to a directory -outside of our working directory, we would need to specify a -pathname: - -``` -$ cd /Users/rachaellam/Desktop -``` - -3. To know what files and folders exist in our working directory, - we can use the code below: - -``` -$ ls -``` - -We can add a pathname at the end to list the contents of a -specified directory. - +--- ## Paths -As we've seen, directory names separated by slashes are paths. -There are two types of paths, _absolute_ and _relative_. - -- An absolute pathname begins at the root directory and - includes each directory, separated by slashes until the - desired directory or file is reached. -- A relative pathname starts from the working directory and - uses symbols `.` or `..` to represent relative positions in the - file tree. - -Using `cd` and `pwd` let's take a look at how we can use -absolute and relative pathnames. +Using `cd` and `pwd`, let's explore how to use absolute and relative pathnames. ``` $ cd @@ -176,29 +54,14 @@ $ cd 15 $ pwd ``` -Here's another example using the /usr pathname. - -``` -$ cd /usr/bin -$ pwd -``` - -``` -$ cd /usr -$ pwd -``` - -``` -$ cd .. -$ pwd -``` - +--- Let's now try move through some directories to get comfortable. Try out lots of different paths depending on the file structures of your computer. 
Try getting into different directories from different parent directories. The tilde notation ~ in the examples below refers to our home directory. +--- ``` $ cd ~/Desktop $ pwd @@ -209,19 +72,21 @@ $ cd ~/Desktop/dir1 $ pwd ``` -## Options and Arguements +--- +## Options and Arguments Options and arguments are used to write commands that can -make changes to our system. The syntax Is: +make changes to our system. The syntax is: ``` $ command --option argument ``` -Options can also be combined, which we'll briefly see now but -learn more about a bit later. +--- +Options can also be combined, a topic we'll briefly touch on now and explore in more detail later. -There are two ways to write an `—option`: +--- +There are two ways to write an `--option`: 1. Short option: one dash followed by a single character 2. Long option: two dashes followed by a word @@ -230,6 +95,7 @@ There are two ways to write an `—option`: `-d` or `--directory` `-r` or `--reverse` +--- Let's try these lines of code and see what happens: ``` @@ -244,18 +110,19 @@ $ ls -lt $ ls -lt --reverse ``` +--- - `-l` long format - `-t` modification time - `--reverse` reverse the sort order -Notice how `-lt` is actually a combination of multiple options. +Notice how `-lt` combines multiple options. +--- ## Wildcards -Wildcards give us the ability to rapidly specify groups of -filenames based on patterns of characters. Let's look at a few -examples below: +Wildcards allow us to quickly specify groups of filenames based on character patterns. 
Let's look at a few examples below: +--- - `*` > matches any character - `?` > matches any single character - `[characters]` -> matches any character that is in the set @@ -267,6 +134,7 @@ Some other helpful character wildcards are: - `[:lower]` ~ matches any lowercase letter - `[:upper:]` > matches any uppercase letter +--- Let's try a few in our terminal: ``` @@ -293,191 +161,15 @@ $ ls [[:upper:]]x $ ls [![:digit:]]x ``` -# Working with Files / Directories - -We're going to learn some basic commands to begin some -preliminary coding. We'll also be using these throughout the -module, so it's important to understand how they work now: - -- create directory mkdir -- create file touch -- copy cp -- move and rename mv -- remove rm - -## Commands - -### mkdir - -First let's make a directory. It's important to remember what -directory you're working in currently, because that's where the -new directory will be made. Let's assume for now, we're working -on our desktop. - -``` -$ mkdir directory -``` - -We can also create multiple directories at the same time: - -``` -$ mkdir dir1 dir2 dir3 -``` - -### touch - -We can also make new files from the command line. This is -particularly useful when we want to make scripts, which we'll -learn a bit later. Using touch, we can make a new file in our -working directory. - -``` -$ touch file1 -``` - -We can also create a specific file type by adding the extension: - -``` -$ touch file1.sh -``` - -### cp - -Now we're going to copy a file that we have on our desktop. It -can be any file but remember to include the extension or if it -has multiple characters, special characters and spaces, to wrap -it in quotes. - -``` -$ cp file1 file2 -``` - -We can also copy files or directories into a directory. - -``` -$ cp file1 dir1 -``` - -And all files from one directory into another using wildcards: - -``` -$ cp dirl/* dir2 -``` - -What does the `/*` in this command mean? 
- -There are some useful `—options` that accompany `cp`: -| Option | Description | -| ------ | ------------------------------------------------------------- | -| -i | Before overwriting an existing file, prompt the user for confirmation. | -| -R | Recursively copy directories and their contents. | -| -v | Display informative messages as the copy is performed. | - -### mv - -The mv command enables us to move and rename files and -directories, depending on how it's used. In th example below, `mv` renames file1 to file2. - -``` -$ mv file1 file2 (Renames file1 to file2) -``` - -Here, `mv` moves file1 to dir1 - -``` -$ mv file1 dir1 (Moves file1 to dir1) -``` - -We can also move directories into other directories: - -``` -$ mv dir1 dir2 -``` - -In this casem, if `dir2` **exists**, `dir1` will be moved to `dir2`. If `dir2` does not exist, it will be created and `dir1` will be moved to the newly created `dir2`. In both casesm the entire directory will be moved to another/new directory, rather than the contents. - -Let's say we're in the directory `Desktop` and we just moved -`file1` into `dir1` but now we want to put it back in `Desktop`. How would we move a file out of a directory into another one? Unfortunatly we **can't** just say - -``` -$ mv file1 Desktop -``` - -because `file1` does not exist in `Desktop` any more and the command will try and rename `file1` to `Desktop`. - -The answer involves using pathnames and the tilde `~` notation: - -``` -$ mv dir1/file1 ~/Desktop -``` - -If we just wanted to move `file1` into `dir2` (if `dir2` is in our working directory), we could type: - -``` -$ mv dir1/file1 dir2 -``` - -What if we want to move just the contents of dir1 to another -directory rather than the whole folder? HINT: it is very (exactly) similar to copying (`cp`). - -``` -$ mv dir1/* dir2 -``` - -This is a combination of the directory `dir1`, pathnames `/` and wildcards `*`. Here, `di1/*` takes the all the contents of `dir1` and puts it in `dir2`. 
-We could also use the same techqniue to specify certain files to move rather than all of them. How do you think this would be done? - -#### Questions - -- We're starting to combine our knowledge of files, directories and pathnames with some basic commands. How do we feel up to this point? - -### rm - -To remove files we use the command `rm`. Because we're now -deleting files, it's important that you're sure of what you're deleting because **there is no way to undo**. Fortunately!! there -are ways to do this. - -``` -$ rm file1 -``` - -Without specifying any `-options`, `file1` will be deleted -without any feedback. - -To ensure we want to delete something, we can use the option `-i` (interactive) that we learned earlier. - -``` -$ rm -i file1 -``` - -This will prompt a question asking us if we want to delete `file1`. We can respond with `y` (yes) or `n` (no). - -If we want to delete a directory, we need to use the option `-r` (recursive) as we did when copying (`cp`). This will recursively delete everything inside of the directory and the directory itself. - -``` -$ rm -r dir1 -``` - -If we're specifying multiple deletions and a directory does not exist, the shell will tell us. If we don't want that message, we can add the `-option`, `-f` (force). Force will override `-i` if it is included. - -1. How do you delete multiple directories? -2. What happens if you delete multiple directories with `-i`? -3. What happens if you delete multiple directories with `-i` but one does not exist? - -Remember, it's extremely important to remember that you cannot undo `rm`. This means, if you start using wildcards to specify filenames and don't include `-i`, you could delete things by accident. 
For example, let's say you want to delete all `.txt` files in a directory: - -``` -$ rm *.txt -``` - -If you accidently add a space between `*` and `.txt`,the `rm` command will delete all the files in the directory and then try to find a `.txt` file which does not exist because it delete everything. - +--- ## Input / Output +--- ### Standard Input/Output -Each program invokes the standard input, output and error. We can think of the standard input default as coming from the keyboard and if we think of everything as a file, a command such as `ls` will result in a file called `standard output` and the status message to a file called `standard error`. By default, both are linked to the screen and not saved to a disk file. +Each program uses standard input, output, and error channels. We can think of the standard input default as coming from the keyboard and if we think of everything as a file, a command such as `ls` will result in a file called `standard output` and the status message to a file called `standard error`. By default, both are linked to the screen and not saved to a disk file. +--- ### Input/Output Redirection Input/Output redirection allows us to change where the input @@ -489,6 +181,7 @@ redirection operator `>`. $ ls -l /usr/bin > ls—output.txt ``` +--- Here we have redirected the output of `ls -—l /usr/bin` to a `.txt` file called _ls-output.txt_. We can now see the details of that file and if it worked: @@ -497,7 +190,8 @@ We can now see the details of that file and if it worked: $ ls -l ls-output.txt ``` -By looking at the details, we can see that the file was created and it a fairly large text file, indicating that something was written to it. +--- +By examining the details, we can see that the file was created and is a fairly large text file, indicating that content was written to it. 
If we specify a directory that does not exist, we receive the Standard error: @@ -505,14 +199,17 @@ If we specify a directory that does not exist, we receive the Standard error: $ ls -l /bin/usr > ls-output.txt ``` +--- Why was the standard error not written to the `.txt` file? What happened to our _ls-output.txt_ file? -Although the standard error was not written tothe `.txt` file, +--- +Although the standard error was not written to the `.txt` file, the destination file is always written from the beginning, therefore, the redirection began to write the file and once noticed there was an error, stopped, resulting in an empty file. +--- So how do we append rather than rewrite? By using the redirection operator `>>`. @@ -520,6 +217,7 @@ redirection operator `>>`. $ ls -l /usr/bin >> ls-output.txt ``` +--- If we want to redirect the standard error, we need to use the redirection operator `2>` @@ -527,6 +225,7 @@ redirection operator `2>` $ ls -l /bin/usr 2> ls-error.txt ``` +--- If we want to redirect both the standard output and standard error to one file, we have two options. @@ -542,39 +241,35 @@ $ ls -l /bin/usr > ls-output.txt 2>&1 $ ls -l /bin/usr &> ls-output.txt ``` +--- ### cat -`cat` takes one or more files and copies them to standard output. Using the _ls-output.txt_ created earlier, we can see how that's done: +`cat` takes one or more files and copies them to standard output. Using the previously created `ls-output.txt`, we can see how this is done: ``` $ cat ls-output.txt ``` +--- We can also use it to join files together. Let's say I have two files, `file1` and `file2` and I want to combine them into a file called `file3`: ``` $ cat file1 file2 > file3 ``` +--- Now the contents of file1 and file2 should be combined. -We can also use it to join files togther. 
Let's say | have two files, `file1` and `file2` and I want to combine them into a file called `file3`: - -``` -$ cat filel file2 > file3 -``` - -Now the contents of `file1` and `file2` should be combined. - We can also use `cat` to add to a `.txt` file. ``` $ cat > new_cat.txt ``` -Now we can type the text that we want in the file. Once we're finished, we can use `CTRL-D` to exit. +--- +Now we can type the desired text into the file. To finish, use `CTRL-D` to exit. -What would be the difference between `$ cat > new_cat.txt` and `$ cat >> new_cat.txt` ? +What is the difference between `$ cat > new_cat.txt` and `$ cat >> new_cat.txt`? Finally, we can redirect the standard input from the keyboard to the file _new_cat.txt_ @@ -584,12 +279,15 @@ $ cat < new_cat.txt This is almost identical to just typing _$ cat new_cat.txt_ but we can see later how it could be more useful. +--- # Pipes / Filters -We use pipelines to read data from standard output and send to standard input using the pipe operator `|` . This means the standard output of one command can be piped into the standard input of another. +--- +Pipelines read data from standard output and send it to standard input using the pipe operator `|`. This means the standard output of one command can be piped into the standard input of another. Several commands put together in a pipeline are often referred to as filters. Filters take an input, change it and then output it. +--- ## Commands Let's learn a few more commands that will help us further @@ -603,6 +301,7 @@ understand pipelines and filters. We'll learn: - output the first part of a file `head` - output the last part of a file `tail` +--- ### cut Let's look at a `csv` to see how we can initially see our data. Because it's a `csv`, each field is separated by a comma. Let's first read that file using `cat`: @@ -620,6 +319,7 @@ To use cut, I need to pass a couple options: 2. `-f`, which extracts a particular field based on what follows. 
For example, `-f1` will take the first field or `-f2` will take the second field and so on. +--- In this example, I'm taking the file _parking_data_ and cutting it based on commas and then only extracting the first field. ``` @@ -632,8 +332,10 @@ What happens if I add another `-f` option? What does this do? $ cut -d, -f1 -f2 < parking_data.csv ``` +--- How would I specify more than three fields? +--- ### sort How can we make our previous example more readable? @@ -644,6 +346,7 @@ One answer is to use the sort feature. We can pipe this with the cut feature: $ cut -d, -f1 < parking_data.csv | sort ``` +--- ### uniq Additionally, I can make the above command even more readable by removing any duplicates with `uniq` @@ -652,6 +355,7 @@ Additionally, I can make the above command even more readable by removing any du $ cut -d, -f1 < parking_data.csv | sort | uniq ``` +--- ### grep `grep` is a powerful tool for finding patterns in text files. The syntax is: @@ -660,6 +364,7 @@ $ cut -d, -f1 < parking_data.csv | sort | uniq $ grep pattern [file...] ``` +--- In our case, we're going to use it with our previous example and pipe it with other commands: @@ -669,6 +374,7 @@ $ cut -d, -f1 parking_data.csv | sort | uniq | grep FIRE The results are all the lines that contain the pattern FIRE. +--- ### find Another useful use for `grep` is to find files in directories. `grep` is nicely combined with `find` for this feature. @@ -677,8 +383,10 @@ Another useful use for `grep` is to find files in directories. `grep` is nicely $ find ~/Desktop/dir1 | grep cat ``` +--- Here we're searching in the directory _dir1_ with the pattern _cat_. This would be helpful if we wanted to know if there were any files with the word cat in the filename. +--- ### head | tail We can also extract the first and last part of files using `head` and `tail`. We can also add the option `-n` followed by a number to extract a certain number of lines. 
@@ -691,6 +399,7 @@ $ head -n 5 ls-output.txt $ tail -n 5 ls-output.txt ``` +--- `head` and `tail` can also be used in pipelines: ``` @@ -701,17 +410,21 @@ $ cut -d, -f1 < parking_data.csv | sort | uniq | head -n 5 $ cut -d, -f1 < parking_data.csv | sort | uniq | tail -n 5 ``` +--- ## Expansions +--- Expansion uses special characters to expand upon something before the shell processes it. We have learned a few expansions so far such as the tilde `~` and wildcards `*`. We've also seen some character wildcards `[characters]`. Expansions are another feature that helps us when we're manipulating and working with files and directories. +--- Other examples of expansions are: - arithmetic expansion - brace expansion +--- ### Arithmetic Expansion Arithmetic expansion basically makes the shell a calculator. The syntax is: @@ -726,12 +439,14 @@ For example: $ echo $((2 + 2)) ``` +--- Arithmetic expressions can be nested: ``` $ echo $(($((2 + 2)) * 3)) ``` +--- Just for reference, here is a list of the arithmetic operators: | Operator | Description | @@ -742,6 +457,7 @@ Just for reference, here is a list of the arithmetic operators: | `/` | Integer division | | `**` | Exponentiation | +--- ### Brace Expansion Brace expansions allow us to create multiple text strings from a pattern containing braces. Here are a few examples: @@ -758,12 +474,14 @@ $ echo Number_{1..5} $ echo {Z..A} ``` +--- Brace expansions can also be nested: ``` $ echo a{A{1,2},B{3,4}}b ``` +--- We can use brace expansion to help make multiple directories using `mkdir`. ``` @@ -772,6 +490,7 @@ $ mkdir dir-{1..3} This command makes 3 directories named _dir-1_, _dir-2_, and _dir-3_ +--- ## Quoting / Backslashing Quoting suppresses unwanted expansions. We can use double @@ -782,24 +501,28 @@ quotes, single quotes or backslashes: - Single quotes suppress all expansion - Backslashes are used to escape single characters +--- Many times there will be file names or directories that are named with spaces.
In this case, we'll need to use double quotes so that the shell can read it. Using `touch` we can create a text file whose name contains two words separated by a space: ``` $ touch "two words.txt" ``` +--- We can then see the details of the file we just created: ``` $ ls -l "two words.txt" ``` +--- If we want to rename the file, we would do as follows: ``` $ mv "two words.txt" two_words.txt ``` +--- Let's see what these three examples do in shell: ``` @@ -814,11 +537,13 @@ $ echo '2 * 3 > 5' is an equation $ echo 2 \* 3 \> 5 is an equation ``` +--- ## Command Line Editing Getting familiar with command line editing can save you time. Bash uses a library called Readline to provide command line editing. There are many shortcuts and you don’t have to memorize them all, just use the ones that you feel are best. There are even more shortcuts that you can read about in the textbooks! +--- ### Command Character | Command | Description | @@ -828,6 +553,7 @@ There are many shortcuts and you don’t have to memorize them all, just use the | DEL | Delete one character backwards | | CTRL-D | Delete one character at cursor location | +--- ### Word Commands | Command | Description | @@ -838,6 +564,7 @@ There are many shortcuts and you don’t have to memorize them all, just use the | ESC-D | Delete one word forwards | | CTRL-Y | Undo | +--- ### Line Commands Command Description @@ -848,6 +575,7 @@ Command Description | CTRL-K | Delete text from the cursor to the end of the line | | CTRL-U | Delete text from the cursor to the beginning of the line | +--- ### History Line Commands | Command | Description | @@ -859,12 +587,14 @@ Command Description | `!string` | Repeat last history item starting with string | | `!?string` | Repeat last history item containing string | +--- ## Completion Command Completion commands autocomplete your command if it exists by hitting `tab`. If it does not exist, the command will not be able to complete.
If multiple exist, the command will also not be able to complete because it will not know which one to choose. +--- For example, let's say we have two files called `file1` and `file2`. It would not be able to use autocomplete because the shell will not know which to choose until the last character. If we have two files, one called `foot.txt` and one called `file.txt`. This command would not be able to autocomplete: @@ -873,14 +603,17 @@ If we have two files, one called `foot.txt` and one called `file.txt`. This comm $ ls f ``` +--- But this one will: ``` $ ls fil ``` +--- # Shell Scripts +--- ### Shell Scripts Shell scripts allow us to combine several commands into one file, rather than one by one on the command line. @@ -889,6 +622,7 @@ The shell will read the script just as if you were to write the command on the c Most things that can be done in the shell script can be done on the command line and vice versa. +--- ### Writing Shell Scripts There are three important considerations when writing the shell script @@ -899,12 +633,14 @@ There are three important considerations when writing the shell script 3. **Put the shell script somewhere the shell can find it:** the shell automatically searches certain directories for executable files when no explicit pathname is specified. +--- ### Set Up Open either TextEdit or your text editor of choice. Some popular programs are Sublime Text, Vim, Atom, and Notepad++. If you want to see the syntax highlighting, you might have to save your script as a `.sh` file. Without doing this, your file will just look like a regular `.txt` file. Once you open your text editor and save it, we can begin our first script! +--- ### Script File Format We must first tell the shell the name of the interpreter that should be used to execute the script. This is marked by using a shebang: `#!` @@ -916,10 +652,12 @@ Throughout the script, you can and **should** use `#` to make comments. Comments echo "This is our first script!"
``` +--- Here we can see we've told the shell to use `/bin/bash` using the shebang `#!` We've also added a comment using `#` And finally, something quite familiar, we have our first line of script using `echo` +--- ### A Note on Commenting Commenting is important not just so you can understand your own work, but also so others can understand your work in collaborative projects. It also helps make your code reproducible. @@ -937,6 +675,7 @@ or as comment blocks: echo "Hello World" ``` +--- ### Executable File Permission In order to execute our file, we have to add file permissions: `chmod` helps make our script executable, `775` is used to make scripts that everyone can execute, `700` is used to make scripts that only the owner can execute @@ -951,12 +690,14 @@ $ ls -l first_script.sh $ chmod 775 first_script.sh ``` +--- ### Script File Location In order to run our script, we have to call it using `./` in front of the script filename ( `./Script` ). File location is important to run your script. If just `Script` was written, the shell would not be able to find the script and try to read it as a command, outputting `command not found`. Running `echo $PATH` helps us see what directories are being searched for the script. +--- If we want to run our script without `./`, we can create a `/bin` for our script, move our script into the bin folder and then run it. It's important to note that we have to make this bin in our home directory. If we made it on our Desktop, the script would still not be found. ``` @@ -965,17 +706,21 @@ $ mv first_script.sh bin $ first_script.sh ``` +--- In this block of code, we're making the bin folder using `mkdir`, moving the script into the bin with `mv` and then running the script without `./`. +--- ### Good Locations for Scripts For personal use, a good place to put your script is `/bin`. For everyone's access, it's better to put scripts in `/usr/local/bin`. 
+--- # Shell Functions +--- ### Functions Functions are a good way to break down code into smaller, more manageable chunks. Each chunk can represent a task. @@ -987,6 +732,7 @@ For example, let's say your entire process is make pasta. It can be broken down 3. Cook pasta 4. Serve +--- Each of these steps can be expanded further into subprocesses. Cook pasta can be: 1. Fill pot with water @@ -996,6 +742,7 @@ Each of these steps can be expanded further into subprocesses. Cook pasta can be 5. Cook for 8-12 minutes 6. Strain +--- Functions have two syntactic forms: ``` @@ -1015,6 +762,7 @@ name () { `name` is the name of the function `commands` are the commands contained in the function +--- Let's write our first function: ``` @@ -1024,6 +772,7 @@ function funct { return } +--- #program starts here echo "Step 1" @@ -1031,8 +780,10 @@ funct echo "Step 3" ``` +--- What do you think this function will output? +--- Let's save and run this function in our terminal to see what happens. Here's a good time to recap how to save, grant permissions, and run the script. @@ -1042,8 +793,10 @@ Here's a good time to recap how to save, grant permissions, and run the script. - `700` - grant permissions to yourself - `/bin` - where to save the script +--- ## Variables +--- ### Global Variables Let's make our script more complex with some variables. We can first define variables directly through the terminal. @@ -1053,8 +806,10 @@ $ foo="Something cool" $ echo $foo ``` +--- Notice how in order to call the variable we need to add `$` before the variable. The quotes are not necessary if the value of the variable doesn't include spaces when defining it. If we did not include the quotes here, we would receive an error. +--- Now let's add some global variables to our script: ``` @@ -1076,6 +831,7 @@ echo "Step 3" What do we think will be the output in this example? +--- ### Local Variables Local variables are variables that are contained within the function.
Because they're contained, they can have names that already exist in the shell globally or within other shell functions. @@ -1106,6 +862,7 @@ echo "global: foo = $foo" ``` +--- What would happen if we removed `local` ? ``` @@ -1129,8 +886,10 @@ funct_2 echo "global: foo $foo" ``` +--- ## Parameters +--- ### Positional Parameters Positional parameters are built-in parameters that allow our programs to get access to the contents of the command line. @@ -1158,6 +917,7 @@ Number of arguments: $# " ``` +--- In the example, you may notice that we haven't given `$0` any specific value. Let's try to run the script a couple ways through the command line to see what this means: @@ -1166,12 +926,14 @@ Let's try to run the script a couple ways through the command line to see what t What do we notice? +--- ### \$\* and $@ `$*` —> Expands into the list of positional parameters, starting with 1. When surrounded by double quotes, it expands into a double-quoted string containing all of the positional parameters, each separated by the first character of the IFS shell variable (by default a space character). `$@` —> Expands into the list of positional parameters, starting with 1. When surrounded by double quotes, it expands each positional parameter into a separate word surrounded by double quotes. +--- Let's take a look at this code piece by piece: ``` @@ -1190,6 +952,7 @@ pass_params () { pass_params "word" "words with spaces" ``` +--- 1. Here we have two functions: `print_params ()` and `pass_params ()`. `pass_params ()` calls on the function `print_params ()` within its function. 2. In the first function, `echo` is printing the line inside the double quotes. The `\` infront of `$1` escapes the `$`, thus losing its meaning, as we learned earlier. @@ -1202,6 +965,7 @@ print_params () { } ``` +--- 3. In the second function, `echo` again is printing the line inside the single quotes. `"\n"` is adding a tab at the beginning of the line for readability. 
It is then calling on the first function ( `print_params ()` ) with the argument `$*` . The second echo is calling the first function but with the argument `$*` in double quotes. This is repeated for `$@` ``` @@ -1213,14 +977,17 @@ pass_params () { } ``` +--- 4. In the final part of the code, we're calling on the `pass_params ()` function and passing two arguments: `"word"` and `"words with spaces"`. ``` pass_params "word" "words with spaces" ``` +--- Let's see what happens's when we run the script in terminal. Remember, we don't have to pass any arguments in the command line because we have done so in our script. +--- Let's take a look at another example. In this example we'll get a greater understanding of variables and positional parameters: ``` @@ -1238,6 +1005,7 @@ echo var1: $var1 echo $0: $1 $2 ``` +--- Let's break it down again: 1. In our first function called `afunc` , using `echo` we will print @@ -1253,12 +1021,14 @@ function afunc { } ``` +--- 2. Outside of the function, we'll create another variable also named `var1` and give it the value of `"outside function"` ``` var1="outside function" ``` +--- 3. We'll then add the program. a) `echo`, we'll print `var1` b) Print 3 positional parameters @@ -1274,16 +1044,19 @@ echo var1: $var1 echo $0: $1 $2 ``` +--- Let's run it in our terminal without any additional arguments and see what the output is. - Why did `echo $@: $1 $2` only output one argument? - Why did `var1` change the third time to `inside function` rather than `outside function` ? +--- Now let's change and add a few things to see what happens: - In our terminal, what happens if we pass two arguments by entering `ascript.sh arg1 arg2` with `ascript.sh` being the name of our script and `arg1 arg2` being two random arguments? - What happens if we add `local` to our function? 
+--- ### Parameter Expansion Let's discuss the difference between `$a` and `${a}` @@ -1294,6 +1067,7 @@ Let's discuss the difference between `$a` and `${a}` `a_file` rather than `a` - `${a}_file` the shell will now try to expand the variable `a` +--- This can help us be more flexible when navigating and manipulating files and directories. Let's look at the code below to see how this helps us: @@ -1304,10 +1078,12 @@ $ touch $filename $ mv $filename ${filename}1 ``` +--- This block of code creates a file based on our defined variable and then renames it with the same variable but with an additional component. Parameter expansion also helps us if our variables are unset (i.e. do not exist) or are empty. Let's take a look at a couple of examples in the next few slides. +--- 1. `${parameter:-x}` If parameter is unset or empty, expansion results in the value of `x`; the parameter itself is not changed. If it's not empty, it results in the value of the parameter ``` @@ -1322,6 +1098,7 @@ $ echo $foo Through this sequence of commands we can see that when `$foo` is empty, `:-` substitutes `"something else"` without assigning it to the variable. Once we define the variable, `:-` results in our defined variable. +--- 2. `${parameter:=x}` If parameter is unset or empty, expansion results in the value of x and the value of x is assigned to the parameter. If it's not empty, it results in the value of the parameter ``` @@ -1335,6 +1112,7 @@ $ echo $foo We can see that when `$foo` is empty, `:=` assigns the variable with `"something else"`. If we define the variable again, `:=` results in our second defined variable. +--- 3. `${parameter:?x}` If parameter is unset or empty, this expansion causes the script to exit with an error, and the contents of `x` are sent to standard error. If parameter is not empty, the expansion results in the value of parameter. ``` @@ -1348,6 +1126,7 @@ $ echo $? We can see that when `$foo` is empty, `:?` gives us an error which we can see as `echo $?` outputs `1`.
If we define the variable again, `:?` results in the value of our variable. +--- 4. `${parameter:+x}` If parameter is unset or empty, the expansion results in nothing. If parameter is not empty, the value of `x` is substituted for parameter; however, the value of parameter is not changed. ``` @@ -1361,6 +1140,7 @@ $ echo $foo Here, `:+` resulted in an empty output and the value of `$foo` remains empty. If we define the variable, `:+` will still output what we defined, but it will not reassign the variable permanently. +--- ### String Operators String operators are extremely valuable for operations on pathnames. They can help extract parts of pathnames, especially if they follow a pattern. Many pathnames typically follow patterns, such as all extensions are preceded with `.`. @@ -1369,10 +1149,7 @@ Some character expansions are: 1. `${#parameter}` 2. `${parameter:offset}` 3. `${parameter:offset: Length}` - -4. - -`${#parameter}` expands into the length of the string +4. `${#parameter}` expands into the length of the string contained by the parameter. ``` @@ -1380,6 +1157,7 @@ $ foo="Toronto needs more trees" $ echo "'$foo' is ${#foo} characters long." ``` +--- With the following expansions, we can extract a portion of the string contained by the parameter. 2. `${parameter:offset}` will extract characters from _offset_ characters to the end of the string. For example, counting from the beginning of the string, the _n_ of _needs_ is 8 characters from the beginning. Because we did not specify an end, `echo` will print from _needs_ onwards. @@ -1389,6 +1167,7 @@ $ foo="Toronto needs more trees" $ echo ${foo:8} ``` +--- 3. `${parameter:offset: length}` will specify the length that we want to extract. This length is counted not from the beginning of the string, but from the offset of the string.
``` @@ -1396,6 +1175,7 @@ $ foo="Toronto needs more trees" $ echo ${foo:8:5} ``` +--- We can see that from the beginning of the string, _n_ is 8 characters in, and from _n_, s of _needs_ is the 5th character from _n_. Therefore, our output will be _needs_. @@ -1472,6 +1252,7 @@ $ foo="MP3.MP3" $ echo ${foo/MP3/mp3} ``` +--- ``` $ echo ${foo//MP3/mp3} ``` @@ -1486,8 +1267,10 @@ $ echo ${foo/%MP3/mp3} Can you think of when this might be helpful? +--- Let's say I have a file named "rachaels cool file". I want to rename it because spaces cause problems in filenames. How would I do this? +--- ### Arithmetic Assignment We have seen assignment before with examples such as `foo=5` . This is a simple assignment but we can also add complexity to this assignment with other operators. @@ -1504,6 +1287,7 @@ We can also increase or decrease our parameters by one. - `$((++parameter))` increases parameter by one before the parameter is returned - `$((--parameter))` decreases parameter by one before the parameter is returned. +--- These are very subtle changes so let's see what we mean after and before a parameter is returned: ``` @@ -1518,10 +1302,12 @@ $ echo $((++foo)) $ echo $foo ``` +--- ### Command Substitution So far we've learned how to get values into variables by using assignment statements ( `x=5` ) and positional parameters ( `x=$1` ). Another way is command substitution which allows you to use the standard output of the command as if it were a variable. +--- Let's say we want to assign a variable to the output of a command so that we can apply another command to that output. In this particular case, we want to make a variable equal to all files beginning with _t_. We then want to apply a sort command on that variable: ``` @@ -1532,6 +1318,7 @@ $ echo $x | sort Although this seems quite simple now, we'll see how this can be extremely powerful when we move into flow control.
+--- # Flow Control Flow control allows programs to "change directions" based on the results from a given input. @@ -1542,11 +1329,13 @@ Bash supports several constructs: - `case` - `for` +--- ## if / else `if/else` is a conditional statement that chooses whether or not to do something based on a true or false statement. +--- ``` if condition; then commands @@ -1557,11 +1346,13 @@ if condition; then fi ``` +--- Here, we've assigned the value `5` to `x`. We've then written an `if/else` statement that says: if `x` is equal to `5`, then tell us that `x` equals `5`. Otherwise (`else`), tell us that `x` does not equal `5`. +--- ``` x=5 if [ $x = 5 ]; then @@ -1571,6 +1362,7 @@ else fi ``` +--- Let's take a look at a more practical example: we want to know if there are any files in our directory that contain spaces. ``` @@ -1583,6 +1375,7 @@ else fi ``` +--- First we've changed our working directory to _dir1_: ``` @@ -1597,12 +1390,14 @@ option checks if the length of a string is _nonzero_: -n $(find . -type f | grep " ") ``` +--- By wrapping our output in an if statement, we're stating: 1. `if` the value of `$(find . -type f | grep " ")` is nonzero, then print (`echo`) "A file contains a space" 2. Otherwise (`else`), print (`echo`) `"No files contain a space"` +--- ### Control Operators Control operators ( `&&` and `||` ) allow you to test more than one thing at a time. Their syntax is: @@ -1619,6 +1414,7 @@ if command1 || command2; then fi ``` +--- With the `&&` operator, command1 is executed and command2 is executed only if command1 is **successful**. With the `||` operator, command1 is executed and command2 is executed only if command1 is _unsuccessful_. @@ -1635,12 +1431,14 @@ if grep $word1 $filename && grep $word2 $filename; then fi ``` +--- Using positional parameters that we learned earlier, what do you think will happen if we run the previous code? - What happens if both words exist? - What happens if only one word exists? - What happens if no words exist?
+--- Example of `||` ``` @@ -1653,12 +1451,14 @@ if grep $word1 $filename || grep $word2 $filename; then fi ``` +--- Similarly, what will happen if... - What happens if both words exist? - What happens if only one word exists? - What happens if no words exist? +--- ## While Using the while command, let's discuss looping. Looping allows portions of a program to repeat as long as the condition is true. @@ -1670,6 +1470,7 @@ while condition; do done ``` +--- Let's make a basic while script that displays five numbers in sequential order from 1 to 5 and then tells us when it's finished. ``` @@ -1686,6 +1487,7 @@ done echo "Finished." ``` +--- Why does the loop end? While loops are extremely helpful to read lines of a file and then perform some command if a line meets a certain condition. Let's explore how to read lines first: @@ -1697,6 +1499,7 @@ while read -r line; do done < "$file" ``` +--- In this script, we're creating a variable with our file. We're then reading the file until the last line is read. In this example, we're using an input redirection that we learned earlier ( `<` ), which passes the file into the read command. We've also used `-r` so that backslashes are read literally instead of being treated as escape characters. Because line is acting as a variable, we can also nest another loop if `$file` meets a condition. Let's say we have a file and we want to know every line that has `bananas` in it. @@ -1710,6 +1513,7 @@ while read -r line; do done < "$file" ``` +--- Here we're reading the file line by line using the `while` loop. We're then saying `if` our variable, `$line` equals `"banana"` , then print the `$line`. @@ -1717,6 +1521,7 @@ then print the `$line`. 1. Why have we added the wildcard `*` ? 2. What would happen if we didn't include `*` ?
+--- ## Until Until loops are similar to while, except unlike while loops that run as long as the condition is true, the until loop will run as long as the condition is **false** @@ -1727,6 +1532,7 @@ until condition; do done ``` +--- Let's create a script similar to the while statement: a basic until script that displays five numbers in sequential order from 1 to 5 and then tells us when it's finished. ``` @@ -1741,6 +1547,7 @@ echo "Finished." How is this script different to the while loop? +--- How might this be useful? Let's say we want to create 3 directories labeled _dir1_, _dir2_ and _dir3_: @@ -1753,8 +1560,10 @@ until [[ $x == 4 ]]; do done ``` +--- Here we've created a variable `x=1` because we want our first directory to be _dir1_. We're then saying up until `x=4`, make a directory `mkdir` called _dir_ plus our variable. We've then added 1 to `x` each iteration using an arithmetic assignment. The `echo` part is just to give us some feedback on what is happening behind the scenes. +--- ## for For our final flow control, we're going to learn a powerful loop called `for`. The syntax is: @@ -1769,33 +1578,40 @@ What we might notice is that this flow uses variables that will increment during How would we use `for` if we wanted to list all files and directories in a folder? +--- ``` for i in $(find *); do echo $i done ``` +--- The variable `i` becomes all instances of the variable `$(find *)` . For each instance of `i`, we are then printing it. Although this seems quite basic and there are simpler ways to list all files and directories ( `ls` ), this enables us to do many things with the looped variable `i` by nesting other loops. What other ways can we use for loops? What other ways can we use for loops within files? +--- ### Questions? - Why do we use `i`?
+--- ### Next Week: Git and GitHub - Please make sure to come with a GitHub account +--- ## Additional Material +--- ### Exit Status Commands issue a value to the system when they terminate, which is an integer in the range of 0 to 255 indicating the success or failure of a command's execution. Conventionally, zero indicates success and any other value indicates failure. +--- Let's list a file that we know exists on our desktop: ``` @@ -1807,6 +1623,7 @@ $ echo $? `$?` returns the exit status of the last executed command. The value being either zero for success or any other number for failure. +--- If we then list a file that we know does not exist in our desktop and return the value of `$?`, what do we expect to happen? ``` @@ -1814,14 +1631,17 @@ $ ls -d /bin/usr $ echo $? ``` +--- ### Exit Command The `exit` command in a script replaces the return command and accepts a single, optional argument, which becomes the script's exit status. When no argument is passed, the exit status is that of the last command executed. This enables our scripts to indicate an error. +--- If the script is a function in a larger program, we can use `return` instead of `exit` with a single, optional argument, allowing our function to indicate an error. +--- ``` #!/bin/bash @@ -1863,6 +1683,7 @@ test_file () { ``` +--- `if / else` statements are most frequently used with `test` `test` performs a variety of checks and comparisons @@ -1875,6 +1696,7 @@ Its syntax is: `[ expression ]` +--- There are many expressions that are used to evaluate the status of files. Some important **File Expressions** include: @@ -1886,6 +1708,7 @@ of files. Some important **File Expressions** include: | -r file | file exists and is readable (has readable permissions for the effective user) | | -s file | file exists and has a length greater than zero | +--- #### String Expressions | Expression | Is True If | @@ -1896,6 +1719,7 @@ of files.
Some important **File Expressions** include: | string1 == string2 | string1 equals string2 | | string1 != string2 | string1 and string2 are not equal | +--- ### Breaking Out Of A Loop Bash has two build-in commands that can be used to control program flow inside loops. @@ -1904,6 +1728,7 @@ Bash has two build-in commands that can be used to control program flow inside l - `continue` command skips the remainder the loop that is not needed (ie. a condition has been met) and resumes with the next iteration of the loop. `continue` allows for a more efficient execution +--- ``` if condition; then if condition; then @@ -1920,6 +1745,7 @@ else condition; then fi ``` +--- If the first `if` condition is met, then the second one will be skipped and resumed with the next iteration. ``` diff --git a/03_instructional_team/markdown_slides/unix_slides.md b/03_instructional_team/markdown_slides/unix_slides.md new file mode 100644 index 0000000..21246f5 --- /dev/null +++ b/03_instructional_team/markdown_slides/unix_slides.md @@ -0,0 +1,559 @@ +--- +marp: true +style: | + section { + font-family: Inter, -apple-system, BlinkMacSystemFont, 'Helvetica Neue', sans-serif; + } +_class: invert +paginate: true +--- + +# Unix Shell + +``` +$ echo "Data Sciences Institute" +``` + +--- +### What is Unix? + +Unix was a text-based operating system created in 1970. Many of its derivatives are commonly used today, including Linux and MacOS. + +Linux powers +* 90% of global cloud infrastructure +* 100% of the world's top 500 high performance computers +* 97% of embedded and IoT devices + +--- +*Bash* and similar shells +* are the primary way of interacting with most production Linux systems +* empower you to quickly and easily navigate the system, manipulate files, and automate tasks + +--- +### So what is the shell? 
+ +A *shell* is any user interface/program that takes an input from the user, translates it into instructions that the operating system can understand, and conveys the output back to the user. + +--- +There are various types of user interfaces: + +- graphical user interfaces (GUI) +- touch screen interfaces +- command line interfaces (CLI) + +--- +### And what is Bash? + +We'll be focusing on command line interfaces (CLI), more +specifically Bash, which stands for the Bourne Again SHell. + +On Windows: open the **Git Bash** or **Windows Terminal** app +On MacOS or Linux: open the **Terminal** app + +On newer macs, the default shell is zsh, which is almost identical to Bash. + +--- +### Clearing up an abundance of terminology + + +--- +### Clearing up an abundance of terminology + + +--- +### Clearing up an abundance of terminology + + +--- +### Clearing up an abundance of terminology + + +--- +### Clearing up an abundance of terminology + + +--- +### Let's get started! + +First, we'll open our terminal. As mentioned earlier, this is most +likely called terminal and can be found by searching our +computer, which on a Mac would be through cmd + space + +--- +Let's take a look at the terminal. What do we notice? + +- last login +- name +- location +- shell + +--- +### Looking at the Shell +Let's start with a few commands and see what happens in our +terminal. + +```bash +$ echo Rachael +$ date +$ cal +$ lksjfs +``` + +--- +- What happens when we type something that does not exist? +- What happens with errors? + +--- +# Getting help and accessing documentation +Bash includes built-in documentation for all commands + +```bash +$ man ls +``` + +Retrieve the **man**ual for each commands using `man`. +* Many manuals are also available online + + +--- +View a list of commonly used Bash commands + +```bash +$ help +``` + +--- +# Navigate Files / Directories + +--- +## Directories + +Let's try three commands that help us navigate our system: + +1. 
When using Bash, we always have a *working directory*: + +```bash +$ pwd +``` + +`pwd` prints our current working directory. If we ever need to know +where we are, we can execute this command. + +--- +2. To know what files and folders exist in our working directory, + we can use the code below: + +```bash +$ ls +``` + +--- +3. We can change the working directory using the following command: + +```bash +$ cd +``` + +By default, `cd` changes your *working directory* to your *home +directory*. You can also use `cd` to set your *working directory* by +including the desired *pathname* + +```bash +$ cd Desktop +``` + +--- + +```bash +$ cd Desktop +``` + +Note #1: In this example +* We can change the behaviour of the `cd` command by providing *arguments* +* By default, `cd` changes the *working directory* to our *home directory* +* By adding `Desktop` after the `cd` command, we change to the `Desktop` directory instead +* Most other bash commands have their own *arguments* that allow you to modify the behaviour and effect of the command + +--- + +Let's try using a *pathname argument* with the `ls` command too! + +```bash +$ cd +$ ls Desktop +``` + +--- + +```bash +$ cd Desktop +``` + +Note #2: +* we were able to refer to the `Desktop` directory using only its directory name because it existed within our *working directory* at the time the command was *called* +* in this context, `Desktop` is a *relative pathname* +* to refer to paths outside our working directory, use the full path instead + + +If we wanted to change the working directory to a directory +outside of our working directory, we would need to specify an *absolute pathname*: + +```bash +$ cd /Users/rachellam/Documents +$ cd /c/Users/simeo/Downloads +``` + +--- +## Paths + +As we've seen, directory names separated by slashes are paths. +There are two types of paths, _absolute_ and _relative_. 
+ +- An *absolute pathname* begins at the *root directory* and + includes each directory, separated by slashes until the + desired directory or file is reached. +- A *relative pathname* starts from the current *working directory* and + uses symbols `.` or `..` to represent relative positions in the + file tree. + +--- +Using `cd` and `pwd` let's take a look at how we can use +*absolute* and *relative pathnames*. + +```bash +$ cd /usr/bin +$ pwd +``` + +```bash +$ cd /usr +$ pwd +``` + +```bash +$ cd .. +$ pwd +``` + +--- +## Working with Files / Directories + +We're going to learn some basic commands to begin some +preliminary coding. We'll also be using these throughout the +module, so it's important to understand how they work now: + +- create directory `mkdir` +- create an empty file `touch` +- copy `cp` +- move and rename `mv` +- remove `rm` + +--- +## Commands + +--- +### mkdir + +First let's make a directory. It's important to remember what +directory you're working in currently, because that's where the +new directory will be made. Let's assume for now, we're working +on our Desktop. + +```bash +$ mkdir my_directory +``` + +We can also create multiple directories at the same time: + +```bash +$ mkdir dir1 dir2 dir3 +``` + +--- +### touch + +We can also make new empty files from the command line. +Using `touch`, we can make a new file in our +*working directory*. + +```bash +$ touch file1 +``` + +We can also create a specific file type by adding the extension: + +```bash +$ touch file1.txt +``` + +--- +### cp + +Now we're going to copy a file that we have in our *working directory*. It +can be any file but remember to include the extension or if it +has multiple characters, special characters and spaces, to wrap +it in quotes. + +```bash +$ cp file1 file2 +``` + +--- +We can also copy files or directories into a directory. 
+ +```bash +$ cp file1 dir1/ +``` + +And all files from one directory into another using *wildcards*: + +```bash +$ cp dir1/* dir2 +``` + +--- +#### Wildcards +*Wildcards* enable us to use commands on more than one file at a time. +The `*` is a placeholder that represents zero or more characters when used in a *pathname*. + +For example: +* `./*` matches all files and folders in the current directory +* `./*.txt` matches all files and folders that have a txt extension in the current directory +* `/home/simeo/project1_*` matches all files and folders that begin with `project1_` in the `/home/simeo` directory + +--- +*Wildcards* are one of many reasons why bash is so powerful! + +Many of the features in bash empower you to work with a large number of files easily. + + +--- +There are some useful *options* that accompany `cp`: +| Option | Description | +| ------ | ------------------------------------------------------------- | +| -i | Before overwriting an existing file, prompt the user for confirmation. | +| -R | Recursively copy directories and their contents. | +| -v | Display informative messages as the copy is performed. | + +--- +### mv + +The mv command enables us to move and rename files and +directories, depending on how it's used. In the example below, `mv` renames `file1` to `file2`. + +Rename a file: +```bash +$ mv file1 file2 +``` + +Here, `mv` moves `file1` from the *working directory* into `./dir1`: +```bash +$ mv file1 dir1 +``` + +--- +We can also move directories into other directories: + +```bash +$ mv dir1 dir2 +``` + +--- + +```bash +$ mv dir1 dir2 +``` + +In this case, if `dir2` exists, `dir1` will be moved into `dir2` (e.g. from `./dir1` to `./dir2/dir1`). + +If `dir2` does not exist, `dir1` will be renamed to `dir2`. + +In both cases the entire directory will be affected (moved into another directory, or renamed), rather than the contents.
+ +--- +Let's say we're in the directory `Desktop` and we just moved +`file1` into `dir1` but now we want to put it back in `Desktop`. How would we move a file out of a directory into another one? Unfortunately we can't just say + +```bash +$ mv file1 Desktop +``` + +because `file1` does not exist in `Desktop` any more and the command will try and rename `file1` to `Desktop`. + +--- +The answer involves using *absolute pathnames* and the tilde `~` notation: + +``` +$ mv dir1/file1 ~/Desktop +``` + +The `~` is shorthand for your *home directory*. + +--- +If we just wanted to move `file1` into `dir2` (if `dir2` is in our *working directory*), we could type: + +```bash +$ mv dir1/file1 dir2 +``` + +--- +What if we want to move just the contents of dir1 to another +directory rather than the whole folder? + +HINT: it is very similar to copying (`cp`). + +--- +Move just the contents of dir1 to another +directory rather than the whole folder: + +```bash +$ mv dir1/* dir2 +``` + +This is a combination of the directory `dir1`, pathnames `/` and wildcards `*`. Here, `dir1/*` takes all the contents of `dir1` and puts them in `dir2`. + +--- +We could also use the same technique to specify certain files to move rather than all of them. + +How would we move all txt files from `dir1` into `dir2`? + +--- +#### Questions? + +We're starting to combine our knowledge of files, directories and pathnames with some basic commands. How do we feel up to this point? + +--- +### rm + +To remove files we use the command `rm`. Because we're now +deleting files, it's important that you're sure of what you're deleting because there is no way to undo. Fortunately, there +are ways to be sure. + +```bash +$ rm file1 +``` + +Without specifying any *options*, `file1` will be deleted +without any feedback. + +--- +To ensure we want to delete something, we can use the option `-i` (interactive) that we learned earlier.
+ +```bash +$ rm -i file1 +``` + +This will prompt a question asking us if we want to delete `file1`. We can respond with `y` (yes) or `n` (no). + +--- +If we want to delete a directory, we need to use the option `-r` (recursive) as we did when copying (`cp`). This will recursively delete everything inside of the directory and the directory itself. + +```bash +$ rm -r dir1 +``` + +--- +If we're specifying multiple deletions and a directory does not exist, the shell will tell us. If we don't want that message, we can add the option `-f` (force). Force will override `-i` if it is included. + +1. How do you delete multiple directories? +2. What happens if you delete multiple directories with `-i`? +3. What happens if you delete multiple directories with `-i` but one does not exist? + +--- +It's extremely important to remember that you cannot undo `rm`. This means, if you start using wildcards to specify filenames and don't include `-i`, you could delete things by accident. For example, let's say you want to delete all `.txt` files in a directory: + +```bash +$ rm *.txt +``` + +If you accidentally add a space between `*` and `.txt`, the `rm` command will delete all the files in the directory and then try to find a `.txt` file which does not exist because it deleted everything. + + +--- +## Working with text files +--- +Output the contents of any text file using `cat` + +```bash +$ cat file1.txt +``` + +* Quick way to preview file contents +* Note, this might flood your terminal if the file is too big. + +--- +Write the output of a command to a text file using `>` or `>>` + +```bash +$ ls >> dirlist.txt +``` + +* `>` replaces the destination file (deletes existing content) +* `>>` appends to the destination file + +--- + +We can also write custom content + +```bash +$ echo "Hello, world!" > myfile.txt +``` + +--- +We can also edit text files in the command line using `nano` + +```bash +$ nano file1.txt +``` + +To exit `nano`, press Control + X.
The shortcuts are also shown at the bottom of the terminal while in the `nano` editor, where `^` denotes the Control key. + +--- +For more complex edits, we can launch Visual Studio Code directly from the command line. + +```bash +$ code file1.txt +``` + +Note: MacOS users may need to setup the `code` command first. + +--- +## Bash scripts +--- +We can write down a list of commands in a *script*, useful for +* A set of commands we need to frequently *execute* together +* Sharing commands with others + +A bash *script* is a text file with commands on different lines. +* bash will *execute* each command in sequence + +--- +Lines that begin with a `#` are *comments* that are ignored by bash. +* Use *comments* to annotate your thought process + +```bash +# copy text files from temp directory +cp tempdir/*.txt data/ + +# delete the rest of the temporary files +rm tempdir/* +``` + +--- +Bash scripts usually have a special *comment* as the very first line that indicates which shell should be used to run the script. +* This is also known as the sh-bang (ha**sh** symbol + exclamation mark/**bang**) + +Typically, the shell used is bash: +```bash +#!/bin/bash + +# do stuff +cp file1 dir2/ +``` + +--- + +Execute a script by running: +```bash +$ bash myscript.sh +``` diff --git a/04_this_cohort/additional_resources/.keep b/04_this_cohort/additional_resources/.keep new file mode 100644 index 0000000..945c9b4 --- /dev/null +++ b/04_this_cohort/additional_resources/.keep @@ -0,0 +1 @@ +. \ No newline at end of file diff --git a/04_this_cohort/live_code/.keep b/04_this_cohort/live_code/.keep new file mode 100644 index 0000000..945c9b4 --- /dev/null +++ b/04_this_cohort/live_code/.keep @@ -0,0 +1 @@ +. \ No newline at end of file diff --git a/README.md b/README.md index caca70e..91cc882 100644 --- a/README.md +++ b/README.md @@ -1,113 +1,150 @@ -# Unix, Git and GitHub - -## Contents -1. [Description](https://github.com/UofT-DSI/01-shell_git_github#description) -2. 
[Learning Outcomes](https://github.com/UofT-DSI/01-shell_git_github#learning-outcomes) -3. [Design](https://github.com/UofT-DSI/01-shell_git_github#design) -4. [Expectations](https://github.com/UofT-DSI/01-shell_git_github#expectations) -5. [Policies](https://github.com/UofT-DSI/01-shell_git_github#policies) -6. [Schedule](https://github.com/UofT-DSI/01-shell_git_github#schedule) -7. [Folder Structure](https://github.com/UofT-DSI/01-shell_git_github#folder-structure) +# Unix shell + +## Content + +* [Description](#description) +* [Learning Outcomes](#learning-outcomes) +* [Assignments](#assignments) +* [Contacts](#contacts) +* [Delivery of the Learning Module](#delivery-of-the-learning-module) +* [Schedule](#schedule) +* [Requirements](#requirements) +* [Resources](#resources) + + [Cheat sheet](#cheat-sheet) + + [Videos](#videos) + + [How to get help](#how-to-get-help) +* [Folder Structure](#folder-structure) ## Description -The beginning of the course will introduce the basic language of Unix shell including how to navigate and manipulate files and directories. Learners will then learn certain commands, how to create scripts and write basic functions using pipes, filters and loops. - -The next portion of the lesson will be dedicated to getting started with version control and GitHub, and how it connects to the ethical discussions of reproducibility. Learners will learn how to set up Git and initialize and utilize repositories, including recording, viewing and undoing changes. They will also learn how to create branches and collaborate with others with shared branches. This course will put it all together and introduce some more advanced commands such as de-bugging and history editing. - -Finally, learners will determine how to problem-solve by identifying where the issue is and how to search with Google and Stack Overflow. This will then lead to the topic of reproducibility and how to contribute by commenting code and writing documentation. 
- -In the final week, students will learn about ethics and inequity and why it's important to include these discussions within their projects. This class will be discussion based and will require students to do readings before the class. -Throughout the entire course, students will learn how to problem solve through live coding. They will also learn about reproducibility and how to centre it within their work. +This module introduces the Unix shell language and covers file and directory navigation and manipulation. Participants gain proficiency in various commands, script creation, and writing basic functions using pipes, filters, and loops. -This course is designed for those who have a degree in something other than Computer Science/Statistics who are looking to enhance their data science skills for their career. +Participants will acquire problem-solving skills through live coding sessions. Additionally, they will explore the concept of reproducibility and its integration into their work. ## Learning Outcomes -Students will know how to: -1. Comfortably access the terminal and write scripts using basic commands, variables, pipes, filters and loops. This will be assessed in Assignment 1. -2. How to use version control to preserve personal work, access and edit pervious code versions, collaborate with others, and find and debug errors. This will be assessed in Assignment 2. -3. Self-problem solve by identifying issues, researching, or properly formulating questions using components of reproducibility. This will be assessed in both Assignment 1 and Assignment 2. -4. Situate all work within wider discussions of ethics and inequity. Students will actively scrutinize who is and isn't in our datasets and develop knowledge of past abuses of power to better engage their work with ethical considerations. This will be assessed in Assignment 2. 
+By the end of the module, participants will be able to: +* Comfortably access and navigate the terminal +* Create, modify and delete directories and files -## Design -The workshop will be held over three weeks, three days a week. Two of the three days will be 2-hours long and the last day will be 3-hours. Being mindful of online fatigue, there will be one break during each class where students are encouraged to stretch, grab a drink and snacks, or ask any additional questions. -## Expectations -The course is a live-coding class. Students are expected to follow along with the coding, creating files and folders to navigate and manipulate. Students should be active participants while coding and are encouraged to ask questions throughout. Although slides will be available for students to reference, they should be referenced before or after class, as during class will be dedicated to coding with the instructor. - -## Policies -1. Camera is optional although highly encouraged. We understand that not everyone may have the space at home to have the camera on. -2. Class will start 5 minutes past the designated time. - -## Schedule -Day 1: Unix Shell I (introducing the Shell, introductory commands, files and directories) +## Activities +This module has two types of activities. +1. Assignments are mandatory, and form part of your evaluation. +1. Homework is not assessed, but is provided to you for extra practice. We encourage you to check each other's homework solutions during Work Periods or ask a Learning Support staff member! -Day 2: Unix Shell II (input/output and pipes/filters) +### Assignments +Participants should review the [Assignment Submission Guide](https://github.com/UofT-DSI/onboarding/blob/main/onboarding_documents/submissions.md) for instructions on how to complete assignments in this module. -Day 3: Unix Shell III (shell scripts, shell functions, parameters, flow control) +Assignments are typically due on the Sunday following the module's live learning session.
-Day 4: Version Control and GitHub I (introducing version control and GitHub, basic Git commands) +1. [Shell script assignment](./02_activities/assignments/assignment_instructions.md) -Day 5: Version Control and GitHub II (remote repositories; branching) +### Homework +1. [Shell Homework](./02_activities/homework/homework.sh) -Day 6: Version Control and GitHub III (collaborating, dealing with conflicts) -Day 7: Problem solve, reproducibility, ethics, inequity +## Contacts -Day 8: Data Science Foundations - Review and Practice +**Questions can be submitted to the _#dc-help_ channel on Slack** -## Deadlines -Homework Submission: +* Technical Facilitator: + * **Abhishek Moturu** + abhishek.moturu@mail.utoronto.ca -Homework assignments are expected to be submitted within 2 days of their release date. For example, if homework-1 is released on Saturday, December 2nd, the submission deadline is Monday, December 4th. +* Learning Support Staff: + * **Keli Chiu** + keli.chiu.kc@gmail.com + * **Ananya Jha** + ananya.jha@mail.utoronto.ca +  +## Delivery of the Learning Module -Chapter Assignments Submission: +This module will include live learning sessions and optional, asynchronous work periods. During live learning sessions, the Technical Facilitator will introduce and explain key concepts and demonstrate core skills. Learning is facilitated during this time. Before and after each live learning session, the instructional team will be available for questions related to the core concepts of the module. Optional work periods are to be used to seek help from peers, the Learning Support team, and to work through the homework and assignments in the learning module, with access to live help. Content is not facilitated, but rather this time should be driven by participants. We encourage participants to come to these work periods with questions and problems to work through. +  +Participants are encouraged to engage actively during the learning module. 
The key to developing the core skills in each learning module is practice. The more participants engage in coding along with the instructional team, and applying the skills in each module, the more likely it is that these skills will solidify. -Assignments related to each chapter will have a dedicated submission deadline. These assignments are typically due one week after the conclusion of the respective chapter. As an illustration, if you are working on assignment-1 released on Saturday, December 2nd, the due date for submission is Saturday, December 9th. - -Assignment-1 due date: Saturday, Dec 9th - -Assignment-2 due date: TBD - -All Homework and Assignment submissions can be submitted through [this google form link](https://forms.gle/YJsEuZCKBoZkrnqt5) +## Schedule +||Jan 7|Jan 9|Jan 10|Jan 14| +|---|---|---|---|---| +|Week 1|Live Learning Session 1 ([Shell](https://github.com/UofT-DSI/shell))|Live Learning Session 2 (Git & GitHub)|Work Period|Live Learning Session 3 (Git & GitHub)| +  +## Requirements + +* Participants are not expected to have any coding experience; the learning content has been designed for beginners. +* Participants are encouraged to ask questions, and collaborate with others to enhance their learning experience. +* Participants must have a computer and an internet connection to participate in online activities. +* Participants must not use generative AI such as ChatGPT to generate code in order to complete assignments. It should be used as a supportive tool to seek out answers to questions you may have. +* We expect participants to have completed the steps in the [onboarding repo](https://github.com/UofT-DSI/onboarding/). +* We encourage participants to default to having their camera on at all times, and turning the camera off only as needed. This will greatly enhance the learning experience for all participants and provide real-time feedback for the instructional team.
+ +## Resources + +Feel free to use the following as resources: + +### Cheat sheet +- [Devhints](https://devhints.io/bash) +- [Bash-Cheat-Sheet](https://github.com/RehanSaeed/Bash-Cheat-Sheet) + +### Videos +- [Change Directory](https://www.youtube.com/watch?v=6U4XV4w8qtE) +- [Deleting Files and Directories](https://www.youtube.com/watch?v=-L3XeZPwj_Y) +- [Bash in 100 seconds](https://www.youtube.com/watch?v=I4EWvMFj37g) + +### How to get help +#### 1. Gather information about your problem +- Copy and paste your error message +- Copy and paste the code that caused the error, and the last few commands leading up to the error +- Write down what you are trying to accomplish with your code. Include both the specific action, and the bigger picture and context +- (optional) Take a screenshot of your entire workspace + +#### 2. Try searching the web for your error message +- Sometimes, the error has common solutions that can be easy to find! + - This will be faster than waiting for an answer +- If none of the solutions apply, consider asking a Generative AI tool + - Paste your code, the error message, and a description of your overall goals + +#### 3. Try asking in your cohort's Slack help channel +- Since we're all working through the same material, there's a good chance one of your peers has encountered the same error, or has already solved it +- Try searching in the DSI Certificates Slack help channel for whether a similar query has been posted +- If the question has not yet been answered, post your question! + - Describe your overall goals, the context, and the specific details of what you were trying to accomplish + - Make sure to **copy and paste** your code and your error message + - Copying and pasting helps: + 1. your peers and teaching team quickly try out your code + 1.
others to find your question in the future + +#### Great resources on how to ask good technical questions that get useful answers +- [Asking for Help - The Odin Project](https://www.theodinproject.com/lessons/foundations-asking-for-help) +- [How do I ask a good question? - Stack Overflow](https://stackoverflow.com/help/how-to-ask) +- [The XY problem: A question pitfall that won't get useful answers](https://xyproblem.info/) +- [How to create a minimal reproducible example](https://stackoverflow.com/help/minimal-reproducible-example) + +#### Getting help: A summary + + +
## Folder Structure -Below are the folders contained in this repo with a description of what they contain and information on how to use them. - -### 1. *slides-resources* -This folder contains all editable slides. To edit, download the entire folder, including the *pics* folder as this folder contains the pictures which are relationally referenced in the markdown files. - -To change a photo, edit the markdown where photos are referenced. - -Example: - -Change `![w:1150 center](pics/github.png)` to `![bg](pics/github.png)` - -To add a photo, add photo to the *pics* folder and reference it within the markdown file. - -Example: - -Added photo labelled "git_commit.png" will be referenced in markdown file as `![w:1000 left](pics/git_commit.png)` - -This folder also includes the html versions of the slides. Either the pdf slides or the html slides can be used when teaching. If slides are edited to contain any gifs, the instructor will need to use the html slides so that the gifs are active. - -### 2. *pdf-slides* -This folder contains the pdf versions of the slides for students to follow. Slides should be referenced before class to prepare or after class to review. During class will be live-coding, therefore, there is no need to follow them during class. They contain all information that was discussed in class and are a great resource in the future if students need to reassess their knowledge. - -### 3. *pdf-homework* -This folder contains homework for students to practice Unix and Git/GitHub workshops. It is separated by week, then by day. Please complete the Unix Shell homework in the first week, and the Git/GitHub homework in the second. - -It is just a suggestion but it will help students throughout the workshop, as content is cumulative and will only get more difficult. Unfortunately, there is not enough time to review previous content each class so while this homework is **not** graded, it is highly recommended. - -### 4. 
*homework* -This folder contains the editable versions of the pdf homework. Can be changed based on the amount of content that was completed each day. - -### 5. *post-workshop* -This folder contains the exit surveys for students to complete. It holds both the md and docx versions of the survey. - -### 6. *assignments* -This folder contains the assignments for the workshop. Students are expected to complete them one week after the content has been delivered - -### 7. *guides* -This folder includes guides specifically for windows to set up Ubuntu environment and SSH Keys. Students who experience difficulty with these topics should reference the guides to debug. +Below is an outline of the folder structure for this module: +``` +. +├── .github +├── 01_materials +├── 02_activities +├── 03_instructional_team +├── 04_this_cohort +├── .gitignore +├── LICENSE +├── README.md +└── steps_to_ask_for_help.png +``` +* **.github**: Contains issue templates and pull request templates for the repository. +* **materials**: Module slides and interactive notebooks (.ipynb files) used during learning sessions. +* **activities**: Contains graded assignments, exercises, and homework to practice concepts covered in the learning module. +* **instructional_team**: Resources for the instructional team. +* **this_cohort**: Additional materials for this cohort. +* **.gitignore**: Files to exclude from this folder, specified by the Technical Facilitator +* **LICENSE**: The license for this repository. +* **README**: This file. +* **steps_to_ask_for_help.png**: Guide on how to ask for help. 
diff --git a/assignments/markdown/00-DSI-Pre-Workshop-Assignment.md b/assignments/markdown/00-DSI-Pre-Workshop-Assignment.md deleted file mode 100644 index ec5ab00..0000000 --- a/assignments/markdown/00-DSI-Pre-Workshop-Assignment.md +++ /dev/null @@ -1,36 +0,0 @@ -# DSI: Unix Shell, Git and GitHub - -## Pre-Workshop Assignment - -### Notes: - -- We will learn more about the above commands during the workshop; you do not need to be familiar with these commands prior to the workshop. -- This is an ungraded assignment but **please complete it before October 5th**. If you are having difficulty accessing the terminal or with any of the above commands, please join the October 5th prep session either in-person or online. - -### Requirements: - -1. Open Terminal - - - MacOS: `CMD + Space` and search for "Terminal" - - Windows: Please follow [this guide](https://www.ssl.com/how-to/enable-linux-subsystem-install-ubuntu-windows-10/) - -2. Run the following commands in order and screenshot the output - -``` -$ echo (inset your name without parentheses) -$ cd -$ pwd -$ echo $HOME -``` - -Please answer the following questions: - -1. Were you able to open terminal? Y / N -2. Do you know where your home directory is? Y / N -3. Do you know what folders it contains? Y / N - -### Lesson Outcomes: - -- Determine if you can access the terminal -- Try executing some basic commands -- Learn and share what your home directory is set to diff --git a/assignments/markdown/01-Unix-Assignment.md b/assignments/markdown/01-Unix-Assignment.md deleted file mode 100644 index 22535ae..0000000 --- a/assignments/markdown/01-Unix-Assignment.md +++ /dev/null @@ -1,37 +0,0 @@ -# DSI: Unix Shell, Git and GitHub - -## Assignment 1: Unix and Data - -### Requirements: - -1. Write a script that takes the parking_data.csv file as a positional parameter from the terminal as the input (this is so that the script can be run from any computer, so long as the csv file is available, assuming it is on the Desktop). 
- - - You can find the Toronto parking ticket data at Toronto's Open Data Portal: [https://open.toronto.ca/dataset/parking-tickets/](https://open.toronto.ca/dataset/parking-tickets/) - -2. Build a function or multiple functions into the script that: - a) Prints all types of parking infractions (_infraction_description_) - b) Prints the mean, min and max _set_fine_amount_ - these calculations can either be in the same function or multiple functions - c) Saves one type of parking infraction of your choosing to a separate csv file (this file should contain all observations of the chosen _infraction_description_, _set_fine_amount_, and _location2_ with the same headings as original csv) - -3. Things to remember: - - The script should be able to navigate to the directory housing the csv file - - Functions should include loops (either if/else, while, until, for) to make the process iterative - - You **must use outside sources** (Google and StackOverflow) to build these calculations - - Remember to cite any code that was used - -### Lesson Outcomes: - -- Practice using commands, positional parameters, functions and loops from submodules on a dataset -- Work on navigating directories within scripts -- Build skills in searching using Google and StackOverflow for commands not directly learned within the lesson and cite your sources - -### Rubric: - -| Component | 1 | 2 | 3 | 4 | 5 | -| ----------------------------------------------------------------------------- | --- | --- | --- | --- | --- | -| 1. Script is functioning and does as described in the assignment requirements | | | | | | -| 2. Script uses at least one type of loop to fulfill requirements | | | | | | -| 3. Script is free from bugs and has been appropriately commented | | | | | | -| 4. 
All outside sources have been properly cited | | | | | | - -**Total:** /20 diff --git a/assignments/markdown/02-Git-Quiz.md b/assignments/markdown/02-Git-Quiz.md deleted file mode 100644 index 6f6b5db..0000000 --- a/assignments/markdown/02-Git-Quiz.md +++ /dev/null @@ -1,132 +0,0 @@ -# DSI: Unix Shell, Git and GitHub - -## Assignment 2 & Quiz: Git and GitHub - -### Part 1 - -Part 1 of Assignment 2 is a quiz. Please complete to the best of your ability. Notes are permitted. Please email your responses to the TA with Instructor CC'd. - -1. Check all that are TRUE about version control: - - - [ ] Can revert files to a previous state - - [ ] Can compare changes over time - - [ ] Can see who modified something last - - [ ] Can recover lost files - -2. What is the difference between centralized version control systems and distributed version control systems? - -``` -Your answer here... - - - - -``` - -3. What are the three states that files can reside in? - - - [ ] a) committed, changed, waiting - - [ ] b) saved, changed, staged - - [ ] c) committed, modified, staged - - [ ] d) saved, modified, staged - -4. What command initializes a new repository? - - - [ ] a) `git clone` - - [ ] b) `git branch` - - [ ] c) `git fork` - - [ ] d) `git init` - -5. What does `git diff` do? - - - [ ] a) compares the differences between the home directory and staging area - - [ ] b) compares the differences between the working directory and staging area - - [ ] c) compares the differences between the working directory and what’s been committed - - [ ] d) compares the differences between the staging area and what’s been committed - -6. How do you add a message to your commit? (select all that apply) - - - [ ] a) `git commit -m` - - [ ] b) `git commit --messages` - - [ ] c) `git commit` - - [ ] d) `git commit -message` - -7. How do you add a remote repo? (select all that apply) - - - [ ] a) `git remote add` - - [ ] b) `git add remote` - - [ ] c) `git clone` - - [ ] d) `git add clone` - -8. 
How do you add a remote repo? (select all that apply) - - - [ ] a) `git remote` - - [ ] b) `git add remote` - - [ ] c) `git clone` - - [ ] d) `git add clone` - -9. What is the difference between `git pull` and `git fetch`? - -``` -Your answer here... - - - - -``` - -9. How do you switch branches? - - - [ ] a) `git checkout` - - [ ] b) `git checkout -b` - - [ ] c) `git branch -c` - - [ ] d) `git branch` - -10. Why are messages important? What would make a good commit message? - -``` -Your answer here... - - - - -``` - -11. Please correct the merge shown below (both codes are suitable, neither has errors): - -``` -<<<<<<< HEAD -df.loc[df['sex'] == 'f', 'age'].mean() -======= -df.loc[df['sex'] == 'm', 'age'].mean() ->>>>>>> branch_1 -``` - -``` -Your corrected merge here... - - - - -``` - -# Part 2 - -1. `fork` and `clone` [this class GitHub repo](https://github.com/rachaellam/DSI-workshop-repo). -2. `push` your Assignment 1 to the folder labelled "assignment-2." Your additions should include... - - All components necessary to run Assignment 1 - - Proper folder structure (inputs, outputs, scripts) - - A README.md file. The README should include components discussed in the workshop. Feel free to research good READMEs and add anything that you believe will add value to your README -3. Create a `pull request` to add your additions to the class repo. 
- -## Rubric: - -| Component | 1 | 2 | 3 | 4 | 5 | -| --------------------------------------------------------------------------------------------------------------------------- | --- | --- | --- | --- | --- | -| Repo contains all necessary components to run Shell script and has the correct folder structure | | | | | | -| README is comprehensive and includes components discussed in class plus at least one component learned from outside sources | | | | | | -| Pull request has been successfully requested without any merge errors | | | | | | - -**Total:** /15 -**Quiz Total:** /11 -**Final:** /26 diff --git a/assignments/markdown/03-Ethics-Inequality-Assignment.md b/assignments/markdown/03-Ethics-Inequality-Assignment.md deleted file mode 100644 index 6d18eb8..0000000 --- a/assignments/markdown/03-Ethics-Inequality-Assignment.md +++ /dev/null @@ -1,22 +0,0 @@ -# DSI: Unix Shell, Git and GitHub - -## Assignment 3: Inequity and Ethics - -### Requirements: - -1. Reflect on a past project that lacked an ethics and inequity component (minimum 1 page). What would you change about your project? What discussion should you add? -2. 
Include at least 3 primary and/or secondary resources - -### Lesson Outcomes: - -- Reflect on inequities and inequalities in relation to data science work -- Better understand the people and communities that are omitted and how that might continue to marginalize already marginalized people and communities - -### Rubric: - -| Component | 1 | 2 | 3 | 4 | 5 | -| ------------------------------------------------------------------- | --- | --- | --- | --- | --- | -| Thorough discussion of additional ethical component to past project | | | | | | -| Included at least 3 resources which have all been properly cited | | | | | | - -**Total:** /10 diff --git a/assignments/pdf/00-DSI-Pre-Workshop-Assignment.pdf b/assignments/pdf/00-DSI-Pre-Workshop-Assignment.pdf deleted file mode 100644 index 15a661d..0000000 Binary files a/assignments/pdf/00-DSI-Pre-Workshop-Assignment.pdf and /dev/null differ diff --git a/assignments/pdf/01-Unix-Assignment.pdf b/assignments/pdf/01-Unix-Assignment.pdf deleted file mode 100644 index 5e52cb3..0000000 Binary files a/assignments/pdf/01-Unix-Assignment.pdf and /dev/null differ diff --git a/assignments/pdf/02-Git-Quiz.pdf b/assignments/pdf/02-Git-Quiz.pdf deleted file mode 100644 index 032a68e..0000000 Binary files a/assignments/pdf/02-Git-Quiz.pdf and /dev/null differ diff --git a/assignments/pdf/03-Ethics-Inequality-Assignment.pdf b/assignments/pdf/03-Ethics-Inequality-Assignment.pdf deleted file mode 100644 index 2150ca8..0000000 Binary files a/assignments/pdf/03-Ethics-Inequality-Assignment.pdf and /dev/null differ diff --git a/guides/set-home-directory-windows.pdf b/guides/set-home-directory-windows.pdf deleted file mode 100644 index e1be8b7..0000000 Binary files a/guides/set-home-directory-windows.pdf and /dev/null differ diff --git a/guides/ssh-keys-windows.pdf b/guides/ssh-keys-windows.pdf deleted file mode 100644 index 466ed03..0000000 Binary files a/guides/ssh-keys-windows.pdf and /dev/null differ diff --git 
a/homework/markdown/git-homework.md b/homework/markdown/git-homework.md deleted file mode 100644 index d56c2f6..0000000 --- a/homework/markdown/git-homework.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -marp: true -theme: uncover -_class: invert -paginate: true - ---- - - - -# **Unix Shell Homework** -```console -$ echo "Data Sciences Institute" -$ echo "by: Rachael Lam" -``` - ---- -##### **Expectations** -The goal of this homework is to give students an opportunity to practice what was learned during each class. This will help students remember the content and prepare for the next class. - -Because each class builds upon the last, it's important to review the content and become comfortable with it, as time is too limited for a full in-class review. - - ---- - - -## `Day 1` - ---- -##### **Prompt:** -Initiate a new git directory. Add your homework from previous classes (copy or move) and create your first `commit` with a sufficient message. Remember, messages should indicate what you have changed. - -**To submit:** - 1. A screenshot of your terminal commands. - ---- - - -## `Day 2` - ---- -##### **Prompt:** -Clone a repo of your choice from GitHub. Save the first 10 instances of `git log` to a txt file. Please include the commentor name, relative date and subject. Then add a test file, commit the test file and push it to the remote repo. This test file does not have to have anything to do with the project itself - just to practice the git process. - -**To submit:** - 1. A screenshot of your terminal commands - 2. 
txt file of `git log` - -**Due:** Thursday, Feb 16th \ No newline at end of file diff --git a/homework/markdown/unix-homework.md b/homework/markdown/unix-homework.md deleted file mode 100644 index f97b97e..0000000 --- a/homework/markdown/unix-homework.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -marp: true -theme: uncover -_class: invert -paginate: true - ---- - - - -# **Unix Shell Homework** -```console -$ echo "Data Sciences Institute" -$ echo "by: Rachael Lam" -``` - ---- -##### **Expectations** -The goal of this homework is to give students an opportunity to practice what was learned during each class. This will help students remember the content and prepare for the next class. - -Because each class builds upon the last, it's important to review the content and become comfortable with it, as time is too limited for a full in-class review. - ---- -##### **Due Dates** -Homework will be released after each class and is due on the day of the next class. For example, after class on Monday, the homework prompt will be released and then homework is expected to be handed in at any time on Thursday. - ---- - - -## `Day 1` - ---- -##### **Prompt:** -Create one directory in any parent directory of your choice. This new directory should contain three additional directories. Within one of these additional subdirectories, please create two text files (.txt) - -**To submit:** - 1. A screenshot of your terminal commands. - ---- -##### **Potential Commands** - -- current directory `pwd` -- set working directory `cd` -- list contents of working directory `ls` -- create directory `mkdir` -- create file `touch` - ---- - - -## `Day 2` - ---- -##### **Prompt:** -Download a csv of your choice. This could be from Toronto Open Data or another source but use a different csv than the one used in class. Place the new csv in a directory of your choice and rename it. Extract one column and the unique values from that column. Redirect the standard output to be a txt file. - -**To submit:** - 1. 
A screenshot of your terminal commands. - 2. The txt file - ---- -##### **Potential Commands** - -- commands from previous class -- `mv` -- `cp` -- `cat` -- `cut` -- `sort` -- `uniq` - ---- - - -## `Day 3` - ---- -##### **Prompt:** -Create a shell script that extracts some data using pipes (ie. a column, particular rows using `grep`, or anything else you want to play around with) from the same csv file from Day 2 homework. The filter should be contained within a function and include at least one variable. - -**To submit:** - 1. A screenshot of your terminal commands. - 2. The shell script file - ---- - - -## `Day 4` - ---- -No homework. Please work on Assignment 1. \ No newline at end of file diff --git a/homework/pdf/git-homework.pdf b/homework/pdf/git-homework.pdf deleted file mode 100644 index 748d64b..0000000 Binary files a/homework/pdf/git-homework.pdf and /dev/null differ diff --git a/homework/pdf/unix-homework.pdf b/homework/pdf/unix-homework.pdf deleted file mode 100644 index 70e8dec..0000000 Binary files a/homework/pdf/unix-homework.pdf and /dev/null differ diff --git a/post-workshop/Exit Survey - DS Foundations.docx b/post-workshop/Exit Survey - DS Foundations.docx deleted file mode 100644 index 015aec4..0000000 Binary files a/post-workshop/Exit Survey - DS Foundations.docx and /dev/null differ diff --git a/post-workshop/Exit Survey - DS Foundations.md b/post-workshop/Exit Survey - DS Foundations.md deleted file mode 100644 index 4905011..0000000 --- a/post-workshop/Exit Survey - DS Foundations.md +++ /dev/null @@ -1,49 +0,0 @@ -# DSI Upskilling Pilot Course Exit Survey -## Course Name: Data Science Foudations -### _Course Instructor: Rachael Lam_ -### _Ta: Delaram Pouyabahar_ -\ -Thank you for joining the DSI upskilling pilot courses! - -We would like to get your thoughts on your experiences with this course and how we can design our full course offerings. We would appreciate it if you could take the time to fill out and submit this short survey. 
- -### Scale questions: -- 1 - Not at all -- 2 - Somewhat -- 3 - Moderately -- 4 - Mostly -- 5- A great deal - -## 1. About the Curriculum. -- I found the course intellectually stimulating. -- The course provided me with a deeper understanding of Unix shell, version control, and GitHub. -- The course is set up to fully onboard someone without prior technical experience. -- The course design, including live coding and examples, provided an opportunity for me to demonstrate an understanding of data science skills. -- The course inspired me to think further about the subject matter outside of class. -- The course material is helpful for me to enhance my data science skills for my career. -- I would recommend this course to other students. -- Overall, the quality of my learning experience in this course was good. - -## 2. About the Instructor. -- The course instructor (Rachael Lam) explained concepts clearly. -- The course instructor (Rachael Lam) encourages learners to ask questions about the course material. - -## 3. About the TA. -- The TA (Delaram Pouyabahar) was readily available during the class. -- The TA (Delaram Pouyabahar) was helpful when I had difficulties or questions. - -### Short Answer Questions. - -#### 4. How would you rate the pilot course sequence and flow? - -##### 4.1 How do you feel about the course materials? Was there too much material? Would you prefer less material but more in-depth? Or did you enjoy how high-level the material was? - -#### 5. Please comment on the in-class support model. - -#### 6. What were the top 2 things you liked about this pilot course. - -#### 7. What were 2 things you do NOT like about this pilot course. - -#### 8. Please tell us about other data science topics that would of interest and helpful in your career. - -### Thank you so much for your feedback! 
diff --git a/slides-resources/ethics_slides.md b/slides-resources/ethics_slides.md deleted file mode 100644 index a956d53..0000000 --- a/slides-resources/ethics_slides.md +++ /dev/null @@ -1,168 +0,0 @@ ---- -marp: true -theme: uncover -_class: invert -paginate: true - -style: | - img[alt~="center"] { - display: block; - margin: 0 auto; - } - ---- - - - - -# **Ethics** -```bash -$ echo "Data Sciences Institute" -$ echo "Rachael Lam" -``` - ---- - - -##### Why should we care about ethics in Data Science? - ---- - - -## `Who Counts in our` -## `Datasets?` - ---- - -![w:950 center](pics/torontocrime.png) - ---- - -![w:950 center](pics/covidcases.png) - ---- - -![w:2300 center](pics/drivingtrends.png) -- [Kieran Healy](https://kieranhealy.org/blog/archives/2020/05/21/the-kitchen-counter-observatory/) - ---- - -![w:750 center](pics/census.png) - ---- - -##### **Os Keyes: Counting the Countless** ->Trans lives are ultimately (to a certain degree) about autonomy: about the freedom to set one’s own path. Society isn’t a tremendous fan of this - ---- -> ...“administrative violence” to refer to the way that administrative systems such as the law — run by the state, that white supremacist capitalist patriarchy — “create narrow categories of gender and force people into them in order to get their basic needs met.” - ---- -> “data violence” refers to the perpetuation of violence through datalogical systems: everything from YouTube’s recommender algorithm to facial recognition to online advertising. - ---- - - - -How do data science practices exclude people or communities? 
- ---- - -- Normalizing and Standardizing - - Open text boxes create data that is difficult to clean - - - A quantitative approach forces us to make buckets and decide the definitions of gender and constrains people's decisions to those buckets -- Consistency - - Big data encourages us to collect as much data as possible in a standardized way - - - Developing a data history means that variation from the previously determined standardized approach complicates data collection thus is discouraged - ---- - - - -Insurance companies are making decisions based on health data (ex. food you buy, exercise, etc.) -
- -How might this practice exclude people or communities? - -Is this practice even ethically reponsible? - ---- - -> "The inhumane reduction of humanity down to what can be counted." - ---- - -##### **Kieran Healy: The Kitchen Counter Observatory** -> Numbers and measures are crude; they pick up the wrong things; they strip out the meaning of what’s happening to real people; they make it easy to ignore what can’t be counted. There’s something to those complaints. But it’s mostly a lazy critique. In practice, I find that far from distancing you from questions of meaning, quantitative data forces you to confront them. - ---- - - - -Do you think the reductionist appoach of data science make it easy to ignore realities, or does it force you to confront them as Healy states? - ---- - -##### **Reforming Data Science** ->With administrative violence, Spade notes how “reform” often benefits only the least marginalized while legitimizing the system and giving cover for it to continue its violence. - -We can see this in facial recognition where people of colour are not well recognized by the program, yet creating a better algorithm only benefits systems of control. - ---- - - - -Can you think of another product/algorithm/program that further marginalizes communities? - -Do you believe we can reform data science? - ---- -##### **References** -- Healy, 2020, ‘The Kitchen Counter Observatory’, https://kieranhealy.org/blog/archives/2020/05/21/the-kitchen-counter-observatory/. 
-- Jasmine Mithani and Alex Samuels: https://fivethirtyeight.com/features/who-the-census-misses/ -- Keyes, 2019, ‘Counting the Countless’, https://reallifemag.com/counting-the-countless/ -- NYTimes: https://www.nytimes.com/interactive/2021/us/new-york-covid-cases.html -- Rachael: https://github.com/rachaellam/Toronto-Crime-Rates/blob/main/outputs/paper/toronto_crime_analysis.pdf diff --git a/slides-resources/foundation_slides.md b/slides-resources/foundation_slides.md deleted file mode 100644 index 98a0ea1..0000000 --- a/slides-resources/foundation_slides.md +++ /dev/null @@ -1,306 +0,0 @@ ---- -marp: true -theme: uncover -_class: invert -paginate: true - -style: | - img[alt~="center"] { - display: block; - margin: 0 auto; - } - ---- - - - - -# **Foundations Overview** -```bash -$ echo "Data Sciences Institute" -$ echo "Rachael Lam" -``` - ---- -##### **Why take this course?** - -- Unix shells - more specifically bash - is a powerful tool for quickly and easily navigating and manipulating files, scaling automated tasks, accessing Git and processing data. - -- Git is extremely important for reproducibility of your personal work and collaborating with others on group projects. - -- Git is incredible at keeping a historical reference of the changes you make to your work and debugging your code. - -- Github has an amazing community with educational resources, open-sourced projects, and events. - ---- -##### **Learning Outcomes** -1. Become comfortable with Unix basics and more complicated functions - -2. Learn how to use Git and Github in solo and group projects - -3. Navigate how to solve problems that you encounter - -4. Understand why reproducibility is important and how to make your code reproducible - ---- -5. Grasp the ethical considerations of who is and isn’t in our datasets - -6. Recognize past abuses of power and their continued influence - -7. 
Learn professional skills and how to work within a team - ---- -##### **Prerequisites** -Please come prepared with a Github account. - ---- -##### **Assessments** -- A number of formative assessments that continuously put in practice what we learned in class - -- Attitudinal assessments to help understand how students feel about the material and any areas for additional review - -- One summative assessment that compiles everything we have learned - -- Written reflections - ---- -##### **Course Reading Material** -- Chacon and Straub, 2014, Pro Git, 2nd Edition. - -- Newham and Rosenblatt, 2005, Learning the bash shell: Unix shell programming, O'Reilly. - -- Timbers, Campbell, Lee, 2021, Data Science: A First Introduction, https://ubc-dsci.github.io/introduction-to-datascience/. - -- William E. Shotts, Jr., 2009, The Linux Command Line - -- Wilson, 2021, Building Software Together, https://buildtogether.tech/. - ---- - - -## `Unix` - ---- -##### **Intro to Unix and Linux** -**Readings:** Newham et al. chapter 1 & Scotts chapter 1 - -Unix encompasses many features. In this section, we will look at what Unix/Unix shells are and the differences between Unix and Linux, introduce Bash and understand why it’s important to learn. We will also get our environment set up so that we can try a few initial commands. - ---- -##### **Navigate Files and Directories** -**Readings:** Newham et al. chapter 1.6 & Scotts chapters 3-4 - -To begin, we’ll start with a bit of theory to understand directories and the differences between the types of paths and files. We’ll then look at some tools that will help us navigate our files and directories using different options and arguments. We’ll also learn a few commands that will help us quickly and easily navigate our system. - ---- -```bash -$ ls -``` -```bash -$ cd -``` -```bash -$ pwd -``` - ---- -##### **Working with Files and Directories** -**Readings:** Newham et al. 
chapter 1.7 & Scotts chapters 5, 7, 11 - -In this section we’ll start manipulating files and directories. This includes creating, copying, moving and more. We’ll then introduce inputs and outputs and how combine them into command pipes. - ---- -```bash -$ cp -``` -```bash -$ mv -``` -```bash -$ mkdir -``` -```bash -$ rm -``` -```bash -$ ln -``` - ---- -##### **Pipes and Filters** -**Readings:** Newham et al. chapters 1.9-2 & Scotts chapters 8-9 + 13 - -Continuing from last lesson, we’ll expand on pipes and introduce some filter commands that will help us gain more shell experience. We’ll also cover some important expansions and command line editing tips. - ---- -```bash -$ cat -``` -```bash -$ sort -``` -```bash -$ uniq -``` -```bash -$ grep -``` -```bash -$ find -``` - ---- -##### **Shell Scripts** -**Readings:** Scotts chapter 25 - -Now we’ll learn how to group together commands and compile them into shell scripts. This avoids writing commands one by one on the command line. We’ll build our first script and in the process discover how to write, run and store shell scripts. - ---- -##### **Shell Functions** -**Readings:** Newham et al. chapter 4 & Scotts chapters 26, 33, 35 - -A good practice in programming is to create functions which separate larger tasks into smaller tasks. We’ll learn the basic structure of functions, how to use self contained variables and parameters and further expand upon expansions. - ---- -```bash -function name { - commands - returns -} -``` -```bash -name () { - commands - returns -} -``` - ---- -##### **Flow Control** -**Readings:** Newham et al. chapter 5 & Scotts chapters 28, 30 - -In the final lesson of Unix Shell, we’ll introduce more advanced topics: if statements and loops. In doing so, we’ll be able to write scripts that make decisions based on true/false statements and allow portions of our program to repeat. - ---- -```bash -x=5 -if [ $x = 5 ]; then - echo "x equals 5." -else - echo "x does not equal 5." 
-fi -``` - ---- - - -## `Git and GitHub` - ---- -##### **Intro to Git and Github** -**Readings:** Wilson chapter 1 & Timbers chapter 12.3-12.4, 13.3.1 - -Git and Github are extremely important for code revisions, reproducibility and collaboration. This introduction will discuss local, centralized and distributed version control as well as how to get started with Git using some of our knowledge from the Unix lessons. - ---- -##### **Git Basics** -**Readings:** Wilson chapter 2 - -At this point we’ll create our first repository in an existing directory or by cloning a directory. We’ll also introduce how to ignore files and why we might want to do that. - ---- -##### **Git Commands** -**Readings:** Wilson chapter 2 & Timbers chapter 12.5-12.6 - -This lesson will contain the most important Git commands. We’ll learn how to pull any changes, check the status of our work, commit changes, push them to our repository and even more commands and options. We’ll also discuss why adding messages to our commits is important and should be added to our practice. - ---- -##### **Remote Repositories** -**Readings:** Chacon and Straub chapter 2 & Timbers chapter 12.5-12.6 - -Here we will be focusing on remote repositories and the workflow you should adopt to appropriately collaborate with teammates. We'll learn commands such as `git remote`, `git pull` and `git push` during this section. - ---- -##### **Branching and Pull Requests** -**Readings:** Wilson chapter 3 - -Git is extremely useful for separating work that you want to develop and test from the main line to avoid damage. It does this through a feature called branching. We’ll be learning how to create these branches and merge them when our work is sufficient. - ---- -##### **Collaborating** -**Readings:** Wilson chapter 3 & Timbers chapter 12.8 - -Another amazing feature of Git is the ability to collaborate with others. We’ll discuss how to grant access to our repositories and how branching can help us collaborate. 
We’ll also go through some of the best practices when collaborating with others. - ---- -##### **Dealing with Conflicts** -**Readings:** Wilson chapter 6 - -Collaborating with others can produce several conflicts. We’ll explore some practices to deal with merge conflicts when multiple individuals are working on a project, as well as Github Issues as another tool in collaboration. We’ll end with some debugging using annotations and binary searches. - ---- - - -## `Important Considerations` - ---- -##### **Problem Solving** -Problem solving is a necessary skill when writing code. In this section we’ll learn how to identify the problem and effectively search for our solution using Google and Stack Overflow. We’ll also begin a discussion how how reproducibility makes a difference when asking for help. - ---- -##### **Reproducibility** -Reproducibility is extremely important in reducing and solving errors and increasing trust and transparency. We’ll have thorough discussions on the significance of reproducibility and how to practice it through code commenting, documentation writing and proper folder structures. - ---- -##### **Ethics** -When looking at open-source projects on Github or other libraries, it’s important to not take the information and results we see at face-value. We’ll be discussing what to look for, and who might be inappropriately excluded from our data. We’ll be examining at several datasets to analyze the ethics of the project and what might be missing. - ---- -##### **Inequality** -When ethics are not taken into consideration, massive inequality can take place. We’ll further our understanding of what happens when ethics are dismissed and the past abuses of power that have occured under this massively harmful failure. - ---- -##### **Professional Skills** -We’ll end this module with a lesson on pertinent and tech based professional skills. This includes healthy work habits, time management and best practices in meetings. 
We’ll also discuss some team collaboration skills such as code reviews and sprint methodology. - - ---- - - -**Discussion/Questions** diff --git a/slides-resources/html-slides/ethics_slides.html b/slides-resources/html-slides/ethics_slides.html deleted file mode 100644 index 327dee6..0000000 --- a/slides-resources/html-slides/ethics_slides.html +++ /dev/null @@ -1,288 +0,0 @@ -
-

Ethics

-
$ echo "Data Sciences Institute"
-$ echo "Rachael Lam"
-
-
-
-
Why should we care about ethics in Data Science?
-
-
-

Who Counts in our

-

Datasets?

-
-
-

center

-
Rachael Lam
-
-
-

center

-
NYTimes
-
-
-

center

- -
Kieran Healy
-
-
-

center

-
Mithani & Samuels
-
-
-
Os Keyes: Counting the Countless
-
-

Trans lives are ultimately (to a certain degree) about autonomy: about the freedom to set one’s own path. Society isn’t a tremendous fan of this

-
-
Keyes
-
-
-
-

...“administrative violence” to refer to the way that administrative systems such as the law — run by the state, that white supremacist capitalist patriarchy — “create narrow categories of gender and force people into them in order to get their basic needs met.”

-
-
Keyes
-
-
-
-

“data violence” refers to the perpetuation of violence through datalogical systems: everything from YouTube’s recommender algorithm to facial recognition to online advertising.

-
-
Keyes
-
-
-

How do data science practices exclude people or communities?

-
-
-
    -
  • Normalizing and Standardizing -
      -
    • -

      Open text boxes create data that is difficult to clean

      -
    • -
    • -

      A quantitative approach forces us to make buckets and decide the definitions of gender and constrains people's decisions to those buckets

      -
    • -
    -
  • -
  • Consistency -
      -
    • -

      Big data encourages us to collect as much data as possible in a standardized way

      -
    • -
    • -

      Developing a data history means that variation from the previously determined standardized approach complicates data collection thus is discouraged

      -
    • -
    -
  • -
-
Keyes
-
-
-

Insurance companies are making decisions based on health data (ex. food you buy, exercise, etc.)
-

-

How might this practice exclude people or communities?

-

Is this practice even ethically responsible?

-
-
-
-

"The inhumane reduction of humanity down to what can be counted."

-
-
Keyes
-
-
-
Kieran Healy: The Kitchen Counter Observatory
-
-

Numbers and measures are crude; they pick up the wrong things; they strip out the meaning of what’s happening to real people; they make it easy to ignore what can’t be counted. There’s something to those complaints. But it’s mostly a lazy critique. In practice, I find that far from distancing you from questions of meaning, quantitative data forces you to confront them.

-
-
Kieran Healy
-
-
-

Do you think the reductionist approach of data science makes it easy to ignore realities, or does it force you to confront them, as Healy states?

-
-
-
Reforming Data Science
-
-

With administrative violence, Spade notes how “reform” often benefits only the least marginalized while legitimizing the system and giving cover for it to continue its violence.

-
-

We can see this in facial recognition where people of colour are not well recognized by the program, yet creating a better algorithm only benefits systems of control.

-
Kieran Healy
-
-
-

Can you think of another product/algorithm/program that further marginalizes communities?

-

Do you believe we can reform data science?

-
-
-
References
- -
-
\ No newline at end of file diff --git a/slides-resources/html-slides/git_slides.html b/slides-resources/html-slides/git_slides.html deleted file mode 100644 index 5b70e5c..0000000 --- a/slides-resources/html-slides/git_slides.html +++ /dev/null @@ -1,4550 +0,0 @@ -
-

Version Control and GitHub

-
$ echo "Data Sciences Institute"
-$ echo "Rachael Lam"
-
-
-
-

Prerequisites:

-
    -
  • GitHub account
  • -
-
-
-

Key Texts:

- -
-
-

References

-
    -
  • Chacon and Straub: Chapter 1
  • -
  • Timbers: Chapter 12.3 - 12.4, 13.3.1
  • -
-
-
-

Version Control

-
-
-
What is Version Control?
-

Version control is a system that records changes to a file or a set of files over time so that we can recall a specific version later. We may already do this by copying files to another directory to save past versions. While it is simple, it lacks flexibility and complexity.

-
-
-

Version Control Systems (VCS) can do a number of things and can be applied on nearly any type of file on our computers:

-
    -
  • revert files to a previous state
  • -
  • revert entire project to a previous state
  • -
  • compare changes over time
  • -
  • see who modified something last
  • -
  • who introduced an issue and when
  • -
  • recover lost files
  • -
-
-
-
Local Version Control Systems
-

Local VCSs were developed to keep track of changes to our files by putting them in a version database.

-
-
-
Centralized Version Control Systems
-

Centralized VCSs (CVCS) were developed to enable collaboration with developers on other systems. CVCSs have a single server that contains all the versioned files.

-
-
-

CVCSs allow some level of transparency to others' work and give Administrators a level of control over what developers can and can't do.

-

Unfortunately, a single server means that if it ever goes down, all collaboration halts for however long the outage lasts. Additionally, if backups haven't been kept, work could easily be lost.

-
-
-
Distributed Version Control Systems
-

To handle the limitations of LVCSs and CVCSs, Distributed VCSs were created. This includes Git, Mercurial and Bazaar.

-

Collaborators mirror the entire repository; therefore, if a server dies, any one of the collaborators' repositories can be copied back to the server to restore it.

-
-
-

center

-
-
-

Questions?

-
-
-

Git

-
-
-
Git Basics
-

Git thinks of data in a very different way than other VCSs. Instead of storing a set of files and the changes over time, Git thinks of its data more like a set of snapshots of a mini file system.

-

If files have not changed, Git does not store the file again, it links to the previous identical file already stored.

-
-
-

center

-
-
-
Local Operations
-

Most operations on Git only need local files and resources to operate. Git also keeps the entire history of our projects on our local disks meaning we can see changes made months ago without a remote server.

-

We also don't need to be connected to the server to get work done, rather we only need to be connected when we want to upload our work.

-
-
-
Benefits
-

Git uses a check-summing mechanism called a SHA-1 hash, which is calculated based on the contents of a file or directory structure in Git. It looks something like this:

-
24b9da6552252987aa493b52f8696cd6d3b00393
-
-

This checksum means it's impossible to change the contents of any file or directory without Git knowing about it.

-

Git generally only adds data, making it fairly difficult to lose data once we've committed, which we'll learn about later.

-
-
-
The Three States
-

There are three main states that our files can reside in:

-
    -
  • Committed: -
      -
    • data is safely stored on local database
    • -
    -
  • -
  • Modified: -
      -
    • file has been changed but not yet committed
    • -
    -
  • -
  • Staged: -
      -
    • modified file has been marked to go into the next commit
    • -
    -
  • -
-
-
-
The Three Main Sections
-

There are three main sections to a Git project:

-
    -
  • The Git directory
  • -
  • The working directory
  • -
  • The staging area
  • -
-
-
-
The Git Directory
-

The Git directory is where Git stores the metadata and object database for our projects. It is what is copied when we clone a repository from another computer.

-
-
-
The Working Directory
-

The working directory is a single checkout of one version of our projects. These files are pulled out of the compressed database in the Git directory and placed on the disk for us to modify.

-
-
-
The Staging Area
-

The staging area is a simple file that stores information about what will go into our next commit.

-
-
-
Workflow
-

A basic workflow will look something like this:

-
    -
  1. Modify files in our working directory
  2. -
  3. Stage the files in the staging area
  4. -
  5. Commit the changes which takes the files from the staging area and stores them on the Git directory.
  6. -
-
-
-

Questions?

-
-
-

Installing Git

-
-
-

Typically, Git is already installed on our system but we can check for that using the git command:

-
$ git --version
-
-

Does anyone not see a version?

-
-
-
Installing on Linux
-

If you're on Ubuntu:

-
$ sudo apt install git
-
-
-

If you're on Fedora, RHEL or CentOS:

-
$ sudo dnf install git
-
-
$ sudo yum install git 
-
-
-
-
Installing on Mac
-

You can install Git via Homebrew, if you have Homebrew installed (https://brew.sh/).

-
$ brew install git
-
-

Finally, you can install Git using the macOS installer available at this link: https://sourceforge.net/projects/git-osx-installer/

-
-
-
Installing on Windows
-

The download will start automatically through this link: https://git-scm.com/download/win

-
-
-

Questions?

-
-
-

Git Setup

-
-
-

The first thing to do now that we have Git installed on our system is to customize it. These changes will remain despite any upgrades to Git that we install.

-

Using the command git config, we can set configuration variables that control all aspects of how Git looks and operates.

-
-
-
Checking Configurations
-

Before we change any of our global configurations, we can check what they are:

-
$ git config --list
-
-

If we haven't configured Git, we can do that now!

-
-
-
Identity
-

First, we'll set our username and email address. Git uses this information every time we commit.

-
$ git config --global user.name "Rachael Lam"
-$ git config --global user.email "rachael.a.lam@gmail.com"
-
-

The option --global means that we only have to pass this through once.

-
-
-
Editor
-

Next, we'll configure the default text editor for when Git needs us to type in a message. Git uses our system's default editor (usually Vi or Vim) but we can change it if we prefer. If we want to change the editor to emacs, we would do so below:

-
$ git config --global core.editor emacs
-
-
-
-
Diff Tool
-

We can also set the default diff tool which is used to resolve merge conflicts:

-
$ git config --global merge.tool vimdiff
-
-
-
-
Checking the Setting
-

We can use the git config --list command to see all Git settings. To see the value of a specific setting:

-
$ git config user.name
-
-
-
-
Help
-

If we ever need help, even offline, we can access the manual page three ways:

-
    -
  1. $ git help <verb>
  2. -
  3. $ git <verb> --help
  4. -
  5. $ man git-<verb>
  6. -
-

For example, we can get help for the config command:

-
$ git help config
-
-
-
-

Questions?

-
-
-

Git Basics

-
-
-

References

-
    -
  • Chacon and Straub: Chapter 2
  • -
-
-
-

$ git init / $ git clone

-
-
-
Repositories in an Existing Directory
-

We're quickly getting into how to start our first Git repository, commonly known as a repo. First we'll learn how to import an existing project into Git:

-
$ git init
-
-
$ git init -b main
-
-

Here we're creating a new subdirectory named .git that will contain all our necessary repo files. The option -b will create a new branch called main.

-
-
-
Cloning an Existing Repository
-

If we want to collaborate on an existing repo, we need to clone the repo from GitHub. If we don't have a project set up yet, we'll need to do that first.

-
-
-
    -
  1. Create a new project
    -
  2. -
-

center

-
-
-
    -
  1. Add name and optional description
    -
  2. -
-

center

-
-
-
    -
  1. Choose public or private and add initialize
    -
  2. -
-

center

-
-
-

There are a number of automatically generated files such as log files that we might not want Git to add or show as untracked. We can create a file called .gitignore to ignore the automatically generated files.

-

The .gitignore is dependent on the type of coding language you are using but can also be modified to fit specific purposes.

-
-
-

If we created a repo on GitHub, we can choose a .gitignore template. We can select a template specific to the coding language we are using.

-

center

-
-
-

Once we have our repo, we can clone it:

-
$ git clone https://github.com/rachaellam/git-module.git
-
-

Using this code, we've created a repo called git-module (by taking the last part of the link) and initialized a .git directory and pulled all data for that repository while checking for the latest copy.

-
-
-

The url used in the previous code block is copied directly from GitHub by clicking code and copying the HTTPS:

-

center

-
-
-

If we want to change the name of the repo, we can specify that as the next command line option:

-
$ git clone https://github.com/rachaellam/git-module.git mymodule
-
-
-
-

Questions?

-
-
-

Git Commands

-
-
-

References

-
    -
  • Chacon and Straub: Chapter 2
  • -
  • Timbers: Chapter 12.5
  • -
-
-
-

$ git status

-
-
-
Tracked and Untracked Files
-

Files in our working directory can either be tracked or untracked. Tracked files are files that were in the last snapshot and can be unmodified, modified or staged. Untracked files are files that aren't in our last snapshot or staging area.

-

When we modify a file, Git keeps track of the modifications even before we've decided to commit. We can then stage the modifications and then commit.

-
-
-

center

-
-
-
File Status
-

To better understand what state our files are in, we can check the status:

-
$ git status
-
-

If we've just created our repo, we should see (or something similar):

-
# On branch main
-# Your branch is up to date with 'origin/main'.
-
-# nothing to commit, working tree clean
-
-
-
-

Let's now add a README.md file, because every good repo has a good README.

-
$ touch README.md
-
-

And see the status:

-
$ git status
-
-
-
-
On branch main
-
-No commits yet
-
-Untracked files:
-  (use "git add <file>..." to include in what will be committed)
-	README.md
-
-

Here we can see that we still haven't committed anything and that we have an untracked README.md file. Git also gives us a bit of information including how to add a file to track.

-
-
-

$ git add

-
-
-
Tracking New Files
-

To track new files, or stage new files, we can use git add along with the file that we want to track:

-
$ git add README.md
-
-

We can run git status again to see the results of git add.

-
-
-
On branch main
-
-No commits yet
-
-Changes to be committed:
-  (use "git rm --cached <file>..." to unstage)
-	new file:   README.md
-
-

Now we can see that our README.md file is staged to be committed.

-
-
-

Let's say we add some more info to our README.md file, which is now being tracked. If we run git status, we will see:

-
On branch main
-
-No commits yet
-
-Changes to be committed:
-  (use "git rm --cached <file>..." to unstage)
-	new file:   README.md
-
-Changes not staged for commit:
-  (use "git add <file>..." to update what will be committed)
-  (use "git restore <file>..." to discard changes in working directory)
-	modified:   README.md
-
-
-
-
-

We can stage our additional changes and check the status:

-
$ git add README.md
-$ git status
-
-
On branch main
-
-No commits yet
-
-Changes to be committed:
-  (use "git rm --cached <file>..." to unstage)
-	new file:   README.md
-
-
-
-
-

Let's try adding another file into our directory. It can be something that you've been working on independently, or we can add our project from the previous Unix module.

-
-
-

If we modify many things at once, we can add the option -A to add all files, rather than adding one by one

-
$ git add -A
-
-

A little note about this: it's best to upload your work in small chunks for readability and for collaboration. So if you have a bunch of files, it's recommended to split them into smaller chunks.

-
-
-

Questions?

-
-
-

$ git diff

-
-
-

If we want to see more details of the changes that we've made, we can use the command git diff.

-

git diff compares what is in our working directory to what is in our staging area. If we've made changes to our files without running git add, we'll see the comparison. If there are no differences, nothing will be shown.

-
-
-
diff --git a/README.md b/README.md
-index e69de29..4711fce 100644
---- a/README.md
-+++ b/README.md
-@@ -0,0 +1 @@
-+# git-r
-\ No newline at end of file
-
-
-
-
diff --git a/README.md b/README.md
-
-

This is telling us what we're comparing. In this case, it's the difference between a previous version of the README file and the current one

-
-
-
index e69de29..4711fce 100644
-
-

Here is some meta data, or hash identifier that we likely won't need.

-
-
-
--- a/README.md
-+++ b/README.md
-
-

This is acting as a legend. Changes from a/README.md are marked by --- and changes from b/README.md are marked by +++

-
-
-
@@ -0,0 +1 @@
-+# git-r
-
-

Here we're being told the lines that have changed and what on those lines changed. Because there was nothing removed, this is a bit of a simplistic representation.

-
-
-

We might see something more like...

-
@@ -21,5 +77,12 @@
-
-

This is telling us that the change covers 5 lines starting at line 21 in the old version of the file and 12 lines starting at line 77 in the new version.

-
-
-
--staged
-

If we want to see the details of what will go into the next commit, we can use git diff with the option --staged

-
-
-

$ git commit

-
-
-

Once we've staged our selected files, it's time to commit the changes. Anything that wasn't staged (any modifications since git add) will not be included in the commit.

-

git commit is most easily run with the option -m. This adds a message to your commit

-
$ git commit -m "adding a message here"
-
-
-
-
-m
-

Messages should be clear. They can also be extremely detailed if needed. By not including the option -m, Git will provide the latest output of git status. If you want even more information, you can use the option -v.

-
-
-

Messages are extremely important for our own records and also when collaborating with others. They can act as a reminder for what our commit includes, and also tell our collaborators what we did last.

-

It's important to commit often as well so that merge conflicts are easier to locate and fix.

-

It's also helpful if you want to go back to an earlier version. You have more options to choose from.

-
-
-

Practices around messages can vary but if we want to add a longer message we can remove the -m option.

-
$ git commit
-
-

Then hit i to add a message. You'll see -- INSERT -- at the bottom and you can begin typing your message.

-

When finished, press esc then :wq or :x.

-

w means write and q means quit. x is shorthand for wq

-
-
-
Short (50 chars or less) summary of changes
-
-More detailed explanatory text, if necessary. Wrap it to about
-72 characters or so. In some contexts, the first line is treated 
-as the subject of an email and the rest of the text as the body, 
-the blank line separating the summary from the body is critical 
-(unless you omit the body entirely).
-
-Further paragraphs come after blank lines.
-
-- Bullet points are okay, too
-
-- Typically a hyphen or asterisk is used for the bullet, preceded
-  by a single space with blank lines in between, but conventions
-  vary here
-
-
-
-
-a
-

If we want to commit all the files we've worked on without putting them in the staging area, we can use the option -a. This will avoid using git add and condense our workflow.

-
$ git commit -a -m "skip staging add message"
-
-

Here we've used two options, -a and -m to skip the staging and add a message.

-
-
-

Questions?

-
-
-

$ git rm

-
-
-

If we delete a file from our working directory after staging it using rm without git, the deletion will show up under "Changes not staged for commit". We can then use git rm to stage the file's removal.

-

Let's follow the code below to understand this better:

-
$ touch test.sh
-$ git status
-$ rm test.sh
-$ git status
-
-

Because we haven't tracked the test.sh file, we can simply remove it and we don't need to tell Git to also remove it.

-
-
-

What happens if we add a file to our staging area but then want to delete it? Let's try the two codes below:

-
$ touch test.sh
-$ git add test.sh
-$ git rm test.sh
-
-
$ touch test.sh
-$ git add test.sh
-$ rm test.sh
-$ git rm test.sh
-
-
-
-
-f
-

If we've modified and staged a file, we have to force the removal with the option -f. This is a safety feature so that we don't accidentally delete something.

-
$ touch testfile
-$ git add testfile
-$ git rm -f testfile
-
-
-
-
--cached
-

The option --cached allows us to remove a file from our staging area without permanently deleting it from our local drive.

-
$ git rm --cached testfile
-
-

We can use wildcards to remove files from our staging area in bulk, although we have to add a backslash in front of * because Git does its own filename expansion.

-
$ git rm -f \*.txt
-
-
-
-

We can also delete files in a folder of our working directory:

-
$ git rm -f dir1/\*.sh
-
-
-
-

$ git mv

-
-
-

Using git mv, we can rename files conveniently and succinctly:

-
$ git mv test.txt test.sh
-
-
-
-

Questions?

-
-
-

$ git log

-
-
-

Sometimes we might want to see a history of our commits or we want to see previous commits after cloning an existing repository. We can do this using the git log command.

-
$ git log
-
-

There are a number of options that help us see even more, or sometimes less, information about each commit.

-
-
-

If we attempt to run a log before any commits have been made, we will get an error:

-
fatal: your current branch 'main' does not have any commits yet
-
-
-
-
-p
-

Adding the option -p will show the diff introduced in each commit. We can also pass a number option that will limit the number of entries shown:

-
$ git log -p -2
-
-

The limit can be any number of entries (-<n>), but the output is shown one page at a time.

-
-
-
--stat
-

The --stat option shows abbreviated stats for each commit:

-
$ git log --stat
-
-
-
-
commit 6c91df668d1899317a643153bd169d37fe05d9f1 (HEAD -> main)
-Author: Rachael Lam <rachael.a.lam@gmail.com>
-Date:   Fri Feb 18 14:56:27 2022 -0500
-
-    first commit
-
- .gitignore |  4 ++++
- README.md  |  1 +
- test.Rproj | 13 +++++++++++++
- testfile.r |  0
- 4 files changed, 18 insertions(+)
-
-

+ or - (if there were any) show the number of insertions or deletions. We can also see the date of the commit, who committed it, and the message.

-
-
-
--pretty
-

The --pretty= option is an interesting feature that enables us to specify the log output when we combine it with format:, creating an extremely useful data extraction feature:

-
$ git log --pretty=format:"%h - %an, %ar : %s"
-
-
-
-
Formatting Options
- - - - - - - - - - - - - - - - - - - - - - - - - -
OptionDescription
%HCommit hash
%hAbbreviated commit hash
%tAbbreviated tree hash
%pAbbreviated parent hashes
-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
OptionDescription
%anAuthor name
%aeAuthor email
%adAuthor date (ex. Thu Dec 2 14:14:55 2021 -0500)
%arAuthor date relative (ex. 26 hours ago)
%cnCommitter name
%sSubject (-m)
-
-
-
--since / --until
-

The options --since= and --until= are usually more useful than -<n>. They produce the logs of any time before (--until) or after (--since) a certain date. You can specify an exact date or relative date:

-
$ git log --since=2.weeks
-
-
$ git log --since="2 days 3 minutes ago"
-
-
$ git log --until="2021-11-20"
-
-
-
-

We can also combine log options to generate specific outputs:

-
$ git log --pretty=format:"%h: %s" --author=Rachael
-
-
$ git log --since="2020-11-01" --until="2020-11-30"
-
-
-
-

Finally, and a favourite for quick glances:

-
$ git log --oneline
-
-
-
-

Questions?

-
-
-

undo undo undo

-
-
-
Changing Commit
-

If we already committed a few files but forgot to add one or made modifications since our commit that we want to add, we can use the option --amend

-
$ git commit -m "initial commit"
-$ git add missed_file
-$ git commit --amend -m "initial commit with missed_file"
-
-

We can still add the -m option to add a new comment.

-
-
-
Unstaging
-

When we want to remove a file from our staging area because we accidentally added one too many files, we can use the code below:

-
$ git reset HEAD README.md
-
-

If we ever forget how to do this, running git status will remind us.

-
-
-
Unmodify
-

We can also revert our files back to the version from our previous commit using git checkout --. It's important to realize that this command essentially rewrites the file so any changes that were made will not be able to be recovered.

-

As well, any commit can usually be recovered but anything that was never committed will most likely be lost forever.

-
$ git checkout -- README.md
-
-
-
-
Select Previous Commit
-

To select a previous commit to revert to, we need the hash of the commit:

-
$ git log
-$ git checkout <HASH> file1
-
-

This can be used forwards or backwards, i.e. you can also "revert" to a commit that is later than your current version.

-

You can also revert several files at the same time

-
$ git checkout <HASH> file1 file2
-
-
-
-

Questions?

-
-
-

Remote Repositories

-
-
-

References

-
    -
  • Chacon and Straub: Chapter 2
  • -
  • Timbers: Chapter 12.5-12.6
  • -
-
-
-

$ git remote

-
-
-

Remote repos are versions of our projects that are hosted on the internet or some network. This allows us to collaborate with others outside of our local repo.

-

We can see the remote servers we've configured using git remote. If we add the option -v, we can see the URL:

-
$ git remote -v
-
-

Cloned repos will be displayed as origin by default.

-
-
-
Remote Setup
-

Before we connect our local repo to a remote repo, we need to setup our permissions. This is so we can send and retrieve work to and from our remote repositories. There are two ways to do this:

-
    -
  1. -

    Access Tokens

    -
  2. -
  3. -

    SSH

    -
  4. -
-
-
-
Access Tokens
-

               left           right

-
-
-

center

-
-
-
SSH
-
$ ls -al ~/.ssh
-
-

If SSH has not been set up on your computer, you should see something like:

-
ls: cannot access '/c/Users/rachaellam/.ssh': No such file 
-or directory
-
-

Otherwise you'll see filenames id_ed25519 and id_ed25519.pub OR id_rsa and id_rsa.pub which represent your private and public keys, respectively.

-
-
-
$ ssh-keygen -t ed25519 -C "rachael.lam@mail.utoronto.ca"
-
-

Use the code above but with your email. This will output:

-
Generating public/private ed25519 key pair.
-Enter file in which to save the key (/c/Users/rachaellam/.ssh/
-id_ed25519):
-
-

Press enter to use the default file.

-
-
-

You will then be prompted to add a passphrase. You cannot reset this passphrase, so be sure to remember it or write it down somewhere safe:

-
Created directory '/c/Users/rachaellam/.ssh'.
-Enter passphrase (empty for no passphrase):
-
-

It will then ask you to reenter the passphrase:

-
Enter same passphrase again:
-
-
-
-

You will then get a confirmation with a random piece of art at the end. It will show the private key (identification) which you should never share, the public key and the key fingerprint which is a shorter version of the public key.

-
Your identification has been saved in /c/Users/rachaellam/.ssh/
-id_ed25519
-Your public key has been saved in /c/Users/rachaellam/.ssh/
-id_ed25519.pub
-The key fingerprint is:
-SHA256:SMSPIStNyA00KPxuYu94KpZgRAYjgt9g4BA4kFy3g1o
-rachael.lam@mail.utoronto.ca
-
-
-
-

Now we can check that we have the public and private key files:

-
$ ls -al ~/.ssh
-
-
-
-

It's time to give GitHub our public key so let's read the public key file and copy it:

-
$ cat ~/.ssh/id_ed25519.pub
-
-

Output:

-
ssh-ed25519 AAAAC3NzaC1lZDI1NPN7AAAAIDmRA3d51X0uu9wXek559gfn6UFNF
-69yZjChyBIU2qKI rachael.lam@mail.utoronto.ca
-
-

Copy the long public key to add to GitHub.

-
-
-
Settings --> SSH and GPG keys --> New SSH key
-

Add a title like rachael's key and paste the public key then click Add SSH key.

-

Finally, we can check that it's been authenticated:

-
$ ssh -T git@github.com
-
-
-
-
remote add
-

To add a remote repo, we can use git remote add followed by the name and URL. Now we can connect our local repo to a remote repo:

-
$ git remote add origin https://github.com/rachaellam/git-r.git
-$ git remote -v
-
-

After checking we'll see:

-
origin  https://github.com/rachaellam/git-r.git (fetch)
-origin  https://github.com/rachaellam/git-r.git (push)
-
-
-
-

If we want to see more information about a remote repo, we can use the command:

-
$ git remote show origin
-
-

Here we can see the URLs that we're fetching from and pushing to, our remote branches, and configurations for git push (to the main branch or another).

-
-
-

To send and retrieve work between our local and remote repositories, we have to authenticate a personal access token:

-

               left           right

-
-
-

center

-
-
-

Questions?

-
-
-

$ git fetch / $ git pull

-
-
-

When collaborating with others, changes might be made that are important to copy to your local directory. git fetch will get any new changes but it won't merge it to our work or modify our work.

-
$ git fetch origin
-
-
-
-

git pull will automatically fetch and merge a remote branch to our current branch (more on branching later). It's a good practice to pull before every work session, especially when working with others. Otherwise, a collaborator might have made changes, and you won't be able to push your changes to GitHub.

-
$ git pull
-
-
-
-

If we've created our repository using init and remote add, we need to specify the remote and the branch that we want to pull from.

-
$ git pull origin main
-
-

origin being the name of the remote repo we created earlier and main being the main branch on our GitHub repo.

-
-
-

Questions?

-
-
-

$ git push

-
-
-

When we're ready to share our modifications, we have to push our project and files upstream using git push

-
$ git push origin main
-
-

Here we're pushing to our origin server on our main branch. The main branch is sometimes called the master branch.

-

This command only works if we have write access and if no collaborator is pushing upstream at the same time as we are. We'd have to instead pull and merge their work before pushing our own.

-
-
-

Questions?

-
-
-

Git Branching

-
-
-

References

-
    -
  • Chacon and Straub: Chapter 3
  • -
  • Timbers: Chapter 12.8
  • -
-
-
-

Branching allows us to diverge from the main line to do work without accidentally messing with the main line. This helps with testing without making any accidental changes to the working branch.

-

To understand how branching works, let's go back and understand how Git saves files.

-
    -
  • blob
  • -
  • tree
  • -
  • pointer
  • -
-
-
-

center

-
-
-

A branch is a way to move different pointers to a specific commit. In Git, the default branch is named master or main. When we first start making commits, we start at the master branch that automatically points to the last commit made.

-

center

-
-
-

$ git branch

-
-
-

We can make a new branch which creates a new pointer for us to move around. We can do this by using the command git branch:

-
$ git branch testing
-
-
-
-

Here, we've created a branch called testing, which means we've created a new pointer that could point to our current commit.

-

center

-
-
-

$ git checkout

-
-
-

Git tracks what branch we're on using a pointer called HEAD. If we move the HEAD to the branch main, we'll see:

-
Already on 'main'
-
-

To move HEAD to point to the testing branch that we just created, we use git checkout:

-
$ git checkout testing
-
-

and we should see..

-
Switched to branch 'testing'
-
-
-
-

center

-
-
-

If we make some changes to our testing branch and commit, our head will move with the new commit.

-

center

-
-
-

If we want to go back to an older version of our project and make changes, we can use git checkout again to redirect the head back to our master branch:

-
$ git checkout main
-
-

Using this command will move the HEAD pointer back to our master branch and revert our files in our working directory back to the snapshot that the master branch points to.

-
-
-

Questions?

-
-
-

Branching and Merging

-
-
-

Let's take a look at a workflow that you might encounter:

-
$ git commit -m "commits to master branch"
-
-

center

-
-
-
$ git checkout -b iss53
-
-

center

-
-
-
$ git commit -a -m "commits to iss53"
-
-

center

-
-
-
$ git checkout master
-$ git checkout -b 'hotfix'
-$ git commit -m "commits to hotfix"
-
-

center

-
-
-
$ git checkout master
-$ git merge hotfix
-
-

center

-
-
-

$ git merge

-
-
-

In the last step we saw a command called git merge. Once we've committed changes and are ready to deploy, we can use git merge to merge our working branch back into our master branch.

-
$ git merge testing 
-
-
-
-

center

-
-
-

We can then delete the branch that we've created, as the master branch points to the same place.

-

Adding the option -d will delete the branch that had been merged with the main, as we no longer need it.

-
$ git branch -d testing
-
-
-
-

Remember that changes to our master branch have not been added to our iss53 branch. We either need to pull them in or wait to integrate them when we pull iss53 into the master branch

-

center

-
-
-

If we're merging a branch with the main that has been changed since we diverged, merging isn't as simple for Git.

-

Git will create a new snapshot of the merge and automatically create a new commit that points to it, called a merge commit.

-

center

-
-
-

We saw git branch earlier with the option -d to delete a branch, but to get a list of our current branches, we can run git branch without any arguments.

-
$ git branch
-
-

The * indicates the branch we are currently on or have checked out (git checkout)

-
-
-

If we run git branch with the option -v, we can see the last commit on each branch. This is another reason why comments are so important to add to our commits: they can be extremely useful when looking back at our work and seeing what we've done.

-
-
-

We can also add the options --merged or --no-merged to git branch. --merged allows us to see what branches have been merged to the branch we're currently on. Branches without the * are generally safe to delete because we've already merged our work with our main branch.

-
$ git branch --merged
-
-
-
-

On the other hand, --no-merged allows us to see all the branches that haven't been merged.

-
$ git branch --no-merged
-
-

If we try to delete one of these branches, we will receive an error. We can force delete using the option -D.

-
-
-

Merge Conflicts

-
-
-

Oftentimes, merging our work with other topic branches or the main branch creates conflicts.

-

For example, if we've changed the same part of the same file differently in the two branches we're merging, we will encounter a conflict.

-

Luckily, Git helps us see where the error is to correct it.

-
-
-
-

Git shows us the beginning of the merge conflict with
-<<<<<<< HEAD and the end with >>>>>>>.
-

-

======= separates the differences.
-

-

To fix the merge, you can choose one set of changes, the difference you prefer or re-write it entirely. You have to remove all identifiers of the merge conflict as well.

-
-
-

Questions?

-
-
-

Branching Workflow

-
-
-
Long-Running Branches
-

Multiple long running branches are helpful when tackling large and complex projects.

-

Typically, developers will keep the master branch as the stable branch or code that has been or will be released. They will then have parallel branches that are used for development and testing.

-

Branches can also have various levels of stability, and developers will graduate/merge branches once they're fully tested.

-
-
-

center

-
-
-
Topic Branches
-

Topic branches are short-lived branches that are created for a particular feature or related work. They allow us to quickly switch between topics and keep changes there for as long or as little as needed, regardless of the created or modified order, before merging.

-
-
-

left right

-
-
-

Questions?

-
-
-

Remote Branches

-
-
-

Remote branches are pointers to the state of branches on our remote repositories. Our remote repositories can have multiple remote branches, just as we can have multiple branches on our local repositories.

-

The format is (remote)/(branch) or (remote) (branch)

-

If branches already exist on your GitHub repo, you will have access to these branches. If we're working with a branch that does not exist yet, we can push it to our remote repo.

-
-
-
Pushing
-

When we're ready to share our work, we'll use git push. If the remote branch already exists, we can push directly to that branch:

-
$ git checkout testing
-$ git add -A
-$ git commit -m "testing branch commit"
-$ git push origin testing
-
-

This will push our changes to the existing testing branch on GitHub.

-
-
-

If we were working with a branch that only exists locally, we can push it to GitHub with a slight tweak:

-
$ git checkout new-branch
-$ git add -A
-$ git commit -m "new branch commit"
-$ git push -u origin new-branch
-
-

This will create a new branch on GitHub called new-branch. From here, if we want to continue updating this branch, we can just run git push origin new-branch.

-
-
-
Fetching
-

When we fetch or pull files from our remote repos, we don't automatically have access to local, editable copies of files of the remote branches.

-

We can do this in several steps. First we're going to fetch the remote branches:

-
$ git fetch
-
-
-
-

We can then see what branches exist remotely:

-
$ git branch -v -a
-
-

And we'll see something like this:

-
* main                        3d850f2 a commit
-  remotes/origin/HEAD         -> origin/main
-  remotes/origin/main         3d850f2 another commit
-  remotes/origin/testing      3d850f2 another commit
-
-
-
-

Then we'll create a branch that exists on our local drive:

-
$ git checkout -b testing origin/testing
-
-

Here we're pointing the HEAD to the new branch (-b) called testing from origin/testing

-
-
-
Tracking Branches
-

Tracking branches are branches that have a direct relationship with a remote branch. We can push and pull to and from these branches, as Git automatically knows which server and branch we're working with.

-

For this to work, the name of your local branch must be the same as the remote branch

-
-
-

If the branches are named differently, we must run a different command for the push to be successful:

-
$ git push origin HEAD:remote-branch
-
-
-
-
Deleting Branches
-

If we've merged all our changes into our main branch, we can delete the remote branch with the following code:

-
$ git push origin :testing
-
-
-
-

Questions?

-
-
-

Collaborating

-
-
-

References

-
    -
  • Chacon and Straub: Chapter 3 + 5
  • -
  • Timbers: Chapter 12.8
  • -
-
-
-

Much of the work that we do will involve working with others. It's important that we learn how best to do this so we can successfully collaborate and avoid conflicts where possible. If conflicts arise, good collaboration practices help us resolve them with ease.

-

So far we've learned several practices and commands that help us collaborate with others, including remote repositories and branches, git pull, git push and git merge, but we'll learn more practices that make collaboration straightforward.

-
-
-

There are many different factors that influence what workflow you might follow and how you might contribute to a project including:

-

1. Active contributor size
-Teams can vary from a few collaborators to thousands, varying the number of commits per day.

-

2. Chosen workflow
-Each project could have a different process to check patches including an integration manager or peer reviews.

-

3. Commit access
-Policies regarding how to contribute work can differ between projects, even by how much work or how often.

-
-
-

Let's take a look at a couple possible workflows:

-

center

-
-
-

center

-
-
-

GitHub

-
-
-
Adding Collaborators
-

To collaborate with others on our GitHub repo, we can add collaborators so they have direct access to the repo:

-

center

-
-
-

center

-
-
-

center

-
-
-

Access does not have to be permanent. We can remove collaborators at any time and add additional ones when needed.

-

Granting access to your repo this way enables collaborators to make changes and push them to the repo without our constant permission. If we do not add push access, collaborators have to fork the repo and create a pull request.

-
-
-
Forking Projects
-

Forking allows us to collaborate on projects without push access. We can fork a public project on GitHub and then clone it onto our local machine to begin making changes.

-

center

-
-
-

Once a project has been forked, we can find the repo in our GitHub repositories. We can then clone the repo (git clone), make changes and push our changes without altering the original repo.

-

Alternatively, we can clone the original repo, make our changes, fork the original repo and then merge our branch to the master branch of the forked repo.

-

If we're collaborating with someone and we want our changes to be merged to the original repo, we can create a pull request.

-
-
-
Pull Request
-

After making a few changes, we now want to create a pull request to merge our changes with the original repo. We can do this directly in GitHub:

-

center

-
-
-

In the pull request, we can see what branches and repos we're attempting to merge:

-

center

-
-
-

We can also see the changes that were made:

-

center

-
-
-

GitHub will also check to make sure that there are no conflicts with the base branch:

-

center

-
-
-

Pull requests with no merge conflicts are easy to merge into the branches but it gets more complicated if there are merge conflicts:

-

center

-
-
-

You can still create a pull request with merge conflicts:

-

center

-
-
-

center

-
-
-

Resolving conflicts on GitHub is very similar to resolving merge conflicts through the terminal:

-

center

-

Because resolving conflicts is done on GitHub, it's a good practice to resolve conflicts before creating a pull request.

-
-
-

Questions?

-
-
-

Conflicts

-
-
-

References

-
    -
  • Chacon and Straub: Chapter 3 + 6
  • -
  • Timbers: Chapter 12.5
  • -
-
-
-

Conflicts are going to arise at some point, especially when working with others. It's important that we learn how to handle these conflicts for easier and more successful collaboration.

-
-
-

GitHub Issues

-
-
-

GitHub issues are an extremely useful tool for communicating decisions, ideas and problems that are project specific.

-

They are an alternative to email or Slack that keep communication isolated to a particular project.

-

Issues can be opened on GitHub and even when they're closed, they remain available. They're also accessible to all collaborators for transparency.

-
-
-

To open an issue, navigate to the project page and click Issues:

-

center

-
-
-

Then open a new issue:

-

center

-
-
-

From here, we can add a title and description of the issue, and add any specific collaborators, labels, etc.

-

center

-
-
-
Information
-

Title: should be descriptive and quickly convey what the issue is about

-

Description: explain the purpose of the issue and how to potentially resolve it. If it's a bug fix, include a reprex, what you wanted to happen and what actually happened. You can also include steps already taken to solve the issue.

-
-
-
Reprex
-
    -
  • -

    A reprex is a REPRoducible EXample.

    -
  • -
  • -

    It contains just enough of the code to reproduce the error, ie. it is self-contained

    -
  • -
  • -

    We might have to create a smaller version of the code in order to create the reprex. Don't include anything that isn't related to the problem.

    -
  • -
  • -

    Sometimes, this process will help us solve our issue.

    -
  • -
-
-
-
Inclusions
-

A minimal dataset to demonstrate the problem. This could be a regularly used one such as mtcars

-
install.packages("dyplr")
-library(dplyr)
-head(mtcars)
-
-

or one easily built yourself.

-
df <- data.frame (col1  = c(1, 2),
-                  col2 = c(3, 4))
-df
-
-
-
-
    -
  • -

    Make sure to include classes that are necessary to your reprex (ex. dates, factors, etc.)

    -
  • -
  • -

    If you're using randomly sampled data, set the seed so the same data is produced each time.

    -
  • -
-
set.seed(853)
-
-
-
-

Include all packages that you need.
-

-
    -
  • Make sure they are placed at the top of the script so it's quick and easy to see what is necessary for the reprex.
  • -
-
-
-
Other Inclusions
-
    -
  • -

    Details about the issues you are facing.

    -
  • -
  • -

    Comments that will add clarification to your error.

    -
  • -
  • -

    Add what fixes have been attempted. This could include pages to StackOverflow articles that you've viewed.

    -
  • -
  • -

    Communicate clearly what your desired outcome is.

    -
  • -
-
-
-
Task Lists
-

If an issue is quite large, it's possible to add task lists to break the issue into smaller pieces.

-
    -
  • -

    Use square brackets - [ ]

    -
  • -
  • -

    To mark it complete, use - [x]

    -
  • -
  • -

    Issues can be linked to previous issues using

    -
      -
    • the number - [x] #11
    • -
    • a URL - [x] https://github.com/rachaellam/git-r/issues/11
    • -
    -
  • -
-
-
-

Once an issue has been opened, we can respond and comment.

-

When we decide it has been resolved, we can close the issue. The history of the issues can still be seen, even if it has been closed.

-
-
-

Questions?

-
-
-

Debugging

-
-
-
File Annotation
-

File annotation can help us resolve issues in our code if we know where the issue is. We can see when the code was introduced and by whom, line by line, using the aptly named git blame.

-
$ git blame -L 1,3 script.sh
-^8e9b89da (Rachael Lam  2021-12-02 15:01:02 -0500  1) #line 1
-8e9b89da (Rachael Lam   2021-12-02 15:01:02 -0500  2) #line 2
-8e9b89da (Rachael Lam   2021-12-02 15:01:02 -0500  3) #line 3 
-
-
-
-

git blame is combined with the filename we want to inspect. We can also use the option -L followed by two numbers to limit the number of lines shown.

-

We can then see the partial SHA-1 of the commit that last modified the line, the author name and date of the commit, and the content of the file by line.

-

When the SHA-1 is preceded by a ^, it indicates that those lines were introduced in the commit that first added the file to the project and have not changed since.

-
-
-
Binary Search
-

If we don't know where the issue is, we can use git bisect to identify the commit that introduced an issue.

-
$ git bisect start
-$ git bisect bad
-$ git bisect good [good_commit]
-
-

First, we've started the bisect program. We then told the system that the current commit is broken using bisect bad followed by the last good commit using bisect good [good_commit]. We can see the different commits if we run git log, which we learned earlier.

-
-
-

Git produced the number of commits that were between the good and the bad commit and then checked out the middle one.

-

From here, we can run our test to see if the issue still exists. If it does, it means the issue was introduced in a commit before this middle commit and we can run git bisect bad to tell the system that there is still an issue.

-

If it does not, then the issue was introduced after and we can run git bisect good.

-
-
-

We can keep running this loop until we find the commit that introduced an issue and make our corrections.

-

When we're finished, we can run git bisect reset to reset our HEAD to where we were before we started.

-
-
-

Best Practices

-
-
-
    -
  • Topic branches should be used to try out new code before integrating. They enable us to play around or leave it for the time being if it's not working.
  • -
  • Commit often rather than submitting a massive commit. This makes it easier to review and merge changes, or revert if necessary.
  • -
-
-
-
    -
  • Create quality commit messages so that your collaborators can easily understand what has been done. For example:
  • -
-
Short (50 chars or less) summary of changes
-
-More detailed explanatory text, if necessary. Wrap it to about
-72 characters or so. In some contexts, the first line is treated 
-as the subject of an email and the rest of the text as the body, 
-the blank line separating the summary from the body is critical 
-(unless you omit the body entirely).
-
-Further paragraphs come after blank lines.
-
-- Bullet points are okay, too
-
-- Typically a hyphen or asterisk is used for the bullet, preceded
-  by a single space with blank lines in between, but conventions
-  vary here
-
-
-
-

Questions?

-
-
-

Reproducibility

-
-
-
    -
  • -

    Reproducibility is the ability for independent researchers to obtain the same or similar results when repeating an experiment or test.

    -
  • -
  • -

    This concept has been widely used in natural sciences, but is not yet as popular in data science.

    -
  • -
  • -

    Remember, data science is a science. We question, hypothesize, test, and therefore, we should also have the same rigour of confirmation.

    -
  • -
-
-
-
    -
  • -

    Skepticism should always be able to be independently verified. We should be able to defend our results and decisions.

    -
  • -
  • -

    Who would believe your results otherwise? More importantly, you should not believe results if they cannot be verified.

    -
  • -
-
-
-

Why is reproducibility important?

-
-
-
    -
  1. -

    New Insights

    -
  2. -
  3. -

    Reduce Error Risks

    -
  4. -
  5. -

    Validate Results

    -
  6. -
  7. -

    Transparency

    -
  8. -
-
-
-

How can we make our work reproducible?

-
-
-

There are a number of practices that can help make our work reproducible including:

-
    -
  • Reproducible Examples
  • -
  • Commenting Code
  • -
  • Technical Documentation
  • -
  • Folder Structure
  • -
-
-
-

Reproducible Examples

-
-
-
Reprex
-
    -
  • -

    A reprex is a REPRoducible EXample.

    -
  • -
  • -

    It contains just enough of the code to reproduce the error, ie. it is self-contained

    -
  • -
  • -

    We might have to create a smaller version of the code in order to create the reprex. Don't include anything that isn't related to the problem.

    -
  • -
  • -

    Sometimes, this process will help us solve our issue.

    -
  • -
-
-
-
Inclusions
-

A minimal dataset to demonstrate the problem. This could be a regularly used one such as mtcars

-
install.packages("dyplr")
-library(dplyr)
-head(mtcars)
-
-

or one easily built yourself.

-
df <- data.frame (col1  = c(1, 2),
-                  col2 = c(3, 4))
-df
-
-
-
-
    -
  • -

    Make sure to include classes that are necessary to your reprex (ex. dates, factors, etc.)

    -
  • -
  • -

    If you're using randomly sampled data, set the seed so the same data is produced each time.

    -
  • -
-
set.seed(853)
-
-
-
-

Include all packages that you need.
-

-
    -
  • Make sure they are placed at the top of the script so it's quick and easy to see what is necessary for the reprex.
  • -
-
-
-
Other Inclusions
-
    -
  • -

    Details about the issues you are facing.

    -
  • -
  • -

    Comments that will add clarification to your error.

    -
  • -
  • -

    Add what fixes have been attempted. This could include pages to StackOverflow articles that you've viewed.

    -
  • -
  • -

    Communicate clearly what your desired outcome is.

    -
  • -
-
-
-

Commenting Code

-
-
-

How does commenting code help in reproducibility?

-
-
-

Commenting code is an important practice that benefits both ourselves and collaborators.

-

Not only can we understand what we did to fix our own errors or improve our work, but others can better understand our code to reproduce it.

-
-
-

Ellen Spertus outlines 9 rules to follow:
-

-
    -
  1. -

    Comments should not duplicate the code

    -
  2. -
  3. -

    Good comments do not excuse unclear code

    -
  4. -
  5. -

    If you can’t write a clear comment, there may be a problem with the code

    -
  6. -
  7. -

    Comments should dispel confusion, not cause it

    -
  8. -
-
-
-
    -
  1. -

    Explain unidiomatic code in comments

    -
  2. -
  3. -

    Provide links to the original source of copied code

    -
  4. -
  5. -

    Include links to external references where they will be most helpful

    -
  6. -
  7. -

    Add comments when fixing bugs

    -
  8. -
  9. -

    Use comments to mark incomplete implementations

    -
  10. -
-
-
-
1. Comments should not duplicate the code
-
    -
  • Comments should add value to whoever is reading your code.
  • -
  • Duplicating code adds unnecessary bulk and can actually make it more difficult to understand the code.
    -
  • -
-

Can you think of a bad example?

-
-
-

Here is an example of what you should not do:

-
x=5
-
-if [ $x = 5 ]; then
-    echo "x equals 5." # if x = 5 then ouput x equals 5
-
-else
-    echo "x does not equal 5." # otherwise output x does not equal 5
-
-fi
-
-
-
-
2. Good comments do not excuse unclear code
-
    -
  • Our aim should always be having clear code, rather than relying on our comments to add clarity.
  • -
  • Remember, we should not be adding more bulk to the code that makes it more difficult to understand.
  • -
-
-
-
3. If you can’t write a clear comment, there may be a problem with the code
-
-

Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to
-debug it.

-
-

- Kernighan's Law

-
-
-
4. Comments should dispel confusion, not cause it
-
    -
  • If our comments are adding further confusion, we should either rewrite the comment or remove it entirely.
  • -
  • A good comment should always be written with the intent to help better understand what is being done.
  • -
-
-
-
5. Explain unidiomatic code in comments
-
    -
  • If we've purposefully written code that others may find unnecessary, we need to comment our reasoning.
  • -
  • Others may try to simplify our code if we don't explain our reasoning.
    -
  • -
-

Can you think of an example?

-
-
-
6. Provide links to the original source of copied code
-
    -
  • Often times, we'll use code that others have written. It's important to give credit to the original source, as well as to give ourselves a reminder of where we got the code so we can reference it later if needed.
  • -
  • Referencing the source can also provide other information such as what the problem was, why the solution was recommended and how it can be improved. It also means, we don't have to comment all of these details again in our own code.
  • -
-
-
-

An example:

-
# I got these 9 rules from Ellen Spertus' blog post on
-# StackOverflow: https://stackoverflow.blog/2021/12/23/
-# best-practices-for-writing-code-comments/
-
-
    -
  • It's best to include the URL so others don't have to search for the exact location.
  • -
  • Remember: never copy code that you don't personally understand.
  • -
  • Code from StackOverflow falls under Creative Commons licenses so a reference comment is needed.
  • -
-
-
-
7. Include links to external references where they will be most helpful
-
    -
  • References don't just have to be used for copied code. They can also provide information on decisions made or changes in practices
  • -
-
-
-
8. Add comments when fixing bugs
-
    -
  • Comments can help others understand what we modified, if the modification is still needed, and how to test our modifications
  • -
  • Although git blame can be used to find the commit that modified the code, a good comment can help locate the change and is quite brief.
  • -
-
-
-
9. Use comments to mark incomplete implementations
-
    -
  • Sometimes we have limitations in our knowledge or time. Adding comments documenting these limitations can allow us to better address and fix the issues.
  • -
-
-
-
Some other good practices:
-
    -
  • Comments should be clear and efficient. Don't add more information than necessary, but don't be too vague
  • -
  • Remember to update your comments if you update your code. Old comments can add more confusion.
  • -
  • Inline comments can add noise as they're mixed with our code. Spacing can be helpful here:
  • -
-
colors = [[213/255,94/255,0],         # vermillion
-          [86/255,180/255,233/255],   # sky blue
-          [230/255,159/255,0],        # orange
-          [204/255,121/255,167/255]]  # reddish purple
-
-
-
-
-

Code tells you how, comments tell you why.

-
-

- Jeff Atwood, Co-founder of StackOverflow

-
-
-

Technical Documentation

-

Writing

-
-
-

What is technical documentation writing?

-
-
-

Why is it important to write good technical documentation?

-
-
-

Technical documents are necessary for reproducibility as they relay important information about your project to others. Writing technical documents is not easy but should not be overlooked.

-

A well done technical document will communicate the goals of a project and in doing so, can generate interest in the project.

-
-
-

GitHub outlines several pieces of information to include:

-
    -
  1. What the project does
  2. -
  3. Why the project is useful
  4. -
  5. How users can get started with the project
  6. -
  7. Where users can get help with the project
  8. -
  9. Who maintains and contributes to the project
    -
  10. -
-

This is just part of the story and we'll add more to this in the coming slides.

-
-
-
README
-
    -
  • Technical documentation writing is typically found in a README.md file.
  • -
  • If the README.md file is placed in our repo's root, docs folder, or hidden in the .github directory, GitHub will place the contents of the README.md on the main repo page.
  • -
  • The README.md file will be the first thing visitors see when they come to the project page so it's important to make it as appealing as possible.
  • -
-
-
-
Examples
-

Let's walk through some good examples of README.md files:

- -
-
-

What did you like about these README files?

-

What similarities can you see?

-
-
-
What should be included?
-
    -
  1. Name of the project
  2. -
  3. What the project does
  4. -
  5. The project's usages
  6. -
  7. How to get started
  8. -
  9. Where to find help
  10. -
  11. Who contributes
  12. -
-
-
-
1. Name of the Project
-
    -
  • The name of your project should be unambiguous.
  • -
-
-
-
2. What the project does
-
    -
  • This should be a description of the project.
  • -
  • Provide context to the project and any reference links.
  • -
  • Include features or background information
  • -
  • Can be titled "Description"
  • -
-
-
-
3. The project's usages
-
    -
  • This should include how the project can be used.
  • -
  • Provide examples using the code along with the expected output of said code.
  • -
  • It should be a smaller example. Longer examples can be linked to.
  • -
  • Can be titled "Usages"
  • -
-
-
-
4. How to get started
-
    -
  • This is the installation guide.
  • -
  • Think of your particular audience and how much detail you might need to include.
  • -
  • Add a requirements section if there are specific dependencies or needs to run in a particular programming language.
  • -
  • Can be titled "Installation"
  • -
-
-
-
5. Where to find help
-
    -
  • Direct people on where to find help if they need.
  • -
  • This could be the issues page on GitHub, a forum, or an email address.
  • -
  • Can be titled "Support"
  • -
-
-
-
6. Who contributes
-
    -
  • This should outline how others can contribute to your project and what your requirements are for accepting contributions.
  • -
  • Can be titled "Contributing"
  • -
-
-
-
Additional Additions
-
    -
  • Visuals: Visuals can grab people's attention, but they can also be helpful for showcasing what the code does. Include screenshots or GIFs that demonstrate your project.
  • -
  • Badges: Badges provide metadata such as issue tracking, test results and downloads. Shields.io provides this service and you can also look at their GitHub for more information.
  • -
  • Acknowledgements: Include the authors or anyone that helped with the project.
  • -
-
-
-
Markdown
-
    -
  • As noted by the extension, README.md files are usually written in markdown, thus using markdown syntax for styling.
  • -
  • GitHub provides a good reference on how to write your README in markdown.
  • -
-
-
-
Headings
-
# Largest Heading
-## Second Largest Heading
-### Third Largest Heading
-
-

center

-
-
-
Text Styling
-
**bold**
-*italic*
-~~strikethrough~~
-**this is a *nested* example**
-***bold and italic***
-
-

center

-
-
-
Quoting
-
> Block quote some text
-
-

center

-
-
-
Unordered Lists
-
- this is an unordered list
-- second item
-    - nested
-        - second nest
-
-

center

-
-
-
Ordered Lists
-
1. This is an ordered list
-2. This is the second item
-    - with some additional information
-3. This is the third
-
-

W:1000 center

-
-
-
Codeblock
-

Wrap your code in ``` to create a codeblock.

-

W:1000 center

-
-
-
Links
-
[Rachael's GitHub](https://github.com/rachaellam)
-
-

W:1000 center

-
-
-
Images
-
![w:1000 center](pics/picture.png)
-
-

center
-As we see, images can also be GIFs. We can also play around with the size and alignment.

-
-
-

Folder Structure

-
-
-

What is folder structure and why is it important?

-
-
-

A good folder structure is important for reproducibility because it easily allows for others to navigate and implement our projects. If someone references a file that is self contained, they know they won't have to change the file path to gain access.

-

For example, what is the difference between these two paths:

-
    -
  1. -

    "/Users/rachaellam/Documents/all-projects/this-project/data/"

    -
  2. -
  3. -

    "this-project/data/"

    -
  4. -
-
-
-

Folder structure can vary based on the project but a basic one to follow is...

-
    -
  • /inputs -
      -
    • Everything that will not be edited including raw data and references
    • -
    -
  • -
  • /outputs -
      -
    • Everything that was created during the project and your results
    • -
    -
  • -
  • /scripts -
      -
    • All code that was written for the project
    • -
    -
  • -
-
-
-

Wilson et al. also outline a file structure that is similar...

-
    -
  • /doc -
      -
    • All text documents including documentation or references
    • -
    -
  • -
  • /data -
      -
    • All raw data and metadata
    • -
    -
  • -
  • /results -
      -
    • Files generated during the analysis including generated data or cleaned data
    • -
    • Results can be further divided into subdirectories that contain intermediate files and finished files
    • -
    -
  • -
  • /src -
      -
    • All code that was written for the project
    • -
    -
  • -
-
-
-

References

-

Reproducibility:

- -
-
-

Commenting:

- -
-
-

Technical Documentation Writing:

- -
-
-

Folder Structure:

- -
-
\ No newline at end of file diff --git a/slides-resources/html-slides/inequity_slides.html b/slides-resources/html-slides/inequity_slides.html deleted file mode 100644 index e8048d5..0000000 --- a/slides-resources/html-slides/inequity_slides.html +++ /dev/null @@ -1,593 +0,0 @@ -
-

Inequity

-
$ echo "Data Sciences Institute"
-$ echo "Rachael Lam"
-
-
-
-
Inequality vs. Inequity
-

Inequality:

-
    -
  • Uneven distribution of resources
  • -
  • Unbalanced conditions
  • -
  • Usually quantitative in nature
    -
  • -
-

Inequity:

-
    -
  • Avoidable differences arising from social circumstances
  • -
  • The state of being unfair or unjust
  • -
  • Typically qualitative in nature
  • -
-
-
-

Inequality usually emerges due to inequity.
-

-

Can you think of any examples?

-
-
-
    -
  1. -

    Lower salaries for female employees stems from gender inequity

    -
  2. -
  3. -

    Job opportunities favouring white applicants stems from racial inequity

    -
  4. -
  5. -

    Higher rate of Indigenous children in the child welfare system stems from a long history of structural inequity

    -
  6. -
-
-
-

Truth and Reconciliation

-
Missing Children and Unmarked Burials
-
-
-
The History of Residential Schools
-
    -
  • -

    The Residential Schools System dates back to 1870, which was government-funded and church-led

    -
  • -
  • -

    The system's intention was to lead a cultural genocide to assimilate Indigenous children

    -
  • -
  • -

    More than 130 schools were established with more than 150,000 Indigenous students attending

    -
  • -
  • -

    Children were forcibly removed from their families

    -
  • -
  • -

    Families who resisted faced fines and/or jail time

    -
  • -
-
Reconciliation Dialogue Workshop
-
-
-
    -
  • Children did not see their families for years or interact with their family within the schools, unable to speak their language or practice their culture
  • -
-
Reconciliation Dialogue Workshop
-
-
-
    -
  • -

    Children received inadequate food, clothing, facilities, education, staff and medical treatment

    -
  • -
  • -

    Children faced severe and constant abuse with mortality rates ranging between 30-60%

    -
  • -
-
Reconciliation Dialogue Workshop
-
-
-
Oral Histories
-
    -
  • -

    Using language such as they and them can create distance from ourselves and those we are speaking or learning about

    -
  • -
  • -

    Instead, the second person can help us feel closer to the stories of survivors and their testimonies

    -
  • -
  • -

    Oral history is a significant practice for Indigenous Peoples. It is how knowledge is passed on. To respect the practice and values of Indigenous Peoples, we will engage with oral testimonies from survivors of the Residential School System

    -
  • -
-
Let the Truth Be Told: Indigenous Oral Testimonies Activity Guide
-
-
-
Rita's Story
-

center

-
-
-
-
Workshop
-

Medicine wheels are used by some Indigenous peoples to represent elements of a whole person.

-

We'll use this tool as we listen to stories of Survivors and discuss all together.

-
Let the Truth Be Told: Indigenous Oral Testimonies Activity Guide
-
-
-

Physical:

-
-

Possible items can include the physical descriptions of the home setting before Residential School, the settings at school, or any descriptions of locations after school that stand out to students. This can include all healthy forms of affection and/or inappropriate and harmful physical contact. Sports and games played, and events could be included here. Acts of violence and abuse would also go here.

-
-
Let the Truth Be Told: Indigenous Oral Testimonies Activity Guide
-
-
-

Intellectual:

-
-

Possible items to be placed here include thoughts the students had, reflections and understandings about life before school, the school itself, or after their time in school that they share. Students may also note what Survivors learned in school, what they thought about that learning and other mental activities required by the school. Students may also note its absence.

-
-
Let the Truth Be Told: Indigenous Oral Testimonies Activity Guide
-
-
-

Spiritual:

-
-

Separating children from their family, customs, languages and traditional ways of being was thought to be the only way to force them into the dominant religions of Canada. Experiences students could place here would be spiritual teachings from before Residential School, during, and after. Students may find that they put a lot into this category when Survivors talk about their return to culture, family and language as part of their healing journey

-
-
Let the Truth Be Told: Indigenous Oral Testimonies Activity Guide
-
-
-

Emotional:

-
-

There are likely to be many emotional moments in the Indigenous Survivors’ Oral Testimony. Students may struggle with determining whether to put something in this category or another category. Consider physical abuse – because of the nature of the experience, it may seem like it should go in physical; however, because of a strong response of a Survivor, it may seem to belong in the emotional category. Selecting either or both categories are accurate and demonstrates the multi-faceted impacts on Indigenous children

-
-
Let the Truth Be Told: Indigenous Oral Testimonies Activity Guide
-
-
-

center
-Legacy of Hope

-
-
-

Let's discuss the stories that we heard and our medicine wheels
-

-
    -
  • -

    What categorical decisions did we make?

    -
  • -
  • -

    What was challenging about these decisions?

    -
  • -
  • -

    Did you find yourselves noting things you might not otherwise have if you had not been asked specifically for these categories?

    -
  • -
-
-
-

What are the consequences of this history for Indigenous Peoples today?

-
-
-
Intergenerational Impacts
-

The legacy of Residential Schools has had lasting impacts on Survivors and their families. Some include:

-
    -
  • Alcohol and drug abuse
  • -
  • Educational blocks
  • -
  • Higher rates of suicide
  • -
  • Destruction of social support networks
  • -
  • Missing and Murdered Indigenous Women and Girls
  • -
  • Higher rate of children in the child welfare system (an extension of Residential Schools)
  • -
-
-
-
TRC: Calls to Action
-

To redress the legacy of colonization and residential schools, the Truth and Reconciliation Commission drafted 94 calls to action in 2015. Since then, only 14 have been completed.

-

Reading these calls to action can give us a good idea of some of the inequities that exist today

-
-
-
-

1.ii. Providing adequate resources to enable Aboriginal communities and child-welfare organizations to keep Aboriginal families together where it is safe to do so, and to keep children in culturally appropriate environments, regardless of where they reside.

-
-
-
-
-

6. We call upon the federal government to develop with Aboriginal groups a joint strategy to eliminate educational and employment gaps between Aboriginal and non-Aboriginal Canadians.

-
-
-
-
-

23. We call upon all levels of government to:

-i. Increase the number of Aboriginal professionals working in the health-care field.

-ii. Ensure the retention of Aboriginal health-care providers in Aboriginal communities

-iii. Provide cultural competency training for all health-care professionals

-
-
-
-
-

30. We call upon the federal, provincial, and territorial governments to commit to eliminating the overrepresentation of Aboriginal people in custody over the next decade, and to issue detailed annual reports that monitor and evaluate progress in doing so.

-
-
-
-

How does this relate to data science?

-

How do we utilize this knowledge as we practice data science?

-
-
-


-It's important as we move forward to understand inequity and the inequality it has produced.
-

-It is not enough to discuss crime without discussing the overrepresentation of racialized people in the justice system.

-
-
-


-It is not enough to discuss healthcare without discussing the difference of treatment between Indigenous and non-Indigenous people.

-
-
-


-It's not enough to discuss the child welfare system without discussing the history of Residential Schools and the impact on Indigenous Peoples.

-
-
-

Resources

- -
-
-

Potential Resources:

- -
-
- -
-
\ No newline at end of file diff --git a/slides-resources/html-slides/unix_slides.html b/slides-resources/html-slides/unix_slides.html deleted file mode 100644 index 0834abd..0000000 --- a/slides-resources/html-slides/unix_slides.html +++ /dev/null @@ -1,2007 +0,0 @@ -
-

Unix Shell

-
$ echo "Data Sciences Institute"
-$ echo "by: Rachael Lam"
-
-
-
-

Unix

-
-
-
What is Unix?
-

Unix was created in 1970 and since then has branched into other versions including Linux. Linux was created from Unix with very similar features, although there are some minor differences in commands.

-

The Unix shell - more specifically bash - is a powerful tool for quickly and easily navigating and manipulating files, scaling automated tasks, accessing Git and processing data.

-
-
-
So what is the shell?
-

The shell is any user interface/program that takes an input from the user, translates it into instructions that the operating system can understand, and conveys the output back to the user.

-

There are various types of user interfaces:

-
    -
  • graphical user interfaces (GUI)
  • -
  • touch screen interfaces
  • -
  • command line interfaces (CLI)
  • -
-
-
-
And what is bash?
-

We'll be focusing on command line interfaces (CLI), more specifically bash, which stands for Bourne Again SHell.

-

We'll also need a terminal emulator to interact with the shell. This is most likely called terminal on our menu.

-
-
-
Let's get started!
-

First, we'll open our terminal. As mentioned earlier, this is most likely called terminal and can be found by searching our computer, which on a Mac would be through cmd + space

-

Let's take a look at the terminal. What do we notice?

-
    -
  • last login
  • -
  • name
  • -
  • location
  • -
  • shell
  • -
-
-
-
Looking at the Shell
-

If we type echo $SHELL in our terminal, the output will tell us what shell we are working with. Most often, our shell will already be bash but in newer Macs, it could be zsh which is almost identical to bash. We can also see where bash is located by typing:

-
    -
  • whereis bash
  • -
  • whence bash
  • -
  • which bash
  • -
-
-
-

Let's start with a few commands and see what happens in our terminal.

-
$ echo Rachael
-
-
$ date
-
-
$ cal
-
-
$ lksjfs
-
-
-
-
    -
  • What happens when we type something that does not exist?
  • -
  • What happens with errors?
  • -
-
-
-

Navigate Files / Directories

-
-
-

Files

-
-
-

Knowing the different types of files available helps us better understand how to navigate and manipulate them.

-
    -
  • -

    Regular files are text files with readable characters.

    -
  • -
  • -

    Executable files are programs that are invoked as commands.

    -
  • -
  • -

    Shell scripts are executable files that we can read whereas bash is a non-human-readable executable file.

    -
  • -
-
-
-

Directories

-
-
-

Directories are files that are like folders which contain other files and directories (subdirectories), creating a hierarchical structure.

-
    -
  • -

    We can think of the structure of directories as a tree with the top of the tree being the root.

    -
  • -
  • -

    All files can be named and found in relation to the root by listing the directory names in order from the root, separated by slashes, followed by the file's name.

    -
  • -
-
-
-

Let's try three commands that help us navigate our system:

-
    -
  1. First, let's run the code below and see what happens:
  2. -
-
$ pwd
-
-

pwd prints our working directory. If we ever need to know where we are, we can execute this command.

-
-
-
    -
  1. Now, let's run the code below and see again what happens:
  2. -
-
$ cd
-
-

By default, cd changes your working directory to your home directory. You can also use cd to set your working directory by including the desired pathname

-
$ cd Desktop
-
-
-
-

In the previous example, we were able to just state Desktop because it is a directory in our working directory. If we changed our working directory to Desktop, and then wanted to change it again to a directory in Desktop, we could again just specify the folder.

-

If we wanted to change the working directory to a directory outside of our working directory, we would need to specify a pathname:

-
$ cd /Users/rachaellam/Desktop
-
-
-
-
    -
  1. To know what files and folders exist in our working directory, we can use the code below:
  2. -
-
$ ls
-
-

We can add a pathname at the end to list the contents of a specified directory.

-
-
-

Paths

-
-
-

As we've seen, directory names separated by slashes are paths. There are two types of paths, absolute and relative.

-
    -
  • -

    An absolute pathname begins at the root directory and includes each directory, separated by slashes until the desired directory or file is reached.

    -
  • -
  • -

    A relative pathname starts from the working directory and uses symbols . or .. to represent relative positions in the file tree.

    -
  • -
-
-
-

Using cd and pwd let's take a look at how we can use absolute and relative pathnames.

-
$ cd
-$ pwd
-
-
$ cd Desktop
-$ pwd
-
-
$ cd ..
-$ pwd
-
-
-
-

Here's another example using the /usr pathname.

-
$ cd /usr/bin
-$ pwd
-
-
$ cd /usr
-$ pwd
-
-
$ cd ..
-$ pwd
-
-
-
-

Let's now try moving through some directories to get comfortable. Try out lots of different paths depending on the file structures of your computer. Try getting into different directories from different parent directories. The tilde notation ~ in the examples below refers to our home directory.

-
$ cd ~/Desktop
-$ pwd
-
-
$ cd ~/Desktop/dir1
-$ pwd
-
-
-
-

Questions?

-
-
-

Options and Arguments

-
-
-

Options and arguments are used to write commands that can make changes to our system. The syntax is:

-
$ command -option argument
-
-

Options can also be combined, which we'll briefly see now but learn more about a bit later.

-
-
-

There are two ways to write an -option:

-
    -
  1. Short option: one dash followed by a single character
  2. -
  3. Long option: two dashes followed by a word
  4. -
-

Some examples:

-

-a or --all
--d or --directory
--r or --reverse

-
-
-

Let's try these lines of code and see what happens:

-
$ ls -l
-
-
$ ls -lt
-
-
$ ls -lt -reverse
-
-

-l long format
--t modification time
--reverse reverse the sort order
-Notice how -lt is actually a combination of multiple options.

-
-
-

Questions?

-
-
-

Wildcards

-
-
-

Wildcards give us the ability to rapidly specify groups of filenames based on patterns of characters. Let's look at a few examples below:

-

* → matches any sequence of characters (including none)

-

? → matches any single character

-

[characters] → matches any character that is in the set

-

[!characters] → matches any character that is not in the set

-
-
-

Some other helpful character wildcards are:
-[:digit:] → matches any numeral
-[:lower:] → matches any lowercase letter
-[:upper:] → matches any uppercase letter

-
-
-

Let's try a few in our terminal:

-
$ ls * 
-
-
$ ls a*.txt
-
-
$ ls [abc]*
-
-
$ ls [[:upper:]]*
-
-
$ ls [![:digit:]]*
-
-
-
-

Questions?

-
-
-

Working with

-

Files / Directories

-
-
-

We're going to learn some basic commands to begin some preliminary coding. We'll also be using these throughout the module, so it's important to understand how they work now:

-
    -
  • create directory mkdir
  • -
  • create file touch
  • -
  • copy cp
  • -
  • move and rename mv
  • -
  • remove rm
  • -
-
-
-

Commands

-
-
-
mkdir
-

First let's make a directory. It's important to remember what directory you're working in currently, because that's where the new directory will be made. Let's assume for now, we're working on our desktop.

-
$ mkdir directory
-
-

We can also create multiple directories at the same time:

-
$ mkdir dir1 dir2 dir3
-
-
-
-
touch
-

We can also make new files from the command line. This is particularly useful when we want to make scripts, which we'll learn a bit later. Using touch, we can make a new file in our working directory.

-
$ touch file1
-
-

We can also create a specific file type by adding the extension:

-
$ touch file1.sh
-
-
-
-
cp
-

Now we're going to copy a file that we have on our desktop. It can be any file but remember to include the extension or if it has multiple characters, special characters and spaces, to wrap it in quotes.

-
$ cp file1 file2
-
-

We can also copy files or directories into a directory.

-
$ cp file1 dir1
-
-
-
-

And all files from one directory into another using wildcards:

-
$ cp dir1/* dir2
-
-

What does the /* in this command mean?

-
-
-

There are some useful -options that accompany cp:

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
-iBefore overwriting an existing file, prompt the user for confirmation. If this option is not specified, cp will silently overwrite files.
-rRecursively copy directories and their contents. This option is required when copying directories.
-vDisplay informative messages as the copy is performed.
-
-
-
mv
-

The mv command enables us to move and rename files and directories, depending on how it's used. In the example below, mv renames file1 to file2.

-
$ mv file1 file2
-
-

Here, mv moves file1 to dir1

-
$ mv file1 dir1
-
-
-
-

We can also move directories into other directories:

-
$ mv dir1 dir2
-
-

In this case, if dir2 exists, dir1 will be moved into dir2. If dir2 does not exist, dir1 will simply be renamed to dir2. In both cases, the entire directory is moved or renamed as a unit, rather than just its contents.

-
-
-

Let's say we're in the directory Desktop and we just moved file1 into dir1 but now we want to put it back in Desktop. How would we move a file out of a directory into another one? Unfortunately we can't just say

-
$ mv file1 Desktop
-
-

because file1 no longer exists in our working directory (Desktop), so the command cannot find it and will fail with a No such file or directory error.

-
-
-

The answer involves using pathnames and the tilde ~ notation:

-
$ mv dir1/file1 ~/Desktop
-
-

If we just wanted to move file1 into dir2 (if dir2 is in our working directory), we could type:

-
$ mv dir1/file1 dir2
-
-
-
-

What if we want to move just the contents of dir1 to another directory rather than the whole folder? HINT: it is very (exactly) similar to copying (cp).

-
-
-
$ mv dir1/* dir2
-
-

This is a combination of the directory dir1, pathnames / and wildcards *. Here, dir1/* takes all the contents of dir1 and puts them in dir2.

-

We could also use the same technique to specify certain files to move rather than all of them. How do you think this would be done?

-
-
-

Questions

-
    -
  • We're starting to combine our knowledge of files, directories and pathnames with some basic commands. How do we feel up to this point?
  • -
-
-
-
rm
-

To remove files we use the command rm. Because we're now deleting files, it's important that you're sure of what you're deleting because there is no way to undo. Fortunately, there are ways to protect ourselves from accidental deletions.

-
$ rm file1
-
-

Without specifying any -options, file1 will be deleted without any feedback.

-
-
-

To ensure we want to delete something, we can use the option -i (interactive) that we learned earlier.

-
$ rm -i file1
-
-

This will prompt a question asking us if we want to delete file1. We can respond with y if yes and n if not.

-
-
-

If we want to delete a directory, we need to use the option -r as we did when copying (cp). This will recursively delete everything inside of the directory and the directory itself.

-
$ rm -r dir1
-
-

If we're specifying multiple deletions and a directory does not exist, the shell will tell us. If we don't want that message, we can add the -option, -f (force). Force will override -i if it is included.

-
-
-
    -
  1. -

    How do you delete multiple directories?

    -
  2. -
  3. -

    What happens if you delete multiple directories with -i?

    -
  4. -
  5. -

    What happens if you delete multiple directories with -i but one does not exist?

    -
  6. -
-
-
-

Remember, it's extremely important to remember that you cannot undo rm. This means, if you start using wildcards to specify filenames and don't include -i, you could delete things by accident. For example, let's say you want to delete all .txt files in a directory:

-
$ rm *.txt
-
-

If you accidentally add a space between * and .txt, the rm command will delete all the files in the directory and then try to find a file named .txt, which does not exist because it deleted everything.

-
-
-

Questions?

-
-
-

Input / Output

-
-
-
Standard Input/Output
-

Each program invokes the standard input, output and error.

-

We can think of the standard input default as coming from the keyboard and if we think of everything as a file, a command such as ls will result in a file called standard output and the status message to a file called standard error. By default, both are linked to the screen and not saved to a disk file.

-
-
-
Input/Output Redirection
-

Input/Output redirection allows us to change where the input comes from and where the output goes to, such as storing the output of a command into a file. We can do this using the redirection operator >.

-
$ ls -l /usr/bin > ls-output.txt
-
-

Here we have redirected the output of ls -l /usr/bin to a .txt file called ls-output.txt.

-
-
-

We can now see the details of that file and if it worked:

-
$ ls -l ls-output.txt
-
-

By looking at the details, we can see that the file was created and that it is a fairly large text file, indicating that something was written to it.

-
-
-

If we specify a directory that does not exist, we receive the standard error:

-
$ ls -l /bin/usr > ls-output.txt
-
-

Why was the standard error not written to the .txt file?
-What happened to our ls-output.txt file?

-
-
-

The standard error was not written to the .txt file because > only redirects the standard output. However, the destination file is always rewritten from the beginning: the shell emptied ls-output.txt before ls ran, and because ls produced only an error and no standard output, the result is an empty file.

-

So how do we append rather than rewrite? By using the redirection operator >>.

-
$ ls -l /usr/bin >> ls-output.txt
-
-
-
-

If we want to redirect the standard error, we need to use the redirection operator 2>

-
$ ls -l /bin/usr 2> ls-error.txt
-
-

If we want to redirect both the standard output and standard error to one file, we have two options.

-
    -
  1. Use 2>&1 at the end of the command.
  2. -
-
$ ls -l /bin/usr > ls-output.txt 2>&1
-
-
    -
  1. Use &> in place of >
  2. -
-
$ ls -l /bin/usr &> ls-output.txt
-
-
-
-

Questions

-
-
-
cat
-

cat takes one or more files and copies them to standard output. Using the ls-output.txt created earlier, we can see how that's done:

-
$ cat ls-output.txt
-
-
-
-

We can also use it to join files together. Let's say I have two files, file1 and file2 and I want to combine them into a file called file3:

-
$ cat file1 file2 > file3
-
-

Now the contents of file1 and file2 should be combined.

-
-
-

We can also use cat to add to a .txt file.

-
$ cat > new_cat.txt
-
-

Now we can type the text that we want in the file. Once we're finished, we can use CTRL-D to exit.

-

What would be the difference between $ cat > new_cat.txt and $ cat >> new_cat.txt?

-
-
-

Finally, we can redirect the standard input from the keyboard to the file new_cat.txt

-
$ cat < new_cat.txt
-
-

This is almost identical to just typing $ cat new_cat.txt but we can see later how it could be more useful.

-
-
-

Questions?

-
-
-

Pipes / Filters

-
-
-

We use pipelines to read data from standard output and send it to standard input using the pipe operator |. This means the standard output of one command can be piped into the standard input of another.

-

Several commands put together in a pipeline are often referred to as filters. Filters take an input, change it and then output it.

-
-
-

Commands

-
-
-

Let's learn a few more commands that will help us further understand pipelines and filters. We'll learn:

-
    -
  • extract columns from output cut
  • -
  • sort lines of text sort
  • -
  • report or omit repeated lines uniq
  • -
  • print lines matching a pattern grep
  • -
  • search directories and subdirectories for files find
  • -
  • output the first part of a file head
  • -
  • output the last part of a file tail
  • -
-
-
-
cut
-

Let's look at a csv to see how we can initially see our data. Because it's a csv, each line is separated by a comma. Let's first read that file using cat:

-
$ cat parking_data.csv
-
-

We'll see a lot of text, so let's make some sense of it using cut.

-
-
-

To use cut, I need to pass a couple options:

-
    -
  1. -d which cuts the text based on what follows. For example, -d: will cut based on colons or -d" " will cut based on a space.
  2. -
  3. -f, which extracts a particular field based on what follows. For example, -f1 will take the first field or -f2 will take the second field and so on.
  4. -
-
-
-

In this example, I'm taking the file parking_data.csv and cutting it based on commas and then only extracting the first field.

-
$ cut -d, -f1 < parking_data.csv
-
-

What happens if I add another -f option? What does this do?

-
$ cut -d, -f1 -f2 < parking_data.csv
-
-

How would I specify more than three fields?

-
-
-
sort
-

How can we make our previous example more readable?

-

One answer is to use the sort feature. We can pipe this with the cut feature:

-
$ cut -d, -f1 < parking_data.csv | sort
-
-
-
-
uniq
-

Additionally, I can make the above command even more readable by removing any duplicates with uniq

-
$ cut -d, -f1 < parking_data.csv | sort | uniq
-
-
-
-

Questions?

-
-
-
grep
-

grep is a powerful tool for finding patterns in text files. The syntax is:

-
$ grep pattern [file...]
-
-

In our case, we're going to use it with our previous example and pipe it with other commands:

-
$ cut -d, -f1 parking_data.csv | sort | uniq | grep FIRE
-
-

The results are all patterns of FIRE in the text file.

-
-
-
find
-

Another useful use for grep is to find files in directories. grep is nicely combined with find for this feature.

-
$ find ~/Desktop/dir1 | grep cat
-
-

Here we're searching in the directory dir1 with the pattern cat. This would be helpful if we wanted to know if there were any files with the word cat in the filename.

-
-
-
head / tail
-

We can also extract the first and last part of files using head and tail. We can also add the option -n followed by a number to extract a certain number of lines.

-
$ head -n 5 ls-output.txt
-
-
$ tail -n 5 ls-output.txt
-
-
-
-

head and tail can also be used in pipelines:

-
$ cut -d, -f1 < parking_data.csv | sort | uniq | head -n 5
-
-
$ cut -d, -f1 < parking_data.csv | sort | uniq | tail -n 5
-
-
-
-

Questions?

-
-
-

Expansions

-
-
-

Expansion uses special characters to expand upon something before the shell processes it. We have learned a few expansions so far such as the tilde ~ and wildcards *. We've also seen some character wildcards [characters].

-

Expansions are another feature that help us when we're manipulating and working with files and directories.

-

Other examples of expansions are:

-
    -
  • arithmetic expansion
  • -
  • brace expansion
  • -
-
-
-
Arithmetic Expansion
-

Arithmetic expansion basically makes the shell a calculator.
-The syntax is:

-

$((expression))

-

For example:

-
$ echo $((2 + 2))
-
-

Arithmetic expressions can be nested:

-
$ echo $(($((2 + 2)) * 3))
-
-
-
-

Just for reference, here is a list of the arithmetic operators:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
OperatorDescription
+Addition
-Subtraction
*Multiplication
/Integer division
**Exponentiation
-
-
-
Brace Expansion
-

Brace expansions allow us to create multiple text strings from a pattern containing braces. Here are a few examples:

-
$ echo Test-{A,B,C}-Example
-
-
$ echo Number_{1..5}
-
-
$ echo {Z..A}
-
-

Brace expansions can also be nested:

-
$ echo a{A{1,2},B{3,4}}b
-
-
-
-

We can use brace expansion to help make multiple directories using mkdir.

-
$ mkdir dir-{1..3}
-
-

This command makes 3 directories named dir-1, dir-2 and dir-3

-
-
-

Quoting / Backslashing

-
-
-

Quoting suppresses unwanted expansions. We can use double quotes, single quotes or backslashes:

-
    -
  • Double quotes cause special characters to lose their meaning and be treated as ordinary characters, except for
    -$ \ `
  • -
  • Single quotes suppress all expansion
  • -
  • Backslashes are used to escape single characters
  • -
-
-
-

Many times there will be file names or directories that are named with spaces. In this case, we'll need to use double quotes so that the shell can read it.

-

Using touch we can create a text file named something separated with two words:

-
$ touch "two words.txt"
-
-

We can then see the details of the file we just created:

-
$ ls -l "two words.txt"
-
-
-
-

If we want to rename the text, we would do as follows:

-
$ mv "two words.txt" two_words.txt
-
-
-
-

Let's see what these three examples do in shell:

-
$ echo '2 * 3 > 5 is an equation'
-
-
$ echo '2 * 3 > 5' is an equation
-
-
$ echo 2 \* 3 \> 5 is an equation
-
-
-
-

Questions?

-
-
-

Command Line Editing

-
-
-

Getting familiar with command line editing can save you time. Bash uses a library called Readline to provide command line editing.

-

There are many shortcuts and you don’t have to memorize them all, just use the ones that you feel are best. There are even more shortcuts that you can read about in the textbooks!

-
-
-
Character Commands
- - - - - - - - - - - - - - - - - - - - - - - - - -
CommandDescription
CTRL-BMove one character backwards
CTRL-FMove one character forwards
DELDelete one character backwards
CTRL-DDelete one character at cursor location
-
-
-
Word Commands
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CommandDescription
ESC-BMove one word backwards
ESC-FMove one word forwards
ESC-DELDelete one word backwards
ESC-DDelete one word forwards
CTRL-YUndo
-
-
-
Line Commands
- - - - - - - - - - - - - - - - - - - - - - - - - -
CommandDescription
CTRL-AMove to beginning of the line
CTRL-EMove to end of the line
CTRL-KDelete text from the cursor to end of line
CTRL-UDelete text from the cursor to the beginning of the line
-
-
-
History Line Commands
- - - - - - - - - - - - - - - - - - - - - -
CommandDescription
CTRL-PMove to the previous line in your history of commands
CTRL-NMove to the next line in your history commands
!!Repeat the last command
-
-
- - - - - - - - - - - - - - - - - - - - - -
CommandDescription
!numberRepeat history list item number
!stringRepeat last history item starting with string
!?stringRepeat last history item containing string
-
-
-

Questions?

-
-
-

Completion Command

-
-
-

Completion commands autocomplete your command if it exists by hitting tab. If it does not exist, the command will not be able to complete.

-

If multiple exist, the command will also not be able to complete because it will not know which one to choose.

-

For example, let's say we have two files called file1 and file2. We would not be able to use autocomplete because the shell will not know which to choose until the last character.

-
-
-

If we have two files, one called foot.txt and one called file.txt, this command would not be able to autocomplete:

-
$ ls f
-
-

But this one will:

-
$ ls fil
-
-
-
-

Questions?

-
-
-

Shell Scripts

-
-
-
Shell Scripts
-

Shell scripts allow us to combine several commands into one file, rather than one by one on the command line.

-

The shell will read the script just as if you were to write the command on the command line.

-

Most things that can be done in the shell script can be done on the command line and vice versa.

-
-
-
Writing Shell Scripts
-

There are three important considerations when writing the shell script

-
    -
  1. Write a script: scripts are ordinary text files. You can use a text editor that will provide syntax highlighting (color coding elements of the script). It can help find errors but writing in TextEdit is possible.
  2. -
  3. Make a shell script executable: set the script permissions to allow it to be executed
  4. -
  5. Put the shell script somewhere the shell can find it: the shell automatically searches certain directories for executable files when no explicit pathname is specified.
  6. -
-
-
-
Set Up
-

Open either TextEdit or your text editor of choice. Some popular programs are Sublime Text, Vim, Atom and Notepad++.

-

If you want to see the syntax highlighting, you might have to save your script as a .sh file. Without doing this, your file will just look like a regular .txt file.

-

Once you open your text editor and save it, we can begin our first script!

-
-
-
Script File Format
-

We must first tell the shell the name of the interpreter that should be used to execute the script. This is marked by using a shebang: #!

-

Throughout the script, you can and should use # to make comments. Comments make your code more readable and can help you understand your code when you come back to it.

-
-
-
#!/bin/bash
-
-# this is our first comment
-
-echo "This is our first script!"
-
-

Here we can see we've told the shell to use /bin/bash using the shebang #!
-We've also added a comment using #
-And finally, something quite familiar, we have our first line of script using echo

-
-
-
A Note on Commenting
-

Commenting is important not just so you can understand your own work, but also so others can understand your work in collaborative projects. It also helps make your code reproducible.

-

Comments can be inline:

-
echo "Hello World" #this is an inline comment
-
-

or as comment blocks:

-
#this is a comment block
-echo "Hello World"
-
-
-
-

Questions?

-
-
-
Executable File Permission
-

In order to execute our file, we have to add file permissions:

-

chmod helps make our script executable
-775 is used to make scripts that everyone can execute
-700 is used to make scripts that only the owner can execute

-
-
-

Here, chmod is combined with 775 so that everyone can execute the script:

-
$ ls -l first_script.sh
-
-
$ chmod 775 first_script.sh
-
-
-
-
Script File Location
-

In order to run our script, we have to call it using ./ in front of the script filename (./script).

-

File location is important for running your script. If just script was written, the shell would not be able to find the script and would try to read it as a command, outputting command not found.

-

Running echo $PATH helps us see what directories are being searched for the script.

-
-
-

If we want to run our script without ./, we can create a bin directory (~/bin) for our script, move our script into the bin folder and then run it. It's important to note that we have to make this bin in our home directory, and that ~/bin must be listed in our PATH. If we made it on our Desktop, the script would still not be found.

-
$ mkdir bin
-$ mv first_script.sh bin
-$ first_script.sh
-
-

In this block of code, we're making the bin folder using mkdir, moving the script into the bin with mv and then running the script without ./.

-
-
-
Good Locations for Scripts
-

For personal use, a good place to put your script is ~/bin (the bin directory in your home directory).

-

For everyone's access, it's better to put scripts in /usr/local/bin.

-
-
-

Questions?

-
-
-

Shell Functions

-
-
-
Functions
-

Functions are a good way to break down code into smaller, more manageable chunks. Each chunk can represent a task.

-

For example, let's say your entire process is make pasta. It can be broken down into:

-
    -
  1. Prepare vegetables
  2. -
  3. Make sauce
  4. -
  5. Cook pasta
  6. -
  7. Serve
  8. -
-
-
-

Each of these steps can be expanded further into sub processes. Cook pasta can be:

-
    -
  1. Fill pot with water
  2. -
  3. Boil water
  4. -
  5. Measure pasta
  6. -
  7. Add pasta to boiling water
  8. -
  9. Cook for 8-12 minutes
  10. -
  11. Strain
  12. -
-
-
-

Functions have two syntactic forms:

-
function name {
-    commands
-    return
-}
-
-
name () {
-    commands
-    return
-}
-
-

name is the name of the function
-commands are the commands contained in the function

-
-
-

Let's write our first function:

-
#!/bin/bash
-
-function funct {
-    echo "Step 2"
-    return
-}
-
-#program starts here
-
-echo "Step 1"
-funct
-echo "Step 3"
-
-

What do you think this function will output?

-
-
-

Let's save and run this function in our terminal to see what happens.

-

Here's a good time to recap how to save, grant permissions and run the script.
-chmod - permissions command
-775 - grant permissions to everyone
-700 - grant permissions to yourself
-~/bin - where to save the script

-
-
-

Questions?

-
-
-

Variables

-
-
-
Global Variables
-

Let's make our script more complex with some variables. We can first define variables directly through the terminal.

-
$ foo="something cool"
-$ echo $foo
-
-

Notice how in order to call the variable we need to add $ before the variable. The quotes are not necessary if the value of the variable doesn't include spaces when defining it. If we did not include the quotes here, we would receive an error.

-
-
-

Now let's add some global variables to our script:

-
#!/bin/bash
-
-step="Step 2"
-
-function funct {
-    echo $step
-    return
-}
-
-#program starts here
-
-echo "Step 1"
-funct
-echo "Step 3"
-
-

What do we think will be the output in this example?

-
-
-
Local Variables
-

Local variables are variables that are contained within the function. Because they're contained, they can have names that already exist in the shell globally or within other shell functions.

-
-
-
#!/bin/bash
-
-foo=0 # global variable foo
-funct_1 () {
-    local foo # variable foo local to funct_1
-    foo=1
-    echo "funct_1: foo = $foo"
-}
-
-funct_2 () {
-    local foo # variable foo local to funct_2
-    foo=2
-    echo "funct_2: foo = $foo"
-}
-
-echo "global:  foo = $foo"
-funct_1
-echo "global:  foo = $foo"
-funct_2
-echo "global:  foo = $foo"
-
-
-
-

What would happen if we removed local?

-
#!/bin/bash
-
-foo=0 # global variable foo
-funct_1 () {
-    foo=1
-    echo "funct_1: foo = $foo"
-}
-
-funct_2 () {
-    foo=2
-    echo "funct_2: foo = $foo"
-}
-
-echo "global:  foo = $foo"
-funct_1
-echo "global:  foo = $foo"
-funct_2
-echo "global:  foo = $foo"
-
-
-
-

Questions?

-
-
-

Parameters

-
-
-
Positional Parameters
-

Positional parameters are built-in parameters that allow our programs to get access to the contents of the command line. This is extremely valuable when we are creating scripts and then want to pass a parameter through the script from the command line.

-

If our code has more than 9 positional parameters, you need to enclose the positional parameter in curly brackets ${10}

-

Let's create a script to see how this works:

-
-
-
#!/bin/bash
-
-echo "
-Number of arguments: $#
-\$0 = $0
-\$1 = $1
-\$2 = $2
-\$3 = $3
-\$4 = $4
-\$5 = $5
-\$6 = $6
-\$7 = $7
-\$8 = $8
-\$9 = $9
-"
-
-
-
-

In the example, you may notice that we haven't given $0 any specific value.
-Let's try run the script a couple ways through the command line to see what this means:

-
    -
  1. Run the script with arguments a b c d.
  2. -
  3. Run the script with any arguments of your choice.
  4. -
-

What do we notice?

-
-
-
$* and $@
-

$* → Expands into the list of positional parameters, starting with 1. When surrounded by double quotes, it expands into a double quoted string containing all of the positional parameters, each separated by the first character of the IFS shell variable (by default a space character).
-$@ → Expands into the list of positional parameters, starting with 1. When surrounded by double quotes, it expands each positional parameter into a separate word surrounded by double quotes.

-
-
-

Let's take a look at this code piece by piece:

-
print_params () {
-    echo "\$1 = $1"
-    echo "\$2 = $2"
-    echo "\$3 = $3"
-    echo "\$4 = $4"
-}
-
-pass_params () {
-    echo -e "\n" '$* :'; print_params $*
-    echo -e "\n" '"$*" :'; print_params "$*"
-    echo -e "\n" '$@ :';   print_params $@
-    echo -e "\n" '"$@" :'; print_params "$@"
-}
-
-pass_params "word" "words with spaces"
-
-
-
-
    -
  1. Here we have two functions: print_params () and pass_params (). pass_params () calls on the function print_params () within its function.
  2. -
  3. In the first function, echo is printing the line inside the double quotes. The \ in front of $1 escapes the $, thus losing its meaning, as we learned earlier.
  4. -
-
print_params () {
-    echo "\$1 = $1"
-    echo "\$2 = $2"
-    echo "\$3 = $3"
-    echo "\$4 = $4"
-}
-
-
-
-
    -
  1. In the second function, echo again is printing the line inside the single quotes. "\n" is adding a newline before the line for readability. It is then calling on the first function (print_params ()) with the argument $*. The second echo is calling the first function but with the argument $* in double quotes. This is repeated for $@
  2. -
-
pass_params () {
-    echo -e "\n" '$* :'; print_params $*
-    echo -e "\n" '"$*" :'; print_params "$*"
-    echo -e "\n" '$@ :';   print_params $@
-    echo -e "\n" '"$@" :'; print_params "$@"
-}
-
-
-
-
    -
  1. In the final part of the code, we're calling on the pass_params () function and passing two arguments: "word" and "words with spaces".
  2. -
-
pass_params "word" "words with spaces"
-
-
-
-

Let's see what happens when we run the script in terminal. Remember, we don't have to pass any arguments in the command line because we have done so in our script.

-
-
-

Questions?

-
-
-

Let's take a look at another example. In this example we'll get a greater understanding of variables and positional parameters:

-
function afunc {
-  echo in function: $0 $1 $2
-  var1="in function"
-  echo var1: $var1
-}
-
-var1="outside function"
-
-echo var1: $var1
-echo $0: $1 $2
-afunc funcarg1 funcarg2
-echo var1: $var1
-echo $0: $1 $2
-
-
-
-

Let's break it down again:

-
    -
  1. In our first function called afunc, using echo we will print in function: and pass 3 positional parameters. We will then define the variable var1 and call it "in function" and print it using echo again.
  2. -
-
function afunc {
-  echo in function: $0 $1 $2
-  var1="in function"
-  echo var1: $var1
-}
-
-
    -
  1. Outside of the function, we'll create another variable also named var1 and give it the value of "outside function"
  2. -
-
var1="outside function"
-
-
-
-
    -
  1. We'll then add the program.
    -a) echo, we'll print var1
    -b) Print 3 positional parameters
    -c) Call the function with two arguments
    -d) Print var1 again
    -e) Print 3 positional parameters again
  2. -
-
echo var1: $var1
-echo $0: $1 $2
-afunc funcarg1 funcarg2
-echo var1: $var1
-echo $0: $1 $2
-
-
-
-

Let's run it in our terminal without any additional arguments and see what the output is.

-
    -
  • Why did echo $0: $1 $2 only output one argument?
  • -
  • Why did var1 change the third time to in function rather than outside function?
  • -
-
-
-

Now let's change and add a few things to see what happens:

-
    -
  • In our terminal, what happens if we pass two arguments by entering ascript.sh arg1 arg2 with ascript.sh being the name of our script and arg1 arg2 being two random arguments?
  • -
  • What happens if we add local to our function?
  • -
-
-
-

Questions?

-
-
-
Parameter Expansion
-

Let's discuss the difference between $a and ${a}

-

$a on its own is fine, but when placed next to another string, it can confuse the shell. For example:

-
    -
  • -

    $a_file the shell will try to expand a variable named a_file rather than a

    -
  • -
  • -

    ${a}_file the shell will now try to expand the variable a

    -
  • -
-

This can help us be more flexible when navigating and manipulating files and directories.

-
-
-

Let's look at the code below to see how this helps us:

-
$ filename="myfile"
-$ touch $filename
-$ mv $filename ${filename}1
-
-

This block of code creates a file based on our defined variable and then renames it with the same variable but with an additional component.

-
-
-

Parameter expansion also helps us if our variables are unset (i.e. do not exist) or are empty. Let's take a look at a couple of examples in the next few slides.

-
-
-
    -
  1. ${parameter:-x} If parameter is unset or empty, expansion results in the value of x. If it's not empty, it results in the value of the parameter
  2. -
-
$ foo=
-$ echo ${foo:-"something else"}
-$ echo $foo
-$ foo=bar
-$ echo ${foo:-"something else"}
-$ echo $foo
-
-

Through this sequence of commands we can see that when $foo is empty, :- results in "something else", but foo itself stays empty. Once we define the variable, :- results in our defined variable.

-
-
-
    -
  1. ${parameter:=x} If parameter is unset or empty, expansion results in the value of x and the value of x is assigned to the parameter. If it's not empty, it results in the value of the parameter
  2. -
-
$ foo=
-$ echo ${foo:="something else"}
-$ echo $foo
-$ foo=bar
-$ echo ${foo:="something else"}
-$ echo $foo
-
-

We can see that when $foo is empty, := assigns "something else" to the variable. If we define the variable again, := results in our newly defined value.

-
-
-
    -
  1. ${parameter:?x} If parameter is unset or empty, this expansion causes the script to exit with an error, and the contents of x are sent to standard error. If parameter is not empty, the expansion results in the value of parameter.
  2. -
-
$ foo=
-$ echo ${foo:?"something else"}
-$ echo $?
-$ foo=bar
-$ echo ${foo:?"something else"}
-$ echo $?
-
-

We can see that when $foo is empty, :? gives us an error, which we can see as echo $? outputs 1. If we define the variable again, :? results in the value of our variable.

-
-
-
    -
  1. ${parameter:+x} If parameter is unset or empty, the expansion results in nothing. If parameter is not empty, the value of x is substituted for parameter; however, the value of parameter is not changed.
  2. -
-
$ foo=
-$ echo ${foo:+"something else"}
-$ echo $foo
-$ foo=bar
-$ echo ${foo:+"something else"}
-$ echo $foo
-
-

Here, :+ resulted in an empty output and the value of $foo remains empty. If we define the variable, :+ will output "something else" in its place, but it will not reassign the variable permanently.

-
-
-
String Operators
-

String operators are extremely valuable for operations on pathnames. They can help extract parts of pathnames, especially if they follow a pattern. Many pathnames typically follow patterns, such as all extensions being preceded by a dot (.).

-

Some character expansions are:

-
    -
  1. ${#parameter}
  2. -
  3. ${parameter:offset}
  4. -
  5. ${parameter:offset:length}
  6. -
-
-
-
    -
  1. ${#parameter} expands into the length of the string contained by the parameter.
  2. -
-
$ foo="Toronto needs more trees"
-$ echo "'$foo' is ${#foo} characters long."
-
-
-
-

With the following expansions, we can extract a portion of the string contained by the parameter.

-
    -
  1. ${parameter:offset} will extract characters from offset characters to the end of the string. For example, counting from the beginning of the string, the n of needs is 8 characters from the beginning. Because we did not specify an end, echo will print from needs onwards.
  2. -
-
$ foo="Toronto needs more trees"
-$ echo ${foo:8}
-
-
-
-
    -
  1. ${parameter:offset:length} will specify the length that we want to extract. This length is counted not from the beginning of the string, but from the offset of the string.
  2. -
-
$ foo="Toronto needs more trees"
-$ echo ${foo:8:5}
-
-

We can see that from the beginning of the string, n is 8 characters in, and from n, s of needs is the 5th character from n. Therefore, our output will be needs.

-
-
-

Questions?

-
-
-

Let's now see how to use patterns in our parameter expansions. There are several ways we can achieve this:

-
    -
  1. ${parameter#pattern}
  2. -
  3. ${parameter##pattern}
  4. -
  5. ${parameter%pattern}
  6. -
  7. ${parameter%%pattern}
  8. -
-
-
-
    -
  1. ${parameter#pattern} removes the shortest leading portion of the string contained in parameter defined by the pattern.
  2. -
-
$ foo=/User/name/Desktop/file.txt.zip
-$ echo ${foo#/*/}
-
-

In this example, we've defined foo as the path of a file with an extension. The pattern /*/ matches a /, any characters (*), and another /; the expansion removes the shortest leading portion that matches (/User/) and returns the rest of the string.

-
-
-
    -
  1. ${parameter##pattern} is very similar to the previous expansion except it removes the longest leading portion of the string.
  2. -
-
$ foo=/User/name/Desktop/file.txt.zip
-$ echo ${foo##/*/}
-
-

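Very similar to the previous example, the pattern /*/ is matched against the beginning of the string, but this time the expansion removes the longest leading portion that matches (/User/name/Desktop/) and returns the rest of the string.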

-
-
-
    -
  1. ${parameter%pattern} removes the shortest ending portion of the string rather than the beginning.
  2. -
-
$ foo=/User/name/Desktop/file.txt.zip
-$ echo ${foo%.*}
-
-
    -
  1. ${parameter%%pattern} removes the longest ending portion of the string.
  2. -
-
$ foo=/User/name/Desktop/file.txt.zip
-$ echo ${foo%%.*}
-
-
-
-

What happens if we change our pattern to #*_?

-

Let's pretend we have a file named "rachaels_file" and we want to know its extension. How would we do that?

-

What if our file was named "rachaels file"?

-
-
-

We can also use expansions to replace the contents of the parameter with a string based on the pattern.

-
    -
  1. ${parameter/pattern/string} replaces only the first occurrence of pattern.
  2. -
  3. ${parameter//pattern/string} replaces all occurrences.
  4. -
  5. ${parameter/#pattern/string} requires the match to occur at the beginning of the string to replace it.
  6. -
  7. ${parameter/%pattern/string} requires the match to occur at the end of the string to replace it.
  8. -
-
-
-

Let's see how this would work:

-
$ foo="MP3.MP3"
-
-
$ echo ${foo/MP3/mp3}
-
-
$ echo ${foo//MP3/mp3}
-
-
$ echo ${foo/#MP3/mp3}
-
-
$ echo ${foo/%MP3/mp3}
-
-
-
-

Can you think of when this might be helpful?

-

Let's say I have a file named "rachaels cool file". I want to rename it because spaces cause problems in filenames. How would I do this?

-
-
-

Questions?

-
-
-
Arithmetic Assignment
-

We have seen assignment before with examples such as foo=5. This is a simple assignment but we can also add complexity to this assignment with other operators.

-
    -
  • $((parameter += x)) assigns the parameter to itself + x
  • -
  • $((parameter -= x)) assigns the parameter to itself - x
  • -
  • $((parameter *= x)) assigns the parameter to itself * x
  • -
  • $((parameter /= x)) assigns the parameter to itself / x
  • -
-
-
-

We can also increase or decrease our parameters by one.

-
    -
  • $((parameter++)) increases parameter by one after the parameter is returned
  • -
  • $((parameter--)) decreases the parameter by one after the parameter is returned
  • -
  • $((++parameter)) increases parameter by one before the parameter is returned
  • -
  • $((--parameter)) decreases parameter by one before the parameter is returned.
  • -
-
-
-

These are very subtle changes so let's see what we mean after and before a parameter is returned:

-
$ foo=1
-$ echo $((foo++))
-$ echo $foo
-
-
$ foo=1
-$ echo $((++foo))
-$ echo $foo
-
-
-
-

Questions

-
-
-
Command Substitution
-

So far we've learned how to get values into variables by using assignment statements (x=5) and positional parameters (x=$1). Another way is command substitution which allows you to use the standard output of the command as if it were a variable.

-
-
-

Let's say we want to assign a variable to the output of a command so that we can apply another command to that output. In this particular case, we want to make a variable equal all files beginning with t. We then want to apply a sort command on that variable:

-
$ x=$(find t*)
-$ echo $x | sort
-
-

Although this seems quite simple now, we'll see how this can be extremely powerful when we move into flow control.

-
-
-

Flow Control

-
-
-

Flow control allows programs to "change directions" based on the results from a given input.

-

Bash supports several constructs:

-
    -
  • if/else
  • -
  • while / until
  • -
  • case
  • -
  • for
  • -
-
-
-

if / else

-
-
-

if/else is a conditional statement that chooses whether or not to do something based on a true or false statement.

-
if condition; then
-    commands
-
-[elif condition; then
-    commands...]
-
-[else
-    commands]
-
-fi
-
-
-
-

Here, we've assigned x the value 5. We've then written an if/else statement that says: if x is equal to 5, then tell us that x equals 5. Otherwise (else), tell us that x does not equal 5.

-
x=5
-
-if [ $x = 5 ]; then
-    echo "x equals 5."
-
-else
-    echo "x does not equal 5."
-
-fi
-
-
-
-

Let's take a look at a more practical example: we want to know if there are any files in our directory that contain spaces.

-
#!/bin/bash
-
-cd ~/Desktop/dir1
-
-if [[ -n $(find t* | grep " ") ]]; then
-	echo "A file contains a space"
-else
-	echo "No files contain a space"
-fi
-
-
-
-

First we've changed our working directory to dir1:

-
cd ~/Desktop/dir1
-
-

We then utilized the command substitution that we've just learned to capture the names of files that contain a space. The -n option checks if the length of a string is nonzero:

-
-n $(find t* | grep " ")
-
-
-
-

By wrapping our output in an if statement, we're stating:

-
    -
  1. if the output of $(find t* | grep " ") is non-empty (its length is nonzero), then print (echo) "A file contains a space"
  2. -
  3. Otherwise (else), print (echo) "No files contain a space"
  4. -
-
-
-

Questions

-
-
-
Control Operators
-

Control operators (&& and ||) allow you to test more than one thing at a time. Their syntax is:

-
if command1 && command2; then
-    ...
-fi
-
-
if command1 || command2;  then
-    ...
-fi
-
-
-
-

With the && operator, command1 is executed and command2 is executed only if command1 is successful

-

With the || operator, command1 is executed and command2 is executed only if command1 is unsuccessful

-
-
-

Example of &&

-
filename=$1
-word1=$2
-word2=$3
-
-if grep $word1 $filename && grep $word2 $filename; then
-    echo "$word1 and $word2 are both in $filename."
-fi
-
-
-
-

Using positional parameters that we learned earlier, what do you think will happen if we run the previous code?

-
    -
  • What happens if both words exist?
  • -
  • What happens if only one word exists?
  • -
  • What happens if no words exist?
  • -
-
-
-

Example of ||

-
filename=$1
-word1=$2
-word2=$3
-
-if grep $word1 $filename || grep $word2 $filename; then
-    echo "$word1 or $word2 is in $filename."
-fi
-
-
-
-

Similarly, what will happen if...

-
    -
  • What happens if both words exist?
  • -
  • What happens if only one word exists?
  • -
  • What happens if no words exist?
  • -
-
-
-

Questions?

-
-
-

While

-
-
-

Using the while command, let's discuss looping. Looping allows portions of a program to repeat as long as the condition is true. The syntax is:

-
while condition; do
-    commands
-done
-
-
-
-

Let's make a basic while script that displays five numbers in sequential order from 1 to 5 and then tells us when it's finished.

-
#!/bin/bash
-
-# script called while-count.sh
-
-count=1
-
-while [ $count -le 5 ]; do
-    echo $count
-    count=$((count +1))
-done
-echo "Finished."
-
-

Why does the loop end?

-
-
-

While loops are extremely helpful to read lines of a file and then perform some command if a line meets a certain condition. Let's explore how to read lines first:

-
file=file1
-
-while read -r line; do 
-	echo $line
-done < "$file"
-
-

In this script, we're creating a variable with our file. We're then reading the file until the last line is read. In this example, we're using an input redirection that we learned earlier (<), which passes the file into the read command. We've also used -r so that backslashes in the file are read literally rather than treated as escape characters.

-
-
-

Because line is acting as a variable, we can also nest a conditional statement inside the loop to check whether $line meets a condition. Let's say we have a file and we want to know every line that has bananas in it.

-

How would we combine the while loop with an if statement?

-
-
-
while read -r line; do
-	if [[ $line == *"bananas"* ]]; then
-		echo $line
-	fi
-done < "$file"
-
-

Here we're reading the file line by line using the while loop. We're then saying if our variable, $line, matches the pattern *"bananas"*, then print the $line.

-
    -
  1. Why have we added the wildcard *?
  2. -
  3. What would happen if we didn't include *?
  4. -
-
-
-

Questions?

-
-
-

Until

-
-
-

Until loops are similar to while, except unlike while loops that run as long as the condition is true, the until loop will run as long as the condition is false

-
until condition; do
-    commands
-done
-
-
-
-

Let's create a script similar to the while example: a basic until script that displays five numbers in sequential order from 1 to 5 and then tells us when it's finished.

-
count=1
-
-until [ $count -gt 5 ]; do
-    echo $count
-    count=$((count +1))
-done
-echo "Finished."
-
-

How is this script different to the while loop?

-
-
-

How might this be useful? Let's say we want to create 3 directories labeled dir1, dir2 and dir3:

-
x=1
-until [[ $x == 4 ]]; do
-	echo "Creating dir$x..."
-	mkdir dir$x
-	((x++))
-done
-
-

Here we've created a variable x=1 because we want our first directory to be dir1. We're then saying up until x=4, make a directory mkdir called dir plus our variable. We've then added 1 to x each iteration using an arithmetic assignment. The echo part is just to give us some feedback on what is happening behind the scenes.

-
-
-

Questions?

-
-
-

for

-
-
-

For our final flow control, we're going to learn a powerful loop called for. The syntax is:

-
for variable [in words]; do
-    commands
-done
-
-

What we might notice is that this flow uses variables that will increment during the execution of the loop.

-
-
-

How would we use for if we wanted to list all files and directories in a folder?

-
for i in $(find *); do
-    echo $i
-done
-
-

The variable i takes, in turn, the value of each word in the output of the command substitution
-$(find *). For each value of i, we are then printing it.

-

Although this seems quite basic and there are simpler ways to list all files and directories (ls), this enables us to do many things with the looped variable i by nesting other loops.

-
-
-

What other ways can we use for loops?
-What other ways can we use for loops within files?

-
-
-

Questions?

-
    -
  • Why do we use i?
  • -
-
-
-
-
Next Week: Git and Github
-
    -
  • Please make sure to come with a GitHub account
  • -
-
-
-

Additional Material

-
-
-
Exit Status
-

Commands issue a value to the system when they terminate, which is an integer in the range of 0 to 255 indicating the success or failure of a command's execution.

-

Conventionally, zero indicates success and any other value indicates failure.

-
-
-

Let's list a directory that we know exists on our system:

-
$ ls -d /usr/bin
-$ echo $?
-
-

-d is an option that lists a directory itself rather than its contents.
-$? returns the exit status of the last executed command. The value being either zero for success or any other number for failure.

-
-
-

If we then list a path that we know does not exist on our system and return the value of $?, what do we expect to happen?

-
$ ls -d /bin/usr
-$ echo $?
-
-
-
-
Exit Command
-

The exit command in a script replaces the return command and accepts a single, optional argument, which becomes the script's exit status.

-

When no argument is passed, the exit status defaults to that of the last command executed.

-

This enables our scripts to indicate an error.

-

If the script is a function in a larger program, we can use return instead of exit with a single, optional argument, allowing our function to indicate an error.

-
-
-
#!/bin/bash
-
-# test-file: Evaluate the status of a file
-
-FILE=~/.bashrc
-
-if [ -e "$FILE" ]; then
-    if [ -f "$FILE" ]; then
-        echo "$FILE is a regular file."
-    fi
-    if [ -d "$FILE" ]; then
-        echo "$FILE is a directory."
-    fi
-else
-    echo "$FILE does not exist"
-    exit 1 
-fi
-
-exit
-
-
-
-
test_file () {
-    # test-file: Evaluate the status of a file
-
-    FILE=~/.bashrc
-
-    if [ -e "$FILE" ]; then
-        if [ -f "$FILE" ]; then
-            echo "$FILE is a regular file."
-        fi
-        if [ -d "$FILE" ]; then
-            echo "$FILE is a directory."
-        fi
-    else
-        echo "$FILE does not exist"
-        return 1
-    fi
-}
-
-
-
-

if / else statements are most frequently used with test

-

test performs a variety of checks and comparisons

-

Its syntax is:

-

test expression

-

or

-

[ expression ]

-
-
-

There are many expressions that are used to evaluate the status of files. Some important File Expressions include:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionIs True If:
-e filefile exists
-d filefile exists and is a directory
-f filefile exists and is a regular file
-r filefile exists and is readable (has readable permissions for the effective user)
-s filefile exists and has a length greater than zero
-
-
-

String Expressions

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionIs True If:
stringstring is not null
-n stringthe length of string is greater than zero
-z stringthe length of string is zero
string1 == string2string1 equals string2
string1 != string2string1 and string2 are not equal
-
-
-

Integer Expressions

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionIs True If:
integer1 -eq integer2integer1 is == to integer2
integer1 -ne integer2integer1 is != to integer2
integer1 -le integer2integer1 is <= to integer2
integer1 -lt integer2integer1 is < to integer2
integer1 -ge integer2integer1 is >= to integer2
integer1 -gt integer2integer1 is > to integer2
-
-
-
Breaking Out Of A Loop
-

Bash has two built-in commands that can be used to control program flow inside loops.

-
    -
  • break command immediately terminates a loop and resumes with the next statement following the loop
  • -
  • continue command skips the remainder of the current iteration that is not needed (i.e. a condition has been met) and resumes with the next iteration of the loop. continue allows for a more efficient execution
  • -
-
-
-
if condition; then
-    if condition; then
-        commands
-        continue
-    fi
-    if condition; then
-        commands
-        continue
-    fi
-else condition; then
-    command
-fi
-
-

If the first if condition is met, then the second one will be skipped and resumed with the next iteration.

-
-
-
if condition; then
-    if condition; then
-        commands
-        continue
-    fi
-    if condition; then
-        commands
-        break
-    fi
-else condition; then
-    command
-fi
-
-

If the second if condition is met, then the break immediately terminates the loop and resumes with the next statement.

-
-
\ No newline at end of file diff --git a/slides-resources/inequity_slides.md b/slides-resources/inequity_slides.md deleted file mode 100644 index 3f4dd65..0000000 --- a/slides-resources/inequity_slides.md +++ /dev/null @@ -1,257 +0,0 @@ ---- -marp: true -theme: uncover -_class: invert -paginate: true - -style: | - img[alt~="center"] { - display: block; - margin: 0 auto; - } - ---- - - - - -# **Inequity** -```bash -$ echo "Data Sciences Institute" -$ echo "Rachael Lam" -``` - ---- -##### **Inequality vs. Inequity** -**Inequality:** -- Uneven distribution of resources -- Unbalanced conditions -- Usually quantitative in nature -
- -**Inequity:** -- Avoidable differences arising from social circumstances -- The state of being unfair or unjust -- Typically qualitative in nature - ---- - - -Inequality usually emerges due to inequity. -
- -**Can you think of any examples?** - ---- -1. Lower salaries for female employees stems from gender inequity - -2. Job opportunities favouring white applicants stems from racial inequity - -3. Higher rate of Indigenous children in the child welfare system stems from a long history of structural inequity - ---- - - -## `Truth and Reconcilliation` -###### `Missing Children and Unmarked Burials` - ---- - -##### **The History of Residential Schools** -- The Residential Schools System dates back to 1870, which was government-funded and church-led - -- The system's intention was to lead a cultural genocide to assimilate Indigenous children - -- More than 130 schools were estabilshed with more than 150,000 Indigenous students attending - -- Children were forcibly removed from their families - -- Families who resisted faced fines and or jail time - ---- -![bg left cover](pics/Colonization.png) - -- Children did not see their families for years or interact with their family within the schools, unable to speak their language or practice their culture - ---- -![bg right cover](pics/IRSburials.jpeg) -- Children received inadequate food, clothing, facilities, education, staff and medical treatment - -- Children faced severe and constant abuse with mortality rates ranging between 30-60% - ---- - -##### **Oral Histories** -- Using language such as *they* and *them* can create distance from ourselves and those we are speaking or learning about - -- Instead, the second person can help us feel closer to the stories of survivors and their testimonies - -- Oral histories is a significant practise for Indigenous Peoples. It is how knowledge is passed on. 
To respect the practice and values of Indigenous Peoples, we will engage with oral testimonies from survivors of the Residential School System - ---- - -##### [Rita's Story](https://legacyofhope.ca/wherearethechildren/stories/watcheston/) - -![w:900 center](pics/rita.png) - ---- - -![bg right contain](pics/medicinewheel.png) -##### **Workshop** -Medicine wheels are used by some Indigenous peoples to represents elements of a whole person. - -We'll use this tool as we listen to stories of Survivors and discuss all together. - ---- -**Physical:** ->Possible items can include the physical descriptions of the home setting before Residential School, the settings at school, or any descriptions of locations after school that stand out to students. This can include all healthy forms of affection and/or inappropriate and harmful physical contact. Sports and games played, and events could be included here. Acts of violence and abuse would also go here. - ---- -**Intellectual:** ->Possible items to be placed here include thoughts the students had, reflections and understandings about life before school, the school itself, or after their time in school that they share. Students may also note what Survivors learned in school, what they thought about that learning and other mental activities required by the school. Students may also note its absence. - ---- -**Spiritual:** ->Separating children from their family, customs, languages and traditional ways of being was thought to be the only way to force them into the dominant religions of Canada. Experiences students could place here would be spiritual teachings from before Residential School, during, and after. Students may find that they put a lot into this category when Survivors talk about their return to culture, family and language as part of their healing journey - ---- -**Emotional:** ->There are likely to be many emotional moments in the Indigenous Survivors’ Oral Testimony. 
Students may struggle with determining whether to put something in this category or another category. Consider physical abuse – because of the nature of the experience, it may seem like it should go in physical; however, because of a strong response of a Survivor, it may seem to belong in the emotional category. Selecting either or both categories are accurate and demonstrates the multi-faceted impacts on Indigenous children - ---- - -![w:1000 center](pics/legacyofhope.png) -[Legacy of Hope](https://legacyofhope.ca/wherearethechildren/stories/) - ---- - - -Let's discuss the stories that we heard and our medicine wheels -
-- What categorical decisions did we make? - -- What was challenging about these decisions? - -- Did you find yourselves noting things you might not otherwise have if you had not been asked specifically for these categories ? - ---- - - - -What are the consquences of this history for Indigenous Peoples today? - ---- -##### **Intergenerational Impacts** -The legacy of Residential Schools have had lasting impacts on Survivors and their families. Some include: -- Alcohol and drug abuse -- Educational blocks -- Higher rates of suicide -- Destruction of social support networks -- Missing and Murdered Indigenous Women and Girls -- Higher rate of children in the child welfare system (an extention of Residential Schools) - ---- -##### **TRC: Calls to Action** -To redress the legacy of colonization and residential schools, the Truth and Reconciliation Commission drafted 94 calls to action in 2012. Since then, only 14 have been completed. - -Reading these call to actions can give us a good idea of some of the inequities that exist today - ---- ->**1.ii.** Providing adequate resources to enable Aboriginal communities and child-welfare organizations to keep Aboriginal families together where it is safe to do so, and to keep children in culturally appropriate environments, regardless of where they reside. - ---- ->**6.** We call upon the federal government to develop with Aboriginal groups a joint strategy to eliminate educational and employment gaps between Aboriginal and non-Aboriginal Canadians. - ---- ->**23.** We call upon all levels of government to:
-**i.** Increase the number of Aboriginal professionals working in the health-care field.
-**ii.** Ensure the retention of Aboriginal health-care providers in Aboriginal communities
-**iii.** Provide cultural competency training for all health-care professionals - ---- ->**30.** We call upon the federal, provincial, and territorial governments to commit to eliminating the overrepresentation of Aboriginal people in custody over the next decade, and to issue detailed annual reports that monitor and evaluate progress in doing so. - ---- - - -How does this relate to data science? - -How do we utilize this knowledge as we practice data science? - ---- -![bg left cover](pics/protest1.png) -It's important as we move forward to understand inequity and the inequality it has produced. -
-It is not enough to discuss crime without discussing the overrepresentation of racialized people in the justice system. - ---- -![bg right cover](pics/protest2.png) -It is not enough to discuss healthcare without discussing the difference of treatment between Indigenous and non-Indigenous people. - ---- -![bg left cover](pics/protest3.png) -It's not enough to discuss the child welfare system without discussing the history of Residential Schools and the impact on Indigenous Peoples. - ---- -**Resources** -- [Activity Workshop](https://secureservercdn.net/198.71.233.37/jjk.2f4.myftpupload.com/wp-content/uploads/2020/02/Let-the-Truth-Be-Told-Guide-2018-V1.44-HR-compressed-1.pdf) -- [Reconciliation Dialogue Workshop](https://reconciliationcanada.ca/staging/wp-content/uploads/2020/02/RDW-Workshop-Booklet_v3final.pdf) -- [TRC Calls to Action](https://www.documentcloud.org/documents/2091412-trc-calls-to-action.html) -- [We Are The Children](https://legacyofhope.ca/wherearethechildren/stories/) - ---- -**Potential Resources:** -- [Reconciliation Dialogue Workshop](https://reconciliationcanada.ca/staging/wp-content/uploads/2020/02/RDW-Workshop-Booklet_v3final.pdf) - - History and intergenerational impacts of residential schools -- [TRC Calls to Action](https://www.documentcloud.org/documents/2091412-trc-calls-to-action.html) - - 94 calls to action to address the legacy of residential schools -- [Historica Canada Video](https://www.youtube.com/watch?v=VFgNI1lfe0A&ab_channel=HistoricaCanada) - - Quick video for the timeline of residential schools -- [Activity Workshop](https://secureservercdn.net/198.71.233.37/jjk.2f4.myftpupload.com/wp-content/uploads/2020/02/Let-the-Truth-Be-Told-Guide-2018-V1.44-HR-compressed-1.pdf) - - Understanding the importance of oral histories - - Discussions of agency, language and allyship - ---- -- [We Are The Children](https://legacyofhope.ca/wherearethechildren/stories/) - - First hand accounts of experiences in residential schools diff --git 
a/slides-resources/pics/CVCS.png b/slides-resources/pics/CVCS.png deleted file mode 100644 index 78aedb8..0000000 Binary files a/slides-resources/pics/CVCS.png and /dev/null differ diff --git a/slides-resources/pics/Colonization.png b/slides-resources/pics/Colonization.png deleted file mode 100644 index be86cf6..0000000 Binary files a/slides-resources/pics/Colonization.png and /dev/null differ diff --git a/slides-resources/pics/DVCS.png b/slides-resources/pics/DVCS.png deleted file mode 100644 index 02d18fa..0000000 Binary files a/slides-resources/pics/DVCS.png and /dev/null differ diff --git a/slides-resources/pics/IRSburials.jpeg b/slides-resources/pics/IRSburials.jpeg deleted file mode 100644 index d6e3c3d..0000000 Binary files a/slides-resources/pics/IRSburials.jpeg and /dev/null differ diff --git a/slides-resources/pics/LVC.png b/slides-resources/pics/LVC.png deleted file mode 100644 index 98bff71..0000000 Binary files a/slides-resources/pics/LVC.png and /dev/null differ diff --git a/slides-resources/pics/NOAA.png b/slides-resources/pics/NOAA.png deleted file mode 100644 index 8821dc7..0000000 Binary files a/slides-resources/pics/NOAA.png and /dev/null differ diff --git a/slides-resources/pics/blobs.png b/slides-resources/pics/blobs.png deleted file mode 100644 index 39cafdd..0000000 Binary files a/slides-resources/pics/blobs.png and /dev/null differ diff --git a/slides-resources/pics/blockquote.png b/slides-resources/pics/blockquote.png deleted file mode 100644 index 97c511e..0000000 Binary files a/slides-resources/pics/blockquote.png and /dev/null differ diff --git a/slides-resources/pics/bobs-burgers-louise.gif b/slides-resources/pics/bobs-burgers-louise.gif deleted file mode 100644 index b296ab5..0000000 Binary files a/slides-resources/pics/bobs-burgers-louise.gif and /dev/null differ diff --git a/slides-resources/pics/census.png b/slides-resources/pics/census.png deleted file mode 100644 index c2070ba..0000000 Binary files 
a/slides-resources/pics/census.png and /dev/null differ diff --git a/slides-resources/pics/codeblock.png b/slides-resources/pics/codeblock.png deleted file mode 100644 index 6c947bd..0000000 Binary files a/slides-resources/pics/codeblock.png and /dev/null differ diff --git a/slides-resources/pics/colabworkflow.png b/slides-resources/pics/colabworkflow.png deleted file mode 100644 index b0fbc16..0000000 Binary files a/slides-resources/pics/colabworkflow.png and /dev/null differ diff --git a/slides-resources/pics/colabworkflow2.png b/slides-resources/pics/colabworkflow2.png deleted file mode 100644 index 828e0dc..0000000 Binary files a/slides-resources/pics/colabworkflow2.png and /dev/null differ diff --git a/slides-resources/pics/covidcases.png b/slides-resources/pics/covidcases.png deleted file mode 100644 index 98a2e20..0000000 Binary files a/slides-resources/pics/covidcases.png and /dev/null differ diff --git a/slides-resources/pics/creatingrepo1.png b/slides-resources/pics/creatingrepo1.png deleted file mode 100644 index f0eb874..0000000 Binary files a/slides-resources/pics/creatingrepo1.png and /dev/null differ diff --git a/slides-resources/pics/creatingrepo2.png b/slides-resources/pics/creatingrepo2.png deleted file mode 100644 index f024603..0000000 Binary files a/slides-resources/pics/creatingrepo2.png and /dev/null differ diff --git a/slides-resources/pics/creatingrepo3.png b/slides-resources/pics/creatingrepo3.png deleted file mode 100644 index 0d4c9cd..0000000 Binary files a/slides-resources/pics/creatingrepo3.png and /dev/null differ diff --git a/slides-resources/pics/delete-branch.png b/slides-resources/pics/delete-branch.png deleted file mode 100644 index d854504..0000000 Binary files a/slides-resources/pics/delete-branch.png and /dev/null differ diff --git a/slides-resources/pics/developer.png b/slides-resources/pics/developer.png deleted file mode 100644 index 1a89e90..0000000 Binary files a/slides-resources/pics/developer.png and /dev/null differ 
diff --git a/slides-resources/pics/difference.png b/slides-resources/pics/difference.png deleted file mode 100644 index b647ec5..0000000 Binary files a/slides-resources/pics/difference.png and /dev/null differ diff --git a/slides-resources/pics/drivingtrends.png b/slides-resources/pics/drivingtrends.png deleted file mode 100644 index be52a92..0000000 Binary files a/slides-resources/pics/drivingtrends.png and /dev/null differ diff --git a/slides-resources/pics/email.jpeg b/slides-resources/pics/email.jpeg deleted file mode 100644 index acfbf9f..0000000 Binary files a/slides-resources/pics/email.jpeg and /dev/null differ diff --git a/slides-resources/pics/error.png b/slides-resources/pics/error.png deleted file mode 100644 index 2dffa34..0000000 Binary files a/slides-resources/pics/error.png and /dev/null differ diff --git a/slides-resources/pics/error2.png b/slides-resources/pics/error2.png deleted file mode 100644 index 8767d6b..0000000 Binary files a/slides-resources/pics/error2.png and /dev/null differ diff --git a/slides-resources/pics/error3.png b/slides-resources/pics/error3.png deleted file mode 100644 index 8ad2eb3..0000000 Binary files a/slides-resources/pics/error3.png and /dev/null differ diff --git a/slides-resources/pics/error5.png b/slides-resources/pics/error5.png deleted file mode 100644 index 2a19285..0000000 Binary files a/slides-resources/pics/error5.png and /dev/null differ diff --git a/slides-resources/pics/fork1.png b/slides-resources/pics/fork1.png deleted file mode 100644 index 9be2b76..0000000 Binary files a/slides-resources/pics/fork1.png and /dev/null differ diff --git a/slides-resources/pics/git_data.png b/slides-resources/pics/git_data.png deleted file mode 100644 index 716f6bd..0000000 Binary files a/slides-resources/pics/git_data.png and /dev/null differ diff --git a/slides-resources/pics/gitcollabs1.png b/slides-resources/pics/gitcollabs1.png deleted file mode 100644 index 642c286..0000000 Binary files 
a/slides-resources/pics/gitcollabs1.png and /dev/null differ diff --git a/slides-resources/pics/gitcollabs2.png b/slides-resources/pics/gitcollabs2.png deleted file mode 100644 index 321b917..0000000 Binary files a/slides-resources/pics/gitcollabs2.png and /dev/null differ diff --git a/slides-resources/pics/gitcollabs3.png b/slides-resources/pics/gitcollabs3.png deleted file mode 100644 index 5682905..0000000 Binary files a/slides-resources/pics/gitcollabs3.png and /dev/null differ diff --git a/slides-resources/pics/github.png b/slides-resources/pics/github.png deleted file mode 100644 index f81aa24..0000000 Binary files a/slides-resources/pics/github.png and /dev/null differ diff --git a/slides-resources/pics/gitignorelarge.png b/slides-resources/pics/gitignorelarge.png deleted file mode 100644 index 99ab14e..0000000 Binary files a/slides-resources/pics/gitignorelarge.png and /dev/null differ diff --git a/slides-resources/pics/gitignoresmall.png b/slides-resources/pics/gitignoresmall.png deleted file mode 100644 index c2a549f..0000000 Binary files a/slides-resources/pics/gitignoresmall.png and /dev/null differ diff --git a/slides-resources/pics/headings.png b/slides-resources/pics/headings.png deleted file mode 100644 index b786874..0000000 Binary files a/slides-resources/pics/headings.png and /dev/null differ diff --git a/slides-resources/pics/help.png b/slides-resources/pics/help.png deleted file mode 100644 index 0f7e145..0000000 Binary files a/slides-resources/pics/help.png and /dev/null differ diff --git a/slides-resources/pics/issues.png b/slides-resources/pics/issues.png deleted file mode 100644 index 759e63f..0000000 Binary files a/slides-resources/pics/issues.png and /dev/null differ diff --git a/slides-resources/pics/issues2.png b/slides-resources/pics/issues2.png deleted file mode 100644 index 1d3b27d..0000000 Binary files a/slides-resources/pics/issues2.png and /dev/null differ diff --git a/slides-resources/pics/issues3.png 
b/slides-resources/pics/issues3.png deleted file mode 100644 index e2aa744..0000000 Binary files a/slides-resources/pics/issues3.png and /dev/null differ diff --git a/slides-resources/pics/legacyofhope.png b/slides-resources/pics/legacyofhope.png deleted file mode 100644 index 144a9f4..0000000 Binary files a/slides-resources/pics/legacyofhope.png and /dev/null differ diff --git a/slides-resources/pics/link.png b/slides-resources/pics/link.png deleted file mode 100644 index f61afe4..0000000 Binary files a/slides-resources/pics/link.png and /dev/null differ diff --git a/slides-resources/pics/master.png b/slides-resources/pics/master.png deleted file mode 100644 index e3bb2d6..0000000 Binary files a/slides-resources/pics/master.png and /dev/null differ diff --git a/slides-resources/pics/medicinewheel.png b/slides-resources/pics/medicinewheel.png deleted file mode 100644 index 263b726..0000000 Binary files a/slides-resources/pics/medicinewheel.png and /dev/null differ diff --git a/slides-resources/pics/merge.png b/slides-resources/pics/merge.png deleted file mode 100644 index c618560..0000000 Binary files a/slides-resources/pics/merge.png and /dev/null differ diff --git a/slides-resources/pics/mergeconflicts.png b/slides-resources/pics/mergeconflicts.png deleted file mode 100644 index 20f4c00..0000000 Binary files a/slides-resources/pics/mergeconflicts.png and /dev/null differ diff --git a/slides-resources/pics/minions.gif b/slides-resources/pics/minions.gif deleted file mode 100644 index 7f0cbf2..0000000 Binary files a/slides-resources/pics/minions.gif and /dev/null differ diff --git a/slides-resources/pics/ordered.png b/slides-resources/pics/ordered.png deleted file mode 100644 index 5b50a4d..0000000 Binary files a/slides-resources/pics/ordered.png and /dev/null differ diff --git a/slides-resources/pics/personalauth.png b/slides-resources/pics/personalauth.png deleted file mode 100644 index 801b852..0000000 Binary files a/slides-resources/pics/personalauth.png and 
/dev/null differ diff --git a/slides-resources/pics/protest1.png b/slides-resources/pics/protest1.png deleted file mode 100644 index e46c40e..0000000 Binary files a/slides-resources/pics/protest1.png and /dev/null differ diff --git a/slides-resources/pics/protest2.png b/slides-resources/pics/protest2.png deleted file mode 100644 index e527b17..0000000 Binary files a/slides-resources/pics/protest2.png and /dev/null differ diff --git a/slides-resources/pics/protest3.png b/slides-resources/pics/protest3.png deleted file mode 100644 index 45b967e..0000000 Binary files a/slides-resources/pics/protest3.png and /dev/null differ diff --git a/slides-resources/pics/pullrequest1.png b/slides-resources/pics/pullrequest1.png deleted file mode 100644 index 41f1703..0000000 Binary files a/slides-resources/pics/pullrequest1.png and /dev/null differ diff --git a/slides-resources/pics/pullrequest2.png b/slides-resources/pics/pullrequest2.png deleted file mode 100644 index 0dfc699..0000000 Binary files a/slides-resources/pics/pullrequest2.png and /dev/null differ diff --git a/slides-resources/pics/pullrequest3.png b/slides-resources/pics/pullrequest3.png deleted file mode 100644 index e77d8ef..0000000 Binary files a/slides-resources/pics/pullrequest3.png and /dev/null differ diff --git a/slides-resources/pics/pullrequest4.png b/slides-resources/pics/pullrequest4.png deleted file mode 100644 index 3ade7d8..0000000 Binary files a/slides-resources/pics/pullrequest4.png and /dev/null differ diff --git a/slides-resources/pics/pullrequest5.png b/slides-resources/pics/pullrequest5.png deleted file mode 100644 index ced584e..0000000 Binary files a/slides-resources/pics/pullrequest5.png and /dev/null differ diff --git a/slides-resources/pics/pullrequestmergeconflict.png b/slides-resources/pics/pullrequestmergeconflict.png deleted file mode 100644 index a18d33c..0000000 Binary files a/slides-resources/pics/pullrequestmergeconflict.png and /dev/null differ diff --git 
a/slides-resources/pics/pullrequestmergeconflict2.png b/slides-resources/pics/pullrequestmergeconflict2.png deleted file mode 100644 index 55bc398..0000000 Binary files a/slides-resources/pics/pullrequestmergeconflict2.png and /dev/null differ diff --git a/slides-resources/pics/pullrequestmergeconflict3.png b/slides-resources/pics/pullrequestmergeconflict3.png deleted file mode 100644 index a1f5cd6..0000000 Binary files a/slides-resources/pics/pullrequestmergeconflict3.png and /dev/null differ diff --git a/slides-resources/pics/pullrequestmergeconflict4.png b/slides-resources/pics/pullrequestmergeconflict4.png deleted file mode 100644 index 1672265..0000000 Binary files a/slides-resources/pics/pullrequestmergeconflict4.png and /dev/null differ diff --git a/slides-resources/pics/rebase1.png b/slides-resources/pics/rebase1.png deleted file mode 100644 index 6ce0695..0000000 Binary files a/slides-resources/pics/rebase1.png and /dev/null differ diff --git a/slides-resources/pics/rebase2.png b/slides-resources/pics/rebase2.png deleted file mode 100644 index e79818d..0000000 Binary files a/slides-resources/pics/rebase2.png and /dev/null differ diff --git a/slides-resources/pics/rhelp.png b/slides-resources/pics/rhelp.png deleted file mode 100644 index 6a2abad..0000000 Binary files a/slides-resources/pics/rhelp.png and /dev/null differ diff --git a/slides-resources/pics/rhelp2.png b/slides-resources/pics/rhelp2.png deleted file mode 100644 index 6899ece..0000000 Binary files a/slides-resources/pics/rhelp2.png and /dev/null differ diff --git a/slides-resources/pics/rita.png b/slides-resources/pics/rita.png deleted file mode 100644 index f77eb19..0000000 Binary files a/slides-resources/pics/rita.png and /dev/null differ diff --git a/slides-resources/pics/settings.png b/slides-resources/pics/settings.png deleted file mode 100644 index 288009d..0000000 Binary files a/slides-resources/pics/settings.png and /dev/null differ diff --git a/slides-resources/pics/stackoverflow1.png 
b/slides-resources/pics/stackoverflow1.png deleted file mode 100644 index a0f81d3..0000000 Binary files a/slides-resources/pics/stackoverflow1.png and /dev/null differ diff --git a/slides-resources/pics/stackoverflow2.png b/slides-resources/pics/stackoverflow2.png deleted file mode 100644 index bd802c0..0000000 Binary files a/slides-resources/pics/stackoverflow2.png and /dev/null differ diff --git a/slides-resources/pics/stackoverflow3.png b/slides-resources/pics/stackoverflow3.png deleted file mode 100644 index e8c0966..0000000 Binary files a/slides-resources/pics/stackoverflow3.png and /dev/null differ diff --git a/slides-resources/pics/testing-commit.png b/slides-resources/pics/testing-commit.png deleted file mode 100644 index eeaac5f..0000000 Binary files a/slides-resources/pics/testing-commit.png and /dev/null differ diff --git a/slides-resources/pics/testing-head.png b/slides-resources/pics/testing-head.png deleted file mode 100644 index 0483b49..0000000 Binary files a/slides-resources/pics/testing-head.png and /dev/null differ diff --git a/slides-resources/pics/testing.png b/slides-resources/pics/testing.png deleted file mode 100644 index 764c23f..0000000 Binary files a/slides-resources/pics/testing.png and /dev/null differ diff --git a/slides-resources/pics/text-styling.png b/slides-resources/pics/text-styling.png deleted file mode 100644 index 0fe65a4..0000000 Binary files a/slides-resources/pics/text-styling.png and /dev/null differ diff --git a/slides-resources/pics/topics.png b/slides-resources/pics/topics.png deleted file mode 100644 index 730e3e9..0000000 Binary files a/slides-resources/pics/topics.png and /dev/null differ diff --git a/slides-resources/pics/topics2.png b/slides-resources/pics/topics2.png deleted file mode 100644 index 18ef375..0000000 Binary files a/slides-resources/pics/topics2.png and /dev/null differ diff --git a/slides-resources/pics/topics3.png b/slides-resources/pics/topics3.png deleted file mode 100644 index 316f586..0000000 
Binary files a/slides-resources/pics/topics3.png and /dev/null differ diff --git a/slides-resources/pics/torontocrime.png b/slides-resources/pics/torontocrime.png deleted file mode 100644 index b6feb0b..0000000 Binary files a/slides-resources/pics/torontocrime.png and /dev/null differ diff --git a/slides-resources/pics/unordered.png b/slides-resources/pics/unordered.png deleted file mode 100644 index 3ec7a26..0000000 Binary files a/slides-resources/pics/unordered.png and /dev/null differ diff --git a/slides-resources/pics/workflow.png b/slides-resources/pics/workflow.png deleted file mode 100644 index b1c7019..0000000 Binary files a/slides-resources/pics/workflow.png and /dev/null differ diff --git a/slides-resources/pics/workflow1.png b/slides-resources/pics/workflow1.png deleted file mode 100644 index 80d647b..0000000 Binary files a/slides-resources/pics/workflow1.png and /dev/null differ diff --git a/slides-resources/pics/workflow2.png b/slides-resources/pics/workflow2.png deleted file mode 100644 index 971e846..0000000 Binary files a/slides-resources/pics/workflow2.png and /dev/null differ diff --git a/slides-resources/pics/workflow3.png b/slides-resources/pics/workflow3.png deleted file mode 100644 index 45bea59..0000000 Binary files a/slides-resources/pics/workflow3.png and /dev/null differ diff --git a/slides-resources/pics/workflow4.png b/slides-resources/pics/workflow4.png deleted file mode 100644 index 7ed0d9f..0000000 Binary files a/slides-resources/pics/workflow4.png and /dev/null differ diff --git a/slides-resources/pics/workflow5.png b/slides-resources/pics/workflow5.png deleted file mode 100644 index 8d48f61..0000000 Binary files a/slides-resources/pics/workflow5.png and /dev/null differ diff --git a/slides-resources/pics/workflow6.png b/slides-resources/pics/workflow6.png deleted file mode 100644 index 60f7401..0000000 Binary files a/slides-resources/pics/workflow6.png and /dev/null differ diff --git a/slides/markdown/git_slides.md 
b/slides/markdown/git_slides.md deleted file mode 100644 index e75c6ca..0000000 --- a/slides/markdown/git_slides.md +++ /dev/null @@ -1,2185 +0,0 @@ ---- -marp: true -theme: uncover -_class: invert -paginate: true - -style: | - img[alt~="center"] { - display: block; - margin: 0 auto; - } - ---- - - - - -# **Version Control and GitHub** -```console -$ echo "Data Sciences Institute" -$ echo "Rachael Lam" -``` - ---- -**Prerequisites:** -- GitHub account - ---- -**Key Texts:** -- Chacon and Straub, 2014, Pro Git, 2nd Edition. -- Timbers, Campbell, Lee, 2021, Data Science: A First Introduction, https://ubc-dsci.github.io/introduction-to-datascience/ - ---- -**References** -- Chacon and Straub: Chapter 1 -- Timbers: Chapter 12.3 - 12.4, 13.3.1 - ---- - - -## `Version Control` - ---- -##### **What is Version Control?** -Version control is a system that records changes to a file or a set of files over time so that we can recall a specific version later. We may already do this by copying files to another directory to save past versions.While it is simple, it lacks flexibility and complexity. - ---- -Version Control Systems (VCS) can do a number of things and can be applied on nearly any type of file on our computers: -- revert files to a previous state -- revert entire project to a previous state -- compare changes over time -- see who modified something last -- who introduced an issue and when -- recover lost files - ---- -##### **Local Version Control Systems** -Local VCSs were developed to keep track of changes to our files by putting them in a version database. - -![bg right contain](pics/LVC.png) - ---- -##### **Centralized Version Control Systems** -Centralized VCSs (CVCS) were developed to enable collaboration with developers on other systems. CVCSs have a single server that contains all the versioned files. 
- -![bg left contain](pics/CVCS.png) - ---- -CVCSs allow some level of transparency to others' work and give Administrators a level of control over what developers can and can't do. - -Unfortunately, a single server means that if it ever goes down, all collaboration halts for however long the outage lasts. Additionally, if backups haven't been kept, work could easily be lost. - ---- -##### **Distributed Version Control Systems** -To handle the limitations of LVCSs and CVCSs, Distributed VCSs were created. This includes Git, Mercurial and Bazaar. - -Collaborators mirror the entire repository; therefore, if a server dies, any one of the collaborators' repositories can be copied back to the server to restore it. - ---- -![w:560 center](pics/DVCS.png) - ---- - - -Questions? - ---- - - -## `Git` - ---- -##### **Git Basics** -Git thinks of data in a very different way than other VCSs. Instead of storing a set of files and the changes over time, Git thinks of its data more like a set of snapshots of a mini file system. - -If files have not changed, Git does not store the file again, it links to the previous identical file already stored. - ---- -![w:1100 center](pics/git_data.png) - ---- -##### **Local Operations** -Most operations on Git only need local files and resources to operate. Git also keeps the entire history of our projects on our local disks meaning we can see changes made months ago without a remote server. - -We also don't need to be connected to the server to get work done, rather we only need to be connected when we want to upload our work. - ---- -##### **Benefits** -Git uses a check-summing mechanism called *SHA-1 hash* which is calculated based on the contents of a file or directory structure in Git. It looks something like this: -``` -24b9da6552252987aa493b52f8696cd6d3b00393 -``` -This checksum means it's impossible to change the contents of any file or directory without Git knowing about it. 
- -Git generally only adds data, making it fairly difficult to lose data once we've committed, which we'll learn about later. - ---- -##### **The Three States** -There are three main states that our files can reside in: -- Committed: - - data is safely stored on local database -- Modified: - - file has been changed but not yet committed -- Staged: - - modified file has been marked to go into the next commit - ---- -##### **The Three Main Sections** -There are three main sections to a Git project: -- The Git directory -- The working directory -- The staging area - ---- -##### **The Git Directory** -The Git directory is where Git stores the metadata and object database for our projects. It is what is copied when we clone a repository from another computer. - ---- -##### **The Working Directory** -The working directory is a single checkout of one version of our projects. These files are pulled out of the compressed database in the Git directory and placed on the disk for us to modify. - ---- -##### **The Staging Area** -The staging area is a simple file that stores information about what will go into our next commit. - ---- -##### **Workflow** -A basic workflow will look something like this: -1. Modify files in our working directory -2. Stage the files in the staging area -3. Commit the changes which takes the files from the staging area and stores them on the Git directory. - ---- - - -Questions? - ---- - - -## `Installing Git` - ---- -Typically, Git is already installed on our system but we can check for that using the `git` command: -```console -$ git --version -``` -**Does anyone not see a version?** - ---- -##### **Installing on Linux** -If you're on Ubuntu: -```console -$ sudo apt install git -``` -
- -If you're on Fedora, RHEL or CentOS: -```console -$ sudo dnf install git -``` -```console -$ sudo yum install git -``` - ---- -##### **Installing on Mac** - -You can install Git via Homebrew, if you have Homebrew installed (https://brew.sh/). -```console -$ brew install git -``` - -Finally, you can install Git using the installer at this link: https://sourceforge.net/projects/git-osx-installer/ - ---- -##### **Installing on Windows** -The download will start automatically through this link: https://git-scm.com/download/win - ---- - - -Questions? - ---- - - -## `Git Setup` - ---- -The first thing to do now that we have Git installed on our system is to customize it. These changes will remain despite any upgrades to Git that we install. - -Using the command `git config`, we can set configuration variables that control all aspects of how Git looks and operates. - ---- -##### **Checking Configurations** -Before we change any of our global configurations, we can check what they are: -```console -$ git config --list -``` -If we haven't configured Git, we can do that now! - ---- -##### **Identity** -First, we'll set our username and email address. Git uses this information every time we commit. -```console -$ git config --global user.name "Rachael Lam" -$ git config --global user.email "rachael.a.lam@gmail.com" -``` -The option `--global` means that we only have to pass this through once. - ---- -##### **Editor** -Next, we'll configure the default text editor that Git opens when it needs us to type in a message. Git uses our system's default editor (usually Vi or Vim) but we can change it if we prefer. 
If we want to change the editor to emacs, we would do so as shown below: -```console -$ git config --global core.editor emacs -``` - ---- -##### **Diff Tool** -We can also set the default diff tool which is used to resolve merge conflicts: -```console -$ git config --global merge.tool vimdiff -``` - ---- -##### **Checking the Setting** -We can use the `git config --list` command to see all Git settings. To see the value of a specific setting: -```console -$ git config user.name -``` - ---- -##### **Help** -If we ever need help, even offline, we can access the manual page three ways: -1. `$ git help <verb>` -2. `$ git <verb> --help` -3. `$ man git-<verb>` - -For example, we can get help for the `config` command: -```console -$ git help config -``` - ---- - - -Questions? - ---- - - -# **Git Basics** - ---- -**References** -- Chacon and Straub: Chapter 2 - ---- - - -## `$ git init` / `$ git clone` - ---- -##### **Repositories in an Existing Directory** -We're quickly getting into how to start our first Git repository, commonly known as a repo. First we'll learn how to turn an existing directory into a Git repo: -```console -$ git init -``` -```console -$ git init -b main -``` -Here we're creating a new subdirectory named `.git` that will contain all our necessary repo files. The option `-b` will create a new branch called main. - ---- -##### **Cloning an Existing Repository** -If we want to collaborate on an existing repo, we need to clone the repo from GitHub. If we don't have a project set up yet, we'll need to do that first. - ---- -1. Create a new project -
- -![w:1100 center](pics/creatingrepo1.png) - ---- -2. Add name and optional description -
- -![w:1100 center](pics/creatingrepo2.png) - ---- -3. Choose public or private and add initialize -
- -![w:1000 center](pics/creatingrepo3.png) - ---- -There are a number of automatically generated files such as log files that we might not want Git to add or show as untracked. We can create a file called `.gitignore` to ignore the automatically generated files. - -The `.gitignore` is dependent on the type of coding language you are using but can also be modified to fit specific purposes. - ---- -If we created a repo on GitHub, we can choose a `.gitignore` template. We can select a template specific to the coding language we are using. - -![w:900 center](pics/gitignoresmall.png) - ---- -Once we have our repo, we can clone it: -```console -$ git clone https://github.com/rachaellam/git-module.git -``` -Using this code, we've created a directory called `git-module` (by taking the last part of the link), initialized a `.git` directory inside it, pulled all data for that repository and checked out the latest copy. - ---- -The URL used in the previous code block is copied directly from GitHub by clicking Code and copying the HTTPS link: - -![w:1150 center](pics/github.png) - ---- -If we want to change the name of the repo, we can specify that as the next command line option: -```console -$ git clone https://github.com/rachaellam/git-module.git mymodule -``` - ---- - - -Questions? - ---- - -# **Git Commands** - ---- -**References** -- Chacon and Straub: Chapter 2 -- Timbers: Chapter 12.5 - ---- - - -## `$ git status` - ---- -##### **Tracked and Untracked Files** -Files in our working directory can either be tracked or untracked. Tracked files are files that were in the last snapshot and can be unmodified, modified or staged. Untracked files are files that aren't in our last snapshot or staging area. - -When we modify a file, Git keeps track of the modifications even before we've decided to commit. We can then stage the modifications and then commit. 
- ---- -![w:1000 center](pics/workflow.png) - ---- -##### **File Status** -To better understand what state our files are in, we can check the status: -```console -$ git status -``` -If we've just created our repo, we should see (or something similar): -```console -# On branch main -# Your branch is up to date with 'origin/main'. - -# nothing to commit, working tree clean -``` - ---- -Let's now add a README.md file, because every good repo has a good README. - -```console -$ touch README.md -``` - -And see the status: - -```console -$ git status -``` - ---- -```console -On branch main - -No commits yet - -Untracked files: - (use "git add ..." to include in what will be committed) - README.md -``` -Here we can see that we still haven't committed anything and that we have an untracked README.md file. Git also gives us a bit of information including how to add a file to track. - ---- - - -## `$ git add` - ---- -##### **Tracking New Files** -To track new files, or stage new files, we can use `git add` along with the file that we want to track: -```console -$ git add README.md -``` -We can run `git status` again to see the results of `git add`. - ---- -```console -On branch main - -No commits yet - -Changes to be committed: - (use "git rm --cached ..." to unstage) - new file: README.md -``` -Now we can see that our README.md file is staged to be committed. - ---- -Let's say we add some more info to our README.md file, which has now been tracked. If we run `git status`, we can know: -```console -On branch main - -No commits yet - -Changes to be committed: - (use "git rm --cached ..." to unstage) - new file: README.md - -Changes not staged for commit: - (use "git add ..." to update what will be committed) - (use "git restore ..." 
to discard changes in working directory) - modified: README.md - -``` - ---- -We can stage our additional changes and check the status: -```console -$ git add README.md -$ git status -``` -```console -On branch main - -No commits yet - -Changes to be committed: - (use "git rm --cached ..." to unstage) - new file: README.md - -``` - ---- -Let's try adding another file into our directory. It can be something that you've been working on independently, or we can add our project from the previous Unix module. - ---- -If we modify many things at once, we can add the option `-A` to add all files, rather than adding one by one -```console -$ git add -A -``` -A little note about this: it's best to upload your work in small chunks for readability and for collaboration. So if you have a bunch of files, it's recommended to split them into smaller chunks. - ---- - - -Questions? - ---- - - -## `$ git diff` - ---- -If we want to see more details of the changes that we've made, we can use the command `git diff`. - -`git diff` compares what is in our working directory to what is in our staging area. If we've made changes to our files without running `git add`, we'll see the comparison. If there are no differences, nothing will be shown. - ---- -```console -diff --git a/README.md b/README.md -index e69de29..4711fce 100644 ---- a/README.md -+++ b/README.md -@@ -0,0 +1 @@ -+# git-r -\ No newline at end of file -``` - ---- -```console -diff --git a/README.md b/README.md -``` -This is telling us what we're comparing. In this case, it's the difference between a previous version of the README file and the current one - ---- -```console -index e69de29..4711fce 100644 -``` -Here is some meta data, or hash identifier that we likely won't need. - ---- -```console ---- a/README.md -+++ b/README.md -``` -This is acting as a legend. 
Changes from `a/README.md` are marked by `---` and changes from `b/README.md` are marked by `+++` - ---- -```console -@@ -0,0 +1 @@ -+# git-r -``` -Here we're being told the lines that have changed and what on those lines changed. Because there was nothing removed, this is a bit of a simplistic representation. - ---- -We might see something more like... -```console -@@ -21,5 +77, 12 -``` -This is telling us 5 lines were removed starting on line 21 and 12 lines were added starting on line 77. - ---- -##### **--staged** -If we want to see the details of what will go into the next commit, we can use `git diff` with the option `--staged` - ---- - - -## `$ git commit` - ---- -Once we've staged your selected files, it's time to commit the changes. Anything that wasn't staged (any modifications since `git add`) will not be included in the commit. - -`git commit` is most easily run with the option `-m`. This adds a message to your commit - -```console -$ git commit -m "adding a message here" -``` - ---- -##### **-m** -Messages should be clear. They can also be extremely detailed if needed. By not including the option `-m`, Git will provide the latest output of `git status`. If you want even more information, you can use the option `-v`. - ---- -Messages are extremely important for our own records and also when collaborating with others. They can act as a reminder for what our commit includes, and also tell our collaborators what we did last. - -It's important to commit often as well so that merges are easier to locate and fix. - -It's also helpful if you want to go back to an earlier version. You have more options to choose from. - ---- -Practices around messages can vary but if we want to add a longer message we can remove the `-m` option. -```console -$ git commit -``` -Then hit `i` to add a message. You'll see `-- INSERT --` at the bottom and you can begin typing your message. - -When finished, press `esc` then `:wq` or `:x`. - -`w` means write and `q` means quit. 
`x` is shorthand for `wq` - ---- -``` -Short (50 chars or less) summary of changes - -More detailed explanatory text, if necessary. Wrap it to about -72 characters or so. In some contexts, the first line is treated -as the subject of an email and the rest of the text as the body, -the blank line separating thesummary from the body is critical -(unless you omit the body entirely). - -Further paragraphs come after blank lines. - -- Bullet points are okay, too - -- Typically a hyphen or asterisk is used for the bullet, preceded - by a single space with blank lines in between, but conventions - vary here -``` - ---- -##### **-a** -If we want to commit all the files we've worked on without putting them in the staging area, we can use the option `-a`. This will avoid using `git add` and condense our workflow. -```console -$ git commit -a -m "skip staging add message" -``` -Here we've used two options, `-a` and `-m` to skip the staging and add a message. - ---- - - -Questions? - ---- - - -## `$ git rm` - ---- -If we delete a file from our working directory after staging it using `rm` without `git`, the file will show up in our untracked files. We can then use `git rm` to stage the file's removal. - -Let's follow the code below to understand this better: -```console -$ touch test.sh -$ git status -$ rm test.sh -$ git status -``` -Because we haven't tracked the `test.sh` file so we can remove it and we don't need to tell git to also remove it. - ---- -What happens if we add a file to our staging area but then want to delete it? Let's try the two codes below: -```console -$ touch test.sh -$ git add test.sh -$ git rm test.sh -``` - -```console -$ touch test.sh -$ git add test.sh -$ rm test.sh -$ git rm test.sh -``` - ---- -##### **-f** -If we've modified and staged a file, we have to force the removal with the option `-f`. This is a safety feature so that we don't accidentally delete something. 
- ```console - $ touch testfile - $ git add testfile - $ git rm -f testfile - ``` - - --- - ##### **--cached** - The option `--cached` allows us to remove a file from our staging area without permanently deleting it from our local drive. -```console -$ git rm --cached testfile -``` -We can use wildcards to remove files from our staging area in bulk, although we have to add a backslash in front of `*` because Git does its own filename expansion. -```console -$ git rm -f \*.txt -``` - ---- -We can also delete files in a folder of our working directory: -```console -$ git rm -f dir1/\*.sh -``` - ---- - - -## `$ git mv` - ---- -Using `git mv`, we can rename files conveniently and succinctly: -```console -$ git mv test.txt test.sh -``` - ---- - - -Questions? - ---- - - -## `$ git log` - ---- -Sometimes we might want to see a history of our commits or we want to see previous commits after cloning an existing repository. We can do this using the `git log` command. - -```console -$ git log -``` -There are a number of options that help us see even more, or sometimes less, information about each commit. - ---- -If we attempt to run a log before any commits have been made, we will get an error: -```console -fatal: your current branch 'main' does not have any commits yet -``` - ---- -##### **-p** -Adding the option `-p` will show the `diff` introduced in each commit. 
We can also pass a number option that will limit the number of entries shown: - -```console -$ git log -p -2 -``` -Entries can be any number of entries (`-`)but is limited to one page of log out puts - ---- -##### **--stat** -The `--stat` option shows abbreviated stats for each commit: -```console -$ git log --stat -``` - ---- -```console -commit 6c91df668d1899317a643153bd169d37fe05d9f1 (HEAD -> main) -Author: Rachael Lam -Date: Fri Feb 18 14:56:27 2022 -0500 - - first commit - - .gitignore | 4 ++++ - README.md | 1 + - test.Rproj | 13 +++++++++++++ - testfile.r | 0 - 4 files changed, 18 insertions(+) -``` -`+` or `-`(if there were any) show the number of insertions or deletions. We can also see the date of the commit, who committed and the message. - ---- -##### **--pretty** -The `--pretty=` option is an interesting feature that enables us to specify the log output when we combine it with `format:`, creating an extremely useful data extraction feature: -```console -$ git log --pretty=format:"%h - %an, %ar : %s" -``` - ---- -##### **Formatting Options** -Option | Description -:-----|:------ -%H | Commit hash -%h | Abbreviated commit hash -%t | Abbreviated tree hash -%p | Abbreviated parent hashes - ---- -Option | Description -:-----|:------ -%an | Author name -%ae | Author email -%ad | Author date (ex. Thu Dec 2 14:14:55 2021 -0500) -%ar | Author date relative (ex. 26 hours ago) -%cn | Committer name -%s | Subject (-m) - ---- -##### **--since / --until** -The options `--since=` and `--until=` are more usually more useful than `-(n)`. They produce the logs of any time before (`--until`) or after (`--since`) a certain date. 
You can specify an exact date or relative date: -```console -$ git log --since=2.weeks -``` -```console -$ git log --since="2 days 3 minutes ago" -``` -```console -$ git log --until="2021-11-20" -``` - ---- -We can also combine log options to generate specific outputs: -```console -$ git log --pretty=format:"%h: %s" --author=Rachael -``` -```console -$ git log --after="2020-11-01" --before="2020-11-30" -``` - ---- -Finally, and a favourite for quick glances: - -```console -$ git log --oneline -``` - ---- - - -Questions? - ---- - - -## `undo undo undo` - ---- -##### **Changing Commit** -If we already committed a few files but forgot to add one or made modifications since our commit that we want to add, we can use the option `--amend` -```console -$ git commit -m "initial commit" -$ git add missed_file -$ git commit --amend -m "initial commit with missed_file" -``` -We can still add the `-m` option to add a new comment. - ---- -##### **Unstaging** -When we want to remove a file from our staging area because we accidentally added one too many files, we can use the code below: -```console -$ git reset HEAD README.md -``` -If we ever forget how to do this, running `git status` will remind us. - ---- -##### **Unmodify** -We can also revert our files back to the version from our previous commit using `git checkout --`. It's important to realize that this command essentially rewrites the file so any changes that were made will not be able to be recovered. - -As well, any commit can usually be recovered but anything that was never committed will most likely be lost forever. -```console -$ git checkout -- README.md -``` - ---- -##### **Select Previous Commit** -To select a previous commit to revert to, we need the hash of the commit: -```console -$ git log -$ git checkout [commit_hash] file1 -``` -This can be used forwards or backwards, i.e. you can also "revert" to a commit that is later than your current version. 
- -You can also revert several files at the same time -```console -$ git checkout file1 file2 -``` - ---- - - -Questions? - ---- - -# **Remote Repositories** - ---- -**References** -- Chacon and Straub: Chapter 2 -- Timbers: Chapter 12.5-12.6 - ---- - - -## `$ git remote` - ---- -Remote repos are versions of our projects that are hosted on the internet or some network. This allows us to collaborate with others outside of our local repo. - -We can see the remote servers we've configured using `git remote`. If we add the option `-v`, we can see the URL: -```console -$ git remote -v -``` -Cloned repos will be displayed as origin by default. - ---- -##### **Remote Setup** -Before we connect our local repo to a remote repo, we need to setup our permissions. This is so we can send and retrieve work to and from our remote repositories. There are two ways to do this: - -1. Access Tokens - -2. SSH - ---- -##### **Access Tokens** - -               ![w:350 left](pics/settings.png)           ![w:340 right](pics/developer.png) - ---- -![w:1150 center](pics/personalauth.png) - ---- -##### **SSH** -```console -$ ls -al ~/.ssh -``` -If SSH has not been set up on your computer, you should see something like: - -```console -ls: cannot access '/c/Users/rachaellam/.ssh': No such file -or directory -``` - -Otherwise you'll see filenames `id_ed25519` and `id_ed25519.pub` OR `id_rsa` and `id_rsa.pub` which represent your public and private keys. - ---- -```console -$ ssh-keygen -t ed25519 -C "rachael.lam@mail.utoronto.ca" -``` -Use the code above but with your email. This will output: - -```console -Generating public/private ed25519 key pair. -Enter file in which to save the key (/c/Users/rachaellam/.ssh/ -id_ed25519): -``` -Press `enter` to use the default file. - ---- -You will then be prompted to add a passphrase. You cannot reset this passphrase, so be sure to remember it or write it down somewhere safe: - -```console -Created directory '/c/Users/Vlad Dracula/.ssh'. 
-Enter passphrase (empty for no passphrase): -``` - -It will then ask you to reenter the passphrase: - -```console -Enter same passphrase again: -``` - ---- -You will then get a confirmation with a random piece of art at the end. It will show the private key (*identification*) which you should never share, the *public key* and the *key fingerprint* which is a shorter version of the public key. -```console -Your identification has been saved in /c/Users/rachaellam/.ssh/ -id_ed25519 -Your public key has been saved in /c/Users/rachaellam/.ssh/ -id_ed25519.pub -The key fingerprint is: -SHA256:SMSPIStNyA00KPxuYu94KpZgRAYjgt9g4BA4kFy3g1o -rachael.lam@mail.utoronto.ca -``` - ---- -Now we can check that we have the public and private key files: -```console -$ ls -al ~/.ssh -``` - ---- -It's time to give GitHub our public key so let's read the public key file and copy it: -```console -$ cat ~/.ssh/id_ed25519.pub -``` -Output: -```console -ssh-ed25519 AAAAC3NzaC1lZDI1NPN7AAAAIDmRA3d51X0uu9wXek559gfn6UFNF -69yZjChyBIU2qKI rachael.lam@mail.utoronto.ca -``` -Copy the long public key to add to GitHub. - ---- -##### Settings --> SSH and GPG keys --> New SSH key -Add a title like `rachael's key` and paste the public key then click *Add SSH key*. - -Finally, we can check that it's been authenticated: -```console -$ ssh -T git@github.com -``` - ---- -##### **remote add** -To add a remote repo, we can use `git remote add` followed by the name and URL. 
Now we can connect our local repo to a remote repo: -```console -$ git remote add origin https://github.com/rachaellam/git-r.git -$ git remote -v -``` -After checking we'll see: -```console -origin https://github.com/rachaellam/git-r.git (fetch) -origin https://github.com/rachaellam/git-r.git (push) -``` - ---- -If we want to see more information about a remote repo, we can use the command: -```console -$ git remote show origin -``` -Here we can see the URL that we're fetching and pulling from, our remote branches, and configurations for git push (to the main branch or another). - ---- -To send and retrieve work between our local and remote repositories, we have to authenticate a personal access token: - -               ![w:350 left](pics/settings.png)           ![w:340 right](pics/developer.png) - ---- -![w:1150 center](pics/personalauth.png) - ---- - - -Questions? - ---- - - -## `$ git fetch` / `$ git push` - ---- -When collaborating with others, changes might be made that are important to copy to your local directory. `git fetch` will get any new changes but it won't merge it to our work or modify our work. -```console -$ git fetch origin -``` - ---- -`git pull` will automatically fetch and merge a remote branch to our current branch (more on branching later). It's a good practice to pull before every work session, especially when working with others. Otherwise, a collaborator might have made changes, and you won't be able to push your changes to GitHub. -```console -$ git pull -``` - ---- -If we've create our remote repository using `init` and `remote add`, we need to specify the remote that we want to pull to and the branch we want to pull from. -```console -$ git pull origin main -``` -`origin` being the name of the remote repo we created earlier and `main` being the main branch on our GitHub repo. - ---- - - -Questions? 
- ---- - - -## `$ git push` - ---- -When we're ready to share our modifications, we have to push our project and files upstream using `git push` -```console -$ git push origin main -``` -Here we're pushing to our origin server on your main branch. The main branch is sometimes called the master branch. - -This command only works if we have write access and if no collaborator is pushing upstream at the same time as we are. We'd have to instead pull and merge their work before pushing our own. - ---- - - -Questions? - ---- - -# **Git Branching** - ---- -**References** -- Chacon and Straub: Chapter 3 -- Timbers: Chapter 12.8 - ---- -Branching allows us to diverge from the main line to do work without accidentally messing with the main line. This helps with testing without making any accidental changes to the working branch. - -To understand how branching works, let's go back and understand how Git saves files. -- blob -- tree -- pointer - ---- -![w:1000 center](pics/blobs.png) - ---- -A branch is a way to move different pointers to a specific commit. In Git, the default branch is named *master* or *main*. When we first start making commits, we start at the master branch that automatically points to the last commit made. - -![w:700 center](pics/master.png) - ---- - - -## `$ git branch` - ---- -We can make a new branch which creates a new pointer for us to move around. We can do this by using the command `git branch`: -```console -$ git branch testing -``` - ---- -Here, we've created a branch called testing, which means we've created a new pointer that could point to our current commit. - -![w:800 center](pics/testing.png) - ---- - - -## `$ git checkout` - ---- -Git tracks what branch we're on using a pointer called `HEAD`. If we move the `HEAD` to the branch *main*, we'll see: -```console -Already on 'main' -``` -To move `HEAD` to point to the testing branch that we just created, we use `git checkout`: -```console -$ git checkout testing -``` -and we should see.. 
-```console -Switched to branch 'testing' -``` - ---- -![w:800 center](pics/testing-head.png) - ---- -If we make some changes to our testing branch and commit, our head will move with the new commit. - -![w:800 center](pics/testing-commit.png) - ---- -If we want to go back to an older version of our project and make changes, we can use `git checkout` again to redirect the head back to our master branch: -```console -$ git checkout main -``` -Using this command will move the `HEAD` pointer back to our master branch and revert our files in our working directory back to the snapshot that the master branch points to. - ---- - - -Questions? - ---- - - -## `Branching and Merging` - ---- -Let's take a look at a workflow that you might encounter: -```console -$ git commit -m "commits to master branch" -``` -![w:800 center](pics/workflow1.png) - ---- -```console -$ git checkout -b iss53 -``` -![w:600 center](pics/workflow2.png) - ---- -```console -$ git commit -a -m "commits to iss53" -``` -![w:700 center](pics/workflow3.png) - ---- -```console -$ git checkout master -$ git checkout -b 'hotfix' -$ git commit -m "commits to hotfix" -``` -![w:600 center](pics/workflow4.png) - ---- -```console -$ git checkout master -$ git merge hotfix -``` -![w:500 center](pics/workflow5.png) - ---- - - -## `$ git merge` - ---- -In the last step we saw a command called `git merge`. Once we've committed changes and are ready to deploy, we can use `git merge` to merge our working branch back into our master branch. - -```console -$ git merge testing -``` - ---- -![w:600 center](pics/delete-branch.png) - ---- -We can then delete the branch that we've created, as the master branch points to the same place. - -Adding the option `-d` will delete the branch that had been merged with the main, as we no longer need it. -```console -$ git branch -d testing -``` - ---- -Remember that changes to our master branch have not been added to our *iss53* branch. 
We either need to `pull` them in or wait to integrate them when we `pull` *iss53* into the master branch - -![w:600 center](pics/difference.png) - ---- -If we're merging a branch with the main that has been changed since we diverged, merging isn't as simple for Git. - -Git will create a new snapshot of the merge and automatically create a new commit that points to it, called a `merge commit`. - -![w:600 center](pics/merge.png) - ---- -We saw `git branch` earlier with the option `-d` to delete a branch, but to get a list of our current branches, we can run `git branch` without any arguments. -```console -$ git branch -``` -The `*` indicates the branch we are currently on or have checked out (`git checkout`) - ---- -If we run `git branch` with the option `-v`, we can see the last commit on each branch. This is another reason why comments are so important to add to our commits: they can be extremely useful when looking back at our work and seeing what we've done. - ---- -We can also add the options `--merged` or `--no-merged` to `git branch`. `--merged` allows us to see what branches been merged to the branch we're currently on. Branches without the `*` are generally safe to delete because we've already merged our work with our main branch. -```console -$ git branch --merged -``` - ---- -On the other hand, `--no-merged` allows us to see all the branches that haven't been merged. -```console -$ git branch --no-merged -``` -If we try to delete one of these branches, we will receive an error. We can force delete using the option `-D`. - ---- - - -## `Merge Conflicts` - ---- -Often times, merging our work with other topic branches or the main branch creates errors. - -For example, if we've changed the same part of the same file differently in the two branches we're merging, we will encounter a conflict. - -Luckily, Git helps us see where the error is to correct it. 
- ---- -![bg contain](pics/mergeconflicts.png) - ---- -Git shows us the beginning of the merge conflict with -`<<<<<<< HEAD` and the end with `>>>>>>>`. -
- -`=======` separates the differences. -
- -To fix the merge, you can choose one set of changes, the difference you prefer or re-write it entirely. You have to remove all identifiers of the merge conflict as well. - ---- - - -Questions? - ---- - - -## `Branching Workflow` - ---- -##### **Long-Running Branches** -Multiple long running branches are helpful when tackling large and complex projects. - -Typically, developers will keep the master branch as the stable branch or code that has been or will be released. They will then have parallel branches that are used for development and testing. - -Branches can also have various levels of stability, and are graduated/merged once they're fully tested. - ---- -![w:1000 center](pics/topics.png) - ---- -##### **Topic Branches** -Topic branches are short-lived branches that are created for a particular feature or related work. They allow us to quickly switch between topics and keep changes there for as long or as little as needed, regardless of the created or modified order, before merging. - ---- - -![w:600 left](pics/topics2.png) ![w:435 right](pics/topics3.png) - ---- - - -Questions? - ---- - - -## `Remote Branches` - ---- -Remote branches are pointers to the state of branches on our remote repositories. Our remote repositories can have multiple remote branches, just as we can have multiple branches on our local repositories. - -The format is `(remote)/(branch)` or `(remote) (branch)` - -If branches already exist on your GitHub repo, you will have access to these branches. If we're working with a branch that does not exist yet, we can push it to our remote repo. - ---- -##### **Pushing** -When we're ready to share our work, we'll use `git push`. If the remote branch already exists, we can push directly to that branch: -```console -$ git checkout testing -$ git add -A -$ git commit -m "testing branch commit" -$ git push origin testing -``` -This will push our changes to the existing testing branch on GitHub. 
- ---- -If we were working with a branch that only exists locally, we can push it to GitHub with a slight tweak: -```console -$ git checkout new-branch -$ git add -A -$ git commit -m "new branch commit" -$ git push origin new-branch:new-branch -``` -This will create a new branch on GitHub called `new-branch`. From here, if we want to continue updating this branch, we can just run `git push origin new-branch`. - ---- -##### **Fetching** -When we `fetch` or `pull` files from our remote repos, we don't automatically have access to local, editable copies of files of the remote branches. - -We can do this in several steps. First we're going to fetch the remote branches: -```console -$ git fetch -``` - ---- -We can then see what branches exist remotely: -```console -$ git branch -v -a -``` -And we'll see something like this: -```console -* main 3d850f2 a commit - remotes/origin/HEAD -> origin/main - remotes/origin/main 3d850f2 another commit - remotes/origin/testing 3d850f2 another commit -``` - ---- -Then we'll create a branch that exists on our local drive: -```console -git checkout -b testing origin/testing -``` -Here we're pointing the `HEAD` to the new branch (`-b`) called `testing` from `origin/testing` - ---- -##### **Tracking Branches** -Tracking branches are branches that have a direct relationship with a remote branch. We can `push` and `pull` to and from these branches, as Git automatically knows which server and branch we're working with. - -For this to work, the name of your local branch must be the same as the remote branch - ---- -If the branches are named differently, we must run a different command for the push to be successful: -```console -$ git push origin HEAD:remote-branch -``` - ---- -##### **Deleting Branches** -If we've merged all our changes into our main branch, we can delete the remote branch with the following code: -```console -$ git push origin :testing -``` - ---- - - -Questions? 
- ---- - -# **Collaborating** - ---- -**References** -- Chacon and Straub: Chapter 3 + 5 -- Timbers: Chapter 12.8 - ---- -Much of the work that we do will involve working with others. It's important that we learn how best do this so we can successfully collaborate and avoid conflicts where possible. If conflicts arise, good collaboration practices help us resolve them with ease. - -So far we've learned several practices and commands that help us collaborate with others, including remote repositories and branches, `git pull` `git push` and `git merge` but we'll learn more practices that make collaboration straightforward. - ---- -There are many different factors that influence what workflow you might follow and how you might contribute to a project including: - -**1. Active contributor size** -Teams can vary from a few collaborators to thousands, varying the number of commits per day. - -**2. Chosen workflow** -Each project could have a different process to check patches including an integration manager or peer reviews. - -**3. Commit access** -Policies regarding how to contribute work can differ between projects, even by how much work or how often. - ---- -Let's take a look at a couple possible workflows: - -![w:500 center](pics/colabworkflow.png) - ---- -![w:600 center](pics/colabworkflow2.png) - ---- - - -## `GitHub` - ---- -##### **Adding Collaborators** -To collaborate with others on our GitHub repo, we can add collaborators so they have direct access to the repo: - -![w:1100 center](pics/gitcollabs1.png) - ---- -![w:1100 center](pics/gitcollabs2.png) - ---- -![w:800 center](pics/gitcollabs3.png) - ---- -Access does not have to be permanent. We can remove collaborators at any time and add additional ones when needed. - -Granting access to your repo this way, enables collaborators to make changes and push them to the repo without our constant permission. If we do not add push access, collaborators have to fork the repo and create pull request. 
- ---- -##### **Forking Projects** -Forking allows us to collaborate on projects without push access. We can fork a public project on GitHub and then clone it onto our local machine to begin making changes. - -![w:1150 center](pics/fork1.png) - ---- -Once a project has been forked, we can find the repo in our GitHub repositories. We can then clone the repo (`git clone`), make changes and push our changes without altering the original repo. - -Alternatively, we can clone the original repo, make our changes, fork the original repo and then merge our branch to the master branch of the forked repo. - -If we're collaborating with someone and we want our changes to be merged to the original repo, we can create a pull request. - ---- -##### **Pull Request** -After making a few changes, we now want to create a pull request to merge our changes with the original repo. We can do this directly in GitHub: - -![w:1100 center](pics/pullrequest2.png) - ---- -In the pull request, we can see what branches and repos we're attempting to merge: - -![w:800 center](pics/pullrequest3.png) - ---- -We can also see the changes that were made: - -![w:1100 center](pics/pullrequest4.png) - ---- -GitHub will also check to make sure that there are no conflicts with the base branch: - -![w:900 center](pics/pullrequest5.png) - ---- -Pull requests with no merge conflicts are easy to merge into the branches but it gets more complicated if there are merge conflicts: - -![w:1100 center](pics/pullrequestmergeconflict.png) - ---- -You can still create a pull request with merge conflicts: - -![w:1100 center](pics/pullrequestmergeconflict2.png) - ---- -![w:1000 center](pics/pullrequestmergeconflict3.png) - ---- -Resolving conflicts here is very similar to resolving merge conflicts through the terminal: - -![w:1100 center](pics/pullrequestmergeconflict4.png) - -Because resolving conflicts is done on GitHub, it's a good practice to resolve conflicts before creating a pull request. - ---- - - -Questions? 
- ---- - -# **Conflicts** - ---- -**References** -- Chacon and Straub: Chapter 3 + 6 -- Timbers: Chapter 12.5 - ---- -Conflicts are going to arise at some point, especially when working with others. It's important that we learn how to handle these conflicts for easier and more successful collaboration. - ---- - - -## `GitHub Issues` - ---- -GitHub issues are an extremely useful tool for communicating decisions, ideas and problems that are project specific. - -They are an alternative to email or Slack that keeps communication isolated to a particular project. - -Issues can be *opened* on GitHub and even when they're *closed*, they remain available. They're also accessible to all collaborators for transparency. - ---- -To open an issue, navigate to the project page and click *Issues*: - -![w:1100 center](pics/issues.png) - ---- -Then open a new issue: - -![w:1100 center](pics/issues2.png) - ---- -From here, we can add a title and description of the issue, and add any specific collaborators, labels, etc. - -![w:1100 center](pics/issues3.png) - ---- -##### **Information** -**Title:** should be descriptive and quickly convey what the issue is about - -**Description:** explain the purpose of the issue and how to potentially resolve it. If it's a bug fix, include a reprex, what you wanted to happen and what actually happened. You can also include steps already taken to solve the issue. - ---- -##### **Reprex** -- A reprex is a **REPR**oducible **EX**ample. - -- It contains just enough of the code to reproduce the error, i.e. it is **self-contained** - -- We might have to create a smaller version of the code in order to create the reprex. Don't include anything that isn't related to the problem. - -- Sometimes, this process will help us solve our issue. - ---- -##### **Inclusions** -A minimal dataset to demonstrate the problem. 
This could be a regularly used one such as *mtcars* -```r -install.packages("dplyr") -library(dplyr) -head(mtcars) -``` - -or one easily built yourself. -```r -df <- data.frame (col1 = c(1, 2), - col2 = c(3, 4)) -df -``` - ---- -- Make sure to include classes that are necessary to your reprex (ex. dates, factors, etc.) - -- If you're using randomly sampled data, set the seed so the same data is produced each time. -```r -set.seed(853) -``` - ---- -Include all packages that you need. -
- -- Make sure they are placed at the top of the script so it's quick and easy to see what is necessary for the reprex. - ---- -##### **Other Inclusions** -- Details about the issues you are facing. - -- Comments that will add clarification to your error. - -- Add what fixes have been attempted. This could include links to StackOverflow articles that you've viewed. - -- Communicate clearly what your desired outcome is. - ---- -##### **Task Lists** -If an issue is quite large, it's possible to add task lists to break the issue into smaller pieces. -- Use square brackets `- [ ]` - -- To mark it complete, use `- [x]` - -- Issues can be linked to previous issues using - - the number `- [x] #11` - - a URL `- [x] https://github.com/rachaellam/git-r/issues/11` - ---- -Once an issue has been opened, we can respond and comment. - -When we decide it has been resolved, we can close the issue. The history of the issues can still be seen, even if it has been closed. - ---- - - -Questions? - ---- - - -## `Debugging` - ---- -##### **File Annotation** -File annotation can help us resolve issues in our code if we know where the issue is. We can see when the code was introduced and by whom, line by line, using the aptly named `git blame`. - -```console -$ git blame -L 1,3 script.sh -^8e9b89da (Rachael Lam 2021-12-02 15:01:02 -0500 1) #line 1 -8e9b89da (Rachael Lam 2021-12-02 15:01:02 -0500 2) #line 2 -8e9b89da (Rachael Lam 2021-12-02 15:01:02 -0500 3) #line 3 -``` - ---- -`git blame` is combined with the filename we want to inspect. We can also use the option `-L` followed by two numbers to limit the number of lines shown. - -We can then see the partial SHA-1 of the commit that last modified the line, the author name and date of the commit, and the content of the file by line. - -When the SHA-1 is preceded by a `^`, it indicates that those lines were introduced in the commit that first added the file to the project and have not changed since. 
- ---- -##### **Binary Search** -If we don't know where the issue is, we can use `git bisect` to identify the commit that introduced an issue. -```console -$ git bisect start -$ git bisect bad -$ git bisect good [good_commit] -``` -First, we've started the bisect program. We then told the system that the current commit is broken using `bisect bad` followed by the last good commit using `bisect good [good_commit]`. We can find the hashes of the different commits by running `git log`, which we learned earlier. - ---- -Git produced the number of commits that were between the good and the bad commit and then checked out the middle one. - -From here, we can run our test to see if the issue still exists. If it does, it means the issue was introduced in this middle commit or an earlier one and we can run `git bisect bad` to tell the system that there is still an issue. - -If it does not, then the issue was introduced after and we can run `git bisect good`. - ---- -We can keep running this loop until we find the commit that introduced an issue and make our corrections. - -When we're finished, we can run `git bisect reset` to reset our `HEAD` to where we were before we started. - ---- - - -## `Best Practices` - ---- -- Topic branches should be used to try out new code before integrating. They enable us to play around or leave it for the time being if it's not working. -- Commit often rather than submitting a massive commit. This makes it easier to review and merge changes, or revert if necessary. - ---- -- Create quality commit messages so that your collaborators can easily understand what has been done. For example: -``` -Short (50 chars or less) summary of changes - -More detailed explanatory text, if necessary. Wrap it to about -72 characters or so. In some contexts, the first line is treated -as the subject of an email and the rest of the text as the body, -the blank line separating the summary from the body is critical -(unless you omit the body entirely). 
- -Further paragraphs come after blank lines. - -- Bullet points are okay, too - -- Typically a hyphen or asterisk is used for the bullet, preceded - by a single space with blank lines in between, but conventions - vary here -``` - ---- - - -Questions? - ---- - - -## `Reproducibility` - ---- -- Reproducibility is the ability for independent researchers to obtain the same or similar results when repeating an experiment or test. - -- This concept has been widely used in natural sciences, but is not yet as popular in data science. - -- Remember, data science is a science. We question, hypothesize, test, and therefore, we should also have the same rigour of confirmation. - ---- -- Skepticism should always be able to be independently verified. We should be able to defend our results and decisions. - -- Who would believe your results otherwise? More importantly, you should not believe results if they cannot be verified. - ---- - - -Why is reproducibility important? - ---- -1. New Insights - -2. Reduce Error Risks - -3. Validate Results - -4. Transparency - ---- - - -How can we make our work reproducible? - ---- -There are a number of practices that can help make our work reproducible including: -- Reproducible Examples -- Commenting Code -- Technical Documentation -- Folder Structure - ---- - - -## `Reproducible Examples` - ---- -##### **Reprex** -- A reprex is a **REPR**oducible **EX**ample. - -- It contains just enough of the code to reproduce the error, i.e., it is **self-contained** - -- We might have to create a smaller version of the code in order to create the reprex. Don't include anything that isn't related to the problem. - -- Sometimes, this process will help us solve our issue. - ---- -##### **Inclusions** -A minimal dataset to demonstrate the problem. This could be a regularly used one such as *mtcars* -```python -install.packages("dplyr") -library(dplyr) -head(mtcars) -``` - -or one easily built yourself.
-```python -df <- data.frame (col1 = c(1, 2), - col2 = c(3, 4)) -df -``` - ---- -- Make sure to include classes that are necessary to your reprex (e.g., dates, factors, etc.) - -- If you're using randomly sampled data, set the seed so the same data is produced each time. -```rstudio -set.seed(853) -``` - ---- -Include all packages that you need. -
- -- Make sure they are placed at the top of the script so it's quick and easy to see what is necessary for the reprex. - ---- -##### **Other Inclusions** -- Details about the issues you are facing. - -- Comments that will add clarification to your error. - -- Add what fixes have been attempted. This could include links to StackOverflow articles that you've viewed. - -- Communicate clearly what your desired outcome is. - ---- - - -## `Commenting Code` - ---- - - -How does commenting code help in reproducibility? - ---- -Commenting code is an important practice that benefits both ourselves and collaborators. - -Not only can we understand what we did to fix our own errors or improve our work, but others can better understand our code to reproduce it. - ---- -[Ellen Spertus](https://stackoverflow.blog/2021/12/23/best-practices-for-writing-code-comments/) outlines 9 rules to follow: -
- -1. Comments should not duplicate the code - -2. Good comments do not excuse unclear code -3. If you can’t write a clear comment, there may be a problem with the code -4. Comments should dispel confusion, not cause it - ---- -5. Explain unidiomatic code in comments - -6. Provide links to the original source of copied code -7. Include links to external references where they will be most helpful -8. Add comments when fixing bugs -9. Use comments to mark incomplete implementations - ---- -##### **1. Comments should not duplicate the code** -- Comments should add value to whoever is reading your code. -- Duplicating code adds unnecessary bulk and can actually make it more difficult to understand the code. -
- -**Can you think of a bad example?** - ---- -Here is an example of what you should **not** do: -```bash -x=5 - -if [ $x = 5 ]; then - echo "x equals 5." # if x = 5 then output x equals 5 - -else - echo "x does not equal 5." # otherwise output x does not equal 5 - -fi -``` - ---- -##### **2. Good comments do not excuse unclear code** -- Our aim should always be having clear code, rather than relying on our comments to add clarity. -- Remember, we should not be adding more bulk to the code that makes it more difficult to understand. - ---- -##### **3. If you can’t write a clear comment, there may be a problem with the code** ->Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to -debug it. - -\- Kernighan's Law - ---- -##### **4. Comments should dispel confusion, not cause it** -- If our comments are adding further confusion, we should either rewrite the comment or remove it entirely. -- A good comment should always be written with the intent to help better understand what is being done. - ---- -##### **5. Explain unidiomatic code in comments** -- If we've purposefully written code that others may find unnecessary, we need to comment our reasoning. -- Others may try to simplify our code if we don't explain our reasoning. -
- -**Can you think of an example?** - ---- -##### **6. Provide links to the original source of copied code** -- Oftentimes, we'll use code that others have written. It's important to give credit to the original source, as well as to leave ourselves a reminder of where we got the code so we can reference it later if we need to. -- Referencing the source can also provide other information such as what the problem was, why the solution was recommended and how it can be improved. It also means we don't have to comment all of these details again in our own code. - ---- -An example: -```bash -# I got these 9 rules from Ellen Spertus' blog post on -# StackOverflow: https://stackoverflow.blog/2021/12/23/ -# best-practices-for-writing-code-comments/ -``` -- It's best to include the URL so others don't have to search for the exact location. -- Remember: **never** copy code that you don't personally understand. -- Code from StackOverflow falls under Creative Commons licenses so a reference comment is needed. - ---- -##### **7. Include links to external references where they will be most helpful** -- References don't just have to be used for copied code. They can also provide information on decisions made or changes in practices - ---- -##### **8. Add comments when fixing bugs** -- Comments can help others understand what we modified, if the modification is still needed, and how to test our modifications -- Although `git blame` can be used to find the commit that modified the code, a good comment can help locate the change and can be quite brief. - ---- -##### **9. Use comments to mark incomplete implementations** -- Sometimes we have limitations in our knowledge or time. Adding comments documenting these limitations can allow us to better address and fix the issues. - ---- -##### **Some other good practices:** -- Comments should be clear and efficient. Don't add more information than necessary, but don't be too vague -- Remember to update your comments if you update your code.
Old comments can add more confusion. -- Inline comments can add noise as they're mixed with our code. Spacing can be helpful here: - -```python -colors = [[213/255,94/255,0], # vermillion - [86/255,180/255,233/255], # sky blue - [230/255,159/255,0], # orange - [204/255,121/255,167/255]] # reddish purple -``` - ---- ->Code tells you how, comments tell you why. - -\- Jeff Atwood, Co-founder of StackOverflow - ---- - - -## `Technical Documentation` -## `Writing` - ---- - - -What is technical documentation writing? - ---- - - -Why is it important to write a good technical documentation? - ---- -Technical documents are necessary for reproducibility as they relay important information about your project to others. Writing technical documents is not easy but should not be overlooked. - -A well done technical document will communicate the goals of a project and in doing so, can generate interest in the project. - ---- -GitHub outlines several pieces of information to include: -1. What the project does -2. Why the project is useful -3. How users can get started with the project -4. Where users can get help with the project -5. Who maintains and contributes to the project -
- -This is just part of the story and we'll add more to this in the coming slides. - ---- -##### **README** -- Technical documentation writing is typically found in a `README.md` file. -- If the `README.md` file is placed in our repo's root, `doc` folder, or hidden in the `.github` directory, GitHub will place the contents of the `README.md` on the main repo page. -- The `README.md` file will be the first thing visitors see when they come to the project page so it's important to make it as appealing as possible. - ---- -##### **Examples** -Let's walk through some good examples of `README.md` files: -- [Create Go App CLI](https://github.com/create-go-app/cli#readme) -- [Human Activity Recognition](https://github.com/ma-shamshiri/Human-Activity-Recognition#readme) -- [Markdownify](https://github.com/amitmerchant1990/electron-markdownify#readme) -- [More!](https://github.com/matiassingers/awesome-readme) - ---- - - -What did you like about these README files? - -What similarities can you see? - ---- -##### **What should be included?** -1. Name of the project -2. What the project does -3. The project's usages -4. How to get started -5. Where to find help -6. Who contributes - ---- -##### **1. Name of the Project** -- The name of your project should be unambiguous. - ---- -##### **2. What the project does** -- This should be a description of the project. -- Provide context to the project and any reference links. -- Include features or background information -- *Can be titled "Description"* - ---- -##### **3. The project's usages** -- This should include how the project can be used. -- Provide examples using the code along with the expected output of said code. -- It should be a smaller example. Longer examples can be linked to. -- *Can be titled "Usages"* - ---- -##### **4. How to get started** -- This is the installation guide. -- Think of your particular audience and how much detail you might need to include. 
-- Add a requirements section if there are specific dependencies or needs to run in a particular programming language. -- *Can be titled "Installation"* - ---- -##### **5. Where to find help** -- Direct people on where to find help if they need. -- This could be the issues page on GitHub, a forum, or an email address. -- *Can be titled "Support"* - ---- -##### **6. Who contributes** -- This should outline how others can contribute to your project and what your requirements are for accepting contributions. -- *Can be titled "Contributing"* - ---- -##### **Additional Additions** -- **Visuals:** Visuals can grab people's attention, but they can also be helpful for showcasing what the code does. Include screenshots or GIFs that demonstrate your project. -- **Badges:** Badges provide metadata such as issue tracking, test results and downloads. [Shields.io](https://shields.io/) provides this service and you can also look at their [GitHub](https://github.com/badges/shields) for more information. -- **Acknowledgements:** Include the authors or anyone that helped with the project. - ---- -##### **Markdown** -- As noted by the extension, `README.md` files are usually written in markdown, thus using markdown syntax for styling. -- [GitHub](https://docs.github.com/en/github/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) provides a good reference on how to write your README in markdown. 
- ---- -##### **Headings** -```markdown -# Largest Heading -## Second Largest Heading -### Third Largest Heading -``` -![w:1000 center](pics/headings.png) - ---- -##### **Text Styling** -```markdown -**bold** -*italic* -~~strikethrough~~ -**this is a *nested* example** -***bold and italic*** -``` -![w:1000 center](pics/text-styling.png) - ---- -##### **Quoting** -```markdown -> Block quote some text -``` -![w:1000 center](pics/blockquote.png) - ---- -##### **Unordered Lists** -```markdown -- this is an unordered list -- second item - - nested - - second nest -``` -![w:1000 center](pics/unordered.png) - ---- -##### **Ordered Lists** -```markdown -1. This is an ordered list -2. This is the second item - - with some additional information -3. This is the third -``` -![w:1000 center](pics/ordered.png) - ---- -##### **Codeblock** -Wrap your code in ``` to create a codeblock. - -![w:1000 center](pics/codeblock.png) - ---- -##### **Links** -```markdown -[Rachael's GitHub](https://github.com/rachaellam) -``` -![w:1000 center](pics/link.png) - ---- -##### **Images** -```markdown -![w:1000 center](pics/picture.png) -``` -![w:500 center](pics/bobs-burgers-louise.gif) -As we see, images can also be GIFs. We can also play around with the size and alignment. - ---- - - -## `Folder Structure` - ---- - - -What is folder structure and why is it important? - ---- -A good folder structure is important for reproducibility because it allows others to easily navigate and implement our projects. If someone references a file that is self-contained, they know they won't have to change the file path to gain access. - -For example, what is the difference between these two paths: - -1. `"/Users/rachaellam/Documents/all-projects/this-project/data/"` - -2. `"this-project/data/"` - ---- -Folder structure can vary based on the project but a basic one to follow is...
-- **/inputs** - - Everything that will not be edited including raw data and references -- **/outputs** - - Everything that was created during the project and your results -- **/scripts** - - All code that was written for the project - ---- -[Wilson et. al](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1005510#sec009) also outline a file structure that is similar... -- **/doc** - - All text documents including documentation or references -- **/data** - - All raw data and metadata -- **/results** - - Files generated during the analysis including generated data or cleaned data - - Results can be further divided into subdirectories that contain intermediate files and finished files -- **/src** - - All code that was written for the project - ---- -**References** - -Reproducibility: -- [Reproducibility and Research Integrity](https://doi.org/10.1080/08989621.2016.1257387) -- [Reproducibility, Replicability, and Reliability](https://doi.org/10.1162/99608f92.dbfce7f9) - ---- -Commenting: -- [Elena Kosourova](https://towardsdatascience.com/the-art-of-writing-efficient-code-comments-692213ed71b1) -- [Ellen Spertus](https://stackoverflow.blog/2021/12/23/best-practices-for-writing-code-comments/) - ---- -Technical Documentation Writing: -- [GitHub README](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-readmes) -- [GitHub Markdown](https://docs.github.com/en/github/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) -- [KyuWoo Choi](https://www.freecodecamp.org/news/what-i-learned-from-an-old-github-project-that-won-3-000-stars-in-a-week-628349a5ee14/) -- [Make a README](https://www.makeareadme.com/) -- [Matias Singers](https://github.com/matiassingers/awesome-readme) - ---- -Folder Structure: -- [Rohan Alexander](https://www.tellingstorieswithdata.com/reproducible-workflows.html) -- [Wilson et. 
al](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1005510#sec009) diff --git a/slides/pdf/git_slides.pdf b/slides/pdf/git_slides.pdf deleted file mode 100644 index 0e23425..0000000 Binary files a/slides/pdf/git_slides.pdf and /dev/null differ diff --git a/slides/pdf/unix_slides.pdf b/slides/pdf/unix_slides.pdf deleted file mode 100644 index 0a0c3c9..0000000 Binary files a/slides/pdf/unix_slides.pdf and /dev/null differ diff --git a/steps_to_ask_for_help.png b/steps_to_ask_for_help.png new file mode 100644 index 0000000..edc07a2 Binary files /dev/null and b/steps_to_ask_for_help.png differ