Commit

Merge pull request #29 from layuplist/develop
LL for 2020
ziruihao authored Sep 10, 2020
2 parents 593c131 + d513882 commit fd873cb
Showing 14 changed files with 50 additions and 24 deletions.
18 changes: 18 additions & 0 deletions .vscode/launch.json
@@ -0,0 +1,18 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Django",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/manage.py",
"args": [
"collectstatic"
],
"django": true
}
]
}
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
"python.pythonPath": "/Users/zirayhao/Code/dplanner/ll/venv/bin/python2.7"
}
7 changes: 5 additions & 2 deletions CONTRIBUTING.md
@@ -9,14 +9,15 @@ Feel free to email <a href="mailto:[email protected]">[email protected]<
Local Setup (macOS or OS X)
-----------------
#### Installation
* Use Python 2.7.16
* Install [Homebrew](http://brew.sh/), [node.js](https://nodejs.org/en/), and Postgres (we recommend [Postgres.app](http://postgresapp.com/) with their [CLI Tools](http://postgresapp.com/documentation/cli-tools.html)).
* Install the [Heroku CLI](https://cli.heroku.com). You don't need a Heroku account; the CLI just provides useful tools for configuration.
* Install Redis using `brew install redis`.
* We use yuglify to compress the static files. Install using `sudo npm install -g yuglify`.
* Install forego using `brew install forego`. This is used to run the server.
* Run `easy_install pip` if you do not have pip.
* Run `pip install virtualenv` if you do not have virtualenv.
* Run `virtualenv venv --no-site-packages` to create a Python virtual environment.
* Run `virtualenv venv` to create a Python virtual environment.
* Run `createdb layuplist`.
* [Clone](https://help.github.com/articles/cloning-a-repository/) the main repository. `git clone https://github.com/layuplist/layup-list.git`.
* Create a `.env` file in the root directory of the repository (fill out the items in brackets):
@@ -26,12 +27,14 @@ Local Setup (macOS or OS X)
REDIS_URL=redis://[YOUR_USERNAME]@localhost:6379
SECRET_KEY=[SOME_LONG_RANDOM_STRING]
DEBUG=True
CURRENT_TERM=16S
CURRENT_TERM=20X
OFFERINGS_THRESHOLD_FOR_TERM_UPDATE=100
```

* Run `source ./scripts/dev/environment.sh` to set up the heroku development environment.
* Run `source ./scripts/dev/virtualize.sh` to activate the virtual environment.
* Install Python dependencies using `pip install -r requirements.txt`.
* Initialize the database with `python manage.py migrate`. (A quick way to sanity-check the local services is sketched below.)

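Once the steps above succeed, a quick way to confirm the local services are wired together is a small throwaway script such as the sketch below. It is not part of the repository; it assumes the `redis` and `psycopg2` Python packages are available in the virtualenv (they back the Redis and Postgres pieces of the setup) and that the `.env` values above are loaded.

```python
# check_services.py -- hypothetical, throwaway sanity check; not part of the repo.
# Assumes the redis and psycopg2 drivers are installed in the virtualenv.
import os

import psycopg2
import redis


def main():
    # REDIS_URL mirrors the .env entry above; the default matches a local install.
    client = redis.StrictRedis.from_url(
        os.environ.get("REDIS_URL", "redis://localhost:6379"))
    client.ping()
    print("Redis reachable")

    # The database created by `createdb layuplist` above.
    conn = psycopg2.connect(dbname="layuplist")
    conn.close()
    print("Postgres reachable")


if __name__ == "__main__":
    main()
```

Run it with `python check_services.py` from inside the activated virtualenv.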
Developing
----------
6 changes: 3 additions & 3 deletions apps/spider/crawlers/medians.py
@@ -9,9 +9,9 @@
retrieve_soup
)

MEDIAN_PAGE_INDEX_URL = "http://www.dartmouth.edu/~reg/transcript/medians/"
MEDIAN_PAGE_INDEX_URL = "http://www.dartmouth.edu/reg/transcript/medians/"
MEDIANS_URL_FMT = (
"http://www.dartmouth.edu/~reg/transcript/medians/{term}.html")
"http://www.dartmouth.edu/reg/transcript/medians/{term}.html")


def get_term_from_median_page_url(url):
@@ -33,7 +33,7 @@ def _retrieve_term_medians_urls_from_soup(soup):

def _is_term_page_url(url):
term = get_term_from_median_page_url(url)
return url == "/~reg/transcript/medians/{term}.html".format(term=term)
return url == "/reg/transcript/medians/{term}.html".format(term=term)


def crawl_term_medians_for_url(url):
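For context on the change above: the registrar's medians pages are now reached without the `~` in the path, and the crawler's URL check follows suit. A minimal, hypothetical illustration of the convention the `/reg/transcript/medians/<term>.html` pages follow (this is a sketch, not the repository's own code):

```python
# Hypothetical illustration of the updated medians URL scheme; not the repo's code.
import re

TERM_PAGE_RE = re.compile(r"/reg/transcript/medians/(?P<term>\w+)\.html$")


def term_from_url(url):
    """Return the term slug from a medians page URL, or None if it is not a term page."""
    match = TERM_PAGE_RE.search(url)
    return match.group("term") if match else None


assert term_from_url("/reg/transcript/medians/20x.html") == "20x"
assert term_from_url("/reg/transcript/medians/") is None
```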
22 changes: 11 additions & 11 deletions apps/spider/crawlers/timetable.py
@@ -22,10 +22,10 @@

DATA_TO_SEND = (
"distribradio=alldistribs&depts=no_value&periods=no_value&"
"distribs=no_value&distribs_i=no_value&distribs_wc=no_value&pmode=public&"
"distribs=no_value&distribs_i=no_value&distribs_wc=no_value&deliverymodes=no_value&pmode=public&"
"term=&levl=&fys=n&wrt=n&pe=n&review=n&crnl=no_value&classyear=2008&"
"searchtype=Subject+Area%28s%29&termradio=selectterms&terms=no_value&"
"subjectradio=selectsubjects&hoursradio=allhours&sortorder=dept"
"deliveryradio=selectdelivery&subjectradio=selectsubjects&hoursradio=allhours&sortorder=dept"
"&terms={term}"
)

@@ -51,7 +51,7 @@ def crawl_timetable(term):
preprocess=lambda x: re.sub("</tr>", "", x),
)
num_columns = len(soup.find(class_="data-table").find_all("th"))
assert num_columns == 19
assert num_columns == 20

tds = soup.find(class_="data-table").find_all("td")
assert len(tds) % num_columns == 0
@@ -76,14 +76,14 @@ def crawl_timetable(term):
'ascii', 'ignore').decode('ascii'),
"crosslisted": crosslisted_courses,
"period": tds[8].get_text(strip=True),
"room": tds[9].get_text(strip=True),
"building": tds[10].get_text(strip=True),
"instructor": _parse_instructors(tds[11].get_text(strip=True)),
"world_culture": tds[12].get_text(strip=True),
"distribs": _parse_distribs(tds[13].get_text(strip=True)),
"limit": int_or_none(tds[14].get_text(strip=True)),
# "enrollment": int_or_none(tds[15].get_text(strip=True)),
"status": tds[16].get_text(strip=True),
"room": tds[10].get_text(strip=True),
"building": tds[11].get_text(strip=True),
"instructor": _parse_instructors(tds[12].get_text(strip=True)),
"world_culture": tds[13].get_text(strip=True),
"distribs": _parse_distribs(tds[14].get_text(strip=True)),
"limit": int_or_none(tds[15].get_text(strip=True)),
# "enrollment": int_or_none(tds[16].get_text(strip=True)),
"status": tds[17].get_text(strip=True),
})
return course_data

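A note on the index shuffle above: the timetable now has 20 columns instead of 19 (presumably the delivery-mode column implied by the `deliverymodes`/`deliveryradio` parameters added to `DATA_TO_SEND`), so every `<td>` after `period` shifts one slot to the right. A reference map of the old and new indices, for illustration only:

```python
# Old -> new <td> indices used by crawl_timetable after the extra column.
# Illustration only; the authoritative values are in the diff above.
TD_INDEX_SHIFT = {
    "room": (9, 10),
    "building": (10, 11),
    "instructor": (11, 12),
    "world_culture": (12, 13),
    "distribs": (13, 14),
    "limit": (14, 15),
    "enrollment": (15, 16),  # still commented out in the crawler
    "status": (16, 17),
}
```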
2 changes: 1 addition & 1 deletion apps/spider/tasks.py
@@ -31,7 +31,7 @@ def import_pending_crawled_data(crawled_data_pk):
@task_utils.email_if_fails
def crawl_medians():
median_page_urls = medians.crawl_median_page_urls()
assert len(median_page_urls) == 10
assert len(median_page_urls) == 10  # the registrar medians page always keeps a list of links to the past ten academic terms
for url in median_page_urls:
crawl_term_median_page.delay(url)
return median_page_urls
Binary file added dump.rdb
Binary file not shown.
4 changes: 2 additions & 2 deletions layup_list/settings.py
@@ -6,7 +6,7 @@
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
SECRET_KEY = os.environ['SECRET_KEY']
DEBUG = os.environ['DEBUG'] == "True"
ALLOWED_HOSTS = ['.layuplist.com'] if not DEBUG else []
ALLOWED_HOSTS = ['.layuplist.com'] if not DEBUG else ['0.0.0.0']
AUTO_IMPORT_CRAWLED_DATA = os.environ.get('AUTO_IMPORT_CRAWLED_DATA') == "True"

INSTALLED_APPS = [
@@ -161,7 +161,7 @@
'NAME': 'django.contrib.auth.password_validation.'
'NumericPasswordValidator',
},
]
] if not DEBUG else []


if not DEBUG:
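Both settings.py edits above hinge on the same `DEBUG` flag from the `.env` described in CONTRIBUTING.md: with `DEBUG=True` locally the site answers on `0.0.0.0` and password validation is skipped; with `DEBUG=False` only `*.layuplist.com` is served and the validators apply. A condensed, illustration-only sketch of that behavior (not a drop-in replacement for the real settings module):

```python
# Illustration of the DEBUG-driven toggles above; not the actual settings.py.
import os

# Set in .env locally, in the Heroku config in production.
DEBUG = os.environ.get('DEBUG') == "True"

ALLOWED_HOSTS = ['.layuplist.com'] if not DEBUG else ['0.0.0.0']

# The validators from the hunk above are enforced only outside DEBUG.
PRODUCTION_VALIDATORS = [
    {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
    # ...the other three validators from the hunk above are elided here...
]
AUTH_PASSWORD_VALIDATORS = PRODUCTION_VALIDATORS if not DEBUG else []
```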
1 change: 1 addition & 0 deletions layup_list/urls.py
@@ -17,6 +17,7 @@
from django.conf.urls import include, url
from django.contrib import admin
import django.contrib.auth.views as authviews

from apps.web import views
from apps.analytics import views as aviews
from apps.recommendations import views as rviews
2 changes: 1 addition & 1 deletion requirements.txt
@@ -6,7 +6,7 @@ billiard==3.5.0.2
celery==4.0.1
decorator==4.0.10
dj-database-url==0.4.1
Django==1.10.4
Django==1.11.29
django-celery-beat==1.0.1
django-celery-results==1.0.1
django-compat==1.0.13
1 change: 1 addition & 0 deletions runtime.txt
@@ -0,0 +1 @@
python-2.7.16
2 changes: 1 addition & 1 deletion scripts/__init__.py
@@ -11,7 +11,7 @@ def crawl_and_import_data(include_orc=False):
# If the ORC is not crawled, the course selection will only be limited,
# but this should not interfere with development
if include_orc:
print "Crawling ORC. This will take a while.""
print "Crawling ORC. This will take a while."
crawl_orc()
else:
print "Skipping ORC crawling. Should be enough for development."
2 changes: 1 addition & 1 deletion scripts/dev/environment.sh
@@ -2,5 +2,5 @@
# Must run as `source environment.sh`
# start heroku environment, source user .bashrc, add (ll) to prompt, activate venv

heroku local:run bash
heroku local:run zsh

4 changes: 2 additions & 2 deletions scripts/dev/virtualize.sh
@@ -1,3 +1,3 @@
. ~/.bashrc
export PS1="(ll) $PS1"
. ~/.zshrc
export PS1="(layup-list) $PS1"
source venv/bin/activate
