Commit

Merge pull request #29 from layuplist/develop
LL for 2020
ziruihao authored Sep 10, 2020
2 parents 593c131 + d513882 commit fd873cb
Showing 14 changed files with 50 additions and 24 deletions.
18 changes: 18 additions & 0 deletions .vscode/launch.json
@@ -0,0 +1,18 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Django",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/manage.py",
"args": [
"collectstatic"
],
"django": true
}
]
}
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
"python.pythonPath": "/Users/zirayhao/Code/dplanner/ll/venv/bin/python2.7"
}
7 changes: 5 additions & 2 deletions CONTRIBUTING.md
@@ -9,14 +9,15 @@ Feel free to email <a href="mailto:[email protected]">[email protected]<
Local Setup (macOS or OS X)
-----------------
#### Installation
* Use Python 2.7.16
* Install [Homebrew](http://brew.sh/), [node.js](https://nodejs.org/en/), and Postgres (we recommend [Postgres.app](http://postgresapp.com/) with their [CLI Tools](http://postgresapp.com/documentation/cli-tools.html)).
* Install the [Heroku CLI](https://cli.heroku.com). You don't need a Heroku account; the CLI just provides useful tools for configuration.
* Install Redis using `brew install redis`.
* We use yuglify to compress the static files. Install using `sudo npm install -g yuglify`.
* Install forego using `brew install forego`. This is used to run the server.
* Run `easy_install pip` if you do not have pip.
* Run `pip install virtualenv` if you do not have virtualenv.
* Run `virtualenv venv --no-site-packages` to create a Python virtual environment.
* Run `virtualenv venv` to create a Python virtual environment.
* Run `createdb layuplist`.
* [Clone](https://help.github.com/articles/cloning-a-repository/) the main repository. `git clone https://github.com/layuplist/layup-list.git`.
* Create a `.env` file in the root directory of the repository (fill out the items in brackets):
@@ -26,12 +27,14 @@ Local Setup (macOS or OS X)
REDIS_URL=redis://[YOUR_USERNAME]@localhost:6379
SECRET_KEY=[SOME_LONG_RANDOM_STRING]
DEBUG=True
CURRENT_TERM=16S
CURRENT_TERM=20X
OFFERINGS_THRESHOLD_FOR_TERM_UPDATE=100
```

* Run `source ./scripts/dev/environment.sh` to set up the heroku development environment.
* Run `source ./scripts/dev/virtualize.sh` to activate the virtual environment.
* Install Python dependencies using `pip install -r requirements.txt`.
* Initialize the database with `python manage.py migrate`. (A quick way to sanity-check the local services is sketched below.)

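Once the steps above succeed, a quick way to confirm the local services are wired together is a small throwaway script such as the sketch below. It is not part of the repository; it assumes the `redis` and `psycopg2` Python packages are available in the virtualenv (they back the Redis and Postgres pieces of the setup) and that the `.env` values above are loaded.

```python
# check_services.py -- hypothetical, throwaway sanity check; not part of the repo.
# Assumes the redis and psycopg2 drivers are installed in the virtualenv.
import os

import psycopg2
import redis


def main():
    # REDIS_URL mirrors the .env entry above; the default matches a local install.
    client = redis.StrictRedis.from_url(
        os.environ.get("REDIS_URL", "redis://localhost:6379"))
    client.ping()
    print("Redis reachable")

    # The database created by `createdb layuplist` above.
    conn = psycopg2.connect(dbname="layuplist")
    conn.close()
    print("Postgres reachable")


if __name__ == "__main__":
    main()
```

Run it with `python check_services.py` from inside the activated virtualenv.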
Developing
----------
6 changes: 3 additions & 3 deletions apps/spider/crawlers/medians.py
@@ -9,9 +9,9 @@
retrieve_soup
)

MEDIAN_PAGE_INDEX_URL = "http://www.dartmouth.edu/~reg/transcript/medians/"
MEDIAN_PAGE_INDEX_URL = "http://www.dartmouth.edu/reg/transcript/medians/"
MEDIANS_URL_FMT = (
"http://www.dartmouth.edu/~reg/transcript/medians/{term}.html")
"http://www.dartmouth.edu/reg/transcript/medians/{term}.html")


def get_term_from_median_page_url(url):
@@ -33,7 +33,7 @@ def _retrieve_term_medians_urls_from_soup(soup):

def _is_term_page_url(url):
term = get_term_from_median_page_url(url)
return url == "/~reg/transcript/medians/{term}.html".format(term=term)
return url == "/reg/transcript/medians/{term}.html".format(term=term)


def crawl_term_medians_for_url(url):
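For context on the change above: the registrar's medians pages are now reached without the `~` in the path, and the crawler's URL check follows suit. A minimal, hypothetical illustration of the convention the `/reg/transcript/medians/<term>.html` pages follow (this is a sketch, not the repository's own code):

```python
# Hypothetical illustration of the updated medians URL scheme; not the repo's code.
import re

TERM_PAGE_RE = re.compile(r"/reg/transcript/medians/(?P<term>\w+)\.html$")


def term_from_url(url):
    """Return the term slug from a medians page URL, or None if it is not a term page."""
    match = TERM_PAGE_RE.search(url)
    return match.group("term") if match else None


assert term_from_url("/reg/transcript/medians/20x.html") == "20x"
assert term_from_url("/reg/transcript/medians/") is None
```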
22 changes: 11 additions & 11 deletions apps/spider/crawlers/timetable.py
@@ -22,10 +22,10 @@

DATA_TO_SEND = (
"distribradio=alldistribs&depts=no_value&periods=no_value&"
"distribs=no_value&distribs_i=no_value&distribs_wc=no_value&pmode=public&"
"distribs=no_value&distribs_i=no_value&distribs_wc=no_value&deliverymodes=no_value&pmode=public&"
"term=&levl=&fys=n&wrt=n&pe=n&review=n&crnl=no_value&classyear=2008&"
"searchtype=Subject+Area%28s%29&termradio=selectterms&terms=no_value&"
"subjectradio=selectsubjects&hoursradio=allhours&sortorder=dept"
"deliveryradio=selectdelivery&subjectradio=selectsubjects&hoursradio=allhours&sortorder=dept"
"&terms={term}"
)

@@ -51,7 +51,7 @@ def crawl_timetable(term):
preprocess=lambda x: re.sub("</tr>", "", x),
)
num_columns = len(soup.find(class_="data-table").find_all("th"))
assert num_columns == 19
assert num_columns == 20

tds = soup.find(class_="data-table").find_all("td")
assert len(tds) % num_columns == 0
@@ -76,14 +76,14 @@ def crawl_timetable(term):
'ascii', 'ignore').decode('ascii'),
"crosslisted": crosslisted_courses,
"period": tds[8].get_text(strip=True),
"room": tds[9].get_text(strip=True),
"building": tds[10].get_text(strip=True),
"instructor": _parse_instructors(tds[11].get_text(strip=True)),
"world_culture": tds[12].get_text(strip=True),
"distribs": _parse_distribs(tds[13].get_text(strip=True)),
"limit": int_or_none(tds[14].get_text(strip=True)),
# "enrollment": int_or_none(tds[15].get_text(strip=True)),
"status": tds[16].get_text(strip=True),
"room": tds[10].get_text(strip=True),
"building": tds[11].get_text(strip=True),
"instructor": _parse_instructors(tds[12].get_text(strip=True)),
"world_culture": tds[13].get_text(strip=True),
"distribs": _parse_distribs(tds[14].get_text(strip=True)),
"limit": int_or_none(tds[15].get_text(strip=True)),
# "enrollment": int_or_none(tds[16].get_text(strip=True)),
"status": tds[17].get_text(strip=True),
})
return course_data

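A note on the index shuffle above: the timetable now has 20 columns instead of 19 (presumably the delivery-mode column implied by the `deliverymodes`/`deliveryradio` parameters added to `DATA_TO_SEND`), so every `<td>` after `period` shifts one slot to the right. A reference map of the old and new indices, for illustration only:

```python
# Old -> new <td> indices used by crawl_timetable after the extra column.
# Illustration only; the authoritative values are in the diff above.
TD_INDEX_SHIFT = {
    "room": (9, 10),
    "building": (10, 11),
    "instructor": (11, 12),
    "world_culture": (12, 13),
    "distribs": (13, 14),
    "limit": (14, 15),
    "enrollment": (15, 16),  # still commented out in the crawler
    "status": (16, 17),
}
```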
2 changes: 1 addition & 1 deletion apps/spider/tasks.py
@@ -31,7 +31,7 @@ def import_pending_crawled_data(crawled_data_pk):
@task_utils.email_if_fails
def crawl_medians():
median_page_urls = medians.crawl_median_page_urls()
assert len(median_page_urls) == 10
assert len(median_page_urls) == 10  # the registrar medians page always keeps a list of links to the past ten academic terms
for url in median_page_urls:
crawl_term_median_page.delay(url)
return median_page_urls
Binary file added dump.rdb
Binary file not shown.
4 changes: 2 additions & 2 deletions layup_list/settings.py
@@ -6,7 +6,7 @@
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
SECRET_KEY = os.environ['SECRET_KEY']
DEBUG = os.environ['DEBUG'] == "True"
ALLOWED_HOSTS = ['.layuplist.com'] if not DEBUG else []
ALLOWED_HOSTS = ['.layuplist.com'] if not DEBUG else ['0.0.0.0']
AUTO_IMPORT_CRAWLED_DATA = os.environ.get('AUTO_IMPORT_CRAWLED_DATA') == "True"

INSTALLED_APPS = [
@@ -161,7 +161,7 @@
'NAME': 'django.contrib.auth.password_validation.'
'NumericPasswordValidator',
},
]
] if not DEBUG else []


if not DEBUG:
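Both settings.py edits above hinge on the same `DEBUG` flag from the `.env` described in CONTRIBUTING.md: with `DEBUG=True` locally the site answers on `0.0.0.0` and password validation is skipped; with `DEBUG=False` only `*.layuplist.com` is served and the validators apply. A condensed, illustration-only sketch of that behavior (not a drop-in replacement for the real settings module):

```python
# Illustration of the DEBUG-driven toggles above; not the actual settings.py.
import os

# Set in .env locally, in the Heroku config in production.
DEBUG = os.environ.get('DEBUG') == "True"

ALLOWED_HOSTS = ['.layuplist.com'] if not DEBUG else ['0.0.0.0']

# The validators from the hunk above are enforced only outside DEBUG.
PRODUCTION_VALIDATORS = [
    {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
    # ...the other three validators from the hunk above are elided here...
]
AUTH_PASSWORD_VALIDATORS = PRODUCTION_VALIDATORS if not DEBUG else []
```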
1 change: 1 addition & 0 deletions layup_list/urls.py
@@ -17,6 +17,7 @@
from django.conf.urls import include, url
from django.contrib import admin
import django.contrib.auth.views as authviews

from apps.web import views
from apps.analytics import views as aviews
from apps.recommendations import views as rviews
2 changes: 1 addition & 1 deletion requirements.txt
@@ -6,7 +6,7 @@ billiard==3.5.0.2
celery==4.0.1
decorator==4.0.10
dj-database-url==0.4.1
Django==1.10.4
Django==1.11.29
django-celery-beat==1.0.1
django-celery-results==1.0.1
django-compat==1.0.13
1 change: 1 addition & 0 deletions runtime.txt
@@ -0,0 +1 @@
python-2.7.16
2 changes: 1 addition & 1 deletion scripts/__init__.py
@@ -11,7 +11,7 @@ def crawl_and_import_data(include_orc=False):
# If the ORC is not crawled, the course selection will only be limited,
# but this should not interfere with development
if include_orc:
print "Crawling ORC. This will take a while.""
print "Crawling ORC. This will take a while."
crawl_orc()
else:
print "Skipping ORC crawling. Should be enough for development."
2 changes: 1 addition & 1 deletion scripts/dev/environment.sh
@@ -2,5 +2,5 @@
# Must run as `source environment.sh`
# start heroku environment, source user .bashrc, add (ll) to prompt, activate venv

heroku local:run bash
heroku local:run zsh

4 changes: 2 additions & 2 deletions scripts/dev/virtualize.sh
@@ -1,3 +1,3 @@
. ~/.bashrc
export PS1="(ll) $PS1"
. ~/.zshrc
export PS1="(layup-list) $PS1"
source venv/bin/activate
