rm pg connections havanagrawal#2
paulbroek committed Jan 4, 2023
1 parent 018b96f commit 5ba1a19
Showing 2 changed files with 30 additions and 22 deletions.
11 changes: 6 additions & 5 deletions GoodreadsScraper/pipelines.py
@@ -19,11 +19,12 @@

 settings = get_project_settings()
 UPDATE_POSTGRES_PER_ITEM = settings.get("UPDATE_POSTGRES_PER_ITEM")
-psql.host = settings.get("POSTGRES_HOST")
-# psql.port = settings.get("POSTGRES_PORT")
-psql.port = os.environ.get("POSTGRES_PORT")
-psql_session = get_session(psql)()
-logger.info(f"{psql.host=}")
+
+if UPDATE_POSTGRES_PER_ITEM:
+    psql.host = settings.get("POSTGRES_HOST")
+    psql.port = os.environ.get("POSTGRES_PORT")
+    psql_session = get_session(psql)()
+    logger.info(f"{psql.host=}")


 class JsonLineItemSegregator(object):
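The pipelines change is the point of the commit: the module used to open a Postgres session unconditionally at import time, and now only does so when UPDATE_POSTGRES_PER_ITEM is enabled. As a sketch of how that flag might be declared in the project settings (the setting and variable names come from the diff; the file layout and defaults below are assumptions):

# GoodreadsScraper/settings.py (hypothetical excerpt, not part of this commit)
import os

# Gate all per-item Postgres work behind one flag, so runs without a
# database never open a connection at import time.
UPDATE_POSTGRES_PER_ITEM = os.environ.get("UPDATE_POSTGRES_PER_ITEM", "") == "1"

# Connection details, read from the environment as the diff does for the port.
POSTGRES_HOST = os.environ.get("POSTGRES_HOST", "localhost")
POSTGRES_PORT = os.environ.get("POSTGRES_PORT", "5432")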
41 changes: 24 additions & 17 deletions GoodreadsScraper/spiders/pg_author_list_spider.py
@@ -15,29 +15,36 @@

 logger = logging.getLogger(__name__)

 UPDATE_POSTGRES_PER_ITEM = settings.get("UPDATE_POSTGRES_PER_ITEM")

 # or use env variable?
 settings = get_project_settings()
 psql.host = settings.get("POSTGRES_HOST")
 psql.port = settings.get("POSTGRES_PORT")
-psql_session = get_session(psql)()
-
-# get authors to scrape sorted by last_scraped
-NSCRAPE = 10_000
-q = (
-    select(AuthorToScrape)
-    .where(~AuthorToScrape.lock)
-    .order_by(AuthorToScrape.last_scraped.desc())
-    .limit(NSCRAPE)
-)
-# to_scrape = psql_session.execute(q).scalars().fetchall()
-to_scrape = []
-
-# and set block = True, so other workers cannot pick them!
-# for item in to_scrape:
-# psql_session.commit()
-# psql_session.close()
+
+if UPDATE_POSTGRES_PER_ITEM:
+    psql_session = get_session(psql)()
+
+    # get authors to scrape sorted by last_scraped
+    NSCRAPE = 10_000
+    q = (
+        select(AuthorToScrape)
+        .where(~AuthorToScrape.lock)
+        .order_by(AuthorToScrape.last_scraped.desc())
+        .limit(NSCRAPE)
+    )
+    to_scrape = psql_session.execute(q).scalars().fetchall()
+
+    # and set block = True, so other workers cannot pick them!
+    # for item in to_scrape:
+    #     item.lock = True
+
+    # psql_session.commit()
+    # psql_session.close()
+
+else:
+    to_scrape = []
+


 logger.info(f"{len(to_scrape)=:,}")
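The commented-out block in this hunk gestures at a claim-before-scrape pattern: after selecting unlocked authors, each row's lock flag would be set and committed so other workers skip them. A minimal sketch of that pattern, assuming the AuthorToScrape model and SQLAlchemy session used in this repo; the function wrapper and the with_for_update guard are additions for illustration, not part of the commit:

from sqlalchemy import select

def claim_authors(psql_session, n=10_000):
    # Select up to n authors that no other worker has claimed yet.
    # with_for_update(skip_locked=True) makes the claim safe under
    # concurrency on PostgreSQL: competing workers skip rows already
    # row-locked by another open transaction.
    q = (
        select(AuthorToScrape)  # AuthorToScrape: ORM model from this repo
        .where(~AuthorToScrape.lock)
        .order_by(AuthorToScrape.last_scraped.desc())
        .limit(n)
        .with_for_update(skip_locked=True)
    )
    to_scrape = psql_session.execute(q).scalars().fetchall()
    for item in to_scrape:
        item.lock = True  # claim the row so other workers cannot pick it
    psql_session.commit()  # persist the lock flags, release the row locks
    return to_scrape

Without the FOR UPDATE clause, two workers could read the same unlocked rows between the select and the commit, which is exactly the race the commented-out code would have been exposed to.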

