Skip to content

Commit

Permalink
fix bug
Browse files Browse the repository at this point in the history
  • Loading branch information
ConlinH committed Sep 27, 2023
1 parent d79e7ed commit d25e13b
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 35 deletions.
1 change: 0 additions & 1 deletion aioscrapy/core/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from aioscrapy.exceptions import DontCloseSpider
from aioscrapy.http import Response
from aioscrapy.http.request import Request
from aioscrapy.utils.log import logformatter_adapter
from aioscrapy.utils.misc import load_instance
from aioscrapy.utils.tools import call_helper, create_task

Expand Down
3 changes: 2 additions & 1 deletion aioscrapy/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from aioscrapy.statscollectors import StatsCollector
from aioscrapy.utils.misc import load_instance
from aioscrapy.utils.tools import call_helper
from aioscrapy.utils.log import logger


class BaseSchedulerMeta(type):
Expand Down Expand Up @@ -122,7 +123,7 @@ async def from_crawler(cls: Type[SchedulerTV], crawler: "aioscrapy.Crawler") ->
await instance.flush()

count = await call_helper(instance.queue.len)
count and crawler.spider.log("Resuming crawl (%d requests scheduled)" % count)
count and logger.info("Resuming crawl (%d requests scheduled)" % count)

return instance

Expand Down
14 changes: 11 additions & 3 deletions aioscrapy/libs/extensions/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(self, spider_name: str, settings: Settings):
influxdb_url = settings.get('METRIC_INFLUXDB_URL')
token = settings.get('METRIC_INFLUXDB_TOKEN')
location = settings.get('METRIC_LOCATION')
self.retry_times = settings.getint('METRIC_RETRY_TIMES', 5)
self.location = location or f"{platform.node()}_{os.getpid()}"
self.spider_name = spider_name
self.session = ClientSession(headers={
Expand All @@ -54,11 +55,18 @@ async def record(self, obj: "Metric"):
continue
cnt = current_cnt - obj.prev.get(metric_name, 0)
if cnt:
data += self.format_metric(metric_name.replace('/', '-'), cnt, self.spider_name,
self.location) + '\n'
data += self.format_metric(
metric_name.replace('/', '-'), cnt, self.spider_name, self.location
) + '\n'
obj.prev[metric_name] = current_cnt
if data:
await self.emit(data)
for _ in range(self.retry_times):
try:
await self.emit(data)
return
except:
continue
logger.warning(f"emit metric failed:\n{data}")

async def close(self):
if self.session is not None:
Expand Down
40 changes: 12 additions & 28 deletions aioscrapy/utils/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from loguru import logger as _logger

from aioscrapy import Settings, Spider
from aioscrapy.exceptions import AioScrapyDeprecationWarning

_logger.remove(0)

Expand All @@ -32,34 +31,19 @@ def configure_logging(spider: Type[Spider], settings: Settings):
)


def logformatter_adapter(logkws):
    """
    Turn the dict produced by a LogFormatter method into the
    ``(level, message, args)`` tuple used for ``logger.log`` calls.

    A deprecation warning is issued when any of the ``level``, ``msg`` or
    ``args`` keys is absent, and another when the legacy ``format`` key is
    present. ``format`` takes precedence over ``msg`` for the message, and
    ``level`` falls back to ``"INFO"``.
    """
    required_keys = ('level', 'msg', 'args')
    if any(key not in logkws for key in required_keys):
        warnings.warn('Missing keys in LogFormatter method',
                      AioScrapyDeprecationWarning)

    uses_legacy_format = 'format' in logkws
    if uses_legacy_format:
        warnings.warn('`format` key in LogFormatter methods has been '
                      'deprecated, use `msg` instead',
                      AioScrapyDeprecationWarning)

    message = logkws['format'] if uses_legacy_format else logkws.get('msg')
    level = logkws.get('level', "INFO")

    # NOTE: a missing or empty 'args' falls back to the whole dict, since an
    # empty dict doesn't play well in logger.log calls.
    explicit_args = logkws.get('args')
    args = explicit_args if explicit_args else logkws
    return level, message, args


class AioScrapyLogger:
    """Proxy that forwards attribute access to the loguru logger.

    Each attribute lookup returns the attribute of that name from the
    loguru logger bound with ``spidername`` set to the name of the current
    asyncio task. When the task name cannot be determined, a warning is
    issued and the attribute is taken from the unbound loguru logger.
    """
    # These slots are never assigned, so reading any of them falls through
    # to ``__getattr__``.
    # NOTE(review): presumably listed so tooling can see the proxied method
    # names -- confirm.
    __slots__ = (
        'catch', 'complete', 'critical', 'debug', 'error', 'exception',
        'info', 'log', 'patch', 'success', 'trace', 'warning'
    )

    def __getattr__(self, method):
        """Return attribute ``method`` of the loguru logger for this task.

        Only the task-name lookup is guarded. An attribute that the loguru
        logger does not have raises ``AttributeError`` directly instead of
        first emitting a misleading "Error on get logger" warning.
        """
        try:
            spider_name = asyncio.current_task().get_name()
        except (RuntimeError, AttributeError) as e:
            # RuntimeError: no running event loop.
            # AttributeError: current_task() returned None (no current task).
            warnings.warn(f'Error on get logger: {e}')
            return getattr(_logger, method)
        return getattr(_logger.bind(spidername=spider_name), method)


logger: Type[_logger] = AioScrapyLogger()
# Module-level logger instance; attribute access on it is resolved by
# AioScrapyLogger.__getattr__, which delegates to the loguru logger.
logger = AioScrapyLogger()
3 changes: 1 addition & 2 deletions example/singlespider/demo_queue_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class DemoMemorySpider(Spider):
# 'DOWNLOAD_DELAY': 3,
# 'RANDOMIZE_DOWNLOAD_DELAY': True,
# 'CONCURRENT_REQUESTS': 1,
'LOG_LEVEL': 'INFO',
'LOG_LEVEL': 'DEBUG',
"CLOSE_SPIDER_ON_IDLE": True,
}

Expand All @@ -40,7 +40,6 @@ async def parse(self, response):
'author': quote.xpath('span/small/text()').get(),
'text': quote.css('span.text::text').get(),
}
raise Exception(111)

next_page = response.css('li.next a::attr("href")').get()
if next_page is not None:
Expand Down

0 comments on commit d25e13b

Please sign in to comment.