Skip to content

Commit e67b2fc

Browse files
sfaziludSudharsan-VS
and
Sudharsan-VS
authored
Logging Improvements (#14)
* Code changes to retry the failed request on http 429 * Changed the retry logic to try until succeed * Improved error message * Change severity from error to warning * Dropped duplicate logging * fix: ensure log_offset is properly printing instead of None in log * chore: bump version to 2.2.3 * Logging improvements for DLS - 1 * Logging improvements for DLS - 2 * Rollback version number * Fixed the failing test case * Updated the change log. * Fixed the code duplication due to mc res --------- Co-authored-by: Sudharsan-VS <[email protected]>
1 parent ced0e09 commit e67b2fc

File tree

9 files changed

+73
-51
lines changed

9 files changed

+73
-51
lines changed

Diff for: HISTORY.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
## Change Log
22

3+
### 2.3.0
4+
5+
Logging Improvements
6+
Bug Fixes
7+
Improved Error Handling
8+
39
### 2.2.2
410

511
Security Update: Updated PyYAML in setup.py to 6.0.1, due to a [security vulnerability](https://nvd.nist.gov/vuln/detail/CVE-2020-14343).

Diff for: README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Duo Log Sync (v2.2.2)
1+
Duo Log Sync (v2.3.0)
22
===================
33

44
[![Issues](https://img.shields.io/github/issues/duosecurity/duo_log_sync)](https://github.com/duosecurity/duo_log_sync/issues)

Diff for: duologsync/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
__title__ = "duologsync"
77
__description__ = "Ingest + sync logs from the Duo Admin API into another system."
88
__url__ = "https://github.com/duosecurity/duo_log_sync"
9-
__version__ = "2.2.3"
9+
__version__ = "2.3.0"
1010
__author__ = "Duo Security, Inc."
1111
__license__ = "MIT"
1212
__email__ = "[email protected]"

Diff for: duologsync/config.py

+6
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,11 @@ def get_value(cls, keys):
255255

256256
return curr_value
257257

258+
@classmethod
259+
def get_config_file_path(cls):
260+
"""@return the filepath of the config file"""
261+
return cls.get_value(['config_file_path'])
262+
258263
@classmethod
259264
def get_log_filepath(cls):
260265
"""@return the filepath where DLS program messages should be saved"""
@@ -354,6 +359,7 @@ def create_config(cls, config_filepath):
354359
if config.get('dls_settings').get('api').get('timeout') < cls.API_TIMEOUT_DEFAULT:
355360
config['dls_settings']['api']['timeout'] = cls.API_TIMEOUT_DEFAULT
356361
Program.log(f'DuoLogSync: Setting default api timeout to {cls.API_TIMEOUT_DEFAULT} seconds.')
362+
config['config_file_path'] = config_filepath
357363

358364
# Will occur when given a bad filepath or a bad file
359365
except OSError as os_error:

Diff for: duologsync/consumer/consumer.py

+10-21
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,10 @@ async def consume(self):
7272
# Specifically watch out for errno 32 - Broken pipe. This means
7373
# that the connect established by writer was reset or shutdown.
7474
except BrokenPipeError as broken_pipe_error:
75-
shutdown_reason = f"{broken_pipe_error}"
75+
error_code, error_message = getattr(broken_pipe_error, "args")
76+
shutdown_reason = f"{self.log_type} consumer: [{broken_pipe_error} error_code: {error_code}]"
77+
Program.log(f"{self.log_type} consumer: connection to the destination server was reset or shutdown", logging.ERROR)
7678
Program.initiate_shutdown(shutdown_reason)
77-
Program.log(
78-
"DuoLogSync: connection to server was reset", logging.WARNING
79-
)
8079

8180
finally:
8281
if successful_write:
@@ -132,26 +131,16 @@ def update_log_checkpoint(log_type, log_offset, child_account_id):
132131
@param log_offset Information to save in the checkpoint file
133132
"""
134133

135-
Program.log(
136-
f"{log_type} consumer: saving latest log offset '{log_offset}' to a checkpointing file",
137-
logging.INFO,
138-
)
134+
checkpoint_filename = f"{log_type}_checkpoint_data_" + child_account_id + ".txt" if child_account_id else f"{log_type}_checkpoint_data.txt"
135+
checkpoint_file_path = os.path.join(Config.get_checkpoint_dir(), checkpoint_filename)
139136

140-
file_path = (
141-
os.path.join(
142-
Config.get_checkpoint_dir(),
143-
f"{log_type}_checkpoint_data_" + child_account_id + ".txt",
144-
)
145-
if child_account_id
146-
else os.path.join(
147-
Config.get_checkpoint_dir(), f"{log_type}_checkpoint_data.txt"
148-
)
149-
)
150-
151-
checkpoint_filename = file_path
137+
if os.path.exists(checkpoint_file_path):
138+
Program.log(f"{log_type} consumer: saving latest log offset '{log_offset}' to a checkpoint file '{checkpoint_file_path}'", logging.INFO)
139+
else:
140+
Program.log(f"{log_type} consumer: checkpoint file '{checkpoint_file_path}' doesn't exist and it will be created to save the latest offset '{log_offset}'", logging.INFO)
152141

153142
# Open file checkpoint_filename in writing mode only
154-
checkpoint_file = open(checkpoint_filename, "w")
143+
checkpoint_file = open(checkpoint_file_path, "w")
155144
checkpoint_file.write(json.dumps(log_offset) + "\n")
156145

157146
# According to Python docs, closing a file also flushes the file

Diff for: duologsync/producer/producer.py

+29-15
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,11 @@ async def produce(self):
6969
f"{self.log_type} producer: no new logs available", logging.INFO
7070
)
7171

72-
# Incorrect api_hostname or proxy_server was provided
73-
# duo_client throws the same error if either the api_hostname or proxy_server is incorrect
74-
except (gaierror, OSError) as error:
75-
shutdown_reason = f"{self.log_type} producer: [{error}]"
76-
Program.log(
77-
"DuoLogSync: check that the duoclient host and/or proxy_server provided in the config "
78-
"file are correct"
79-
)
72+
except gaierror as gai_error:
73+
shutdown_reason = self.handle_address_info_error(gai_error)
74+
75+
except OSError as os_error:
76+
shutdown_reason = self.handle_os_error(os_error)
8077

8178
# duo_client throws a RuntimeError if the ikey or skey is invalid
8279
except RuntimeError as runtime_error:
@@ -93,31 +90,48 @@ async def produce(self):
9390
await self.log_queue.put([])
9491
Program.log(f"{self.log_type} producer: shutting down", logging.INFO)
9592

93+
def handle_os_error(self, os_error: OSError):
94+
"""
95+
Handle an OS error gracefully by logging the error and returning a string to indicate that the producer should shut down.
96+
"""
97+
error_code, error_message = getattr(os_error, "args")
98+
file_name = getattr(os_error, "filename", None)
99+
return f"{self.log_type} producer: [{error_message} error_code: {error_code} file_name: {file_name}]"
100+
101+
def handle_address_info_error(self, gai_error: gaierror):
102+
"""
103+
Handle an address info error gracefully by logging the error and returning a string to indicate that the producer should shut down.
104+
"""
105+
error_code, error_message = getattr(gai_error, "args")
106+
Program.log(f"{self.log_type} producer: make sure that the host information details provider in the configuration file {Config.get_config_file_path()} is correct", logging.ERROR)
107+
return f"{self.log_type} producer: [{error_message} error_code: {error_code}]"
108+
96109
def handle_runtime_error_gracefully(self, runtime_error: RuntimeError):
97110
"""
98111
Handle a runtime error gracefully by checking if the error is eligible for a retry.
99112
If it is, log the error and return None to indicate that the producer should retry.
100113
If it is not, log the error and return a string to indicate that the producer should shut down.
101114
"""
102-
if self.eligible_for_retry(runtime_error):
103-
error_data = getattr(runtime_error, "data", None)
104-
error_code = error_data['code'] if error_data else None
115+
http_error_code = getattr(runtime_error, "status", None)
116+
error_data = getattr(runtime_error, "data", None)
117+
error_code = error_data['code'] if error_data else None
118+
119+
if self.eligible_for_retry(http_error_code):
105120
Program.log(
106-
f"{self.log_type} producer: retrying due to error: {runtime_error} error_code: {error_code}",
121+
f"{self.log_type} producer: retrying due to error: {runtime_error} error_code: {error_code} http_status_code: {http_error_code}",
107122
logging.WARNING,
108123
)
109124

110125
return None
111126

112-
return f"{self.log_type} producer: [{runtime_error}]"
127+
return f"{self.log_type} producer: [{runtime_error} error_code: {error_code} http_status_code: {http_error_code}]"
113128

114129
@staticmethod
115-
def eligible_for_retry(runtime_error: RuntimeError):
130+
def eligible_for_retry(http_error_code):
116131
"""
117132
Check if the runtime error is eligible for a retry based on the status code.
118133
See the Config.GRACEFUL_RETRY_STATUS_CODES for the list of status codes that is eligible for a retry.
119134
"""
120-
http_error_code = getattr(runtime_error, "status", None)
121135

122136
if http_error_code:
123137
for http_status_code in Config.GRACEFUL_RETRY_STATUS_CODES:

Diff for: duologsync/util.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def get_log_offset(
101101
)
102102
)
103103
Program.log(
104-
f"Recovering log offset from checkpoint file at {checkpoint_file_path}",
104+
f"{log_type} producer: recovering log offset from checkpoint file at '{checkpoint_file_path}'",
105105
logging.INFO,
106106
)
107107

@@ -111,15 +111,21 @@ def get_log_offset(
111111
log_offset = json.loads(checkpoint.read())
112112

113113
# Most likely, the checkpoint file doesn't exist
114-
except OSError:
114+
except OSError as os_error:
115+
error_code, error_message = getattr(os_error, "args")
116+
file_name = getattr(os_error, "filename", None)
115117
display_offset = log_offset
116118
if log_type in MILLISECOND_BASED_LOG_TYPES:
117119
display_offset /= milliseconds_per_second
118120
iso_timestamp = datetime.fromtimestamp(
119121
display_offset, tz=timezone.utc
120122
).isoformat()
121123
Program.log(
122-
f"Could not read checkpoint file for {log_type} logs, consuming logs from {iso_timestamp} (UTC)",
124+
f"{log_type} producer: could not access checkpoint file '{file_name}' to read log offset due to error: {error_message} error_code: {error_code}",
125+
logging.WARNING,
126+
)
127+
Program.log(
128+
f"{log_type} producer: the logs will be consumed from offset: '{iso_timestamp} (UTC)'",
123129
logging.INFO,
124130
)
125131

Diff for: duologsync/writer.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import logging
88
import socket
99
from socket import gaierror
10+
11+
from duologsync.config import Config
1012
from duologsync.program import Program
1113

1214

@@ -113,9 +115,7 @@ async def create_writer(self, host, port, cert_filepath):
113115
logging.INFO)
114116

115117
# Message to be logged if an error occurs in this function
116-
help_message = (
117-
f"DuoLogSync: check that host-{host} and port-{port} "
118-
"are correct in the config file")
118+
help_message = f"DuoLogSync: check that host-{host} and port-{port} are correct in the configuration file '{Config.get_config_file_path()}'"
119119
writer = None
120120

121121
try:
@@ -132,20 +132,20 @@ async def create_writer(self, host, port, cert_filepath):
132132
writer = await Writer.create_tcp_writer(host, port)
133133

134134
# Failed to open the certificate file
135-
except FileNotFoundError:
136-
shutdown_reason = f"{cert_filepath} could not be opened."
137-
help_message = (
138-
'DuoLogSync: Make sure the filepath for SSL cert file is '
139-
'correct.')
135+
except FileNotFoundError as fnf_error:
136+
error_code, error_message = getattr(fnf_error, "args")
137+
shutdown_reason = f"certificate file '{cert_filepath}' could not be opened due to error: {error_message} error_code: {error_code}"
138+
help_message = f"make sure that certificate file '{cert_filepath}' to establish SSL connection is correct."
140139

141140
# Couldn't establish a connection within 60 seconds
142141
except asyncio.TimeoutError:
143-
shutdown_reason = 'connection to server timed-out after 60 seconds'
142+
shutdown_reason = 'connection to the destination server timed-out after 60 seconds'
144143

145144
# If an invalid hostname or port number is given or simply failed to
146145
# connect using the host and port given
147146
except (gaierror, OSError) as error:
148-
shutdown_reason = f"{error}"
147+
error_code, error_message = getattr(error, "args")
148+
shutdown_reason = f"error while connecting to {host}:{port} error_message: {error_message} error_code: {error_code}"
149149

150150
# An error did not occur and the writer was successfully created
151151
else:

Diff for: tests/test_config.py

+1
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ def test_get_account_block_list(self):
146146
def test_create_config_normal(self):
147147
config_filepath = 'tests/resources/config_files/standard.yml'
148148
correct_config = {
149+
'config_file_path': config_filepath,
149150
'version': '1.0.0',
150151
'dls_settings': {
151152
'log_filepath': '/tmp/duologsync.log',

0 commit comments

Comments
 (0)