1515from scipy .stats import combine_pvalues
1616import matplotlib .pyplot as plt
1717import math
18+ import logging
1819
19-
20- # %% Constants for Excess MAD calculation
21- # These constants are derived from the variance of the binomial distribution for each digit test.
22- # For the first-two-digits test, C = 158.8 as derived in Barney & Schulzke (2016).
23- # For other tests, constants are computed using the formula:
24- # C = K^2 * π / (2 * (Σ sqrt(p_k * (1 - p_k)))^2)
25- # where K is the number of digit categories and p_k are the Benford probabilities.
26- EXCESS_MAD_CONSTANTS = {
27- 'first_two' : 158.8 , # First-two-digits test (90 categories, k=10..99)
28- 1 : 21.27 , # First digit test (9 categories, k=1..9)
29- 2 : 30.30 , # Second digit test (10 categories, k=0..9)
30- 3 : 31.83 , # Third digit test (10 categories, approximately uniform)
31- }
32-
20+ logger = logging .getLogger (__name__ )
3321
3422# %% Class
3523class benfordslaw :
3624 """Class benfordslaw."""
3725
38- def __init__ (self , alpha = 0.05 , method = 'chi2' , pos = 1 , verbose = 3 ):
26+ def __init__ (self , alpha : float = 0.05 , method : str = 'chi2' , pos : int = 1 , verbose : [ str , int ] = 'info' , ):
3927 """Initialize benfordslaw with user-defined parameters.
4028
29+ Constants for Excess MAD calculation.
30+ The constants are derived from the variance of the binomial distribution for each digit test.
31+ For the first-two-digits test, C = 158.8 as derived in Barney & Schulzke (2016).
32+ For other tests, constants are computed using the formula: C = K^2 * π / (2 * (Σ sqrt(p_k * (1 - p_k)))^2), where K is the number of digit categories and p_k are the Benford probabilities.
33+
4134 Parameters
4235 ----------
4336 X : list or numpy array
@@ -58,19 +51,33 @@ def __init__(self, alpha=0.05, method='chi2', pos=1, verbose=3):
5851 * -2: second last digit (etc.)
5952 * 'first_two': First two digits combined (recommended for Benford's Law analysis,
6053 provides higher resolution with 90 categories instead of 9)
61- verbose : int, optional
62- Print message to screen. The default is 3.
54+ verbose : str or int, optional, default='info' (20)
55+ Logging verbosity level. Possible values:
56+ - 0, 60, None, 'silent', 'off', 'no' : no messages.
57+ - 10, 'debug' : debug level and above.
58+ - 20, 'info' : info level and above.
59+ - 30, 'warning' : warning level and above.
60+ - 50, 'critical' : critical level and above.
6361
6462 References
6563 ----------
6664 * Barney, B. J., & Schulzke, K. S. (2016). Moderating "Cry Wolf" Events with Excess MAD
6765 in Benford's Law Research and Practice. Journal of Forensic Accounting Research, 1(1), A66-A90.
6866
6967 """
68+ # Set the logger
69+ verbose = set_logger (verbose = verbose )
70+
7071 if (alpha is None ): alpha = 1
7172 self .alpha = alpha
7273 self .method = method
7374 self .pos = pos
75+ self .EXCESS_MAD_CONSTANTS = {
76+ 'first_two' : 158.8 , # First-two-digits test (90 categories, k=10..99)
77+ 1 : 21.27 , # First digit test (9 categories, k=1..9)
78+ 2 : 30.30 , # Second digit test (10 categories, k=0..9)
79+ 3 : 31.83 , # Third digit test (10 categories, approximately uniform)
80+ }
7481 self .verbose = verbose
7582
7683 # Benford's Law percentage-distribution for leading digits
@@ -89,9 +96,10 @@ def __init__(self, alpha=0.05, method='chi2', pos=1, verbose=3):
8996 self .leading_digits = [10.2 , 10.1 , 10.1 , 10.1 , 10.0 , 10.0 , 9.9 , 9.9 , 9.9 , 9.8 ]
9097 self .digit_range = range (0 , 10 )
9198 elif pos == 0 :
92- raise Exception ('[benfordslaw] >There is no leading digit distribution for the 0 digit!' )
99+ logger .error ("There is no leading digit distribution for the 0 digit!" )
100+ raise ValueError ("There is no leading digit distribution for the 0 digit!" )
93101 elif isinstance (pos , int ) and (pos > 3 or pos < 0 ):
94- if verbose >= 3 : print (f'[benfordslaw] >The is no leading digit distribution explicitly specified for digit [{ pos } ] and therefore the Uniform distribution is used instead.' )
102+ logger . info (f'[benfordslaw] >The is no leading digit distribution explicitly specified for digit [{ pos } ] and therefore the Uniform distribution is used instead.' )
95103 # Approximation, near-uniform distribution
96104 self .leading_digits = [10.0 ] * 10
97105 self .digit_range = range (0 , 10 )
@@ -120,7 +128,7 @@ def fit(self, X):
120128 >>> # Import library
121129 >>> from benfordslaw import benfordslaw
122130 >>> #
123- >>> # Initialize with MAD method (recommended for fraud detection)
131+ >>> # Initialize with MAD method
124132 >>> bl = benfordslaw(pos='first_two', method='mad')
125133 >>> #
126134 >>> # Get data for one candidate
@@ -154,7 +162,7 @@ def fit(self, X):
154162 excess_mad : float
155163 Excess MAD = MAD - E(MAD). Negative values indicate conformity,
156164 positive values indicate deviation from Benford's Law.
157- conformity : str
165+ conformity_mad : str
158166 Conformity assessment based on MAD thresholds ('close conformity',
159167 'acceptable conformity', 'marginally acceptable conformity', or 'nonconforming').
160168 N : int
@@ -169,7 +177,8 @@ def fit(self, X):
169177
170178 """
171179 # Make distribution first digits
172- if self .verbose >= 3 : print ("[benfordslaw] >Analyzing digit position: [%s]" % (self .pos ))
180+ logger .info (f"Analyzing digit position: { self .pos } " )
181+ self .results = {}
173182 # Convert pandas dataframe to numpy array
174183 if isinstance (X , pd .DataFrame ): X = X .values .ravel ()
175184 # Count digit based on position type
@@ -206,30 +215,30 @@ def fit(self, X):
206215
207216 # Show message
208217 if self .method == 'mad' :
209- if self .verbose >= 3 :
210- if excess_mad <= 0 :
211- print ("[benfordslaw] >[mad] No anomaly detected. Excess MAD=%g (%s)" % (excess_mad , conformity ))
212- else :
213- print ("[benfordslaw] >[mad] Potential anomaly. Excess MAD=%g (%s)" % (excess_mad , conformity ))
214- elif np .isnan (Praw ) and (self .verbose >= 3 ):
215- print (f"[benfordslaw] >No data available for this position." )
216- elif (Praw <= self .alpha ) and (self .verbose >= 3 ):
217- print ("[benfordslaw] >[%s] Anomaly detected! P=%g, Tstat=%g" % (self .method , Praw , tstats ))
218- elif (Praw > self .alpha ) and self .verbose >= 3 :
219- print ("[benfordslaw] >[%s] No anomaly detected. P=%g, Tstat=%g" % (self .method , Praw , tstats ))
220-
218+ logger .info (f"[{ self .method } ] { 'No anomaly detected' if excess_mad <= 0 else 'Potential anomaly' } . Excess MAD={ excess_mad } ({ conformity } )" )
219+ elif np .isnan (Praw ):
220+ logger .info ("No data available for this position." )
221+ elif (Praw <= self .alpha ):
222+ logger .info (f"[{ self .method } ] Anomaly detected! P={ Praw } , Tstat={ tstats } " )
223+ elif (Praw > self .alpha ):
224+ logger .info (f"[{ self .method } ] No anomaly detected. P={ Praw } , Tstat={ tstats } " )
225+
226+ # Set bool based on selected method
227+ if self .method == 'mad' :
228+ self .results ['P_significant' ] = excess_mad > 0
229+ else :
230+ self .results ['P_significant' ] = Praw <= self .alpha
231+
221232 # Store
222- self .results = {}
233+ self .results [ 'N' ] = int ( total_count )
223234 self .results ['P' ] = Praw
224235 self .results ['t' ] = tstats
225- self .results ['P_significant' ] = Praw <= self .alpha if not np .isnan (Praw ) else (excess_mad > 0 )
226236 self .results ['percentage_emp' ] = np .c_ [digit , percentage_emp ]
227237 # Always include MAD statistics
228238 self .results ['mad' ] = mad
229239 self .results ['expected_mad' ] = expected_mad
230240 self .results ['excess_mad' ] = excess_mad
231- self .results ['conformity' ] = conformity
232- self .results ['N' ] = int (total_count )
241+ self .results ['conformity_mad' ] = conformity
233242
234243 # return
235244 return self .results
@@ -339,9 +348,9 @@ def _get_excess_mad_constant(self):
339348
340349 """
341350 if self .pos == 'first_two' :
342- return EXCESS_MAD_CONSTANTS ['first_two' ]
343- elif self .pos in EXCESS_MAD_CONSTANTS :
344- return EXCESS_MAD_CONSTANTS [self .pos ]
351+ return self . EXCESS_MAD_CONSTANTS ['first_two' ]
352+ elif self .pos in self . EXCESS_MAD_CONSTANTS :
353+ return self . EXCESS_MAD_CONSTANTS [self .pos ]
345354 else :
346355 # For other positions, use an approximation based on the distribution
347356 # C ≈ K² × π / 2 for approximately uniform distributions
@@ -392,7 +401,7 @@ def plot(self, title='', fontsize=16, barcolor='black', barwidth=0.3, label='Emp
392401
393402 # Build title based on method
394403 if self .method == 'mad' :
395- title = title + "\n Excess MAD=%g (%s)" % (self .results ['excess_mad' ], self .results ['conformity ' ])
404+ title = title + "\n Excess MAD=%g (%s)" % (self .results ['excess_mad' ], self .results ['conformity_mad ' ])
396405 elif not np .isnan (self .results ['P' ]) and self .results ['P' ] <= self .alpha :
397406 title = title + "\n Anomaly detected! P=%g, Tstat=%g" % (self .results ['P' ], self .results ['t' ])
398407 elif not np .isnan (self .results ['P' ]):
@@ -429,7 +438,7 @@ def plot(self, title='', fontsize=16, barcolor='black', barwidth=0.3, label='Emp
429438 plt .show ()
430439 return fig , ax
431440
432- def import_example (self , data = 'elections' , url = None , sep = ',' , verbose = 3 ):
441+ def import_example (self , data = 'elections' , url = None , sep = ',' , verbose = 'info' ):
433442 """Import example dataset from github source.
434443
435444 Import one of the few datasets from github source or specify your own download url link.
@@ -622,17 +631,77 @@ def compute_excess_mad(data, pos='first_two'):
622631 in Benford's Law Research and Practice. Journal of Forensic Accounting Research, 1(1), A66-A90.
623632
624633 """
625- bl = benfordslaw (pos = pos , method = 'mad' , verbose = 0 )
634+ bl = benfordslaw (pos = pos , method = 'mad' , verbose = 'info' )
626635 results = bl .fit (np .asarray (data ))
627636 return {
628637 'mad' : results ['mad' ],
629638 'expected_mad' : results ['expected_mad' ],
630639 'excess_mad' : results ['excess_mad' ],
631- 'conformity ' : results ['conformity ' ],
640+ 'conformity_mad ' : results ['conformity_mad ' ],
632641 'N' : results ['N' ]
633642 }
634643
635644
645+
646+ # %%
def set_logger(verbose: [str, int] = 'info', return_status: bool = False):
    """Set the verbosity level of this module's logger.

    Parameters
    ----------
    verbose : str, int or None, default is 'info' (20)
        Verbosity as a level name (matched case-insensitively) or as a numeric logging level.
            * 0, 60, None, 'silent', 'off', 'no': No messages.
            * 10, 'debug': Messages from debug level and higher.
            * 20, 'info': Messages from info level and higher.
            * 30, 'warning': Messages from warning level and higher.
            * 40, 'error': Messages from error level and higher.
            * 50, 'critical': Messages from critical level and higher.
    return_status : bool, default is False
        When True, return the numeric level that was applied.

    Returns
    -------
    int or None
        The numeric logging level when ``return_status`` is True, otherwise None.
        Callers that need the resolved level must pass ``return_status=True``.

    Raises
    ------
    ValueError
        If ``verbose`` is a string that is not one of the supported level names.

    Notes
    -----
    This function only changes the logger level; it does not attach a handler.
    NOTE(review): unless a handler is configured elsewhere, Python's last-resort
    handler only emits WARNING and above, so 'debug'/'info' records would not be
    displayed -- confirm the handler setup for this module.

    Examples
    --------
    >>> # Set the logger to warning
    >>> set_logger(verbose='warning')
    >>>
    >>> # Test with different messages
    >>> logger.debug("Hello debug")
    >>> logger.info("Hello info")
    >>> logger.warning("Hello warning")
    >>> logger.critical("Hello critical")
    >>>
    """
    # One step above CRITICAL, so that not even critical messages pass.
    silent = logging.CRITICAL + 10

    # 0 and None both mean "no messages".
    if verbose is None or verbose == 0:
        verbose = silent

    # Translate level names into numeric logging levels.
    if isinstance(verbose, str):
        levels = {
            'silent': silent,
            'off': silent,
            'no': silent,
            'debug': logging.DEBUG,
            'info': logging.INFO,
            'warning': logging.WARNING,
            'error': logging.ERROR,
            'critical': logging.CRITICAL,
        }
        key = verbose.strip().lower()
        if key not in levels:
            raise ValueError(
                f"Unknown verbose level {verbose!r}. Choose one of {sorted(levels)} or a numeric level."
            )
        verbose = levels[key]

    # getLogger(__name__) returns the same object as the module-level logger;
    # looking it up by name keeps this function self-contained.
    logging.getLogger(__name__).setLevel(verbose)

    if return_status:
        return verbose
    return None
698+
699+ # %%
def get_logger():
    """Return the effective numeric logging level of this module's logger.

    Returns
    -------
    int
        The value reported by ``logger.getEffectiveLevel()``.

    """
    effective_level = logger.getEffectiveLevel()
    return effective_level
702+
703+
636704# %% Main
if __name__ == "__main__":
    # Script entry point: configure a handler here, otherwise the INFO-level hint
    # below is dropped by Python's last-resort handler (WARNING and above only).
    # basicConfig does nothing if the root logger already has handlers.
    logging.basicConfig(level=logging.INFO)
    logger.info('Please bootup python and run benfordslaw as described in the readme file: https://github.com/erdogant/benfordslaw')
707+
0 commit comments