-
Notifications
You must be signed in to change notification settings - Fork 178
/
references.bib
1329 lines (1153 loc) · 44.1 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
## Background Card fraud
@misc{ECB2020,
  author = {{European Central Bank}},
  title  = {6th report on card fraud},
  month  = aug,
  year   = {2020},
  url    = {https://www.ecb.europa.eu/pub/cardfraud/html/ecb.cardfraudreport202008~521edb602b.en.html#toc2},
  note   = {[Online; Last consulted 09-October-2020]},
}
@misc{NilsonReport2019,
  author = {{Nilson Report}},
  title  = {Nilson report issue 1164},
  month  = nov,
  year   = {2019},
  url    = {https://nilsonreport.com/upload/content_promo/The_Nilson_Report_Issue_1164.pdf},
  note   = {[Online; Last consulted 09-October-2020]},
}
@misc{StatisticBrain2018,
  author = {{Statistic Brain Research Institute}},
  title  = {Credit Card Fraud Statistics},
  month  = apr,
  year   = {2018},
  url    = {https://www.statisticbrain.com/credit-card-fraud-statistics/},
  note   = {[Online; Last consulted 30-March-2021]},
}
## Background ML and credit card fraud
@article{chaudhary2012review,
  author    = {Chaudhary, Khyati and Yadav, Jyoti and Mallick, Bhawna},
  title     = {A review of fraud detection techniques: Credit card},
  journal   = {International Journal of Computer Applications},
  volume    = {45},
  number    = {1},
  pages     = {39--44},
  year      = {2012},
  publisher = {Citeseer}
}
@article{dal2014learned,
  author    = {Dal Pozzolo, Andrea and Caelen, Olivier and Le Borgne, Yann-A{\"e}l and Waterschoot, Serge and Bontempi, Gianluca},
  title     = {Learned lessons in credit card fraud detection from a practitioner perspective},
  journal   = {Expert Systems with Applications},
  volume    = {41},
  number    = {10},
  pages     = {4915--4928},
  year      = {2014},
  publisher = {Elsevier}
}
@article{NGAI2011559,
  author   = {Ngai, E. W. T. and Hu, Yong and Wong, Y. H. and Chen, Yijun and Sun, Xin},
  title    = {The application of data mining techniques in financial fraud detection: A classification framework and an academic review of literature},
  journal  = {Decision Support Systems},
  volume   = {50},
  number   = {3},
  pages    = {559--569},
  year     = {2011},
  note     = {On quantitative methods for detection of financial fraud},
  issn     = {0167-9236},
  doi      = {10.1016/j.dss.2010.08.006},
  url      = {http://www.sciencedirect.com/science/article/pii/S0167923610001302},
  keywords = {Financial fraud, Fraud detection, Literature review, Data mining, Business intelligence},
  abstract = {This paper presents a review of --- and classification scheme for --- the literature on the application of data mining techniques for the detection of financial fraud. Although financial fraud detection (FFD) is an emerging topic of great importance, a comprehensive literature review of the subject has yet to be carried out. This paper thus represents the first systematic, identifiable and comprehensive academic literature review of the data mining techniques that have been applied to FFD. 49 journal articles on the subject published between 1997 and 2008 was analyzed and classified into four categories of financial fraud (bank fraud, insurance fraud, securities and commodities fraud, and other related financial fraud) and six classes of data mining techniques (classification, regression, clustering, prediction, outlier detection, and visualization). The findings of this review clearly show that data mining techniques have been applied most extensively to the detection of insurance fraud, although corporate fraud and credit card fraud have also attracted a great deal of attention in recent years. In contrast, we find a distinct lack of research on mortgage fraud, money laundering, and securities and commodities fraud. The main data mining techniques used for FFD are logistic models, neural networks, the Bayesian belief network, and decision trees, all of which provide primary solutions to the problems inherent in the detection and classification of fraudulent data. This paper also addresses the gaps between FFD and the needs of the industry to encourage additional research on neglected topics, and concludes with several suggestions for further FFD research.}
}
@article{zojaji2016survey,
  author  = {Zojaji, Zahra and Atani, Reza Ebrahimi and Monadjemi, Amir Hassan and others},
  title   = {A survey of credit card fraud detection techniques: data and technique oriented perspective},
  journal = {arXiv preprint arXiv:1611.06439},
  year    = {2016}
}
@inproceedings{lopez2016review,
  author       = {Lopez-Rojas, Edgar Alonso and Axelsson, Stefan},
  title        = {A review of computer simulation for fraud detection research in financial datasets},
  booktitle    = {2016 Future Technologies Conference (FTC)},
  pages        = {932--935},
  year         = {2016},
  organization = {IEEE}
}
@article{adewumi2017survey,
  author    = {Adewumi, Aderemi O and Akinyelu, Andronicus A},
  title     = {A survey of machine-learning and nature-inspired based credit card fraud detection techniques},
  journal   = {International Journal of System Assurance Engineering and Management},
  volume    = {8},
  number    = {2},
  pages     = {937--953},
  year      = {2017},
  publisher = {Springer}
}
@inproceedings{popat2018survey,
  author       = {Popat, Rimpal R and Chaudhary, Jayesh},
  title        = {A survey on credit card fraud detection using machine learning},
  booktitle    = {2018 2nd International Conference on Trends in Electronics and Informatics (ICOEI)},
  pages        = {1120--1125},
  year         = {2018},
  organization = {IEEE}
}
@inproceedings{sinayobye2018state,
  author       = {Sinayobye, Janvier Omar and Kiwanuka, Fred and Kyanda, Swaib Kaawaase},
  title        = {A state-of-the-art review of machine learning techniques for fraud detection research},
  booktitle    = {2018 IEEE/ACM Symposium on Software Engineering in Africa (SEiA)},
  pages        = {11--19},
  year         = {2018},
  organization = {IEEE}
}
@article{mekterovic2018systematic,
  author  = {Mekterovi{\'c}, Igor and Brki{\'c}, Ljiljana and Baranovi{\'c}, Mirta},
  title   = {A systematic review of data mining approaches to credit card fraud detection},
  journal = {WSEAS Transactions on Business and Economics},
  volume  = {15},
  pages   = {437--444},
  year    = {2018}
}
@article{sadgali2018detection,
  author  = {Sadgali, Imane and Sael, Nawal and Benabbou, Faouzia},
  title   = {Detection of credit card fraud: State of art},
  journal = {International Journal of Computer Science and Network Security},
  volume  = {18},
  number  = {11},
  pages   = {76--83},
  year    = {2018}
}
@article{patil2018survey,
  author  = {Patil, Vipul and Lilhore, Umesh Kumar},
  title   = {A survey on different data mining \& machine learning methods for credit card fraud detection},
  journal = {International Journal of Scientific Research in Computer Science, Engineering and Information Technology},
  volume  = {3},
  number  = {5},
  pages   = {320--325},
  year    = {2018}
}
@article{yousefi2019comprehensive,
  author  = {Yousefi, Niloofar and Alaghband, Marie and Garibay, Ivan},
  title   = {A Comprehensive Survey on Machine Learning Techniques and User Authentication Approaches for Credit Card Fraud Detection},
  journal = {arXiv preprint arXiv:1912.02629},
  year    = {2019}
}
@inproceedings{priscilla2019credit,
  author       = {Priscilla, C Victoria and Prabha, D Padma},
  title        = {Credit Card Fraud Detection: A Systematic Review},
  booktitle    = {International Conference on Information, Communication and Computing Technology},
  pages        = {290--303},
  year         = {2019},
  organization = {Springer}
}
@article{lucas2020credit,
  author  = {Lucas, Yvan and Jurgovsky, Johannes},
  title   = {Credit card fraud detection using machine learning: A survey},
  journal = {arXiv preprint arXiv:2010.06479},
  year    = {2020}
}
## ML, more general
@book{friedman2001elements,
  author    = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert},
  title     = {The elements of statistical learning},
  series    = {Springer Series in Statistics},
  publisher = {Springer},
  address   = {New York},
  year      = {2001}
}
@book{bishop2006pattern,
  author    = {Bishop, Christopher M},
  title     = {Pattern recognition and machine learning},
  publisher = {Springer},
  year      = {2006}
}
@book{bontempi2021statistical,
  author    = {Bontempi, Gianluca},
  title     = {Statistical foundations of machine learning},
  edition   = {Second},
  publisher = {Universit{\'e} Libre de Bruxelles},
  year      = {2021}
}
## Python/Data science
@book{muller2016introduction,
  author    = {M{\"u}ller, Andreas C and Guido, Sarah},
  title     = {Introduction to machine learning with Python: a guide for data scientists},
  publisher = {O'Reilly Media, Inc.},
  year      = {2016}
}
@book{mckinney2017python,
  author    = {McKinney, Wes},
  title     = {Python for data analysis: Data wrangling with Pandas, NumPy, and IPython},
  edition   = {Second},
  publisher = {O'Reilly Media, Inc.},
  year      = {2017}
}
## MLG publications
@book{leborgne2022fraud,
  author    = {Le Borgne, Yann-A{\"e}l and Siblini, Wissam and Lebichot, Bertrand and Bontempi, Gianluca},
  title     = {Reproducible Machine Learning for Credit Card Fraud Detection - Practical Handbook},
  publisher = {Universit{\'e} Libre de Bruxelles},
  year      = {2022},
  url       = {https://github.com/Fraud-Detection-Handbook/fraud-detection-handbook}
}
@inproceedings{siblini2021transfer,
  author    = {Siblini, Wissam and Coter, Guillaume and Fabry, R{\'e}my and He-Guelton, Liyun and Obl{\'e}, Fr{\'e}d{\'e}ric and Lebichot, Bertrand and Le Borgne, Yann-A{\"e}l and Bontempi, Gianluca},
  title     = {Transfer Learning for Credit Card Fraud Detection: {A} Journey from Research to Production},
  booktitle = {Proceedings of Data Science and Advanced Analytics (DSAA 2021)},
  year      = {2021},
  url       = {https://arxiv.org/abs/2107.09323},
}
@article{lebichot2021transfer,
  author    = {Lebichot, Bertrand and Verhelst, Th{\'e}o and Le Borgne, Yann-A{\"e}l and He-Guelton, Liyun and Obl{\'e}, Fr{\'e}d{\'e}ric and Bontempi, Gianluca},
  title     = {Transfer Learning Strategies for Credit Card Fraud Detection},
  journal   = {IEEE Access},
  volume    = {9},
  pages     = {114754--114766},
  year      = {2021},
  publisher = {IEEE}
}
@article{lebichot2021incremental,
  author    = {Lebichot, Bertrand and Paldino, Gian Marco and Siblini, Wissam and He-Guelton, Liyun and Obl{\'e}, Fr{\'e}d{\'e}ric and Bontempi, Gianluca},
  title     = {Incremental learning strategies for credit cards fraud detection},
  journal   = {International Journal of Data Science and Analytics},
  pages     = {1--10},
  year      = {2021},
  publisher = {Springer}
}
@article{carcillo2019combining,
  author    = {Carcillo, Fabrizio and Le Borgne, Yann-A{\"e}l and Caelen, Olivier and Kessaci, Yacine and Obl{\'e}, Fr{\'e}d{\'e}ric and Bontempi, Gianluca},
  title     = {Combining unsupervised and supervised learning in credit card fraud detection},
  journal   = {Information Sciences},
  year      = {2019},
  publisher = {Elsevier}
}
@inproceedings{lebichot2019deep,
  author       = {Lebichot, Bertrand and Le Borgne, Yann-A{\"e}l and He-Guelton, Liyun and Obl{\'e}, Fr{\'e}d{\'e}ric and Bontempi, Gianluca},
  title        = {Deep-learning domain adaptation techniques for credit cards fraud detection},
  booktitle    = {INNS Big Data and Deep Learning conference},
  pages        = {78--88},
  year         = {2019},
  organization = {Springer}
}
@article{carcillo2018streaming,
  author    = {Carcillo, Fabrizio and Le Borgne, Yann-A{\"e}l and Caelen, Olivier and Bontempi, Gianluca},
  title     = {Streaming active learning strategies for real-life credit card fraud detection: assessment and visualization},
  journal   = {International Journal of Data Science and Analytics},
  volume    = {5},
  number    = {4},
  pages     = {285--300},
  year      = {2018},
  publisher = {Springer}
}
@phdthesis{carcillo2018beyond,
  author = {Carcillo, Fabrizio},
  title  = {Beyond Supervised Learning in Credit Card Fraud Detection: A Dive into Semi-supervised and Distributed Learning},
  school = {Universit{\'e} libre de Bruxelles},
  year   = {2018}
}
@article{carcillo2018scarff,
  author    = {Carcillo, Fabrizio and Dal Pozzolo, Andrea and Le Borgne, Yann-A{\"e}l and Caelen, Olivier and Mazzer, Yannis and Bontempi, Gianluca},
  title     = {{SCARFF}: a scalable framework for streaming credit card fraud detection with {Spark}},
  journal   = {Information Fusion},
  volume    = {41},
  pages     = {182--194},
  year      = {2018},
  publisher = {Elsevier}
}
@article{dal2017credit,
  author    = {Dal Pozzolo, Andrea and Boracchi, Giacomo and Caelen, Olivier and Alippi, Cesare and Bontempi, Gianluca},
  title     = {Credit card fraud detection: a realistic modeling and a novel learning strategy},
  journal   = {IEEE Transactions on Neural Networks and Learning Systems},
  volume    = {29},
  number    = {8},
  pages     = {3784--3797},
  year      = {2017},
  publisher = {IEEE}
}
@phdthesis{dal2015adaptive,
  author = {Dal Pozzolo, Andrea},
  title  = {Adaptive machine learning for credit card fraud detection},
  school = {Universit{\'e} libre de Bruxelles},
  year   = {2015}
}
## Imbalance learning
@article{bentejac2021comparative,
  author    = {Bent{\'e}jac, Candice and Cs{\"o}rgo, Anna and Mart{\'\i}nez-Mu{\~n}oz, Gonzalo},
  title     = {A comparative analysis of gradient boosting algorithms},
  journal   = {Artificial Intelligence Review},
  volume    = {54},
  number    = {3},
  pages     = {1937--1967},
  year      = {2021},
  publisher = {Springer}
}
@article{gupta2020class,
  author  = {Gupta, Akhilesh and Tatbul, Nesime and Marcus, Ryan and Zhou, Shengtian and Lee, Insup and Gottschlich, Justin},
  title   = {Class-Weighted Evaluation Metrics for Imbalanced Data Classification},
  journal = {arXiv preprint arXiv:2010.05995},
  year    = {2020}
}
@article{ali2019review,
  author  = {Ali, Haseeb and Salleh, Mohd Najib Mohd and Hussain, Kashif and Ahmad, Arshad and Ullah, Ayaz and Muhammad, Arshad and Naseem, Rashid and Khan, Muzammil},
  title   = {A review on data preprocessing methods for class imbalance problem},
  journal = {International Journal of Engineering \& Technology},
  volume  = {8},
  pages   = {390--397},
  year    = {2019}
}
@article{makki2019experimental,
  author    = {Makki, Sara and Assaghir, Zainab and Taher, Yehia and Haque, Rafiqul and Hacid, Mohand-Said and Zeineddine, Hassan},
  title     = {An experimental study with imbalanced classification approaches for credit card fraud detection},
  journal   = {IEEE Access},
  volume    = {7},
  pages     = {93010--93022},
  year      = {2019},
  publisher = {IEEE}
}
@book{fernandez2018learning,
  author    = {Fern{\'a}ndez, Alberto and Garc{\'\i}a, Salvador and Galar, Mikel and Prati, Ronaldo C and Krawczyk, Bartosz and Herrera, Francisco},
  title     = {Learning from imbalanced data sets},
  publisher = {Springer},
  year      = {2018}
}
@article{dorogush2018catboost,
  author  = {Dorogush, Anna Veronika and Ershov, Vasily and Gulin, Andrey},
  title   = {{CatBoost}: gradient boosting with categorical features support},
  journal = {arXiv preprint arXiv:1810.11363},
  year    = {2018}
}
@article{JMLR:v18:16-365,
  author  = {Lema{\^\i}tre, Guillaume and Nogueira, Fernando and Aridas, Christos K.},
  title   = {Imbalanced-learn: A {Python} Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning},
  journal = {Journal of Machine Learning Research},
  year    = {2017},
  volume  = {18},
  number  = {17},
  pages   = {1--5},
  url     = {http://jmlr.org/papers/v18/16-365.html}
}
@article{last2017oversampling,
  author  = {Last, Felix and Douzas, Georgios and Bacao, Fernando},
  title   = {Oversampling for imbalanced learning based on k-means and {SMOTE}},
  journal = {arXiv preprint arXiv:1711.00837},
  year    = {2017}
}
@article{haixiang2017learning,
  author    = {Haixiang, Guo and Yijing, Li and Shang, Jennifer and Mingyun, Gu and Yuanyue, Huang and Bing, Gong},
  title     = {Learning from class-imbalanced data: Review of methods and applications},
  journal   = {Expert Systems with Applications},
  volume    = {73},
  pages     = {220--239},
  year      = {2017},
  publisher = {Elsevier}
}
@article{nguyen2011borderline,
  author    = {Nguyen, Hien M and Cooper, Eric W and Kamei, Katsuari},
  title     = {Borderline over-sampling for imbalanced data classification},
  journal   = {International Journal of Knowledge Engineering and Soft Data Paradigms},
  volume    = {3},
  number    = {1},
  pages     = {4--21},
  year      = {2011},
  publisher = {Inderscience Publishers}
}
@article{KRIVKO20106070,
  author   = {Krivko, M.},
  title    = {A hybrid model for plastic card fraud detection systems},
  journal  = {Expert Systems with Applications},
  volume   = {37},
  number   = {8},
  pages    = {6070--6076},
  year     = {2010},
  issn     = {0957-4174},
  doi      = {10.1016/j.eswa.2010.02.119},
  url      = {http://www.sciencedirect.com/science/article/pii/S0957417410001582},
  keywords = {Fraud detection, Hybrid model, Plastic card fraud, One-class classification},
  abstract = {In this paper we present the framework for a hybrid model for plastic card fraud detection systems. The proposed data-customised approach combines elements of supervised and unsupervised methodologies aiming to compensate for the individual deficiencies of the methods. We demonstrate the ability of the hybrid model to identify fraudulent activity on the real debit card transaction data. We also explore the model's efficiency against that of the existing monitoring system of the collaborating bank, using appropriate performance assessment criteria.}
}
@article{yen2009cluster,
  author    = {Yen, Show-Jane and Lee, Yue-Shi},
  title     = {Cluster-based under-sampling approaches for imbalanced data distributions},
  journal   = {Expert Systems with Applications},
  volume    = {36},
  number    = {3},
  pages     = {5718--5727},
  year      = {2009},
  publisher = {Elsevier}
}
@inproceedings{he2008adasyn,
  author       = {He, Haibo and Bai, Yang and Garcia, Edwardo A and Li, Shutao},
  title        = {{ADASYN}: Adaptive synthetic sampling approach for imbalanced learning},
  booktitle    = {2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)},
  pages        = {1322--1328},
  year         = {2008},
  organization = {IEEE}
}
@article{liu2008exploratory,
  author    = {Liu, Xu-Ying and Wu, Jianxin and Zhou, Zhi-Hua},
  title     = {Exploratory undersampling for class-imbalance learning},
  journal   = {IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics)},
  volume    = {39},
  number    = {2},
  pages     = {539--550},
  year      = {2008},
  publisher = {IEEE}
}
@article{chawla2008automatically,
  author    = {Chawla, Nitesh V and Cieslak, David A and Hall, Lawrence O and Joshi, Ajay},
  title     = {Automatically countering imbalance and its empirical relationship to cost},
  journal   = {Data Mining and Knowledge Discovery},
  volume    = {17},
  number    = {2},
  pages     = {225--252},
  year      = {2008},
  publisher = {Springer}
}
@inproceedings{han2005borderline,
  author       = {Han, Hui and Wang, Wen-Yuan and Mao, Bing-Huan},
  title        = {{Borderline-SMOTE}: a new over-sampling method in imbalanced data sets learning},
  booktitle    = {International conference on intelligent computing},
  pages        = {878--887},
  year         = {2005},
  organization = {Springer}
}
@article{chawla2004special,
  author    = {Chawla, Nitesh V and Japkowicz, Nathalie and Kotcz, Aleksander},
  title     = {Special issue on learning from imbalanced data sets},
  journal   = {ACM SIGKDD Explorations Newsletter},
  volume    = {6},
  number    = {1},
  pages     = {1--6},
  year      = {2004},
  publisher = {ACM},
  address   = {New York, NY, USA}
}
@techreport{chen2004using,
  author      = {Chen, Chao and Liaw, Andy and Breiman, Leo},
  title       = {Using random forest to learn imbalanced data},
  institution = {Department of Statistics, University of California, Berkeley},
  number      = {666},
  year        = {2004}
}
@article{batista2004study,
  author    = {Batista, Gustavo EAPA and Prati, Ronaldo C and Monard, Maria Carolina},
  title     = {A study of the behavior of several methods for balancing machine learning training data},
  journal   = {ACM SIGKDD Explorations Newsletter},
  volume    = {6},
  number    = {1},
  pages     = {20--29},
  year      = {2004},
  publisher = {ACM},
  address   = {New York, NY, USA}
}
@inproceedings{batista2003balancing,
  author    = {Batista, Gustavo EAPA and Bazzan, Ana LC and Monard, Maria Carolina and others},
  title     = {Balancing Training Data for Automated Annotation of Keywords: a Case Study},
  booktitle = {WOB},
  pages     = {10--18},
  year      = {2003}
}
@inproceedings{mani2003knn,
  author       = {Mani, Inderjeet and Zhang, I},
  title        = {{kNN} approach to unbalanced data distributions: a case study involving information extraction},
  booktitle    = {Proceedings of workshop on learning from imbalanced datasets},
  volume       = {126},
  year         = {2003},
  organization = {ICML United States}
}
@article{chawla2002smote,
  author  = {Chawla, Nitesh V and Bowyer, Kevin W and Hall, Lawrence O and Kegelmeyer, W Philip},
  title   = {{SMOTE}: synthetic minority over-sampling technique},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {16},
  pages   = {321--357},
  year    = {2002}
}
@article{friedman2001greedy,
  author    = {Friedman, Jerome H},
  title     = {Greedy function approximation: a gradient boosting machine},
  journal   = {Annals of Statistics},
  volume    = {29},
  number    = {5},
  pages     = {1189--1232},
  year      = {2001},
  publisher = {JSTOR}
}
@inproceedings{laurikkala2001improving,
  author       = {Laurikkala, Jorma},
  title        = {Improving identification of difficult small classes by balancing class distribution},
  booktitle    = {Conference on Artificial Intelligence in Medicine in Europe},
  pages        = {63--66},
  year         = {2001},
  organization = {Springer}
}
@inproceedings{provost2000machine,
  author       = {Provost, Foster},
  title        = {Machine learning from imbalanced data sets 101},
  booktitle    = {Proceedings of the AAAI'2000 workshop on imbalanced data sets},
  volume       = {68},
  number       = {2000},
  pages        = {1--3},
  year         = {2000},
  organization = {AAAI Press}
}
@article{maclin1997empirical,
  author    = {Maclin, Richard and Opitz, David},
  title     = {An empirical evaluation of bagging and boosting},
  journal   = {AAAI/IAAI},
  volume    = {1997},
  pages     = {546--551},
  year      = {1997},
  publisher = {Citeseer}
}
@article{freund1997decision,
  author    = {Freund, Yoav and Schapire, Robert E},
  title     = {A decision-theoretic generalization of on-line learning and an application to boosting},
  journal   = {Journal of Computer and System Sciences},
  volume    = {55},
  number    = {1},
  pages     = {119--139},
  year      = {1997},
  publisher = {Elsevier}
}
@article{tomek1976two,
  author  = {Tomek, Ivan},
  title   = {Two modifications of {CNN}},
  journal = {IEEE Transactions on Systems, Man, and Cybernetics},
  volume  = {6},
  pages   = {769--772},
  year    = {1976}
}
@article{wilson1972asymptotic,
  author    = {Wilson, Dennis L},
  title     = {Asymptotic properties of nearest neighbor rules using edited data},
  journal   = {IEEE Transactions on Systems, Man, and Cybernetics},
  volume    = {2},
  number    = {3},
  pages     = {408--421},
  year      = {1972},
  publisher = {IEEE}
}
## Feature engineering
@article{whitrow2009transaction,
  author    = {Whitrow, Christopher and Hand, David J and Juszczak, Piotr and Weston, David and Adams, Niall M},
  title     = {Transaction aggregation as a strategy for credit card fraud detection},
  journal   = {Data Mining and Knowledge Discovery},
  volume    = {18},
  number    = {1},
  pages     = {30--55},
  year      = {2009},
  publisher = {Springer}
}
@article{VANVLASSELAER201538,
  author    = {Van Vlasselaer, V{\'e}ronique and Bravo, Cristi{\'a}n and Caelen, Olivier and Eliassi-Rad, Tina and Akoglu, Leman and Snoeck, Monique and Baesens, Bart},
  title     = {{APATE}: A novel approach for automated credit card transaction fraud detection using network-based extensions},
  journal   = {Decision Support Systems},
  volume    = {75},
  pages     = {38--48},
  year      = {2015},
  publisher = {Elsevier}
}
## Anomaly detection
@article{AHMED2016278,
  author   = {Ahmed, Mohiuddin and Mahmood, Abdun Naser and Islam, Md. Rafiqul},
  title    = {A survey of anomaly detection techniques in financial domain},
  journal  = {Future Generation Computer Systems},
  volume   = {55},
  pages    = {278--288},
  year     = {2016},
  issn     = {0167-739X},
  doi      = {10.1016/j.future.2015.01.001},
  url      = {http://www.sciencedirect.com/science/article/pii/S0167739X15000023},
  keywords = {Clustering, Fraud detection, Anomaly detection},
  abstract = {Anomaly detection is an important data analysis task. It is used to identify interesting and emerging patterns, trends and anomalies from data. Anomaly detection is an important tool to detect abnormalities in many different domains including financial fraud detection, computer network intrusion, human behavioural analysis, gene expression analysis and many more. Recently, in the financial sector, there has been renewed interest in research on detection of fraudulent activities. There has been a lot of work in the area of clustering based unsupervised anomaly detection in the financial domain. This paper presents an in-depth survey of various clustering based anomaly detection technique and compares them from different perspectives. In addition, we discuss the lack of real world data and how synthetic data has been used to validate current detection techniques.}
}
@article{zhang2019hoba,
  author    = {Zhang, Xinwei and Han, Yaoci and Xu, Wei and Wang, Qili},
  title     = {{HOBA}: A novel feature engineering methodology for credit card fraud detection with a deep learning architecture},
  journal   = {Information Sciences},
  year      = {2019},
  publisher = {Elsevier}
}
@article{jones2019setting,
  author    = {Jones, David T},
  title     = {Setting the standards for machine learning in biology},
  journal   = {Nature Reviews Molecular Cell Biology},
  volume    = {20},
  number    = {11},
  pages     = {659--660},
  year      = {2019},
  publisher = {Nature Publishing Group}
}
## Cost-sensitive
@article{ling2008cost,
  author    = {Ling, Charles X and Sheng, Victor S},
  title     = {Cost-sensitive learning and the class imbalance problem},
  journal   = {Encyclopedia of machine learning},
  volume    = {2011},
  pages     = {231--235},
  year      = {2008},
  publisher = {Citeseer}
}
## Accuracy metrics
@inproceedings{elkan2001foundations,
  author       = {Elkan, Charles},
  title        = {The foundations of cost-sensitive learning},
  booktitle    = {International joint conference on artificial intelligence},
  volume       = {17},
  number       = {1},
  pages        = {973--978},
  year         = {2001},
  organization = {Lawrence Erlbaum Associates Ltd}
}
@article{fawcett2004roc,
  author  = {Fawcett, Tom},
  title   = {{ROC} graphs: Notes and practical considerations for researchers},
  journal = {Machine Learning},
  volume  = {31},
  number  = {1},
  pages   = {1--38},
  year    = {2004}
}
@article{fawcett2006introduction,
  author    = {Fawcett, Tom},
  title     = {An introduction to {ROC} analysis},
  journal   = {Pattern Recognition Letters},
  volume    = {27},
  number    = {8},
  pages     = {861--874},
  year      = {2006},
  publisher = {Elsevier}
}
@inproceedings{davis2006relationship,
  author    = {Davis, Jesse and Goadrich, Mark},
  title     = {The relationship between Precision-Recall and {ROC} curves},
  booktitle = {Proceedings of the 23rd International Conference on Machine Learning},
  pages     = {233--240},
  year      = {2006}
}
@incollection{chawla2009data,
  author    = {Chawla, Nitesh V},
  title     = {Data mining for imbalanced datasets: An overview},
  booktitle = {Data mining and knowledge discovery handbook},
  pages     = {875--886},
  year      = {2009},
  publisher = {Springer}
}
@article{fan2011detection,
  author    = {Fan, Guangzhe and Zhu, Mu},
  title     = {Detection of rare items with target},
  journal   = {Statistics and Its Interface},
  volume    = {4},
  number    = {1},
  pages     = {11--17},
  year      = {2011},
  publisher = {International Press of Boston}
}
@inproceedings{boyd2013area,
  author       = {Boyd, Kendrick and Eng, Kevin H and Page, C David},
  title        = {Area under the precision-recall curve: point estimates and confidence intervals},
  booktitle    = {Joint European conference on machine learning and knowledge discovery in databases},
  pages        = {451--466},
  year         = {2013},
  organization = {Springer}
}
@article{saito2015precision,
  author    = {Saito, Takaya and Rehmsmeier, Marc},
  title     = {The precision-recall plot is more informative than the {ROC} plot when evaluating binary classifiers on imbalanced datasets},
  journal   = {PLoS ONE},
  volume    = {10},
  number    = {3},
  pages     = {e0118432},
  year      = {2015},
  publisher = {Public Library of Science}
}
@inproceedings{flach2015precision,
  author    = {Flach, Peter and Kull, Meelis},
  title     = {Precision-recall-gain curves: {PR} analysis done right},
  booktitle = {Advances in neural information processing systems},
  pages     = {838--846},
  year      = {2015}
}
@article{muschelli2019roc,
  author    = {Muschelli, John},
  title     = {{ROC} and {AUC} with a binary predictor: a potentially misleading metric},
  journal   = {Journal of Classification},
  pages     = {1--13},
  year      = {2019},
  publisher = {Springer}
}
@article{tharwat2020classification,
  author    = {Tharwat, Alaa},
  title     = {Classification assessment methods},
  journal   = {Applied Computing and Informatics},
  year      = {2020},
  publisher = {Emerald Publishing Limited}
}
## Model selection
@article{cerqueira2020evaluating,
  author    = {Cerqueira, Vitor and Torgo, Luis and Mozeti{\v{c}}, Igor},
  title     = {Evaluating time series forecasting models: an empirical study on performance estimation methods},
  journal   = {Machine Learning},
  volume    = {109},
  number    = {11},
  pages     = {1997--2028},
  year      = {2020},
  publisher = {Springer}
}
@article{gama2014survey,
  author    = {Gama, Jo{\~a}o and {\v{Z}}liobait{\.e}, Indr{\.e} and Bifet, Albert and Pechenizkiy, Mykola and Bouchachia, Abdelhamid},
  title     = {A survey on concept drift adaptation},
  journal   = {ACM Computing Surveys (CSUR)},
  volume    = {46},
  number    = {4},
  pages     = {1--37},
  year      = {2014},
  publisher = {ACM},
  address   = {New York, NY, USA}
}
@article{bergstra2012random,
  author  = {Bergstra, James and Bengio, Yoshua},
  title   = {Random search for hyper-parameter optimization},
  journal = {Journal of Machine Learning Research},
  volume  = {13},
  number  = {2},
  pages   = {281--305},
  year    = {2012}
}
## Datasets
@misc{Kaggle2016,
  author = {Kaggle},
  title  = {Credit Card Fraud Detection dataset},
  month  = nov,
  year   = {2016},
  url    = {https://www.kaggle.com/mlg-ulb/creditcardfraud},
  note   = {[Online; Last consulted 09-March-2021]},
}
## Libraries
% Web resource. The url field holds the bare address; styles add the URL
% markup themselves.
@misc{Imblearn,
  author = {{Imblearn}},
  title  = {Imbalanced learning library for Python},
  year   = {2021},
  url    = {https://imbalanced-learn.org/},
  note   = {[Online; Last consulted 26-June-2021]}
}
## Cloud
% Jupyter is a proper noun; braces keep its capital under sentence casing.
@article{beg2021using,
  author    = {Beg, Marijan and Taka, Juliette and Kluyver, Thomas and Konovalov, Alexander and Ragan-Kelley, Min and Thi{\'e}ry, Nicolas M and Fangohr, Hans},
  title     = {Using {Jupyter} for reproducible scientific workflows},
  journal   = {Computing in Science \& Engineering},
  volume    = {23},
  number    = {2},
  pages     = {36--46},
  year      = {2021},
  publisher = {IEEE}
}
## Deep learning
% Web resource. The url field holds the bare address, month uses the standard
% macro, and the IEEE-CIS acronym is brace-protected against recasing.
@misc{kaggle2019fraud,
  author = {{Kaggle}},
  title  = {{IEEE-CIS} Fraud Detection - Can you detect fraud from customer transactions?},
  month  = sep,
  year   = {2019},
  url    = {https://www.kaggle.com/c/ieee-fraud-detection},
  note   = {[Online; Last consulted 26-August-2021]}
}
% The product name XGBoost and the ACM SIGKDD acronyms are brace-protected.
@inproceedings{chen2016xgboost,
  author    = {Chen, Tianqi and Guestrin, Carlos},
  title     = {{XGBoost}: A scalable tree boosting system},
  booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  pages     = {785--794},
  year      = {2016}
}
% Conference paper: the proceedings title belongs in booktitle, not journal.
% The product name LightGBM is brace-protected against recasing.
@inproceedings{ke2017lightgbm,
  author    = {Ke, Guolin and Meng, Qi and Finley, Thomas and Wang, Taifeng and Chen, Wei and Ma, Weidong and Ye, Qiwei and Liu, Tie-Yan},
  title     = {{LightGBM}: A highly efficient gradient boosting decision tree},
  booktitle = {Advances in neural information processing systems},
  volume    = {30},
  pages     = {3146--3154},
  year      = {2017}
}
% The product name CatBoost is brace-protected so sentence casing does not
% turn it into "Catboost".
@article{prokhorenkova2017catboost,
  author  = {Prokhorenkova, Liudmila and Gusev, Gleb and Vorobev, Aleksandr and Dorogush, Anna Veronika and Gulin, Andrey},
  title   = {{CatBoost}: unbiased boosting with categorical features},
  journal = {arXiv preprint arXiv:1706.09516},
  year    = {2017}
}
% Journal name spelled "Machine Learning", matching the other entry from the
% same journal in this file.
@article{breiman2001random,
  author    = {Breiman, Leo},
  title     = {Random forests},
  journal   = {Machine Learning},
  volume    = {45},
  number    = {1},
  pages     = {5--32},
  year      = {2001},
  publisher = {Springer}
}
@inproceedings{domingos2000mining,
  author    = {Domingos, Pedro and Hulten, Geoff},
  title     = {Mining high-speed data streams},
  booktitle = {Proceedings of the sixth ACM SIGKDD international conference on Knowledge discovery and data mining},
  year      = {2000},
  pages     = {71--80}
}
% Conference paper: the proceedings title belongs in booktitle, not journal.
@inproceedings{lakshminarayanan2014mondrian,
  author    = {Lakshminarayanan, Balaji and Roy, Daniel M and Teh, Yee Whye},
  title     = {Mondrian forests: Efficient online random forests},
  booktitle = {Advances in neural information processing systems},
  volume    = {27},
  pages     = {3140--3148},
  year      = {2014}
}
@article{sun2018concept,
  author    = {Sun, Yu and Tang, Ke and Zhu, Zexuan and Yao, Xin},
  title     = {Concept drift adaptation by exploiting historical knowledge},
  journal   = {IEEE transactions on neural networks and learning systems},
  publisher = {IEEE},
  volume    = {29},
  number    = {10},
  pages     = {4822--4832},
  year      = {2018}
}
% Compound surname: the comma form keeps "Correa Bahnsen" together as the
% family name.
@article{bahnsen2016feature,
  author    = {Correa Bahnsen, Alejandro and Aouada, Djamila and Stojanovic, Aleksandar and Ottersten, Bj{\"o}rn},
  title     = {Feature engineering strategies for credit card fraud detection},
  journal   = {Expert Systems with Applications},
  volume    = {51},
  pages     = {134--142},
  year      = {2016},
  publisher = {Elsevier}
}
% Springer is recorded as the publisher of the proceedings; the organization
% field is meant for a sponsoring body.
@inproceedings{fu2016credit,
  author    = {Fu, Kang and Cheng, Dawei and Tu, Yi and Zhang, Liqing},
  title     = {Credit card fraud detection using convolutional neural networks},
  booktitle = {International conference on neural information processing},
  pages     = {483--490},
  year      = {2016},
  publisher = {Springer}
}
@article{jurgovsky2018sequence,
  author    = {Jurgovsky, Johannes and Granitzer, Michael and Ziegler, Konstantin and Calabretto, Sylvie and Portier, Pierre-Edouard and He-Guelton, Liyun and Caelen, Olivier},
  title     = {Sequence classification for credit-card fraud detection},
  journal   = {Expert Systems with Applications},
  publisher = {Elsevier},
  volume    = {100},
  pages     = {234--245},
  year      = {2018}
}
% The acronym NAG is brace-protected so sentence casing does not turn it
% into "Nag". IEEE is recorded as the publisher of the proceedings.
@inproceedings{dastidar2020nag,
  author    = {Dastidar, Kanishka Ghosh and Jurgovsky, Johannes and Siblini, Wissam and He-Guelton, Liyun and Granitzer, Michael},
  title     = {{NAG}: Neural feature aggregation framework for credit card fraud detection},
  booktitle = {2020 IEEE International Conference on Data Mining (ICDM)},
  pages     = {92--101},
  year      = {2020},
  publisher = {IEEE}
}
% The first author's surname ends in y with an acute accent, written as a
% brace-wrapped accent command so BibTeX treats it as a single letter.
@article{konevcny2016federated,
  author  = {Kone{\v{c}}n{\'y}, Jakub and McMahan, H Brendan and Yu, Felix X and Richt{\'a}rik, Peter and Suresh, Ananda Theertha and Bacon, Dave},
  title   = {Federated learning: Strategies for improving communication efficiency},
  journal = {arXiv preprint arXiv:1610.05492},
  year    = {2016}
}
% The booktitle is written in natural reading order rather than the inverted
% catalogue form; IEEE is recorded as the publisher of the proceedings.
@inproceedings{ghosh1994credit,
  author    = {Ghosh, Sushmito and Reilly, Douglas L},
  title     = {Credit card fraud detection with a neural-network},
  booktitle = {Proceedings of the Twenty-Seventh Hawaii International Conference on System Sciences},
  volume    = {3},
  pages     = {621--630},
  year      = {1994},
  publisher = {IEEE}
}
@inproceedings{aleskerov1997cardwatch,
title={Cardwatch: A neural network based database mining system for credit card fraud detection},
author={Aleskerov, Emin and Freisleben, Bernd and Rao, Bharat},
booktitle={Proceedings of the IEEE/IAFE 1997 computational intelligence for financial engineering (CIFEr)},