-
Notifications
You must be signed in to change notification settings - Fork 0
/
references.bib
575 lines (515 loc) · 35.7 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
% SciPy reference-manual page for scipy.stats.mannwhitneyu (no author or date recorded).
@misc{noauthor_scipystatsmannwhitneyu_nodate,
  title   = {scipy.stats.mannwhitneyu — {SciPy} v1.13.0 {Manual}},
  urldate = {2024-05-07},
  url     = {https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mannwhitneyu.html},
}
% Mann and Whitney's paper introducing the U statistic.
% The publisher is stored in its own field instead of a printed "Publisher: ..." note,
% and the inequality in the abstract is typeset as math rather than literal dollar signs.
% NOTE(review): the DOI was added from memory of the published record - TODO confirm.
@article{mann_test_1947,
  author    = {Mann, H. B. and Whitney, D. R.},
  title     = {On a {Test} of {Whether} one of {Two} {Random} {Variables} is {Stochastically} {Larger} than the {Other}},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {18},
  number    = {1},
  pages     = {50--60},
  year      = {1947},
  publisher = {Institute of Mathematical Statistics},
  issn      = {0003-4851},
  doi       = {10.1214/aoms/1177730491},
  url       = {https://www.jstor.org/stable/2236101},
  urldate   = {2024-05-07},
  abstract  = {Let x and y be two random variables with continuous cumulative distribution functions f and g. A statistic U depending on the relative ranks of the x's and y's is proposed for testing the hypothesis f = g. Wilcoxon proposed an equivalent test in the Biometrics Bulletin, December, 1945, but gave only a few points of the distribution of his statistic. Under the hypothesis f = g the probability of obtaining a given U in a sample of n x's and m y's is the solution of a certain recurrence relation involving n and m. Using this recurrence relation tables have been computed giving the probability of U for samples up to n = m = 8. At this point the distribution is almost normal. From the recurrence relation explicit expressions for the mean, variance, and fourth moment are obtained. The 2rth moment is shown to have a certain form which enabled us to prove that the limit distribution is normal if m, n go to infinity in any arbitrary manner. The test is shown to be consistent with respect to the class of alternatives $f(x) > g(x)$ for every x.},
}
% Poly Haven HDRI asset page.
% The site name is stored in howpublished because @misc does not print a journal field;
% the " • Poly Haven" suffix is dropped from the title so the site is not named twice.
@misc{zaal_tears_2018,
  author       = {Zaal, Greg},
  title        = {Tears of {Steel} {Bridge} {HDRI}},
  howpublished = {Poly Haven},
  month        = apr,
  year         = {2018},
  url          = {https://polyhaven.com/a/tears_of_steel_bridge},
  urldate      = {2024-05-02},
  language     = {en},
  abstract     = {Download this free HDRI from Poly Haven},
}
% K-Seal advice page on heater matrix leaks (no author or date recorded).
% The site name is stored in howpublished because @misc does not print a journal field.
@misc{noauthor_heater_nodate,
  title        = {Heater {Matrix} {Leaking} - {Heater} {Matrix} {Repair}},
  howpublished = {K-Seal®},
  url          = {https://www.kseal.com/uk/expert-advice/engine-problems/heater-core},
  urldate      = {2024-05-02},
  language     = {en-GB},
  abstract     = {Don't shiver in a chilly car! One bottle of K-Seal can permanently repair leaks or splits in your engine's heater matrix.},
}
% Poly Haven HDRI asset page.
% The site name is stored in howpublished because @misc does not print a journal field;
% the " • Poly Haven" suffix is dropped from the title so the site is not named twice.
@misc{zaal_rostock-laage_2024,
  author       = {Zaal, Greg},
  title        = {Rostock-{Laage} {Airport} {HDRI}},
  howpublished = {Poly Haven},
  month        = jan,
  year         = {2024},
  url          = {https://polyhaven.com/a/rostock_laage_airport},
  urldate      = {2024-05-02},
  language     = {en},
  abstract     = {Download this free HDRI from Poly Haven},
}
% University of Bristol library location page.
% The institutional author is double-braced so it is treated as one name rather than
% parsed as surname "Bristol", given name "University of". The publisher line is moved
% out of the note into its own field.
@misc{bristol_queens_nodate,
  author    = {{University of Bristol}},
  title     = {Queen's {Building} {Library}},
  url       = {https://www.bristol.ac.uk/library/locations-opening/queens-library/},
  urldate   = {2024-05-02},
  language  = {en-GB},
  note      = {Last Modified: 2024-04-28},
  publisher = {University of Bristol},
  copyright = {https://www.bristol.ac.uk/style-guides/web/policies/legal/terms/},
}
% OpenAI Cookbook example page.
% The site name is stored in howpublished because @misc does not print a journal field.
@misc{sanders_how_2023,
  author       = {Sanders, Ted},
  title        = {How to format inputs to {ChatGPT} models},
  howpublished = {OpenAI Cookbook},
  month        = mar,
  year         = {2023},
  url          = {https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models},
  urldate      = {2024-04-28},
  language     = {en},
  abstract     = {Open-source examples and guides for building with the OpenAI API. Browse a collection of snippets, advanced techniques and walkthroughs. Share your own examples and guides.},
}
% BigCommerce article on online shopper types (no author or date recorded).
% The site name is stored in howpublished because @misc does not print a journal field.
@misc{noauthor_types_nodate,
  title        = {Types of {Online} {Shoppers}: {Characteristics} + {Ways} to {Target}},
  shorttitle   = {Types of {Online} {Shoppers}},
  howpublished = {BigCommerce},
  url          = {https://www.bigcommerce.com/articles/ecommerce/types-of-online-shoppers/},
  urldate      = {2024-04-28},
  language     = {en-US},
  abstract     = {Learn more about the different types of online shoppers and how to focus your time and money on efforts that have the most significant return on investment.},
}
% GeeksforGeeks tutorial page (no author recorded).
% The site name is stored in howpublished because @misc does not print a journal field.
@misc{noauthor_python_2023,
  title        = {Python {Nested} {Dictionary}},
  howpublished = {GeeksforGeeks},
  month        = jun,
  year         = {2023},
  url          = {https://www.geeksforgeeks.org/python-nested-dictionary/},
  urldate      = {2024-04-28},
  language     = {en},
  note         = {Section: Python},
  abstract     = {A Computer Science portal for geeks. It contains well written, well thought and well explained computer science and programming articles, quizzes and practice/competitive programming/company interview Questions.},
}
% Mailgun help-centre walkthrough (no author or date recorded).
% The site name is stored in howpublished because @misc does not print a journal field.
@misc{noauthor_domain_nodate,
  title        = {Domain {Verification} {Walkthrough}},
  howpublished = {Mailgun Help Center},
  url          = {https://help.mailgun.com/hc/en-us/articles/360026833053-Domain-Verification-Walkthrough},
  urldate      = {2024-04-25},
  language     = {en-US},
  abstract     = {Overview
Our aim with this step-by-step guide will walk you through how to add and verify a domain in Mailgun.
Verifying your domain is an essential step to start sending email through Mailgun. We...},
}
% MDN guide page on structuring web forms (no author recorded).
@misc{noauthor_how_2024,
  title    = {How to structure a web form - {Learn} web development {\textbar} {MDN}},
  year     = {2024},
  month    = feb,
  urldate  = {2024-04-19},
  url      = {https://developer.mozilla.org/en-US/docs/Learn/Forms/How_to_structure_a_web_form},
  language = {en-US},
  abstract = {You now have all the knowledge you'll need to properly structure your web forms. We will cover many of the features introduced here in the next few articles, with the next article looking in more detail at using all the different types of form widgets you'll want to use to collect information from your users.},
}
% Master's thesis (University of Bristol) on classifying pet scam websites.
% An access date is recorded, but the entry has no url field.
@mastersthesis{mehmedov_automated_2021,
  author   = {Mehmedov, Ronel},
  title    = {Automated classification of pet scam websites},
  school   = {University of Bristol},
  year     = {2021},
  month    = sep,
  urldate  = {2024-04-09},
  language = {en},
  abstract = {Pet scams are a type of cyber fraud where criminals create website platforms to advertise fictitious pets
and extract money without providing any goods to the victims. Scammers also use fake delivery websites
to extract additional funds from victims on the pretext of delivery costs and charges. This project seeks
to develop a classifier that differentiates between legitimate and fraudulent pet sale and delivery websites. In order to do this, I collected data online by creating a web scraper which extracts HTML and images from pet sale and delivery websites. Subsequently, I processed the data to create features that can be used for automated decision making. By testing these features as data points for a classifier, the project also explores the usefulness of these features in predicting whether a website is a pet scam or not, which is useful information that can be used by other researchers who seek to develop anti-fraud software in this context.
My research hypothesis is that a machine learning classifier can be developed that can detect pet
scam websites with high degree of accuracy. This project explored two features that have been proved to
connect pet scam websites in a previous study - HTML and image similarity. The project assessed their
value as data points for a machine learning classifier. In addition to this, the project explored four more
features which have not been previously discussed in literature regarding pet scams - presence of address, image to HTML ratio, menu options and financial data. I found that the feature that evaluates menu
options has very high predictive accuracy in classifying pet sale websites as legitimate or fraudulent. I
also found that the minimum, maximum and average price found in a website could be also a useful
indicator in that regard as well as the ratio between HTML pages in a website and the number of images
found in it. Moreover, I tested four different algorithms and identified that Random Forests provide the
best performance for classifying pet advertising websites and Logistic Regression has the highest results
for pet delivery websites. The important outcomes of this project can be summarized as follows:
• I developed a machine learning classifier that can predict whether a website is a fraudulent pet sale
website with 97\% accuracy. The classifier for delivery websites can predict whether a website is a
fake delivery website with 89\% accuracy.
• In order to do this, I created scraping scripts which extracted HTML and image files from 5808
websites. The scraper extracts statically served pages as well as asynchronously loaded content.
• I created two features that explored the usefulness of data points which have been previously
highlighted to have high predictive accuracy in the context of pet scams.
• I analysed the data obtained and created four features that can be used to automate the detection of
fraudulent websites in the context of pet scams. These features have not been discussed in previous
pet scam literature and can be applied in subsequent projects in the same field.
The code used for scraping images and HTML, feature engineering and training the machine learning
models is available for viewing at https://github.com/Ronel-Mehmedov/dissertation2021.git.},
}
% Master's thesis on scam detection in online classified ads (retrieved via ProQuest).
% The ISBN is stored in the isbn field rather than as free text in note.
@mastersthesis{alsaleh_scam_2017,
  author    = {Alsaleh, Hamad},
  title     = {Scam {Detection} in {Online} {Classified} {Advertisements}},
  school    = {University of Maryland},
  year      = {2017},
  isbn      = {9780355674071},
  url       = {https://www.proquest.com/docview/2019897494/abstract/6DCA3AFA48354982PQ/1},
  urldate   = {2024-04-15},
  language  = {en},
  copyright = {Database copyright ProQuest LLC; ProQuest does not claim copyright in the individual underlying works.},
  keywords  = {Applied sciences, Classification, Classified ads, Communication and the arts, Data mining, Deception, Machine learning, Scam detection, Social sciences},
  abstract  = {Online classified ad websites have become one of the most fundamental parts of the advertisement industry. Popular customer-to-customer marketplaces such as Craigslist and eBay, have attracted millions of consumers for trading and purchasing secondhand items. Because of the high financial return sellers can gain by using these sites and the anonymity some websites provide by not requiring their users to create user accounts to post ads, online classified sites have a high potential for fraudulent activities. The primary objective of this thesis is to develop a computational approach to scam detection in online classified ads. In this research, we first highlight the unique characteristics of scams compared with spam and provide a definition of scams in the general context of online classified ads; then we identify a set of novel features that signal scam or legitimate ads based on the heuristics we derived from observing and exploring the real-world data; and finally, we develop machine learning models for detecting scams in online classified ads and test the models with real-world data collected from Craigslist. The experiment results show that the proposed scam detection models achieved an F-measure of 0.955. The findings of this thesis have significant implications for improving the trustworthiness of customer-to-customer online marketplaces.},
}
% Paper on detecting pet-scam postings.
% NOTE(review): @article normally requires a journal; none is recorded here, nor are
% volume or pages - TODO add the venue details from the published version.
@article{norazman_development_2014,
  author   = {Norazman, Nor Sa’datul Aqma and Zamin, Norshuhani},
  title    = {Development of {Scammed} {Posts} {Detector}: {A} {Case} {Study} of {Pet} {Scammed} {Posting}},
  year     = {2014},
  language = {en},
  abstract = {This paper presents a research on scammed posts detector which focused on pet scammed posting detection. The research was motivated by the unawareness of pet lovers and owners on the pet scamming activities over the internet. Additionally, the current scam detectors are not able to notify the user on the potential pet scam posts they are dealing with. The objective of the research is to develop an application to automatically identify and alert the user on the potential pet scammed posting as an extension to dear current email system. The scope of the research are online pet advertisements and email communications between victims and scammers. Preliminary investigation on real pet scammed emails, experiences of the real pet scam victims and the public awareness regarding pet scams have been conducted to support the research development. Pet details, hardship of the family, obstacles faced and victim details are among the patterns extracted in the collection of real pet scammed emails. The system is tested against the human baseline and reached up to 86\% of accuracy.},
}
% Lloyds Banking Group press release on pet scams (corporate author, double-braced).
@misc{lloyds_bank_fraudsters_2023,
  author   = {{LLOYDS BANK}},
  title    = {Fraudsters go unleashed online as pet scams rise},
  year     = {2023},
  month    = jul,
  urldate  = {2024-04-15},
  url      = {https://www.lloydsbankinggroup.com/media/press-releases/2023/lloyds-bank-2023/fraudsters-go-unleashed-online-as-pet-scams-rise.html},
  language = {en},
  abstract = {New research from Lloyds Bank shows that people are losing an average £307 to pet scams – with over 50\% of scams originating on Meta-owned platforms Facebook and Instagram.},
}
% Better Business Bureau study on puppy scams (corporate author, double-braced).
% The site name is stored in howpublished because @misc does not print a journal field.
@misc{better_business_bureau_bbb_2017,
  author       = {{Better Business Bureau}},
  title        = {{BBB} {Study}: {Puppy} {Scams} {Full} {Study}},
  shorttitle   = {{BBB} {Study}},
  howpublished = {International Association of Better Business Bureaus},
  year         = {2017},
  url          = {https://www.bbb.org/all/scamstudies/puppy-scams/puppy-scams-full-study},
  urldate      = {2024-04-15},
  language     = {en},
  abstract     = {Fraud in the sale of online pets is on the rise, with scammers victimizing American consumers at an alarming rate. In recent years, Americans have filed tens of thousands of complaints with law enforcement, consumer organizations and online websites.},
}
% Better Business Bureau web article updating its puppy-scam study.
% Typed as @misc because no journal is recorded, which @article requires.
@misc{better_business_bureau_bbb_2022,
  author     = {{Better Business Bureau}},
  title      = {{BBB} {Study} {Update}: {Average} losses in puppy scams rising, even as cases fall},
  shorttitle = {{BBB} {Study} {Update}},
  month      = dec,
  year       = {2022},
  url        = {https://www.bbb.org/article/investigations/27895-bbb-study-update-average-losses-in-puppy-scams-rising-even-as-cases-fall},
  urldate    = {2024-04-15},
  language   = {en},
  abstract   = {BBB urges consumers to exercise caution this holiday season when purchasing a pet online.},
}
% News article on the mental-health impact of fraud.
% NOTE(review): journal says "Wales Online" but the url host is getreading.co.uk -
% TODO confirm which outlet should be credited.
@article{shaw_report_2024,
  author   = {Shaw, Neil},
  title    = {Report shows impact fraud has on people's mental health},
  journal  = {Wales Online},
  year     = {2024},
  month    = mar,
  note     = {Section: Cost Of Living},
  urldate  = {2024-04-15},
  url      = {https://www.getreading.co.uk/news/cost-of-living/report-shows-impact-fraud-peoples-28861638},
  language = {en},
  abstract = {Fraud now accounts for approximately 38\% of all crime experienced by adults in England and Wales},
}
% IPATA page listing current pet scams (corporate author, no date recorded).
@misc{ipata_current_nodate,
  author   = {{IPATA}},
  title    = {Current {Pet} {Scams}},
  urldate  = {2024-04-15},
  url      = {https://www.ipata.org/current-pet-scams},
  language = {EN},
  abstract = {With online scams on the rise, please be aware of the many different types of pet scams. Many times users are lured in by a cute puppy or other pet for sale, only to find out that they have been scammed out of their money. The best way to avoid being scammed by those who are selling pets online is to educate yourself on the warning signs of a typical online pet scam. Below are listed some of the common and known scams that we see on a regular basis. If you have questions about a company, email, website or if you feel you have been scammed, please contact us at [email protected].},
}
% eCrime 2020 conference paper on resource sharing between pet scam websites.
% The ISSN is stored in the issn field rather than as free text in note.
@inproceedings{price_resource_2020,
  author    = {Price, Benjamin and Edwards, Matthew},
  title     = {Resource {Networks} of {Pet} {Scam} {Websites}},
  booktitle = {2020 {APWG} {Symposium} on {Electronic} {Crime} {Research} ({eCrime})},
  pages     = {1--10},
  month     = nov,
  year      = {2020},
  issn      = {2159-1245},
  doi       = {10.1109/eCrime51433.2020.9493253},
  url       = {https://ieeexplore.ieee.org/document/9493253},
  urldate   = {2024-04-15},
  language  = {EN},
  keywords  = {Couplings, Frequency-domain analysis, Resource management, clustering, link analysis, online fraud, pet scam},
  abstract  = {The pet scam is a form of online fraud in which scammers leverage victims’ emotional attachment to fictitious pets as a means for extorting money. Both fraudulent pet seller sites and fraudulent delivery sites are involved in the scam. When sites of either kind are taken down, scammers create new sites, often reusing effective content from previous scams. We explore connections within the largest current collection of pet scam websites, examining four distinct types of resource sharing that are indicative of shared authorship. We find that 90\% of all accessible sites share at least one form of connection to another known site, including many identifiable links between seller and delivery sites, and that some scam authors could be behind hundreds of individual scam websites. We partially validate our linkage methods using domain registration data, and discuss the implications of using different connection types to analyse online fraud more generally.},
}
% Real Python tutorial on the set data type (no date recorded).
@misc{sturtz_sets_nodate,
  author   = {Sturtz, John},
  title    = {Sets in {Python} – {Real} {Python}},
  urldate  = {2024-04-12},
  url      = {https://realpython.com/python-sets/},
  language = {en},
  abstract = {In this tutorial you'll learn how to work effectively with Python's set data type. You'll see how to define set objects in Python and discover the operations that they support and by the end of the tutorial you'll have a good feel for when a set is an appropriate choice in your own programs.},
}
% Google Search Central documentation page on robots.txt (no author or date recorded).
% The site name is stored in howpublished because @misc does not print a journal field.
@misc{noauthor_robotstxt_nodate,
  title        = {Robots.txt {Introduction} and {Guide} {\textbar} {Google} {Search} {Central} {\textbar} {Documentation}},
  howpublished = {Google for Developers},
  url          = {https://developers.google.com/search/docs/crawling-indexing/robots/intro},
  urldate      = {2024-04-12},
  language     = {en},
  abstract     = {Robots.txt is used to manage crawler traffic. Explore this robots.txt introduction guide to learn what robot.txt files are and how to use them.},
}
% GitHub repository; the author field holds the account name given in the url.
@misc{an19352_an19352scambaiter_back_2023,
  author  = {an19352},
  title   = {an19352/scambaiter\_back},
  year    = {2023},
  month   = may,
  note    = {original-date: 2023-05-16T15:55:50Z},
  urldate = {2024-04-09},
  url     = {https://github.com/an19352/scambaiter_back},
}
% OpenAI platform documentation landing page (no author or date recorded).
@misc{noauthor_openai_nodate,
  title    = {{OpenAI} {Platform}},
  urldate  = {2024-04-07},
  url      = {https://platform.openai.com/docs},
  language = {en},
  abstract = {Explore developer resources, tutorials, API docs, and dynamic examples to get the most out of OpenAI's platform.},
}
% YouTube video by 3Blue1Brown; the abstract holds the video description text.
@misc{3blue1brown_but_2024,
  author     = {{3Blue1Brown}},
  title      = {But what is a {GPT}? {Visual} intro to {Transformers} {\textbar} {Chapter} 5, {Deep} {Learning}},
  shorttitle = {But what is a {GPT}?},
  year       = {2024},
  month      = apr,
  urldate    = {2024-04-07},
  url        = {https://www.youtube.com/watch?v=wjZofJX0v4M},
  abstract   = {Unpacking how large language models work under the hood
Early view of the next chapter for patrons: https://3b1b.co/early-attention
Special thanks to these supporters: https://3b1b.co/lessons/gpt\#thanks
To contribute edits to the subtitles, visit https://translate.3blue1brown.com/
Other recommended resources on the topic.
Richard Turner's introduction is one of the best starting places:
https://arxiv.org/pdf/2304.10557.pdf
Coding a GPT with Andrej Karpathy
• Let's build GPT: from scratch, in cod...
Introduction to self-attention by John Hewitt
https://web.stanford.edu/class/cs224n...
History of language models by Brit Cruise:
• ChatGPT: 30 Year History {\textbar} How AI Lea...
Paper about examples like the “woman - man” one presented here:
https://arxiv.org/pdf/1301.3781.pdf
------------------
Timestamps
0:00 - Predict, sample, repeat
3:03 - Inside a transformer
6:36 - Chapter layout
7:20 - The premise of Deep Learning
12:27 - Word embeddings
18:25 - Embeddings beyond words
20:22 - Unembedding
22:22 - Softmax with temperature
26:03 - Up next
------------------
These animations are largely made using a custom Python library, manim. See the FAQ comments here:
https://3b1b.co/faq\#manim
https://github.com/3b1b/manim
https://github.com/ManimCommunity/manim/
All code for specific videos is visible here:
https://github.com/3b1b/videos/
The music is by Vincent Rubinetti.
https://www.vincentrubinetti.com
https://vincerubinetti.bandcamp.com/a...
https://open.spotify.com/album/1dVyjw...
------------------
3blue1brown is a channel about animating math, in all senses of the word animate. If you're reading the bottom of a video description, I'm guessing you're more interested than the average viewer in lessons here. It would mean a lot to me if you chose to stay up to date on new ones, either by subscribing here on YouTube or otherwise following on whichever platform below you check most regularly.
Mailing list: https://3blue1brown.substack.com
Twitter: / 3blue1brown
Instagram: / 3blue1brown
Reddit: / 3blue1brown
Facebook: / 3blue1brown
Patreon: / 3blue1brown
Website: https://www.3blue1brown.com},
}
% Chinese patent CN103646035A (Google Patents record) on heuristic information search.
@patent{zhao_information_2014,
  author      = {Zhao, Jie and Zhao, Jiyan and Chang, Yuxin},
  title       = {Information search method based on heuristic method},
  number      = {CN103646035A},
  nationality = {CN},
  assignee    = {Beijing Ruian Technology Co Ltd},
  year        = {2014},
  month       = mar,
  urldate     = {2024-04-04},
  url         = {https://patents.google.com/patent/CN103646035A/en},
  language    = {en},
  keywords    = {heuristic, information, key word, search, weights},
  abstract    = {The invention relates to an information search method based on a heuristic method. The information search method includes the steps of determining parameter types corresponding to information importance according to features of information, wherein each parameter type comprises at least one keyword, keywords in parameters of the same type are divided into different levels and different weights are set; storing the parameter types, the keywords and the weights of the keywords in a hash mode, and building an index; obtaining a keyword vector of each piece of undetermined information, finding the keywords and the weights in the index, aggregating the weights of the keywords in parameters of different types, obtaining the weight of the information, and subtracting the upper limit of the weights from the weight of the information to obtain heuristic information; obtaining the overall assessment of each piece of the undetermined information according to the heuristic information by means of a heuristic algorithm, determining the most valuable information and outputting a search result. The heuristic search method guarantees time-validity of the information, can save computation time and space, and improves information search efficiency and accuracy.},
}
% Journal article on secure semantic search with blockchain-based verification.
% The pages field holds the single value 102548 rather than a page range.
@article{yang_secure_2021,
  author   = {Yang, Wenyuan and Sun, Boyu and Zhu, Yuesheng and Wu, Dehao},
  title    = {A secure heuristic semantic searching scheme with blockchain-based verification},
  journal  = {Information Processing \& Management},
  volume   = {58},
  number   = {4},
  pages    = {102548},
  year     = {2021},
  month    = jul,
  issn     = {0306-4573},
  doi      = {10.1016/j.ipm.2021.102548},
  urldate  = {2024-04-04},
  url      = {https://www.sciencedirect.com/science/article/pii/S0306457321000534},
  keywords = {Blockchain-based verification, Retrieval heuristic, Secure semantic searching, Word nonlinear matching},
  abstract = {Secure semantic searching provides privacy-preserving information retrieval for arbitrary queries semantically related to keywords. However, current verification mechanisms cannot verify the correctness of search results for semantically related queries in mutual distrust scenarios. Moreover, most secure semantic searching schemes still perform exact matching on ciphertext after query expansion, then directly accumulate the weights of the matched keywords as similarity measurements to rank the documents, resulting in the search results with unsatisfied ranking. In this paper, we propose a secure heuristic semantic searching scheme, in which a privacy-preserving word nonlinear matching (PPWNM) method is developed, and a blockchain-based verification is designed to obtain trustworthy search results with high retrieval accuracy. Specifically, we describe several retrieval heuristics and formulate them as a word nonlinear matching problem, then transform it to the PPWNM problem for calculating the similarity measurement between the query and document so that the highly accurate ranked results can be obtained. Also we design a blockchain-based verification mechanism in which the blockchain nodes utilize the proofs generated during the matching process to verify the correctness of the search results and reach a consensus to ensure the trustworthy results and fair payment between the participants in a mutual distrust model. Our security analysis and experimental results show that the proposed scheme is secure and has higher accuracy compared with the existing other schemes.},
}
% Analytics Vidhya blog post on the FuzzyWuzzy library.
% The site name is stored in howpublished because @misc does not print a journal field.
@misc{majumder_fuzzywuzzy_2021,
  author       = {Majumder, Prateek},
  title        = {{FuzzyWuzzy} {Python} {Library}: {Interesting} {Tool} for {NLP} and {Text} {Analytics}},
  shorttitle   = {{FuzzyWuzzy} {Python} {Library}},
  howpublished = {Analytics Vidhya},
  month        = jun,
  year         = {2021},
  url          = {https://www.analyticsvidhya.com/blog/2021/06/fuzzywuzzy-python-library-interesting-tool-for-nlp-and-text-analytics/},
  urldate      = {2024-04-04},
  language     = {en},
  abstract     = {The FuzzyWuzzy python library uses Levenshtein distance to calculate the difference between two strings. Let's understand it},
}
% Paper comparing BeautifulSoup, lxml and regex for web data extraction.
% Author names are restored from a case-mangled export ("Erdi̇Nç", "Yerli̇Kaya"), and the
% title is entered in title case with only the proper noun protected, not forced to all caps.
% NOTE(review): @article normally requires a journal; none is recorded - TODO add the venue.
@article{uzun_comparison_2018,
  author   = {Uzun, Erdinç and Yerlikaya, Tarik and Kirat, Oğuz},
  title    = {Comparison of {Python} Libraries Used for Web Data Extraction},
  month    = may,
  year     = {2018},
  language = {en},
  abstract = {There are several libraries for extracting useful data from web pages in Python. In this study, we compare three different well-known extraction libraries including BeautifulSoup, lxml and regex. The experimental results indicate that regex achieves the best results with an average of 0.071 ms. However, it is difficult to generate correct extraction rules for regex when the number of inner elements is not known. In experiments, only \%43.5 of the extraction rules are suitable for this task. In this case, BeautifulSoup and lxml, which are the DOM-based libraries, are used for extraction process. In experiments, lxml library yields the best results with an average of 9.074 ms.},
}
% Textbook "Introduction to Information Retrieval".
% Typed as @book because no journal is recorded, which @article requires.
% NOTE(review): publisher and the "Schütze" spelling were supplied from the published
% book, not from the original entry - TODO confirm against the edition actually cited.
@book{manning_introduction_2009,
  author    = {Manning, Christopher and Raghavan, Prabhakar and Schütze, Hinrich},
  title     = {Introduction to {Information} {Retrieval}},
  publisher = {Cambridge University Press},
  year      = {2009},
  pages     = {1--2},
  language  = {en},
}
% Chapter I of "Data Mining: A Heuristic Approach" (Google Books record).
% Names are rewritten as "Last, First": the export had scrambled them (e.g.
% "A, Hussein, Abbass"), which BibTeX reads as surname "A". The booktitle no longer
% repeats its subtitle.
% NOTE(review): these names may be the volume editors rather than chapter authors - TODO confirm.
@incollection{a_chapter_2001,
  author    = {Abbass, Hussein A. and Sarker, Ruhul and Newton, Charles S.},
  title     = {Chapter {I}},
  booktitle = {Data {Mining}: {A} {Heuristic} {Approach}},
  publisher = {Idea Group Inc (IGI)},
  pages     = {4},
  month     = jul,
  year      = {2001},
  isbn      = {978-1-59140-011-0},
  urldate   = {2024-04-04},
  language  = {en},
  note      = {Google-Books-ID: vmV2B\_bzyD8C},
  keywords  = {Computers / Data Processing, Computers / Data Transmission Systems / General, Computers / Databases / Data Mining, Technology \& Engineering / Data Transmission Systems / General},
  abstract  = {Real life problems are known to be messy, dynamic and multi-objective, and involve high levels of uncertainty and constraints. Because traditional problem-solving methods are no longer capable of handling this level of complexity, heuristic search methods have attracted increasing attention in recent years for solving such problems. Inspired by nature, biology, statistical mechanics, physics and neuroscience, heuristics techniques are used to solve many problems where traditional methods have failed. Data Mining: A Heuristic Approach will be a repository for the applications of these techniques in the area of data mining.},
}
% Encyclopedia entry on web scraping.
% booktitle is capitalised as a proper title.
% NOTE(review): publisher was added to satisfy @incollection and inferred from the
% ISBN/DOI prefix - TODO confirm; volume editors are not recorded.
@incollection{zhao_web_2017,
  author    = {Zhao, Bo},
  title     = {Web {Scraping}},
  booktitle = {Encyclopedia of {Big} {Data}},
  publisher = {Springer},
  pages     = {1--3},
  month     = may,
  year      = {2017},
  isbn      = {978-3-319-32001-4},
  doi       = {10.1007/978-3-319-32001-4_483-1},
}
% ScrapeOps introductory guide to Scrapy (no author or date recorded).
% The site name is stored in howpublished because @misc does not print a journal field;
% the "| ScrapeOps" suffix is dropped from the title so the site is not named twice.
@misc{noauthor_intro_nodate,
  title        = {Intro to {Web} {Scraping} {With} {Scrapy}},
  howpublished = {ScrapeOps},
  url          = {https://scrapeops.io/python-scrapy-playbook/scrapy-web-scraping-intro/},
  urldate      = {2024-04-02},
  language     = {en},
  abstract     = {Everything you need to know about Scrapy, its pros and cons, and how to get started.},
}
% Core Devs Ltd article comparing three web-scraping tools.
% The site name is stored in howpublished because @misc does not print a journal field.
@misc{fariha_beautifulsoup_2023,
  author       = {Fariha, Faojia},
  title        = {{BeautifulSoup} vs {Selenium} vs {Scrapy}: {Which} is the {Best} for {Web} {Scraping}?},
  shorttitle   = {{BeautifulSoup} vs {Selenium} vs {Scrapy}},
  howpublished = {Core Devs Ltd},
  month        = nov,
  year         = {2023},
  url          = {https://coredevsltd.com/articles/beautifulsoup-vs-selenium-vs-scrapy/},
  urldate      = {2024-04-02},
  language     = {en-US},
  abstract     = {Let us explore the key differences in BeautifulSoup vs Selenium vs Scrapy to determine the best tool for your web scraping needs and much more about it.},
}
% Requests library documentation home page (no author or date recorded).
@misc{noauthor_requests_nodate,
  title   = {Requests: {HTTP} for {Humans}™ — {Requests} 2.31.0 documentation},
  urldate = {2024-04-01},
  url     = {https://requests.readthedocs.io/en/latest/},
}
% Book on the Python Requests library.
% Typed as @book because no journal is recorded, which @article requires.
% NOTE(review): publisher was supplied from the published book, not from the original
% entry - TODO confirm.
@book{chandra_python_2015,
  author    = {Chandra, Rakesh Vidya and Varanasi, Bala Subrahmanyam},
  title     = {Python {Requests} {Essentials}},
  publisher = {Packt Publishing},
  month     = jun,
  year      = {2015},
  language  = {en},
  abstract  = {Learn how to integrate your applications seamlessly with web services using Python Requests},
}
% Survey article on web scraping and web crawling. Both ISSNs listed for the
% journal are written in the standard hyphenated NNNN-NNNN form.
@article{khder_web_2021,
  author     = {Khder, Moaiad Ahmad},
  title      = {Web {Scraping} or {Web} {Crawling}: {State} of {Art}, {Techniques}, {Approaches} and {Application}},
  shorttitle = {Web {Scraping} or {Web} {Crawling}},
  journal    = {International Journal of Advances in Soft Computing and its Applications},
  volume     = {13},
  number     = {3},
  pages      = {145--168},
  month      = dec,
  year       = {2021},
  issn       = {2710-1274, 2074-8523},
  doi        = {10.15849/IJASCA.211128.11},
  url        = {http://ijasca.zuj.edu.jo/PapersUploaded/2021.3.11.pdf},
  urldate    = {2024-04-01},
  language   = {en},
  abstract   = {Web scraping or web crawling refers to the procedure of automatic extraction of data from websites using software. It is a process that is particularly important in fields such as Business Intelligence in the modern age. Web scrapping is a technology that allow us to extract structured data from text such as HTML. Web scrapping is extremely useful in situations where data isn’t provided in machine readable format such as JSON or XML. The use of web scrapping to gather data allows us to gather prices in near real time from retail store sites and provide further details, web scrapping can also be used to gather intelligence of illicit businesses such as drug marketplaces in the darknet to provide law enforcement and researchers valuable data such as drug prices and varieties that would be unavailable with conventional methods. It has been found that using a web scraping program would yield data that is far more thorough, accurate, and consistent than manual entry. Based on the result it has been concluded that Web scraping is a highly useful tool in the information age, and an essential one in the modern fields. Multiple technologies are required to implement web scrapping properly such as spidering and pattern matching which are discussed. This paper is looking into what web scraping is, how it works, web scraping stages, technologies, how it relates to Business Intelligence, artificial intelligence, data science, big data, cyber security, how it can be done with the Python language, some of the main benefits of web scraping, and what the future of web scraping may look like, and a special degree of emphasis is placed on highlighting the ethical and legal issues.},
}
% Student thesis archived in DiVA under a KTH URN (see url). Typed as a thesis
% instead of @book, which requires a publisher this record does not have.
% The former abstract was DiVA portal boilerplate, not a summary of the thesis,
% and has been dropped.
% NOTE(review): the thesis level is not recorded here; "Degree project" is a
% neutral label -- confirm against the DiVA record.
@mastersthesis{persson_evaluating_2019,
  author   = {Persson, Emil},
  title    = {Evaluating tools and techniques for web scraping},
  school   = {KTH Royal Institute of Technology},
  type     = {Degree project},
  year     = {2019},
  url      = {https://urn.kb.se/resolve?urn=urn:nbn:se:kth:diva-271206},
  urldate  = {2024-04-01},
  language = {eng},
}
% Web site listing online pet scams. The site name is given in howpublished
% because @misc does not print a journal field.
@misc{brady_fighting_2024,
  author       = {Brady, Paul},
  title        = {Fighting {Online} {Pet} {Scams}},
  howpublished = {PetScams.com},
  month        = mar,
  year         = {2024},
  url          = {https://petscams.com/},
  urldate      = {2024-03-30},
  language     = {en-US},
  abstract     = {Pet Scams are a huge issue at the moment. We list online Puppy Scams as well as other pet scams in order to warn consumers about scammers websites.},
}
% Journal article on pet scams (advance fee and non-delivery fraud).
% The publisher is stored in its own field rather than as a free-text note, so
% it is not printed verbatim as "Publisher: ..." in the bibliography.
@article{whittaker_understanding_2020,
  author     = {Whittaker, Jack M. and Button, Mark},
  title      = {Understanding pet scams: {A} case study of advance fee and non-delivery fraud using victims’ accounts},
  shorttitle = {Understanding pet scams},
  journal    = {Australian \& New Zealand Journal of Criminology},
  volume     = {53},
  number     = {4},
  pages      = {497--514},
  month      = dec,
  year       = {2020},
  publisher  = {SAGE Publications Ltd},
  issn       = {0004-8658},
  doi        = {10.1177/0004865820957077},
  url        = {https://doi.org/10.1177/0004865820957077},
  urldate    = {2024-03-10},
  language   = {en},
  abstract   = {Advance fee and non-delivery frauds have become very common with the growing preference for online shopping and the new opportunities this brings for online offenders. This article uses unique access to a volunteer group’s database focused on preventing pet scams to explore this type of crime. Distances, among other factors, make the purchase of pets online common in countries such as the USA, Australia and South Africa. This modality of purchase has been exploited by organized criminals largely based in Cameroon to conduct advance fee and non-delivery frauds. The article uses data from the volunteer group Petscams.com to provide unique insights on the techniques of the offenders with particular reference to the strategies used to maximize victimization by using real accounts of victims of such frauds. It also briefly notes how the COVID-19 crisis has been used to adapt this type of scam. The article’s discussion identifies the need for a more nuanced assessment into the role of victim oriented voluntary organizations.},
}
% Office for National Statistics crime bulletin; the url identifies the
% "year ending March 2023" edition. The publishing body is given in
% howpublished instead of being appended to the title.
% NOTE(review): title wording reconstructed from the url slug -- confirm
% against the bulletin page.
@misc{stripe_crime_2023,
  author       = {Stripe, Nick},
  title        = {Crime in {England} and {Wales}: Year Ending {March} 2023},
  howpublished = {Office for National Statistics},
  month        = jul,
  year         = {2023},
  url          = {https://www.ons.gov.uk/peoplepopulationandcommunity/crimeandjustice/bulletins/crimeinenglandandwales/yearendingmarch2023},
  urldate      = {2024-03-09},
}
% Conference paper on why and how individuals become Internet scammers.
% Typed as @inproceedings: the record has no journal, so @article was missing a
% required field. The first author's surname is the compound "Tambe Ebot".
% NOTE(review): venue and name split were taken from the published ICIS 2014
% record -- confirm. The citation key is kept unchanged so existing \cite
% commands still resolve.
@inproceedings{claude_toward_2014,
  author    = {Tambe Ebot, Alain Claude and Siponen, Mikko},
  title     = {Toward a {Rational} {Choice} {Process} {Theory} of {Internet} {Scamming}: {The} {Offender}’s {Perspective}},
  booktitle = {Proceedings of the 35th International Conference on Information Systems ({ICIS} 2014)},
  year      = {2014},
  url       = {https://core.ac.uk/download/pdf/301363409.pdf},
  language  = {en},
  abstract  = {Internet fraud scam is a crime enabled by the Internet to swindle Internet users. The global costs of these scams are in the billions of US dollars. Existing research suggests that scammers maximize their economic gain. Although this is a plausible explanation, since the idea of the scam is to fool people to send money, this explanation alone, cannot explain why individuals become Internet scammers. An equally important, albeit unexplored riddle, is the question of what strategies Internet scammers adopt to perform the act. As a first step to address these gaps, we interviewed five Internet scammers in order to develop a rational choice process theory of Internet scammers’ behavior. The initial results suggest that an interplay of socioeconomic and dynamic thinking processes explains why individuals drift into Internet scamming. Once an individual drifts into Internet scamming, a successful scam involves two processes: persuasive strategy and advance fee strategy.},
}
% IC3 state annual report for 2019, hosted on ic3.gov. A corporate author is
% supplied (double-braced so it is treated as one name) so styles have
% something to sort and label by. No publication date is recorded.
@misc{noauthor_2019_nodate,
  author  = {{Internet Crime Complaint Center}},
  title   = {2019 {IC3} {Alabama} {Annual} {Report}},
  url     = {https://www.ic3.gov/Media/PDF/AnnualReport/2019State/StateReport.aspx},
  urldate = {2024-03-09},
}
% IC3 state annual report for 2023, hosted on ic3.gov. A corporate author is
% supplied (double-braced so it is treated as one name) so styles have
% something to sort and label by. No publication date is recorded.
@misc{noauthor_2023_nodate,
  author  = {{Internet Crime Complaint Center}},
  title   = {2023 {IC3} {Alabama} {Annual} {Report}},
  url     = {https://www.ic3.gov/Media/PDF/AnnualReport/2023State/StateReport.aspx},
  urldate = {2024-03-09},
}