-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.swift
65 lines (53 loc) · 1.81 KB
/
main.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
//
// main.swift
// CrossroadRegex
//
// Created by Ilya Mikhaltsou on 6/15/17.
//
//
import Foundation
// CSV sinks for the export phase, each opened with `append: false`.
// They are `var` because `print(_:to:)` takes its target stream `inout`.
var booksOutput = try FileTextOutputStream(fileAtPath: "books.csv", append: false)
var profilesOutput = try FileTextOutputStream(fileAtPath: "profiles.csv", append: false)
var reviewsOutput = try FileTextOutputStream(fileAtPath: "reviews.csv", append: false)

// Root URL handed to the crawler. The literal is a constant, so the
// force-unwrap can only fail on a programmer error.
let baseUrl: URL = URL(string: "https://www.fanfiction.net/")!

// Store passed to the crawler and read back by the export loops.
let repo: Repository = Repository()

// NOTE(review): not read or written anywhere in this view — confirm whether
// another file uses it before removing.
var counter: Int = 0
// Disabled: purge the repository before crawling.
//print("Purging")
//repo.purge()
print("Starting processing")
crawler(
    inRepository: repo,
    withBaseUrl: baseUrl,
    // Currently returns false for every input; the disabled alternative
    // was `return i < 40`.
    themesFilter: { _ in false },
    pageLimit: 20,
    // Books, authors and reviews are all accepted unconditionally.
    bookFilter: { _ in true },
    authorFilter: { _ in true },
    reviewFilter: { _ in true },
    statusCallback: { remaining, done, failed in
        // Save to the repository every time `done` is a multiple of 100.
        let reachedCheckpoint = done % 100 == 0
        if reachedCheckpoint {
            print("Commiting")
            repo.save()
        }
        print("Done: \(done), remaining: \(remaining), failed: \(failed)")
        // `true` means "continue"; the disabled alternative was
        // `return done < 100000`.
        return true
    }
)
// Final save once the crawler has returned.
repo.save()
// Export one row per stored book to books.csv: "<title>, <objectUrl>".
print("Writing books")
for book in repo.allBooks() {
    // Backslash-escape commas in the title, using the same convention as the
    // reviews export, so a title containing "," is not read as extra columns.
    let title = book.title?.replacingOccurrences(of: ",", with: "\\,") ?? ""
    // NOTE(review): `objectUrl` is interpolated as-is; its type is not visible
    // here — confirm it is non-optional and cannot contain commas.
    print("\(title), \(book.objectUrl)", to: &booksOutput)
}
// Export one line per stored profile to profiles.csv, containing only the
// profile name (an empty line when the name is nil).
print("Writing profiles")
for profile in repo.allProfiles() {
    let displayName = profile.name ?? ""
    print(displayName, to: &profilesOutput)
}
// Export one row per stored review to reviews.csv:
// "<author name>,<book title>,<review text>". Missing values become "".
print("Writing reviews")
for review in repo.allReviews() {
    // Commas inside a field are backslash-escaped so they are not read as
    // column separators.
    let author = review.author?.name?.replacingOccurrences(of: ",", with: "\\,") ?? ""
    let title = review.page?.book?.title?.replacingOccurrences(of: ",", with: "\\,") ?? ""
    // Remove every line-break character, not just "\n": a stray "\r" or a
    // Unicode line/paragraph separator would otherwise split one review
    // across several rows.
    let text = review.text?
        .components(separatedBy: CharacterSet.newlines)
        .joined()
        .replacingOccurrences(of: ",", with: "\\,") ?? ""
    print([author, title, text].joined(separator: ","), to: &reviewsOutput)
}
// Print a completion message followed by the process's current working
// directory on its own line.
print(
    "Done. Current working directory:",
    FileManager.default.currentDirectoryPath,
    separator: "\n"
)