-
Notifications
You must be signed in to change notification settings - Fork 0
/
Queries.py
79 lines (74 loc) · 2.3 KB
/
Queries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# coding: utf8
# noinspection SqlWithoutWhere
match = [
"""
-- stage 0 - remove previous matches
UPDATE LocalFiles
set RemoteId = NULL ;
""",
"""
-- stage 1 - look for unique matches
UPDATE LocalFiles
set RemoteId = (SELECT RemoteId
FROM SyncFiles
WHERE LocalFiles.OriginalFileName == SyncFiles.OrigFileName
AND (LocalFiles.Uid == SyncFiles.Uid AND
LocalFiles.CreateDate = SyncFiles.CreateDate)
-- 32 character ids are legitimate and unique
OR (LocalFiles.Uid == SyncFiles.Uid AND
length(LocalFiles.Uid) == 32)
)
WHERE LocalFiles.Uid notnull and LocalFiles.Uid != 'not_supported'
;
""",
"""
-- stage 2 - mop up entries that have no UID (this is a small enough
-- population that filename + CreateDate is probably unique)
with pre_match(RemoteId) as
(SELECT RemoteId from LocalFiles where RemoteId notnull)
UPDATE LocalFiles
set RemoteId = (SELECT RemoteId
FROM SyncFiles
WHERE LocalFiles.OriginalFileName == SyncFiles.OrigFileName
AND LocalFiles.CreateDate = SyncFiles.CreateDate
AND SyncFiles.RemoteId NOT IN (select RemoteId from pre_match)
)
WHERE LocalFiles.RemoteId isnull
;
""",
"""
-- stage 3 FINAL - mop up on filename only
with pre_match(RemoteId) as
(SELECT RemoteId from LocalFiles where RemoteId notnull)
UPDATE LocalFiles
set RemoteId = (SELECT RemoteId
FROM SyncFiles
WHERE LocalFiles.OriginalFileName == SyncFiles.OrigFileName
AND SyncFiles.RemoteId NOT IN (select RemoteId from pre_match)
)
WHERE LocalFiles.RemoteId isnull
;
""",
]
missing_files = """select * from LocalFiles where RemoteId isnull;"""
pre_extra_files = """
-- overwrite NULL RemoteIds or extra_files will get no matches
update LocalFiles set RemoteId='not_found' where RemoteId isnull
"""
extra_files = """
select * from SyncFiles where RemoteId not in (select RemoteId from LocalFiles)
-- and uid not in (select uid from LocalFiles where length(SyncFiles.Uid) = 32)
;
"""
duplicate_files = """
with matches(RemoteId) as (
select RemoteId from LocalFiles
GROUP BY LocalFiles.RemoteId
HAVING COUNT(LocalFiles.RemoteId) > 1
)
SELECT *
FROM LocalFiles
JOIN matches
WHERE LocalFiles.RemoteId = matches.RemoteId
;
"""