1
1
from __future__ import annotations
2
2
3
+ from collections import defaultdict
3
4
from collections .abc import AsyncGenerator , Sequence
4
5
from contextlib import aclosing
5
6
from dataclasses import dataclass , field
37
38
from .zarr import ZarrLink , sync_zarr
38
39
39
40
41
@dataclass
class UpdateReport:
    """Summary of changes committed to a Dandiset's backup repository.

    Returned by ``DandiDatasetter.update_dandiset()`` when (and only when)
    changes were committed; callers receive ``None`` otherwise.  The fields
    are written into ``.gitmodules`` submodule attributes by
    ``update_from_backup()`` (``dandiset-last-modified`` and
    ``dandiset-tagged-releases``).
    """

    # Timestamp of the dataset's asset state after the update
    # (taken from ``ds.get_assets_state().timestamp``)
    last_modified: datetime
    # List is in sorted order
    # Versions tagged by ``tag_releases()``, or None when release
    # tagging is disabled (``config.enable_tags`` is false)
    tagged_releases: list[str] | None
47
+
40
48
@dataclass
41
49
class DandiDatasetter (AsyncResource ):
42
50
dandi_client : AsyncDandiClient
@@ -80,12 +88,19 @@ async def update_from_backup(
80
88
workers = self .config .workers ,
81
89
)
82
90
to_save : list [str ] = []
83
- access_status : dict [str , str ] = {}
84
- for d , changed in report .results :
85
- if changed :
91
+ gitmodule_attrs : dict [str , dict [ str , str ]] = defaultdict ( dict )
92
+ for d , r in report .results :
93
+ if r is not None :
86
94
to_save .append (d .identifier )
95
+ gitmodule_attrs [d .identifier ]["dandiset-last-modified" ] = str (
96
+ r .last_modified
97
+ )
98
+ if r .tagged_releases is not None :
99
+ gitmodule_attrs [d .identifier ]["dandiset-tagged-releases" ] = (
100
+ "," .join (r .tagged_releases )
101
+ )
87
102
if self .config .gh_org is not None :
88
- access_status [d .identifier ] = (
103
+ gitmodule_attrs [d .identifier ][ "github-access-status" ] = (
89
104
"public"
90
105
if d .embargo_status is EmbargoStatus .OPEN
91
106
else "private"
@@ -110,24 +125,23 @@ async def update_from_backup(
110
125
GHRepo (self .config .gh_org , d ),
111
126
private = True ,
112
127
)
113
- access_status [ d ] = "private"
128
+ gitmodule_attrs [ d ][ "github-access-status" ] = "private"
114
129
if to_save :
115
130
log .debug ("Committing superdataset" )
116
131
superds .assert_no_duplicates_in_gitmodules ()
117
132
msg = await self .get_superds_commit_message (superds , to_save )
118
- await superds .save (message = msg , path = to_save )
133
+ await superds .save (message = msg , path = to_save + [ ".gitmodules" ] )
119
134
superds .assert_no_duplicates_in_gitmodules ()
120
135
log .debug ("Superdataset committed" )
121
- if access_status :
122
- log .debug ("Ensuring github-access-status in .gitmodules is up-to-date" )
123
- for did , access in access_status .items ():
124
- await superds .set_repo_config (
125
- f"submodule.{ did } .github-access-status" ,
126
- access ,
127
- file = ".gitmodules" ,
128
- )
136
+ if gitmodule_attrs :
137
+ log .debug ("Updating submodule properties in .gitmodules" )
138
+ for did , attrs in gitmodule_attrs .items ():
139
+ for k , v in attrs .items ():
140
+ await superds .set_repo_config (
141
+ f"submodule.{ did } .{ k } " , v , file = ".gitmodules"
142
+ )
129
143
await superds .commit_if_changed (
130
- "[backups2datalad] Update github-access-status keys in .gitmodules" ,
144
+ "[backups2datalad] Update .gitmodules" ,
131
145
paths = [".gitmodules" ],
132
146
check_dirty = False ,
133
147
)
@@ -168,11 +182,11 @@ async def ensure_github_remote(self, ds: AsyncDataset, dandiset_id: str) -> None
168
182
169
183
async def update_dandiset (
170
184
self , dandiset : RemoteDandiset , ds : AsyncDataset | None = None
171
- ) -> bool :
172
- # Returns true iff any changes were committed to the repository
185
+ ) -> UpdateReport | None :
186
+ # Returns non-None iff any changes were committed to the repository
173
187
if dandiset .embargo_status is EmbargoStatus .UNEMBARGOING :
174
188
log .info ("Dandiset %s is unembargoing; not syncing" , dandiset .identifier )
175
- return False
189
+ return None
176
190
if ds is None :
177
191
ds = await self .init_dataset (
178
192
self .config .dandiset_root / dandiset .identifier ,
@@ -206,7 +220,7 @@ async def update_dandiset(
206
220
)
207
221
changed = False
208
222
await self .ensure_github_remote (ds , dandiset .identifier )
209
- await self .tag_releases (
223
+ tagged_releases = await self .tag_releases (
210
224
dandiset , ds , push = self .config .gh_org is not None , log = dmanager .log
211
225
)
212
226
# Call `get_stats()` even if gh_org is None so that out-of-date stats
@@ -217,7 +231,12 @@ async def update_dandiset(
217
231
dmanager .log .info ("Pushing to sibling" )
218
232
await ds .push (to = "github" , jobs = self .config .jobs , data = "nothing" )
219
233
await self .manager .set_dandiset_description (dandiset , stats , ds )
220
- return changed
234
+ newstate = ds .get_assets_state ()
235
+ assert newstate is not None
236
+ return UpdateReport (
237
+ last_modified = newstate .timestamp ,
238
+ tagged_releases = tagged_releases ,
239
+ )
221
240
222
241
async def sync_dataset (
223
242
self ,
@@ -323,19 +342,22 @@ async def tag_releases(
323
342
ds : AsyncDataset ,
324
343
push : bool ,
325
344
log : PrefixedLogger ,
326
- ) -> None :
345
+ ) -> list [str ] | None :
346
+ # Returns a sorted list of all tagged releases
327
347
if not self .config .enable_tags :
328
- return
348
+ return None
329
349
log .info ("Tagging releases for Dandiset" )
330
350
versions = [v async for v in dandiset .aget_versions (include_draft = False )]
331
351
changed = False
352
+ tagged_releases = []
332
353
for v in versions :
333
354
if await ds .read_git ("tag" , "-l" , v .identifier ):
334
355
log .debug ("Version %s already tagged" , v .identifier )
335
356
else :
336
357
log .info ("Tagging version %s" , v .identifier )
337
358
await self .mkrelease (dandiset .for_version (v ), ds , push = push , log = log )
338
359
changed = True
360
+ tagged_releases .append (v .identifier )
339
361
if versions :
340
362
latest = max (map (attrgetter ("identifier" ), versions ), key = PkgVersion )
341
363
description = await ds .read_git ("describe" , "--tags" , "--long" , "--always" )
@@ -361,6 +383,7 @@ async def tag_releases(
361
383
)
362
384
if push and (changed or merge ):
363
385
await ds .push (to = "github" , jobs = self .config .jobs , data = "nothing" )
386
+ return sorted (tagged_releases )
364
387
365
388
async def mkrelease (
366
389
self ,
0 commit comments