-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathlog.c
267 lines (242 loc) · 6.94 KB
/
log.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
#include "types.h"
#include "defs.h"
#include "param.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "buf.h"
// Simple logging that allows concurrent FS system calls.
//
// A log transaction contains the updates of multiple FS system
// calls. The logging system only commits when there are
// no FS system calls active. Thus there is never
// any reasoning required about whether a commit might
// write an uncommitted system call's updates to disk.
//
// A system call should call begin_op()/end_op() to mark
// its start and end. Usually begin_op() just increments
// the count of in-progress FS system calls and returns.
// But if it thinks the log is close to running out, it
// sleeps until the last outstanding end_op() commits.
//
// The log is a physical re-do log containing disk blocks.
// The on-disk log format:
// header block, containing block #s for block A, B, C, ...
// block A
// block B
// block C
// ...
// Log appends are synchronous.
// Contents of the header block, used for both the on-disk header block
// and to keep track in memory of logged block# before commit.
struct logheader {
int n;
// Sector numbers that correspond to dirty
// buffers in the buffer cache (see log_write
// below). These buffers get written to the
// on-disk write-ahead journal in write_log below.
int block[LOGSIZE];
};
struct log {
struct spinlock lock;
int start; // block number of first log block (from superblock)
int size; // number of logs in block (from superblock)
int outstanding; // how many FS sys calls are executing.
int committing; // in commit(), please wait.
int dev; // device number
struct logheader lh;
};
struct log log;
static void recover_from_log(void);
static void commit();
void
initlog(int dev)
{
if (sizeof(struct logheader) >= BSIZE)
panic("initlog: too big logheader");
struct superblock sb;
initlock(&log.lock, "log");
readsb(dev, &sb);
log.start = sb.logstart;
log.size = sb.nlog;
log.dev = dev;
recover_from_log();
}
// Copy committed blocks from log to their home location
// Same function as log_write but with src and dst flipped.
static void
install_trans(void)
{
int tail;
for (tail = 0; tail < log.lh.n; tail++) {
struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block
struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst
memmove(dbuf->data, lbuf->data, BSIZE); // copy block to dst
bwrite(dbuf); // write dst to disk
brelse(lbuf);
brelse(dbuf);
}
}
// Read the log header from disk into the in-memory log header
static void
read_head(void)
{
struct buf *buf = bread(log.dev, log.start);
struct logheader *lh = (struct logheader *) (buf->data);
int i;
log.lh.n = lh->n;
for (i = 0; i < log.lh.n; i++) {
log.lh.block[i] = lh->block[i];
}
brelse(buf);
}
// Write in-memory log header to disk.
// This is the true point at which the
// current transaction commits.
static void
write_head(void)
{
struct buf *buf = bread(log.dev, log.start);
struct logheader *hb = (struct logheader *) (buf->data);
int i;
hb->n = log.lh.n;
for (i = 0; i < log.lh.n; i++) {
hb->block[i] = log.lh.block[i];
}
bwrite(buf);
brelse(buf);
}
static void
recover_from_log(void)
{
read_head();
install_trans(); // if committed, copy from log to disk
log.lh.n = 0;
write_head(); // clear the log
}
// called at the start of each FS system call.
// Waits until the logging system is not currently
// committing, and until there is enough free space
// to hold the writes from this call and all currently
// executing system calls (as counted in log.outstanding).
void
begin_op(void)
{
acquire(&log.lock);
while(1){
if(log.committing){
sleep(&log, &log.lock);
} else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){
// this op might exhaust log space; wait for commit.
sleep(&log, &log.lock);
} else {
// Reserve space and
// prevent a commit
// from occurring during
// this syscall.
log.outstanding += 1;
release(&log.lock);
break;
}
}
}
// called at the end of each FS system call.
// commits if this was the last outstanding operation.
void
end_op(void)
{
int do_commit = 0;
acquire(&log.lock);
log.outstanding -= 1;
if(log.committing)
panic("log.committing");
if(log.outstanding == 0){
do_commit = 1;
log.committing = 1;
} else {
// begin_op() may be waiting for log space.
wakeup(&log);
}
release(&log.lock);
if(do_commit){
// call commit w/o holding locks, since not allowed
// to sleep with locks (bget acquires buffer sleeplocks).
// Setting log.committing above prevents other kernel threads
// from modifying the in-memory log.
commit();
acquire(&log.lock); // Get lock to avoid missed wakeup
log.committing = 0;
wakeup(&log);
release(&log.lock);
}
}
// Copy modified blocks from buffer cache to log on disk
static void
write_log(void)
{
int tail;
for (tail = 0; tail < log.lh.n; tail++) {
// Get pointers to buffer cache entries corresponding
// to the log block on disk we're about to modify
// and to the dirty block in the buffer cache
struct buf *to = bread(log.dev, log.start+tail+1); // log block
struct buf *from = bread(log.dev, log.lh.block[tail]); // cache block
memmove(to->data, from->data, BSIZE);
bwrite(to); // write the cached log block to disk
brelse(from);
brelse(to);
}
}
static void
commit()
{
if (log.lh.n > 0) {
write_log(); // Write modified blocks from cache to log
write_head(); // Write header to disk -- the real commit
install_trans(); // Now install writes to home locations
// Erase the transaction from the log
log.lh.n = 0;
write_head();
}
}
// Caller has modified b->data and is done with the buffer.
// Record the block number and pin in the cache with B_DIRTY.
// commit()/write_log() will do the disk write.
//
// log_write() replaces bwrite(); a typical use is:
// bp = bread(...)
// modify bp->data[]
// log_write(bp)
// brelse(bp)
void
log_write(struct buf *b)
{
int i;
if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1)
panic("too big a transaction");
if (log.outstanding < 1)
panic("log_write outside of trans");
acquire(&log.lock);
// Log absorption: it's common for
// the same block to be written
// multiple times in the same
// transaction--we want to re-use
// the same log block to save space
// in the log and so that multiple edits
// are combined into one disk I/O operation.
for (i = 0; i < log.lh.n; i++) {
if (log.lh.block[i] == b->blockno)
break;
}
// At this point, i either refers
// to the already-existing log block
// corresponding to this buffer
// (i.e. this buffer has already
// been edited in this transaction)
// or is log.lh.n
log.lh.block[i] = b->blockno;
if (i == log.lh.n)
log.lh.n++;
b->flags |= B_DIRTY; // prevent eviction
release(&log.lock);
}