2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1997-2009 Oracle. All rights reserved.
15 * Zero out bytes in the file.
17 * On some systems, pages allocated by writing pages past end-of-file
18 * are not zeroed. Recovery could theoretically be fooled by a page
19 * showing up that contained garbage. In order to avoid this, we
20 * have to write the pages out to disk, and flush them. The flush
21 * is needed because, if we don't sync, the allocation of another
22 * page subsequent to this one might reach the disk first, and if we
23 * crashed at the right moment, leave us with this page as the one
24 * allocated by writing a page past it in the file.
26 * PUBLIC: int __db_zero_fill __P((ENV *, DB_FH *));
/*
 * Fill the gap between the current end of the file behind fhp and the byte
 * offset of the next write (derived from fhp->pgno, fhp->pgsize and
 * fhp->offset), calling __os_fsync so the fill is flushed.  The working code
 * is only compiled when HAVE_FILESYSTEM_NOTZERO is defined.
 */
29 __db_zero_fill(env, fhp)
33 #ifdef HAVE_FILESYSTEM_NOTZERO
/*
 * stat_offset: current size of the file in bytes (from __os_ioinfo).
 * write_offset: byte offset at which the next write will land.
 */
34 off_t stat_offset, write_offset;
/* File size/position expressed as whole MEGABYTE units plus leftover bytes. */
36 u_int32_t bytes, mbytes;
40 /* Calculate the byte offset of the next write. */
/* Cast to off_t first so the page-number * page-size product is done in off_t. */
41 write_offset = (off_t)fhp->pgno * fhp->pgsize + fhp->offset;
/* Ask for the file's size; it comes back split into mbytes and bytes. */
44 if ((ret = __os_ioinfo(env, NULL, fhp, &mbytes, &bytes, NULL)) != 0)
/* Recombine the two halves into a single byte count. */
46 stat_offset = (off_t)mbytes * MEGABYTE + bytes;
48 /* Check if the file is large enough. */
/*
 * NOTE(review): the statement guarded by this test is not shown here;
 * presumably an early successful return because nothing needs filling --
 * confirm.
 */
49 if (stat_offset >= write_offset)
52 /* Get a large buffer if we're writing lots of data. */
/* Fill data is written in chunks of at most ZF_LARGE_WRITE (64KB) bytes. */
54 #define ZF_LARGE_WRITE (64 * 1024)
/* NOTE(review): relies on __os_calloc returning zero-filled memory -- confirm. */
55 if ((ret = __os_calloc(env, 1, ZF_LARGE_WRITE, &bp)) != 0)
/* blen is the size of the next chunk to write; it starts at the full buffer. */
57 blen = ZF_LARGE_WRITE;
59 /* Seek to the current end of the file. */
60 if ((ret = __os_seek(env, fhp, mbytes, MEGABYTE, bytes)) != 0)
64 * Hash is the only access method that allocates groups of pages. Hash
65 * uses the existence of the last page in a group to signify the entire
66 * group is OK; so, write all the pages but the last one in the group,
67 * flush them to disk, then write the last one to disk and flush it.
/*
 * group_sync is 0 on the first pass and 1 on every later pass, so it records
 * whether at least one chunk has already been written.
 */
69 for (group_sync = 0; stat_offset < write_offset; group_sync = 1) {
/* Final chunk: the remaining gap fits in the buffer, so trim blen to it. */
70 if (write_offset - stat_offset <= (off_t)blen) {
71 blen = (size_t)(write_offset - stat_offset);
/* Flush the earlier chunks before the final one is written. */
72 if (group_sync && (ret = __os_fsync(env, fhp)) != 0)
/* Write blen bytes from the buffer at the current file position. */
75 if ((ret = __os_physwrite(env, fhp, bp, blen, &nw)) != 0)
/*
 * NOTE(review): no visible line advances stat_offset, which the loop
 * condition depends on, and nw is not examined in the visible lines --
 * confirm both are handled after each write.
 */
/* Flush once more so the last chunk written is synced as well. */
79 if ((ret = __os_fsync(env, fhp)) != 0)
82 /* Seek back to where we started. */
/* Split write_offset back into MEGABYTE units plus remainder for __os_seek. */
83 mbytes = (u_int32_t)(write_offset / MEGABYTE);
84 bytes = (u_int32_t)(write_offset % MEGABYTE);
85 ret = __os_seek(env, fhp, mbytes, MEGABYTE, bytes);
/* Cleanup label: release the fill buffer obtained from __os_calloc. */
87 err: __os_free(env, bp);
93 #endif /* HAVE_FILESYSTEM_NOTZERO */
98 * Zero to the end of the file.
100 * PUBLIC: int __db_zero_extend __P((ENV *,
101 * PUBLIC: DB_FH *, db_pgno_t, db_pgno_t, u_int32_t));
104 __db_zero_extend(env, fhp, pgno, last_pgno, pgsize)
107 db_pgno_t pgno, last_pgno;
114 if ((ret = __os_calloc(env, 1, pgsize, &buf)) != 0)
116 memset(buf, 0, pgsize);
117 for (; pgno <= last_pgno; pgno++)
118 if ((ret = __os_io(env, DB_IO_WRITE,
119 fhp, pgno, pgsize, 0, pgsize, buf, &nwrote)) != 0) {
127 err: __os_free(env, buf);