Add BSD license file
[platform/upstream/db4.git] / lock / lock_list.c
1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996-2009 Oracle.  All rights reserved.
5  *
6  * $Id$
7  */
8
9 #include "db_config.h"
10
11 #include "db_int.h"
12 #include "dbinc/lock.h"
13 #include "dbinc/log.h"
14
15 static int __lock_sort_cmp __P((const void *, const void *));
16
17 /*
18  * Lock list routines.
19  *      The list is composed of a 32-bit count of locks followed by
20  * each lock.  A lock is represented by a 16-bit page-count, a lock
21  * object and a page list.  A lock object consists of a 16-bit size
22  * and the object itself.   In a pseudo BNF notation, you get:
23  *
24  * LIST = COUNT32 LOCK*
25  * LOCK = COUNT16 LOCKOBJ PAGELIST
26  * LOCKOBJ = COUNT16 OBJ
27  * PAGELIST = COUNT32*
28  *
29  * (Recall that X* means "0 or more X's")
30  *
31  * In most cases, the OBJ is a struct __db_ilock and the page list is
32  * a series of (32-bit) page numbers that should get written into the
33  * pgno field of the __db_ilock.  So, the actual number of pages locked
34  * is the number of items in the PAGELIST plus 1. If this is an application-
35  * specific lock, then we cannot interpret obj and the pagelist must
36  * be empty.
37  *
38  * Consider a lock list for: File A, pages 1&2, File B pages 3-5, Applock
39  * This would be represented as:
40  *      5 1 [fid=A;page=1] 2 2 [fid=B;page=3] 4 5 0 APPLOCK
41  *        ------------------ -------------------- ---------
42  *         LOCK for file A    LOCK for file B     application-specific lock
43  */
44
45 #define MAX_PGNOS       0xffff
46
47 /*
48  * These macros are bigger than one might expect because some compilers say a
49  * cast does not return an lvalue, so constructs like *(u_int32_t*)dp = count;
50  * generate warnings.
51  */
52 #define RET_SIZE(size, count)  ((size) +                                \
53      sizeof(u_int32_t) + (count) * 2 * sizeof(u_int16_t))
54
55 #define PUT_COUNT(dp, count)    do {    u_int32_t __c = (count);        \
56                                         LOGCOPY_32(env, dp, &__c);      \
57                                         dp = (u_int8_t *)dp +           \
58                                              sizeof(u_int32_t);         \
59                                 } while (0)
60 #define PUT_PCOUNT(dp, count)   do {    u_int16_t __c = (count);        \
61                                         LOGCOPY_16(env, dp, &__c);      \
62                                         dp = (u_int8_t *)dp +           \
63                                             sizeof(u_int16_t);          \
64                                 } while (0)
65 #define PUT_SIZE(dp, size)      do {    u_int16_t __s = (size);         \
66                                         LOGCOPY_16(env, dp, &__s);      \
67                                         dp = (u_int8_t *)dp +           \
68                                             sizeof(u_int16_t);          \
69                                 } while (0)
70 #define PUT_PGNO(dp, pgno)      do {    db_pgno_t __pg = (pgno);        \
71                                         LOGCOPY_32(env, dp, &__pg);     \
72                                         dp = (u_int8_t *)dp +           \
73                                             sizeof(db_pgno_t);          \
74                                 } while (0)
75 #define COPY_OBJ(dp, obj)       do {                                    \
76                                         memcpy(dp,                      \
77                                             (obj)->data, (obj)->size);  \
78                                         dp = (u_int8_t *)dp +           \
79                                              DB_ALIGN((obj)->size,      \
80                                              sizeof(u_int32_t));        \
81                                 } while (0)
82 #define GET_COUNT(dp, count)    do {    LOGCOPY_32(env, &count, dp);    \
83                                         dp = (u_int8_t *)dp +           \
84                                              sizeof(u_int32_t); \
85                                 } while (0)
86 #define GET_PCOUNT(dp, count)   do {    LOGCOPY_16(env, &count, dp);    \
87                                         dp = (u_int8_t *)dp +           \
88                                              sizeof(u_int16_t); \
89                                 } while (0)
90 #define GET_SIZE(dp, size)      do {    LOGCOPY_16(env, &size, dp);     \
91                                         dp = (u_int8_t *)dp +           \
92                                              sizeof(u_int16_t); \
93                                 } while (0)
94 #define GET_PGNO(dp, pgno)      do {    LOGCOPY_32(env, &pgno, dp);     \
95                                         dp = (u_int8_t *)dp +           \
96                                              sizeof(db_pgno_t); \
97                                 } while (0)
98
99 /*
100  * __lock_fix_list --
101  *
102  * PUBLIC: int __lock_fix_list __P((ENV *, DBT *, u_int32_t));
103  */
104 int
105 __lock_fix_list(env, list_dbt, nlocks)
106         ENV *env;
107         DBT *list_dbt;
108         u_int32_t nlocks;
109 {
110         DBT *obj;
111         DB_LOCK_ILOCK *lock, *plock;
112         u_int32_t i, j, nfid, npgno, size;
113         u_int8_t *data, *dp;
114         int ret;
115
116         if ((size = list_dbt->size) == 0)
117                 return (0);
118
119         obj = (DBT *)list_dbt->data;
120
121         /*
122          * If necessary sort the list of locks so that locks on the same fileid
123          * are together.  We do not sort 1 or 2 locks because by definition if
124          * there are locks on the same fileid they will be together.  The sort
125          * will also move any locks that do not look like page locks to the end
126          * of the list so we can stop looking for locks we can combine when we
127          * hit one.
128          */
129         switch (nlocks) {
130         case 1:
131                 size = RET_SIZE(obj->size, 1);
132                 if ((ret = __os_malloc(env, size, &data)) != 0)
133                         return (ret);
134
135                 dp = data;
136                 PUT_COUNT(dp, 1);
137                 PUT_PCOUNT(dp, 0);
138                 PUT_SIZE(dp, obj->size);
139                 COPY_OBJ(dp, obj);
140                 break;
141         default:
142                 /* Sort so that all locks with same fileid are together. */
143                 qsort(list_dbt->data, nlocks, sizeof(DBT), __lock_sort_cmp);
144                 /* FALLTHROUGH */
145         case 2:
146                 nfid = npgno = 0;
147                 i = 0;
148                 if (obj->size != sizeof(DB_LOCK_ILOCK))
149                         goto not_ilock;
150
151                 nfid = 1;
152                 plock = (DB_LOCK_ILOCK *)obj->data;
153
154                 /* We use ulen to keep track of the number of pages. */
155                 j = 0;
156                 obj[0].ulen = 0;
157                 for (i = 1; i < nlocks; i++) {
158                         if (obj[i].size != sizeof(DB_LOCK_ILOCK))
159                                 break;
160                         lock = (DB_LOCK_ILOCK *)obj[i].data;
161                         if (obj[j].ulen < MAX_PGNOS &&
162                              lock->type == plock->type &&
163                              memcmp(lock->fileid,
164                              plock->fileid, DB_FILE_ID_LEN) == 0) {
165                                 obj[j].ulen++;
166                                 npgno++;
167                         } else {
168                                 nfid++;
169                                 plock = lock;
170                                 j = i;
171                                 obj[j].ulen = 0;
172                         }
173                 }
174
175 not_ilock:      size = nfid * sizeof(DB_LOCK_ILOCK);
176                 size += npgno * sizeof(db_pgno_t);
177                 /* Add the number of nonstandard locks and get their size. */
178                 nfid += nlocks - i;
179                 for (; i < nlocks; i++) {
180                         size += obj[i].size;
181                         obj[i].ulen = 0;
182                 }
183
184                 size = RET_SIZE(size, nfid);
185                 if ((ret = __os_malloc(env, size, &data)) != 0)
186                         return (ret);
187
188                 dp = data;
189                 PUT_COUNT(dp, nfid);
190
191                 for (i = 0; i < nlocks; i = j) {
192                         PUT_PCOUNT(dp, obj[i].ulen);
193                         PUT_SIZE(dp, obj[i].size);
194                         COPY_OBJ(dp, &obj[i]);
195                         lock = (DB_LOCK_ILOCK *)obj[i].data;
196                         for (j = i + 1; j <= i + obj[i].ulen; j++) {
197                                 lock = (DB_LOCK_ILOCK *)obj[j].data;
198                                 PUT_PGNO(dp, lock->pgno);
199                         }
200                 }
201         }
202
203         __os_free(env, list_dbt->data);
204
205         list_dbt->data = data;
206         list_dbt->size = size;
207
208         return (0);
209 }
210
211 /*
212  * PUBLIC: int __lock_get_list __P((ENV *, DB_LOCKER *, u_int32_t,
213  * PUBLIC:            db_lockmode_t, DBT *));
214  */
215 int
216 __lock_get_list(env, locker, flags, lock_mode, list)
217         ENV *env;
218         DB_LOCKER *locker;
219         u_int32_t flags;
220         db_lockmode_t lock_mode;
221         DBT *list;
222 {
223         DBT obj_dbt;
224         DB_LOCK ret_lock;
225         DB_LOCKREGION *region;
226         DB_LOCKTAB *lt;
227         DB_LOCK_ILOCK *lock;
228         db_pgno_t save_pgno;
229         u_int16_t npgno, size;
230         u_int32_t i, nlocks;
231         int ret;
232         void *data, *dp;
233
234         if (list->size == 0)
235                 return (0);
236         ret = 0;
237         data = NULL;
238
239         lt = env->lk_handle;
240         dp = list->data;
241
242         /*
243          * There is no assurance log records will be aligned.  If not, then
244          * copy the data to an aligned region so the rest of the code does
245          * not have to worry about it.
246          */
247         if ((uintptr_t)dp != DB_ALIGN((uintptr_t)dp, sizeof(u_int32_t))) {
248                 if ((ret = __os_malloc(env, list->size, &data)) != 0)
249                         return (ret);
250                 memcpy(data, list->data, list->size);
251                 dp = data;
252         }
253
254         region = lt->reginfo.primary;
255         LOCK_SYSTEM_LOCK(lt, region);
256         GET_COUNT(dp, nlocks);
257
258         for (i = 0; i < nlocks; i++) {
259                 GET_PCOUNT(dp, npgno);
260                 GET_SIZE(dp, size);
261                 lock = (DB_LOCK_ILOCK *) dp;
262                 save_pgno = lock->pgno;
263                 obj_dbt.data = dp;
264                 obj_dbt.size = size;
265                 dp = ((u_int8_t *)dp) + DB_ALIGN(size, sizeof(u_int32_t));
266                 do {
267                         if ((ret = __lock_get_internal(lt, locker,
268                              flags, &obj_dbt, lock_mode, 0, &ret_lock)) != 0) {
269                              lock->pgno = save_pgno;
270                              goto err;
271                         }
272                         if (npgno != 0)
273                                 GET_PGNO(dp, lock->pgno);
274                 } while (npgno-- != 0);
275                 lock->pgno = save_pgno;
276         }
277
278 err:    LOCK_SYSTEM_UNLOCK(lt, region);
279         if (data != NULL)
280                 __os_free(env, data);
281         return (ret);
282 }
283
284 #define UINT32_CMP(A, B)        ((A) == (B) ? 0 : ((A) > (B) ? 1 : -1))
285 static int
286 __lock_sort_cmp(a, b)
287         const void *a, *b;
288 {
289         const DBT *d1, *d2;
290         DB_LOCK_ILOCK *l1, *l2;
291
292         d1 = a;
293         d2 = b;
294
295         /* Force all non-standard locks to sort at end. */
296         if (d1->size != sizeof(DB_LOCK_ILOCK)) {
297                 if (d2->size != sizeof(DB_LOCK_ILOCK))
298                         return (UINT32_CMP(d1->size, d2->size));
299                 else
300                         return (1);
301         } else if (d2->size != sizeof(DB_LOCK_ILOCK))
302                 return (-1);
303
304         l1 = d1->data;
305         l2 = d2->data;
306         if (l1->type != l2->type)
307                 return (UINT32_CMP(l1->type, l2->type));
308         return (memcmp(l1->fileid, l2->fileid, DB_FILE_ID_LEN));
309 }
310
311 /*
312  * PUBLIC: void __lock_list_print __P((ENV *, DBT *));
313  */
314 void
315 __lock_list_print(env, list)
316         ENV *env;
317         DBT *list;
318 {
319         DB_LOCK_ILOCK *lock;
320         db_pgno_t pgno;
321         u_int16_t npgno, size;
322         u_int32_t i, nlocks;
323         u_int8_t *fidp;
324         char *fname, *dname, *p, namebuf[26];
325         void *dp;
326
327         if (list->size == 0)
328                 return;
329         dp = list->data;
330
331         GET_COUNT(dp, nlocks);
332
333         for (i = 0; i < nlocks; i++) {
334                 GET_PCOUNT(dp, npgno);
335                 GET_SIZE(dp, size);
336                 lock = (DB_LOCK_ILOCK *) dp;
337                 fidp = lock->fileid;
338                 (void)__dbreg_get_name(env, fidp, &fname, &dname);
339                 printf("\t");
340                 if (fname == NULL && dname == NULL)
341                         printf("(%lx %lx %lx %lx %lx)",
342                         (u_long)fidp[0], (u_long)fidp[1], (u_long)fidp[2],
343                         (u_long)fidp[3], (u_long)fidp[4]);
344                 else {
345                         if (fname != NULL && dname != NULL) {
346                                 (void)snprintf(namebuf, sizeof(namebuf),
347                                     "%14s.%-10s", fname, dname);
348                                 p = namebuf;
349                         } else if (fname != NULL)
350                                 p = fname;
351                         else
352                                 p = dname;
353                         printf("%-25s", p);
354                 }
355                 dp = ((u_int8_t *)dp) + DB_ALIGN(size, sizeof(u_int32_t));
356                 LOGCOPY_32(env, &pgno, &lock->pgno);
357                 do {
358                         printf(" %d", pgno);
359                         if (npgno != 0)
360                                 GET_PGNO(dp, pgno);
361                 } while (npgno-- != 0);
362                 printf("\n");
363         }
364 }