fs/jffs2/gc.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
6  *
7  * Created by David Woodhouse <dwmw2@infradead.org>
8  *
9  * For licensing information, see the file 'LICENCE' in this directory.
10  *
11  */
12
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15 #include <linux/kernel.h>
16 #include <linux/mtd/mtd.h>
17 #include <linux/slab.h>
18 #include <linux/pagemap.h>
19 #include <linux/crc32.h>
20 #include <linux/compiler.h>
21 #include <linux/stat.h>
22 #include "nodelist.h"
23 #include "compr.h"
24
25 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
26                                           struct jffs2_inode_cache *ic,
27                                           struct jffs2_raw_node_ref *raw);
28 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
29                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
30 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
31                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
32 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
33                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
34 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
35                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
36                                       uint32_t start, uint32_t end);
37 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
38                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
39                                        uint32_t start, uint32_t end);
40 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
41                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
42
43 /* Called with erase_completion_lock held */
44 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
45 {
46         struct jffs2_eraseblock *ret;
47         struct list_head *nextlist = NULL;
48         int n = jiffies % 128;
49
50         /* Pick an eraseblock to garbage collect next. This is where we'll
51            put the clever wear-levelling algorithms. Eventually.  */
52         /* We possibly want to favour the dirtier blocks more when the
53            number of free blocks is low. */
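        /* 'n' above is a cheap pseudo-random value in the range [0, 127]; the
           thresholds below (50, 110, 126) bias the choice roughly 50/128 towards
           the erasable list, 60/128 towards very_dirty, 16/128 towards dirty and
           2/128 towards clean, whenever the corresponding list is non-empty. */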
54 again:
55         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
56                 jffs2_dbg(1, "Picking block from bad_used_list to GC next\n");
57                 nextlist = &c->bad_used_list;
58         } else if (n < 50 && !list_empty(&c->erasable_list)) {
59                 /* Note that most of them will have gone directly to be erased.
60                    So don't favour the erasable_list _too_ much. */
61                 jffs2_dbg(1, "Picking block from erasable_list to GC next\n");
62                 nextlist = &c->erasable_list;
63         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
64                 /* Most of the time, pick one off the very_dirty list */
65                 jffs2_dbg(1, "Picking block from very_dirty_list to GC next\n");
66                 nextlist = &c->very_dirty_list;
67         } else if (n < 126 && !list_empty(&c->dirty_list)) {
68                 jffs2_dbg(1, "Picking block from dirty_list to GC next\n");
69                 nextlist = &c->dirty_list;
70         } else if (!list_empty(&c->clean_list)) {
71                 jffs2_dbg(1, "Picking block from clean_list to GC next\n");
72                 nextlist = &c->clean_list;
73         } else if (!list_empty(&c->dirty_list)) {
74                 jffs2_dbg(1, "Picking block from dirty_list to GC next (clean_list was empty)\n");
75
76                 nextlist = &c->dirty_list;
77         } else if (!list_empty(&c->very_dirty_list)) {
78                 jffs2_dbg(1, "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n");
79                 nextlist = &c->very_dirty_list;
80         } else if (!list_empty(&c->erasable_list)) {
81                 jffs2_dbg(1, "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n");
82
83                 nextlist = &c->erasable_list;
84         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
85                 /* There are blocks waiting for the wbuf sync */
86                 jffs2_dbg(1, "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n");
87                 spin_unlock(&c->erase_completion_lock);
88                 jffs2_flush_wbuf_pad(c);
89                 spin_lock(&c->erase_completion_lock);
90                 goto again;
91         } else {
92                 /* Eep. All were empty */
93                 jffs2_dbg(1, "No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n");
94                 return NULL;
95         }
96
97         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
98         list_del(&ret->list);
99         c->gcblock = ret;
100         ret->gc_node = ret->first_node;
101         if (!ret->gc_node) {
102                 pr_warn("Eep. ret->gc_node for block at 0x%08x is NULL\n",
103                         ret->offset);
104                 BUG();
105         }
106
107         /* Have we accidentally picked a clean block with wasted space? */
108         if (ret->wasted_size) {
109                 jffs2_dbg(1, "Converting wasted_size %08x to dirty_size\n",
110                           ret->wasted_size);
111                 ret->dirty_size += ret->wasted_size;
112                 c->wasted_size -= ret->wasted_size;
113                 c->dirty_size += ret->wasted_size;
114                 ret->wasted_size = 0;
115         }
116
117         return ret;
118 }
119
120 /* jffs2_garbage_collect_pass
121  * Make a single attempt to progress GC. Move one node, and possibly
122  * start erasing one eraseblock.
123  */
124 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
125 {
126         struct jffs2_inode_info *f;
127         struct jffs2_inode_cache *ic;
128         struct jffs2_eraseblock *jeb;
129         struct jffs2_raw_node_ref *raw;
130         uint32_t gcblock_dirty;
131         int ret = 0, inum, nlink;
132         int xattr = 0;
133
134         if (mutex_lock_interruptible(&c->alloc_sem))
135                 return -EINTR;
136
137
138         for (;;) {
139                 /* We can't start doing GC until we've finished checking
140                    the node CRCs etc. */
141                 int bucket, want_ino;
142
143                 spin_lock(&c->erase_completion_lock);
144                 if (!c->unchecked_size)
145                         break;
146                 spin_unlock(&c->erase_completion_lock);
147
148                 if (!xattr)
149                         xattr = jffs2_verify_xattr(c);
150
151                 spin_lock(&c->inocache_lock);
152                 /* Instead of doing the inodes in numeric order, which would mean a
153                  * hash lookup for each possible number, just walk the hash
154                  * buckets of *existing* inodes. This means that we process
155                  * them out-of-order, but it can be a lot faster if there's
156                  * a sparse inode# space. Which there often is. */
157                 want_ino = c->check_ino;
158                 for (bucket = c->check_ino % c->inocache_hashsize ; bucket < c->inocache_hashsize; bucket++) {
159                         for (ic = c->inocache_list[bucket]; ic; ic = ic->next) {
160                                 if (ic->ino < want_ino)
161                                         continue;
162
163                                 if (ic->state != INO_STATE_CHECKEDABSENT &&
164                                     ic->state != INO_STATE_PRESENT)
165                                         goto got_next; /* with inocache_lock held */
166
167                                 jffs2_dbg(1, "Skipping ino #%u already checked\n",
168                                           ic->ino);
169                         }
170                         want_ino = 0;
171                 }
172
173                 /* Point c->check_ino past the end of the last bucket. */
174                 c->check_ino = ((c->highest_ino + c->inocache_hashsize + 1) &
175                                 ~c->inocache_hashsize) - 1;
176
177                 spin_unlock(&c->inocache_lock);
178
179                 pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
180                         c->unchecked_size);
181                 jffs2_dbg_dump_block_lists_nolock(c);
182                 mutex_unlock(&c->alloc_sem);
183                 return -ENOSPC;
184
185         got_next:
186                 /* For next time round the loop, we want c->check_ino to indicate
187                  * the *next* one we want to check. And since we're walking the
188                  * buckets rather than doing it sequentially, it's: */
189                 c->check_ino = ic->ino + c->inocache_hashsize;
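                /* ic->ino + inocache_hashsize is the smallest inode number after
                   ic->ino that can land in the same hash bucket, so on the next
                   pass we resume in this bucket just past the inode handled now. */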
190
191                 if (!ic->pino_nlink) {
192                         jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n",
193                                   ic->ino);
194                         spin_unlock(&c->inocache_lock);
195                         jffs2_xattr_delete_inode(c, ic);
196                         continue;
197                 }
198                 switch(ic->state) {
199                 case INO_STATE_CHECKEDABSENT:
200                 case INO_STATE_PRESENT:
201                         spin_unlock(&c->inocache_lock);
202                         continue;
203
204                 case INO_STATE_GC:
205                 case INO_STATE_CHECKING:
206                         pr_warn("Inode #%u is in state %d during CRC check phase!\n",
207                                 ic->ino, ic->state);
208                         spin_unlock(&c->inocache_lock);
209                         BUG();
210
211                 case INO_STATE_READING:
212                         /* We need to wait for it to finish, lest we move on
213                            and trigger the BUG() above while we haven't yet
214                            finished checking all its nodes */
215                         jffs2_dbg(1, "Waiting for ino #%u to finish reading\n",
216                                   ic->ino);
217                         /* We need to come back again for the _same_ inode. We've
218                          made no progress in this case, but that should be OK */
219                         c->check_ino = ic->ino;
220
221                         mutex_unlock(&c->alloc_sem);
222                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
223                         return 0;
224
225                 default:
226                         BUG();
227
228                 case INO_STATE_UNCHECKED:
229                         ;
230                 }
231                 ic->state = INO_STATE_CHECKING;
232                 spin_unlock(&c->inocache_lock);
233
234                 jffs2_dbg(1, "%s(): triggering inode scan of ino#%u\n",
235                           __func__, ic->ino);
236
237                 ret = jffs2_do_crccheck_inode(c, ic);
238                 if (ret)
239                         pr_warn("Returned error for crccheck of ino #%u. Expect badness...\n",
240                                 ic->ino);
241
242                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
243                 mutex_unlock(&c->alloc_sem);
244                 return ret;
245         }
246
247         /* If there are any blocks which need erasing, erase them now */
248         if (!list_empty(&c->erase_complete_list) ||
249             !list_empty(&c->erase_pending_list)) {
250                 spin_unlock(&c->erase_completion_lock);
251                 mutex_unlock(&c->alloc_sem);
252                 jffs2_dbg(1, "%s(): erasing pending blocks\n", __func__);
253                 if (jffs2_erase_pending_blocks(c, 1))
254                         return 0;
255
256                 jffs2_dbg(1, "No progress from erasing block; doing GC anyway\n");
257                 mutex_lock(&c->alloc_sem);
258                 spin_lock(&c->erase_completion_lock);
259         }
260
261         /* First, work out which block we're garbage-collecting */
262         jeb = c->gcblock;
263
264         if (!jeb)
265                 jeb = jffs2_find_gc_block(c);
266
267         if (!jeb) {
268                 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
269                 if (c->nr_erasing_blocks) {
270                         spin_unlock(&c->erase_completion_lock);
271                         mutex_unlock(&c->alloc_sem);
272                         return -EAGAIN;
273                 }
274                 jffs2_dbg(1, "Couldn't find erase block to garbage collect!\n");
275                 spin_unlock(&c->erase_completion_lock);
276                 mutex_unlock(&c->alloc_sem);
277                 return -EIO;
278         }
279
280         jffs2_dbg(1, "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n",
281                   jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size);
282         D1(if (c->nextblock)
283            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
284
285         if (!jeb->used_size) {
286                 mutex_unlock(&c->alloc_sem);
287                 goto eraseit;
288         }
289
290         raw = jeb->gc_node;
291         gcblock_dirty = jeb->dirty_size;
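        /* Remember the block's current dirty_size; at the 'test_gcnode' label
           below, GC is treated as having failed if dirty_size hasn't grown and
           the node we were collecting wasn't marked obsolete. */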
292
293         while(ref_obsolete(raw)) {
294                 jffs2_dbg(1, "Node at 0x%08x is obsolete... skipping\n",
295                           ref_offset(raw));
296                 raw = ref_next(raw);
297                 if (unlikely(!raw)) {
298                         pr_warn("eep. End of raw list while there are supposedly still nodes to GC\n");
299                         pr_warn("erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
300                                 jeb->offset, jeb->free_size,
301                                 jeb->dirty_size, jeb->used_size);
302                         jeb->gc_node = raw;
303                         spin_unlock(&c->erase_completion_lock);
304                         mutex_unlock(&c->alloc_sem);
305                         BUG();
306                 }
307         }
308         jeb->gc_node = raw;
309
310         jffs2_dbg(1, "Going to garbage collect node at 0x%08x\n",
311                   ref_offset(raw));
312
313         if (!raw->next_in_ino) {
314                 /* Inode-less node. Clean marker, snapshot or something like that */
315                 spin_unlock(&c->erase_completion_lock);
316                 if (ref_flags(raw) == REF_PRISTINE) {
317                         /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
318                         jffs2_garbage_collect_pristine(c, NULL, raw);
319                 } else {
320                         /* Just mark it obsolete */
321                         jffs2_mark_node_obsolete(c, raw);
322                 }
323                 mutex_unlock(&c->alloc_sem);
324                 goto eraseit_lock;
325         }
326
327         ic = jffs2_raw_ref_to_ic(raw);
328
329 #ifdef CONFIG_JFFS2_FS_XATTR
330         /* When 'ic' refers to an xattr_datum/xattr_ref, this node is GCed as an xattr.
331          * We can decide whether this node is an inode or an xattr from ic->class. */
332         if (ic->class == RAWNODE_CLASS_XATTR_DATUM
333             || ic->class == RAWNODE_CLASS_XATTR_REF) {
334                 spin_unlock(&c->erase_completion_lock);
335
336                 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
337                         ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
338                 } else {
339                         ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
340                 }
341                 goto test_gcnode;
342         }
343 #endif
344
345         /* We need to hold the inocache. Either the erase_completion_lock or
346            the inocache_lock is sufficient; we trade down since the inocache_lock
347            causes less contention. */
348         spin_lock(&c->inocache_lock);
349
350         spin_unlock(&c->erase_completion_lock);
351
352         jffs2_dbg(1, "%s(): collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n",
353                   __func__, jeb->offset, ref_offset(raw), ref_flags(raw),
354                   ic->ino);
355
356         /* Three possibilities:
357            1. Inode is already in-core. We must iget it and do proper
358               updating to its fragtree, etc.
359            2. Inode is not in-core, node is REF_PRISTINE. We lock the
360               inocache to prevent a read_inode(), copy the node intact.
361            3. Inode is not in-core, node is not pristine. We must iget()
362               and take the slow path.
363         */
364
365         switch(ic->state) {
366         case INO_STATE_CHECKEDABSENT:
367                 /* It's been checked, but it's not currently in-core.
368                    We can just copy any pristine nodes, but have
369                    to prevent anyone else from doing read_inode() while
370                    we're at it, so we set the state accordingly */
371                 if (ref_flags(raw) == REF_PRISTINE)
372                         ic->state = INO_STATE_GC;
373                 else {
374                         jffs2_dbg(1, "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
375                                   ic->ino);
376                 }
377                 break;
378
379         case INO_STATE_PRESENT:
380                 /* It's in-core. GC must iget() it. */
381                 break;
382
383         case INO_STATE_UNCHECKED:
384         case INO_STATE_CHECKING:
385         case INO_STATE_GC:
386                 /* Should never happen. We should have finished checking
387                    by the time we actually start doing any GC, and since
388                    we're holding the alloc_sem, no other garbage collection
389                    can happen.
390                 */
391                 pr_crit("Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
392                         ic->ino, ic->state);
393                 mutex_unlock(&c->alloc_sem);
394                 spin_unlock(&c->inocache_lock);
395                 BUG();
396
397         case INO_STATE_READING:
398                 /* Someone's currently trying to read it. We must wait for
399                    them to finish and then go through the full iget() route
400                    to do the GC. However, sometimes read_inode() needs to get
401                    the alloc_sem() (for marking nodes invalid) so we must
402                    drop the alloc_sem before sleeping. */
403
404                 mutex_unlock(&c->alloc_sem);
405                 jffs2_dbg(1, "%s(): waiting for ino #%u in state %d\n",
406                           __func__, ic->ino, ic->state);
407                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
408                 /* And because we dropped the alloc_sem we must start again from the
409                    beginning. Ponder chance of livelock here -- we're returning success
410                    without actually making any progress.
411
412                    Q: What are the chances that the inode is back in INO_STATE_READING
413                    again by the time we next enter this function? And that this happens
414                    enough times to cause a real delay?
415
416                    A: Small enough that I don't care :)
417                 */
418                 return 0;
419         }
420
421         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
422            node intact, and we don't have to muck about with the fragtree etc.
423            because we know it's not in-core. If it _was_ in-core, we go through
424            all the iget() crap anyway */
425
426         if (ic->state == INO_STATE_GC) {
427                 spin_unlock(&c->inocache_lock);
428
429                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
430
431                 spin_lock(&c->inocache_lock);
432                 ic->state = INO_STATE_CHECKEDABSENT;
433                 wake_up(&c->inocache_wq);
434
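                /* -EBADFD from jffs2_garbage_collect_pristine() means the node
                   could not be copied intact and must instead go through the
                   full iget()-based path below. */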
435                 if (ret != -EBADFD) {
436                         spin_unlock(&c->inocache_lock);
437                         goto test_gcnode;
438                 }
439
440                 /* Fall through if it wanted us to, with inocache_lock held */
441         }
442
443         /* Prevent the fairly unlikely race where the gcblock is
444            entirely obsoleted by the final close of a file which had
445            the only valid nodes in the block, followed by erasure,
446            followed by freeing of the ic because the erased block(s)
447            held _all_ the nodes of that inode.... never been seen but
448            it's vaguely possible. */
449
450         inum = ic->ino;
451         nlink = ic->pino_nlink;
452         spin_unlock(&c->inocache_lock);
453
454         f = jffs2_gc_fetch_inode(c, inum, !nlink);
455         if (IS_ERR(f)) {
456                 ret = PTR_ERR(f);
457                 goto release_sem;
458         }
459         if (!f) {
460                 ret = 0;
461                 goto release_sem;
462         }
463
464         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
465
466         jffs2_gc_release_inode(c, f);
467
468  test_gcnode:
469         if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
470                 /* Eep. This really should never happen. GC is broken */
471                 pr_err("Error garbage collecting node at %08x!\n",
472                        ref_offset(jeb->gc_node));
473                 ret = -ENOSPC;
474         }
475  release_sem:
476         mutex_unlock(&c->alloc_sem);
477
478  eraseit_lock:
479         /* If we've finished this block, start it erasing */
480         spin_lock(&c->erase_completion_lock);
481
482  eraseit:
483         if (c->gcblock && !c->gcblock->used_size) {
484                 jffs2_dbg(1, "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n",
485                           c->gcblock->offset);
486                 /* We're GC'ing an empty block? */
487                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
488                 c->gcblock = NULL;
489                 c->nr_erasing_blocks++;
490                 jffs2_garbage_collect_trigger(c);
491         }
492         spin_unlock(&c->erase_completion_lock);
493
494         return ret;
495 }
496
497 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
498                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
499 {
500         struct jffs2_node_frag *frag;
501         struct jffs2_full_dnode *fn = NULL;
502         struct jffs2_full_dirent *fd;
503         uint32_t start = 0, end = 0, nrfrags = 0;
504         int ret = 0;
505
506         mutex_lock(&f->sem);
507
508         /* Now we have the lock for this inode. Check that it's still the one at the head
509            of the list. */
510
511         spin_lock(&c->erase_completion_lock);
512
513         if (c->gcblock != jeb) {
514                 spin_unlock(&c->erase_completion_lock);
515                 jffs2_dbg(1, "GC block is no longer gcblock. Restart\n");
516                 goto upnout;
517         }
518         if (ref_obsolete(raw)) {
519                 spin_unlock(&c->erase_completion_lock);
520                 jffs2_dbg(1, "node to be GC'd was obsoleted in the meantime.\n");
521                 /* They'll call again */
522                 goto upnout;
523         }
524         spin_unlock(&c->erase_completion_lock);
525
526         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
527         if (f->metadata && f->metadata->raw == raw) {
528                 fn = f->metadata;
529                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
530                 goto upnout;
531         }
532
533         /* FIXME. Read node and do lookup? */
534         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
535                 if (frag->node && frag->node->raw == raw) {
536                         fn = frag->node;
537                         end = frag->ofs + frag->size;
538                         if (!nrfrags++)
539                                 start = frag->ofs;
540                         if (nrfrags == frag->node->frags)
541                                 break; /* We've found them all */
542                 }
543         }
544         if (fn) {
545                 if (ref_flags(raw) == REF_PRISTINE) {
546                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
547                         if (!ret) {
548                                 /* Urgh. Return it sensibly. */
549                                 frag->node->raw = f->inocache->nodes;
550                         }
551                         if (ret != -EBADFD)
552                                 goto upnout;
553                 }
554                 /* We found a datanode. Do the GC */
555                 if((start >> PAGE_SHIFT) < ((end-1) >> PAGE_SHIFT)) {
556                         /* It crosses a page boundary. Therefore, it must be a hole. */
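                        /* (Ordinary data nodes are written at most a page at a
                           time, so only a hole node should span a page boundary.) */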
557                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
558                 } else {
559                         /* It could still be a hole. But we GC the page this way anyway */
560                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
561                 }
562                 goto upnout;
563         }
564
565         /* Wasn't a dnode. Try dirent */
566         for (fd = f->dents; fd; fd=fd->next) {
567                 if (fd->raw == raw)
568                         break;
569         }
570
571         if (fd && fd->ino) {
572                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
573         } else if (fd) {
574                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
575         } else {
576                 pr_warn("Raw node at 0x%08x wasn't in node lists for ino #%u\n",
577                         ref_offset(raw), f->inocache->ino);
578                 if (ref_obsolete(raw)) {
579                         pr_warn("But it's obsolete so we don't mind too much\n");
580                 } else {
581                         jffs2_dbg_dump_node(c, ref_offset(raw));
582                         BUG();
583                 }
584         }
585  upnout:
586         mutex_unlock(&f->sem);
587
588         return ret;
589 }
590
591 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
592                                           struct jffs2_inode_cache *ic,
593                                           struct jffs2_raw_node_ref *raw)
594 {
595         union jffs2_node_union *node;
596         size_t retlen;
597         int ret;
598         uint32_t phys_ofs, alloclen;
599         uint32_t crc, rawlen;
600         int retried = 0;
601
602         jffs2_dbg(1, "Going to GC REF_PRISTINE node at 0x%08x\n",
603                   ref_offset(raw));
604
605         alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
606
607         /* Ask for a small amount of space (or the totlen if smaller) because we
608            don't want to force wastage of the end of a block if splitting would
609            work. */
610         if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
611                 alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
612
613         ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
614         /* 'rawlen' is not the exact summary size; it is only an upper estimate */
615
616         if (ret)
617                 return ret;
618
619         if (alloclen < rawlen) {
620                 /* Doesn't fit untouched. We'll go the old route and split it */
621                 return -EBADFD;
622         }
623
624         node = kmalloc(rawlen, GFP_KERNEL);
625         if (!node)
626                 return -ENOMEM;
627
628         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
629         if (!ret && retlen != rawlen)
630                 ret = -EIO;
631         if (ret)
632                 goto out_node;
633
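        /* The header CRC covers the common node header minus its own 4-byte
           hdr_crc field, hence the sizeof(struct jffs2_unknown_node)-4 below. */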
634         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
635         if (je32_to_cpu(node->u.hdr_crc) != crc) {
636                 pr_warn("Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
637                         ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
638                 goto bail;
639         }
640
641         switch(je16_to_cpu(node->u.nodetype)) {
642         case JFFS2_NODETYPE_INODE:
643                 crc = crc32(0, node, sizeof(node->i)-8);
644                 if (je32_to_cpu(node->i.node_crc) != crc) {
645                         pr_warn("Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
646                                 ref_offset(raw), je32_to_cpu(node->i.node_crc),
647                                 crc);
648                         goto bail;
649                 }
650
651                 if (je32_to_cpu(node->i.dsize)) {
652                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
653                         if (je32_to_cpu(node->i.data_crc) != crc) {
654                                 pr_warn("Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
655                                         ref_offset(raw),
656                                         je32_to_cpu(node->i.data_crc), crc);
657                                 goto bail;
658                         }
659                 }
660                 break;
661
662         case JFFS2_NODETYPE_DIRENT:
663                 crc = crc32(0, node, sizeof(node->d)-8);
664                 if (je32_to_cpu(node->d.node_crc) != crc) {
665                         pr_warn("Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
666                                 ref_offset(raw),
667                                 je32_to_cpu(node->d.node_crc), crc);
668                         goto bail;
669                 }
670
671                 if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
672                         pr_warn("Name in dirent node at 0x%08x contains zeroes\n",
673                                 ref_offset(raw));
674                         goto bail;
675                 }
676
677                 if (node->d.nsize) {
678                         crc = crc32(0, node->d.name, node->d.nsize);
679                         if (je32_to_cpu(node->d.name_crc) != crc) {
680                                 pr_warn("Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
681                                         ref_offset(raw),
682                                         je32_to_cpu(node->d.name_crc), crc);
683                                 goto bail;
684                         }
685                 }
686                 break;
687         default:
688                 /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
689                 if (ic) {
690                         pr_warn("Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
691                                 ref_offset(raw), je16_to_cpu(node->u.nodetype));
692                         goto bail;
693                 }
694         }
695
696         /* OK, all the CRCs are good; this node can just be copied as-is. */
697  retry:
698         phys_ofs = write_ofs(c);
699
700         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
701
702         if (ret || (retlen != rawlen)) {
703                 pr_notice("Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
704                           rawlen, phys_ofs, ret, retlen);
705                 if (retlen) {
706                         jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
707                 } else {
708                         pr_notice("Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n",
709                                   phys_ofs);
710                 }
711                 if (!retried) {
712                         /* Try to reallocate space and retry */
713                         uint32_t dummy;
714                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
715
716                         retried = 1;
717
718                         jffs2_dbg(1, "Retrying failed write of REF_PRISTINE node.\n");
719
720                         jffs2_dbg_acct_sanity_check(c,jeb);
721                         jffs2_dbg_acct_paranoia_check(c, jeb);
722
723                         ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
724                                                 /* this is not the exact summary size;
725                                                         it is only an upper estimate */
726
727                         if (!ret) {
728                                 jffs2_dbg(1, "Allocated space at 0x%08x to retry failed write.\n",
729                                           phys_ofs);
730
731                                 jffs2_dbg_acct_sanity_check(c,jeb);
732                                 jffs2_dbg_acct_paranoia_check(c, jeb);
733
734                                 goto retry;
735                         }
736                         jffs2_dbg(1, "Failed to allocate space to retry failed write: %d!\n",
737                                   ret);
738                 }
739
740                 if (!ret)
741                         ret = -EIO;
742                 goto out_node;
743         }
744         jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
745
746         jffs2_mark_node_obsolete(c, raw);
747         jffs2_dbg(1, "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n",
748                   ref_offset(raw));
749
750  out_node:
751         kfree(node);
752         return ret;
753  bail:
754         ret = -EBADFD;
755         goto out_node;
756 }
757
758 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
759                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
760 {
761         struct jffs2_full_dnode *new_fn;
762         struct jffs2_raw_inode ri;
763         struct jffs2_node_frag *last_frag;
764         union jffs2_device_node dev;
765         char *mdata = NULL;
766         int mdatalen = 0;
767         uint32_t alloclen, ilen;
768         int ret;
769
770         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
771             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
772                 /* For these, we don't actually need to read the old node */
773                 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
774                 mdata = (char *)&dev;
775                 jffs2_dbg(1, "%s(): Writing %d bytes of kdev_t\n",
776                           __func__, mdatalen);
777         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
778                 mdatalen = fn->size;
779                 mdata = kmalloc(fn->size, GFP_KERNEL);
780                 if (!mdata) {
781                         pr_warn("kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
782                         return -ENOMEM;
783                 }
784                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
785                 if (ret) {
786                         pr_warn("read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n",
787                                 ret);
788                         kfree(mdata);
789                         return ret;
790                 }
791                 jffs2_dbg(1, "%s(): Writing %d bytes of symlink target\n",
792                           __func__, mdatalen);
793
794         }
795
796         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
797                                 JFFS2_SUMMARY_INODE_SIZE);
798         if (ret) {
799                 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
800                         sizeof(ri) + mdatalen, ret);
801                 goto out;
802         }
803
804         last_frag = frag_last(&f->fragtree);
805         if (last_frag)
806                 /* Fetch the inode length from the fragtree rather than
807                  * from i_size since i_size may have not been updated yet */
808                 ilen = last_frag->ofs + last_frag->size;
809         else
810                 ilen = JFFS2_F_I_SIZE(f);
811
812         memset(&ri, 0, sizeof(ri));
813         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
814         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
815         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
816         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
817
818         ri.ino = cpu_to_je32(f->inocache->ino);
819         ri.version = cpu_to_je32(++f->highest_version);
820         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
821         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
822         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
823         ri.isize = cpu_to_je32(ilen);
824         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
825         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
826         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
827         ri.offset = cpu_to_je32(0);
828         ri.csize = cpu_to_je32(mdatalen);
829         ri.dsize = cpu_to_je32(mdatalen);
830         ri.compr = JFFS2_COMPR_NONE;
831         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
832         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
833
834         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
835
836         if (IS_ERR(new_fn)) {
837                 pr_warn("Error writing new dnode: %ld\n", PTR_ERR(new_fn));
838                 ret = PTR_ERR(new_fn);
839                 goto out;
840         }
841         jffs2_mark_node_obsolete(c, fn->raw);
842         jffs2_free_full_dnode(fn);
843         f->metadata = new_fn;
844  out:
845         if (S_ISLNK(JFFS2_F_I_MODE(f)))
846                 kfree(mdata);
847         return ret;
848 }
849
850 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
851                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
852 {
853         struct jffs2_full_dirent *new_fd;
854         struct jffs2_raw_dirent rd;
855         uint32_t alloclen;
856         int ret;
857
858         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
859         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
860         rd.nsize = strlen(fd->name);
861         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
862         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
863
864         rd.pino = cpu_to_je32(f->inocache->ino);
865         rd.version = cpu_to_je32(++f->highest_version);
866         rd.ino = cpu_to_je32(fd->ino);
867         /* If the times on this inode were set by explicit utime() they can be different,
868            so refrain from splatting them. */
869         if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
870                 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
871         else
872                 rd.mctime = cpu_to_je32(0);
873         rd.type = fd->type;
874         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
875         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
876
877         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
878                                 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
879         if (ret) {
880                 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
881                         sizeof(rd)+rd.nsize, ret);
882                 return ret;
883         }
884         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
885
886         if (IS_ERR(new_fd)) {
887                 pr_warn("jffs2_write_dirent in garbage_collect_dirent failed: %ld\n",
888                         PTR_ERR(new_fd));
889                 return PTR_ERR(new_fd);
890         }
891         jffs2_add_fd_to_list(c, new_fd, &f->dents);
892         return 0;
893 }
894
895 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
896                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
897 {
898         struct jffs2_full_dirent **fdp = &f->dents;
899         int found = 0;
900
901         /* On a medium where we can't actually mark nodes obsolete
902            permanently, such as NAND flash, we need to work out
903            whether this deletion dirent is still needed to actively
904            delete a 'real' dirent with the same name that's still
905            somewhere else on the flash. */
906         if (!jffs2_can_mark_obsolete(c)) {
907                 struct jffs2_raw_dirent *rd;
908                 struct jffs2_raw_node_ref *raw;
909                 int ret;
910                 size_t retlen;
911                 int name_len = strlen(fd->name);
912                 uint32_t name_crc = crc32(0, fd->name, name_len);
913                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
914
915                 rd = kmalloc(rawlen, GFP_KERNEL);
916                 if (!rd)
917                         return -ENOMEM;
918
919                 /* Prevent the erase code from nicking the obsolete node refs while
920                    we're looking at them. I really don't like this extra lock but
921                    can't see any alternative. Suggestions on a postcard to... */
922                 mutex_lock(&c->erase_free_sem);
923
924                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
925
926                         cond_resched();
927
928                         /* We only care about obsolete ones */
929                         if (!(ref_obsolete(raw)))
930                                 continue;
931
932                         /* Any dirent with the same name is going to have the same length... */
933                         if (ref_totlen(c, NULL, raw) != rawlen)
934                                 continue;
935
936                         /* Doesn't matter if there's one in the same erase block. We're going to
937                            delete it too at the same time. */
938                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
939                                 continue;
940
941                         jffs2_dbg(1, "Check potential deletion dirent at %08x\n",
942                                   ref_offset(raw));
943
944                         /* This is an obsolete node belonging to the same directory, and it's of the right
945                            length. We need to take a closer look...*/
946                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
947                         if (ret) {
948                                 pr_warn("%s(): Read error (%d) reading obsolete node at %08x\n",
949                                         __func__, ret, ref_offset(raw));
950                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
951                                 continue;
952                         }
953                         if (retlen != rawlen) {
954                                 pr_warn("%s(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
955                                         __func__, retlen, rawlen,
956                                         ref_offset(raw));
957                                 continue;
958                         }
959
960                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
961                                 continue;
962
963                         /* If the name CRC doesn't match, skip */
964                         if (je32_to_cpu(rd->name_crc) != name_crc)
965                                 continue;
966
967                         /* If the name length doesn't match, or it's another deletion dirent, skip */
968                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
969                                 continue;
970
971                         /* OK, check the actual name now */
972                         if (memcmp(rd->name, fd->name, name_len))
973                                 continue;
974
975                         /* OK. The name really does match. There really is still an older node on
976                            the flash which our deletion dirent obsoletes. So we have to write out
977                            a new deletion dirent to replace it */
978                         mutex_unlock(&c->erase_free_sem);
979
980                         jffs2_dbg(1, "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
981                                   ref_offset(fd->raw), fd->name,
982                                   ref_offset(raw), je32_to_cpu(rd->ino));
983                         kfree(rd);
984
985                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
986                 }
987
988                 mutex_unlock(&c->erase_free_sem);
989                 kfree(rd);
990         }
991
992         /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
993            we should update the metadata node with those times accordingly */
994
995         /* No need for it any more. Just mark it obsolete and remove it from the list */
996         while (*fdp) {
997                 if ((*fdp) == fd) {
998                         found = 1;
999                         *fdp = fd->next;
1000                         break;
1001                 }
1002                 fdp = &(*fdp)->next;
1003         }
1004         if (!found) {
1005                 pr_warn("Deletion dirent \"%s\" not found in list for ino #%u\n",
1006                         fd->name, f->inocache->ino);
1007         }
1008         jffs2_mark_node_obsolete(c, fd->raw);
1009         jffs2_free_full_dirent(fd);
1010         return 0;
1011 }
1012
1013 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1014                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1015                                       uint32_t start, uint32_t end)
1016 {
1017         struct jffs2_raw_inode ri;
1018         struct jffs2_node_frag *frag;
1019         struct jffs2_full_dnode *new_fn;
1020         uint32_t alloclen, ilen;
1021         int ret;
1022
1023         jffs2_dbg(1, "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
1024                   f->inocache->ino, start, end);
1025
1026         memset(&ri, 0, sizeof(ri));
1027
1028         if(fn->frags > 1) {
1029                 size_t readlen;
1030                 uint32_t crc;
1031                 /* It's partially obsoleted by a later write. So we have to
1032                    write it out again with the _same_ version as before */
1033                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
1034                 if (readlen != sizeof(ri) || ret) {
1035                         pr_warn("Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n",
1036                                 ret, readlen);
1037                         goto fill;
1038                 }
1039                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
1040                         pr_warn("%s(): Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
1041                                 __func__, ref_offset(fn->raw),
1042                                 je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
1043                         return -EIO;
1044                 }
1045                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
1046                         pr_warn("%s(): Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
1047                                 __func__, ref_offset(fn->raw),
1048                                 je32_to_cpu(ri.totlen), sizeof(ri));
1049                         return -EIO;
1050                 }
1051                 crc = crc32(0, &ri, sizeof(ri)-8);
1052                 if (crc != je32_to_cpu(ri.node_crc)) {
1053                         pr_warn("%s: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
1054                                 __func__, ref_offset(fn->raw),
1055                                 je32_to_cpu(ri.node_crc), crc);
1056                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
1057                         pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1058                                 start, end, f->inocache->ino);
1059                         goto fill;
1060                 }
1061                 if (ri.compr != JFFS2_COMPR_ZERO) {
1062                         pr_warn("%s(): Node 0x%08x wasn't a hole node!\n",
1063                                 __func__, ref_offset(fn->raw));
1064                         pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1065                                 start, end, f->inocache->ino);
1066                         goto fill;
1067                 }
1068         } else {
1069         fill:
1070                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1071                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1072                 ri.totlen = cpu_to_je32(sizeof(ri));
1073                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1074
1075                 ri.ino = cpu_to_je32(f->inocache->ino);
1076                 ri.version = cpu_to_je32(++f->highest_version);
1077                 ri.offset = cpu_to_je32(start);
1078                 ri.dsize = cpu_to_je32(end - start);
1079                 ri.csize = cpu_to_je32(0);
1080                 ri.compr = JFFS2_COMPR_ZERO;
1081         }
1082
1083         frag = frag_last(&f->fragtree);
1084         if (frag)
1085                 /* Fetch the inode length from the fragtree rather than
1086                  * from i_size since i_size may have not been updated yet */
1087                 ilen = frag->ofs + frag->size;
1088         else
1089                 ilen = JFFS2_F_I_SIZE(f);
1090
1091         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1092         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1093         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1094         ri.isize = cpu_to_je32(ilen);
1095         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1096         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1097         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1098         ri.data_crc = cpu_to_je32(0);
1099         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1100
1101         ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1102                                      JFFS2_SUMMARY_INODE_SIZE);
1103         if (ret) {
1104                 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1105                         sizeof(ri), ret);
1106                 return ret;
1107         }
1108         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1109
1110         if (IS_ERR(new_fn)) {
1111                 pr_warn("Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1112                 return PTR_ERR(new_fn);
1113         }
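        /* Common case: we took the 'fill' path above and wrote a brand-new hole
           node carrying the highest version, so it can simply replace the old
           frags via jffs2_add_full_dnode_to_inode(). */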
1114         if (je32_to_cpu(ri.version) == f->highest_version) {
1115                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1116                 if (f->metadata) {
1117                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1118                         jffs2_free_full_dnode(f->metadata);
1119                         f->metadata = NULL;
1120                 }
1121                 return 0;
1122         }
1123
1124         /*
1125          * We should only get here in the case where the node we are
1126          * replacing had more than one frag, so we kept the same version
1127          * number as before. (Except in case of error -- see 'goto fill;'
1128          * above.)
1129          */
1130         D1(if(unlikely(fn->frags <= 1)) {
1131                         pr_warn("%s(): Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1132                                 __func__, fn->frags, je32_to_cpu(ri.version),
1133                                 f->highest_version, je32_to_cpu(ri.ino));
1134         });
1135
1136         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1137         mark_ref_normal(new_fn->raw);
1138
1139         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1140              frag; frag = frag_next(frag)) {
1141                 if (frag->ofs > fn->size + fn->ofs)
1142                         break;
1143                 if (frag->node == fn) {
1144                         frag->node = new_fn;
1145                         new_fn->frags++;
1146                         fn->frags--;
1147                 }
1148         }
1149         if (fn->frags) {
1150                 pr_warn("%s(): Old node still has frags!\n", __func__);
1151                 BUG();
1152         }
1153         if (!new_fn->frags) {
1154                 pr_warn("%s(): New node has no frags!\n", __func__);
1155                 BUG();
1156         }
1157
1158         jffs2_mark_node_obsolete(c, fn->raw);
1159         jffs2_free_full_dnode(fn);
1160
1161         return 0;
1162 }
1163
1164 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1165                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1166                                        uint32_t start, uint32_t end)
1167 {
1168         struct inode *inode = OFNI_EDONI_2SFFJ(f);
1169         struct jffs2_full_dnode *new_fn;
1170         struct jffs2_raw_inode ri;
1171         uint32_t alloclen, offset, orig_end, orig_start;
1172         int ret = 0;
1173         unsigned char *comprbuf = NULL, *writebuf;
1174         struct page *page;
1175         unsigned char *pg_ptr;
1176
1177         memset(&ri, 0, sizeof(ri));
1178
1179         jffs2_dbg(1, "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1180                   f->inocache->ino, start, end);
1181
1182         orig_end = end;
1183         orig_start = start;
1184
1185         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1186                 /* Attempt to do some merging. But only expand to cover logically
1187                    adjacent frags if the block containing them is already considered
1188                    to be dirty. Otherwise we end up with GC just going round in
1189                    circles dirtying the nodes it already wrote out, especially
1190                    on NAND where we have small eraseblocks and hence a much higher
1191                    chance of nodes having to be split to cross boundaries. */
1192
1193                 struct jffs2_node_frag *frag;
1194                 uint32_t min, max;
1195
1196                 min = start & ~(PAGE_SIZE-1);
1197                 max = min + PAGE_SIZE;
1198
1199                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1200
1201                 /* A BUG_ON(!frag) would be redundant: a NULL frag oopses on the dereference below anyway */
1202
1203                 BUG_ON(frag->ofs != start);
1204
1205                 /* First grow down... */
1206                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1207
1208                         /* If the previous frag doesn't even reach the beginning, there's
1209                            excessive fragmentation. Just merge. */
1210                         if (frag->ofs > min) {
1211                                 jffs2_dbg(1, "Expanding down to cover partial frag (0x%x-0x%x)\n",
1212                                           frag->ofs, frag->ofs+frag->size);
1213                                 start = frag->ofs;
1214                                 continue;
1215                         }
1216                         /* OK. This frag holds the first byte of the page. */
1217                         if (!frag->node || !frag->node->raw) {
1218                                 jffs2_dbg(1, "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1219                                           frag->ofs, frag->ofs+frag->size);
1220                                 break;
1221                         } else {
1222
1223                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1224                                    in a block which is still considered clean? If so, don't obsolete it.
1225                                    If not, cover it anyway. */
1226
1227                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1228                                 struct jffs2_eraseblock *jeb;
1229
1230                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1231
1232                                 if (jeb == c->gcblock) {
1233                                         jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1234                                                   frag->ofs,
1235                                                   frag->ofs + frag->size,
1236                                                   ref_offset(raw));
1237                                         start = frag->ofs;
1238                                         break;
1239                                 }
1240                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1241                                         jffs2_dbg(1, "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1242                                                   frag->ofs,
1243                                                   frag->ofs + frag->size,
1244                                                   jeb->offset);
1245                                         break;
1246                                 }
1247
1248                                 jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1249                                           frag->ofs,
1250                                           frag->ofs + frag->size,
1251                                           jeb->offset);
1252                                 start = frag->ofs;
1253                                 break;
1254                         }
1255                 }
1256
1257                 /* ... then up */
1258
1259                 /* Find last frag which is actually part of the node we're to GC. */
1260                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1261
1262                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1263
1264                         /* If the next frag doesn't even reach the end of the page, there's lots
1265                            of fragmentation. Just merge. */
1266                         if (frag->ofs+frag->size < max) {
1267                                 jffs2_dbg(1, "Expanding up to cover partial frag (0x%x-0x%x)\n",
1268                                           frag->ofs, frag->ofs+frag->size);
1269                                 end = frag->ofs + frag->size;
1270                                 continue;
1271                         }
1272
1273                         if (!frag->node || !frag->node->raw) {
1274                                 jffs2_dbg(1, "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1275                                           frag->ofs, frag->ofs+frag->size);
1276                                 break;
1277                         } else {
1278
1279                                 /* OK, it's a frag which extends to the end of the page. Does it live
1280                                    in a block which is still considered clean? If so, don't obsolete it.
1281                                    If not, cover it anyway. */
1282
1283                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1284                                 struct jffs2_eraseblock *jeb;
1285
1286                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1287
1288                                 if (jeb == c->gcblock) {
1289                                         jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1290                                                   frag->ofs,
1291                                                   frag->ofs + frag->size,
1292                                                   ref_offset(raw));
1293                                         end = frag->ofs + frag->size;
1294                                         break;
1295                                 }
1296                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1297                                         jffs2_dbg(1, "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1298                                                   frag->ofs,
1299                                                   frag->ofs + frag->size,
1300                                                   jeb->offset);
1301                                         break;
1302                                 }
1303
1304                                 jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1305                                           frag->ofs,
1306                                           frag->ofs + frag->size,
1307                                           jeb->offset);
1308                                 end = frag->ofs + frag->size;
1309                                 break;
1310                         }
1311                 }
1312                 jffs2_dbg(1, "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1313                           orig_start, orig_end, start, end);
1314
1315                 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1316                 BUG_ON(end < orig_end);
1317                 BUG_ON(start > orig_start);
1318         }
1319
1320         /* The rules state that we must obtain the page lock *before* f->sem, so
1321          * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's
1322          * actually going to *change* so we're safe; we only allow reading.
1323          *
1324          * It is important to note that jffs2_write_begin() will ensure that its
1325          * page is marked Uptodate before allocating space. That means that if we
1326          * end up here trying to GC the *same* page that jffs2_write_begin() is
1327          * trying to write out, read_cache_page() will not deadlock. */
1328         mutex_unlock(&f->sem);
1329         page = read_cache_page(inode->i_mapping, start >> PAGE_SHIFT,
1330                                jffs2_do_readpage_unlock, inode);
1331         if (IS_ERR(page)) {
1332                 pr_warn("read_cache_page() returned error: %ld\n",
1333                         PTR_ERR(page));
1334                 mutex_lock(&f->sem);
1335                 return PTR_ERR(page);
1336         }
1337
1338         pg_ptr = kmap(page);
1339         mutex_lock(&f->sem);
1340
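             /* Write the (possibly expanded) range back out as one or more replacement
                data nodes, each limited by what we can reserve and compress in one pass. */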
1341         offset = start;
1342         while(offset < orig_end) {
1343                 uint32_t datalen;
1344                 uint32_t cdatalen;
1345                 uint16_t comprtype = JFFS2_COMPR_NONE;
1346
1347                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1348                                         &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1349
1350                 if (ret) {
1351                         pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1352                                 sizeof(ri) + JFFS2_MIN_DATA_LEN, ret);
1353                         break;
1354                 }
1355                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1356                 datalen = end - offset;
1357
1358                 writebuf = pg_ptr + (offset & (PAGE_SIZE -1));
1359
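                     /* datalen is how much page data is available, cdatalen how much compressed
                        output will fit; jffs2_compress() trims both to what it actually used. */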
1360                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1361
1362                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1363                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1364                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
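                     /* hdr_crc covers only the common node header (magic, nodetype, totlen),
                        not the CRC field itself. */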
1365                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1366
1367                 ri.ino = cpu_to_je32(f->inocache->ino);
1368                 ri.version = cpu_to_je32(++f->highest_version);
1369                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1370                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1371                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1372                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1373                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1374                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1375                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1376                 ri.offset = cpu_to_je32(offset);
1377                 ri.csize = cpu_to_je32(cdatalen);
1378                 ri.dsize = cpu_to_je32(datalen);
1379                 ri.compr = comprtype & 0xff;
1380                 ri.usercompr = (comprtype >> 8) & 0xff;
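                     /* node_crc covers the whole raw inode except the trailing data_crc
                        and node_crc fields (the final 8 bytes). */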
1381                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1382                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1383
1384                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1385
1386                 jffs2_free_comprbuf(comprbuf, writebuf);
1387
1388                 if (IS_ERR(new_fn)) {
1389                         pr_warn("Error writing new dnode: %ld\n",
1390                                 PTR_ERR(new_fn));
1391                         ret = PTR_ERR(new_fn);
1392                         break;
1393                 }
1394                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1395                 offset += datalen;
1396                 if (f->metadata) {
1397                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1398                         jffs2_free_full_dnode(f->metadata);
1399                         f->metadata = NULL;
1400                 }
1401         }
1402
1403         kunmap(page);
1404         put_page(page);
1405         return ret;
1406 }