ceph: eliminate the recursion when rebuilding the snap context
[platform/kernel/linux-rpi.git] / fs / ceph / snap.c
index b41e672..66a1a92 100644 (file)
@@ -127,6 +127,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
        INIT_LIST_HEAD(&realm->child_item);
        INIT_LIST_HEAD(&realm->empty_item);
        INIT_LIST_HEAD(&realm->dirty_item);
+       INIT_LIST_HEAD(&realm->rebuild_item);
        INIT_LIST_HEAD(&realm->inodes_with_caps);
        spin_lock_init(&realm->inodes_with_caps_lock);
        __insert_snap_realm(&mdsc->snap_realms, realm);
@@ -320,7 +321,8 @@ static int cmpu64_rev(const void *a, const void *b)
  * build the snap context for a given realm.
  */
 static int build_snap_context(struct ceph_snap_realm *realm,
-                             struct list_head* dirty_realms)
+                             struct list_head *realm_queue,
+                             struct list_head *dirty_realms)
 {
        struct ceph_snap_realm *parent = realm->parent;
        struct ceph_snap_context *snapc;
@@ -334,9 +336,9 @@ static int build_snap_context(struct ceph_snap_realm *realm,
         */
        if (parent) {
                if (!parent->cached_context) {
-                       err = build_snap_context(parent, dirty_realms);
-                       if (err)
-                               goto fail;
+                       /* add to the queue head */
+                       list_add(&parent->rebuild_item, realm_queue);
+                       return 1;
                }
                num += parent->cached_context->num_snaps;
        }
@@ -420,13 +422,50 @@ fail:
 static void rebuild_snap_realms(struct ceph_snap_realm *realm,
                                struct list_head *dirty_realms)
 {
-       struct ceph_snap_realm *child;
+       LIST_HEAD(realm_queue);
+       int last = 0;
+       bool skip = false;
 
-       dout("rebuild_snap_realms %llx %p\n", realm->ino, realm);
-       build_snap_context(realm, dirty_realms);
+       list_add_tail(&realm->rebuild_item, &realm_queue);
 
-       list_for_each_entry(child, &realm->children, child_item)
-               rebuild_snap_realms(child, dirty_realms);
+       while (!list_empty(&realm_queue)) {
+               struct ceph_snap_realm *_realm, *child;
+
+               _realm = list_first_entry(&realm_queue,
+                                         struct ceph_snap_realm,
+                                         rebuild_item);
+
+               /*
+                * If the last building failed dues to memory
+                * issue, just empty the realm_queue and return
+                * to avoid infinite loop.
+                */
+               if (last < 0) {
+                       list_del_init(&_realm->rebuild_item);
+                       continue;
+               }
+
+               last = build_snap_context(_realm, &realm_queue, dirty_realms);
+               dout("rebuild_snap_realms %llx %p, %s\n", _realm->ino, _realm,
+                    last > 0 ? "is deferred" : !last ? "succeeded" : "failed");
+
+               /* is any child in the list ? */
+               list_for_each_entry(child, &_realm->children, child_item) {
+                       if (!list_empty(&child->rebuild_item)) {
+                               skip = true;
+                               break;
+                       }
+               }
+
+               if (!skip) {
+                       list_for_each_entry(child, &_realm->children, child_item)
+                               list_add_tail(&child->rebuild_item, &realm_queue);
+               }
+
+               /* last == 1 means need to build parent first */
+               if (last <= 0)
+                       list_del_init(&_realm->rebuild_item);
+       }
 }
 
 
@@ -482,7 +521,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci)
        struct ceph_buffer *old_blob = NULL;
        int used, dirty;
 
-       capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS);
+       capsnap = kmem_cache_zalloc(ceph_cap_snap_cachep, GFP_NOFS);
        if (!capsnap) {
                pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
                return;
@@ -603,7 +642,8 @@ update_snapc:
        spin_unlock(&ci->i_ceph_lock);
 
        ceph_buffer_put(old_blob);
-       kfree(capsnap);
+       if (capsnap)
+               kmem_cache_free(ceph_cap_snap_cachep, capsnap);
        ceph_put_snap_context(old_snapc);
 }
 
@@ -707,7 +747,8 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
        __le64 *prior_parent_snaps;        /* encoded */
        struct ceph_snap_realm *realm = NULL;
        struct ceph_snap_realm *first_realm = NULL;
-       int invalidate = 0;
+       struct ceph_snap_realm *realm_to_rebuild = NULL;
+       int rebuild_snapcs;
        int err = -ENOMEM;
        LIST_HEAD(dirty_realms);
 
@@ -715,6 +756,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
 
        dout("update_snap_trace deletion=%d\n", deletion);
 more:
+       rebuild_snapcs = 0;
        ceph_decode_need(&p, e, sizeof(*ri), bad);
        ri = p;
        p += sizeof(*ri);
@@ -738,7 +780,7 @@ more:
        err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
        if (err < 0)
                goto fail;
-       invalidate += err;
+       rebuild_snapcs += err;
 
        if (le64_to_cpu(ri->seq) > realm->seq) {
                dout("update_snap_trace updating %llx %p %lld -> %lld\n",
@@ -763,22 +805,30 @@ more:
                if (realm->seq > mdsc->last_snap_seq)
                        mdsc->last_snap_seq = realm->seq;
 
-               invalidate = 1;
+               rebuild_snapcs = 1;
        } else if (!realm->cached_context) {
                dout("update_snap_trace %llx %p seq %lld new\n",
                     realm->ino, realm, realm->seq);
-               invalidate = 1;
+               rebuild_snapcs = 1;
        } else {
                dout("update_snap_trace %llx %p seq %lld unchanged\n",
                     realm->ino, realm, realm->seq);
        }
 
-       dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
-            realm, invalidate, p, e);
+       dout("done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino,
+            realm, rebuild_snapcs, p, e);
+
+       /*
+        * this will always track the topmost parent realm from which
+        * we need to rebuild the snapshot contexts _downward_ in
+        * the hierarchy.
+        */
+       if (rebuild_snapcs)
+               realm_to_rebuild = realm;
 
-       /* invalidate when we reach the _end_ (root) of the trace */
-       if (invalidate && p >= e)
-               rebuild_snap_realms(realm, &dirty_realms);
+       /* rebuild the snap contexts once we reach the _end_ (root) of the trace */
+       if (realm_to_rebuild && p >= e)
+               rebuild_snap_realms(realm_to_rebuild, &dirty_realms);
 
        if (!first_realm)
                first_realm = realm;