staging/lustre/osc: use global osc_rq_pool to reduce memory usage

author Li Xi <lixi@ddn.com>

Mon, 14 Sep 2015 22:41:32 +0000 (18:41 -0400)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 15 Sep 2015 13:26:55 +0000 (06:26 -0700)
author Li Xi <lixi@ddn.com>
Mon, 14 Sep 2015 22:41:32 +0000 (18:41 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 15 Sep 2015 13:26:55 +0000 (06:26 -0700)
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h

index 5a38f3d..c8b89a3 100644 (file)
--- a/drivers/staging/lustre/lustre/include/lustre_import.h
+++ b/drivers/staging/lustre/lustre/include/lustre_import.h
@@ -306,8 +306,6 @@ struct obd_import {
         __u32                imp_msg_magic;
         __u32                imp_msghdr_flags;       /* adjusted based on server capability */
  
-       struct ptlrpc_request_pool *imp_rq_pool;          /* emergency request pool */
-
         struct imp_at        imp_at;             /* adaptive timeout data */
         time_t              imp_last_reply_time;    /* for health check */
  };
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h

index 5df493e..313a56c 100644 (file)
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -500,7 +500,7 @@ struct ptlrpc_request_pool {
         /** Maximum message size that would fit into a request from this pool */
         int prp_rq_size;
         /** Function to allocate more requests for this pool */
-       void (*prp_populate)(struct ptlrpc_request_pool *, int);
+       int (*prp_populate)(struct ptlrpc_request_pool *, int);
  };
  
  struct lu_context;
@@ -2381,11 +2381,11 @@ void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
                             struct ptlrpc_request *req);
  
  void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool);
-void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq);
+int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq);
  
  struct ptlrpc_request_pool *
  ptlrpc_init_rq_pool(int, int,
-                   void (*populate_pool)(struct ptlrpc_request_pool *, int));
+                   int (*populate_pool)(struct ptlrpc_request_pool *, int));
  
  void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req);
  struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp,
@@ -2957,7 +2957,6 @@ void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes);
  
  /* ptlrpc/llog_client.c */
  extern struct llog_operations llog_client_ops;
-
  /** @} net */
  
  #endif
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h

index 87bb2ce..ce6fa55 100644 (file)
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -626,10 +626,6 @@ static inline void obd_cleanup_client_import(struct obd_device *obd)
                 CDEBUG(D_CONFIG, "%s: client import never connected\n",
                        obd->obd_name);
                 ptlrpc_invalidate_import(imp);
-               if (imp->imp_rq_pool) {
-                       ptlrpc_free_rq_pool(imp->imp_rq_pool);
-                       imp->imp_rq_pool = NULL;
-               }
                 client_destroy_import(imp);
                 obd->u.cli.cl_import = NULL;
         }
diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c

index 370d5b4..594955d 100644 (file)
--- a/drivers/staging/lustre/lustre/obdclass/genops.c
+++ b/drivers/staging/lustre/lustre/obdclass/genops.c
@@ -1663,7 +1663,6 @@ static void obd_zombie_export_add(struct obd_export *exp)
  static void obd_zombie_import_add(struct obd_import *imp)
  {
         LASSERT(!imp->imp_sec);
-       LASSERT(!imp->imp_rq_pool);
         spin_lock(&obd_zombie_impexp_lock);
         LASSERT(list_empty(&imp->imp_zombie_chain));
         zombies_count++;
diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c

index ff6d2e2..c504d15 100644 (file)
--- a/drivers/staging/lustre/lustre/osc/lproc_osc.c
+++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c
@@ -96,9 +96,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
         struct obd_device *dev = container_of(kobj, struct obd_device,
                                               obd_kobj);
         struct client_obd *cli = &dev->u.cli;
-       struct ptlrpc_request_pool *pool = cli->cl_import->imp_rq_pool;
         int rc;
         unsigned long val;
+       int adding, added, req_count;
  
         rc = kstrtoul(buffer, 10, &val);
         if (rc)
@@ -107,8 +107,19 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
         if (val < 1 || val > OSC_MAX_RIF_MAX)
                 return -ERANGE;
  
-       if (pool && val > cli->cl_max_rpcs_in_flight)
-               pool->prp_populate(pool, val-cli->cl_max_rpcs_in_flight);
+       adding = val - cli->cl_max_rpcs_in_flight;
+       req_count = atomic_read(&osc_pool_req_count);
+       if (adding > 0 && req_count < osc_reqpool_maxreqcount) {
+               /*
+                * There might be some race which will cause over-limit
+                * allocation, but it is fine.
+                */
+               if (req_count + adding > osc_reqpool_maxreqcount)
+                       adding = osc_reqpool_maxreqcount - req_count;
+
+               added = osc_rq_pool->prp_populate(osc_rq_pool, adding);
+               atomic_add(added, &osc_pool_req_count);
+       }
  
         client_obd_list_lock(&cli->cl_loi_list_lock);
         cli->cl_max_rpcs_in_flight = val;
diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h

index 470698b..7d0a3e2 100644 (file)
--- a/drivers/staging/lustre/lustre/osc/osc_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_internal.h
@@ -39,6 +39,10 @@
  
  #define OAP_MAGIC 8675309
  
+extern atomic_t osc_pool_req_count;
+extern unsigned int osc_reqpool_maxreqcount;
+extern struct ptlrpc_request_pool *osc_rq_pool;
+
  struct lu_env;
  
  enum async_flags {
diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c

index 114c550..f41f762 100644 (file)
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -50,9 +50,18 @@
  #include "../include/lustre_param.h"
  #include "../include/lustre_fid.h"
  #include "../include/obd_class.h"
+#include "../include/obd.h"
  #include "osc_internal.h"
  #include "osc_cl_internal.h"
  
+atomic_t osc_pool_req_count;
+unsigned int osc_reqpool_maxreqcount;
+struct ptlrpc_request_pool *osc_rq_pool;
+
+/* max memory used for request pool, unit is MB */
+static unsigned int osc_reqpool_mem_max = 5;
+module_param(osc_reqpool_mem_max, uint, 0444);
+
  struct osc_brw_async_args {
         struct obdo       *aa_oa;
         int             aa_requested_nob;
@@ -1268,7 +1277,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
         if ((cmd & OBD_BRW_WRITE) != 0) {
                 opc = OST_WRITE;
                 req = ptlrpc_request_alloc_pool(cli->cl_import,
-                                               cli->cl_import->imp_rq_pool,
+                                               osc_rq_pool,
                                                 &RQF_OST_BRW_WRITE);
         } else {
                 opc = OST_READ;
@@ -3163,6 +3172,9 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         struct client_obd *cli = &obd->u.cli;
         void *handler;
         int rc;
+       int adding;
+       int added;
+       int req_count;
  
         rc = ptlrpcd_addref();
         if (rc)
@@ -3191,15 +3203,20 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                 ptlrpc_lprocfs_register_obd(obd);
         }
  
-       /* We need to allocate a few requests more, because
-        * brw_interpret tries to create new requests before freeing
-        * previous ones, Ideally we want to have 2x max_rpcs_in_flight
-        * reserved, but I'm afraid that might be too much wasted RAM
-        * in fact, so 2 is just my guess and still should work. */
-       cli->cl_import->imp_rq_pool =
-               ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2,
-                                   OST_MAXREQSIZE,
-                                   ptlrpc_add_rqs_to_pool);
+       /*
+        * We try to control the total number of requests with a upper limit
+        * osc_reqpool_maxreqcount. There might be some race which will cause
+        * over-limit allocation, but it is fine.
+        */
+       req_count = atomic_read(&osc_pool_req_count);
+       if (req_count < osc_reqpool_maxreqcount) {
+               adding = cli->cl_max_rpcs_in_flight + 2;
+               if (req_count + adding > osc_reqpool_maxreqcount)
+                       adding = osc_reqpool_maxreqcount - req_count;
+
+               added = ptlrpc_add_rqs_to_pool(osc_rq_pool, adding);
+               atomic_add(added, &osc_pool_req_count);
+       }
  
         INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
         ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery);
@@ -3339,6 +3356,8 @@ extern struct lock_class_key osc_ast_guard_class;
  static int __init osc_init(void)
  {
         struct lprocfs_static_vars lvars = { NULL };
+       unsigned int reqpool_size;
+       unsigned int reqsize;
         int rc;
  
         /* print an address of _any_ initialized kernel symbol from this
@@ -3354,14 +3373,45 @@ static int __init osc_init(void)
  
         rc = class_register_type(&osc_obd_ops, NULL,
                                  LUSTRE_OSC_NAME, &osc_device_type);
-       if (rc) {
-               lu_kmem_fini(osc_caches);
-               return rc;
-       }
+       if (rc)
+               goto out_kmem;
  
         spin_lock_init(&osc_ast_guard);
         lockdep_set_class(&osc_ast_guard, &osc_ast_guard_class);
  
+       /* This is obviously too much memory, only prevent overflow here */
+       if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) {
+               rc = -EINVAL;
+               goto out_type;
+       }
+
+       reqpool_size = osc_reqpool_mem_max << 20;
+
+       reqsize = 1;
+       while (reqsize < OST_MAXREQSIZE)
+               reqsize = reqsize << 1;
+
+       /*
+        * We don't enlarge the request count in OSC pool according to
+        * cl_max_rpcs_in_flight. The allocation from the pool will only be
+        * tried after normal allocation failed. So a small OSC pool won't
+        * cause much performance degression in most of cases.
+        */
+       osc_reqpool_maxreqcount = reqpool_size / reqsize;
+
+       atomic_set(&osc_pool_req_count, 0);
+       osc_rq_pool = ptlrpc_init_rq_pool(0, OST_MAXREQSIZE,
+                                         ptlrpc_add_rqs_to_pool);
+
+       if (osc_rq_pool)
+               return 0;
+
+       rc = -ENOMEM;
+
+out_type:
+       class_unregister_type(LUSTRE_OSC_NAME);
+out_kmem:
+       lu_kmem_fini(osc_caches);
         return rc;
  }
  
@@ -3369,6 +3419,7 @@ static void /*__exit*/ osc_exit(void)
  {
         class_unregister_type(LUSTRE_OSC_NAME);
         lu_kmem_fini(osc_caches);
+       ptlrpc_free_rq_pool(osc_rq_pool);
  }
  
  MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c

index 1800db1..90b24fc 100644 (file)
--- a/drivers/staging/lustre/lustre/ptlrpc/client.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/client.c
@@ -446,7 +446,7 @@ EXPORT_SYMBOL(ptlrpc_free_rq_pool);
  /**
   * Allocates, initializes and adds \a num_rq requests to the pool \a pool
   */
-void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
+int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
  {
         int i;
         int size = 1;
@@ -468,11 +468,11 @@ void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
                 spin_unlock(&pool->prp_lock);
                 req = ptlrpc_request_cache_alloc(GFP_NOFS);
                 if (!req)
-                       return;
+                       return i;
                 msg = libcfs_kvzalloc(size, GFP_NOFS);
                 if (!msg) {
                         ptlrpc_request_cache_free(req);
-                       return;
+                       return i;
                 }
                 req->rq_reqbuf = msg;
                 req->rq_reqbuf_len = size;
@@ -481,6 +481,7 @@ void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
                 list_add_tail(&req->rq_list, &pool->prp_req_list);
         }
         spin_unlock(&pool->prp_lock);
+       return num_rq;
  }
  EXPORT_SYMBOL(ptlrpc_add_rqs_to_pool);
  
@@ -494,7 +495,7 @@ EXPORT_SYMBOL(ptlrpc_add_rqs_to_pool);
   */
  struct ptlrpc_request_pool *
  ptlrpc_init_rq_pool(int num_rq, int msgsize,
-                   void (*populate_pool)(struct ptlrpc_request_pool *, int))
+                   int (*populate_pool)(struct ptlrpc_request_pool *, int))
  {
         struct ptlrpc_request_pool *pool;
  
@@ -512,11 +513,6 @@ ptlrpc_init_rq_pool(int num_rq, int msgsize,
  
         populate_pool(pool, num_rq);
  
-       if (list_empty(&pool->prp_req_list)) {
-               /* have not allocated a single request for the pool */
-               kfree(pool);
-               pool = NULL;
-       }
         return pool;
  }
  EXPORT_SYMBOL(ptlrpc_init_rq_pool);
@@ -702,11 +698,10 @@ struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp,
  {
         struct ptlrpc_request *request = NULL;
  
-       if (pool)
-               request = ptlrpc_prep_req_from_pool(pool);
+       request = ptlrpc_request_cache_alloc(GFP_NOFS);
  
-       if (!request)
-               request = ptlrpc_request_cache_alloc(GFP_NOFS);
+       if (!request && pool)
+               request = ptlrpc_prep_req_from_pool(pool);
  
         if (request) {
                 LASSERTF((unsigned long)imp > 0x1000, "%p", imp);
author	Li Xi <lixi@ddn.com>
	Mon, 14 Sep 2015 22:41:32 +0000 (18:41 -0400)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 15 Sep 2015 13:26:55 +0000 (06:26 -0700)
drivers/staging/lustre/lustre/include/lustre_import.h		patch \| blob \| history
drivers/staging/lustre/lustre/include/lustre_net.h		patch \| blob \| history
drivers/staging/lustre/lustre/include/obd_class.h		patch \| blob \| history
drivers/staging/lustre/lustre/obdclass/genops.c		patch \| blob \| history
drivers/staging/lustre/lustre/osc/lproc_osc.c		patch \| blob \| history
drivers/staging/lustre/lustre/osc/osc_internal.h		patch \| blob \| history
drivers/staging/lustre/lustre/osc/osc_request.c		patch \| blob \| history
drivers/staging/lustre/lustre/ptlrpc/client.c		patch \| blob \| history