hugetlbfs: per mount huge page sizes

author	Andi Kleen <ak@suse.de>
	Thu, 24 Jul 2008 04:27:43 +0000 (21:27 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 24 Jul 2008 17:47:17 +0000 (10:47 -0700)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index 516c581..dbd01d2 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
  enum {
         Opt_size, Opt_nr_inodes,
         Opt_mode, Opt_uid, Opt_gid,
+       Opt_pagesize,
         Opt_err,
  };
  
@@ -62,6 +63,7 @@ static match_table_t tokens = {
         {Opt_mode,      "mode=%o"},
         {Opt_uid,       "uid=%u"},
         {Opt_gid,       "gid=%u"},
+       {Opt_pagesize,  "pagesize=%s"},
         {Opt_err,       NULL},
  };
  
@@ -750,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
         char *p, *rest;
         substring_t args[MAX_OPT_ARGS];
         int option;
+       unsigned long long size = 0;
+       enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
  
         if (!options)
                 return 0;
@@ -780,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
                         break;
  
                 case Opt_size: {
-                       unsigned long long size;
                         /* memparse() will accept a K/M/G without a digit */
                         if (!isdigit(*args[0].from))
                                 goto bad_val;
                         size = memparse(args[0].from, &rest);
-                       if (*rest == '%') {
-                               size <<= HPAGE_SHIFT;
-                               size *= max_huge_pages;
-                               do_div(size, 100);
-                       }
-                       pconfig->nr_blocks = (size >> HPAGE_SHIFT);
+                       setsize = SIZE_STD;
+                       if (*rest == '%')
+                               setsize = SIZE_PERCENT;
                         break;
                 }
  
@@ -801,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
                         pconfig->nr_inodes = memparse(args[0].from, &rest);
                         break;
  
+               case Opt_pagesize: {
+                       unsigned long ps;
+                       ps = memparse(args[0].from, &rest);
+                       pconfig->hstate = size_to_hstate(ps);
+                       if (!pconfig->hstate) {
+                               printk(KERN_ERR
+                               "hugetlbfs: Unsupported page size %lu MB\n",
+                                       ps >> 20);
+                               return -EINVAL;
+                       }
+                       break;
+               }
+
                 default:
                         printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
                                  p);
@@ -808,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
                         break;
                 }
         }
+
+       /* Do size after hstate is set up */
+       if (setsize > NO_SIZE) {
+               struct hstate *h = pconfig->hstate;
+               if (setsize == SIZE_PERCENT) {
+                       size <<= huge_page_shift(h);
+                       size *= h->max_huge_pages;
+                       do_div(size, 100);
+               }
+               pconfig->nr_blocks = (size >> huge_page_shift(h));
+       }
+
         return 0;
  
  bad_val:
@@ -832,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
         config.uid = current->fsuid;
         config.gid = current->fsgid;
         config.mode = 0755;
+       config.hstate = &default_hstate;
         ret = hugetlbfs_parse_options(data, &config);
         if (ret)
                 return ret;
@@ -840,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
         if (!sbinfo)
                 return -ENOMEM;
         sb->s_fs_info = sbinfo;
+       sbinfo->hstate = config.hstate;
         spin_lock_init(&sbinfo->stat_lock);
         sbinfo->max_blocks = config.nr_blocks;
         sbinfo->free_blocks = config.nr_blocks;
         sbinfo->max_inodes = config.nr_inodes;
         sbinfo->free_inodes = config.nr_inodes;
         sb->s_maxbytes = MAX_LFS_FILESIZE;
-       sb->s_blocksize = HPAGE_SIZE;
-       sb->s_blocksize_bits = HPAGE_SHIFT;
+       sb->s_blocksize = huge_page_size(config.hstate);
+       sb->s_blocksize_bits = huge_page_shift(config.hstate);
         sb->s_magic = HUGETLBFS_MAGIC;
         sb->s_op = &hugetlbfs_ops;
         sb->s_time_gran = 1;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index b75bdb4..ba9263e 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -100,6 +100,7 @@ struct hugetlbfs_config {
         umode_t mode;
         long    nr_blocks;
         long    nr_inodes;
+       struct hstate *hstate;
  };
  
  struct hugetlbfs_sb_info {
@@ -108,6 +109,7 @@ struct hugetlbfs_sb_info {
         long    max_inodes;   /* inodes allowed */
         long    free_inodes;  /* inodes free */
         spinlock_t      stat_lock;
+       struct hstate *hstate;
  };
  
  
@@ -191,19 +193,21 @@ extern unsigned int default_hstate_idx;
  
  #define default_hstate (hstates[default_hstate_idx])
  
-static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
+static inline struct hstate *hstate_inode(struct inode *i)
  {
-       return &default_hstate;
+       struct hugetlbfs_sb_info *hsb;
+       hsb = HUGETLBFS_SB(i->i_sb);
+       return hsb->hstate;
  }
  
  static inline struct hstate *hstate_file(struct file *f)
  {
-       return &default_hstate;
+       return hstate_inode(f->f_dentry->d_inode);
  }
  
-static inline struct hstate *hstate_inode(struct inode *i)
+static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
  {
-       return &default_hstate;
+       return hstate_file(vma->vm_file);
  }
  
  static inline unsigned long huge_page_size(struct hstate *h)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 82378d4..4cf7a90 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1439,19 +1439,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
                           unsigned long end, struct page *ref_page)
  {
-       /*
-        * It is undesirable to test vma->vm_file as it should be non-null
-        * for valid hugetlb area. However, vm_file will be NULL in the error
-        * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails,
-        * do_mmap_pgoff() nullifies vma->vm_file before calling this function
-        * to clean up. Since no pte has actually been setup, it is safe to
-        * do nothing in this case.
-        */
-       if (vma->vm_file) {
-               spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
-               __unmap_hugepage_range(vma, start, end, ref_page);
-               spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
-       }
+       spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+       __unmap_hugepage_range(vma, start, end, ref_page);
+       spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
  }
  
  /*
diff --git a/mm/memory.c b/mm/memory.c

index c1c1d6d..02fc6b1 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -901,9 +901,23 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
                         }
  
                         if (unlikely(is_vm_hugetlb_page(vma))) {
-                               unmap_hugepage_range(vma, start, end, NULL);
-                               zap_work -= (end - start) /
+                               /*
+                                * It is undesirable to test vma->vm_file as it
+                                * should be non-null for valid hugetlb area.
+                                * However, vm_file will be NULL in the error
+                                * cleanup path of do_mmap_pgoff. When
+                                * hugetlbfs ->mmap method fails,
+                                * do_mmap_pgoff() nullifies vma->vm_file
+                                * before calling this function to clean up.
+                                * Since no pte has actually been setup, it is
+                                * safe to do nothing in this case.
+                                */
+                               if (vma->vm_file) {
+                                       unmap_hugepage_range(vma, start, end, NULL);
+                                       zap_work -= (end - start) /
                                         pages_per_huge_page(hstate_vma(vma));
+                               }
+
                                 start = end;
                         } else
                                 start = unmap_page_range(*tlbp, vma,
author	Andi Kleen <ak@suse.de>
	Thu, 24 Jul 2008 04:27:43 +0000 (21:27 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 24 Jul 2008 17:47:17 +0000 (10:47 -0700)
fs/hugetlbfs/inode.c		patch | blob | history
include/linux/hugetlb.h		patch | blob | history
mm/hugetlb.c		patch | blob | history
mm/memory.c		patch | blob | history