// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *       Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Better ipc lock (kern_ipc_perm.lock) handling
 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
 */

#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/mount.h>
#include <linux/ipc_namespace.h>

#include <linux/uaccess.h>

#include "util.h"

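/*
 * Per-attach state, hung off the private_data of the file that wraps a
 * mapped shm segment: the segment id, its ipc namespace, a reference to
 * the real backing file, and the backing mapping's vm_ops.
 */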
struct shm_file_data {
        int id;
        struct ipc_namespace *ns;
        struct file *file;
        const struct vm_operations_struct *vm_ops;
};

#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))

static const struct file_operations shm_file_operations;
static const struct vm_operations_struct shm_vm_ops;

#define shm_ids(ns)     ((ns)->ids[IPC_SHM_IDS])

#define shm_unlock(shp)                 \
        ipc_unlock(&(shp)->shm_perm)

static int newseg(struct ipc_namespace *, struct ipc_params *);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif

int shm_init_ns(struct ipc_namespace *ns)
{
        ns->shm_ctlmax = SHMMAX;
        ns->shm_ctlall = SHMALL;
        ns->shm_ctlmni = SHMMNI;
        ns->shm_rmid_forced = 0;
        ns->shm_tot = 0;
        return ipc_init_ids(&shm_ids(ns));
}

/*
 * Called with shm_ids.rwsem (writer) and the shp structure locked.
 * Only shm_ids.rwsem remains locked on exit.
 */
static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
        struct shmid_kernel *shp;

        shp = container_of(ipcp, struct shmid_kernel, shm_perm);

        if (shp->shm_nattch) {
                shp->shm_perm.mode |= SHM_DEST;
                /* Do not find it any more */
                ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
                shm_unlock(shp);
        } else
                shm_destroy(ns, shp);
}

#ifdef CONFIG_IPC_NS
void shm_exit_ns(struct ipc_namespace *ns)
{
        free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
        idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
        rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht);
}
#endif

static int __init ipc_ns_init(void)
{
        const int err = shm_init_ns(&init_ipc_ns);
        WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err);
        return err;
}

pure_initcall(ipc_ns_init);

void __init shm_init(void)
{
        ipc_init_proc_interface("sysvipc/shm",
#if BITS_PER_LONG <= 32
                                "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
#else
                                "       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
#endif
                                IPC_SHM_IDS, sysvipc_shm_proc_show);
}

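/*
 * The shm_obtain_object[_check] helpers look an id up under RCU and
 * return the shmid_kernel without taking the ipc object lock; the
 * _check variant additionally verifies the id's sequence number.
 */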
static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
{
        struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);

        if (IS_ERR(ipcp))
                return ERR_CAST(ipcp);

        return container_of(ipcp, struct shmid_kernel, shm_perm);
}

static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
{
        struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);

        if (IS_ERR(ipcp))
                return ERR_CAST(ipcp);

        return container_of(ipcp, struct shmid_kernel, shm_perm);
}

/*
 * shm_lock_(check_) routines are called in the paths where the rwsem
 * is not necessarily held.
 */
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
        struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);

        /*
         * Callers of shm_lock() must validate the status of the returned ipc
         * object pointer (as returned by ipc_lock()), and error out as
         * appropriate.
         */
        if (IS_ERR(ipcp))
                return (void *)ipcp;
        return container_of(ipcp, struct shmid_kernel, shm_perm);
}

static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
{
        rcu_read_lock();
        ipc_lock_object(&ipcp->shm_perm);
}

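/* RCU callback: free the security state and the shmid_kernel itself. */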
static void shm_rcu_free(struct rcu_head *head)
{
        struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
                                                        rcu);
        struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
                                                        shm_perm);
        security_shm_free(shp);
        kvfree(shp);
}

static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
        list_del(&s->shm_clist);
        ipc_rmid(&shm_ids(ns), &s->shm_perm);
}

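/*
 * Bump the attach count of the segment backing @vma. Fails if the id
 * was removed, or was reused by a different segment in the meantime.
 */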
static int __shm_open(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
        struct shmid_kernel *shp;

        shp = shm_lock(sfd->ns, sfd->id);

        if (IS_ERR(shp))
                return PTR_ERR(shp);

        if (shp->shm_file != sfd->file) {
                /* ID was reused */
                shm_unlock(shp);
                return -EINVAL;
        }

        shp->shm_atim = ktime_get_real_seconds();
        shp->shm_lprid = task_tgid_vnr(current);
        shp->shm_nattch++;
        shm_unlock(shp);
        return 0;
}

/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
        int err = __shm_open(vma);
        /*
         * We raced in the idr lookup or with shm_destroy().
         * Either way, the ID is busted.
         */
        WARN_ON_ONCE(err);
}

/*
 * shm_destroy - free the struct shmid_kernel
 *
 * @ns: namespace
 * @shp: struct to free
 *
 * It has to be called with shp and shm_ids.rwsem (writer) locked,
 * but returns with shp unlocked and freed.
 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
        struct file *shm_file;

        shm_file = shp->shm_file;
        shp->shm_file = NULL;
        ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
        shm_rmid(ns, shp);
        shm_unlock(shp);
        if (!is_file_hugepages(shm_file))
                shmem_lock(shm_file, 0, shp->mlock_user);
        else if (shp->mlock_user)
                user_shm_unlock(i_size_read(file_inode(shm_file)),
                                shp->mlock_user);
        fput(shm_file);
        ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
}

/*
 * shm_may_destroy - identifies whether shm segment should be destroyed now
 *
 * Returns true if and only if there are no active users of the segment and
 * one of the following is true:
 *
 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
 *
 * 2) sysctl kernel.shm_rmid_forced is set to 1.
 */
static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
        return (shp->shm_nattch == 0) &&
               (ns->shm_rmid_forced ||
                (shp->shm_perm.mode & SHM_DEST));
}

/*
 * remove the attach descriptor vma.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
        struct shmid_kernel *shp;
        struct ipc_namespace *ns = sfd->ns;

        down_write(&shm_ids(ns).rwsem);
        /* remove from the list of attaches of the shm segment */
        shp = shm_lock(ns, sfd->id);

        /*
         * We raced in the idr lookup or with shm_destroy().
         * Either way, the ID is busted.
         */
        if (WARN_ON_ONCE(IS_ERR(shp)))
                goto done; /* no-op */

        shp->shm_lprid = task_tgid_vnr(current);
        shp->shm_dtim = ktime_get_real_seconds();
        shp->shm_nattch--;
        if (shm_may_destroy(ns, shp))
                shm_destroy(ns, shp);
        else
                shm_unlock(shp);
done:
        up_write(&shm_ids(ns).rwsem);
}

/* Called with ns->shm_ids(ns).rwsem locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
        struct ipc_namespace *ns = data;
        struct kern_ipc_perm *ipcp = p;
        struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

        /*
         * We want to destroy segments without users and with already
         * exit'ed originating process.
         *
         * As shp->* are changed under rwsem, it's safe to skip shp locking.
         */
        if (shp->shm_creator != NULL)
                return 0;

        if (shm_may_destroy(ns, shp)) {
                shm_lock_by_ptr(shp);
                shm_destroy(ns, shp);
        }
        return 0;
}

void shm_destroy_orphaned(struct ipc_namespace *ns)
{
        down_write(&shm_ids(ns).rwsem);
        if (shm_ids(ns).in_use)
                idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
        up_write(&shm_ids(ns).rwsem);
}

/* Locking assumes this will only be called with task == current */
void exit_shm(struct task_struct *task)
{
        struct ipc_namespace *ns = task->nsproxy->ipc_ns;
        struct shmid_kernel *shp, *n;

        if (list_empty(&task->sysvshm.shm_clist))
                return;

        /*
         * If kernel.shm_rmid_forced is not set then only keep track of
         * which shmids are orphaned, so that a later set of the sysctl
         * can clean them up.
         */
        if (!ns->shm_rmid_forced) {
                down_read(&shm_ids(ns).rwsem);
                list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
                        shp->shm_creator = NULL;
                /*
                 * Only under the read lock, but we are only called on
                 * current, so no entry on the list will be shared.
                 */
                list_del(&task->sysvshm.shm_clist);
                up_read(&shm_ids(ns).rwsem);
                return;
        }

        /*
         * Destroy all already created segments that are not yet mapped,
         * and mark any mapped segments as orphaned to cover the sysctl
         * toggling. Destroy is skipped if shm_may_destroy() returns false.
         */
        down_write(&shm_ids(ns).rwsem);
        list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
                shp->shm_creator = NULL;

                if (shm_may_destroy(ns, shp)) {
                        shm_lock_by_ptr(shp);
                        shm_destroy(ns, shp);
                }
        }

        /* Remove the list head from any segments still attached. */
        list_del(&task->sysvshm.shm_clist);
        up_write(&shm_ids(ns).rwsem);
}

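/*
 * The vm_operations below forward to the vm_ops saved from the backing
 * shmem (or hugetlbfs) mapping at mmap time.
 */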
static int shm_fault(struct vm_fault *vmf)
{
        struct file *file = vmf->vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);

        return sfd->vm_ops->fault(vmf);
}

static int shm_split(struct vm_area_struct *vma, unsigned long addr)
{
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);

        if (sfd->vm_ops && sfd->vm_ops->split)
                return sfd->vm_ops->split(vma, addr);

        return 0;
}

#ifdef CONFIG_NUMA
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
        int err = 0;

        if (sfd->vm_ops->set_policy)
                err = sfd->vm_ops->set_policy(vma, new);
        return err;
}

static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
                                        unsigned long addr)
{
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
        struct mempolicy *pol = NULL;

        if (sfd->vm_ops->get_policy)
                pol = sfd->vm_ops->get_policy(vma, addr);
        else if (vma->vm_policy)
                pol = vma->vm_policy;

        return pol;
}
#endif

static int shm_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct shm_file_data *sfd = shm_file_data(file);
        int ret;

        /*
         * In case of remap_file_pages() emulation, the file can represent an
         * IPC ID that was removed, and possibly even reused by another shm
         * segment already.  Propagate this case as an error to caller.
         */
        ret = __shm_open(vma);
        if (ret)
                return ret;

        ret = call_mmap(sfd->file, vma);
        if (ret) {
                shm_close(vma);
                return ret;
        }
        sfd->vm_ops = vma->vm_ops;
#ifdef CONFIG_MMU
        WARN_ON(!sfd->vm_ops->fault);
#endif
        vma->vm_ops = &shm_vm_ops;
        return 0;
}

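/*
 * Called when the last reference to the wrapper file is dropped:
 * release the per-attach bookkeeping.
 */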
static int shm_release(struct inode *ino, struct file *file)
{
        struct shm_file_data *sfd = shm_file_data(file);

        put_ipc_ns(sfd->ns);
        fput(sfd->file);
        shm_file_data(file) = NULL;
        kfree(sfd);
        return 0;
}

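/*
 * The remaining file_operations are thin wrappers that forward to the
 * corresponding methods of the backing file.
 */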
static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
        struct shm_file_data *sfd = shm_file_data(file);

        if (!sfd->file->f_op->fsync)
                return -EINVAL;
        return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
}

static long shm_fallocate(struct file *file, int mode, loff_t offset,
                          loff_t len)
{
        struct shm_file_data *sfd = shm_file_data(file);

        if (!sfd->file->f_op->fallocate)
                return -EOPNOTSUPP;
        return sfd->file->f_op->fallocate(file, mode, offset, len);
}

static unsigned long shm_get_unmapped_area(struct file *file,
        unsigned long addr, unsigned long len, unsigned long pgoff,
        unsigned long flags)
{
        struct shm_file_data *sfd = shm_file_data(file);

        return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
                                                pgoff, flags);
}

static const struct file_operations shm_file_operations = {
        .mmap           = shm_mmap,
        .fsync          = shm_fsync,
        .release        = shm_release,
        .get_unmapped_area      = shm_get_unmapped_area,
        .llseek         = noop_llseek,
        .fallocate      = shm_fallocate,
};

/*
 * shm_file_operations_huge is now identical to shm_file_operations,
 * but we keep it distinct for the sake of is_file_shm_hugepages().
 */
static const struct file_operations shm_file_operations_huge = {
        .mmap           = shm_mmap,
        .fsync          = shm_fsync,
        .release        = shm_release,
        .get_unmapped_area      = shm_get_unmapped_area,
        .llseek         = noop_llseek,
        .fallocate      = shm_fallocate,
};

bool is_file_shm_hugepages(struct file *file)
{
        return file->f_op == &shm_file_operations_huge;
}

static const struct vm_operations_struct shm_vm_ops = {
        .open   = shm_open,     /* callback for a new vm-area open */
        .close  = shm_close,    /* callback for when the vm-area is released */
        .fault  = shm_fault,
        .split  = shm_split,
#if defined(CONFIG_NUMA)
        .set_policy = shm_set_policy,
        .get_policy = shm_get_policy,
#endif
};

/**
 * newseg - Create a new shared memory segment
 * @ns: namespace
 * @params: ptr to the structure that contains key, size and shmflg
 *
 * Called with shm_ids.rwsem held as a writer.
 */
static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
{
        key_t key = params->key;
        int shmflg = params->flg;
        size_t size = params->u.size;
        int error;
        struct shmid_kernel *shp;
        size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        struct file *file;
        char name[13];
        vm_flags_t acctflag = 0;

        if (size < SHMMIN || size > ns->shm_ctlmax)
                return -EINVAL;

        if (numpages << PAGE_SHIFT < size)
                return -ENOSPC;

        if (ns->shm_tot + numpages < ns->shm_tot ||
                        ns->shm_tot + numpages > ns->shm_ctlall)
                return -ENOSPC;

        shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
        if (unlikely(!shp))
                return -ENOMEM;

        shp->shm_perm.key = key;
        shp->shm_perm.mode = (shmflg & S_IRWXUGO);
        shp->mlock_user = NULL;

        shp->shm_perm.security = NULL;
        error = security_shm_alloc(shp);
        if (error) {
                kvfree(shp);
                return error;
        }

        sprintf(name, "SYSV%08x", key);
        if (shmflg & SHM_HUGETLB) {
                struct hstate *hs;
                size_t hugesize;

                hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
                if (!hs) {
                        error = -EINVAL;
                        goto no_file;
                }
                hugesize = ALIGN(size, huge_page_size(hs));

                /* hugetlb_file_setup applies strict accounting */
                if (shmflg & SHM_NORESERVE)
                        acctflag = VM_NORESERVE;
                file = hugetlb_file_setup(name, hugesize, acctflag,
                                  &shp->mlock_user, HUGETLB_SHMFS_INODE,
                                (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
        } else {
                /*
                 * Do not allow disabling accounting (SHM_NORESERVE) under
                 * OVERCOMMIT_NEVER, even if it's asked for.
                 */
                if ((shmflg & SHM_NORESERVE) &&
                                sysctl_overcommit_memory != OVERCOMMIT_NEVER)
                        acctflag = VM_NORESERVE;
                file = shmem_kernel_file_setup(name, size, acctflag);
        }
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto no_file;

        shp->shm_cprid = task_tgid_vnr(current);
        shp->shm_lprid = 0;
        shp->shm_atim = shp->shm_dtim = 0;
        shp->shm_ctim = ktime_get_real_seconds();
        shp->shm_segsz = size;
        shp->shm_nattch = 0;
        shp->shm_file = file;
        shp->shm_creator = current;

        error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
        if (error < 0)
                goto no_id;

        list_add(&shp->shm_clist, &current->sysvshm.shm_clist);

        /*
         * shmid gets reported as "inode#" in /proc/pid/maps.
         * proc-ps tools use this. Changing this will break them.
         */
        file_inode(file)->i_ino = shp->shm_perm.id;

        ns->shm_tot += numpages;
        error = shp->shm_perm.id;

        ipc_unlock_object(&shp->shm_perm);
        rcu_read_unlock();
        return error;

no_id:
        if (is_file_hugepages(file) && shp->mlock_user)
                user_shm_unlock(size, shp->mlock_user);
        fput(file);
no_file:
        call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
        return error;
}

/*
 * Called with shm_ids.rwsem and ipcp locked.
 */
static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
{
        struct shmid_kernel *shp;

        shp = container_of(ipcp, struct shmid_kernel, shm_perm);
        return security_shm_associate(shp, shmflg);
}

/*
 * Called with shm_ids.rwsem and ipcp locked.
 */
static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
                                struct ipc_params *params)
{
        struct shmid_kernel *shp;

        shp = container_of(ipcp, struct shmid_kernel, shm_perm);
        if (shp->shm_segsz < params->u.size)
                return -EINVAL;

        return 0;
}

SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
{
        struct ipc_namespace *ns;
        static const struct ipc_ops shm_ops = {
                .getnew = newseg,
                .associate = shm_security,
                .more_checks = shm_more_checks,
        };
        struct ipc_params shm_params;

        ns = current->nsproxy->ipc_ns;

        shm_params.key = key;
        shm_params.flg = shmflg;
        shm_params.u.size = size;

        return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
}

static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
{
        switch (version) {
        case IPC_64:
                return copy_to_user(buf, in, sizeof(*in));
        case IPC_OLD:
            {
                struct shmid_ds out;

                memset(&out, 0, sizeof(out));
                ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
                out.shm_segsz   = in->shm_segsz;
                out.shm_atime   = in->shm_atime;
                out.shm_dtime   = in->shm_dtime;
                out.shm_ctime   = in->shm_ctime;
                out.shm_cpid    = in->shm_cpid;
                out.shm_lpid    = in->shm_lpid;
                out.shm_nattch  = in->shm_nattch;

                return copy_to_user(buf, &out, sizeof(out));
            }
        default:
                return -EINVAL;
        }
}

static inline unsigned long
copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
{
        switch (version) {
        case IPC_64:
                if (copy_from_user(out, buf, sizeof(*out)))
                        return -EFAULT;
                return 0;
        case IPC_OLD:
            {
                struct shmid_ds tbuf_old;

                if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
                        return -EFAULT;

                out->shm_perm.uid       = tbuf_old.shm_perm.uid;
                out->shm_perm.gid       = tbuf_old.shm_perm.gid;
                out->shm_perm.mode      = tbuf_old.shm_perm.mode;

                return 0;
            }
        default:
                return -EINVAL;
        }
}

static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
{
        switch (version) {
        case IPC_64:
                return copy_to_user(buf, in, sizeof(*in));
        case IPC_OLD:
            {
                struct shminfo out;

                if (in->shmmax > INT_MAX)
                        out.shmmax = INT_MAX;
                else
                        out.shmmax = (int)in->shmmax;

                out.shmmin      = in->shmmin;
                out.shmmni      = in->shmmni;
                out.shmseg      = in->shmseg;
                out.shmall      = in->shmall;

                return copy_to_user(buf, &out, sizeof(out));
            }
        default:
                return -EINVAL;
        }
}

/*
 * Calculate and add used RSS and swap pages of a shm.
 * Called with shm_ids.rwsem held as a reader
 */
static void shm_add_rss_swap(struct shmid_kernel *shp,
        unsigned long *rss_add, unsigned long *swp_add)
{
        struct inode *inode;

        inode = file_inode(shp->shm_file);

        if (is_file_hugepages(shp->shm_file)) {
                struct address_space *mapping = inode->i_mapping;
                struct hstate *h = hstate_file(shp->shm_file);
                *rss_add += pages_per_huge_page(h) * mapping->nrpages;
        } else {
#ifdef CONFIG_SHMEM
                struct shmem_inode_info *info = SHMEM_I(inode);

                spin_lock_irq(&info->lock);
                *rss_add += inode->i_mapping->nrpages;
                *swp_add += info->swapped;
                spin_unlock_irq(&info->lock);
#else
                *rss_add += inode->i_mapping->nrpages;
#endif
        }
}

/*
 * Called with shm_ids.rwsem held as a reader
 */
static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
                unsigned long *swp)
{
        int next_id;
        int total, in_use;

        *rss = 0;
        *swp = 0;

        in_use = shm_ids(ns).in_use;

        for (total = 0, next_id = 0; total < in_use; next_id++) {
                struct kern_ipc_perm *ipc;
                struct shmid_kernel *shp;

                ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
                if (ipc == NULL)
                        continue;
                shp = container_of(ipc, struct shmid_kernel, shm_perm);

                shm_add_rss_swap(shp, rss, swp);

                total++;
        }
}

/*
 * This function handles some shmctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
                       struct shmid64_ds *shmid64)
{
        struct kern_ipc_perm *ipcp;
        struct shmid_kernel *shp;
        int err;

        down_write(&shm_ids(ns).rwsem);
        rcu_read_lock();

        ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
                                      &shmid64->shm_perm, 0);
        if (IS_ERR(ipcp)) {
                err = PTR_ERR(ipcp);
                goto out_unlock1;
        }

        shp = container_of(ipcp, struct shmid_kernel, shm_perm);

        err = security_shm_shmctl(shp, cmd);
        if (err)
                goto out_unlock1;

        switch (cmd) {
        case IPC_RMID:
                ipc_lock_object(&shp->shm_perm);
                /* do_shm_rmid unlocks the ipc object and rcu */
                do_shm_rmid(ns, ipcp);
                goto out_up;
        case IPC_SET:
                ipc_lock_object(&shp->shm_perm);
                err = ipc_update_perm(&shmid64->shm_perm, ipcp);
                if (err)
                        goto out_unlock0;
                shp->shm_ctim = ktime_get_real_seconds();
                break;
        default:
                err = -EINVAL;
                goto out_unlock1;
        }

out_unlock0:
        ipc_unlock_object(&shp->shm_perm);
out_unlock1:
        rcu_read_unlock();
out_up:
        up_write(&shm_ids(ns).rwsem);
        return err;
}

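/*
 * IPC_INFO: report the namespace-wide limits. On success the return
 * value is the index of the highest currently allocated id, or 0 if
 * there is none.
 */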
static int shmctl_ipc_info(struct ipc_namespace *ns,
                           struct shminfo64 *shminfo)
{
        int err = security_shm_shmctl(NULL, IPC_INFO);
        if (!err) {
                memset(shminfo, 0, sizeof(*shminfo));
                shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
                shminfo->shmmax = ns->shm_ctlmax;
                shminfo->shmall = ns->shm_ctlall;
                shminfo->shmmin = SHMMIN;
                down_read(&shm_ids(ns).rwsem);
                err = ipc_get_maxid(&shm_ids(ns));
                up_read(&shm_ids(ns).rwsem);
                if (err < 0)
                        err = 0;
        }
        return err;
}

static int shmctl_shm_info(struct ipc_namespace *ns,
                           struct shm_info *shm_info)
{
        int err = security_shm_shmctl(NULL, SHM_INFO);
        if (!err) {
                memset(shm_info, 0, sizeof(*shm_info));
                down_read(&shm_ids(ns).rwsem);
                shm_info->used_ids = shm_ids(ns).in_use;
                shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
                shm_info->shm_tot = ns->shm_tot;
                shm_info->swap_attempts = 0;
                shm_info->swap_successes = 0;
                err = ipc_get_maxid(&shm_ids(ns));
                up_read(&shm_ids(ns).rwsem);
                if (err < 0)
                        err = 0;
        }
        return err;
}

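/*
 * IPC_STAT/SHM_STAT: copy a segment's state into a shmid64_ds. For
 * SHM_STAT the shmid is an index and the full id is returned; for
 * IPC_STAT it is a full id and 0 is returned on success.
 */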
static int shmctl_stat(struct ipc_namespace *ns, int shmid,
                        int cmd, struct shmid64_ds *tbuf)
{
        struct shmid_kernel *shp;
        int result;
        int err;

        rcu_read_lock();
        if (cmd == SHM_STAT) {
                shp = shm_obtain_object(ns, shmid);
                if (IS_ERR(shp)) {
                        err = PTR_ERR(shp);
                        goto out_unlock;
                }
                result = shp->shm_perm.id;
        } else {
                shp = shm_obtain_object_check(ns, shmid);
                if (IS_ERR(shp)) {
                        err = PTR_ERR(shp);
                        goto out_unlock;
                }
                result = 0;
        }

        err = -EACCES;
        if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
                goto out_unlock;

        err = security_shm_shmctl(shp, cmd);
        if (err)
                goto out_unlock;

        memset(tbuf, 0, sizeof(*tbuf));
        kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
        tbuf->shm_segsz = shp->shm_segsz;
        tbuf->shm_atime = shp->shm_atim;
        tbuf->shm_dtime = shp->shm_dtim;
        tbuf->shm_ctime = shp->shm_ctim;
        tbuf->shm_cpid  = shp->shm_cprid;
        tbuf->shm_lpid  = shp->shm_lprid;
        tbuf->shm_nattch = shp->shm_nattch;
        rcu_read_unlock();
        return result;

out_unlock:
        rcu_read_unlock();
        return err;
}

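/*
 * SHM_LOCK/SHM_UNLOCK: pin the segment's pages in memory, or undo that.
 * Without CAP_IPC_LOCK this requires ownership, and for SHM_LOCK a
 * non-zero RLIMIT_MEMLOCK. Hugetlb segments are always resident, so
 * they are silently ignored here.
 */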
static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
{
        struct shmid_kernel *shp;
        struct file *shm_file;
        int err;

        rcu_read_lock();
        shp = shm_obtain_object_check(ns, shmid);
        if (IS_ERR(shp)) {
                err = PTR_ERR(shp);
                goto out_unlock1;
        }

        audit_ipc_obj(&(shp->shm_perm));
        err = security_shm_shmctl(shp, cmd);
        if (err)
                goto out_unlock1;

        ipc_lock_object(&shp->shm_perm);

        /* check if shm_destroy() is tearing down shp */
        if (!ipc_valid_object(&shp->shm_perm)) {
                err = -EIDRM;
                goto out_unlock0;
        }

        if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
                kuid_t euid = current_euid();

                if (!uid_eq(euid, shp->shm_perm.uid) &&
                    !uid_eq(euid, shp->shm_perm.cuid)) {
                        err = -EPERM;
                        goto out_unlock0;
                }
                if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
                        err = -EPERM;
                        goto out_unlock0;
                }
        }

        shm_file = shp->shm_file;
        if (is_file_hugepages(shm_file))
                goto out_unlock0;

        if (cmd == SHM_LOCK) {
                struct user_struct *user = current_user();

                err = shmem_lock(shm_file, 1, user);
                if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
                        shp->shm_perm.mode |= SHM_LOCKED;
                        shp->mlock_user = user;
                }
                goto out_unlock0;
        }

        /* SHM_UNLOCK */
        if (!(shp->shm_perm.mode & SHM_LOCKED))
                goto out_unlock0;
        shmem_lock(shm_file, 0, shp->mlock_user);
        shp->shm_perm.mode &= ~SHM_LOCKED;
        shp->mlock_user = NULL;
        get_file(shm_file);
        ipc_unlock_object(&shp->shm_perm);
        rcu_read_unlock();
        shmem_unlock_mapping(shm_file->f_mapping);

        fput(shm_file);
        return err;

out_unlock0:
        ipc_unlock_object(&shp->shm_perm);
out_unlock1:
        rcu_read_unlock();
        return err;
}

SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
        int err, version;
        struct ipc_namespace *ns;
        struct shmid64_ds sem64;

        if (cmd < 0 || shmid < 0)
                return -EINVAL;

        version = ipc_parse_version(&cmd);
        ns = current->nsproxy->ipc_ns;

        switch (cmd) {
        case IPC_INFO: {
                struct shminfo64 shminfo;
                err = shmctl_ipc_info(ns, &shminfo);
                if (err < 0)
                        return err;
                if (copy_shminfo_to_user(buf, &shminfo, version))
                        err = -EFAULT;
                return err;
        }
        case SHM_INFO: {
                struct shm_info shm_info;
                err = shmctl_shm_info(ns, &shm_info);
                if (err < 0)
                        return err;
                if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
                        err = -EFAULT;
                return err;
        }
        case SHM_STAT:
        case IPC_STAT: {
                err = shmctl_stat(ns, shmid, cmd, &sem64);
                if (err < 0)
                        return err;
                if (copy_shmid_to_user(buf, &sem64, version))
                        err = -EFAULT;
                return err;
        }
        case IPC_SET:
                if (copy_shmid_from_user(&sem64, buf, version))
                        return -EFAULT;
                /* fallthru */
        case IPC_RMID:
                return shmctl_down(ns, shmid, cmd, &sem64);
        case SHM_LOCK:
        case SHM_UNLOCK:
                return shmctl_do_lock(ns, shmid, cmd);
        default:
                return -EINVAL;
        }
}

#ifdef CONFIG_COMPAT

struct compat_shmid_ds {
        struct compat_ipc_perm shm_perm;
        int shm_segsz;
        compat_time_t shm_atime;
        compat_time_t shm_dtime;
        compat_time_t shm_ctime;
        compat_ipc_pid_t shm_cpid;
        compat_ipc_pid_t shm_lpid;
        unsigned short shm_nattch;
        unsigned short shm_unused;
        compat_uptr_t shm_unused2;
        compat_uptr_t shm_unused3;
};

struct compat_shminfo64 {
        compat_ulong_t shmmax;
        compat_ulong_t shmmin;
        compat_ulong_t shmmni;
        compat_ulong_t shmseg;
        compat_ulong_t shmall;
        compat_ulong_t __unused1;
        compat_ulong_t __unused2;
        compat_ulong_t __unused3;
        compat_ulong_t __unused4;
};

struct compat_shm_info {
        compat_int_t used_ids;
        compat_ulong_t shm_tot, shm_rss, shm_swp;
        compat_ulong_t swap_attempts, swap_successes;
};

static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
                                        int version)
{
        if (in->shmmax > INT_MAX)
                in->shmmax = INT_MAX;
        if (version == IPC_64) {
                struct compat_shminfo64 info;
                memset(&info, 0, sizeof(info));
                info.shmmax = in->shmmax;
                info.shmmin = in->shmmin;
                info.shmmni = in->shmmni;
                info.shmseg = in->shmseg;
                info.shmall = in->shmall;
                return copy_to_user(buf, &info, sizeof(info));
        } else {
                struct shminfo info;
                memset(&info, 0, sizeof(info));
                info.shmmax = in->shmmax;
                info.shmmin = in->shmmin;
                info.shmmni = in->shmmni;
                info.shmseg = in->shmseg;
                info.shmall = in->shmall;
                return copy_to_user(buf, &info, sizeof(info));
        }
}

static int put_compat_shm_info(struct shm_info *ip,
                                struct compat_shm_info __user *uip)
{
        struct compat_shm_info info;

        memset(&info, 0, sizeof(info));
        info.used_ids = ip->used_ids;
        info.shm_tot = ip->shm_tot;
        info.shm_rss = ip->shm_rss;
        info.shm_swp = ip->shm_swp;
        info.swap_attempts = ip->swap_attempts;
        info.swap_successes = ip->swap_successes;
        return copy_to_user(uip, &info, sizeof(info));
}

static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
                                        int version)
{
        if (version == IPC_64) {
                struct compat_shmid64_ds v;
                memset(&v, 0, sizeof(v));
                to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
                v.shm_atime = in->shm_atime;
                v.shm_dtime = in->shm_dtime;
                v.shm_ctime = in->shm_ctime;
                v.shm_segsz = in->shm_segsz;
                v.shm_nattch = in->shm_nattch;
                v.shm_cpid = in->shm_cpid;
                v.shm_lpid = in->shm_lpid;
                return copy_to_user(buf, &v, sizeof(v));
        } else {
                struct compat_shmid_ds v;
                memset(&v, 0, sizeof(v));
                to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
                v.shm_perm.key = in->shm_perm.key;
                v.shm_atime = in->shm_atime;
                v.shm_dtime = in->shm_dtime;
                v.shm_ctime = in->shm_ctime;
                v.shm_segsz = in->shm_segsz;
                v.shm_nattch = in->shm_nattch;
                v.shm_cpid = in->shm_cpid;
                v.shm_lpid = in->shm_lpid;
                return copy_to_user(buf, &v, sizeof(v));
        }
}

static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
                                        int version)
{
        memset(out, 0, sizeof(*out));
        if (version == IPC_64) {
                struct compat_shmid64_ds *p = buf;
                return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
        } else {
                struct compat_shmid_ds *p = buf;
                return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
        }
}

COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
{
        struct ipc_namespace *ns;
        struct shmid64_ds sem64;
        int version = compat_ipc_parse_version(&cmd);
        int err;

        ns = current->nsproxy->ipc_ns;

        if (cmd < 0 || shmid < 0)
                return -EINVAL;

        switch (cmd) {
        case IPC_INFO: {
                struct shminfo64 shminfo;
                err = shmctl_ipc_info(ns, &shminfo);
                if (err < 0)
                        return err;
                if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
                        err = -EFAULT;
                return err;
        }
        case SHM_INFO: {
                struct shm_info shm_info;
                err = shmctl_shm_info(ns, &shm_info);
                if (err < 0)
                        return err;
                if (put_compat_shm_info(&shm_info, uptr))
                        err = -EFAULT;
                return err;
        }
        case IPC_STAT:
        case SHM_STAT:
                err = shmctl_stat(ns, shmid, cmd, &sem64);
                if (err < 0)
                        return err;
                if (copy_compat_shmid_to_user(uptr, &sem64, version))
                        err = -EFAULT;
                return err;

        case IPC_SET:
                if (copy_compat_shmid_from_user(&sem64, uptr, version))
                        return -EFAULT;
                /* fallthru */
        case IPC_RMID:
                return shmctl_down(ns, shmid, cmd, &sem64);
        case SHM_LOCK:
        case SHM_UNLOCK:
                return shmctl_do_lock(ns, shmid, cmd);
        default:
                return -EINVAL;
        }
        return err;
}
#endif

/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 *
 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
 * "raddr" thing points to kernel space, and there has to be a wrapper around
 * this.
 */
long do_shmat(int shmid, char __user *shmaddr, int shmflg,
              ulong *raddr, unsigned long shmlba)
{
        struct shmid_kernel *shp;
        unsigned long addr = (unsigned long)shmaddr;
        unsigned long size;
        struct file *file;
        int    err;
        unsigned long flags = MAP_SHARED;
        unsigned long prot;
        int acc_mode;
        struct ipc_namespace *ns;
        struct shm_file_data *sfd;
        struct path path;
        fmode_t f_mode;
        unsigned long populate = 0;

        err = -EINVAL;
        if (shmid < 0)
                goto out;

        if (addr) {
                if (addr & (shmlba - 1)) {
                        if (shmflg & SHM_RND) {
                                addr &= ~(shmlba - 1);  /* round down */

                                /*
                                 * Ensure that the round-down is non-nil
                                 * when remapping. This can happen for
                                 * cases when addr < shmlba.
                                 */
                                if (!addr && (shmflg & SHM_REMAP))
                                        goto out;
                        } else
#ifndef __ARCH_FORCE_SHMLBA
                                if (addr & ~PAGE_MASK)
#endif
                                        goto out;
                }

                flags |= MAP_FIXED;
        } else if ((shmflg & SHM_REMAP))
                goto out;

        if (shmflg & SHM_RDONLY) {
                prot = PROT_READ;
                acc_mode = S_IRUGO;
                f_mode = FMODE_READ;
        } else {
                prot = PROT_READ | PROT_WRITE;
                acc_mode = S_IRUGO | S_IWUGO;
                f_mode = FMODE_READ | FMODE_WRITE;
        }
        if (shmflg & SHM_EXEC) {
                prot |= PROT_EXEC;
                acc_mode |= S_IXUGO;
        }

        /*
         * We cannot rely on the fs check since SYSV IPC does have an
         * additional creator id...
         */
        ns = current->nsproxy->ipc_ns;
        rcu_read_lock();
        shp = shm_obtain_object_check(ns, shmid);
        if (IS_ERR(shp)) {
                err = PTR_ERR(shp);
                goto out_unlock;
        }

        err = -EACCES;
        if (ipcperms(ns, &shp->shm_perm, acc_mode))
                goto out_unlock;

        err = security_shm_shmat(shp, shmaddr, shmflg);
        if (err)
                goto out_unlock;

        ipc_lock_object(&shp->shm_perm);

        /* check if shm_destroy() is tearing down shp */
        if (!ipc_valid_object(&shp->shm_perm)) {
                ipc_unlock_object(&shp->shm_perm);
                err = -EIDRM;
                goto out_unlock;
        }

        path = shp->shm_file->f_path;
        path_get(&path);
        shp->shm_nattch++;
        size = i_size_read(d_inode(path.dentry));
        ipc_unlock_object(&shp->shm_perm);
        rcu_read_unlock();

        err = -ENOMEM;
        sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
        if (!sfd) {
                path_put(&path);
                goto out_nattch;
        }

        file = alloc_file(&path, f_mode,
                          is_file_hugepages(shp->shm_file) ?
                                &shm_file_operations_huge :
                                &shm_file_operations);
        err = PTR_ERR(file);
        if (IS_ERR(file)) {
                kfree(sfd);
                path_put(&path);
                goto out_nattch;
        }

        file->private_data = sfd;
        file->f_mapping = shp->shm_file->f_mapping;
        sfd->id = shp->shm_perm.id;
        sfd->ns = get_ipc_ns(ns);
        /*
         * We need to take a reference to the real shm file to prevent the
         * pointer from becoming stale in cases where the lifetime of the outer
         * file extends beyond that of the shm segment.  It's not usually
         * possible, but it can happen during remap_file_pages() emulation as
         * that unmaps the memory, then does ->mmap() via file reference only.
         * We'll deny the ->mmap() if the shm segment was since removed, but to
         * detect shm ID reuse we need to compare the file pointers.
         */
        sfd->file = get_file(shp->shm_file);
        sfd->vm_ops = NULL;

        err = security_mmap_file(file, prot, flags);
        if (err)
                goto out_fput;

        if (down_write_killable(&current->mm->mmap_sem)) {
                err = -EINTR;
                goto out_fput;
        }

        if (addr && !(shmflg & SHM_REMAP)) {
                err = -EINVAL;
                if (addr + size < addr)
                        goto invalid;

                if (find_vma_intersection(current->mm, addr, addr + size))
                        goto invalid;
        }

        addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
        *raddr = addr;
        err = 0;
        if (IS_ERR_VALUE(addr))
                err = (long)addr;
invalid:
        up_write(&current->mm->mmap_sem);
        if (populate)
                mm_populate(addr, populate);

out_fput:
        fput(file);

out_nattch:
        down_write(&shm_ids(ns).rwsem);
        shp = shm_lock(ns, shmid);
        shp->shm_nattch--;
        if (shm_may_destroy(ns, shp))
                shm_destroy(ns, shp);
        else
                shm_unlock(shp);
        up_write(&shm_ids(ns).rwsem);
        return err;

out_unlock:
        rcu_read_unlock();
out:
        return err;
}

SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
{
        unsigned long ret;
        long err;

        err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
        if (err)
                return err;
        force_successful_syscall_return();
        return (long)ret;
}

#ifdef CONFIG_COMPAT

#ifndef COMPAT_SHMLBA
#define COMPAT_SHMLBA   SHMLBA
#endif

COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
{
        unsigned long ret;
        long err;

        err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
        if (err)
                return err;
        force_successful_syscall_return();
        return (long)ret;
}
#endif

/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long addr = (unsigned long)shmaddr;
        int retval = -EINVAL;
#ifdef CONFIG_MMU
        loff_t size = 0;
        struct file *file;
        struct vm_area_struct *next;
#endif

        if (addr & ~PAGE_MASK)
                return retval;

        if (down_write_killable(&mm->mmap_sem))
                return -EINTR;

        /*
         * This function tries to be smart and unmap shm segments that
         * were modified by partial mlock or munmap calls:
         * - It first determines the size of the shm segment that should be
         *   unmapped: It searches for a vma that is backed by shm and that
         *   started at address shmaddr. It records its size and then unmaps
         *   it.
         * - Then it unmaps all shm vmas that started at shmaddr and that
         *   are within the initially determined size and that are from the
         *   same shm segment from which we determined the size.
         * Errors from do_munmap are ignored: the function only fails if
         * it's called with invalid parameters or if it's called to unmap
         * a part of a vma. Both calls in this function are for full vmas,
         * the parameters are directly copied from the vma itself and always
         * valid - therefore do_munmap cannot fail. (famous last words?)
         */
        /*
         * If it had been mremap()'d, the starting address would not
         * match the usual checks anyway. So assume all vma's are
         * above the starting address given.
         */
        vma = find_vma(mm, addr);

#ifdef CONFIG_MMU
        while (vma) {
                next = vma->vm_next;

                /*
                 * Check if the starting address would match, i.e. it's
                 * a fragment created by mprotect() and/or munmap(), or
                 * otherwise it starts at this address with no hassles.
                 */
                if ((vma->vm_ops == &shm_vm_ops) &&
                        (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {

                        /*
                         * Record the file of the shm segment being
                         * unmapped.  With mremap(), someone could place
                         * page from another segment but with equal offsets
                         * in the range we are unmapping.
                         */
                        file = vma->vm_file;
                        size = i_size_read(file_inode(vma->vm_file));
                        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
                        /*
                         * We discovered the size of the shm segment, so
                         * break out of here and fall through to the next
                         * loop that uses the size information to stop
                         * searching for matching vma's.
                         */
                        retval = 0;
                        vma = next;
                        break;
                }
                vma = next;
        }

        /*
         * We need look no further than the maximum address a fragment
         * could possibly have landed at. Also cast things to loff_t to
         * prevent overflows and make comparisons vs. equal-width types.
         */
        size = PAGE_ALIGN(size);
        while (vma && (loff_t)(vma->vm_end - addr) <= size) {
                next = vma->vm_next;

                /* finding a matching vma now does not alter retval */
                if ((vma->vm_ops == &shm_vm_ops) &&
                    ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
                    (vma->vm_file == file))
                        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
                vma = next;
        }

#else   /* CONFIG_MMU */
        /* under NOMMU conditions, the exact address to be destroyed must be
         * given
         */
        if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
                do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
                retval = 0;
        }

#endif

        up_write(&mm->mmap_sem);
        return retval;
}

#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
{
        struct user_namespace *user_ns = seq_user_ns(s);
        struct kern_ipc_perm *ipcp = it;
        struct shmid_kernel *shp;
        unsigned long rss = 0, swp = 0;

        shp = container_of(ipcp, struct shmid_kernel, shm_perm);
        shm_add_rss_swap(shp, &rss, &swp);

#if BITS_PER_LONG <= 32
#define SIZE_SPEC "%10lu"
#else
#define SIZE_SPEC "%21lu"
#endif

        seq_printf(s,
                   "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
                   "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
                   SIZE_SPEC " " SIZE_SPEC "\n",
                   shp->shm_perm.key,
                   shp->shm_perm.id,
                   shp->shm_perm.mode,
                   shp->shm_segsz,
                   shp->shm_cprid,
                   shp->shm_lprid,
                   shp->shm_nattch,
                   from_kuid_munged(user_ns, shp->shm_perm.uid),
                   from_kgid_munged(user_ns, shp->shm_perm.gid),
                   from_kuid_munged(user_ns, shp->shm_perm.cuid),
                   from_kgid_munged(user_ns, shp->shm_perm.cgid),
                   shp->shm_atim,
                   shp->shm_dtim,
                   shp->shm_ctim,
                   rss * PAGE_SIZE,
                   swp * PAGE_SIZE);

        return 0;
}
#endif