swap: change swap_info singly-linked list to list_head
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4a7f7e6..6c95a8c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -51,14 +51,17 @@ atomic_long_t nr_swap_pages;
 /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
 long total_swap_pages;
 static int least_priority;
-static atomic_t highest_priority_index = ATOMIC_INIT(-1);
 
 static const char Bad_file[] = "Bad swap file entry ";
 static const char Unused_file[] = "Unused swap file entry ";
 static const char Bad_offset[] = "Bad swap offset entry ";
 static const char Unused_offset[] = "Unused swap offset entry ";
 
-struct swap_list_t swap_list = {-1, -1};
+/*
+ * all active swap_info_structs
+ * protected with swap_lock, and ordered by priority.
+ */
+LIST_HEAD(swap_list_head);
 
 struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
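
Background for the conversion: swap_list_head is an ordinary kernel list_head, with each swap_info_struct embedding a node and the containing structure recovered via list_entry()/container_of(). For readers less familiar with that idiom, below is a rough userspace-only model; the toy_* names are invented for the sketch and are not kernel symbols.

    #include <stdio.h>
    #include <stddef.h>

    /* minimal stand-in for the kernel's struct list_head */
    struct toy_list {
        struct toy_list *prev, *next;
    };

    static void toy_init(struct toy_list *h)
    {
        h->prev = h->next = h;
    }

    /* link @n in immediately before @pos */
    static void toy_add_tail(struct toy_list *n, struct toy_list *pos)
    {
        n->prev = pos->prev;
        n->next = pos;
        pos->prev->next = n;
        pos->prev = n;
    }

    /* recover the containing structure from an embedded node */
    #define toy_entry(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    /* toy counterpart of swap_info_struct: a prio plus an embedded node */
    struct toy_si {
        int prio;
        struct toy_list list;
    };

    int main(void)
    {
        struct toy_list head;
        struct toy_si a = { .prio = 10 }, b = { .prio = 5 }, c = { .prio = -1 };
        struct toy_list *p;

        toy_init(&head);
        /* appended in descending prio, as _enable_swap_info() keeps them */
        toy_add_tail(&a.list, &head);
        toy_add_tail(&b.list, &head);
        toy_add_tail(&c.list, &head);

        /* walk the list the way list_for_each()/list_entry() do */
        for (p = head.next; p != &head; p = p->next)
            printf("prio %d\n", toy_entry(p, struct toy_si, list)->prio);

        return 0;    /* prints 10, 5, -1 */
    }
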
@@ -640,66 +643,54 @@ no_page:
 
 swp_entry_t get_swap_page(void)
 {
-       struct swap_info_struct *si;
+       struct swap_info_struct *si, *next;
        pgoff_t offset;
-       int type, next;
-       int wrapped = 0;
-       int hp_index;
+       struct list_head *tmp;
 
        spin_lock(&swap_lock);
        if (atomic_long_read(&nr_swap_pages) <= 0)
                goto noswap;
        atomic_long_dec(&nr_swap_pages);
 
-       for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
-               hp_index = atomic_xchg(&highest_priority_index, -1);
-               /*
-                * highest_priority_index records current highest priority swap
-                * type which just frees swap entries. If its priority is
-                * higher than that of swap_list.next swap type, we use it.  It
-                * isn't protected by swap_lock, so it can be an invalid value
-                * if the corresponding swap type is swapoff. We double check
-                * the flags here. It's even possible the swap type is swapoff
-                * and swapon again and its priority is changed. In such rare
-                * case, low prority swap type might be used, but eventually
-                * high priority swap will be used after several rounds of
-                * swap.
-                */
-               if (hp_index != -1 && hp_index != type &&
-                   swap_info[type]->prio < swap_info[hp_index]->prio &&
-                   (swap_info[hp_index]->flags & SWP_WRITEOK)) {
-                       type = hp_index;
-                       swap_list.next = type;
-               }
-
-               si = swap_info[type];
-               next = si->next;
-               if (next < 0 ||
-                   (!wrapped && si->prio != swap_info[next]->prio)) {
-                       next = swap_list.head;
-                       wrapped++;
-               }
-
+       list_for_each(tmp, &swap_list_head) {
+               si = list_entry(tmp, typeof(*si), list);
                spin_lock(&si->lock);
-               if (!si->highest_bit) {
-                       spin_unlock(&si->lock);
-                       continue;
-               }
-               if (!(si->flags & SWP_WRITEOK)) {
+               if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
                        spin_unlock(&si->lock);
                        continue;
                }
 
-               swap_list.next = next;
+               /*
+                * rotate the current swap_info that we're going to use
+                * to after any other swap_info that have the same prio,
+                * so that all equal-priority swap_info get used equally
+                */
+               next = si;
+               list_for_each_entry_continue(next, &swap_list_head, list) {
+                       if (si->prio != next->prio)
+                               break;
+                       list_rotate_left(&si->list);
+                       next = si;
+               }
 
                spin_unlock(&swap_lock);
                /* This is called for allocating swap entry for cache */
                offset = scan_swap_map(si, SWAP_HAS_CACHE);
                spin_unlock(&si->lock);
                if (offset)
-                       return swp_entry(type, offset);
+                       return swp_entry(si->type, offset);
                spin_lock(&swap_lock);
-               next = swap_list.next;
+               /*
+                * if we got here, it's likely that si was almost full before,
+                * and since scan_swap_map() can drop the si->lock, multiple
+                * callers probably all tried to get a page from the same si
+                * and it filled up before we could get one.  So we need to
+                * try again.  Since we dropped the swap_lock, there may now
+                * be non-full higher priority swap_infos, and this si may have
+                * even been removed from the list (although very unlikely).
+                * Let's start over.
+                */
+               tmp = &swap_list_head;
        }
 
        atomic_long_inc(&nr_swap_pages);
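
The non-obvious step above is list_rotate_left(&si->list): with si's own node used as the pivot, the entry immediately after si is moved to just before it, which slides si one slot back; repeating until the next entry has a different priority parks si behind all of its equal-priority peers, so consecutive allocations round-robin across them. A rough userspace-only sketch of that effect (again with invented toy_* names, not kernel code):

    #include <stdio.h>
    #include <stddef.h>

    struct toy_list { struct toy_list *prev, *next; };

    static void toy_init(struct toy_list *h) { h->prev = h->next = h; }

    static void toy_del(struct toy_list *n)
    {
        n->prev->next = n->next;
        n->next->prev = n->prev;
    }

    static void toy_add_tail(struct toy_list *n, struct toy_list *pos)
    {
        n->prev = pos->prev;
        n->next = pos;
        pos->prev->next = n;
        pos->prev = n;
    }

    /* like the kernel's list_rotate_left(): move the entry right after
     * @head to the position just before @head */
    static void toy_rotate_left(struct toy_list *head)
    {
        struct toy_list *first = head->next;

        toy_del(first);
        toy_add_tail(first, head);
    }

    struct toy_si { const char *name; int prio; struct toy_list list; };

    #define toy_entry(ptr) \
        ((struct toy_si *)((char *)(ptr) - offsetof(struct toy_si, list)))

    static void dump(struct toy_list *head)
    {
        struct toy_list *p;

        for (p = head->next; p != head; p = p->next)
            printf("%s(%d) ", toy_entry(p)->name, toy_entry(p)->prio);
        printf("\n");
    }

    int main(void)
    {
        struct toy_list head;
        struct toy_si a = { "A", 5 }, b = { "B", 5 }, c = { "C", 1 };

        toy_init(&head);
        toy_add_tail(&a.list, &head);
        toy_add_tail(&b.list, &head);
        toy_add_tail(&c.list, &head);

        dump(&head);            /* A(5) B(5) C(1) */

        /* "use" A: rotate it behind its equal-prio peer, as
         * get_swap_page() does before allocating from it */
        toy_rotate_left(&a.list);
        dump(&head);            /* B(5) A(5) C(1) */

        return 0;
    }
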
@@ -766,27 +757,6 @@ out:
        return NULL;
 }
 
-/*
- * This swap type frees swap entry, check if it is the highest priority swap
- * type which just frees swap entry. get_swap_page() uses
- * highest_priority_index to search highest priority swap type. The
- * swap_info_struct.lock can't protect us if there are multiple swap types
- * active, so we use atomic_cmpxchg.
- */
-static void set_highest_priority_index(int type)
-{
-       int old_hp_index, new_hp_index;
-
-       do {
-               old_hp_index = atomic_read(&highest_priority_index);
-               if (old_hp_index != -1 &&
-                       swap_info[old_hp_index]->prio >= swap_info[type]->prio)
-                       break;
-               new_hp_index = type;
-       } while (atomic_cmpxchg(&highest_priority_index,
-               old_hp_index, new_hp_index) != old_hp_index);
-}
-
 static unsigned char swap_entry_free(struct swap_info_struct *p,
                                     swp_entry_t entry, unsigned char usage)
 {
@@ -830,7 +800,6 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
                        p->lowest_bit = offset;
                if (offset > p->highest_bit)
                        p->highest_bit = offset;
-               set_highest_priority_index(p->type);
                atomic_long_inc(&nr_swap_pages);
                p->inuse_pages--;
                frontswap_invalidate_page(p->type, offset);
@@ -1765,7 +1734,7 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
                                unsigned char *swap_map,
                                struct swap_cluster_info *cluster_info)
 {
-       int i, prev;
+       struct swap_info_struct *si;
 
        if (prio >= 0)
                p->prio = prio;
@@ -1777,18 +1746,28 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
        atomic_long_add(p->pages, &nr_swap_pages);
        total_swap_pages += p->pages;
 
-       /* insert swap space into swap_list: */
-       prev = -1;
-       for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
-               if (p->prio >= swap_info[i]->prio)
-                       break;
-               prev = i;
+       assert_spin_locked(&swap_lock);
+       BUG_ON(!list_empty(&p->list));
+       /*
+        * insert into swap list; the list is in priority order,
+        * so that get_swap_page() can get a page from the highest
+        * priority swap_info_struct with available page(s), and
+        * swapoff can adjust the auto-assigned (i.e. negative) prio
+        * values for any lower-priority swap_info_structs when
+        * removing a negative-prio swap_info_struct
+        */
+       list_for_each_entry(si, &swap_list_head, list) {
+               if (p->prio >= si->prio) {
+                       list_add_tail(&p->list, &si->list);
+                       return;
+               }
        }
-       p->next = i;
-       if (prev < 0)
-               swap_list.head = swap_list.next = p->type;
-       else
-               swap_info[prev]->next = p->type;
+       /*
+        * this covers two cases:
+        * 1) p->prio is less than all existing prio
+        * 2) the swap list is empty
+        */
+       list_add_tail(&p->list, &swap_list_head);
 }
 
 static void enable_swap_info(struct swap_info_struct *p, int prio,
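
One detail that is easy to misread in _enable_swap_info(): list_add_tail(&p->list, &si->list) links p in immediately before si, so a new entry lands after every strictly higher-priority entry and ahead of any existing entries of equal priority (which is harmless, since get_swap_page() rotates equal-priority entries anyway). A worked example with assumed priorities, for illustration only:

    existing list (highest prio first):   10   5   -1
    swapon with prio 5:                   10   5(new)   5   -1
    then swapon with auto prio (-2):      10   5(new)   5   -1   -2   (falls through to the tail)
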
@@ -1823,8 +1802,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        struct address_space *mapping;
        struct inode *inode;
        struct filename *pathname;
-       int i, type, prev;
-       int err;
+       int err, found = 0;
        unsigned int old_block_size;
 
        if (!capable(CAP_SYS_ADMIN))
@@ -1842,17 +1820,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                goto out;
 
        mapping = victim->f_mapping;
-       prev = -1;
        spin_lock(&swap_lock);
-       for (type = swap_list.head; type >= 0; type = swap_info[type]->next) {
-               p = swap_info[type];
+       list_for_each_entry(p, &swap_list_head, list) {
                if (p->flags & SWP_WRITEOK) {
-                       if (p->swap_file->f_mapping == mapping)
+                       if (p->swap_file->f_mapping == mapping) {
+                               found = 1;
                                break;
+                       }
                }
-               prev = type;
        }
-       if (type < 0) {
+       if (!found) {
                err = -EINVAL;
                spin_unlock(&swap_lock);
                goto out_dput;
@@ -1864,20 +1841,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                spin_unlock(&swap_lock);
                goto out_dput;
        }
-       if (prev < 0)
-               swap_list.head = p->next;
-       else
-               swap_info[prev]->next = p->next;
-       if (type == swap_list.next) {
-               /* just pick something that's safe... */
-               swap_list.next = swap_list.head;
-       }
        spin_lock(&p->lock);
        if (p->prio < 0) {
-               for (i = p->next; i >= 0; i = swap_info[i]->next)
-                       swap_info[i]->prio = p->prio--;
+               struct swap_info_struct *si = p;
+
+               list_for_each_entry_continue(si, &swap_list_head, list) {
+                       si->prio++;
+               }
                least_priority++;
        }
+       list_del_init(&p->list);
        atomic_long_sub(p->pages, &nr_swap_pages);
        total_swap_pages -= p->pages;
        p->flags &= ~SWP_WRITEOK;
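
Because the list is priority ordered, the entries visited by list_for_each_entry_continue() above are exactly the swap areas whose auto-assigned priority is more negative than p's, and incrementing each one closes the hole that removing p leaves in the sequence. A worked example with assumed values:

    after three auto-prio swapons:   prios -1, -2, -3   (least_priority == -3)
    swapoff of the -2 area:          the -3 entry becomes -2, least_priority becomes -2
    result:                          prios -1, -2       (least_priority == -2)
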
@@ -1885,7 +1858,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        spin_unlock(&swap_lock);
 
        set_current_oom_origin();
-       err = try_to_unuse(type, false, 0); /* force all pages to be unused */
+       err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
        clear_current_oom_origin();
 
        if (err) {
@@ -1926,7 +1899,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        frontswap_map = frontswap_map_get(p);
        spin_unlock(&p->lock);
        spin_unlock(&swap_lock);
-       frontswap_invalidate_area(type);
+       frontswap_invalidate_area(p->type);
        frontswap_map_set(p, NULL);
        mutex_unlock(&swapon_mutex);
        free_percpu(p->percpu_cluster);
@@ -1935,7 +1908,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        vfree(cluster_info);
        vfree(frontswap_map);
        /* Destroy swap account information */
-       swap_cgroup_swapoff(type);
+       swap_cgroup_swapoff(p->type);
 
        inode = mapping->host;
        if (S_ISBLK(inode->i_mode)) {
@@ -2142,8 +2115,8 @@ static struct swap_info_struct *alloc_swap_info(void)
                 */
        }
        INIT_LIST_HEAD(&p->first_swap_extent.list);
+       INIT_LIST_HEAD(&p->list);
        p->flags = SWP_USED;
-       p->next = -1;
        spin_unlock(&swap_lock);
        spin_lock_init(&p->lock);
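
Initializing p->list here (the node points at itself) is what lets the BUG_ON(!list_empty(&p->list)) in _enable_swap_info() and the list_del_init(&p->list) in swapoff treat list_empty() on the element as an "is this swap_info currently on the swap list?" check. A minimal kernel-style fragment of that invariant (illustrative only, kernel context assumed, not part of this patch):

    static void list_membership_demo(struct swap_info_struct *p)
    {
        INIT_LIST_HEAD(&p->list);           /* node points at itself */
        WARN_ON(!list_empty(&p->list));     /* "not on any list" */

        list_add_tail(&p->list, &swap_list_head);
        WARN_ON(list_empty(&p->list));      /* now linked in */

        list_del_init(&p->list);            /* unlink and re-init */
        WARN_ON(!list_empty(&p->list));     /* back to "not on a list" */
    }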