disable_radix [PPC]
Disable RADIX MMU mode on POWER9
+ radix_hcall_invalidate=on [PPC/PSERIES]
+ Disable RADIX GTSE feature and use hcall for TLB
+ invalidate.
+
disable_tlbie [PPC]
Disable TLBIE instruction. Currently does not work
with KVM, with HASH MMU, or with coherent accelerators.
fragmentation. Defaults to 1 for systems with
more than 32MB of RAM, 0 otherwise.
- slub_debug[=options[,slabs]] [MM, SLUB]
+ slub_debug[=options[,slabs][;[options[,slabs]]...] [MM, SLUB]
Enabling slub_debug allows one to determine the
culprit if slab objects become corrupted. Enabling
slub_debug can create guard zones around objects and
#include <asm/mmu_context.h>
#include <asm/mcf_pgalloc.h>
#include <asm/tlbflush.h>
+ #include <asm/pgalloc.h>
#define KMAPAREA(x) ((x >= VMALLOC_START) && (x < KMAP_END))
unsigned long address, size;
unsigned long next_pgtable, bootmem_end;
unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
- enum zone_type zone;
int i;
empty_zero_page = (void *) memblock_alloc(PAGE_SIZE, PAGE_SIZE);
/*
* Steal a context from a task that has one at the moment.
- * This is only used on 8xx and 4xx and we presently assume that
- * they don't do SMP. If they do then thicfpgalloc.hs will have to check
- * whether the MM we steal is in use.
- * We also assume that this is only used on systems that don't
- * use an MMU hash table - this is true for 8xx and 4xx.
* This isn't an LRU system, it just frees up each context in
* turn (sort-of pseudo-random replacement :). This would be the
* place to implement an LRU scheme if anyone was motivated to do it.
#include <linux/slab.h>
#include <linux/hugetlb.h>
- #include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu.h>
btext_unmap();
#endif
+ kasan_mmu_init();
+
setup_kup();
/* Shortly after that, the entire linear mapping will be available */
* fall back to system memory if the altmap allocation fail.
*/
if (altmap && !altmap_cross_boundary(altmap, start, page_size)) {
- p = altmap_alloc_block_buf(page_size, altmap);
+ p = vmemmap_alloc_block_buf(page_size, node, altmap);
if (!p)
pr_debug("altmap block allocation failed, falling back to system memory");
}
if (!p)
- p = vmemmap_alloc_block_buf(page_size, node);
+ p = vmemmap_alloc_block_buf(page_size, node, NULL);
if (!p)
return -ENOMEM;
}
if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
OV5_FEAT(OV5_RADIX_GTSE))) {
- pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
- }
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
+ } else
+ cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
/* Do radix anyway - the hypervisor said we had to */
cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
} else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
/* Hypervisor only supports hash - disable radix */
cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
}
}
#include <linux/dma-direct.h>
#include <linux/kprobes.h>
- #include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
unsigned long nr_pages = size >> PAGE_SHIFT;
int rc;
- resize_hpt_for_hotplug(memblock_phys_mem_size());
-
start = (unsigned long)__va(start);
rc = create_section_mapping(start, start + size, nid,
params->pgprot);
* hit that section of memory
*/
vm_unmap_aliases();
-
- if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
- pr_warn("Hash collision while resizing HPT\n");
}
#endif
void __init initmem_init(void)
{
- /* XXX need to clip this if using highmem? */
- sparse_memory_present_with_active_regions(0);
sparse_init();
}
}
}
-/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
+/*
+ * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
* info is found.
*/
static int associativity_to_nid(const __be32 *associativity)
nid = of_read_number(&associativity[min_common_depth], 1);
/* POWER4 LPAR uses 0xffff as invalid node */
- if (nid == 0xffff || nid >= MAX_NUMNODES)
+ if (nid == 0xffff || nid >= nr_node_ids)
nid = NUMA_NO_NODE;
if (nid > 0 &&
index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
nid = of_read_number(&aa.arrays[index], 1);
- if (nid == 0xffff || nid >= MAX_NUMNODES)
+ if (nid == 0xffff || nid >= nr_node_ids)
nid = default_nid;
if (nid > 0) {
* Extract NUMA information from the ibm,dynamic-reconfiguration-memory
* node. This assumes n_mem_{addr,size}_cells have been set.
*/
-static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
- const __be32 **usm)
+static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+ const __be32 **usm,
+ void *data)
{
unsigned int ranges, is_kexec_kdump = 0;
unsigned long base, size, sz;
*/
if ((lmb->flags & DRCONF_MEM_RESERVED)
|| !(lmb->flags & DRCONF_MEM_ASSIGNED))
- return;
+ return 0;
if (*usm)
is_kexec_kdump = 1;
if (is_kexec_kdump) {
ranges = read_usm_ranges(usm);
if (!ranges) /* there are no (base, size) duple */
- return;
+ return 0;
}
do {
if (sz)
memblock_set_node(base, sz, &memblock.memory, nid);
} while (--ranges);
+
+ return 0;
}
static int __init parse_numa_properties(void)
*/
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (memory) {
- walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
+ walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
of_node_put(memory);
}
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
setup_node_data(nid, start_pfn, end_pfn);
- sparse_memory_present_with_active_regions(nid);
}
sparse_init();
}
early_param("numa", early_numa);
-/*
- * The platform can inform us through one of several mechanisms
- * (post-migration device tree updates, PRRN or VPHN) that the NUMA
- * assignment of a resource has changed. This controls whether we act
- * on that. Disabled by default.
- */
-static bool topology_updates_enabled;
-
-static int __init early_topology_updates(char *p)
-{
- if (!p)
- return 0;
-
- if (!strcmp(p, "on")) {
- pr_warn("Caution: enabling topology updates\n");
- topology_updates_enabled = true;
- }
-
- return 0;
-}
-early_param("topology_updates", early_topology_updates);
-
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Find the node associated with a hot added memory section for
/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
-struct topology_update_data {
- struct topology_update_data *next;
- unsigned int cpu;
- int old_nid;
- int new_nid;
-};
-
-#define TOPOLOGY_DEF_TIMER_SECS 60
-
-static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
-static cpumask_t cpu_associativity_changes_mask;
-static int vphn_enabled;
-static int prrn_enabled;
-static void reset_topology_timer(void);
-static int topology_timer_secs = 1;
static int topology_inited;
/*
- * Change polling interval for associativity changes.
- */
-int timed_topology_update(int nsecs)
-{
- if (vphn_enabled) {
- if (nsecs > 0)
- topology_timer_secs = nsecs;
- else
- topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
-
- reset_topology_timer();
- }
-
- return 0;
-}
-
-/*
- * Store the current values of the associativity change counters in the
- * hypervisor.
- */
-static void setup_cpu_associativity_change_counters(void)
-{
- int cpu;
-
- /* The VPHN feature supports a maximum of 8 reference points */
- BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
-
- for_each_possible_cpu(cpu) {
- int i;
- u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
- for (i = 0; i < distance_ref_points_depth; i++)
- counts[i] = hypervisor_counts[i];
- }
-}
-
-/*
- * The hypervisor maintains a set of 8 associativity change counters in
- * the VPA of each cpu that correspond to the associativity levels in the
- * ibm,associativity-reference-points property. When an associativity
- * level changes, the corresponding counter is incremented.
- *
- * Set a bit in cpu_associativity_changes_mask for each cpu whose home
- * node associativity levels have changed.
- *
- * Returns the number of cpus with unhandled associativity changes.
- */
-static int update_cpu_associativity_changes_mask(void)
-{
- int cpu;
- cpumask_t *changes = &cpu_associativity_changes_mask;
-
- for_each_possible_cpu(cpu) {
- int i, changed = 0;
- u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
- for (i = 0; i < distance_ref_points_depth; i++) {
- if (hypervisor_counts[i] != counts[i]) {
- counts[i] = hypervisor_counts[i];
- changed = 1;
- }
- }
- if (changed) {
- cpumask_or(changes, changes, cpu_sibling_mask(cpu));
- cpu = cpu_last_thread_sibling(cpu);
- }
- }
-
- return cpumask_weight(changes);
-}
-
-/*
* Retrieve the new associativity information for a virtual processor's
* home node.
*/
switch (rc) {
case H_SUCCESS:
dbg("VPHN hcall succeeded. Reset polling...\n");
- timed_topology_update(0);
goto out;
case H_FUNCTION:
, rc);
break;
}
-
- stop_topology_update();
out:
return rc;
}
return new_nid;
}
-/*
- * Update the CPU maps and sysfs entries for a single CPU when its NUMA
- * characteristics change. This function doesn't perform any locking and is
- * only safe to call from stop_machine().
- */
-static int update_cpu_topology(void *data)
-{
- struct topology_update_data *update;
- unsigned long cpu;
-
- if (!data)
- return -EINVAL;
-
- cpu = smp_processor_id();
-
- for (update = data; update; update = update->next) {
- int new_nid = update->new_nid;
- if (cpu != update->cpu)
- continue;
-
- unmap_cpu_from_node(cpu);
- map_cpu_to_node(cpu, new_nid);
- set_cpu_numa_node(cpu, new_nid);
- set_cpu_numa_mem(cpu, local_memory_node(new_nid));
- vdso_getcpu_init();
- }
-
- return 0;
-}
-
-static int update_lookup_table(void *data)
-{
- struct topology_update_data *update;
-
- if (!data)
- return -EINVAL;
-
- /*
- * Upon topology update, the numa-cpu lookup table needs to be updated
- * for all threads in the core, including offline CPUs, to ensure that
- * future hotplug operations respect the cpu-to-node associativity
- * properly.
- */
- for (update = data; update; update = update->next) {
- int nid, base, j;
-
- nid = update->new_nid;
- base = cpu_first_thread_sibling(update->cpu);
-
- for (j = 0; j < threads_per_core; j++) {
- update_numa_cpu_lookup_table(base + j, nid);
- }
- }
-
- return 0;
-}
-
-/*
- * Update the node maps and sysfs entries for each cpu whose home node
- * has changed. Returns 1 when the topology has changed, and 0 otherwise.
- *
- * cpus_locked says whether we already hold cpu_hotplug_lock.
- */
-int numa_update_cpu_topology(bool cpus_locked)
-{
- unsigned int cpu, sibling, changed = 0;
- struct topology_update_data *updates, *ud;
- cpumask_t updated_cpus;
- struct device *dev;
- int weight, new_nid, i = 0;
-
- if (!prrn_enabled && !vphn_enabled && topology_inited)
- return 0;
-
- weight = cpumask_weight(&cpu_associativity_changes_mask);
- if (!weight)
- return 0;
-
- updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL);
- if (!updates)
- return 0;
-
- cpumask_clear(&updated_cpus);
-
- for_each_cpu(cpu, &cpu_associativity_changes_mask) {
- /*
- * If siblings aren't flagged for changes, updates list
- * will be too short. Skip on this update and set for next
- * update.
- */
- if (!cpumask_subset(cpu_sibling_mask(cpu),
- &cpu_associativity_changes_mask)) {
- pr_info("Sibling bits not set for associativity "
- "change, cpu%d\n", cpu);
- cpumask_or(&cpu_associativity_changes_mask,
- &cpu_associativity_changes_mask,
- cpu_sibling_mask(cpu));
- cpu = cpu_last_thread_sibling(cpu);
- continue;
- }
-
- new_nid = find_and_online_cpu_nid(cpu);
-
- if (new_nid == numa_cpu_lookup_table[cpu]) {
- cpumask_andnot(&cpu_associativity_changes_mask,
- &cpu_associativity_changes_mask,
- cpu_sibling_mask(cpu));
- dbg("Assoc chg gives same node %d for cpu%d\n",
- new_nid, cpu);
- cpu = cpu_last_thread_sibling(cpu);
- continue;
- }
-
- for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
- ud = &updates[i++];
- ud->next = &updates[i];
- ud->cpu = sibling;
- ud->new_nid = new_nid;
- ud->old_nid = numa_cpu_lookup_table[sibling];
- cpumask_set_cpu(sibling, &updated_cpus);
- }
- cpu = cpu_last_thread_sibling(cpu);
- }
-
- /*
- * Prevent processing of 'updates' from overflowing array
- * where last entry filled in a 'next' pointer.
- */
- if (i)
- updates[i-1].next = NULL;
-
- pr_debug("Topology update for the following CPUs:\n");
- if (cpumask_weight(&updated_cpus)) {
- for (ud = &updates[0]; ud; ud = ud->next) {
- pr_debug("cpu %d moving from node %d "
- "to %d\n", ud->cpu,
- ud->old_nid, ud->new_nid);
- }
- }
-
- /*
- * In cases where we have nothing to update (because the updates list
- * is too short or because the new topology is same as the old one),
- * skip invoking update_cpu_topology() via stop-machine(). This is
- * necessary (and not just a fast-path optimization) since stop-machine
- * can end up electing a random CPU to run update_cpu_topology(), and
- * thus trick us into setting up incorrect cpu-node mappings (since
- * 'updates' is kzalloc()'ed).
- *
- * And for the similar reason, we will skip all the following updating.
- */
- if (!cpumask_weight(&updated_cpus))
- goto out;
-
- if (cpus_locked)
- stop_machine_cpuslocked(update_cpu_topology, &updates[0],
- &updated_cpus);
- else
- stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
-
- /*
- * Update the numa-cpu lookup table with the new mappings, even for
- * offline CPUs. It is best to perform this update from the stop-
- * machine context.
- */
- if (cpus_locked)
- stop_machine_cpuslocked(update_lookup_table, &updates[0],
- cpumask_of(raw_smp_processor_id()));
- else
- stop_machine(update_lookup_table, &updates[0],
- cpumask_of(raw_smp_processor_id()));
-
- for (ud = &updates[0]; ud; ud = ud->next) {
- unregister_cpu_under_node(ud->cpu, ud->old_nid);
- register_cpu_under_node(ud->cpu, ud->new_nid);
-
- dev = get_cpu_device(ud->cpu);
- if (dev)
- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
- cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
- changed = 1;
- }
-
-out:
- kfree(updates);
- return changed;
-}
-
-int arch_update_cpu_topology(void)
-{
- return numa_update_cpu_topology(true);
-}
-
-static void topology_work_fn(struct work_struct *work)
-{
- rebuild_sched_domains();
-}
-static DECLARE_WORK(topology_work, topology_work_fn);
-
-static void topology_schedule_update(void)
-{
- schedule_work(&topology_work);
-}
-
-static void topology_timer_fn(struct timer_list *unused)
-{
- if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
- topology_schedule_update();
- else if (vphn_enabled) {
- if (update_cpu_associativity_changes_mask() > 0)
- topology_schedule_update();
- reset_topology_timer();
- }
-}
-static struct timer_list topology_timer;
-
-static void reset_topology_timer(void)
-{
- if (vphn_enabled)
- mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
-}
-
-#ifdef CONFIG_SMP
-
-static int dt_update_callback(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct of_reconfig_data *update = data;
- int rc = NOTIFY_DONE;
-
- switch (action) {
- case OF_RECONFIG_UPDATE_PROPERTY:
- if (of_node_is_type(update->dn, "cpu") &&
- !of_prop_cmp(update->prop->name, "ibm,associativity")) {
- u32 core_id;
- of_property_read_u32(update->dn, "reg", &core_id);
- rc = dlpar_cpu_readd(core_id);
- rc = NOTIFY_OK;
- }
- break;
- }
-
- return rc;
-}
-
-static struct notifier_block dt_update_nb = {
- .notifier_call = dt_update_callback,
-};
-
-#endif
-
-/*
- * Start polling for associativity changes.
- */
-int start_topology_update(void)
-{
- int rc = 0;
-
- if (!topology_updates_enabled)
- return 0;
-
- if (firmware_has_feature(FW_FEATURE_PRRN)) {
- if (!prrn_enabled) {
- prrn_enabled = 1;
-#ifdef CONFIG_SMP
- rc = of_reconfig_notifier_register(&dt_update_nb);
-#endif
- }
- }
- if (firmware_has_feature(FW_FEATURE_VPHN) &&
- lppaca_shared_proc(get_lppaca())) {
- if (!vphn_enabled) {
- vphn_enabled = 1;
- setup_cpu_associativity_change_counters();
- timer_setup(&topology_timer, topology_timer_fn,
- TIMER_DEFERRABLE);
- reset_topology_timer();
- }
- }
-
- pr_info("Starting topology update%s%s\n",
- (prrn_enabled ? " prrn_enabled" : ""),
- (vphn_enabled ? " vphn_enabled" : ""));
-
- return rc;
-}
-
-/*
- * Disable polling for VPHN associativity changes.
- */
-int stop_topology_update(void)
-{
- int rc = 0;
-
- if (!topology_updates_enabled)
- return 0;
-
- if (prrn_enabled) {
- prrn_enabled = 0;
-#ifdef CONFIG_SMP
- rc = of_reconfig_notifier_unregister(&dt_update_nb);
-#endif
- }
- if (vphn_enabled) {
- vphn_enabled = 0;
- rc = del_timer_sync(&topology_timer);
- }
-
- pr_info("Stopping topology update\n");
-
- return rc;
-}
-
-int prrn_is_enabled(void)
-{
- return prrn_enabled;
-}
-
-static int topology_read(struct seq_file *file, void *v)
-{
- if (vphn_enabled || prrn_enabled)
- seq_puts(file, "on\n");
- else
- seq_puts(file, "off\n");
-
- return 0;
-}
-
-static int topology_open(struct inode *inode, struct file *file)
-{
- return single_open(file, topology_read, NULL);
-}
-
-static ssize_t topology_write(struct file *file, const char __user *buf,
- size_t count, loff_t *off)
-{
- char kbuf[4]; /* "on" or "off" plus null. */
- int read_len;
-
- read_len = count < 3 ? count : 3;
- if (copy_from_user(kbuf, buf, read_len))
- return -EINVAL;
-
- kbuf[read_len] = '\0';
-
- if (!strncmp(kbuf, "on", 2)) {
- topology_updates_enabled = true;
- start_topology_update();
- } else if (!strncmp(kbuf, "off", 3)) {
- stop_topology_update();
- topology_updates_enabled = false;
- } else
- return -EINVAL;
-
- return count;
-}
-
-static const struct proc_ops topology_proc_ops = {
- .proc_read = seq_read,
- .proc_write = topology_write,
- .proc_open = topology_open,
- .proc_release = single_release,
-};
-
static int topology_update_init(void)
{
- start_topology_update();
-
- if (vphn_enabled)
- topology_schedule_update();
-
- if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops))
- return -ENOMEM;
-
topology_inited = 1;
return 0;
}
#include <linux/seq_file.h>
#include <linux/const.h>
#include <asm/page.h>
- #include <asm/pgalloc.h>
#include <asm/plpar_wrappers.h>
#include <linux/memblock.h>
#include <asm/firmware.h>
+ #include <asm/pgalloc.h>
struct pg_state {
struct seq_file *seq;
for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {
lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
- if (lpar_rc != H_SUCCESS)
+ if (lpar_rc)
continue;
for (j = 0; j < 4; j++) {
if (HPTE_V_COMPARE(ptes[j].v, want_v) &&
#include <asm/fixmap.h>
#include <linux/const.h>
#include <asm/page.h>
- #include <asm/pgalloc.h>
#include <asm/hugetlb.h>
#include <mm/mmu_decl.h>
static struct addr_marker address_markers[] = {
{ 0, "Start of kernel VM" },
+#ifdef MODULES_VADDR
+ { 0, "modules start" },
+ { 0, "modules end" },
+#endif
{ 0, "vmalloc() Area" },
{ 0, "vmalloc() End" },
#ifdef CONFIG_PPC64
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
+static void note_page_update_state(struct pg_state *st, unsigned long addr,
+ unsigned int level, u64 val, unsigned long page_size)
+{
+ u64 flag = val & pg_level[level].mask;
+ u64 pa = val & PTE_RPN_MASK;
+
+ st->level = level;
+ st->current_flags = flag;
+ st->start_address = addr;
+ st->start_pa = pa;
+ st->page_size = page_size;
+
+ while (addr >= st->marker[1].start_address) {
+ st->marker++;
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ }
+}
+
static void note_page(struct pg_state *st, unsigned long addr,
unsigned int level, u64 val, unsigned long page_size)
{
/* At first no level is set */
if (!st->level) {
- st->level = level;
- st->current_flags = flag;
- st->start_address = addr;
- st->start_pa = pa;
- st->last_pa = pa;
- st->page_size = page_size;
pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ note_page_update_state(st, addr, level, val, page_size);
/*
* Dump the section of virtual memory when:
* - the PTE flags from one entry to the next differs.
* Address indicates we have passed the end of the
* current section of virtual memory
*/
- while (addr >= st->marker[1].start_address) {
- st->marker++;
- pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
- }
- st->start_address = addr;
- st->start_pa = pa;
- st->last_pa = pa;
- st->page_size = page_size;
- st->current_flags = flag;
- st->level = level;
- } else {
- st->last_pa = pa;
+ note_page_update_state(st, addr, level, val, page_size);
}
+ st->last_pa = pa;
}
static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
{
int i = 0;
+#ifdef CONFIG_PPC64
address_markers[i++].start_address = PAGE_OFFSET;
+#else
+ address_markers[i++].start_address = TASK_SIZE;
+#endif
+#ifdef MODULES_VADDR
+ address_markers[i++].start_address = MODULES_VADDR;
+ address_markers[i++].start_address = MODULES_END;
+#endif
address_markers[i++].start_address = VMALLOC_START;
address_markers[i++].start_address = VMALLOC_END;
#ifdef CONFIG_PPC64
struct pg_state st = {
.seq = m,
.marker = address_markers,
- .start_address = PAGE_OFFSET,
+ .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
};
#ifdef CONFIG_PPC64
.seq = NULL,
.marker = address_markers,
.check_wx = true,
- .start_address = PAGE_OFFSET,
+ .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
};
#ifdef CONFIG_PPC64
}
#endif
+static void __init resource_init(void)
+{
+ struct memblock_region *region;
+
+ for_each_memblock(memory, region) {
+ struct resource *res;
+
+ res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
+ if (!res)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(struct resource));
+
+ if (memblock_is_nomap(region)) {
+ res->name = "reserved";
+ res->flags = IORESOURCE_MEM;
+ } else {
+ res->name = "System RAM";
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ }
+ res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
+ res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+
+ request_resource(&iomem_resource, res);
+ }
+}
+
void __init paging_init(void)
{
setup_vm_final();
- memblocks_present();
sparse_init();
setup_zero_page();
zone_sizes_init();
+ resource_init();
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
- return vmemmap_populate_basepages(start, end, node);
+ return vmemmap_populate_basepages(start, end, node, NULL);
}
#endif
#include <linux/compat.h>
#include <trace/syscall.h>
#include <asm/page.h>
- #include <asm/pgalloc.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/switch_to.h>
static int s390_regs_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
+ unsigned pos;
if (target == current)
save_access_regs(target->thread.acrs);
- if (kbuf) {
- unsigned long *k = kbuf;
- while (count > 0) {
- *k++ = __peek_user(target, pos);
- count -= sizeof(*k);
- pos += sizeof(*k);
- }
- } else {
- unsigned long __user *u = ubuf;
- while (count > 0) {
- if (__put_user(__peek_user(target, pos), u++))
- return -EFAULT;
- count -= sizeof(*u);
- pos += sizeof(*u);
- }
- }
+ for (pos = 0; pos < sizeof(s390_regs); pos += sizeof(long))
+ membuf_store(&to, __peek_user(target, pos));
return 0;
}
}
static int s390_fpregs_get(struct task_struct *target,
- const struct user_regset *regset, unsigned int pos,
- unsigned int count, void *kbuf, void __user *ubuf)
+ const struct user_regset *regset,
+ struct membuf to)
{
_s390_fp_regs fp_regs;
fp_regs.fpc = target->thread.fpu.fpc;
fpregs_store(&fp_regs, &target->thread.fpu);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &fp_regs, 0, -1);
+ return membuf_write(&to, &fp_regs, sizeof(fp_regs));
}
static int s390_fpregs_set(struct task_struct *target,
static int s390_last_break_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- if (count > 0) {
- if (kbuf) {
- unsigned long *k = kbuf;
- *k = target->thread.last_break;
- } else {
- unsigned long __user *u = ubuf;
- if (__put_user(target->thread.last_break, u))
- return -EFAULT;
- }
- }
- return 0;
+ return membuf_store(&to, target->thread.last_break);
}
static int s390_last_break_set(struct task_struct *target,
static int s390_tdb_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct pt_regs *regs = task_pt_regs(target);
- unsigned char *data;
if (!(regs->int_code & 0x200))
return -ENODATA;
- data = target->thread.trap_tdb;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256);
+ return membuf_write(&to, target->thread.trap_tdb, 256);
}
static int s390_tdb_set(struct task_struct *target,
static int s390_vxrs_low_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
__u64 vxrs[__NUM_VXRS_LOW];
int i;
save_fpu_regs();
for (i = 0; i < __NUM_VXRS_LOW; i++)
vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+ return membuf_write(&to, vxrs, sizeof(vxrs));
}
static int s390_vxrs_low_set(struct task_struct *target,
static int s390_vxrs_high_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- __vector128 vxrs[__NUM_VXRS_HIGH];
-
if (!MACHINE_HAS_VX)
return -ENODEV;
if (target == current)
save_fpu_regs();
- memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs));
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+ return membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ __NUM_VXRS_HIGH * sizeof(__vector128));
}
static int s390_vxrs_high_set(struct task_struct *target,
static int s390_system_call_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- unsigned int *data = &target->thread.system_call;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(unsigned int));
+ return membuf_store(&to, target->thread.system_call);
}
static int s390_system_call_set(struct task_struct *target,
static int s390_gs_cb_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct gs_cb *data = target->thread.gs_cb;
return -ENODATA;
if (target == current)
save_gs_cb(data);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(struct gs_cb));
+ return membuf_write(&to, data, sizeof(struct gs_cb));
}
static int s390_gs_cb_set(struct task_struct *target,
static int s390_gs_bc_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct gs_cb *data = target->thread.gs_bc_cb;
return -ENODEV;
if (!data)
return -ENODATA;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(struct gs_cb));
+ return membuf_write(&to, data, sizeof(struct gs_cb));
}
static int s390_gs_bc_set(struct task_struct *target,
static int s390_runtime_instr_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct runtime_instr_cb *data = target->thread.ri_cb;
if (!data)
return -ENODATA;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(struct runtime_instr_cb));
+ return membuf_write(&to, data, sizeof(struct runtime_instr_cb));
}
static int s390_runtime_instr_set(struct task_struct *target,
.n = sizeof(s390_regs) / sizeof(long),
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_regs_get,
+ .regset_get = s390_regs_get,
.set = s390_regs_set,
},
{
.n = sizeof(s390_fp_regs) / sizeof(long),
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_fpregs_get,
+ .regset_get = s390_fpregs_get,
.set = s390_fpregs_set,
},
{
.n = 1,
.size = sizeof(unsigned int),
.align = sizeof(unsigned int),
- .get = s390_system_call_get,
+ .regset_get = s390_system_call_get,
.set = s390_system_call_set,
},
{
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_last_break_get,
+ .regset_get = s390_last_break_get,
.set = s390_last_break_set,
},
{
.n = 1,
.size = 256,
.align = 1,
- .get = s390_tdb_get,
+ .regset_get = s390_tdb_get,
.set = s390_tdb_set,
},
{
.n = __NUM_VXRS_LOW,
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_vxrs_low_get,
+ .regset_get = s390_vxrs_low_get,
.set = s390_vxrs_low_set,
},
{
.n = __NUM_VXRS_HIGH,
.size = sizeof(__vector128),
.align = sizeof(__vector128),
- .get = s390_vxrs_high_get,
+ .regset_get = s390_vxrs_high_get,
.set = s390_vxrs_high_set,
},
{
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_gs_cb_get,
+ .regset_get = s390_gs_cb_get,
.set = s390_gs_cb_set,
},
{
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_gs_bc_get,
+ .regset_get = s390_gs_bc_get,
.set = s390_gs_bc_set,
},
{
.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_runtime_instr_get,
+ .regset_get = s390_runtime_instr_get,
.set = s390_runtime_instr_set,
},
};
#ifdef CONFIG_COMPAT
static int s390_compat_regs_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
+ unsigned n;
+
if (target == current)
save_access_regs(target->thread.acrs);
- if (kbuf) {
- compat_ulong_t *k = kbuf;
- while (count > 0) {
- *k++ = __peek_user_compat(target, pos);
- count -= sizeof(*k);
- pos += sizeof(*k);
- }
- } else {
- compat_ulong_t __user *u = ubuf;
- while (count > 0) {
- if (__put_user(__peek_user_compat(target, pos), u++))
- return -EFAULT;
- count -= sizeof(*u);
- pos += sizeof(*u);
- }
- }
+ for (n = 0; n < sizeof(s390_compat_regs); n += sizeof(compat_ulong_t))
+ membuf_store(&to, __peek_user_compat(target, n));
return 0;
}
static int s390_compat_regs_high_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
compat_ulong_t *gprs_high;
+ int i;
- gprs_high = (compat_ulong_t *)
- &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
- if (kbuf) {
- compat_ulong_t *k = kbuf;
- while (count > 0) {
- *k++ = *gprs_high;
- gprs_high += 2;
- count -= sizeof(*k);
- }
- } else {
- compat_ulong_t __user *u = ubuf;
- while (count > 0) {
- if (__put_user(*gprs_high, u++))
- return -EFAULT;
- gprs_high += 2;
- count -= sizeof(*u);
- }
- }
+ gprs_high = (compat_ulong_t *)task_pt_regs(target)->gprs;
+ for (i = 0; i < NUM_GPRS; i++, gprs_high += 2)
+ membuf_store(&to, *gprs_high);
return 0;
}
static int s390_compat_last_break_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- compat_ulong_t last_break;
+ compat_ulong_t last_break = target->thread.last_break;
- if (count > 0) {
- last_break = target->thread.last_break;
- if (kbuf) {
- unsigned long *k = kbuf;
- *k = last_break;
- } else {
- unsigned long __user *u = ubuf;
- if (__put_user(last_break, u))
- return -EFAULT;
- }
- }
- return 0;
+ return membuf_store(&to, (unsigned long)last_break);
}
static int s390_compat_last_break_set(struct task_struct *target,
.n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
.align = sizeof(compat_long_t),
- .get = s390_compat_regs_get,
+ .regset_get = s390_compat_regs_get,
.set = s390_compat_regs_set,
},
{
.n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
.align = sizeof(compat_long_t),
- .get = s390_fpregs_get,
+ .regset_get = s390_fpregs_get,
.set = s390_fpregs_set,
},
{
.n = 1,
.size = sizeof(compat_uint_t),
.align = sizeof(compat_uint_t),
- .get = s390_system_call_get,
+ .regset_get = s390_system_call_get,
.set = s390_system_call_set,
},
{
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_compat_last_break_get,
+ .regset_get = s390_compat_last_break_get,
.set = s390_compat_last_break_set,
},
{
.n = 1,
.size = 256,
.align = 1,
- .get = s390_tdb_get,
+ .regset_get = s390_tdb_get,
.set = s390_tdb_set,
},
{
.n = __NUM_VXRS_LOW,
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_vxrs_low_get,
+ .regset_get = s390_vxrs_low_get,
.set = s390_vxrs_low_set,
},
{
.n = __NUM_VXRS_HIGH,
.size = sizeof(__vector128),
.align = sizeof(__vector128),
- .get = s390_vxrs_high_get,
+ .regset_get = s390_vxrs_high_get,
.set = s390_vxrs_high_set,
},
{
.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
.align = sizeof(compat_long_t),
- .get = s390_compat_regs_high_get,
+ .regset_get = s390_compat_regs_high_get,
.set = s390_compat_regs_high_set,
},
{
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_gs_cb_get,
+ .regset_get = s390_gs_cb_get,
.set = s390_gs_cb_set,
},
{
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_gs_bc_get,
+ .regset_get = s390_gs_bc_get,
.set = s390_gs_bc_set,
},
{
.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_runtime_instr_get,
+ .regset_get = s390_runtime_instr_get,
.set = s390_runtime_instr_set,
},
};
void __init paging_init(void)
{
- sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
/*
p4d_t *p4d;
pud_t *pud;
- p4d = p4d_offset(pgd, addr);
- if (p4d_none(*p4d)) {
- /* Can only happen with 5-level paging */
- p4d = p4d_alloc(&init_mm, pgd, addr);
- if (!p4d) {
- lvl = "p4d";
- goto failed;
- }
- }
+ lvl = "p4d";
+ p4d = p4d_alloc(&init_mm, pgd, addr);
+ if (!p4d)
+ goto failed;
+ /*
+ * With 5-level paging the P4D level is not folded. So the PGDs
+ * are now populated and there is no need to walk down to the
+ * PUD level.
+ */
if (pgtable_l5_enabled())
continue;
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- /* Ends up here only with 4-level paging */
- pud = pud_alloc(&init_mm, p4d, addr);
- if (!pud) {
- lvl = "pud";
- goto failed;
- }
- }
+ lvl = "pud";
+ pud = pud_alloc(&init_mm, p4d, addr);
+ if (!pud)
+ goto failed;
}
return;
if (pmd_none(*pmd)) {
void *p;
- if (altmap)
- p = altmap_alloc_block_buf(PMD_SIZE, altmap);
- else
- p = vmemmap_alloc_block_buf(PMD_SIZE, node);
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
if (p) {
pte_t entry;
vmemmap_verify((pte_t *)pmd, node, addr, next);
continue;
}
- if (vmemmap_populate_basepages(addr, next, node))
+ if (vmemmap_populate_basepages(addr, next, node, NULL))
return -ENOMEM;
}
return 0;
int err;
if (end - start < PAGES_PER_SECTION * sizeof(struct page))
- err = vmemmap_populate_basepages(start, end, node);
+ err = vmemmap_populate_basepages(start, end, node, NULL);
else if (boot_cpu_has(X86_FEATURE_PSE))
err = vmemmap_populate_hugepages(start, end, node, altmap);
else if (altmap) {
__func__);
err = -ENOMEM;
} else
- err = vmemmap_populate_basepages(start, end, node);
+ err = vmemmap_populate_basepages(start, end, node, NULL);
if (!err)
sync_global_pgds(start, end - 1);
return err;
#include <linux/sysctl.h>
#include <asm/page.h>
- #include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
return NULL;
}
-#ifdef CONFIG_SPARSEMEM
- {
- unsigned long limit = 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT);
- unsigned long pfn = res->start >> PAGE_SHIFT;
-
- if (pfn > limit) {
- pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
- pfn, limit);
- release_memory_resource(res);
- return NULL;
- }
- }
-#endif
-
return res;
}
if (xen_hotplug_unpopulated) {
st = reserve_additional_memory();
if (st != BP_ECANCELED) {
+ int rc;
+
mutex_unlock(&balloon_mutex);
- wait_event(balloon_wq,
+ rc = wait_event_interruptible(balloon_wq,
!list_empty(&ballooned_pages));
mutex_lock(&balloon_mutex);
- return 0;
+ return rc ? -ENOMEM : 0;
}
}
out_undo:
mutex_unlock(&balloon_mutex);
free_xenballooned_pages(pgno, pages);
+ /*
+ * NB: free_xenballooned_pages will only subtract pgno pages, but since
+ * target_unpopulated is incremented with nr_pages at the start we need
+ * to remove the remaining ones also, or accounting will be screwed.
+ */
+ balloon_stats.target_unpopulated -= nr_pages - pgno;
return ret;
}
EXPORT_SYMBOL(alloc_xenballooned_pages);
#include <linux/miscdevice.h>
#include <linux/moduleparam.h>
- #include <asm/pgalloc.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
static int lock_pages(
struct privcmd_dm_op_buf kbufs[], unsigned int num,
- struct page *pages[], unsigned int nr_pages)
+ struct page *pages[], unsigned int nr_pages, unsigned int *pinned)
{
unsigned int i;
for (i = 0; i < num; i++) {
unsigned int requested;
- int pinned;
+ int page_count;
requested = DIV_ROUND_UP(
offset_in_page(kbufs[i].uptr) + kbufs[i].size,
if (requested > nr_pages)
return -ENOSPC;
- pinned = get_user_pages_fast(
+ page_count = pin_user_pages_fast(
(unsigned long) kbufs[i].uptr,
requested, FOLL_WRITE, pages);
- if (pinned < 0)
- return pinned;
+ if (page_count < 0)
+ return page_count;
- nr_pages -= pinned;
- pages += pinned;
+ *pinned += page_count;
+ nr_pages -= page_count;
+ pages += page_count;
}
return 0;
static void unlock_pages(struct page *pages[], unsigned int nr_pages)
{
- unsigned int i;
-
- if (!pages)
- return;
-
- for (i = 0; i < nr_pages; i++) {
- if (pages[i])
- put_page(pages[i]);
- }
+ unpin_user_pages_dirty_lock(pages, nr_pages, true);
}
static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
struct xen_dm_op_buf *xbufs = NULL;
unsigned int i;
long rc;
+ unsigned int pinned = 0;
if (copy_from_user(&kdata, udata, sizeof(kdata)))
return -EFAULT;
goto out;
}
- rc = lock_pages(kbufs, kdata.num, pages, nr_pages);
- if (rc)
+ rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned);
+ if (rc < 0) {
+ nr_pages = pinned;
goto out;
+ }
for (i = 0; i < kdata.num; i++) {
set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
#ifdef CONFIG_CIFS_DFS_UPCALL
/* These functions must be called with server->srv_mutex held */
-static void reconn_inval_dfs_target(struct TCP_Server_Info *server,
- struct cifs_sb_info *cifs_sb,
- struct dfs_cache_tgt_list *tgt_list,
- struct dfs_cache_tgt_iterator **tgt_it)
+static void reconn_set_next_dfs_target(struct TCP_Server_Info *server,
+ struct cifs_sb_info *cifs_sb,
+ struct dfs_cache_tgt_list *tgt_list,
+ struct dfs_cache_tgt_iterator **tgt_it)
{
const char *name;
- if (!cifs_sb || !cifs_sb->origin_fullpath || !tgt_list ||
- !server->nr_targets)
+ if (!cifs_sb || !cifs_sb->origin_fullpath)
return;
if (!*tgt_it) {
sb = NULL;
} else {
cifs_sb = CIFS_SB(sb);
-
rc = reconn_setup_dfs_targets(cifs_sb, &tgt_list);
- if (rc && (rc != -EOPNOTSUPP)) {
- cifs_server_dbg(VFS, "%s: no target servers for DFS failover\n",
- __func__);
+ if (rc) {
+ cifs_sb = NULL;
+ if (rc != -EOPNOTSUPP) {
+ cifs_server_dbg(VFS, "%s: no target servers for DFS failover\n",
+ __func__);
+ }
} else {
server->nr_targets = dfs_cache_get_nr_tgts(&tgt_list);
}
* feature is disabled, then we will retry last server we
* connected to before.
*/
- reconn_inval_dfs_target(server, cifs_sb, &tgt_list, &tgt_it);
+ reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it);
#endif
rc = reconn_set_ipaddr(server);
if (rc) {
tmp_end++;
if (!(tmp_end < end && tmp_end[1] == delim)) {
/* No it is not. Set the password to NULL */
- kzfree(vol->password);
+ kfree_sensitive(vol->password);
vol->password = NULL;
break;
}
options = end;
}
- kzfree(vol->password);
+ kfree_sensitive(vol->password);
/* Now build new password string */
temp_len = strlen(value);
vol->password = kzalloc(temp_len+1, GFP_KERNEL);
rc = -ENOMEM;
kfree(vol->username);
vol->username = NULL;
- kzfree(vol->password);
+ kfree_sensitive(vol->password);
vol->password = NULL;
goto out_key_put;
}
cifs_cleanup_volume_info_contents(struct smb_vol *volume_info)
{
kfree(volume_info->username);
- kzfree(volume_info->password);
+ kfree_sensitive(volume_info->password);
kfree(volume_info->UNC);
kfree(volume_info->domainname);
kfree(volume_info->iocharset);
static int
expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb,
- int check_prefix)
+ char *ref_path)
{
int rc;
struct dfs_info3_param referral = {0};
- char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
+ char *full_path = NULL, *mdata = NULL;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
return -EREMOTE;
if (IS_ERR(full_path))
return PTR_ERR(full_path);
- /* For DFS paths, skip the first '\' of the UNC */
- ref_path = check_prefix ? full_path + 1 : volume_info->UNC + 1;
-
rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
ref_path, &referral, NULL);
if (!rc) {
return 0;
}
-static int setup_dfs_tgt_conn(const char *path,
+static int setup_dfs_tgt_conn(const char *path, const char *full_path,
const struct dfs_cache_tgt_iterator *tgt_it,
- struct cifs_sb_info *cifs_sb,
- struct smb_vol *vol,
- unsigned int *xid,
- struct TCP_Server_Info **server,
- struct cifs_ses **ses,
+ struct cifs_sb_info *cifs_sb, struct smb_vol *vol, unsigned int *xid,
+ struct TCP_Server_Info **server, struct cifs_ses **ses,
struct cifs_tcon **tcon)
{
int rc;
if (rc)
return rc;
- mdata = cifs_compose_mount_options(cifs_sb->mountdata, path, &ref,
- &fake_devname);
+ mdata = cifs_compose_mount_options(cifs_sb->mountdata, full_path + 1, &ref, &fake_devname);
free_dfs_info_param(&ref);
if (IS_ERR(mdata)) {
mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
rc = mount_get_conns(&fake_vol, cifs_sb, xid, server, ses,
tcon);
- if (!rc) {
+ if (!rc || (*server && *ses)) {
/*
* We were able to connect to new target server.
* Update current volume info with new target server.
return rc;
}
-static int mount_do_dfs_failover(const char *path,
- struct cifs_sb_info *cifs_sb,
- struct smb_vol *vol,
- struct cifs_ses *root_ses,
- unsigned int *xid,
- struct TCP_Server_Info **server,
- struct cifs_ses **ses,
- struct cifs_tcon **tcon)
+static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb,
+ struct smb_vol *vol, struct cifs_ses *root_ses, unsigned int *xid,
+ struct TCP_Server_Info **server, struct cifs_ses **ses,
+ struct cifs_tcon **tcon)
{
int rc;
struct dfs_cache_tgt_list tgt_list;
if (rc)
break;
/* Connect to next DFS target */
- rc = setup_dfs_tgt_conn(path, tgt_it, cifs_sb, vol, xid, server,
- ses, tcon);
- if (!rc || rc == -EACCES || rc == -EOPNOTSUPP)
+ rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, vol, xid, server, ses,
+ tcon);
+ if (!rc || (*server && *ses))
break;
}
if (!rc) {
}
#ifdef CONFIG_CIFS_DFS_UPCALL
-static inline void set_root_tcon(struct cifs_sb_info *cifs_sb,
- struct cifs_tcon *tcon,
- struct cifs_tcon **root)
+static void set_root_ses(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
+ struct cifs_ses **root_ses)
{
- spin_lock(&cifs_tcp_ses_lock);
- tcon->tc_count++;
- tcon->remap = cifs_remap(cifs_sb);
- spin_unlock(&cifs_tcp_ses_lock);
- *root = tcon;
+ if (ses) {
+ spin_lock(&cifs_tcp_ses_lock);
+ ses->ses_count++;
+ ses->tcon_ipc->remap = cifs_remap(cifs_sb);
+ spin_unlock(&cifs_tcp_ses_lock);
+ }
+ *root_ses = ses;
+}
+
+static void put_root_ses(struct cifs_ses *ses)
+{
+ if (ses)
+ cifs_put_smb_ses(ses);
+}
+
+/* Check if a path component is remote and then update @dfs_path accordingly */
+static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
+ const unsigned int xid, struct TCP_Server_Info *server,
+ struct cifs_tcon *tcon, char **dfs_path)
+{
+ char *path, *s;
+ char sep = CIFS_DIR_SEP(cifs_sb), tmp;
+ char *npath;
+ int rc = 0;
+ int added_treename = tcon->Flags & SMB_SHARE_IS_IN_DFS;
+ int skip = added_treename;
+
+ path = cifs_build_path_to_root(vol, cifs_sb, tcon, added_treename);
+ if (!path)
+ return -ENOMEM;
+
+ /*
+ * Walk through the path components in @path and check if they're accessible. In case any of
+ * the components is -EREMOTE, then update @dfs_path with the next DFS referral request path
+ * (NOT including the remaining components).
+ */
+ s = path;
+ do {
+ /* skip separators */
+ while (*s && *s == sep)
+ s++;
+ if (!*s)
+ break;
+ /* next separator */
+ while (*s && *s != sep)
+ s++;
+ /*
+ * if the treename is added, we then have to skip the first
+ * part within the separators
+ */
+ if (skip) {
+ skip = 0;
+ continue;
+ }
+ tmp = *s;
+ *s = 0;
+ rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, path);
+ if (rc && rc == -EREMOTE) {
+ struct smb_vol v = {NULL};
+ /* if @path contains a tree name, skip it in the prefix path */
+ if (added_treename) {
+ rc = cifs_parse_devname(path, &v);
+ if (rc)
+ break;
+ rc = -EREMOTE;
+ npath = build_unc_path_to_root(&v, cifs_sb, true);
+ cifs_cleanup_volume_info_contents(&v);
+ } else {
+ v.UNC = vol->UNC;
+ v.prepath = path + 1;
+ npath = build_unc_path_to_root(&v, cifs_sb, true);
+ }
+ if (IS_ERR(npath)) {
+ rc = PTR_ERR(npath);
+ break;
+ }
+ kfree(*dfs_path);
+ *dfs_path = npath;
+ }
+ *s = tmp;
+ } while (rc == 0);
+
+ kfree(path);
+ return rc;
}
int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
{
int rc = 0;
unsigned int xid;
- struct cifs_ses *ses;
- struct cifs_tcon *root_tcon = NULL;
+ struct TCP_Server_Info *server = NULL;
+ struct cifs_ses *ses = NULL, *root_ses = NULL;
struct cifs_tcon *tcon = NULL;
- struct TCP_Server_Info *server;
- char *root_path = NULL, *full_path = NULL;
- char *old_mountdata, *origin_mountdata = NULL;
- int count;
+ int count = 0;
+ char *ref_path = NULL, *full_path = NULL;
+ char *oldmnt = NULL;
+ char *mntdata = NULL;
rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon);
- if (!rc && tcon) {
- /* If not a standalone DFS root, then check if path is remote */
- rc = dfs_cache_find(xid, ses, cifs_sb->local_nls,
- cifs_remap(cifs_sb), vol->UNC + 1, NULL,
- NULL);
- if (rc) {
- rc = is_path_remote(cifs_sb, vol, xid, server, tcon);
- if (!rc)
- goto out;
- if (rc != -EREMOTE)
- goto error;
- }
- }
/*
- * If first DFS target server went offline and we failed to connect it,
- * server and ses pointers are NULL at this point, though we still have
- * chance to get a cached DFS referral in expand_dfs_referral() and
- * retry next target available in it.
+ * Unconditionally try to get an DFS referral (even cached) to determine whether it is an
+ * DFS mount.
*
- * If a NULL ses ptr is passed to dfs_cache_find(), a lookup will be
- * performed against DFS path and *no* requests will be sent to server
- * for any new DFS referrals. Hence it's safe to skip checking whether
- * server or ses ptr is NULL.
+ * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem
+ * to respond with PATH_NOT_COVERED to requests that include the prefix.
*/
- if (rc == -EACCES || rc == -EOPNOTSUPP)
- goto error;
-
- root_path = build_unc_path_to_root(vol, cifs_sb, false);
- if (IS_ERR(root_path)) {
- rc = PTR_ERR(root_path);
- root_path = NULL;
- goto error;
- }
-
- full_path = build_unc_path_to_root(vol, cifs_sb, true);
- if (IS_ERR(full_path)) {
- rc = PTR_ERR(full_path);
- full_path = NULL;
- goto error;
- }
- /*
- * Perform an unconditional check for whether there are DFS
- * referrals for this path without prefix, to provide support
- * for DFS referrals from w2k8 servers which don't seem to respond
- * with PATH_NOT_COVERED to requests that include the prefix.
- * Chase the referral if found, otherwise continue normally.
- */
- old_mountdata = cifs_sb->mountdata;
- (void)expand_dfs_referral(xid, ses, vol, cifs_sb, false);
-
- if (cifs_sb->mountdata == NULL) {
- rc = -ENOENT;
- goto error;
+ if (dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), vol->UNC + 1, NULL,
+ NULL)) {
+ /* No DFS referral was returned. Looks like a regular share. */
+ if (rc)
+ goto error;
+ /* Check if it is fully accessible and then mount it */
+ rc = is_path_remote(cifs_sb, vol, xid, server, tcon);
+ if (!rc)
+ goto out;
+ if (rc != -EREMOTE)
+ goto error;
}
-
- /* Save DFS root volume information for DFS refresh worker */
- origin_mountdata = kstrndup(cifs_sb->mountdata,
- strlen(cifs_sb->mountdata), GFP_KERNEL);
- if (!origin_mountdata) {
+ /* Save mount options */
+ mntdata = kstrndup(cifs_sb->mountdata, strlen(cifs_sb->mountdata), GFP_KERNEL);
+ if (!mntdata) {
rc = -ENOMEM;
goto error;
}
-
- if (cifs_sb->mountdata != old_mountdata) {
- /* If we were redirected, reconnect to new target server */
- mount_put_conns(cifs_sb, xid, server, ses, tcon);
- rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon);
- }
- if (rc) {
- if (rc == -EACCES || rc == -EOPNOTSUPP)
- goto error;
- /* Perform DFS failover to any other DFS targets */
- rc = mount_do_dfs_failover(root_path + 1, cifs_sb, vol, NULL,
- &xid, &server, &ses, &tcon);
- if (rc)
- goto error;
- }
-
- kfree(root_path);
- root_path = build_unc_path_to_root(vol, cifs_sb, false);
- if (IS_ERR(root_path)) {
- rc = PTR_ERR(root_path);
- root_path = NULL;
+ /* Get path of DFS root */
+ ref_path = build_unc_path_to_root(vol, cifs_sb, false);
+ if (IS_ERR(ref_path)) {
+ rc = PTR_ERR(ref_path);
+ ref_path = NULL;
goto error;
}
- /* Cache out resolved root server */
- (void)dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
- root_path + 1, NULL, NULL);
- kfree(root_path);
- root_path = NULL;
-
- set_root_tcon(cifs_sb, tcon, &root_tcon);
-
- for (count = 1; ;) {
- if (!rc && tcon) {
- rc = is_path_remote(cifs_sb, vol, xid, server, tcon);
- if (!rc || rc != -EREMOTE)
- break;
- }
- /*
- * BB: when we implement proper loop detection,
- * we will remove this check. But now we need it
- * to prevent an indefinite loop if 'DFS tree' is
- * misconfigured (i.e. has loops).
- */
- if (count++ > MAX_NESTED_LINKS) {
- rc = -ELOOP;
- break;
- }
+ set_root_ses(cifs_sb, ses, &root_ses);
+ do {
+ /* Save full path of last DFS path we used to resolve final target server */
kfree(full_path);
- full_path = build_unc_path_to_root(vol, cifs_sb, true);
+ full_path = build_unc_path_to_root(vol, cifs_sb, !!count);
if (IS_ERR(full_path)) {
rc = PTR_ERR(full_path);
- full_path = NULL;
break;
}
-
- old_mountdata = cifs_sb->mountdata;
- rc = expand_dfs_referral(xid, root_tcon->ses, vol, cifs_sb,
- true);
+ /* Chase referral */
+ oldmnt = cifs_sb->mountdata;
+ rc = expand_dfs_referral(xid, root_ses, vol, cifs_sb, ref_path + 1);
if (rc)
break;
-
- if (cifs_sb->mountdata != old_mountdata) {
+ /* Connect to new DFS target only if we were redirected */
+ if (oldmnt != cifs_sb->mountdata) {
mount_put_conns(cifs_sb, xid, server, ses, tcon);
- rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses,
- &tcon);
- /*
- * Ensure that DFS referrals go through new root server.
- */
- if (!rc && tcon &&
- (tcon->share_flags & (SHI1005_FLAGS_DFS |
- SHI1005_FLAGS_DFS_ROOT))) {
- cifs_put_tcon(root_tcon);
- set_root_tcon(cifs_sb, tcon, &root_tcon);
- }
+ rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon);
}
- if (rc) {
- if (rc == -EACCES || rc == -EOPNOTSUPP)
- break;
- /* Perform DFS failover to any other DFS targets */
- rc = mount_do_dfs_failover(full_path + 1, cifs_sb, vol,
- root_tcon->ses, &xid,
- &server, &ses, &tcon);
- if (rc == -EACCES || rc == -EOPNOTSUPP || !server ||
- !ses)
- goto error;
+ if (rc && !server && !ses) {
+ /* Failed to connect. Try to connect to other targets in the referral. */
+ rc = do_dfs_failover(ref_path + 1, full_path, cifs_sb, vol, root_ses, &xid,
+ &server, &ses, &tcon);
}
- }
- cifs_put_tcon(root_tcon);
+ if (rc == -EACCES || rc == -EOPNOTSUPP || !server || !ses)
+ break;
+ if (!tcon)
+ continue;
+ /* Make sure that requests go through new root servers */
+ if (tcon->share_flags & (SHI1005_FLAGS_DFS | SHI1005_FLAGS_DFS_ROOT)) {
+ put_root_ses(root_ses);
+ set_root_ses(cifs_sb, ses, &root_ses);
+ }
+ /* Check for remaining path components and then continue chasing them (-EREMOTE) */
+ rc = check_dfs_prepath(cifs_sb, vol, xid, server, tcon, &ref_path);
+ /* Prevent recursion on broken link referrals */
+ if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS)
+ rc = -ELOOP;
+ } while (rc == -EREMOTE);
if (rc)
goto error;
-
- spin_lock(&cifs_tcp_ses_lock);
- if (!tcon->dfs_path) {
- /* Save full path in new tcon to do failover when reconnecting tcons */
- tcon->dfs_path = full_path;
- full_path = NULL;
- tcon->remap = cifs_remap(cifs_sb);
- }
- cifs_sb->origin_fullpath = kstrndup(tcon->dfs_path,
- strlen(tcon->dfs_path),
- GFP_ATOMIC);
+ put_root_ses(root_ses);
+ root_ses = NULL;
+ kfree(ref_path);
+ ref_path = NULL;
+ /*
+ * Store DFS full path in both superblock and tree connect structures.
+ *
+ * For DFS root mounts, the prefix path (cifs_sb->prepath) is preserved during reconnect so
+ * only the root path is set in cifs_sb->origin_fullpath and tcon->dfs_path. And for DFS
+ * links, the prefix path is included in both and may be changed during reconnect. See
+ * cifs_tree_connect().
+ */
+ cifs_sb->origin_fullpath = kstrndup(full_path, strlen(full_path), GFP_KERNEL);
if (!cifs_sb->origin_fullpath) {
- spin_unlock(&cifs_tcp_ses_lock);
rc = -ENOMEM;
goto error;
}
+ spin_lock(&cifs_tcp_ses_lock);
+ tcon->dfs_path = full_path;
+ full_path = NULL;
+ tcon->remap = cifs_remap(cifs_sb);
spin_unlock(&cifs_tcp_ses_lock);
- rc = dfs_cache_add_vol(origin_mountdata, vol, cifs_sb->origin_fullpath);
- if (rc) {
- kfree(cifs_sb->origin_fullpath);
+ /* Add original volume information for DFS cache to be used when refreshing referrals */
+ rc = dfs_cache_add_vol(mntdata, vol, cifs_sb->origin_fullpath);
+ if (rc)
goto error;
- }
/*
* After reconnecting to a different server, unique ids won't
* match anymore, so we disable serverino. This prevents
return mount_setup_tlink(cifs_sb, ses, tcon);
error:
+ kfree(ref_path);
kfree(full_path);
- kfree(root_path);
- kfree(origin_mountdata);
+ kfree(mntdata);
+ kfree(cifs_sb->origin_fullpath);
+ put_root_ses(root_ses);
mount_put_conns(cifs_sb, xid, server, ses, tcon);
return rc;
}
bcc_ptr += strlen("?????");
bcc_ptr += 1;
count = bcc_ptr - &pSMB->Password[0];
- pSMB->hdr.smb_buf_length = cpu_to_be32(be32_to_cpu(
- pSMB->hdr.smb_buf_length) + count);
+ be32_add_cpu(&pSMB->hdr.smb_buf_length, count);
pSMB->ByteCount = cpu_to_le16(count);
rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length,
out:
kfree(vol_info->username);
- kzfree(vol_info->password);
+ kfree_sensitive(vol_info->password);
kfree(vol_info);
return tcon;
queue_delayed_work(cifsiod_wq, &cifs_sb->prune_tlinks,
TLINK_IDLE_EXPIRE);
}
+
+#ifdef CONFIG_CIFS_DFS_UPCALL
+int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc)
+{
+ int rc;
+ struct TCP_Server_Info *server = tcon->ses->server;
+ const struct smb_version_operations *ops = server->ops;
+ struct dfs_cache_tgt_list tl;
+ struct dfs_cache_tgt_iterator *it = NULL;
+ char *tree;
+ const char *tcp_host;
+ size_t tcp_host_len;
+ const char *dfs_host;
+ size_t dfs_host_len;
+ char *share = NULL, *prefix = NULL;
+ struct dfs_info3_param ref = {0};
+ bool isroot;
+
+ tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
+ if (!tree)
+ return -ENOMEM;
+
+ if (!tcon->dfs_path) {
+ if (tcon->ipc) {
+ scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
+ rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
+ } else {
+ rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc);
+ }
+ goto out;
+ }
+
+ rc = dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl);
+ if (rc)
+ goto out;
+ isroot = ref.server_type == DFS_TYPE_ROOT;
+ free_dfs_info_param(&ref);
+
+ extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len);
+
+ for (it = dfs_cache_get_tgt_iterator(&tl); it; it = dfs_cache_get_next_tgt(&tl, it)) {
+ bool target_match;
+
+ kfree(share);
+ kfree(prefix);
+ share = NULL;
+ prefix = NULL;
+
+ rc = dfs_cache_get_tgt_share(tcon->dfs_path + 1, it, &share, &prefix);
+ if (rc) {
+ cifs_dbg(VFS, "%s: failed to parse target share %d\n",
+ __func__, rc);
+ continue;
+ }
+
+ extract_unc_hostname(share, &dfs_host, &dfs_host_len);
+
+ if (dfs_host_len != tcp_host_len
+ || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) {
+ cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len,
+ dfs_host, (int)tcp_host_len, tcp_host);
+
+ rc = match_target_ip(server, dfs_host, dfs_host_len, &target_match);
+ if (rc) {
+ cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc);
+ break;
+ }
+
+ if (!target_match) {
+ cifs_dbg(FYI, "%s: skipping target\n", __func__);
+ continue;
+ }
+ }
+
+ if (tcon->ipc) {
+ scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", share);
+ rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
+ } else {
+ scnprintf(tree, MAX_TREE_SIZE, "\\%s", share);
+ rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
+ /* Only handle prefix paths of DFS link targets */
+ if (!rc && !isroot) {
+ rc = update_super_prepath(tcon, prefix);
+ break;
+ }
+ }
+ if (rc == -EREMOTE)
+ break;
+ }
+
+ kfree(share);
+ kfree(prefix);
+
+ if (!rc) {
+ if (it)
+ rc = dfs_cache_noreq_update_tgthint(tcon->dfs_path + 1, it);
+ else
+ rc = -ENOENT;
+ }
+ dfs_cache_free_tgts(&tl);
+out:
+ kfree(tree);
+ return rc;
+}
+#else
+int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc)
+{
+ const struct smb_version_operations *ops = tcon->ses->server->ops;
+
+ return ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc);
+}
+#endif
struct cache_dfs_tgt {
char *name;
+ int path_consumed;
struct list_head list;
};
}
/* Allocate a new DFS target */
-static struct cache_dfs_tgt *alloc_target(const char *name)
+static struct cache_dfs_tgt *alloc_target(const char *name, int path_consumed)
{
struct cache_dfs_tgt *t;
kfree(t);
return ERR_PTR(-ENOMEM);
}
+ t->path_consumed = path_consumed;
INIT_LIST_HEAD(&t->list);
return t;
}
for (i = 0; i < numrefs; i++) {
struct cache_dfs_tgt *t;
- t = alloc_target(refs[i].node_name);
+ t = alloc_target(refs[i].node_name, refs[i].path_consumed);
if (IS_ERR(t)) {
free_tgts(ce);
return PTR_ERR(t);
return 0;
}
-/*
- * Find a DFS cache entry in hash table and optionally check prefix path against
- * @path.
- * Use whole path components in the match.
- * Must be called with htable_rw_lock held.
- *
- * Return ERR_PTR(-ENOENT) if the entry is not found.
- */
-static struct cache_entry *lookup_cache_entry(const char *path,
- unsigned int *hash)
+static struct cache_entry *__lookup_cache_entry(const char *path)
{
struct cache_entry *ce;
unsigned int h;
if (!found)
ce = ERR_PTR(-ENOENT);
+ return ce;
+}
+
+/*
+ * Find a DFS cache entry in hash table and optionally check prefix path against
+ * @path.
+ * Use whole path components in the match.
+ * Must be called with htable_rw_lock held.
+ *
+ * Return ERR_PTR(-ENOENT) if the entry is not found.
+ */
+static struct cache_entry *lookup_cache_entry(const char *path, unsigned int *hash)
+{
+ struct cache_entry *ce = ERR_PTR(-ENOENT);
+ unsigned int h;
+ int cnt = 0;
+ char *npath;
+ char *s, *e;
+ char sep;
+
+ npath = kstrndup(path, strlen(path), GFP_KERNEL);
+ if (!npath)
+ return ERR_PTR(-ENOMEM);
+
+ s = npath;
+ sep = *npath;
+ while ((s = strchr(s, sep)) && ++cnt < 3)
+ s++;
+
+ if (cnt < 3) {
+ h = cache_entry_hash(path, strlen(path));
+ ce = __lookup_cache_entry(path);
+ goto out;
+ }
+ /*
+ * Handle paths that have more than two path components and are a complete prefix of the DFS
+ * referral request path (@path).
+ *
+ * See MS-DFSC 3.2.5.5 "Receiving a Root Referral Request or Link Referral Request".
+ */
+ h = cache_entry_hash(npath, strlen(npath));
+ e = npath + strlen(npath) - 1;
+ while (e > s) {
+ char tmp;
+
+ /* skip separators */
+ while (e > s && *e == sep)
+ e--;
+ if (e == s)
+ goto out;
+
+ tmp = *(e+1);
+ *(e+1) = 0;
+
+ ce = __lookup_cache_entry(npath);
+ if (!IS_ERR(ce)) {
+ h = cache_entry_hash(npath, strlen(npath));
+ break;
+ }
+
+ *(e+1) = tmp;
+ /* backward until separator */
+ while (e > s && *e != sep)
+ e--;
+ }
+out:
if (hash)
*hash = h;
-
+ kfree(npath);
return ce;
}
rc = -ENOMEM;
goto err_free_it;
}
+ it->it_path_consumed = t->path_consumed;
if (ce->tgthint == t)
list_add(&it->it_list, head);
err_free_unc:
kfree(new->UNC);
err_free_password:
- kzfree(new->password);
+ kfree_sensitive(new->password);
err_free_username:
kfree(new->username);
kfree(new);
/**
* dfs_cache_get_tgt_share - parse a DFS target
*
+ * @path: DFS full path
* @it: DFS target iterator.
* @share: tree name.
- * @share_len: length of tree name.
* @prefix: prefix path.
- * @prefix_len: length of prefix path.
*
* Return zero if target was parsed correctly, otherwise non-zero.
*/
-int dfs_cache_get_tgt_share(const struct dfs_cache_tgt_iterator *it,
- const char **share, size_t *share_len,
- const char **prefix, size_t *prefix_len)
+int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it,
+ char **share, char **prefix)
{
- char *s, sep;
+ char *s, sep, *p;
+ size_t len;
+ size_t plen1, plen2;
- if (!it || !share || !share_len || !prefix || !prefix_len)
+ if (!it || !path || !share || !prefix || strlen(path) < it->it_path_consumed)
return -EINVAL;
+ *share = NULL;
+ *prefix = NULL;
+
sep = it->it_name[0];
if (sep != '\\' && sep != '/')
return -EINVAL;
if (!s)
return -EINVAL;
+ /* point to prefix in target node */
s = strchrnul(s + 1, sep);
- *share = it->it_name;
- *share_len = s - it->it_name;
- *prefix = *s ? s + 1 : s;
- *prefix_len = &it->it_name[strlen(it->it_name)] - *prefix;
+ /* extract target share */
+ *share = kstrndup(it->it_name, s - it->it_name, GFP_KERNEL);
+ if (!*share)
+ return -ENOMEM;
+ /* skip separator */
+ if (*s)
+ s++;
+ /* point to prefix in DFS path */
+ p = path + it->it_path_consumed;
+ if (*p == sep)
+ p++;
+
+ /* merge prefix paths from DFS path and target node */
+ plen1 = it->it_name + strlen(it->it_name) - s;
+ plen2 = path + strlen(path) - p;
+ if (plen1 || plen2) {
+ len = plen1 + plen2 + 2;
+ *prefix = kmalloc(len, GFP_KERNEL);
+ if (!*prefix) {
+ kfree(*share);
+ *share = NULL;
+ return -ENOMEM;
+ }
+ if (plen1)
+ scnprintf(*prefix, len, "%.*s%c%.*s", (int)plen1, s, sep, (int)plen2, p);
+ else
+ strscpy(*prefix, p, len);
+ }
return 0;
}
kfree(buf_to_free->serverOS);
kfree(buf_to_free->serverDomain);
kfree(buf_to_free->serverNOS);
- kzfree(buf_to_free->password);
+ kfree_sensitive(buf_to_free->password);
kfree(buf_to_free->user_name);
kfree(buf_to_free->domainName);
- kzfree(buf_to_free->auth_key.response);
+ kfree_sensitive(buf_to_free->auth_key.response);
kfree(buf_to_free->iface_list);
- kzfree(buf_to_free);
+ kfree_sensitive(buf_to_free);
}
struct cifs_tcon *
}
atomic_dec(&tconInfoAllocCount);
kfree(buf_to_free->nativeFileSystem);
- kzfree(buf_to_free->password);
+ kfree_sensitive(buf_to_free->password);
kfree(buf_to_free->crfid.fid);
#ifdef CONFIG_CIFS_DFS_UPCALL
kfree(buf_to_free->dfs_path);
}
#endif
-int update_super_prepath(struct cifs_tcon *tcon, const char *prefix,
- size_t prefix_len)
+int update_super_prepath(struct cifs_tcon *tcon, char *prefix)
{
struct super_block *sb;
struct cifs_sb_info *cifs_sb;
kfree(cifs_sb->prepath);
- if (*prefix && prefix_len) {
- cifs_sb->prepath = kstrndup(prefix, prefix_len, GFP_ATOMIC);
+ if (prefix && *prefix) {
+ cifs_sb->prepath = kstrndup(prefix, strlen(prefix), GFP_ATOMIC);
if (!cifs_sb->prepath) {
rc = -ENOMEM;
goto out;
/*
* Might pages of this file have been modified in userspace?
- * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
+ * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
* marks vma as VM_SHARED if it is shared, and the file was opened for
* writing i.e. vma may be mprotected writable even if now readonly.
*
int (*f)(struct dentry *, umode_t, void *),
void *);
+int vfs_fchown(struct file *file, uid_t user, gid_t group);
+int vfs_fchmod(struct file *file, umode_t mode);
+int vfs_utimes(const struct path *path, struct timespec64 *times);
+
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
extern unsigned int get_next_ino(void);
extern void evict_inodes(struct super_block *sb);
+ /*
+ * Userspace may rely on the the inode number being non-zero. For example, glibc
+ * simply ignores files with zero i_ino in unlink() and other places.
+ *
+ * As an additional complication, if userspace was compiled with
+ * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the
+ * lower 32 bits, so we need to check that those aren't zero explicitly. With
+ * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but
+ * better safe than sorry.
+ */
+ static inline bool is_zero_ino(ino_t ino)
+ {
+ return (u32)ino == 0;
+ }
+
extern void __iget(struct inode * inode);
extern void iget_failed(struct inode *);
extern void clear_inode(struct inode *);
loff_t *);
int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
+ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
+ loff_t *);
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
#if defined(CONFIG_X86)
# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */
-#elif defined(CONFIG_PPC)
-# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */
#elif defined(CONFIG_PARISC)
# define VM_GROWSUP VM_ARCH_1
#elif defined(CONFIG_IA64)
extern void kvfree(const void *addr);
extern void kvfree_sensitive(const void *addr, size_t len);
+ static inline int head_mapcount(struct page *head)
+ {
+ return atomic_read(compound_mapcount_ptr(head)) + 1;
+ }
+
/*
* Mapcount of compound page as a whole, does not include mapped sub-pages.
*
{
VM_BUG_ON_PAGE(!PageCompound(page), page);
page = compound_head(page);
- return atomic_read(compound_mapcount_ptr(page)) + 1;
+ return head_mapcount(page);
}
/*
return PageCompound(page) && compound_order(page) > 1;
}
+ static inline int head_pincount(struct page *head)
+ {
+ return atomic_read(compound_pincount_ptr(head));
+ }
+
static inline int compound_pincount(struct page *page)
{
VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
page = compound_head(page);
- return atomic_read(compound_pincount_ptr(page));
+ return head_pincount(page);
}
static inline void set_compound_order(struct page *page, unsigned int order)
NULL : pud_offset(p4d, address);
}
- static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
- unsigned long address,
- pgtbl_mod_mask *mod_mask)
-
- {
- if (unlikely(pgd_none(*pgd))) {
- if (__p4d_alloc(mm, pgd, address))
- return NULL;
- *mod_mask |= PGTBL_PGD_MODIFIED;
- }
-
- return p4d_offset(pgd, address);
- }
-
- static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
- unsigned long address,
- pgtbl_mod_mask *mod_mask)
- {
- if (unlikely(p4d_none(*p4d))) {
- if (__pud_alloc(mm, p4d, address))
- return NULL;
- *mod_mask |= PGTBL_P4D_MODIFIED;
- }
-
- return pud_offset(p4d, address);
- }
-
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
NULL: pmd_offset(pud, address);
}
-
- static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
- unsigned long address,
- pgtbl_mod_mask *mod_mask)
- {
- if (unlikely(pud_none(*pud))) {
- if (__pmd_alloc(mm, pud, address))
- return NULL;
- *mod_mask |= PGTBL_PUD_MODIFIED;
- }
-
- return pmd_offset(pud, address);
- }
#endif /* CONFIG_MMU */
#if USE_SPLIT_PTE_PTLOCKS
((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
NULL: pte_offset_kernel(pmd, address))
- #define pte_alloc_kernel_track(pmd, address, mask) \
- ((unlikely(pmd_none(*(pmd))) && \
- (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
- NULL: pte_offset_kernel(pmd, address))
-
#if USE_SPLIT_PMD_PTLOCKS
static struct page *pmd_to_page(pmd_t *pmd)
* for_each_valid_physical_page_range()
* memblock_add_node(base, size, nid)
* free_area_init(max_zone_pfns);
- *
- * sparse_memory_present_with_active_regions() calls memory_present() for
- * each range when SPARSEMEM is enabled.
*/
void free_area_init(unsigned long *max_zone_pfn);
unsigned long node_map_pfn_alignment(void);
extern void get_pfn_range_for_nid(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn);
extern unsigned long find_min_pfn_with_active_regions(void);
- extern void sparse_memory_present_with_active_regions(int nid);
#ifndef CONFIG_NEED_MULTIPLE_NODES
static inline int early_pfn_to_nid(unsigned long pfn)
struct list_head *uf);
extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
- vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
- struct list_head *uf);
+ unsigned long pgoff, unsigned long *populate, struct list_head *uf);
extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf, bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
extern int do_madvise(unsigned long start, size_t len_in, int behavior);
- static inline unsigned long
- do_mmap_pgoff(struct file *file, unsigned long addr,
- unsigned long len, unsigned long prot, unsigned long flags,
- unsigned long pgoff, unsigned long *populate,
- struct list_head *uf)
- {
- return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf);
- }
-
#ifdef CONFIG_MMU
extern int __mm_populate(unsigned long addr, unsigned long len,
int ignore_errors);
p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
- pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
+ pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
+ struct vmem_altmap *altmap);
void *vmemmap_alloc_block(unsigned long size, int node);
struct vmem_altmap;
- void *vmemmap_alloc_block_buf(unsigned long size, int node);
- void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap);
+ void *vmemmap_alloc_block_buf(unsigned long size, int node,
+ struct vmem_altmap *altmap);
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
- int node);
+ int node, struct vmem_altmap *altmap);
int vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap);
void vmemmap_populate_print_last(void);
#include <linux/jump_label.h>
#include <linux/mem_encrypt.h>
#include <linux/kcsan.h>
+#include <linux/init_syscalls.h>
#include <asm/io.h>
#include <asm/bugs.h>
#endif
static char *execute_command;
-static char *ramdisk_execute_command;
+static char *ramdisk_execute_command = "/init";
/*
* Used to generate warnings if static_key manipulation functions are used
rest_init();
}
- asmlinkage __visible void __init start_kernel(void)
+ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
{
char *command_line;
char *after_dashes;
"See Linux Documentation/admin-guide/init.rst for guidance.");
}
-void console_on_rootfs(void)
+/* Open /dev/console, for stdin/stdout/stderr, this should never fail */
+void __init console_on_rootfs(void)
{
- /* Open the /dev/console as stdin, this should never fail */
- if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
- pr_err("Warning: unable to open an initial console.\n");
+ struct file *file = filp_open("/dev/console", O_RDWR, 0);
- /* create stdout/stderr */
- (void) ksys_dup(0);
- (void) ksys_dup(0);
+ if (IS_ERR(file)) {
+ pr_err("Warning: unable to open an initial console.\n");
+ return;
+ }
+ init_dup(file);
+ init_dup(file);
+ init_dup(file);
+ fput(file);
}
static noinline void __init kernel_init_freeable(void)
* check if there is an early userspace init. If yes, let it do all
* the work
*/
-
- if (!ramdisk_execute_command)
- ramdisk_execute_command = "/init";
-
- if (ksys_access((const char __user *)
- ramdisk_execute_command, 0) != 0) {
+ if (init_eaccess(ramdisk_execute_command) != 0) {
ramdisk_execute_command = NULL;
prepare_namespace();
}
}
EXPORT_SYMBOL(sock_set_rcvbuf);
+void sock_set_mark(struct sock *sk, u32 val)
+{
+ lock_sock(sk);
+ sk->sk_mark = val;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_mark);
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
if (WARN_ON_ONCE(!mem))
return;
if (nullify)
- kzfree(mem);
+ kfree_sensitive(mem);
else
kfree(mem);
atomic_sub(size, &sk->sk_omem_alloc);