expediting. Set to zero to disable automatic
expediting.
+ stack_guard_gap= [MM]
+ override the default stack gap protection. The value
+ is in page units and it defines how many pages prior
+ to (for stacks growing down) resp. after (for stacks
+ growing up) the main stack are reserved for no other
+ mapping. Default value is 256 pages.
+
stacktrace [FTRACE]
Enabled the stack tracer on boot up.
i/o is issued (since the bio may otherwise get freed in case i/o completion
happens in the meantime).
-The bio_clone() routine may be used to duplicate a bio, where the clone
+The bio_clone_fast() routine may be used to duplicate a bio, where the clone
shares the bio_vec_list with the original bio (i.e. both point to the
same bio_vec_list). This would typically be used for splitting i/o requests
in lvm or md.
- #clock-cells : must contain 1
- #reset-cells : must contain 1
-For the PRCM CCUs on H3/A64, one more clock is needed:
+For the PRCM CCUs on H3/A64, two more clocks are needed:
+- "pll-periph": the SoC's peripheral PLL from the main CCU
- "iosc": the SoC's internal frequency oscillator
Example for generic CCU:
r_ccu: clock@01f01400 {
compatible = "allwinner,sun50i-a64-r-ccu";
reg = <0x01f01400 0x100>;
- clocks = <&osc24M>, <&osc32k>, <&iosc>;
- clock-names = "hosc", "losc", "iosc";
+ clocks = <&osc24M>, <&osc32k>, <&iosc>, <&ccu CLK_PLL_PERIPH0>;
+ clock-names = "hosc", "losc", "iosc", "pll-periph";
#clock-cells = <1>;
#reset-cells = <1>;
};
Optional properties:
In order to use the GPIO lines in PWM mode, some additional optional
-properties are required. Only Armada 370 and XP support these properties.
+properties are required.
-- compatible: Must contain "marvell,armada-370-xp-gpio"
+- compatible: Must contain "marvell,armada-370-gpio"
- reg: an additional register set is needed, for the GPIO Blink
Counter on/off registers.
};
gpio1: gpio@18140 {
- compatible = "marvell,armada-370-xp-gpio";
+ compatible = "marvell,armada-370-gpio";
reg = <0x18140 0x40>, <0x181c8 0x08>;
reg-names = "gpio", "pwm";
ngpios = <17>;
compatible = "st,stm32-timers";
reg = <0x40010000 0x400>;
clocks = <&rcc 0 160>;
- clock-names = "clk_int";
+ clock-names = "int";
pwm {
compatible = "st,stm32-pwm";
"brcm,bcm6328-switch"
"brcm,bcm6368-switch" and the mandatory "brcm,bcm63xx-switch"
-See Documentation/devicetree/bindings/dsa/dsa.txt for a list of additional
+See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
required and optional properties.
Examples:
of the device. On many systems this is wired high so the device goes
out of reset at power-on, but if it is under program control, this
optional GPIO can wake up in response to it.
+- vdd33a-supply, vddvario-supply : 3.3V analog and IO logic power supplies
Examples:
or will be computed in the stack. Capable hardware can pass the hash in
the receive descriptor for the packet; this would usually be the same
hash used for RSS (e.g. computed Toeplitz hash). The hash is saved in
-skb->rx_hash and can be used elsewhere in the stack as a hash of the
+skb->hash and can be used elsewhere in the stack as a hash of the
packet’s flow.
Each receive hardware queue has an associated list of CPUs to which
C6X ARCHITECTURE
M: Mark Salter <msalter@redhat.com>
-M: Aurelien Jacquiot <a-jacquiot@ti.com>
+M: Aurelien Jacquiot <jacquiot.aurelien@gmail.com>
L: linux-c6x-dev@linux-c6x.org
W: http://www.linux-c6x.org/wiki/index.php/Main_Page
S: Maintained
T: git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git
S: Maintained
+UUID HELPERS
+M: Christoph Hellwig <hch@lst.de>
+R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+L: linux-kernel@vger.kernel.org
+T: git git://git.infradead.org/users/hch/uuid.git
+F: lib/uuid.c
+F: lib/test_uuid.c
+F: include/linux/uuid.h
+F: include/uapi/linux/uuid.h
+S: Maintained
+
UVESAFB DRIVER
M: Michal Januszewski <spock@gentoo.org>
L: linux-fbdev@vger.kernel.org
VERSION = 4
PATCHLEVEL = 12
SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION =
NAME = Fearless Coyote
# *DOCUMENTATION*
@echo ' make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build'
@echo ' make V=2 [targets] 2 => give reason for rebuild of target'
@echo ' make O=dir [targets] Locate all output files in "dir", including .config'
- @echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)'
+ @echo ' make C=1 [targets] Check re-compiled c source with $$CHECK (sparse by default)'
@echo ' make C=2 [targets] Force check of all c source with $$CHECK'
@echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
@echo ' make W=n [targets] Enable extra gcc checks, n=1,2,3 where'
#define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4)
#define TSK_K_FP(tsk) TSK_K_REG(tsk, 0)
-#define thread_saved_pc(tsk) TSK_K_BLINK(tsk)
-
extern void start_thread(struct pt_regs * regs, unsigned long pc,
unsigned long usp);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
config VMSPLIT_3G
bool "3G/1G user/kernel split"
config VMSPLIT_3G_OPT
+ depends on !ARM_LPAE
bool "3G/1G user/kernel split (for full 1G low memory)"
config VMSPLIT_2G
bool "2G/2G user/kernel split"
@ there.
.inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000
#else
- W(mov) r0, r0
+ AR_CLASS( mov r0, r0 )
+ M_CLASS( nop.w )
#endif
.endm
mmc1_pins: pinmux_mmc1_pins {
pinctrl-single,pins = <
- AM33XX_IOPAD(0x960, PIN_INPUT | MUX_MODE7) /* spi0_cs1.gpio0_6 */
+ AM33XX_IOPAD(0x96c, PIN_INPUT | MUX_MODE7) /* uart0_rtsn.gpio1_9 */
>;
};
AM33XX_IOPAD(0x834, PIN_INPUT_PULLUP | MUX_MODE7) /* nKbdReset - gpmc_ad13.gpio1_13 */
AM33XX_IOPAD(0x838, PIN_INPUT_PULLUP | MUX_MODE7) /* nDispReset - gpmc_ad14.gpio1_14 */
AM33XX_IOPAD(0x844, PIN_INPUT_PULLUP | MUX_MODE7) /* USB1_enPower - gpmc_a1.gpio1_17 */
- /* AVR Programming - SPI Bus (bit bang) - Screen and Keyboard */
- AM33XX_IOPAD(0x954, PIN_INPUT_PULLUP | MUX_MODE7) /* Kbd/Disp/BattMOSI spi0_d0.gpio0_3 */
- AM33XX_IOPAD(0x958, PIN_INPUT_PULLUP | MUX_MODE7) /* Kbd/Disp/BattMISO spi0_d1.gpio0_4 */
- AM33XX_IOPAD(0x950, PIN_INPUT_PULLUP | MUX_MODE7) /* Kbd/Disp/BattSCLK spi0_clk.gpio0_2 */
/* PDI Bus - Battery system */
AM33XX_IOPAD(0x840, PIN_INPUT_PULLUP | MUX_MODE7) /* nBattReset gpmc_a0.gpio1_16 */
AM33XX_IOPAD(0x83c, PIN_INPUT_PULLUP | MUX_MODE7) /* BattPDIData gpmc_ad15.gpio1_15 */
pinctrl-names = "default";
pinctrl-0 = <&mmc1_pins>;
bus-width = <4>;
- cd-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>;
+ cd-gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vmmcsd_fixed>;
};
};
r_ccu: clock@1f01400 {
- compatible = "allwinner,sun50i-a64-r-ccu";
+ compatible = "allwinner,sun8i-h3-r-ccu";
reg = <0x01f01400 0x100>;
- clocks = <&osc24M>, <&osc32k>, <&iosc>;
- clock-names = "hosc", "losc", "iosc";
+ clocks = <&osc24M>, <&osc32k>, <&iosc>,
+ <&ccu 9>;
+ clock-names = "hosc", "losc", "iosc", "pll-periph";
#clock-cells = <1>;
#reset-cells = <1>;
};
if (arch >= CPU_ARCH_ARMv6) {
unsigned int cachetype = read_cpuid_cachetype();
- if ((arch == CPU_ARCH_ARMv7M) && !cachetype) {
+ if ((arch == CPU_ARCH_ARMv7M) && !(cachetype & 0xf000f)) {
cacheid = 0;
} else if ((cachetype & (7 << 29)) == 4 << 29) {
/* ARMv7 register format */
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
high_memory = __va(arm_lowmem_limit - 1) + 1;
+ if (!memblock_limit)
+ memblock_limit = arm_lowmem_limit;
+
/*
* Round the memblock limit down to a pmd size. This
* helps to ensure that we will allocate memory from the
* last full pmd, which should be mapped.
*/
- if (memblock_limit)
- memblock_limit = round_down(memblock_limit, PMD_SIZE);
- if (!memblock_limit)
- memblock_limit = arm_lowmem_limit;
+ memblock_limit = round_down(memblock_limit, PMD_SIZE);
if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {
if (memblock_end_of_DRAM() > arm_lowmem_limit) {
r_ccu: clock@1f01400 {
compatible = "allwinner,sun50i-a64-r-ccu";
reg = <0x01f01400 0x100>;
- clocks = <&osc24M>, <&osc32k>, <&iosc>;
- clock-names = "hosc", "losc", "iosc";
+ clocks = <&osc24M>, <&osc32k>, <&iosc>,
+ <&ccu 11>;
+ clock-names = "hosc", "losc", "iosc", "pll-periph";
#clock-cells = <1>;
#reset-cells = <1>;
};
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "sunxi-h3-h5.dtsi"
+#include <arm/sunxi-h3-h5.dtsi>
/ {
cpus {
+++ /dev/null
-../../../../arm/boot/dts/sunxi-h3-h5.dtsi
\ No newline at end of file
/* tkr_mono.cycle_last == tkr_raw.cycle_last */
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
vdso_data->raw_time_sec = tk->raw_time.tv_sec;
- vdso_data->raw_time_nsec = tk->raw_time.tv_nsec;
+ vdso_data->raw_time_nsec = (tk->raw_time.tv_nsec <<
+ tk->tkr_raw.shift) +
+ tk->tkr_raw.xtime_nsec;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
- /* tkr_raw.xtime_nsec == 0 */
vdso_data->cs_mono_mult = tk->tkr_mono.mult;
vdso_data->cs_raw_mult = tk->tkr_raw.mult;
/* tkr_mono.shift == tkr_raw.shift */
seqcnt_check fail=monotonic_raw
/* All computations are done with left-shifted nsecs. */
- lsl x14, x14, x12
get_nsec_per_sec res=x9
lsl x9, x9, x12
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
+#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
/* Map BPF registers to A64 registers */
static const int bpf2a64[] = {
/* temporary registers for internal BPF JIT */
[TMP_REG_1] = A64_R(10),
[TMP_REG_2] = A64_R(11),
+ [TMP_REG_3] = A64_R(12),
/* tail_call_cnt */
[TCALL_CNT] = A64_R(26),
/* temporary register for blinding constants */
const u8 src = bpf2a64[insn->src_reg];
const u8 tmp = bpf2a64[TMP_REG_1];
const u8 tmp2 = bpf2a64[TMP_REG_2];
+ const u8 tmp3 = bpf2a64[TMP_REG_3];
const s16 off = insn->off;
const s32 imm = insn->imm;
const int i = insn - ctx->prog->insnsi;
emit(A64_PRFM(tmp, PST, L1, STRM), ctx);
emit(A64_LDXR(isdw, tmp2, tmp), ctx);
emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
- emit(A64_STXR(isdw, tmp2, tmp, tmp2), ctx);
+ emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx);
jmp_offset = -3;
check_imm19(jmp_offset);
- emit(A64_CBNZ(0, tmp2, jmp_offset), ctx);
+ emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
break;
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
{
}
-/*
- * Return saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk) (tsk->thread.pc)
-
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) \
#define release_segments(mm) do { } while (0)
/*
- * saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
-
-/*
* saved kernel SP and DP of a blocked thread.
*/
#ifdef _BIG_ENDIAN
while(1) /* waiting for RETRIBUTION! */ ;
}
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- return task_pt_regs(t)->irp;
-}
-
/* setup the child's kernel stack with a pt_regs and switch_stack on it.
* it will be un-nested during _resume and _ret_from_sys_call when the
* new thread is scheduled.
}
/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- return task_pt_regs(t)->erp;
-}
-
-/*
* Setup the child's kernel stack with a pt_regs and call switch_stack() on it.
* It will be unnested during _resume and _ret_from_sys_call when the new thread
* is scheduled.
#define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp)
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
/* Free all resources held by a thread. */
static inline void release_thread(struct task_struct *dead_task)
{
#define release_segments(mm) do { } while (0)
#define forget_segments() do { } while (0)
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) ((tsk)->thread.frame0->pc)
return 0;
}
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- /* Check whether the thread is blocked in resume() */
- if (in_sched_functions(tsk->thread.pc))
- return ((unsigned long *)tsk->thread.fp)[2];
- else
- return tsk->thread.pc;
-}
-
int elf_check_arch(const struct elf32_hdr *hdr)
{
unsigned long hsr0 = __get_HSR(0);
addr = PAGE_ALIGN(addr);
vma = find_vma(current->mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
goto success;
}
{
}
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk);
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) \
return 0;
}
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return ((struct pt_regs *)tsk->thread.esp0)->pc;
-}
-
unsigned long get_wchan(struct task_struct *p)
{
unsigned long fp, pc;
/* task_struct, defined elsewhere, is the "process descriptor" */
struct task_struct;
-/* this is defined in arch/process.c */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
/*
}
/*
- * Return saved PC of a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return 0;
-}
-
-/*
* Copy architecture-specific thread state
*/
int copy_thread(unsigned long clone_flags, unsigned long usp,
}
/*
- * Return saved PC of a blocked thread.
- * Note that the only way T can block is through a call to schedule() -> switch_to().
- */
-static inline unsigned long
-thread_saved_pc (struct task_struct *t)
-{
- struct unw_frame_info info;
- unsigned long ip;
-
- unw_init_from_blocked_task(&info, t);
- if (unw_unwind(&info) < 0)
- return 0;
- unw_get_ip(&info, &ip);
- return ip;
-}
-
-/*
* Get the current instruction/program counter value.
*/
#define current_text_addr() \
extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
extern void release_segments(struct mm_struct * mm);
-extern unsigned long thread_saved_pc(struct task_struct *);
-
/* Copy and release all segment info associated with a VM */
#define copy_segments(p, mm) do { } while (0)
#define release_segments(mm) do { } while (0)
#include <linux/err.h>
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return tsk->thread.lr;
-}
-
void (*pm_power_off)(void) = NULL;
EXPORT_SYMBOL(pm_power_off);
{
}
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) \
asmlinkage void ret_from_fork(void);
asmlinkage void ret_from_kernel_thread(void);
-
-/*
- * Return saved PC from a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp;
- /* Check whether the thread is blocked in resume() */
- if (in_sched_functions(sw->retpc))
- return ((unsigned long *)sw->a6)[1];
- else
- return sw->retpc;
-}
-
void arch_cpu_idle(void)
{
#if defined(MACH_ATARI_ONLY)
{
}
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
extern unsigned long get_wchan(struct task_struct *p);
# define KSTK_EIP(tsk) (0)
{
}
-/* Return saved (kernel) PC of a blocked thread. */
-# define thread_saved_pc(tsk) \
- ((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0)
-
unsigned long get_wchan(struct task_struct *p);
/* The size allocated for kernel stacks. This _must_ be a power of two! */
return 0;
}
-#ifndef CONFIG_MMU
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct cpu_context *ctx =
- &(((struct thread_info *)(tsk->stack))->cpu_context);
-
- /* Check whether the thread is blocked in resume() */
- if (in_sched_functions(ctx->r15))
- return (unsigned long)ctx->r15;
- else
- return ctx->r14;
-}
-#endif
-
unsigned long get_wchan(struct task_struct *p)
{
/* TBD (used by procfs) */
-DADDR_BITS=$(ADDR_BITS) \
-DADDR_CELLS=$(itb_addr_cells)
-$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
$(call if_changed_dep,cpp_its_S,none,vmlinux.bin)
-$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
$(call if_changed_dep,cpp_its_S,gzip,vmlinux.bin.gz)
-$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
$(call if_changed_dep,cpp_its_S,bzip2,vmlinux.bin.bz2)
-$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
$(call if_changed_dep,cpp_its_S,lzma,vmlinux.bin.lzma)
-$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
$(call if_changed_dep,cpp_its_S,lzo,vmlinux.bin.lzo)
quiet_cmd_itb-image = ITB $@
* easily, subsequent pte tables have to be allocated in one physical
* chunk of RAM.
*/
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+#define LAST_PKMAP 512
+#else
#define LAST_PKMAP 1024
+#endif
+
#define LAST_PKMAP_MASK (LAST_PKMAP-1)
#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT)
#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))
#define flush_insn_slot(p) \
do { \
- flush_icache_range((unsigned long)p->addr, \
+ if (p->addr) \
+ flush_icache_range((unsigned long)p->addr, \
(unsigned long)p->addr + \
(MAX_INSN_SIZE * sizeof(kprobe_opcode_t))); \
} while (0)
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
+#ifdef CONFIG_HIGHMEM
+#include <asm/highmem.h>
+#endif
+
extern int temp_tlb_entry;
/*
#define VMALLOC_START MAP_BASE
-#define PKMAP_BASE (0xfe000000UL)
+#define PKMAP_END ((FIXADDR_START) & ~((LAST_PKMAP << PAGE_SHIFT)-1))
+#define PKMAP_BASE (PKMAP_END - PAGE_SIZE * LAST_PKMAP)
#ifdef CONFIG_HIGHMEM
# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE)
break;
}
/* Compact branch: BNEZC || JIALC */
- if (insn.i_format.rs)
+ if (!insn.i_format.rs) {
+ /* JIALC: set $31/ra */
regs->regs[31] = epc + 4;
+ }
regs->cp0_epc += 8;
break;
#endif
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/compiler.h>
+#include <asm/irqflags.h>
#include <asm/regdef.h>
#include <asm/mipsregs.h>
#include <asm/stackframe.h>
andi t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS
beqz t0, work_notifysig
work_resched:
+ TRACE_IRQS_OFF
jal schedule
local_irq_disable # make sure need_resched and
beqz t0, work_pending # trace bit set?
local_irq_enable # could let syscall_trace_leave()
# call schedule() instead
+ TRACE_IRQS_ON
move a0, sp
jal syscall_trace_leave
b resume_userspace
#endif
-/*
- * Check if the address is in kernel space
- *
- * Clone core_kernel_text() from kernel/extable.c, but doesn't call
- * init_kernel_text() for Ftrace doesn't trace functions in init sections.
- */
-static inline int in_kernel_space(unsigned long ip)
-{
- if (ip >= (unsigned long)_stext &&
- ip <= (unsigned long)_etext)
- return 1;
- return 0;
-}
-
#ifdef CONFIG_DYNAMIC_FTRACE
#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */
* If ip is in kernel space, no long call, otherwise, long call is
* needed.
*/
- new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F;
+ new = core_kernel_text(ip) ? INSN_NOP : INSN_B_1F;
#ifdef CONFIG_64BIT
return ftrace_modify_code(ip, new);
#else
unsigned int new;
unsigned long ip = rec->ip;
- new = in_kernel_space(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0];
+ new = core_kernel_text(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0];
#ifdef CONFIG_64BIT
return ftrace_modify_code(ip, new);
#else
- return ftrace_modify_code_2r(ip, new, in_kernel_space(ip) ?
+ return ftrace_modify_code_2r(ip, new, core_kernel_text(ip) ?
INSN_NOP : insn_la_mcount[1]);
#endif
}
* instruction "lui v1, hi_16bit_of_mcount"(offset is 24), but for
* kernel, move after the instruction "move ra, at"(offset is 16)
*/
- ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24);
+ ip = self_ra - (core_kernel_text(self_ra) ? 16 : 24);
/*
* search the text until finding the non-store instruction or "s{d,w}
* entries configured through the tracing/set_graph_function interface.
*/
- insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
+ insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
/* Only trace if the calling function expects to */
beq t0, t1, dtb_found
#endif
li t1, -2
- beq a0, t1, dtb_found
move t2, a1
+ beq a0, t1, dtb_found
li t2, 0
dtb_found:
break;
case CPU_P5600:
case CPU_P6600:
- case CPU_I6400:
/* 8-bit event numbers */
raw_id = config & 0x1ff;
base_id = raw_id & 0xff;
raw_event.range = P;
#endif
break;
+ case CPU_I6400:
+ /* 8-bit event numbers */
+ base_id = config & 0xff;
+ raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD;
+ break;
case CPU_1004K:
if (IS_BOTH_COUNTERS_1004K_EVENT(base_id))
raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD;
* state. Actually per-core rather than per-CPU.
*/
static DEFINE_PER_CPU_ALIGNED(u32*, ready_count);
-static DEFINE_PER_CPU_ALIGNED(void*, ready_count_alloc);
/* Indicates online CPUs coupled with the current CPU */
static DEFINE_PER_CPU_ALIGNED(cpumask_t, online_coupled);
{
enum cps_pm_state state;
unsigned core = cpu_data[cpu].core;
- unsigned dlinesz = cpu_data[cpu].dcache.linesz;
void *entry_fn, *core_rc;
for (state = CPS_PM_NC_WAIT; state < CPS_PM_STATE_COUNT; state++) {
}
if (!per_cpu(ready_count, core)) {
- core_rc = kmalloc(dlinesz * 2, GFP_KERNEL);
+ core_rc = kmalloc(sizeof(u32), GFP_KERNEL);
if (!core_rc) {
pr_err("Failed allocate core %u ready_count\n", core);
return -ENOMEM;
}
- per_cpu(ready_count_alloc, core) = core_rc;
-
- /* Ensure ready_count is aligned to a cacheline boundary */
- core_rc += dlinesz - 1;
- core_rc = (void *)((unsigned long)core_rc & ~(dlinesz - 1));
per_cpu(ready_count, core) = core_rc;
}
{
struct pt_regs regs;
mm_segment_t old_fs = get_fs();
+
+ regs.cp0_status = KSU_KERNEL;
if (sp) {
regs.regs[29] = (unsigned long)sp;
regs.regs[31] = 0;
int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
bool user, bool kernel)
{
- int idx_user, idx_kernel;
+ /*
+ * Initialize idx_user and idx_kernel to workaround bogus
+ * maybe-initialized warning when using GCC 6.
+ */
+ int idx_user = 0, idx_kernel = 0;
unsigned long flags, old_entryhi;
local_irq_save(flags);
return ieee754dp_nanxcpt(z);
case IEEE754_CLASS_DNORM:
DPDNORMZ;
- /* QNAN is handled separately below */
+ /* QNAN and ZERO cases are handled separately below */
}
switch (CLPAIR(xc, yc)) {
}
assert(rm & (DP_HIDDEN_BIT << 3));
+ if (zc == IEEE754_CLASS_ZERO)
+ return ieee754dp_format(rs, re, rm);
+
/* And now the addition */
assert(zm & DP_HIDDEN_BIT);
return ieee754sp_nanxcpt(z);
case IEEE754_CLASS_DNORM:
SPDNORMZ;
- /* QNAN is handled separately below */
+ /* QNAN and ZERO cases are handled separately below */
}
switch (CLPAIR(xc, yc)) {
}
assert(rm & (SP_HIDDEN_BIT << 3));
+ if (zc == IEEE754_CLASS_ZERO)
+ return ieee754sp_format(rs, re, rm);
+
/* And now the addition */
assert(zm & SP_HIDDEN_BIT);
* systems and only the R10000 and R12000 are used in such systems, the
* SGI IP28 Indigo² rsp. SGI IP32 aka O2.
*/
-static inline int cpu_needs_post_dma_flush(struct device *dev)
+static inline bool cpu_needs_post_dma_flush(struct device *dev)
{
- return !plat_device_is_coherent(dev) &&
- (boot_cpu_type() == CPU_R10000 ||
- boot_cpu_type() == CPU_R12000 ||
- boot_cpu_type() == CPU_BMIPS5000);
+ if (plat_device_is_coherent(dev))
+ return false;
+
+ switch (boot_cpu_type()) {
+ case CPU_R10000:
+ case CPU_R12000:
+ case CPU_BMIPS5000:
+ return true;
+
+ default:
+ /*
+ * Presence of MAARs suggests that the CPU supports
+ * speculatively prefetching data, and therefore requires
+ * the post-DMA flush/invalidate.
+ */
+ return cpu_has_maar;
+ }
}
static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp)
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
/*
* Fixed mappings:
*/
- vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
- fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base);
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1);
+ fixrange_init(vaddr & PMD_MASK, vaddr + FIXADDR_SIZE, pgd_base);
#ifdef CONFIG_HIGHMEM
/*
* Permanent kmaps:
*/
vaddr = PKMAP_BASE;
- fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+ fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
pgd = swapper_pg_dir + __pgd_offset(vaddr);
pud = pud_offset(pgd, vaddr);
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(task) ((task)->thread.uregs)
#include "internal.h"
/*
- * return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return ((unsigned long *) tsk->thread.sp)[3];
-}
-
-/*
* power off function, if any
*/
void (*pm_power_off)(void);
{
}
-/* Return saved PC of a blocked thread. */
-#define thread_saved_pc(tsk) ((tsk)->thread.kregs->ea)
-
extern unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(p) \
void release_thread(struct task_struct *);
unsigned long get_wchan(struct task_struct *p);
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
#define init_stack (init_thread_union.stack)
#define cpu_relax() barrier()
show_registers(regs);
}
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- return (unsigned long)user_regs(t->stack)->pc;
-}
-
void release_thread(struct task_struct *dead_task)
{
}
.flags = 0 \
}
-/*
- * Return saved PC of a blocked thread. This is used by ps mostly.
- */
-
struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *t);
void show_trace(struct task_struct *task, unsigned long *stack);
/*
return 0;
}
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- return t->thread.regs.kpc;
-}
-
unsigned long
get_wchan(struct task_struct *p)
{
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev;
unsigned long task_size = TASK_SIZE;
int do_color_align, last_mmap;
struct vm_unmapped_area_info info;
else
addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
+ vma = find_vma_prev(mm, addr, &prev);
if (task_size - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)) &&
+ (!prev || addr >= vm_end_gap(prev)))
goto found_addr;
}
const unsigned long len, const unsigned long pgoff,
const unsigned long flags)
{
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
int do_color_align, last_mmap;
addr = COLOR_ALIGN(addr, last_mmap, pgoff);
else
addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
+
+ vma = find_vma_prev(mm, addr, &prev);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)) &&
+ (!prev || addr >= vm_end_gap(prev)))
goto found_addr;
}
"1: "PPC_TLNEI" %4,0\n" \
_EMIT_BUG_ENTRY \
: : "i" (__FILE__), "i" (__LINE__), \
- "i" (BUGFLAG_TAINT(TAINT_WARN)), \
+ "i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\
"i" (sizeof(struct bug_entry)), \
"r" (__ret_warn_on)); \
} \
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
extern int kprobe_handler(struct pt_regs *regs);
extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
#ifdef CONFIG_KPROBES_ON_FTRACE
extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb);
}
#endif
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-#define thread_saved_pc(tsk) \
- ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
-
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.regs)
unsigned long get_wchan(struct task_struct *p);
extern unsigned long __copy_tofrom_user(void __user *to,
const void __user *from, unsigned long size);
-#ifndef __powerpc64__
-
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-#else /* __powerpc64__ */
-
+#ifdef __powerpc64__
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
* store at 0 and some ESBs support doing a trigger via a
* separate trigger page.
*/
-#define XIVE_ESB_GET 0x800
-#define XIVE_ESB_SET_PQ_00 0xc00
-#define XIVE_ESB_SET_PQ_01 0xd00
-#define XIVE_ESB_SET_PQ_10 0xe00
-#define XIVE_ESB_SET_PQ_11 0xf00
+#define XIVE_ESB_STORE_EOI 0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
+#define XIVE_ESB_GET 0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
#define XIVE_ESB_VAL_P 0x2
#define XIVE_ESB_VAL_Q 0x1
.balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_STD_MMU_64
- andis. r0,r4,0xa410 /* weird error? */
+ andis. r0,r4,0xa450 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
- andis. r0,r4,DSISR_DABRMATCH@h
- bne- handle_dabr_fault
CURRENT_THREAD_INFO(r11, r1)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
/* Error */
blt- 13f
+
+ /* Reload DSISR into r4 for the DABR check below */
+ ld r4,_DSISR(r1)
#endif /* CONFIG_PPC_STD_MMU_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
-11: ld r4,_DAR(r1)
+11: andis. r0,r4,DSISR_DABRMATCH@h
+ bne- handle_dabr_fault
+ ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+int is_current_kprobe_addr(unsigned long addr)
+{
+ struct kprobe *p = kprobe_running();
+ return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
bool arch_within_kprobe_blacklist(unsigned long addr)
{
return (addr >= (unsigned long)__kprobes_text_start &&
regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
#endif
+ /*
+ * jprobes use jprobe_return() which skips the normal return
+ * path of the function, and this messes up the accounting of the
+ * function graph tracer.
+ *
+ * Pause function graph tracing while performing the jprobe function.
+ */
+ pause_graph_tracing();
+
return 1;
}
NOKPROBE_SYMBOL(setjmp_pre_handler);
* saved regs...
*/
memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+ /* It's OK to start function graph tracing again */
+ unpause_graph_tracing();
preempt_enable_no_resched();
return 1;
}
#endif
/*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+ ti->task = NULL;
+ ti->cpu = cpu;
+ ti->preempt_count = 0;
+ ti->local_flags = 0;
+ ti->flags = 0;
+ klp_init_thread_info(ti);
+}
+
+/*
* Stack space used when we detect a bad kernel stack pointer, and
* early in SMP boots before relocation is enabled. Exclusive emergency
* stack for machine checks.
* Since we use these as temporary stacks during secondary CPU
* bringup, we need to get at them in real mode. This means they
* must also be within the RMO region.
+ *
+ * The IRQ stacks allocated elsewhere in this file are zeroed and
+ * initialized in kernel/irq.c. These are initialized here in order
+ * to have emergency stacks available as early as possible.
*/
limit = min(safe_stack_limit(), ppc64_rma_size);
for_each_possible_cpu(i) {
struct thread_info *ti;
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
#endif
}
stdu r1,-SWITCH_FRAME_SIZE(r1)
/* Save all gprs to pt_regs */
- SAVE_8GPRS(0,r1)
- SAVE_8GPRS(8,r1)
- SAVE_8GPRS(16,r1)
- SAVE_8GPRS(24,r1)
+ SAVE_GPR(0, r1)
+ SAVE_10GPRS(2, r1)
+ SAVE_10GPRS(12, r1)
+ SAVE_10GPRS(22, r1)
+
+ /* Save previous stack pointer (r1) */
+ addi r8, r1, SWITCH_FRAME_SIZE
+ std r8, GPR1(r1)
/* Load special regs for save below */
mfmsr r8
bl ftrace_stub
nop
- /* Load ctr with the possibly modified NIP */
- ld r3, _NIP(r1)
- mtctr r3
+ /* Load the possibly modified NIP */
+ ld r15, _NIP(r1)
+
#ifdef CONFIG_LIVEPATCH
- cmpd r14,r3 /* has NIP been altered? */
+ cmpd r14, r15 /* has NIP been altered? */
+#endif
+
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+ /* NIP has not been altered, skip over further checks */
+ beq 1f
+
+ /* Check if there is an active kprobe on us */
+ subi r3, r14, 4
+ bl is_current_kprobe_addr
+ nop
+
+ /*
+ * If r3 == 1, then this is a kprobe/jprobe.
+ * else, this is livepatched function.
+ *
+ * The conditional branch for livepatch_handler below will use the
+ * result of this comparison. For kprobe/jprobe, we just need to branch to
+ * the new NIP, not call livepatch_handler. The branch below is bne, so we
+ * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+ * CR0[EQ] = (r3 == 1).
+ */
+ cmpdi r3, 1
+1:
#endif
+ /* Load CTR with the possibly modified NIP */
+ mtctr r15
+
/* Restore gprs */
- REST_8GPRS(0,r1)
- REST_8GPRS(8,r1)
- REST_8GPRS(16,r1)
- REST_8GPRS(24,r1)
+ REST_GPR(0,r1)
+ REST_10GPRS(2,r1)
+ REST_10GPRS(12,r1)
+ REST_10GPRS(22,r1)
/* Restore possibly modified LR */
ld r0, _LINK(r1)
addi r1, r1, SWITCH_FRAME_SIZE
#ifdef CONFIG_LIVEPATCH
- /* Based on the cmpd above, if the NIP was altered handle livepatch */
+ /*
+ * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+ * not on a kprobe/jprobe, then handle livepatch.
+ */
bne- livepatch_handler
#endif
r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
break;
case KVM_REG_PPC_TB_OFFSET:
+ /*
+ * POWER9 DD1 has an erratum where writing TBU40 causes
+ * the timebase to lose ticks. So we don't let the
+ * timebase offset be changed on P9 DD1. (It is
+ * initialized to zero.)
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ break;
/* round up to multiple of 2^24 */
vcpu->arch.vcore->tb_offset =
ALIGN(set_reg_val(id, *val), 1UL << 24);
{
int r;
int srcu_idx;
+ unsigned long ebb_regs[3] = {}; /* shut up GCC */
+ unsigned long user_tar = 0;
+ unsigned int user_vrsave;
if (!vcpu->arch.sane) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
+ /*
+ * Don't allow entry with a suspended transaction, because
+ * the guest entry/exit code will lose it.
+ * If the guest has TM enabled, save away their TM-related SPRs
+ * (they will get restored by the TM unavailable interrupt).
+ */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+ (current->thread.regs->msr & MSR_TM)) {
+ if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ return -EINVAL;
+ }
+ current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+ current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+ current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+ current->thread.regs->msr &= ~MSR_TM;
+ }
+#endif
+
kvmppc_core_prepare_to_enter(vcpu);
/* No need to go into the guest when all we'll do is come back out */
flush_all_to_thread(current);
+ /* Save userspace EBB and other register values */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ ebb_regs[0] = mfspr(SPRN_EBBHR);
+ ebb_regs[1] = mfspr(SPRN_EBBRR);
+ ebb_regs[2] = mfspr(SPRN_BESCR);
+ user_tar = mfspr(SPRN_TAR);
+ }
+ user_vrsave = mfspr(SPRN_VRSAVE);
+
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
vcpu->arch.pgdir = current->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
}
} while (is_kvmppc_resume_guest(r));
+ /* Restore userspace EBB and other register values */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ mtspr(SPRN_EBBHR, ebb_regs[0]);
+ mtspr(SPRN_EBBRR, ebb_regs[1]);
+ mtspr(SPRN_BESCR, ebb_regs[2]);
+ mtspr(SPRN_TAR, user_tar);
+ mtspr(SPRN_FSCR, current->thread.fscr);
+ }
+ mtspr(SPRN_VRSAVE, user_vrsave);
+
out:
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&vcpu->kvm->arch.vcpus_running);
* Put whatever is in the decrementer into the
* hypervisor decrementer.
*/
+BEGIN_FTR_SECTION
+ ld r5, HSTATE_KVM_VCORE(r13)
+ ld r6, VCORE_KVM(r5)
+ ld r9, KVM_HOST_LPCR(r6)
+ andis. r9, r9, LPCR_LD@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mfspr r8,SPRN_DEC
mftb r7
- mtspr SPRN_HDEC,r8
+BEGIN_FTR_SECTION
+ /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
+ bne 32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r8,r8
+32: mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
#include <asm/opal.h>
#include <asm/xive-regs.h>
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg) \
+BEGIN_FTR_SECTION; \
+ extsw reg, reg; \
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
#define NAPPING_NOVCPU 2
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS 144
+#define STACK_SLOT_TRAP (SFS-4)
+#define STACK_SLOT_TID (SFS-16)
+#define STACK_SLOT_PSSCR (SFS-24)
+#define STACK_SLOT_PID (SFS-32)
+#define STACK_SLOT_IAMR (SFS-40)
+#define STACK_SLOT_CIABR (SFS-48)
+#define STACK_SLOT_DAWR (SFS-56)
+#define STACK_SLOT_DAWRX (SFS-64)
+
/*
* Call kvmppc_hv_entry in real mode.
* Must be called with interrupts hard-disabled.
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+ /* HDEC may be larger than DEC for arch >= v3.00, but since the */
+ /* HDEC value came from DEC in the first place, it will fit */
mfspr r3, SPRN_HDEC
mtspr SPRN_DEC, r3
/*
/* See if our timeslice has expired (HDEC is negative) */
mfspr r0, SPRN_HDEC
+ EXTEND_HDEC(r0)
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
- cmpwi r0, 0
+ cmpdi r0, 0
blt kvm_novcpu_exit
/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
bl kvmhv_accumulate_time
#endif
13: mr r3, r12
- stw r12, 112-4(r1)
+ stw r12, STACK_SLOT_TRAP(r1)
bl kvmhv_commence_exit
nop
- lwz r12, 112-4(r1)
+ lwz r12, STACK_SLOT_TRAP(r1)
b kvmhv_switch_to_host
/*
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
- lis r6, 0x7fff
- ori r6, r6, 0xffff
+ LOAD_REG_ADDR(r6, decrementer_max)
+ ld r6, 0(r6)
mtspr SPRN_HDEC, r6
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
* *
*****************************************************************************/
-/* Stack frame offsets */
-#define STACK_SLOT_TID (112-16)
-#define STACK_SLOT_PSSCR (112-24)
-#define STACK_SLOT_PID (112-32)
-
.global kvmppc_hv_entry
kvmppc_hv_entry:
*/
mflr r0
std r0, PPC_LR_STKOFF(r1)
- stdu r1, -112(r1)
+ stdu r1, -SFS(r1)
/* Save R1 in the PACA */
std r1, HSTATE_HOST_R1(r13)
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
mfspr r7, SPRN_PID
+ mfspr r8, SPRN_IAMR
std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
+ std r8, STACK_SLOT_IAMR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_CIABR
+ mfspr r6, SPRN_DAWR
+ mfspr r7, SPRN_DAWRX
+ std r5, STACK_SLOT_CIABR(r1)
+ std r6, STACK_SLOT_DAWR(r1)
+ std r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
/* Set partition DABR */
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
- cmpwi r3, 512 /* 1 microsecond */
+ EXTEND_HDEC(r3)
+ cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
#ifdef CONFIG_KVM_XICS
* set by the guest could disrupt the host.
*/
li r0, 0
- mtspr SPRN_IAMR, r0
- mtspr SPRN_CIABR, r0
- mtspr SPRN_DAWRX, r0
+ mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
BEGIN_FTR_SECTION
+ mtspr SPRN_IAMR, r0
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
std r6,VCPU_UAMOR(r9)
li r6,0
mtspr SPRN_AMR,r6
+ mtspr SPRN_UAMOR, r6
/* Switch DSCR back to host value */
mfspr r8, SPRN_DSCR
/* Restore host values of some registers */
BEGIN_FTR_SECTION
+ ld r5, STACK_SLOT_CIABR(r1)
+ ld r6, STACK_SLOT_DAWR(r1)
+ ld r7, STACK_SLOT_DAWRX(r1)
+ mtspr SPRN_CIABR, r5
+ mtspr SPRN_DAWR, r6
+ mtspr SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1)
ld r7, STACK_SLOT_PID(r1)
+ ld r8, STACK_SLOT_IAMR(r1)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_PID, r7
+ mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
- ld r0, 112+PPC_LR_STKOFF(r1)
- addi r1, r1, 112
+ ld r0, SFS+PPC_LR_STKOFF(r1)
+ addi r1, r1, SFS
mtlr r0
blr
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
- cmpw r3, r4
+ extsw r3, r3
+ EXTEND_HDEC(r4)
+ cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
67:
/* save expiry time of guest decrementer */
- extsw r3, r3
add r3, r3, r5
ld r4, HSTATE_KVM_VCPU(r13)
ld r5, HSTATE_KVM_VCORE(r13)
{
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
- __x_writeq(0, __x_eoi_page(xd));
+ __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
opal_int_eoi(hw_irq);
} else {
* properly.
*/
if (xd->flags & XIVE_IRQ_FLAG_LSI)
- __x_readq(__x_eoi_page(xd));
+ __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
else {
eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (mm->task_size - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
/*
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
if ((mm->task_size - len) < addr)
return 0;
vma = find_vma(mm, addr);
- return (!vma || (addr + len) <= vma->vm_start);
+ return (!vma || (addr + len) <= vm_start_gap(vma));
}
static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
struct pt_regs *regs_user_copy)
{
regs_user->regs = task_pt_regs(current);
- regs_user->abi = perf_reg_abi(current);
+ regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+ PERF_SAMPLE_REGS_ABI_NONE;
}
if (WARN_ON(!gpdev))
return NULL;
- if (WARN_ON(!gpdev->dev.of_node))
+ /* Not all PCI devices have device-tree nodes */
+ if (!gpdev->dev.of_node)
return NULL;
/* Get assoicated PCI device */
return mmio_atsd_reg;
}
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
{
unsigned long launch;
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
/* Invalidating the entire process doesn't use a va */
return mmio_launch_invalidate(npu, launch, 0);
}
static int mmio_invalidate_va(struct npu *npu, unsigned long va,
- unsigned long pid)
+ unsigned long pid, bool flush)
{
unsigned long launch;
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
return mmio_launch_invalidate(npu, launch, va);
}
#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
+struct mmio_atsd_reg {
+ struct npu *npu;
+ int reg;
+};
+
+static void mmio_invalidate_wait(
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+ struct npu *npu;
+ int i, reg;
+
+ /* Wait for all invalidations to complete */
+ for (i = 0; i <= max_npu2_index; i++) {
+ if (mmio_atsd_reg[i].reg < 0)
+ continue;
+
+ /* Wait for completion */
+ npu = mmio_atsd_reg[i].npu;
+ reg = mmio_atsd_reg[i].reg;
+ while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+ cpu_relax();
+
+ put_mmio_atsd_reg(npu, reg);
+
+ /*
+ * The GPU requires two flush ATSDs to ensure all entries have
+ * been flushed. We use PID 0 as it will never be used for a
+ * process on the GPU.
+ */
+ if (flush)
+ mmio_invalidate_pid(npu, 0, true);
+ }
+}
+
/*
* Invalidate either a single address or an entire PID depending on
* the value of va.
*/
static void mmio_invalidate(struct npu_context *npu_context, int va,
- unsigned long address)
+ unsigned long address, bool flush)
{
- int i, j, reg;
+ int i, j;
struct npu *npu;
struct pnv_phb *nphb;
struct pci_dev *npdev;
- struct {
- struct npu *npu;
- int reg;
- } mmio_atsd_reg[NV_MAX_NPUS];
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
/*
if (va)
mmio_atsd_reg[i].reg =
- mmio_invalidate_va(npu, address, pid);
+ mmio_invalidate_va(npu, address, pid,
+ flush);
else
mmio_atsd_reg[i].reg =
- mmio_invalidate_pid(npu, pid);
+ mmio_invalidate_pid(npu, pid, flush);
/*
* The NPU hardware forwards the shootdown to all GPUs
*/
flush_tlb_mm(npu_context->mm);
- /* Wait for all invalidations to complete */
- for (i = 0; i <= max_npu2_index; i++) {
- if (mmio_atsd_reg[i].reg < 0)
- continue;
-
- /* Wait for completion */
- npu = mmio_atsd_reg[i].npu;
- reg = mmio_atsd_reg[i].reg;
- while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
- cpu_relax();
- put_mmio_atsd_reg(npu, reg);
- }
+ mmio_invalidate_wait(mmio_atsd_reg, flush);
+ if (flush)
+ /* Wait for the flush to complete */
+ mmio_invalidate_wait(mmio_atsd_reg, false);
}
static void pnv_npu2_mn_release(struct mmu_notifier *mn,
* There should be no more translation requests for this PID, but we
* need to ensure any entries for it are removed from the TLB.
*/
- mmio_invalidate(npu_context, 0, 0);
+ mmio_invalidate(npu_context, 0, 0, true);
}
static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
{
struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, 1, address);
+ mmio_invalidate(npu_context, 1, address, true);
}
static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
{
struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, 1, address);
+ mmio_invalidate(npu_context, 1, address, true);
}
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct npu_context *npu_context = mn_to_npu_context(mn);
unsigned long address;
- for (address = start; address <= end; address += PAGE_SIZE)
- mmio_invalidate(npu_context, 1, address);
+ for (address = start; address < end; address += PAGE_SIZE)
+ mmio_invalidate(npu_context, 1, address, false);
+
+ /* Do the flush only on the final addess == end */
+ mmio_invalidate(npu_context, 1, address, true);
}
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
/* No nvlink associated with this GPU device */
return ERR_PTR(-ENODEV);
- if (!mm) {
- /* kernel thread contexts are not supported */
+ if (!mm || mm->context.id == 0) {
+ /*
+ * Kernel thread contexts are not supported and context id 0 is
+ * reserved on the GPU.
+ */
return ERR_PTR(-EINVAL);
}
{
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
- out_be64(xd->eoi_mmio, 0);
+ out_be64(xd->eoi_mmio + XIVE_ESB_STORE_EOI, 0);
else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
/*
* The FW told us to call it. This happens for some
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_DNS_RESOLVER=y
+CONFIG_NETLINK_DIAG=m
CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_OSD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_ATA_OVER_ETH=m
+CONFIG_BLK_DEV_RAM_DAX=y
CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_RBD=m
CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
# CONFIG_NET_VENDOR_INTEL is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_NATSEMI is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPPOL2TP=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_XFS_RT=y
CONFIG_XFS_DEBUG=y
CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_BTRFS_DEBUG=y
CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QUOTA_DEBUG=y
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
CONFIG_AUTOFS4_FS=m
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_RODATA_TEST=y
CONFIG_DEBUG_OBJECTS=y
CONFIG_DEBUG_OBJECTS_SELFTEST=y
CONFIG_DEBUG_OBJECTS_FREE=y
CONFIG_WQ_WATCHDOG=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_DEBUG_TIMEKEEPING=y
-CONFIG_TIMER_STATS=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
CONFIG_PROVE_LOCKING=y
CONFIG_RCU_CPU_STALL_TIMEOUT=300
CONFIG_NOTIFIER_ERROR_INJECTION=m
CONFIG_PM_NOTIFIER_ERROR_INJECT=m
+CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
CONFIG_FAULT_INJECTION=y
CONFIG_FAILSLAB=y
CONFIG_FAIL_PAGE_ALLOC=y
CONFIG_TRACE_ENUM_MAP_FILE=y
CONFIG_LKDTM=m
CONFIG_TEST_LIST_SORT=y
+CONFIG_TEST_SORT=y
CONFIG_KPROBES_SANITY_TEST=y
CONFIG_RBTREE_TEST=y
CONFIG_INTERVAL_TREE_TEST=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
-CONFIG_TEST_STRING_HELPERS=y
-CONFIG_TEST_KSTRTOX=y
CONFIG_DMA_API_DEBUG=y
CONFIG_TEST_BPF=m
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_USER=m
+CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_GCM=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_X509_CERTIFICATE_PARSER=m
CONFIG_CRC7=m
CONFIG_CRC8=m
+CONFIG_RANDOM32_SELFTEST=y
CONFIG_CORDIC=m
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_DNS_RESOLVER=y
+CONFIG_NETLINK_DIAG=m
CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_OSD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_ATA_OVER_ETH=m
+CONFIG_BLK_DEV_RAM_DAX=y
CONFIG_VIRTIO_BLK=y
CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
# CONFIG_NET_VENDOR_INTEL is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_NATSEMI is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPPOL2TP=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_PANIC_ON_OOPS=y
-CONFIG_TIMER_STATS=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRC7=m
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_DNS_RESOLVER=y
+CONFIG_NETLINK_DIAG=m
CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_OSD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_ATA_OVER_ETH=m
+CONFIG_BLK_DEV_RAM_DAX=y
CONFIG_VIRTIO_BLK=y
CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
# CONFIG_NET_VENDOR_INTEL is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_NATSEMI is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPPOL2TP=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_PANIC_ON_OOPS=y
-CONFIG_TIMER_STATS=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRC7=m
CONFIG_NR_CPUS=2
# CONFIG_HOTPLUG_CPU is not set
CONFIG_HZ_100=y
+# CONFIG_ARCH_RANDOM is not set
# CONFIG_COMPACTION is not set
# CONFIG_MIGRATION is not set
+# CONFIG_BOUNCE is not set
# CONFIG_CHECK_STACK is not set
# CONFIG_CHSC_SCH is not set
# CONFIG_SCM_BUS is not set
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_FC_ATTRS=y
CONFIG_ZFCP=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
# CONFIG_HVC_IUCV is not set
+# CONFIG_HW_RANDOM_S390 is not set
CONFIG_RAW_DRIVER=y
# CONFIG_SCLP_ASYNC is not set
# CONFIG_HMC_DRV is not set
# CONFIG_INOTIFY_USER is not set
CONFIG_CONFIGFS_FS=y
# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_PANIC_ON_OOPS=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_USER_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_SCSI_VIRTIO=y
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_MD_MULTIPATH=m
CONFIG_BLK_DEV_DM=y
CONFIG_DM_CRYPT=m
CONFIG_VIRTIO_NET=y
# CONFIG_NET_VENDOR_ALACRITECH is not set
# CONFIG_NET_VENDOR_SOLARFLARE is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
CONFIG_DEVKMEM=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_PANIC_ON_OOPS=y
-CONFIG_TIMER_STATS=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCK_STAT=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_RCU_CPU_STALL_TIMEOUT=60
-CONFIG_RCU_TRACE=y
CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENTS=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_TRACE_ENUM_MAP_FILE=y
CONFIG_KPROBES_SANITY_TEST=y
CONFIG_CRYPTO_GCM=m
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CTS=m
-CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
+CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA512_S390=m
#include <linux/types.h>
#include <linux/device.h>
+#include <linux/blkdev.h>
struct arqb {
u64 data;
int (*probe) (struct scm_device *scmdev);
int (*remove) (struct scm_device *scmdev);
void (*notify) (struct scm_device *scmdev, enum scm_event event);
- void (*handler) (struct scm_device *scmdev, void *data, int error);
+ void (*handler) (struct scm_device *scmdev, void *data,
+ blk_status_t error);
};
int scm_driver_register(struct scm_driver *scmdrv);
void scm_driver_unregister(struct scm_driver *scmdrv);
int eadm_start_aob(struct aob *aob);
-void scm_irq_handler(struct aob *aob, int error);
+void scm_irq_handler(struct aob *aob, blk_status_t error);
#endif /* _ASM_S390_EADM_H */
/* Free guarded storage control block for current */
void exit_thread_gs(void);
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(tsk) ((struct pt_regs *) \
(task_stack_page(tsk) + THREAD_SIZE) - 1)
unsigned short cpus_shared;
char reserved_4[3];
unsigned char vsne;
- uuid_be uuid;
+ uuid_t uuid;
char reserved_5[160];
char ext_name[256];
};
char reserved_1[3];
unsigned char evmne;
unsigned int reserved_2;
- uuid_be uuid;
+ uuid_t uuid;
} vm[8];
char reserved_3[1504];
char ext_names[8][256];
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
.Lsie_done:
# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
+# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
+# Other instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So lets use 3 nops as a landing pad for all possible rewinds.
# See also .Lcleanup_sie
-.Lrewind_pad:
- nop 0
+.Lrewind_pad6:
+ nopr 7
+.Lrewind_pad4:
+ nopr 7
+.Lrewind_pad2:
+ nopr 7
.globl sie_exit
sie_exit:
lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
stg %r14,__SF_EMPTY+16(%r15) # set exit reason code
j sie_exit
- EX_TABLE(.Lrewind_pad,.Lsie_fault)
+ EX_TABLE(.Lrewind_pad6,.Lsie_fault)
+ EX_TABLE(.Lrewind_pad4,.Lsie_fault)
+ EX_TABLE(.Lrewind_pad2,.Lsie_fault)
EX_TABLE(sie_exit,.Lsie_fault)
EXPORT_SYMBOL(sie64a)
EXPORT_SYMBOL(sie_exit)
static void __ipl_run(void *unused)
{
- if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
- diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
diag308(DIAG308_LOAD_CLEAR, NULL);
if (MACHINE_IS_VM)
__cpcmd("IPL", NULL, 0, NULL);
break;
case REIPL_METHOD_CCW_DIAG:
diag308(DIAG308_SET, reipl_block_ccw);
- if (MACHINE_IS_LPAR)
- diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
- else
- diag308(DIAG308_LOAD_CLEAR, NULL);
+ diag308(DIAG308_LOAD_CLEAR, NULL);
break;
case REIPL_METHOD_FCP_RW_DIAG:
diag308(DIAG308_SET, reipl_block_fcp);
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
-/*
- * Return saved PC of a blocked thread. used in kernel/sched.
- * resume in entry.S does not create a new stack frame, it
- * just stores the registers %r6-%r15 to the frame given by
- * schedule. We want to return the address of the caller of
- * schedule, so we have to walk the backchain one time to
- * find the frame schedule() store its return address.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct stack_frame *sf, *low, *high;
-
- if (!tsk || !task_stack_page(tsk))
- return 0;
- low = task_stack_page(tsk);
- high = (struct stack_frame *) task_pt_regs(tsk);
- sf = (struct stack_frame *) tsk->thread.ksp;
- if (sf <= low || sf > high)
- return 0;
- sf = (struct stack_frame *) sf->back_chain;
- if (sf <= low || sf > high)
- return 0;
- return sf->gprs[8];
-}
-
extern void kernel_thread_starter(void);
/*
static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
{
- if (!memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be)))
+ if (uuid_is_null(&info->vm[i].uuid))
return;
seq_printf(m, "VM%02d UUID: %pUb\n", i, &info->vm[i].uuid);
}
ptr = asce.origin * 4096;
if (asce.r) {
*fake = 1;
+ ptr = 0;
asce.dt = ASCE_TYPE_REGION1;
}
switch (asce.dt) {
case ASCE_TYPE_REGION1:
- if (vaddr.rfx01 > asce.tl && !asce.r)
+ if (vaddr.rfx01 > asce.tl && !*fake)
return PGM_REGION_FIRST_TRANS;
break;
case ASCE_TYPE_REGION2:
union region1_table_entry rfte;
if (*fake) {
- /* offset in 16EB guest memory block */
- ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
+ ptr += (unsigned long) vaddr.rfx << 53;
rfte.val = ptr;
goto shadow_r2t;
}
union region2_table_entry rste;
if (*fake) {
- /* offset in 8PB guest memory block */
- ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
+ ptr += (unsigned long) vaddr.rsx << 42;
rste.val = ptr;
goto shadow_r3t;
}
union region3_table_entry rtte;
if (*fake) {
- /* offset in 4TB guest memory block */
- ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
+ ptr += (unsigned long) vaddr.rtx << 31;
rtte.val = ptr;
goto shadow_sgt;
}
union segment_table_entry ste;
if (*fake) {
- /* offset in 2G guest memory block */
- ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
+ ptr += (unsigned long) vaddr.sx << 20;
ste.val = ptr;
goto shadow_pgt;
}
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
goto check_asce_limit;
}
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
goto check_asce_limit;
}
*/
extern void (*cpu_wait)(void);
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
extern void start_thread(struct pt_regs *regs,
unsigned long pc, unsigned long sp);
extern unsigned long get_wchan(struct task_struct *p);
return 1;
}
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return task_pt_regs(tsk)->cp0_epc;
-}
-
unsigned long get_wchan(struct task_struct *task)
{
if (!task || task == current || task->state == TASK_RUNNING)
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
.current_ds = KERNEL_DS, \
}
-/* Return saved PC of a blocked thread. */
-unsigned long thread_saved_pc(struct task_struct *t);
-
/* Do necessary setup to start up a newly executed thread. */
static inline void start_thread(struct pt_regs * regs, unsigned long pc,
unsigned long sp)
#include <linux/types.h>
#include <asm/fpumacro.h>
-/* Return saved PC of a blocked thread. */
struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *);
/* On Uniprocessor, even in RMO processes see TSO semantics */
#ifdef CONFIG_SMP
}
/*
- * Note: sparc64 has a pretty intricated thread_saved_pc, check it out.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return task_thread_info(tsk)->kpc;
-}
-
-/*
* Free current thread data structures etc..
*/
void exit_thread(struct task_struct *tsk)
#endif
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct thread_info *ti = task_thread_info(tsk);
- unsigned long ret = 0xdeadbeefUL;
-
- if (ti && ti->ksp) {
- unsigned long *sp;
- sp = (unsigned long *)(ti->ksp + STACK_BIAS);
- if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL &&
- sp[14]) {
- unsigned long *fp;
- fp = (unsigned long *)(sp[14] + STACK_BIAS);
- if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL)
- ret = fp[15];
- }
- }
- return ret;
-}
-
/* Free current thread data structures etc.. */
void exit_thread(struct task_struct *tsk)
{
vma = find_vma(mm, addr);
if (task_size - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
vma = find_vma(mm, addr);
if (task_size - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (task_size - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
if (mm->get_unmapped_area == arch_get_unmapped_area)
extern void prepare_exit_to_usermode(struct pt_regs *regs, u32 flags);
-
-/*
- * Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
- */
-#define thread_saved_pc(t) ((t)->thread.pc)
-
unsigned long get_wchan(struct task_struct *p);
/* Return initial ksp value for given task. */
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
if (current->mm->get_unmapped_area == arch_get_unmapped_area)
for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
blk_end_request(
(*irq_req_buffer)[count]->req,
- 0,
+ BLK_STS_OK,
(*irq_req_buffer)[count]->length
);
kfree((*irq_req_buffer)[count]);
{
}
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
static inline void mm_copy_segments(struct mm_struct *from_mm,
struct mm_struct *new_mm)
{
__attribute__((__section__(".data..init_irqstack"))) =
{ INIT_THREAD_INFO(init_task) };
-unsigned long thread_saved_pc(struct task_struct *task)
-{
- /* FIXME: Need to look up userspace_pid by cpu */
- return os_process_pc(userspace_pid[0]);
-}
-
/* Changed in setup_arch, which is called in early boot */
static char host_info[(__NEW_UTS_LEN + 1) * 5];
{
unsigned long random_addr, min_addr;
- /* By default, keep output position unchanged. */
- *virt_addr = *output;
-
if (cmdline_find_option_bool("nokaslr")) {
warn("KASLR disabled: 'nokaslr' on cmdline.");
return;
unsigned long output_len)
{
const unsigned long kernel_total_size = VO__end - VO__text;
- unsigned long virt_addr = (unsigned long)output;
+ unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
/* Retain x86 boot parameters pointer passed from startup_32/64. */
boot_params = rmode;
#ifdef CONFIG_X86_64
if (heap > 0x3fffffffffffUL)
error("Destination address too large");
+ if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+ error("Destination virtual address is beyond the kernel mapping area");
#else
if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
error("Destination address too large");
#ifndef CONFIG_RELOCATABLE
if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
error("Destination address does not match LOAD_PHYSICAL_ADDR");
- if ((unsigned long)output != virt_addr)
+ if (virt_addr != LOAD_PHYSICAL_ADDR)
error("Destination virtual address changed when not relocatable");
#endif
unsigned long output_size,
unsigned long *virt_addr)
{
- /* No change from existing output location. */
- *virt_addr = *output;
}
#endif
# Arch-specific CryptoAPI modules.
#
+OBJECT_FILES_NON_STANDARD := y
+
avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
$(comma)4)$(comma)%ymm2,yes,no)
# Arch-specific CryptoAPI modules.
#
+OBJECT_FILES_NON_STANDARD := y
+
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
$(comma)4)$(comma)%ymm2,yes,no)
ifeq ($(avx2_supported),yes)
# Arch-specific CryptoAPI modules.
#
+OBJECT_FILES_NON_STANDARD := y
+
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
$(comma)4)$(comma)%ymm2,yes,no)
ifeq ($(avx2_supported),yes)
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
- [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
+ [ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
- [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
+ [ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[pkg];
- if (!box && atomic_inc_return(&box->refcnt) == 1)
+ if (box && atomic_inc_return(&box->refcnt) == 1)
uncore_box_init(box);
}
}
} while (0)
extern int fixup_exception(struct pt_regs *regs, int trapnr);
+extern int fixup_bug(struct pt_regs *regs, int trapnr);
extern bool ex_has_fault_handler(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
+ bool tf; /* TF value before instruction (after for syscall/sysret) */
bool have_exception;
struct x86_exception exception;
#define _ASM_X86_MSHYPER_H
#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
+#include <linux/atomic.h>
#include <asm/hyperv.h>
/*
#endif /* CONFIG_X86_64 */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
unsigned long new_sp);
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_test_nx.o := y
+OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y
# If instrumentation of this dir is enabled, boot hangs during first second.
# Probably could be more selective here, but note that files related to irqs,
+OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y
+
obj-$(CONFIG_ACPI) += boot.o
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o
obj-$(CONFIG_ACPI_APEI) += apei.o
dentry = kernfs_mount(fs_type, flags, rdt_root,
RDTGROUP_SUPER_MAGIC, NULL);
if (IS_ERR(dentry))
- goto out_cdp;
+ goto out_destroy;
static_branch_enable(&rdt_enable_key);
goto out;
+out_destroy:
+ kernfs_remove(kn_info);
out_cdp:
cdp_disable();
out:
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
+#include <linux/frame.h>
#include <asm/text-patching.h>
#include <asm/cacheflush.h>
}
asm (
+ "optprobe_template_func:\n"
".global optprobe_template_entry\n"
"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
" popf\n"
#endif
".global optprobe_template_end\n"
- "optprobe_template_end:\n");
+ "optprobe_template_end:\n"
+ ".type optprobe_template_func, @function\n"
+ ".size optprobe_template_func, .-optprobe_template_func\n");
+
+void optprobe_template_func(void);
+STACK_FRAME_NON_STANDARD(optprobe_template_func);
#define TMPL_MOVE_IDX \
((long)&optprobe_template_val - (long)&optprobe_template_entry)
}
/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct inactive_task_frame *frame =
- (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
- return READ_ONCE_NOCHECK(frame->ret_addr);
-}
-
-/*
* Called from fs/proc with a reference on @p to find the function
* which called into schedule(). This needs to be done carefully
* because the task might wake up and we might look at a stack
#include <linux/sched.h>
#include <linux/tboot.h>
#include <linux/delay.h>
+#include <linux/frame.h>
#include <acpi/reboot.h>
#include <asm/io.h>
#include <asm/apic.h>
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(machine_real_restart);
#endif
+STACK_FRAME_NON_STANDARD(machine_real_restart);
/*
* Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (end - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
if (!tboot_enabled())
return 0;
- if (!intel_iommu_tboot_noforce)
+ if (intel_iommu_tboot_noforce)
return 1;
if (no_iommu || swiotlb || dmar_disabled)
return ud == INSN_UD0 || ud == INSN_UD2;
}
-static int fixup_bug(struct pt_regs *regs, int trapnr)
+int fixup_bug(struct pt_regs *regs, int trapnr)
{
if (trapnr != X86_TRAP_UD)
return 0;
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
}
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
return X86EMUL_CONTINUE;
}
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/hashtable.h>
+#include <linux/frame.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
mark_all_clean(svm->vmcb);
}
+STACK_FRAME_NON_STANDARD(svm_vcpu_run);
static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/hrtimer.h>
+#include <linux/frame.h>
#include "kvm_cache_regs.h"
#include "x86.h"
);
}
}
+STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
static bool vmx_has_high_real_mode_segbase(void)
{
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
}
+STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
ctxt->eflags = kvm_get_rflags(vcpu);
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
ctxt->eip = kvm_rip_read(vcpu);
ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
(ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
return dr6;
}
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
{
struct kvm_run *kvm_run = vcpu->run;
- /*
- * rflags is the old, "raw" value of the flags. The new value has
- * not been saved yet.
- *
- * This is correct even for TF set by the guest, because "the
- * processor will not generate this exception after the instruction
- * that sets the TF flag".
- */
- if (unlikely(rflags & X86_EFLAGS_TF)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
- DR6_RTM;
- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = EMULATE_USER_EXIT;
- } else {
- /*
- * "Certain debug exceptions may clear bit 0-3. The
- * remaining contents of the DR6 register are never
- * cleared by the processor".
- */
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
- kvm_queue_exception(vcpu, DB_VECTOR);
- }
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+ kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ *r = EMULATE_USER_EXIT;
+ } else {
+ /*
+ * "Certain debug exceptions may clear bit 0-3. The
+ * remaining contents of the DR6 register are never
+ * cleared by the processor".
+ */
+ vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+ kvm_queue_exception(vcpu, DB_VECTOR);
}
}
int r = EMULATE_DONE;
kvm_x86_ops->skip_emulated_instruction(vcpu);
- kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+
+ /*
+ * rflags is the old, "raw" value of the flags. The new value has
+ * not been saved yet.
+ *
+ * This is correct even for TF set by the guest, because "the
+ * processor will not generate this exception after the instruction
+ * that sets the TF flag".
+ */
+ if (unlikely(rflags & X86_EFLAGS_TF))
+ kvm_vcpu_do_singlestep(vcpu, &r);
return r == EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip);
- if (r == EMULATE_DONE)
- kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+ if (r == EMULATE_DONE &&
+ (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+ kvm_vcpu_do_singlestep(vcpu, &r);
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP)
__kvm_set_rflags(vcpu, ctxt->eflags);
.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
pushq %rbx
- pushq %rbp
+ pushq %r12
movq %rdi, %r10 /* Save pointer */
xorl %r11d, %r11d /* Return value */
movl (%rdi), %eax
movl 4(%rdi), %ecx
movl 8(%rdi), %edx
movl 12(%rdi), %ebx
- movl 20(%rdi), %ebp
+ movl 20(%rdi), %r12d
movl 24(%rdi), %esi
movl 28(%rdi), %edi
1: \op
movl %ecx, 4(%r10)
movl %edx, 8(%r10)
movl %ebx, 12(%r10)
- movl %ebp, 20(%r10)
+ movl %r12d, 20(%r10)
movl %esi, 24(%r10)
movl %edi, 28(%r10)
- popq %rbp
+ popq %r12
popq %rbx
ret
3:
if (fixup_exception(regs, trapnr))
return;
+ if (fixup_bug(regs, trapnr))
+ return;
+
fail:
early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
(unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
if (mm->get_unmapped_area == arch_get_unmapped_area)
static void __init probe_page_size_mask(void)
{
-#if !defined(CONFIG_KMEMCHECK)
/*
* For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will
* use small pages.
* This will simplify cpa(), which otherwise needs to support splitting
* large pages into small in interrupt context, etc.
*/
- if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
+ if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled() && !IS_ENABLED(CONFIG_KMEMCHECK))
page_size_mask |= 1 << PG_LEVEL_2M;
-#endif
+ else
+ direct_gbpages = 0;
/* Enable PSE if available */
if (boot_cpu_has(X86_FEATURE_PSE))
pud_base = pud_offset(p4d, 0);
remove_pud_table(pud_base, addr, next, direct);
- free_pud_table(pud_base, p4d);
+ /*
+ * For 4-level page tables we do not want to free PUDs, but in the
+ * 5-level case we should free them. This code will have to change
+ * to adapt for boot-time switching between 4 and 5 level page tables.
+ */
+ if (CONFIG_PGTABLE_LEVELS == 5)
+ free_pud_table(pud_base, p4d);
}
if (direct)
#
# Arch-specific network modules
#
+OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
+
obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y
obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o
+OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y
+
# __restore_processor_state() restores %gs after S3 resume and so should not
# itself be stack-protected
nostackp := $(call cc-option, -fno-stack-protector)
+OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_xen-pvh.o := y
+
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_spinlock.o = -pg
# define PLATFORM_NR_IRQS 0
#endif
#define XTENSA_NR_IRQS XCHAL_NUM_INTERRUPTS
-#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS)
+#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS + 1)
+#define XTENSA_PIC_LINUX_IRQ(hwirq) ((hwirq) + 1)
#if VARIANT_NR_IRQS == 0
static inline void variant_init_irq(void) { }
#define release_segments(mm) do { } while(0)
#define forget_segments() do { } while (0)
-#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
-
extern unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
{
int irq = irq_find_mapping(NULL, hwirq);
- if (hwirq >= NR_IRQS) {
- printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
- __func__, hwirq);
- }
-
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Debugging check for stack overflow: is there less than 1KB free? */
{
(ccount_freq/10000) % 100,
loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ)) % 100);
-
- seq_printf(f,"flags\t\t: "
+ seq_puts(f, "flags\t\t: "
#if XCHAL_HAVE_NMI
"nmi "
#endif
/* At this point: (!vmm || addr < vmm->vm_end). */
if (TASK_SIZE - len < addr)
return -ENOMEM;
- if (!vmm || addr + len <= vmm->vm_start)
+ if (!vmm || addr + len <= vm_start_gap(vmm))
return addr;
addr = vmm->vm_end;
if (flags & MAP_SHARED)
SECTION_VECTOR (.KernelExceptionVector.text, KERNEL_VECTOR_VADDR)
SECTION_VECTOR (.UserExceptionVector.literal, USER_VECTOR_VADDR - 4)
SECTION_VECTOR (.UserExceptionVector.text, USER_VECTOR_VADDR)
- SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 48)
+ SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 20)
SECTION_VECTOR (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR)
#endif
.UserExceptionVector.literal)
SECTION_VECTOR (_DoubleExceptionVector_literal,
.DoubleExceptionVector.literal,
- DOUBLEEXC_VECTOR_VADDR - 48,
+ DOUBLEEXC_VECTOR_VADDR - 20,
SIZEOF(.UserExceptionVector.text),
.UserExceptionVector.text)
SECTION_VECTOR (_DoubleExceptionVector_text,
.DoubleExceptionVector.text,
DOUBLEEXC_VECTOR_VADDR,
- 48,
+ 20,
.DoubleExceptionVector.literal)
. = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3;
if (simdisk_count > MAX_SIMDISK_COUNT)
simdisk_count = MAX_SIMDISK_COUNT;
- sddev = kmalloc(simdisk_count * sizeof(struct simdisk),
- GFP_KERNEL);
+ sddev = kmalloc_array(simdisk_count, sizeof(*sddev), GFP_KERNEL);
if (sddev == NULL)
goto out_unregister;
/* Interrupt configuration. */
-#define PLATFORM_NR_IRQS 10
+#define PLATFORM_NR_IRQS 0
/* Default assignment of LX60 devices to external interrupts. */
#ifdef CONFIG_XTENSA_MX
#define DUART16552_INTNUM XCHAL_EXTINT3_NUM
#define OETH_IRQ XCHAL_EXTINT4_NUM
+#define C67X00_IRQ XCHAL_EXTINT8_NUM
#else
#define DUART16552_INTNUM XCHAL_EXTINT0_NUM
#define OETH_IRQ XCHAL_EXTINT1_NUM
+#define C67X00_IRQ XCHAL_EXTINT5_NUM
#endif
/*
#define C67X00_PADDR (XCHAL_KIO_PADDR + 0x0D0D0000)
#define C67X00_SIZE 0x10
-#define C67X00_IRQ 5
+
#endif /* __XTENSA_XTAVNET_HARDWARE_H */
.flags = IORESOURCE_MEM,
},
[2] = { /* IRQ number */
- .start = OETH_IRQ,
- .end = OETH_IRQ,
+ .start = XTENSA_PIC_LINUX_IRQ(OETH_IRQ),
+ .end = XTENSA_PIC_LINUX_IRQ(OETH_IRQ),
.flags = IORESOURCE_IRQ,
},
};
.flags = IORESOURCE_MEM,
},
[1] = { /* IRQ number */
- .start = C67X00_IRQ,
- .end = C67X00_IRQ,
+ .start = XTENSA_PIC_LINUX_IRQ(C67X00_IRQ),
+ .end = XTENSA_PIC_LINUX_IRQ(C67X00_IRQ),
.flags = IORESOURCE_IRQ,
},
};
static struct plat_serial8250_port serial_platform_data[] = {
[0] = {
.mapbase = DUART16552_PADDR,
- .irq = DUART16552_INTNUM,
+ .irq = XTENSA_PIC_LINUX_IRQ(DUART16552_INTNUM),
.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
UPF_IOREMAP,
.iotype = XCHAL_HAVE_BE ? UPIO_MEM32BE : UPIO_MEM32,
case 3:
if (newline != '\n')
return -EINVAL;
+ /* fall through */
case 2:
if (length <= 0)
return -EINVAL;
}
static void
-bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
+ struct bfq_io_cq *bic, bool bfq_already_existing)
{
+ unsigned int old_wr_coeff = bfqq->wr_coeff;
+ bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
+
if (bic->saved_idle_window)
bfq_mark_bfqq_idle_window(bfqq);
else
/* make sure weight will be updated, however we got here */
bfqq->entity.prio_changed = 1;
+
+ if (likely(!busy))
+ return;
+
+ if (old_wr_coeff == 1 && bfqq->wr_coeff > 1)
+ bfqd->wr_busy_queues++;
+ else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1)
+ bfqd->wr_busy_queues--;
}
static int bfqq_process_refs(struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
+static void bfq_finish_request(struct request *rq)
{
- struct bfq_queue *bfqq = RQ_BFQQ(rq);
- struct bfq_data *bfqd = bfqq->bfqd;
+ struct bfq_queue *bfqq;
+ struct bfq_data *bfqd;
+
+ if (!rq->elv.icq)
+ return;
+
+ bfqq = RQ_BFQQ(rq);
+ bfqd = bfqq->bfqd;
if (rq->rq_flags & RQF_STARTED)
bfqg_stats_update_completion(bfqq_group(bfqq),
*/
if (!RB_EMPTY_NODE(&rq->rb_node))
- bfq_remove_request(q, rq);
+ bfq_remove_request(rq->q, rq);
bfq_put_rq_priv_body(bfqq);
}
/*
* Allocate bfq data structures associated with this request.
*/
-static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
- struct bio *bio)
+static void bfq_prepare_request(struct request *rq, struct bio *bio)
{
+ struct request_queue *q = rq->q;
struct bfq_data *bfqd = q->elevator->elevator_data;
- struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
+ struct bfq_io_cq *bic;
const int is_sync = rq_is_sync(rq);
struct bfq_queue *bfqq;
bool new_queue = false;
- bool split = false;
+ bool bfqq_already_existing = false, split = false;
- spin_lock_irq(&bfqd->lock);
+ if (!rq->elv.icq)
+ return;
+ bic = icq_to_bic(rq->elv.icq);
- if (!bic)
- goto queue_fail;
+ spin_lock_irq(&bfqd->lock);
bfq_check_ioprio_change(bic, bio);
bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
true, is_sync,
NULL);
+ else
+ bfqq_already_existing = true;
}
}
* queue: restore the idle window and the
* possible weight raising period.
*/
- bfq_bfqq_resume_state(bfqq, bic);
+ bfq_bfqq_resume_state(bfqq, bfqd, bic,
+ bfqq_already_existing);
}
}
bfq_handle_burst(bfqd, bfqq);
spin_unlock_irq(&bfqd->lock);
-
- return 0;
-
-queue_fail:
- spin_unlock_irq(&bfqd->lock);
-
- return 1;
}
static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
static struct elevator_type iosched_bfq_mq = {
.ops.mq = {
- .get_rq_priv = bfq_get_rq_private,
- .put_rq_priv = bfq_put_rq_private,
+ .prepare_request = bfq_prepare_request,
+ .finish_request = bfq_finish_request,
.exit_icq = bfq_exit_icq,
.insert_requests = bfq_insert_requests,
.dispatch_request = bfq_dispatch_request,
* @bio: bio to generate/verify integrity metadata for
* @proc_fn: Pointer to the relevant processing function
*/
-static int bio_integrity_process(struct bio *bio,
+static blk_status_t bio_integrity_process(struct bio *bio,
integrity_processing_fn *proc_fn)
{
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct bvec_iter bviter;
struct bio_vec bv;
struct bio_integrity_payload *bip = bio_integrity(bio);
- unsigned int ret = 0;
+ blk_status_t ret = BLK_STS_OK;
void *prot_buf = page_address(bip->bip_vec->bv_page) +
bip->bip_vec->bv_offset;
struct bio *bio = bip->bip_bio;
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
- bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn);
+ bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn);
/* Restore original bio completion handler */
bio->bi_end_io = bip->bip_end_io;
* integrity metadata. Restore original bio end_io handler
* and run it.
*/
- if (bio->bi_error) {
+ if (bio->bi_status) {
bio->bi_end_io = bip->bip_end_io;
bio_endio(bio);
return bvl;
}
-static void __bio_free(struct bio *bio)
+void bio_uninit(struct bio *bio)
{
bio_disassociate_task(bio);
if (bio_integrity(bio))
bio_integrity_free(bio);
}
+EXPORT_SYMBOL(bio_uninit);
static void bio_free(struct bio *bio)
{
struct bio_set *bs = bio->bi_pool;
void *p;
- __bio_free(bio);
+ bio_uninit(bio);
if (bs) {
bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
}
}
+/*
+ * Users of this function have their own bio allocation. Subsequently,
+ * they must remember to pair any call to bio_init() with bio_uninit()
+ * when IO has completed, or when the bio is released.
+ */
void bio_init(struct bio *bio, struct bio_vec *table,
unsigned short max_vecs)
{
{
unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
- __bio_free(bio);
+ bio_uninit(bio);
memset(bio, 0, BIO_RESET_BYTES);
bio->bi_flags = flags;
{
struct bio *parent = bio->bi_private;
- if (!parent->bi_error)
- parent->bi_error = bio->bi_error;
+ if (!parent->bi_status)
+ parent->bi_status = bio->bi_status;
bio_put(bio);
return parent;
}
struct bio_list punt, nopunt;
struct bio *bio;
+ if (WARN_ON_ONCE(!bs->rescue_workqueue))
+ return;
/*
* In order to guarantee forward progress we must punt only bios that
* were allocated from this bio_set; otherwise, if there was a bio on
if (current->bio_list &&
(!bio_list_empty(¤t->bio_list[0]) ||
- !bio_list_empty(¤t->bio_list[1])))
+ !bio_list_empty(¤t->bio_list[1])) &&
+ bs->rescue_workqueue)
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
p = mempool_alloc(bs->bio_pool, gfp_mask);
*
* Description:
* Put a reference to a &struct bio, either one you have gotten with
- * bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
+ * bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
**/
void bio_put(struct bio *bio)
{
bio->bi_bdev = bio_src->bi_bdev;
bio_set_flag(bio, BIO_CLONED);
bio->bi_opf = bio_src->bi_opf;
+ bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter = bio_src->bi_iter;
bio->bi_io_vec = bio_src->bi_io_vec;
return NULL;
bio->bi_bdev = bio_src->bi_bdev;
bio->bi_opf = bio_src->bi_opf;
+ bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
{
struct submit_bio_ret *ret = bio->bi_private;
- ret->error = bio->bi_error;
+ ret->error = blk_status_to_errno(bio->bi_status);
complete(&ret->event);
}
}
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
- trace_block_bio_complete(bdev_get_queue(bio->bi_bdev),
- bio, bio->bi_error);
+ trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio,
+ blk_status_to_errno(bio->bi_status));
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
}
}
EXPORT_SYMBOL(bioset_free);
-static struct bio_set *__bioset_create(unsigned int pool_size,
- unsigned int front_pad,
- bool create_bvec_pool)
+/**
+ * bioset_create - Create a bio_set
+ * @pool_size: Number of bio and bio_vecs to cache in the mempool
+ * @front_pad: Number of bytes to allocate in front of the returned bio
+ * @flags: Flags to modify behavior, currently %BIOSET_NEED_BVECS
+ * and %BIOSET_NEED_RESCUER
+ *
+ * Description:
+ * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
+ * to ask for a number of bytes to be allocated in front of the bio.
+ * Front pad allocation is useful for embedding the bio inside
+ * another structure, to avoid allocating extra data to go with the bio.
+ * Note that the bio must be embedded at the END of that structure always,
+ * or things will break badly.
+ * If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
+ * for allocating iovecs. This pool is not needed e.g. for bio_clone_fast().
+ * If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to
+ * dispatch queued requests when the mempool runs out of space.
+ *
+ */
+struct bio_set *bioset_create(unsigned int pool_size,
+ unsigned int front_pad,
+ int flags)
{
unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
struct bio_set *bs;
if (!bs->bio_pool)
goto bad;
- if (create_bvec_pool) {
+ if (flags & BIOSET_NEED_BVECS) {
bs->bvec_pool = biovec_create_pool(pool_size);
if (!bs->bvec_pool)
goto bad;
}
+ if (!(flags & BIOSET_NEED_RESCUER))
+ return bs;
+
bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
if (!bs->rescue_workqueue)
goto bad;
bioset_free(bs);
return NULL;
}
-
-/**
- * bioset_create - Create a bio_set
- * @pool_size: Number of bio and bio_vecs to cache in the mempool
- * @front_pad: Number of bytes to allocate in front of the returned bio
- *
- * Description:
- * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
- * to ask for a number of bytes to be allocated in front of the bio.
- * Front pad allocation is useful for embedding the bio inside
- * another structure, to avoid allocating extra data to go with the bio.
- * Note that the bio must be embedded at the END of that structure always,
- * or things will break badly.
- */
-struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
-{
- return __bioset_create(pool_size, front_pad, true);
-}
EXPORT_SYMBOL(bioset_create);
-/**
- * bioset_create_nobvec - Create a bio_set without bio_vec mempool
- * @pool_size: Number of bio to cache in the mempool
- * @front_pad: Number of bytes to allocate in front of the returned bio
- *
- * Description:
- * Same functionality as bioset_create() except that mempool is not
- * created for bio_vecs. Saving some memory for bio_clone_fast() users.
- */
-struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
-{
- return __bioset_create(pool_size, front_pad, false);
-}
-EXPORT_SYMBOL(bioset_create_nobvec);
-
#ifdef CONFIG_BLK_CGROUP
/**
bio_integrity_init();
biovec_init_slabs();
- fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
+ fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!fs_bio_set)
panic("bio: can't allocate bios\n");
}
EXPORT_SYMBOL(blk_rq_init);
+static const struct {
+ int errno;
+ const char *name;
+} blk_errors[] = {
+ [BLK_STS_OK] = { 0, "" },
+ [BLK_STS_NOTSUPP] = { -EOPNOTSUPP, "operation not supported" },
+ [BLK_STS_TIMEOUT] = { -ETIMEDOUT, "timeout" },
+ [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" },
+ [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" },
+ [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" },
+ [BLK_STS_NEXUS] = { -EBADE, "critical nexus" },
+ [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" },
+ [BLK_STS_PROTECTION] = { -EILSEQ, "protection" },
+ [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" },
+ [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" },
+
+ /* device mapper special case, should not leak out: */
+ [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" },
+
+ /* everything else not covered above: */
+ [BLK_STS_IOERR] = { -EIO, "I/O" },
+};
+
+blk_status_t errno_to_blk_status(int errno)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
+ if (blk_errors[i].errno == errno)
+ return (__force blk_status_t)i;
+ }
+
+ return BLK_STS_IOERR;
+}
+EXPORT_SYMBOL_GPL(errno_to_blk_status);
+
+int blk_status_to_errno(blk_status_t status)
+{
+ int idx = (__force int)status;
+
+ if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+ return -EIO;
+ return blk_errors[idx].errno;
+}
+EXPORT_SYMBOL_GPL(blk_status_to_errno);
+
+static void print_req_error(struct request *req, blk_status_t status)
+{
+ int idx = (__force int)status;
+
+ if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+ return;
+
+ printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
+ __func__, blk_errors[idx].name, req->rq_disk ?
+ req->rq_disk->disk_name : "?",
+ (unsigned long long)blk_rq_pos(req));
+}
+
static void req_bio_endio(struct request *rq, struct bio *bio,
- unsigned int nbytes, int error)
+ unsigned int nbytes, blk_status_t error)
{
if (error)
- bio->bi_error = error;
+ bio->bi_status = error;
if (unlikely(rq->rq_flags & RQF_QUIET))
bio_set_flag(bio, BIO_QUIET);
* Description:
* Sometimes queueing needs to be postponed for a little while, to allow
* resources to come back. This function will make sure that queueing is
- * restarted around the specified time. Queue lock must be held.
+ * restarted around the specified time.
*/
void blk_delay_queue(struct request_queue *q, unsigned long msecs)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
if (likely(!blk_queue_dead(q)))
queue_delayed_work(kblockd_workqueue, &q->delay_work,
msecs_to_jiffies(msecs));
**/
void blk_start_queue_async(struct request_queue *q)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
blk_run_queue_async(q);
}
* Description:
* blk_start_queue() will clear the stop flag on the queue, and call
* the request_fn for the queue if it was in a stopped state when
- * entered. Also see blk_stop_queue(). Queue lock must be held.
+ * entered. Also see blk_stop_queue().
**/
void blk_start_queue(struct request_queue *q)
{
+ lockdep_assert_held(q->queue_lock);
WARN_ON(!irqs_disabled());
+ WARN_ON_ONCE(q->mq_ops);
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
__blk_run_queue(q);
* or if it simply chooses not to queue more I/O at one point, it can
* call this function to prevent the request_fn from being called until
* the driver has signalled it's ready to go again. This happens by calling
- * blk_start_queue() to restart queue operations. Queue lock must be held.
+ * blk_start_queue() to restart queue operations.
**/
void blk_stop_queue(struct request_queue *q)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
cancel_delayed_work(&q->delay_work);
queue_flag_set(QUEUE_FLAG_STOPPED, q);
}
*/
inline void __blk_run_queue_uncond(struct request_queue *q)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
if (unlikely(blk_queue_dead(q)))
return;
* @q: The queue to run
*
* Description:
- * See @blk_run_queue. This variant must be called with the queue lock
- * held and interrupts disabled.
+ * See @blk_run_queue.
*/
void __blk_run_queue(struct request_queue *q)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
if (unlikely(blk_queue_stopped(q)))
return;
*
* Description:
* Tells kblockd to perform the equivalent of @blk_run_queue on behalf
- * of us. The caller must hold the queue lock.
+ * of us.
+ *
+ * Note:
+ * Since it is not allowed to run q->delay_work after blk_cleanup_queue()
+ * has canceled q->delay_work, callers must hold the queue lock to avoid
+ * race conditions between blk_cleanup_queue() and blk_run_queue_async().
*/
void blk_run_queue_async(struct request_queue *q)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
}
{
unsigned long flags;
+ WARN_ON_ONCE(q->mq_ops);
+
spin_lock_irqsave(q->queue_lock, flags);
__blk_run_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
int i;
lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
while (true) {
bool drain = false;
*/
void blk_queue_bypass_start(struct request_queue *q)
{
+ WARN_ON_ONCE(q->mq_ops);
+
spin_lock_irq(q->queue_lock);
q->bypass_depth++;
queue_flag_set(QUEUE_FLAG_BYPASS, q);
* @q: queue of interest
*
* Leave bypass mode and restore the normal queueing behavior.
+ *
+ * Note: although blk_queue_bypass_start() is only called for blk-sq queues,
+ * this function is called for both blk-sq and blk-mq queues.
*/
void blk_queue_bypass_end(struct request_queue *q)
{
if (q->id < 0)
goto fail_q;
- q->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!q->bio_split)
goto fail_id;
int blk_init_allocated_queue(struct request_queue *q)
{
+ WARN_ON_ONCE(q->mq_ops);
+
q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
if (!q->fq)
return -ENOMEM;
struct request_list *rl;
int on_thresh, off_thresh;
+ WARN_ON_ONCE(q->mq_ops);
+
spin_lock_irq(q->queue_lock);
q->nr_requests = nr;
blk_queue_congestion_threshold(q);
int may_queue;
req_flags_t rq_flags = RQF_ALLOCED;
+ lockdep_assert_held(q->queue_lock);
+
if (unlikely(blk_queue_dying(q)))
return ERR_PTR(-ENODEV);
struct request_list *rl;
struct request *rq;
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
rl = blk_get_rl(q, bio); /* transferred to @rq on success */
retry:
rq = __get_request(rl, op, bio, gfp_mask);
if (!IS_ERR(rq))
return rq;
+ if (op & REQ_NOWAIT) {
+ blk_put_rl(rl);
+ return ERR_PTR(-EAGAIN);
+ }
+
if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
blk_put_rl(rl);
return rq;
goto retry;
}
-static struct request *blk_old_get_request(struct request_queue *q, int rw,
- gfp_t gfp_mask)
+static struct request *blk_old_get_request(struct request_queue *q,
+ unsigned int op, gfp_t gfp_mask)
{
struct request *rq;
+ WARN_ON_ONCE(q->mq_ops);
+
/* create ioc upfront */
create_io_context(gfp_mask, q->node);
spin_lock_irq(q->queue_lock);
- rq = get_request(q, rw, NULL, gfp_mask);
+ rq = get_request(q, op, NULL, gfp_mask);
if (IS_ERR(rq)) {
spin_unlock_irq(q->queue_lock);
return rq;
return rq;
}
-struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+ gfp_t gfp_mask)
{
- if (q->mq_ops)
- return blk_mq_alloc_request(q, rw,
+ struct request *req;
+
+ if (q->mq_ops) {
+ req = blk_mq_alloc_request(q, op,
(gfp_mask & __GFP_DIRECT_RECLAIM) ?
0 : BLK_MQ_REQ_NOWAIT);
- else
- return blk_old_get_request(q, rw, gfp_mask);
+ if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
+ q->mq_ops->initialize_rq_fn(req);
+ } else {
+ req = blk_old_get_request(q, op, gfp_mask);
+ if (!IS_ERR(req) && q->initialize_rq_fn)
+ q->initialize_rq_fn(req);
+ }
+
+ return req;
}
EXPORT_SYMBOL(blk_get_request);
*/
void blk_requeue_request(struct request_queue *q, struct request *rq)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
static inline void blk_pm_put_request(struct request *rq) {}
#endif
-/*
- * queue lock must be held
- */
void __blk_put_request(struct request_queue *q, struct request *req)
{
req_flags_t rq_flags = req->rq_flags;
return;
}
+ lockdep_assert_held(q->queue_lock);
+
blk_pm_put_request(req);
elv_completed_request(q, req);
req->ioprio = ioc->ioprio;
else
req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+ req->write_hint = bio->bi_write_hint;
blk_rq_bio_prep(req->q, req, bio);
}
EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
*/
blk_queue_bounce(q, &bio);
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return BLK_QC_T_NONE;
}
req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
if (IS_ERR(req)) {
__wbt_done(q->rq_wb, wb_acct);
- bio->bi_error = PTR_ERR(req);
+ if (PTR_ERR(req) == -ENOMEM)
+ bio->bi_status = BLK_STS_RESOURCE;
+ else
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
goto out_unlock;
}
{
struct request_queue *q;
int nr_sectors = bio_sectors(bio);
- int err = -EIO;
+ blk_status_t status = BLK_STS_IOERR;
char b[BDEVNAME_SIZE];
struct hd_struct *part;
goto end_io;
}
+ /*
+ * For a REQ_NOWAIT based request, return -EOPNOTSUPP
+ * if queue is not a request based queue.
+ */
+
+ if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
+ goto not_supported;
+
part = bio->bi_bdev->bd_part;
if (should_fail_request(part, bio->bi_iter.bi_size) ||
should_fail_request(&part_to_disk(part)->part0,
!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
if (!nr_sectors) {
- err = 0;
+ status = BLK_STS_OK;
goto end_io;
}
}
return true;
not_supported:
- err = -EOPNOTSUPP;
+ status = BLK_STS_NOTSUPP;
end_io:
- bio->bi_error = err;
+ bio->bi_status = status;
bio_endio(bio);
return false;
}
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- if (likely(blk_queue_enter(q, false) == 0)) {
+ if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
struct bio_list lower, same;
/* Create a fresh bio_list for all subordinate requests */
bio_list_merge(&bio_list_on_stack[0], &same);
bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
} else {
- bio_io_error(bio);
+ if (unlikely(!blk_queue_dying(q) &&
+ (bio->bi_opf & REQ_NOWAIT)))
+ bio_wouldblock_error(bio);
+ else
+ bio_io_error(bio);
}
bio = bio_list_pop(&bio_list_on_stack[0]);
} while (bio);
* @q: the queue to submit the request
* @rq: the request being queued
*/
-int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
unsigned long flags;
int where = ELEVATOR_INSERT_BACK;
if (blk_cloned_rq_check_limits(q, rq))
- return -EIO;
+ return BLK_STS_IOERR;
if (rq->rq_disk &&
should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
- return -EIO;
+ return BLK_STS_IOERR;
if (q->mq_ops) {
if (blk_queue_io_stat(q))
blk_account_io_start(rq, true);
blk_mq_sched_insert_request(rq, false, true, false, false);
- return 0;
+ return BLK_STS_OK;
}
spin_lock_irqsave(q->queue_lock, flags);
if (unlikely(blk_queue_dying(q))) {
spin_unlock_irqrestore(q->queue_lock, flags);
- return -ENODEV;
+ return BLK_STS_IOERR;
}
/*
__blk_run_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
- return 0;
+ return BLK_STS_OK;
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
*
* Return:
* The number of bytes to fail.
- *
- * Context:
- * queue_lock must be held.
*/
unsigned int blk_rq_err_bytes(const struct request *rq)
{
* Return:
* Pointer to the request at the top of @q if available. Null
* otherwise.
- *
- * Context:
- * queue_lock must be held.
*/
struct request *blk_peek_request(struct request_queue *q)
{
struct request *rq;
int ret;
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
while ((rq = __elv_next_request(q)) != NULL) {
rq = blk_pm_peek_request(q, rq);
rq = NULL;
break;
} else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
- int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
-
rq->rq_flags |= RQF_QUIET;
/*
* Mark this request as started so we don't trigger
* any debug logic in the end I/O path.
*/
blk_start_request(rq);
- __blk_end_request_all(rq, err);
+ __blk_end_request_all(rq, ret == BLKPREP_INVALID ?
+ BLK_STS_TARGET : BLK_STS_IOERR);
} else {
printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
break;
*
* Block internal functions which don't want to start timer should
* call blk_dequeue_request().
- *
- * Context:
- * queue_lock must be held.
*/
void blk_start_request(struct request *req)
{
+ lockdep_assert_held(req->q->queue_lock);
+ WARN_ON_ONCE(req->q->mq_ops);
+
blk_dequeue_request(req);
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
* Return:
* Pointer to the request at the top of @q if available. Null
* otherwise.
- *
- * Context:
- * queue_lock must be held.
*/
struct request *blk_fetch_request(struct request_queue *q)
{
struct request *rq;
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
rq = blk_peek_request(q);
if (rq)
blk_start_request(rq);
/**
* blk_update_request - Special helper function for request stacking drivers
* @req: the request being processed
- * @error: %0 for success, < %0 for error
+ * @error: block status code
* @nr_bytes: number of bytes to complete @req
*
* Description:
* %false - this request doesn't have any more data
* %true - this request has more data
**/
-bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
+bool blk_update_request(struct request *req, blk_status_t error,
+ unsigned int nr_bytes)
{
int total_bytes;
- trace_block_rq_complete(req, error, nr_bytes);
+ trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
if (!req->bio)
return false;
- if (error && !blk_rq_is_passthrough(req) &&
- !(req->rq_flags & RQF_QUIET)) {
- char *error_type;
-
- switch (error) {
- case -ENOLINK:
- error_type = "recoverable transport";
- break;
- case -EREMOTEIO:
- error_type = "critical target";
- break;
- case -EBADE:
- error_type = "critical nexus";
- break;
- case -ETIMEDOUT:
- error_type = "timeout";
- break;
- case -ENOSPC:
- error_type = "critical space allocation";
- break;
- case -ENODATA:
- error_type = "critical medium";
- break;
- case -EIO:
- default:
- error_type = "I/O";
- break;
- }
- printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
- __func__, error_type, req->rq_disk ?
- req->rq_disk->disk_name : "?",
- (unsigned long long)blk_rq_pos(req));
-
- }
+ if (unlikely(error && !blk_rq_is_passthrough(req) &&
+ !(req->rq_flags & RQF_QUIET)))
+ print_req_error(req, error);
blk_account_io_completion(req, nr_bytes);
}
EXPORT_SYMBOL_GPL(blk_update_request);
-static bool blk_update_bidi_request(struct request *rq, int error,
+static bool blk_update_bidi_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes,
unsigned int bidi_bytes)
{
}
EXPORT_SYMBOL_GPL(blk_unprep_request);
-/*
- * queue lock must be held
- */
-void blk_finish_request(struct request *req, int error)
+void blk_finish_request(struct request *req, blk_status_t error)
{
struct request_queue *q = req->q;
+ lockdep_assert_held(req->q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
if (req->rq_flags & RQF_STATS)
blk_stat_add(req);
/**
* blk_end_bidi_request - Complete a bidi request
* @rq: the request to complete
- * @error: %0 for success, < %0 for error
+ * @error: block status code
* @nr_bytes: number of bytes to complete @rq
* @bidi_bytes: number of bytes to complete @rq->next_rq
*
* %false - we are done with this request
* %true - still buffers pending for this request
**/
-static bool blk_end_bidi_request(struct request *rq, int error,
+static bool blk_end_bidi_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes, unsigned int bidi_bytes)
{
struct request_queue *q = rq->q;
unsigned long flags;
+ WARN_ON_ONCE(q->mq_ops);
+
if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
return true;
/**
* __blk_end_bidi_request - Complete a bidi request with queue lock held
* @rq: the request to complete
- * @error: %0 for success, < %0 for error
+ * @error: block status code
* @nr_bytes: number of bytes to complete @rq
* @bidi_bytes: number of bytes to complete @rq->next_rq
*
* %false - we are done with this request
* %true - still buffers pending for this request
**/
-static bool __blk_end_bidi_request(struct request *rq, int error,
+static bool __blk_end_bidi_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes, unsigned int bidi_bytes)
{
+ lockdep_assert_held(rq->q->queue_lock);
+ WARN_ON_ONCE(rq->q->mq_ops);
+
if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
return true;
/**
* blk_end_request - Helper function for drivers to complete the request.
* @rq: the request being processed
- * @error: %0 for success, < %0 for error
+ * @error: block status code
* @nr_bytes: number of bytes to complete
*
* Description:
* %false - we are done with this request
* %true - still buffers pending for this request
**/
-bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool blk_end_request(struct request *rq, blk_status_t error,
+ unsigned int nr_bytes)
{
+ WARN_ON_ONCE(rq->q->mq_ops);
return blk_end_bidi_request(rq, error, nr_bytes, 0);
}
EXPORT_SYMBOL(blk_end_request);
/**
* blk_end_request_all - Helper function for drives to finish the request.
* @rq: the request to finish
- * @error: %0 for success, < %0 for error
+ * @error: block status code
*
* Description:
* Completely finish @rq.
*/
-void blk_end_request_all(struct request *rq, int error)
+void blk_end_request_all(struct request *rq, blk_status_t error)
{
bool pending;
unsigned int bidi_bytes = 0;
/**
* __blk_end_request - Helper function for drivers to complete the request.
* @rq: the request being processed
- * @error: %0 for success, < %0 for error
+ * @error: block status code
* @nr_bytes: number of bytes to complete
*
* Description:
* %false - we are done with this request
* %true - still buffers pending for this request
**/
-bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool __blk_end_request(struct request *rq, blk_status_t error,
+ unsigned int nr_bytes)
{
+ lockdep_assert_held(rq->q->queue_lock);
+ WARN_ON_ONCE(rq->q->mq_ops);
+
return __blk_end_bidi_request(rq, error, nr_bytes, 0);
}
EXPORT_SYMBOL(__blk_end_request);
/**
* __blk_end_request_all - Helper function for drives to finish the request.
* @rq: the request to finish
- * @error: %0 for success, < %0 for error
+ * @error: block status code
*
* Description:
* Completely finish @rq. Must be called with queue lock held.
*/
-void __blk_end_request_all(struct request *rq, int error)
+void __blk_end_request_all(struct request *rq, blk_status_t error)
{
bool pending;
unsigned int bidi_bytes = 0;
+ lockdep_assert_held(rq->q->queue_lock);
+ WARN_ON_ONCE(rq->q->mq_ops);
+
if (unlikely(blk_bidi_rq(rq)))
bidi_bytes = blk_rq_bytes(rq->next_rq);
/**
* __blk_end_request_cur - Helper function to finish the current request chunk.
* @rq: the request to finish the current chunk for
- * @error: %0 for success, < %0 for error
+ * @error: block status code
*
* Description:
* Complete the current consecutively mapped chunk from @rq. Must
* %false - we are done with this request
* %true - still buffers pending for this request
*/
-bool __blk_end_request_cur(struct request *rq, int error)
+bool __blk_end_request_cur(struct request *rq, blk_status_t error)
{
return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
}
bool from_schedule)
__releases(q->queue_lock)
{
+ lockdep_assert_held(q->queue_lock);
+
trace_block_unplug(q, depth, !from_schedule);
if (from_schedule)
* Short-circuit if @q is dead
*/
if (unlikely(blk_queue_dying(q))) {
- __blk_end_request_all(rq, -ENODEV);
+ __blk_end_request_all(rq, BLK_STS_IOERR);
continue;
}
* @rq: request to complete
* @error: end I/O status of the request
*/
-static void blk_end_sync_rq(struct request *rq, int error)
+static void blk_end_sync_rq(struct request *rq, blk_status_t error)
{
struct completion *waiting = rq->end_io_data;
if (unlikely(blk_queue_dying(q))) {
rq->rq_flags |= RQF_QUIET;
- __blk_end_request_all(rq, -ENXIO);
+ __blk_end_request_all(rq, BLK_STS_IOERR);
spin_unlock_irq(q->queue_lock);
return;
}
*/
static bool blk_flush_complete_seq(struct request *rq,
struct blk_flush_queue *fq,
- unsigned int seq, int error)
+ unsigned int seq, blk_status_t error)
{
struct request_queue *q = rq->q;
struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
return kicked | queued;
}
-static void flush_end_io(struct request *flush_rq, int error)
+static void flush_end_io(struct request *flush_rq, blk_status_t error)
{
struct request_queue *q = flush_rq->q;
struct list_head *running;
return blk_flush_queue_rq(flush_rq, false);
}
-static void flush_data_end_io(struct request *rq, int error)
+static void flush_data_end_io(struct request *rq, blk_status_t error)
{
struct request_queue *q = rq->q;
struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
+ lockdep_assert_held(q->queue_lock);
+
/*
* Updating q->in_flight[] here for making this tag usable
* early. Because in blk_queue_start_tag(),
blk_run_queue_async(q);
}
-static void mq_flush_data_end_io(struct request *rq, int error)
+static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
{
struct request_queue *q = rq->q;
struct blk_mq_hw_ctx *hctx;
* or __blk_mq_run_hw_queue() to dispatch request.
* @rq is being submitted. Analyze what needs to be done and put it on the
* right queue.
- *
- * CONTEXT:
- * spin_lock_irq(q->queue_lock) in !mq case
*/
void blk_insert_flush(struct request *rq)
{
unsigned int policy = blk_flush_policy(fflags, rq);
struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
+ if (!q->mq_ops)
+ lockdep_assert_held(q->queue_lock);
+
/*
* @policy now records what operations need to be done. Adjust
* REQ_PREFLUSH and FUA for the driver.
.sysfs_ops = &integrity_ops,
};
-static int blk_integrity_nop_fn(struct blk_integrity_iter *iter)
+static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
{
- return 0;
+ return BLK_STS_OK;
}
static const struct blk_integrity_profile nop_profile = {
*/
int blk_rq_append_bio(struct request *rq, struct bio *bio)
{
+ blk_queue_bounce(rq->q, &bio);
+
if (!rq->bio) {
blk_rq_bio_prep(rq->q, rq, bio);
} else {
map_data->offset += bio->bi_iter.bi_size;
orig_bio = bio;
- blk_queue_bounce(q, &bio);
/*
* We link the bounce buffer in and could have to traverse it
* later so we have to get a ref to prevent it from being freed
*/
- bio_get(bio);
-
ret = blk_rq_append_bio(rq, bio);
+ bio_get(bio);
if (ret) {
bio_endio(bio);
__blk_rq_unmap_user(orig_bio);
return ret;
}
- blk_queue_bounce(q, &rq->bio);
return 0;
}
EXPORT_SYMBOL(blk_rq_map_kern);
bool do_split = true;
struct bio *new = NULL;
const unsigned max_sectors = get_max_io_size(q, bio);
- unsigned bvecs = 0;
bio_for_each_segment(bv, bio, iter) {
/*
- * With arbitrary bio size, the incoming bio may be very
- * big. We have to split the bio into small bios so that
- * each holds at most BIO_MAX_PAGES bvecs because
- * bio_clone() can fail to allocate big bvecs.
- *
- * It should have been better to apply the limit per
- * request queue in which bio_clone() is involved,
- * instead of globally. The biggest blocker is the
- * bio_clone() in bio bounce.
- *
- * If bio is splitted by this reason, we should have
- * allowed to continue bios merging, but don't do
- * that now for making the change simple.
- *
- * TODO: deal with bio bounce's bio_clone() gracefully
- * and convert the global limit into per-queue limit.
- */
- if (bvecs++ >= BIO_MAX_PAGES)
- goto split;
-
- /*
* If the queue doesn't support SG gaps and adding this
* offset would create a gap, disallow it.
*/
return do_split ? new : NULL;
}
-void blk_queue_split(struct request_queue *q, struct bio **bio,
- struct bio_set *bs)
+void blk_queue_split(struct request_queue *q, struct bio **bio)
{
struct bio *split, *res;
unsigned nsegs;
switch (bio_op(*bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
- split = blk_bio_discard_split(q, *bio, bs, &nsegs);
+ split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
break;
case REQ_OP_WRITE_ZEROES:
- split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs);
+ split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
break;
case REQ_OP_WRITE_SAME:
- split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
+ split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
break;
default:
split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
static struct request *attempt_merge(struct request_queue *q,
struct request *req, struct request *next)
{
+ if (!q->mq_ops)
+ lockdep_assert_held(q->queue_lock);
+
if (!rq_mergeable(req) || !rq_mergeable(next))
return NULL;
return NULL;
/*
+ * Don't allow merge of different write hints, or for a hint with
+ * non-hint IO.
+ */
+ if (req->write_hint != next->write_hint)
+ return NULL;
+
+ /*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
* will have updated segment counts, update sector
!blk_write_same_mergeable(rq->bio, bio))
return false;
+ /*
+ * Don't allow merge of different write hints, or for a hint with
+ * non-hint IO.
+ */
+ if (rq->write_hint != bio->bi_write_hint)
+ return false;
+
return true;
}
#include "blk.h"
#include "blk-mq.h"
-static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
- const int cpu)
+static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
+ const struct cpumask *online_mask)
{
- return cpu * nr_queues / nr_cpus;
+ /*
+ * Non online CPU will be mapped to queue index 0.
+ */
+ if (!cpumask_test_cpu(cpu, online_mask))
+ return 0;
+ return cpu % nr_queues;
}
static int get_first_sibling(unsigned int cpu)
unsigned int *map = set->mq_map;
unsigned int nr_queues = set->nr_hw_queues;
const struct cpumask *online_mask = cpu_online_mask;
- unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
- cpumask_var_t cpus;
-
- if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
- return -ENOMEM;
-
- cpumask_clear(cpus);
- nr_cpus = nr_uniq_cpus = 0;
- for_each_cpu(i, online_mask) {
- nr_cpus++;
- first_sibling = get_first_sibling(i);
- if (!cpumask_test_cpu(first_sibling, cpus))
- nr_uniq_cpus++;
- cpumask_set_cpu(i, cpus);
- }
-
- queue = 0;
- for_each_possible_cpu(i) {
- if (!cpumask_test_cpu(i, online_mask)) {
- map[i] = 0;
- continue;
- }
+ unsigned int cpu, first_sibling;
+ for_each_possible_cpu(cpu) {
/*
- * Easy case - we have equal or more hardware queues. Or
- * there are no thread siblings to take into account. Do
- * 1:1 if enough, or sequential mapping if less.
+ * First do sequential mapping between CPUs and queues.
+ * In case we still have CPUs to map, and we have some number of
+ * threads per cores then map sibling threads to the same queue for
+ * performace optimizations.
*/
- if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) {
- map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue);
- queue++;
- continue;
+ if (cpu < nr_queues) {
+ map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+ } else {
+ first_sibling = get_first_sibling(cpu);
+ if (first_sibling == cpu)
+ map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+ else
+ map[cpu] = map[first_sibling];
}
-
- /*
- * Less then nr_cpus queues, and we have some number of
- * threads per cores. Map sibling threads to the same
- * queue.
- */
- first_sibling = get_first_sibling(i);
- if (first_sibling == i) {
- map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
- queue);
- queue++;
- } else
- map[i] = map[first_sibling];
}
- free_cpumask_var(cpus);
return 0;
}
EXPORT_SYMBOL_GPL(blk_mq_map_queues);
blk_mq_run_hw_queues(q, true);
} else if (strcmp(op, "start") == 0) {
blk_mq_start_stopped_hw_queues(q, true);
+ } else if (strcmp(op, "kick") == 0) {
+ blk_mq_kick_requeue_list(q);
} else {
pr_err("%s: unsupported operation '%s'\n", __func__, op);
inval:
- pr_err("%s: use either 'run' or 'start'\n", __func__);
+ pr_err("%s: use 'run', 'start' or 'kick'\n", __func__);
return -EINVAL;
}
return count;
}
}
+static int queue_write_hint_show(void *data, struct seq_file *m)
+{
+ struct request_queue *q = data;
+ int i;
+
+ for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
+ seq_printf(m, "hint%d: %llu\n", i, q->write_hints[i]);
+
+ return 0;
+}
+
+static ssize_t queue_write_hint_store(void *data, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct request_queue *q = data;
+ int i;
+
+ for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
+ q->write_hints[i] = 0;
+
+ return count;
+}
+
static int queue_poll_stat_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;
};
#undef RQF_NAME
+#define RQAF_NAME(name) [REQ_ATOM_##name] = #name
+static const char *const rqaf_name[] = {
+ RQAF_NAME(COMPLETE),
+ RQAF_NAME(STARTED),
+ RQAF_NAME(POLL_SLEPT),
+};
+#undef RQAF_NAME
+
int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
{
const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
seq_puts(m, ", .rq_flags=");
blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
ARRAY_SIZE(rqf_name));
+ seq_puts(m, ", .atomic_flags=");
+ blk_flags_show(m, rq->atomic_flags, rqaf_name, ARRAY_SIZE(rqaf_name));
seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
rq->internal_tag);
if (mq_ops->show_rq)
}
EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show);
+static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
+ __acquires(&q->requeue_lock)
+{
+ struct request_queue *q = m->private;
+
+ spin_lock_irq(&q->requeue_lock);
+ return seq_list_start(&q->requeue_list, *pos);
+}
+
+static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct request_queue *q = m->private;
+
+ return seq_list_next(v, &q->requeue_list, pos);
+}
+
+static void queue_requeue_list_stop(struct seq_file *m, void *v)
+ __releases(&q->requeue_lock)
+{
+ struct request_queue *q = m->private;
+
+ spin_unlock_irq(&q->requeue_lock);
+}
+
+static const struct seq_operations queue_requeue_list_seq_ops = {
+ .start = queue_requeue_list_start,
+ .next = queue_requeue_list_next,
+ .stop = queue_requeue_list_stop,
+ .show = blk_mq_debugfs_rq_show,
+};
+
static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
__acquires(&hctx->lock)
{
.show = blk_mq_debugfs_rq_show,
};
+struct show_busy_params {
+ struct seq_file *m;
+ struct blk_mq_hw_ctx *hctx;
+};
+
+/*
+ * Note: the state of a request may change while this function is in progress,
+ * e.g. due to a concurrent blk_mq_finish_request() call.
+ */
+static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
+{
+ const struct show_busy_params *params = data;
+
+ if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx &&
+ test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+ __blk_mq_debugfs_rq_show(params->m,
+ list_entry_rq(&rq->queuelist));
+}
+
+static int hctx_busy_show(void *data, struct seq_file *m)
+{
+ struct blk_mq_hw_ctx *hctx = data;
+ struct show_busy_params params = { .m = m, .hctx = hctx };
+
+ blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq,
+ ¶ms);
+
+ return 0;
+}
+
static int hctx_ctx_map_show(void *data, struct seq_file *m)
{
struct blk_mq_hw_ctx *hctx = data;
static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
{"poll_stat", 0400, queue_poll_stat_show},
+ {"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops},
{"state", 0600, queue_state_show, queue_state_write},
+ {"write_hints", 0600, queue_write_hint_show, queue_write_hint_store},
{},
};
{"state", 0400, hctx_state_show},
{"flags", 0400, hctx_flags_show},
{"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops},
+ {"busy", 0400, hctx_busy_show},
{"ctx_map", 0400, hctx_ctx_map_show},
{"tags", 0400, hctx_tags_show},
{"tags_bitmap", 0400, hctx_tags_bitmap_show},
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
-static void __blk_mq_sched_assign_ioc(struct request_queue *q,
- struct request *rq,
- struct bio *bio,
- struct io_context *ioc)
+void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
{
+ struct request_queue *q = rq->q;
+ struct io_context *ioc = rq_ioc(bio);
struct io_cq *icq;
spin_lock_irq(q->queue_lock);
if (!icq)
return;
}
-
+ get_io_context(icq->ioc);
rq->elv.icq = icq;
- if (!blk_mq_sched_get_rq_priv(q, rq, bio)) {
- rq->rq_flags |= RQF_ELVPRIV;
- get_io_context(icq->ioc);
- return;
- }
-
- rq->elv.icq = NULL;
}
-static void blk_mq_sched_assign_ioc(struct request_queue *q,
- struct request *rq, struct bio *bio)
+/*
+ * Mark a hardware queue as needing a restart. For shared queues, maintain
+ * a count of how many hardware queues are marked for restart.
+ */
+static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
- struct io_context *ioc;
+ if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ return;
+
+ if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+ struct request_queue *q = hctx->queue;
- ioc = rq_ioc(bio);
- if (ioc)
- __blk_mq_sched_assign_ioc(q, rq, bio, ioc);
+ if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ atomic_inc(&q->shared_hctx_restart);
+ } else
+ set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
-struct request *blk_mq_sched_get_request(struct request_queue *q,
- struct bio *bio,
- unsigned int op,
- struct blk_mq_alloc_data *data)
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
- struct elevator_queue *e = q->elevator;
- struct request *rq;
-
- blk_queue_enter_live(q);
- data->q = q;
- if (likely(!data->ctx))
- data->ctx = blk_mq_get_ctx(q);
- if (likely(!data->hctx))
- data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
-
- if (e) {
- data->flags |= BLK_MQ_REQ_INTERNAL;
-
- /*
- * Flush requests are special and go directly to the
- * dispatch list.
- */
- if (!op_is_flush(op) && e->type->ops.mq.get_request) {
- rq = e->type->ops.mq.get_request(q, op, data);
- if (rq)
- rq->rq_flags |= RQF_QUEUED;
- } else
- rq = __blk_mq_alloc_request(data, op);
- } else {
- rq = __blk_mq_alloc_request(data, op);
- }
-
- if (rq) {
- if (!op_is_flush(op)) {
- rq->elv.icq = NULL;
- if (e && e->type->icq_cache)
- blk_mq_sched_assign_ioc(q, rq, bio);
- }
- data->hctx->queued++;
- return rq;
- }
+ if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ return false;
- blk_queue_exit(q);
- return NULL;
-}
+ if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+ struct request_queue *q = hctx->queue;
-void blk_mq_sched_put_request(struct request *rq)
-{
- struct request_queue *q = rq->q;
- struct elevator_queue *e = q->elevator;
+ if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ atomic_dec(&q->shared_hctx_restart);
+ } else
+ clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
- if (rq->rq_flags & RQF_ELVPRIV) {
- blk_mq_sched_put_rq_priv(rq->q, rq);
- if (rq->elv.icq) {
- put_io_context(rq->elv.icq->ioc);
- rq->elv.icq = NULL;
- }
+ if (blk_mq_hctx_has_pending(hctx)) {
+ blk_mq_run_hw_queue(hctx, true);
+ return true;
}
- if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
- e->type->ops.mq.put_request(rq);
- else
- blk_mq_finish_request(rq);
+ return false;
}
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
bool did_work = false;
LIST_HEAD(rq_list);
- if (unlikely(blk_mq_hctx_stopped(hctx)))
+ /* RCU or SRCU read lock is needed before checking quiesced flag */
+ if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
return;
hctx->run++;
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
+/*
+ * Reverse check our software queue for entries that we could potentially
+ * merge with. Currently includes a hand-wavy stop count of 8, to not spend
+ * too much time checking for merges.
+ */
+static bool blk_mq_attempt_merge(struct request_queue *q,
+ struct blk_mq_ctx *ctx, struct bio *bio)
+{
+ struct request *rq;
+ int checked = 8;
+
+ lockdep_assert_held(&ctx->lock);
+
+ list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
+ bool merged = false;
+
+ if (!checked--)
+ break;
+
+ if (!blk_rq_merge_ok(rq, bio))
+ continue;
+
+ switch (blk_try_merge(rq, bio)) {
+ case ELEVATOR_BACK_MERGE:
+ if (blk_mq_sched_allow_merge(q, rq, bio))
+ merged = bio_attempt_back_merge(q, rq, bio);
+ break;
+ case ELEVATOR_FRONT_MERGE:
+ if (blk_mq_sched_allow_merge(q, rq, bio))
+ merged = bio_attempt_front_merge(q, rq, bio);
+ break;
+ case ELEVATOR_DISCARD_MERGE:
+ merged = bio_attempt_discard_merge(q, rq, bio);
+ break;
+ default:
+ continue;
+ }
+
+ if (merged)
+ ctx->rq_merged++;
+ return merged;
+ }
+
+ return false;
+}
+
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
struct elevator_queue *e = q->elevator;
+ struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+ bool ret = false;
- if (e->type->ops.mq.bio_merge) {
- struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-
+ if (e && e->type->ops.mq.bio_merge) {
blk_mq_put_ctx(ctx);
return e->type->ops.mq.bio_merge(hctx, bio);
}
- return false;
+ if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
+ /* default per sw-queue merge */
+ spin_lock(&ctx->lock);
+ ret = blk_mq_attempt_merge(q, ctx, bio);
+ spin_unlock(&ctx->lock);
+ }
+
+ blk_mq_put_ctx(ctx);
+ return ret;
}
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
return true;
}
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
- if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
- clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
- if (blk_mq_hctx_has_pending(hctx)) {
- blk_mq_run_hw_queue(hctx, true);
- return true;
- }
- }
- return false;
-}
-
/**
* list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
* @pos: loop cursor.
unsigned int i, j;
if (set->flags & BLK_MQ_F_TAG_SHARED) {
+ /*
+ * If this is 0, then we know that no hardware queues
+ * have RESTART marked. We're done.
+ */
+ if (!atomic_read(&queue->shared_hctx_restart))
+ return;
+
rcu_read_lock();
list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
tag_set_list) {
void blk_mq_sched_free_hctx_data(struct request_queue *q,
void (*exit)(struct blk_mq_hw_ctx *));
-struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data);
-void blk_mq_sched_put_request(struct request *rq);
+void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio);
void blk_mq_sched_request_inserted(struct request *rq);
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
- struct elevator_queue *e = q->elevator;
-
- if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio))
+ if (blk_queue_nomerges(q) || !bio_mergeable(bio))
return false;
return __blk_mq_sched_bio_merge(q, bio);
}
-static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
- struct request *rq,
- struct bio *bio)
-{
- struct elevator_queue *e = q->elevator;
-
- if (e && e->type->ops.mq.get_rq_priv)
- return e->type->ops.mq.get_rq_priv(q, rq, bio);
-
- return 0;
-}
-
-static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
- struct request *rq)
-{
- struct elevator_queue *e = q->elevator;
-
- if (e && e->type->ops.mq.put_rq_priv)
- e->type->ops.mq.put_rq_priv(q, rq);
-}
-
static inline bool
blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
return false;
}
-/*
- * Mark a hardware queue as needing a restart.
- */
-static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
- if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
- set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-}
-
static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
{
return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
-static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync);
static int blk_mq_poll_stats_bkt(const struct request *rq)
{
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+/*
+ * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
+ * mpt3sas driver such that this function can be removed.
+ */
+void blk_mq_quiesce_queue_nowait(struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
+
/**
- * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
* @q: request queue.
*
* Note: this function does not prevent that the struct request end_io()
- * callback function is invoked. Additionally, it is not prevented that
- * new queue_rq() calls occur unless the queue has been stopped first.
+ * callback function is invoked. Once this function is returned, we make
+ * sure no dispatch can happen until the queue is unquiesced via
+ * blk_mq_unquiesce_queue().
*/
void blk_mq_quiesce_queue(struct request_queue *q)
{
unsigned int i;
bool rcu = false;
- __blk_mq_stop_hw_queues(q, true);
+ blk_mq_quiesce_queue_nowait(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->flags & BLK_MQ_F_BLOCKING)
- synchronize_srcu(&hctx->queue_rq_srcu);
+ synchronize_srcu(hctx->queue_rq_srcu);
else
rcu = true;
}
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
+/*
+ * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
+ * @q: request queue.
+ *
+ * This function recovers queue into the state before quiescing
+ * which is done by blk_mq_quiesce_queue.
+ */
+void blk_mq_unquiesce_queue(struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ /* dispatch requests which are inserted during quiescing */
+ blk_mq_run_hw_queues(q, true);
+}
+EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
+
void blk_mq_wake_waiters(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
}
EXPORT_SYMBOL(blk_mq_can_queue);
-void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
- struct request *rq, unsigned int op)
+static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
+ unsigned int tag, unsigned int op)
{
+ struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
+ struct request *rq = tags->static_rqs[tag];
+
+ rq->rq_flags = 0;
+
+ if (data->flags & BLK_MQ_REQ_INTERNAL) {
+ rq->tag = -1;
+ rq->internal_tag = tag;
+ } else {
+ if (blk_mq_tag_busy(data->hctx)) {
+ rq->rq_flags = RQF_MQ_INFLIGHT;
+ atomic_inc(&data->hctx->nr_active);
+ }
+ rq->tag = tag;
+ rq->internal_tag = -1;
+ data->hctx->tags->rqs[rq->tag] = rq;
+ }
+
INIT_LIST_HEAD(&rq->queuelist);
/* csd/requeue_work/fifo_time is initialized before use */
- rq->q = q;
- rq->mq_ctx = ctx;
+ rq->q = data->q;
+ rq->mq_ctx = data->ctx;
rq->cmd_flags = op;
- if (blk_queue_io_stat(q))
+ if (blk_queue_io_stat(data->q))
rq->rq_flags |= RQF_IO_STAT;
/* do not touch atomic flags, it needs atomic ops against the timer */
rq->cpu = -1;
rq->end_io_data = NULL;
rq->next_rq = NULL;
- ctx->rq_dispatched[op_is_sync(op)]++;
+ data->ctx->rq_dispatched[op_is_sync(op)]++;
+ return rq;
}
-EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
-struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
- unsigned int op)
+static struct request *blk_mq_get_request(struct request_queue *q,
+ struct bio *bio, unsigned int op,
+ struct blk_mq_alloc_data *data)
{
+ struct elevator_queue *e = q->elevator;
struct request *rq;
unsigned int tag;
- tag = blk_mq_get_tag(data);
- if (tag != BLK_MQ_TAG_FAIL) {
- struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
+ blk_queue_enter_live(q);
+ data->q = q;
+ if (likely(!data->ctx))
+ data->ctx = blk_mq_get_ctx(q);
+ if (likely(!data->hctx))
+ data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
+ if (op & REQ_NOWAIT)
+ data->flags |= BLK_MQ_REQ_NOWAIT;
- rq = tags->static_rqs[tag];
+ if (e) {
+ data->flags |= BLK_MQ_REQ_INTERNAL;
- if (data->flags & BLK_MQ_REQ_INTERNAL) {
- rq->tag = -1;
- rq->internal_tag = tag;
- } else {
- if (blk_mq_tag_busy(data->hctx)) {
- rq->rq_flags = RQF_MQ_INFLIGHT;
- atomic_inc(&data->hctx->nr_active);
- }
- rq->tag = tag;
- rq->internal_tag = -1;
- data->hctx->tags->rqs[rq->tag] = rq;
- }
+ /*
+ * Flush requests are special and go directly to the
+ * dispatch list.
+ */
+ if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
+ e->type->ops.mq.limit_depth(op, data);
+ }
- blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
- return rq;
+ tag = blk_mq_get_tag(data);
+ if (tag == BLK_MQ_TAG_FAIL) {
+ blk_queue_exit(q);
+ return NULL;
}
- return NULL;
+ rq = blk_mq_rq_ctx_init(data, tag, op);
+ if (!op_is_flush(op)) {
+ rq->elv.icq = NULL;
+ if (e && e->type->ops.mq.prepare_request) {
+ if (e->type->icq_cache && rq_ioc(bio))
+ blk_mq_sched_assign_ioc(rq, bio);
+
+ e->type->ops.mq.prepare_request(rq, bio);
+ rq->rq_flags |= RQF_ELVPRIV;
+ }
+ }
+ data->hctx->queued++;
+ return rq;
}
-EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
unsigned int flags)
{
struct blk_mq_alloc_data alloc_data = { .flags = flags };
if (ret)
return ERR_PTR(ret);
- rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
+ rq = blk_mq_get_request(q, NULL, op, &alloc_data);
blk_mq_put_ctx(alloc_data.ctx);
blk_queue_exit(q);
}
EXPORT_SYMBOL(blk_mq_alloc_request);
-struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
- unsigned int flags, unsigned int hctx_idx)
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+ unsigned int op, unsigned int flags, unsigned int hctx_idx)
{
struct blk_mq_alloc_data alloc_data = { .flags = flags };
struct request *rq;
cpu = cpumask_first(alloc_data.hctx->cpumask);
alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
- rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
+ rq = blk_mq_get_request(q, NULL, op, &alloc_data);
blk_queue_exit(q);
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
-void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
- struct request *rq)
+void blk_mq_free_request(struct request *rq)
{
- const int sched_tag = rq->internal_tag;
struct request_queue *q = rq->q;
+ struct elevator_queue *e = q->elevator;
+ struct blk_mq_ctx *ctx = rq->mq_ctx;
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+ const int sched_tag = rq->internal_tag;
+
+ if (rq->rq_flags & RQF_ELVPRIV) {
+ if (e && e->type->ops.mq.finish_request)
+ e->type->ops.mq.finish_request(rq);
+ if (rq->elv.icq) {
+ put_io_context(rq->elv.icq->ioc);
+ rq->elv.icq = NULL;
+ }
+ }
+ ctx->rq_completed[rq_is_sync(rq)]++;
if (rq->rq_flags & RQF_MQ_INFLIGHT)
atomic_dec(&hctx->nr_active);
wbt_done(q->rq_wb, &rq->issue_stat);
- rq->rq_flags = 0;
clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
blk_mq_sched_restart(hctx);
blk_queue_exit(q);
}
-
-static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx,
- struct request *rq)
-{
- struct blk_mq_ctx *ctx = rq->mq_ctx;
-
- ctx->rq_completed[rq_is_sync(rq)]++;
- __blk_mq_finish_request(hctx, ctx, rq);
-}
-
-void blk_mq_finish_request(struct request *rq)
-{
- blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
-}
-EXPORT_SYMBOL_GPL(blk_mq_finish_request);
-
-void blk_mq_free_request(struct request *rq)
-{
- blk_mq_sched_put_request(rq);
-}
EXPORT_SYMBOL_GPL(blk_mq_free_request);
-inline void __blk_mq_end_request(struct request *rq, int error)
+inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
{
blk_account_io_done(rq);
}
EXPORT_SYMBOL(__blk_mq_end_request);
-void blk_mq_end_request(struct request *rq, int error)
+void blk_mq_end_request(struct request *rq, blk_status_t error)
{
if (blk_update_request(rq, error, blk_rq_bytes(rq)))
BUG();
blk_queue_exit(q);
}
-/*
- * Reverse check our software queue for entries that we could potentially
- * merge with. Currently includes a hand-wavy stop count of 8, to not spend
- * too much time checking for merges.
- */
-static bool blk_mq_attempt_merge(struct request_queue *q,
- struct blk_mq_ctx *ctx, struct bio *bio)
-{
- struct request *rq;
- int checked = 8;
-
- list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
- bool merged = false;
-
- if (!checked--)
- break;
-
- if (!blk_rq_merge_ok(rq, bio))
- continue;
-
- switch (blk_try_merge(rq, bio)) {
- case ELEVATOR_BACK_MERGE:
- if (blk_mq_sched_allow_merge(q, rq, bio))
- merged = bio_attempt_back_merge(q, rq, bio);
- break;
- case ELEVATOR_FRONT_MERGE:
- if (blk_mq_sched_allow_merge(q, rq, bio))
- merged = bio_attempt_front_merge(q, rq, bio);
- break;
- case ELEVATOR_DISCARD_MERGE:
- merged = bio_attempt_discard_merge(q, rq, bio);
- break;
- default:
- continue;
- }
-
- if (merged)
- ctx->rq_merged++;
- return merged;
- }
-
- return false;
-}
-
struct flush_busy_ctx_data {
struct blk_mq_hw_ctx *hctx;
struct list_head *list;
{
struct blk_mq_hw_ctx *hctx;
struct request *rq;
- int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK;
+ int errors, queued;
if (list_empty(list))
return false;
errors = queued = 0;
do {
struct blk_mq_queue_data bd;
+ blk_status_t ret;
rq = list_first_entry(list, struct request, queuelist);
if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
}
ret = q->mq_ops->queue_rq(hctx, &bd);
- switch (ret) {
- case BLK_MQ_RQ_QUEUE_OK:
- queued++;
- break;
- case BLK_MQ_RQ_QUEUE_BUSY:
+ if (ret == BLK_STS_RESOURCE) {
blk_mq_put_driver_tag_hctx(hctx, rq);
list_add(&rq->queuelist, list);
__blk_mq_requeue_request(rq);
break;
- default:
- pr_err("blk-mq: bad return on queue: %d\n", ret);
- case BLK_MQ_RQ_QUEUE_ERROR:
+ }
+
+ if (unlikely(ret != BLK_STS_OK)) {
errors++;
- blk_mq_end_request(rq, -EIO);
- break;
+ blk_mq_end_request(rq, BLK_STS_IOERR);
+ continue;
}
- if (ret == BLK_MQ_RQ_QUEUE_BUSY)
- break;
+ queued++;
} while (!list_empty(list));
hctx->dispatched[queued_to_index(queued)]++;
* - blk_mq_run_hw_queue() checks whether or not a queue has
* been stopped before rerunning a queue.
* - Some but not all block drivers stop a queue before
- * returning BLK_MQ_RQ_QUEUE_BUSY. Two exceptions are scsi-mq
+ * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
* and dm-rq.
*/
if (!blk_mq_sched_needs_restart(hctx) &&
} else {
might_sleep();
- srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+ srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
blk_mq_sched_dispatch_requests(hctx);
- srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+ srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
}
}
static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
unsigned long msecs)
{
- if (unlikely(blk_mq_hctx_stopped(hctx) ||
- !blk_mq_hw_queue_mapped(hctx)))
+ if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
+ return;
+
+ if (unlikely(blk_mq_hctx_stopped(hctx)))
return;
if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
}
EXPORT_SYMBOL(blk_mq_queue_stopped);
-static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync)
+/*
+ * This function is often used for pausing .queue_rq() by driver when
+ * there isn't enough resource or some conditions aren't satisfied, and
+ * BLK_MQ_RQ_QUEUE_BUSY is usually returned.
+ *
+ * We do not guarantee that dispatch can be drained or blocked
+ * after blk_mq_stop_hw_queue() returns. Please use
+ * blk_mq_quiesce_queue() for that requirement.
+ */
+void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
{
- if (sync)
- cancel_delayed_work_sync(&hctx->run_work);
- else
- cancel_delayed_work(&hctx->run_work);
+ cancel_delayed_work(&hctx->run_work);
set_bit(BLK_MQ_S_STOPPED, &hctx->state);
}
-
-void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
-{
- __blk_mq_stop_hw_queue(hctx, false);
-}
EXPORT_SYMBOL(blk_mq_stop_hw_queue);
-static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
+/*
+ * This function is often used for pausing .queue_rq() by driver when
+ * there isn't enough resource or some conditions aren't satisfied, and
+ * BLK_MQ_RQ_QUEUE_BUSY is usually returned.
+ *
+ * We do not guarantee that dispatch can be drained or blocked
+ * after blk_mq_stop_hw_queues() returns. Please use
+ * blk_mq_quiesce_queue() for that requirement.
+ */
+void blk_mq_stop_hw_queues(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i)
- __blk_mq_stop_hw_queue(hctx, sync);
-}
-
-void blk_mq_stop_hw_queues(struct request_queue *q)
-{
- __blk_mq_stop_hw_queues(q, false);
+ blk_mq_stop_hw_queue(hctx);
}
EXPORT_SYMBOL(blk_mq_stop_hw_queues);
void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
{
- if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
+ if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
return;
/*
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
+ lockdep_assert_held(&ctx->lock);
+
trace_block_rq_insert(hctx->queue, rq);
if (at_head)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
+ lockdep_assert_held(&ctx->lock);
+
__blk_mq_insert_req_list(hctx, rq, at_head);
blk_mq_hctx_mark_pending(hctx, ctx);
}
!blk_queue_nomerges(hctx->queue);
}
-static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
- struct blk_mq_ctx *ctx,
- struct request *rq, struct bio *bio)
+static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx,
+ struct blk_mq_ctx *ctx,
+ struct request *rq)
{
- if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) {
- blk_mq_bio_to_request(rq, bio);
- spin_lock(&ctx->lock);
-insert_rq:
- __blk_mq_insert_request(hctx, rq, false);
- spin_unlock(&ctx->lock);
- return false;
- } else {
- struct request_queue *q = hctx->queue;
-
- spin_lock(&ctx->lock);
- if (!blk_mq_attempt_merge(q, ctx, bio)) {
- blk_mq_bio_to_request(rq, bio);
- goto insert_rq;
- }
-
- spin_unlock(&ctx->lock);
- __blk_mq_finish_request(hctx, ctx, rq);
- return true;
- }
+ spin_lock(&ctx->lock);
+ __blk_mq_insert_request(hctx, rq, false);
+ spin_unlock(&ctx->lock);
}
static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
.last = true,
};
blk_qc_t new_cookie;
- int ret;
+ blk_status_t ret;
bool run_queue = true;
- if (blk_mq_hctx_stopped(hctx)) {
+ /* RCU or SRCU read lock is needed before checking quiesced flag */
+ if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
run_queue = false;
goto insert;
}
* would have done
*/
ret = q->mq_ops->queue_rq(hctx, &bd);
- if (ret == BLK_MQ_RQ_QUEUE_OK) {
+ switch (ret) {
+ case BLK_STS_OK:
*cookie = new_cookie;
return;
- }
-
- if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
+ case BLK_STS_RESOURCE:
+ __blk_mq_requeue_request(rq);
+ goto insert;
+ default:
*cookie = BLK_QC_T_NONE;
- blk_mq_end_request(rq, -EIO);
+ blk_mq_end_request(rq, ret);
return;
}
- __blk_mq_requeue_request(rq);
insert:
blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep);
}
might_sleep();
- srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+ srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
__blk_mq_try_issue_directly(hctx, rq, cookie, true);
- srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+ srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
}
}
blk_queue_bounce(q, &bio);
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
bio_io_error(bio);
trace_block_getrq(q, bio, bio->bi_opf);
- rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
+ rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+ if (bio->bi_opf & REQ_NOWAIT)
+ bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
blk_mq_sched_insert_request(rq, false, true, true, true);
- } else if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
+ } else {
blk_mq_put_ctx(data.ctx);
+ blk_mq_bio_to_request(rq, bio);
+ blk_mq_queue_io(data.hctx, data.ctx, rq);
blk_mq_run_hw_queue(data.hctx, true);
- } else
- blk_mq_put_ctx(data.ctx);
+ }
return cookie;
}
set->ops->exit_hctx(hctx, hctx_idx);
if (hctx->flags & BLK_MQ_F_BLOCKING)
- cleanup_srcu_struct(&hctx->queue_rq_srcu);
+ cleanup_srcu_struct(hctx->queue_rq_srcu);
blk_mq_remove_cpuhp(hctx);
blk_free_flush_queue(hctx->fq);
spin_lock_init(&hctx->lock);
INIT_LIST_HEAD(&hctx->dispatch);
hctx->queue = q;
- hctx->queue_num = hctx_idx;
hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
goto free_fq;
if (hctx->flags & BLK_MQ_F_BLOCKING)
- init_srcu_struct(&hctx->queue_rq_srcu);
+ init_srcu_struct(hctx->queue_rq_srcu);
blk_mq_debugfs_register_hctx(q, hctx);
}
}
+/*
+ * Caller needs to ensure that we're either frozen/quiesced, or that
+ * the queue isn't live yet.
+ */
static void queue_set_hctx_shared(struct request_queue *q, bool shared)
{
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
- if (shared)
+ if (shared) {
+ if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ atomic_inc(&q->shared_hctx_restart);
hctx->flags |= BLK_MQ_F_TAG_SHARED;
- else
+ } else {
+ if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ atomic_dec(&q->shared_hctx_restart);
hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+ }
}
}
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
+ bool shared)
{
struct request_queue *q;
}
EXPORT_SYMBOL(blk_mq_init_queue);
+static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
+{
+ int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
+
+ BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu),
+ __alignof__(struct blk_mq_hw_ctx)) !=
+ sizeof(struct blk_mq_hw_ctx));
+
+ if (tag_set->flags & BLK_MQ_F_BLOCKING)
+ hw_ctx_size += sizeof(struct srcu_struct);
+
+ return hw_ctx_size;
+}
+
static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct request_queue *q)
{
continue;
node = blk_mq_hw_queue_to_node(q->mq_map, i);
- hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
+ hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
GFP_KERNEL, node);
if (!hctxs[i])
break;
return data->hctx->tags;
}
-/*
- * Internal helpers for request allocation/init/free
- */
-void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
- struct request *rq, unsigned int op);
-void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
- struct request *rq);
-void blk_mq_finish_request(struct request *rq);
-struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
- unsigned int op);
-
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
{
return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
q->nr_batching = BLK_BATCH_REQ;
blk_set_default_limits(&q->limits);
-
- /*
- * by default assume old behaviour and bounce for any highmem page
- */
- blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
}
EXPORT_SYMBOL(blk_queue_make_request);
}
/**
- * blk_release_queue: - release a &struct request_queue when it is no longer needed
- * @kobj: the kobj belonging to the request queue to be released
+ * __blk_release_queue - release a request queue when it is no longer needed
+ * @work: pointer to the release_work member of the request queue to be released
*
* Description:
- * blk_release_queue is the pair to blk_init_queue() or
- * blk_queue_make_request(). It should be called when a request queue is
- * being released; typically when a block device is being de-registered.
- * Currently, its primary task it to free all the &struct request
- * structures that were allocated to the queue and the queue itself.
+ * blk_release_queue is the counterpart of blk_init_queue(). It should be
+ * called when a request queue is being released; typically when a block
+ * device is being de-registered. Its primary task it to free the queue
+ * itself.
*
- * Note:
+ * Notes:
* The low level driver must have finished any outstanding requests first
* via blk_cleanup_queue().
- **/
-static void blk_release_queue(struct kobject *kobj)
+ *
+ * Although blk_release_queue() may be called with preemption disabled,
+ * __blk_release_queue() may sleep.
+ */
+static void __blk_release_queue(struct work_struct *work)
{
- struct request_queue *q =
- container_of(kobj, struct request_queue, kobj);
+ struct request_queue *q = container_of(work, typeof(*q), release_work);
if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
blk_stat_remove_callback(q, q->poll_cb);
call_rcu(&q->rcu_head, blk_free_queue_rcu);
}
+static void blk_release_queue(struct kobject *kobj)
+{
+ struct request_queue *q =
+ container_of(kobj, struct request_queue, kobj);
+
+ INIT_WORK(&q->release_work, __blk_release_queue);
+ schedule_work(&q->release_work);
+}
+
static const struct sysfs_ops queue_sysfs_ops = {
.show = queue_attr_show,
.store = queue_attr_store,
* all transfers have been done for a request. It's important to call
* this function before end_that_request_last(), as that will put the
* request back on the free list thus corrupting the internal tag list.
- *
- * Notes:
- * queue lock must be held.
**/
void blk_queue_end_tag(struct request_queue *q, struct request *rq)
{
struct blk_queue_tag *bqt = q->queue_tags;
unsigned tag = rq->tag; /* negative tags invalid */
+ lockdep_assert_held(q->queue_lock);
+
BUG_ON(tag >= bqt->real_max_depth);
list_del_init(&rq->queuelist);
* calling this function. The request will also be removed from
* the request queue, so it's the drivers responsibility to readd
* it if it should need to be restarted for some reason.
- *
- * Notes:
- * queue lock must be held.
**/
int blk_queue_start_tag(struct request_queue *q, struct request *rq)
{
unsigned max_depth;
int tag;
+ lockdep_assert_held(q->queue_lock);
+
if (unlikely((rq->rq_flags & RQF_QUEUED))) {
printk(KERN_ERR
"%s: request %p for device [%s] already tagged %d",
* Hardware conditions may dictate a need to stop all pending requests.
* In this case, we will safely clear the block side of the tag queue and
* readd all requests to the request queue in the right order.
- *
- * Notes:
- * queue lock must be held.
**/
void blk_queue_invalidate_tags(struct request_queue *q)
{
struct list_head *tmp, *n;
+ lockdep_assert_held(q->queue_lock);
+
list_for_each_safe(tmp, n, &q->tag_busy_list)
blk_requeue_request(q, list_entry_rq(tmp));
}
* Notes:
* Each request has its own timer, and as it is added to the queue, we
* set up the timer. When the request completes, we cancel the timer.
- * Queue lock must be held for the non-mq case, mq case doesn't care.
*/
void blk_add_timer(struct request *req)
{
struct request_queue *q = req->q;
unsigned long expiry;
+ if (!q->mq_ops)
+ lockdep_assert_held(q->queue_lock);
+
/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
if (!q->mq_ops && !q->rq_timed_out_fn)
return;
struct request *rq;
struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
+ WARN_ON_ONCE(q->mq_ops);
+
while (1) {
if (!list_empty(&q->queue_head)) {
rq = list_entry_rq(q->queue_head.next);
static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
#endif
+#ifdef CONFIG_BOUNCE
+extern int init_emergency_isa_pool(void);
+extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
+#else
+static inline int init_emergency_isa_pool(void)
+{
+ return 0;
+}
+static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
+{
+}
+#endif /* CONFIG_BOUNCE */
+
#endif /* BLK_INTERNAL_H */
#include <asm/tlbflush.h>
#include <trace/events/block.h>
+#include "blk.h"
#define POOL_SIZE 64
#define ISA_POOL_SIZE 16
+static struct bio_set *bounce_bio_set, *bounce_bio_split;
static mempool_t *page_pool, *isa_page_pool;
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL)
BUG_ON(!page_pool);
pr_info("pool size: %d pages\n", POOL_SIZE);
+ bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ BUG_ON(!bounce_bio_set);
+ if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
+ BUG_ON(1);
+
+ bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
+ BUG_ON(!bounce_bio_split);
+
return 0;
}
mempool_free(bvec->bv_page, pool);
}
- bio_orig->bi_error = bio->bi_error;
+ bio_orig->bi_status = bio->bi_status;
bio_endio(bio_orig);
bio_put(bio);
}
{
struct bio *bio_orig = bio->bi_private;
- if (!bio->bi_error)
+ if (!bio->bi_status)
copy_to_high_bio_irq(bio_orig, bio);
bounce_end_io(bio, pool);
int rw = bio_data_dir(*bio_orig);
struct bio_vec *to, from;
struct bvec_iter iter;
- unsigned i;
-
- bio_for_each_segment(from, *bio_orig, iter)
- if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q))
- goto bounce;
+ unsigned i = 0;
+ bool bounce = false;
+ int sectors = 0;
+
+ bio_for_each_segment(from, *bio_orig, iter) {
+ if (i++ < BIO_MAX_PAGES)
+ sectors += from.bv_len >> 9;
+ if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
+ bounce = true;
+ }
+ if (!bounce)
+ return;
- return;
-bounce:
- bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);
+ if (sectors < bio_sectors(*bio_orig)) {
+ bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
+ bio_chain(bio, *bio_orig);
+ generic_make_request(*bio_orig);
+ *bio_orig = bio;
+ }
+ bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
bio_for_each_segment_all(to, bio, i) {
struct page *page = to->bv_page;
- if (page_to_pfn(page) <= queue_bounce_pfn(q))
+ if (page_to_pfn(page) <= q->limits.bounce_pfn)
continue;
to->bv_page = mempool_alloc(pool, q->bounce_gfp);
* don't waste time iterating over bio segments
*/
if (!(q->bounce_gfp & GFP_DMA)) {
- if (queue_bounce_pfn(q) >= blk_max_pfn)
+ if (q->limits.bounce_pfn >= blk_max_pfn)
return;
pool = page_pool;
} else {
*/
__blk_queue_bounce(q, bio_orig, pool);
}
-
-EXPORT_SYMBOL(blk_queue_bounce);
struct bsg_job *job = container_of(kref, struct bsg_job, kref);
struct request *rq = job->req;
- blk_end_request_all(rq, scsi_req(rq)->result);
+ blk_end_request_all(rq, BLK_STS_OK);
put_device(job->dev); /* release reference for the request */
ret = bsg_create_job(dev, req);
if (ret) {
scsi_req(req)->result = ret;
- blk_end_request_all(req, ret);
+ blk_end_request_all(req, BLK_STS_OK);
spin_lock_irq(q->queue_lock);
continue;
}
q->bsg_job_size = dd_job_size;
q->bsg_job_fn = job_fn;
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
blk_queue_softirq_done(q, bsg_softirq_done);
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
rq = blk_get_request(q, op, GFP_KERNEL);
if (IS_ERR(rq))
return rq;
- scsi_req_init(rq);
ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
if (ret)
* async completion call-back from the block layer, when scsi/ide/whatever
* calls end_that_request_last() on a request
*/
-static void bsg_rq_end_io(struct request *rq, int uptodate)
+static void bsg_rq_end_io(struct request *rq, blk_status_t status)
{
struct bsg_command *bc = rq->end_io_data;
struct bsg_device *bd = bc->bd;
unsigned long flags;
- dprintk("%s: finished rq %p bc %p, bio %p stat %d\n",
- bd->name, rq, bc, bc->bio, uptodate);
+ dprintk("%s: finished rq %p bc %p, bio %p\n",
+ bd->name, rq, bc, bc->bio);
bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration);
#ifdef BSG_DEBUG
unsigned char buf[32];
#endif
+
+ if (!blk_queue_scsi_passthrough(rq)) {
+ WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
+ return ERR_PTR(-EINVAL);
+ }
+
if (!blk_get_queue(rq))
return ERR_PTR(-ENXIO);
return min_vdisktime;
}
-static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
-{
- s64 delta = (s64)(vdisktime - min_vdisktime);
- if (delta < 0)
- min_vdisktime = vdisktime;
-
- return min_vdisktime;
-}
-
static void update_min_vdisktime(struct cfq_rb_root *st)
{
struct cfq_group *cfqg;
*/
if (elv_attempt_insert_merge(q, rq))
break;
+ /* fall through */
case ELEVATOR_INSERT_SORT:
BUG_ON(blk_rq_is_passthrough(rq));
rq->rq_flags |= RQF_SORTED;
static DEFINE_SPINLOCK(ext_devt_lock);
static DEFINE_IDR(ext_devt_idr);
-static struct device_type disk_type;
+static const struct device_type disk_type;
static void disk_check_events(struct disk_events *ev,
unsigned int *clearing_ptr);
return NULL;
}
-static struct device_type disk_type = {
+static const struct device_type disk_type = {
.name = "disk",
.groups = disk_attr_groups,
.release = disk_release,
case IOPRIO_CLASS_RT:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- /* fall through, rt has prio field too */
+ /* fall through */
+ /* rt has prio field too */
case IOPRIO_CLASS_BE:
if (data >= IOPRIO_BE_NR || data < 0)
return -EINVAL;
}
}
-static struct request *kyber_get_request(struct request_queue *q,
- unsigned int op,
- struct blk_mq_alloc_data *data)
+static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
{
- struct kyber_queue_data *kqd = q->elevator->elevator_data;
- struct request *rq;
-
/*
* We use the scheduler tags as per-hardware queue queueing tokens.
* Async requests can be limited at this stage.
*/
- if (!op_is_sync(op))
+ if (!op_is_sync(op)) {
+ struct kyber_queue_data *kqd = data->q->elevator->elevator_data;
+
data->shallow_depth = kqd->async_depth;
+ }
+}
- rq = __blk_mq_alloc_request(data, op);
- if (rq)
- rq_set_domain_token(rq, -1);
- return rq;
+static void kyber_prepare_request(struct request *rq, struct bio *bio)
+{
+ rq_set_domain_token(rq, -1);
}
-static void kyber_put_request(struct request *rq)
+static void kyber_finish_request(struct request *rq)
{
- struct request_queue *q = rq->q;
- struct kyber_queue_data *kqd = q->elevator->elevator_data;
+ struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
rq_clear_domain_token(kqd, rq);
- blk_mq_finish_request(rq);
}
static void kyber_completed_request(struct request *rq)
.exit_sched = kyber_exit_sched,
.init_hctx = kyber_init_hctx,
.exit_hctx = kyber_exit_hctx,
- .get_request = kyber_get_request,
- .put_request = kyber_put_request,
+ .limit_depth = kyber_limit_depth,
+ .prepare_request = kyber_prepare_request,
+ .finish_request = kyber_finish_request,
.completed_request = kyber_completed_request,
.dispatch_request = kyber_dispatch_request,
.has_work = kyber_has_work,
ldm_error("PRIVHEAD disk size doesn't match real disk size");
return false;
}
- if (uuid_be_to_bin(data + 0x0030, (uuid_be *)ph->disk_id)) {
+ if (uuid_parse(data + 0x0030, &ph->disk_id)) {
ldm_error("PRIVHEAD contains an invalid GUID.");
return false;
}
(ph1->logical_disk_size == ph2->logical_disk_size) &&
(ph1->config_start == ph2->config_start) &&
(ph1->config_size == ph2->config_size) &&
- !memcmp (ph1->disk_id, ph2->disk_id, GUID_SIZE));
+ uuid_equal(&ph1->disk_id, &ph2->disk_id));
}
/**
list_for_each (item, &ldb->v_disk) {
struct vblk *v = list_entry (item, struct vblk, list);
- if (!memcmp (v->vblk.disk.disk_id, ldb->ph.disk_id, GUID_SIZE))
+ if (uuid_equal(&v->vblk.disk.disk_id, &ldb->ph.disk_id))
return v;
}
disk = &vb->vblk.disk;
ldm_get_vstr (buffer + 0x18 + r_diskid, disk->alt_name,
sizeof (disk->alt_name));
- if (uuid_be_to_bin(buffer + 0x19 + r_name, (uuid_be *)disk->disk_id))
+ if (uuid_parse(buffer + 0x19 + r_name, &disk->disk_id))
return false;
return true;
return false;
disk = &vb->vblk.disk;
- memcpy (disk->disk_id, buffer + 0x18 + r_name, GUID_SIZE);
+ uuid_copy(&disk->disk_id, (uuid_t *)(buffer + 0x18 + r_name));
return true;
}
/* In memory LDM database structures. */
-#define GUID_SIZE 16
-
struct privhead { /* Offsets and sizes are in sectors. */
u16 ver_major;
u16 ver_minor;
u64 logical_disk_size;
u64 config_start;
u64 config_size;
- u8 disk_id[GUID_SIZE];
+ uuid_t disk_id;
};
struct tocblock { /* We have exactly two bitmaps. */
};
struct vblk_disk { /* VBLK Disk */
- u8 disk_id[GUID_SIZE];
+ uuid_t disk_id;
u8 alt_name[128];
};
if (IS_ERR(rq))
return PTR_ERR(rq);
req = scsi_req(rq);
- scsi_req_init(rq);
if (hdr->cmd_len > BLK_MAX_CDB) {
req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
goto error_free_buffer;
}
req = scsi_req(rq);
- scsi_req_init(rq);
cmdlen = COMMAND_SIZE(opcode);
rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM);
if (IS_ERR(rq))
return PTR_ERR(rq);
- scsi_req_init(rq);
rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
scsi_req(rq)->cmd[0] = cmd;
scsi_req(rq)->cmd[4] = data;
}
EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
-void scsi_req_init(struct request *rq)
+/**
+ * scsi_req_init - initialize certain fields of a scsi_request structure
+ * @req: Pointer to a scsi_request structure.
+ * Initializes .__cmd[], .cmd, .cmd_len and .sense_len but no other members
+ * of struct scsi_request.
+ */
+void scsi_req_init(struct scsi_request *req)
{
- struct scsi_request *req = scsi_req(rq);
-
memset(req->__cmd, 0, sizeof(req->__cmd));
req->cmd = req->__cmd;
req->cmd_len = BLK_MAX_CDB;
* 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
* tag.
*/
-static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
- unsigned int type)
+static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
+ csum_fn *fn, unsigned int type)
{
unsigned int i;
iter->seed++;
}
- return 0;
+ return BLK_STS_OK;
}
-static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
- unsigned int type)
+static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
+ csum_fn *fn, unsigned int type)
{
unsigned int i;
"(rcvd %u)\n", iter->disk_name,
(unsigned long long)
iter->seed, be32_to_cpu(pi->ref_tag));
- return -EILSEQ;
+ return BLK_STS_PROTECTION;
}
break;
case 3:
"(rcvd %04x, want %04x)\n", iter->disk_name,
(unsigned long long)iter->seed,
be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
- return -EILSEQ;
+ return BLK_STS_PROTECTION;
}
next:
iter->seed++;
}
- return 0;
+ return BLK_STS_OK;
}
-static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
{
return t10_pi_generate(iter, t10_pi_crc_fn, 1);
}
-static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
{
return t10_pi_generate(iter, t10_pi_ip_fn, 1);
}
-static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
{
return t10_pi_verify(iter, t10_pi_crc_fn, 1);
}
-static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
{
return t10_pi_verify(iter, t10_pi_ip_fn, 1);
}
-static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
{
return t10_pi_generate(iter, t10_pi_crc_fn, 3);
}
-static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
{
return t10_pi_generate(iter, t10_pi_ip_fn, 3);
}
-static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
{
return t10_pi_verify(iter, t10_pi_crc_fn, 3);
}
-static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
{
return t10_pi_verify(iter, t10_pi_ip_fn, 3);
}
int cpu = mce->extcpu;
struct acpi_hest_generic_status *estatus, *tmp;
struct acpi_hest_generic_data *gdata;
- const uuid_le *fru_id = &NULL_UUID_LE;
+ const guid_t *fru_id = &guid_null;
char *fru_text = "";
- uuid_le *sec_type;
+ guid_t *sec_type;
static u32 err_seq;
estatus = extlog_elog_entry_check(cpu, bank);
err_seq++;
gdata = (struct acpi_hest_generic_data *)(tmp + 1);
if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
- fru_id = (uuid_le *)gdata->fru_id;
+ fru_id = (guid_t *)gdata->fru_id;
if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
fru_text = gdata->fru_text;
- sec_type = (uuid_le *)gdata->section_type;
- if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+ sec_type = (guid_t *)gdata->section_type;
+ if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem = (void *)(gdata + 1);
if (gdata->error_data_length >= sizeof(*mem))
trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
static bool __init extlog_get_l1addr(void)
{
- u8 uuid[16];
+ guid_t guid;
acpi_handle handle;
union acpi_object *obj;
- acpi_str_to_uuid(extlog_dsm_uuid, uuid);
-
+ if (guid_parse(extlog_dsm_uuid, &guid))
+ return false;
if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
return false;
- if (!acpi_check_dsm(handle, uuid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR))
+ if (!acpi_check_dsm(handle, &guid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR))
return false;
- obj = acpi_evaluate_dsm_typed(handle, uuid, EXTLOG_DSM_REV,
+ obj = acpi_evaluate_dsm_typed(handle, &guid, EXTLOG_DSM_REV,
EXTLOG_FN_ADDR, NULL, ACPI_TYPE_INTEGER);
if (!obj) {
return false;
}
}
- table_desc->validation_count++;
- if (table_desc->validation_count == 0) {
- table_desc->validation_count--;
+ if (table_desc->validation_count < ACPI_MAX_TABLE_VALIDATIONS) {
+ table_desc->validation_count++;
+
+ /*
+ * Detect validation_count overflows to ensure that the warning
+ * message will only be printed once.
+ */
+ if (table_desc->validation_count >= ACPI_MAX_TABLE_VALIDATIONS) {
+ ACPI_WARNING((AE_INFO,
+ "Table %p, Validation count overflows\n",
+ table_desc));
+ }
}
*out_table = table_desc->pointer;
ACPI_FUNCTION_TRACE(acpi_tb_put_table);
- if (table_desc->validation_count == 0) {
- ACPI_WARNING((AE_INFO,
- "Table %p, Validation count is zero before decrement\n",
- table_desc));
- return_VOID;
+ if (table_desc->validation_count < ACPI_MAX_TABLE_VALIDATIONS) {
+ table_desc->validation_count--;
+
+ /*
+ * Detect validation_count underflows to ensure that the warning
+ * message will only be printed once.
+ */
+ if (table_desc->validation_count >= ACPI_MAX_TABLE_VALIDATIONS) {
+ ACPI_WARNING((AE_INFO,
+ "Table %p, Validation count underflows\n",
+ table_desc));
+ return_VOID;
+ }
}
- table_desc->validation_count--;
if (table_desc->validation_count == 0) {
return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
}
- /*
- * The end_tag opcode must be followed by a zero byte.
- * Although this byte is technically defined to be a checksum,
- * in practice, all ASL compilers set this byte to zero.
- */
- if (*(aml + 1) != 0) {
- return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
- }
-
/* Return the pointer to the end_tag if requested */
if (!user_function) {
{
int sev, sec_sev;
struct acpi_hest_generic_data *gdata;
+ guid_t *sec_type;
sev = ghes_severity(estatus->error_severity);
apei_estatus_for_each_section(estatus, gdata) {
+ sec_type = (guid_t *)gdata->section_type;
sec_sev = ghes_severity(gdata->error_severity);
- if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
- CPER_SEC_PLATFORM_MEM)) {
+ if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err;
mem_err = (struct cper_sec_mem_err *)(gdata+1);
ghes_edac_report_mem_error(ghes, sev, mem_err);
ghes_handle_memory_failure(gdata, sev);
}
#ifdef CONFIG_ACPI_APEI_PCIEAER
- else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
- CPER_SEC_PCIE)) {
+ else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
struct cper_sec_pcie *pcie_err;
pcie_err = (struct cper_sec_pcie *)(gdata+1);
if (sev == GHES_SEV_RECOVERABLE &&
pr_debug("\n");
}
-acpi_status acpi_str_to_uuid(char *str, u8 *uuid)
-{
- int i;
- static int opc_map_to_uuid[16] = {6, 4, 2, 0, 11, 9, 16, 14, 19, 21,
- 24, 26, 28, 30, 32, 34};
-
- if (strlen(str) != 36)
- return AE_BAD_PARAMETER;
- for (i = 0; i < 36; i++) {
- if (i == 8 || i == 13 || i == 18 || i == 23) {
- if (str[i] != '-')
- return AE_BAD_PARAMETER;
- } else if (!isxdigit(str[i]))
- return AE_BAD_PARAMETER;
- }
- for (i = 0; i < 16; i++) {
- uuid[i] = hex_to_bin(str[opc_map_to_uuid[i]]) << 4;
- uuid[i] |= hex_to_bin(str[opc_map_to_uuid[i] + 1]);
- }
- return AE_OK;
-}
-EXPORT_SYMBOL_GPL(acpi_str_to_uuid);
-
acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
{
acpi_status status;
struct acpi_object_list input;
union acpi_object in_params[4];
union acpi_object *out_obj;
- u8 uuid[16];
+ guid_t guid;
u32 errors;
struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
if (!context)
return AE_ERROR;
- if (ACPI_FAILURE(acpi_str_to_uuid(context->uuid_str, uuid)))
+ if (guid_parse(context->uuid_str, &guid))
return AE_ERROR;
context->ret.length = ACPI_ALLOCATE_BUFFER;
context->ret.pointer = NULL;
input.pointer = in_params;
in_params[0].type = ACPI_TYPE_BUFFER;
in_params[0].buffer.length = 16;
- in_params[0].buffer.pointer = uuid;
+ in_params[0].buffer.pointer = (u8 *)&guid;
in_params[1].type = ACPI_TYPE_INTEGER;
in_params[1].integer.value = context->rev;
in_params[2].type = ACPI_TYPE_INTEGER;
struct list_head flushes;
};
-static u8 nfit_uuid[NFIT_UUID_MAX][16];
+static guid_t nfit_uuid[NFIT_UUID_MAX];
-const u8 *to_nfit_uuid(enum nfit_uuids id)
+const guid_t *to_nfit_uuid(enum nfit_uuids id)
{
- return nfit_uuid[id];
+ return &nfit_uuid[id];
}
EXPORT_SYMBOL(to_nfit_uuid);
u32 offset, fw_status = 0;
acpi_handle handle;
unsigned int func;
- const u8 *uuid;
+ const guid_t *guid;
int rc, i;
func = cmd;
cmd_mask = nvdimm_cmd_mask(nvdimm);
dsm_mask = nfit_mem->dsm_mask;
desc = nd_cmd_dimm_desc(cmd);
- uuid = to_nfit_uuid(nfit_mem->family);
+ guid = to_nfit_uuid(nfit_mem->family);
handle = adev->handle;
} else {
struct acpi_device *adev = to_acpi_dev(acpi_desc);
cmd_mask = nd_desc->cmd_mask;
dsm_mask = cmd_mask;
desc = nd_cmd_bus_desc(cmd);
- uuid = to_nfit_uuid(NFIT_DEV_BUS);
+ guid = to_nfit_uuid(NFIT_DEV_BUS);
handle = adev->handle;
dimm_name = "bus";
}
in_buf.buffer.pointer,
min_t(u32, 256, in_buf.buffer.length), true);
- out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
+ out_obj = acpi_evaluate_dsm(handle, guid, 1, func, &in_obj);
if (!out_obj) {
dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
cmd_name);
int i;
for (i = 0; i < NFIT_UUID_MAX; i++)
- if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
+ if (guid_equal(to_nfit_uuid(i), (guid_t *)&spa->range_guid))
return i;
return -1;
}
struct acpi_device *adev, *adev_dimm;
struct device *dev = acpi_desc->dev;
unsigned long dsm_mask;
- const u8 *uuid;
+ const guid_t *guid;
int i;
int family = -1;
/*
* Until standardization materializes we need to consider 4
* different command sets. Note, that checking for function0 (bit0)
- * tells us if any commands are reachable through this uuid.
+ * tells us if any commands are reachable through this GUID.
*/
for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
return 0;
}
- uuid = to_nfit_uuid(nfit_mem->family);
+ guid = to_nfit_uuid(nfit_mem->family);
for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
- if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
+ if (acpi_check_dsm(adev_dimm->handle, guid, 1, 1ULL << i))
set_bit(i, &nfit_mem->dsm_mask);
return 0;
static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
{
struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
- const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
+ const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS);
struct acpi_device *adev;
int i;
return;
for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
- if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
+ if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
set_bit(i, &nd_desc->cmd_mask);
}
BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
- acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
- acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
- acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
- acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
- acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
- acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
- acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
- acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
- acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
- acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
- acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
- acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
- acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
+ guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]);
+ guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]);
+ guid_parse(UUID_CONTROL_REGION, &nfit_uuid[NFIT_SPA_DCR]);
+ guid_parse(UUID_DATA_REGION, &nfit_uuid[NFIT_SPA_BDW]);
+ guid_parse(UUID_VOLATILE_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_VDISK]);
+ guid_parse(UUID_VOLATILE_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_VCD]);
+ guid_parse(UUID_PERSISTENT_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_PDISK]);
+ guid_parse(UUID_PERSISTENT_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_PCD]);
+ guid_parse(UUID_NFIT_BUS, &nfit_uuid[NFIT_DEV_BUS]);
+ guid_parse(UUID_NFIT_DIMM, &nfit_uuid[NFIT_DEV_DIMM]);
+ guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
+ guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
+ guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
nfit_wq = create_singlethread_workqueue("nfit");
if (!nfit_wq)
#include <linux/libnvdimm.h>
#include <linux/ndctl.h>
#include <linux/types.h>
-#include <linux/uuid.h>
#include <linux/acpi.h>
#include <acpi/acuuid.h>
return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
}
-const u8 *to_nfit_uuid(enum nfit_uuids id);
+const guid_t *to_nfit_uuid(enum nfit_uuids id);
int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
void acpi_nfit_shutdown(void *data);
void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event);
adev->flags.coherent_dma = cca;
}
+static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
+{
+ bool *is_spi_i2c_slave_p = data;
+
+ if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
+ return 1;
+
+ /*
+ * devices that are connected to UART still need to be enumerated to
+ * platform bus
+ */
+ if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
+ *is_spi_i2c_slave_p = true;
+
+ /* no need to do more checking */
+ return -1;
+}
+
+static bool acpi_is_spi_i2c_slave(struct acpi_device *device)
+{
+ struct list_head resource_list;
+ bool is_spi_i2c_slave = false;
+
+ INIT_LIST_HEAD(&resource_list);
+ acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
+ &is_spi_i2c_slave);
+ acpi_dev_free_resource_list(&resource_list);
+
+ return is_spi_i2c_slave;
+}
+
void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
int type, unsigned long long sta)
{
acpi_bus_get_flags(device);
device->flags.match_driver = false;
device->flags.initialized = true;
+ device->flags.spi_i2c_slave = acpi_is_spi_i2c_slave(device);
acpi_device_clear_enumerated(device);
device_initialize(&device->dev);
dev_set_uevent_suppress(&device->dev, true);
return AE_OK;
}
-static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
-{
- bool *is_spi_i2c_slave_p = data;
-
- if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
- return 1;
-
- /*
- * devices that are connected to UART still need to be enumerated to
- * platform bus
- */
- if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
- *is_spi_i2c_slave_p = true;
-
- /* no need to do more checking */
- return -1;
-}
-
static void acpi_default_enumeration(struct acpi_device *device)
{
- struct list_head resource_list;
- bool is_spi_i2c_slave = false;
-
/*
* Do not enumerate SPI/I2C slaves as they will be enumerated by their
* respective parents.
*/
- INIT_LIST_HEAD(&resource_list);
- acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
- &is_spi_i2c_slave);
- acpi_dev_free_resource_list(&resource_list);
- if (!is_spi_i2c_slave) {
+ if (!device->flags.spi_i2c_slave) {
acpi_create_platform_device(device, NULL);
acpi_device_set_enumerated(device);
} else {
return;
device->flags.match_driver = true;
- if (ret > 0) {
+ if (ret > 0 && !device->flags.spi_i2c_slave) {
acpi_device_set_enumerated(device);
goto ok;
}
if (ret < 0)
return;
- if (device->pnp.type.platform_id)
- acpi_default_enumeration(device);
- else
+ if (!device->pnp.type.platform_id && !device->flags.spi_i2c_slave)
acpi_device_set_enumerated(device);
+ else
+ acpi_default_enumeration(device);
ok:
list_for_each_entry(child, &device->children, node)
/**
* acpi_evaluate_dsm - evaluate device's _DSM method
* @handle: ACPI device handle
- * @uuid: UUID of requested functions, should be 16 bytes
+ * @guid: GUID of requested functions, should be 16 bytes
* @rev: revision number of requested function
* @func: requested function number
* @argv4: the function specific parameter
*
- * Evaluate device's _DSM method with specified UUID, revision id and
+ * Evaluate device's _DSM method with specified GUID, revision id and
* function number. Caller needs to free the returned object.
*
* Though ACPI defines the fourth parameter for _DSM should be a package,
* some old BIOSes do expect a buffer or an integer etc.
*/
union acpi_object *
-acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
+acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func,
union acpi_object *argv4)
{
acpi_status ret;
params[0].type = ACPI_TYPE_BUFFER;
params[0].buffer.length = 16;
- params[0].buffer.pointer = (char *)uuid;
+ params[0].buffer.pointer = (u8 *)guid;
params[1].type = ACPI_TYPE_INTEGER;
params[1].integer.value = rev;
params[2].type = ACPI_TYPE_INTEGER;
/**
* acpi_check_dsm - check if _DSM method supports requested functions.
* @handle: ACPI device handle
- * @uuid: UUID of requested functions, should be 16 bytes at least
+ * @guid: GUID of requested functions, should be 16 bytes at least
* @rev: revision number of requested functions
* @funcs: bitmap of requested functions
*
* functions. Currently only support 64 functions at maximum, should be
* enough for now.
*/
-bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
+bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs)
{
int i;
u64 mask = 0;
if (funcs == 0)
return false;
- obj = acpi_evaluate_dsm(handle, uuid, rev, 0, NULL);
+ obj = acpi_evaluate_dsm(handle, guid, rev, 0, NULL);
if (!obj)
return false;
/*
* Bit 0 indicates whether there's support for any functions other than
- * function 0 for the specified UUID and revision.
+ * function 0 for the specified GUID and revision.
*/
if ((mask & 0x1) && (mask & funcs) == funcs)
return true;
bool SuccessfulIO)
{
struct request *Request = Command->Request;
- int Error = SuccessfulIO ? 0 : -EIO;
+ blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR;
pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist,
Command->SegmentCount, Command->DmaDirection);
struct amiga_floppy_struct *floppy;
char *data;
unsigned long flags;
- int err;
+ blk_status_t err;
next_req:
rq = set_next_request();
next_segment:
/* Here someone could investigate to be more efficient */
- for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) {
+ for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) {
#ifdef DEBUG
printk("fd: sector %ld + %d requested for %s\n",
blk_rq_pos(rq), cnt,
#endif
block = blk_rq_pos(rq) + cnt;
if ((int)block > floppy->blocks) {
- err = -EIO;
+ err = BLK_STS_IOERR;
break;
}
#endif
if (get_track(drive, track) == -1) {
- err = -EIO;
+ err = BLK_STS_IOERR;
break;
}
/* keep the drive spinning while writes are scheduled */
if (!fd_motor_on(drive)) {
- err = -EIO;
+ err = BLK_STS_IOERR;
break;
}
/*
d->aoemajor, d->aoeminor);
goto err_mempool;
}
+ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
spin_lock_irqsave(&d->lock, flags);
WARN_ON(!(d->flags & DEVFL_GD_NOW));
d->ip.rq = NULL;
do {
bio = rq->bio;
- bok = !fastfail && !bio->bi_error;
- } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
+ bok = !fastfail && !bio->bi_status;
+ } while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
/* cf. http://lkml.org/lkml/2006/10/31/28 */
if (!fastfail)
ahout->cmdstat, ahin->cmdstat,
d->aoemajor, d->aoeminor);
noskb: if (buf)
- buf->bio->bi_error = -EIO;
+ buf->bio->bi_status = BLK_STS_IOERR;
goto out;
}
"aoe: runt data size in read from",
(long) d->aoemajor, d->aoeminor,
skb->len, n);
- buf->bio->bi_error = -EIO;
+ buf->bio->bi_status = BLK_STS_IOERR;
break;
}
if (n > f->iter.bi_size) {
"aoe: too-large data size in read from",
(long) d->aoemajor, d->aoeminor,
n, f->iter.bi_size);
- buf->bio->bi_error = -EIO;
+ buf->bio->bi_status = BLK_STS_IOERR;
break;
}
bvcpy(skb, f->buf->bio, f->iter, n);
if (buf == NULL)
return;
buf->iter.bi_size = 0;
- buf->bio->bi_error = -EIO;
+ buf->bio->bi_status = BLK_STS_IOERR;
if (buf->nframesout == 0)
aoe_end_buf(d, buf);
}
if (rq == NULL)
return;
while ((bio = d->ip.nxbio)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
d->ip.nxbio = bio->bi_next;
n = (unsigned long) rq->special;
rq->special = (void *) --n;
static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
static DEFINE_TIMER(fd_timer, check_change, 0, 0);
-static void fd_end_request_cur(int err)
+static void fd_end_request_cur(blk_status_t err)
{
if (!__blk_end_request_cur(fd_request, err))
fd_request = NULL;
fd_request->error_count++;
if (fd_request->error_count >= MAX_ERRORS) {
printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
}
else if (fd_request->error_count == RECALIBRATE_ERRORS) {
printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
}
else {
/* all sectors finished */
- fd_end_request_cur(0);
+ fd_end_request_cur(BLK_STS_OK);
redo_fd_request();
return;
}
}
else {
/* all sectors finished */
- fd_end_request_cur(0);
+ fd_end_request_cur(BLK_STS_OK);
redo_fd_request();
}
return;
if (!UD.connected) {
/* drive not connected */
printk(KERN_ERR "Unknown Device: fd%d\n", drive );
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
goto repeat;
}
/* user supplied disk type */
if (--type >= NUM_DISK_MINORS) {
printk(KERN_WARNING "fd%d: invalid disk format", drive );
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
goto repeat;
}
if (minor2disktype[type].drive_types > DriveType) {
printk(KERN_WARNING "fd%d: unsupported disk format", drive );
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
goto repeat;
}
type = minor2disktype[type].index;
}
if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
goto repeat;
}
blk_queue_make_request(brd->brd_queue, brd_make_request);
blk_queue_max_hw_sectors(brd->brd_queue, 1024);
- blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
/* This is so fdisk will align partitions on 4k, because of
* direct_access API needing 4k alignment, returning a PFN
/* set the residual count for pc requests */
if (blk_rq_is_passthrough(rq))
scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
- blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0);
+ blk_end_request_all(rq, scsi_req(rq)->result ?
+ BLK_STS_IOERR : BLK_STS_OK);
spin_lock_irqsave(&h->lock, flags);
cmd_free(h, c);
disk->queue->cmd_size = sizeof(struct scsi_request);
disk->queue->request_fn = do_cciss_request;
disk->queue->queue_lock = &h->lock;
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
if (blk_init_allocated_queue(disk->queue) < 0)
goto cleanup_queue;
else
submit_bio(bio);
wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
- if (!bio->bi_error)
+ if (!bio->bi_status)
err = device->md_io.error;
out:
!bm_test_page_unchanged(b->bm_pages[idx]))
drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
- if (bio->bi_error) {
+ if (bio->bi_status) {
/* ctx error will hold the completed-last non-zero error code,
* in case error codes differ. */
- ctx->error = bio->bi_error;
+ ctx->error = blk_status_to_errno(bio->bi_status);
bm_set_page_io_err(b->bm_pages[idx]);
/* Not identical to on disk version of it.
* Is BM_PAGE_IO_ERROR enough? */
if (__ratelimit(&drbd_ratelimit_state))
drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
- bio->bi_error, idx);
+ bio->bi_status, idx);
} else {
bm_clear_page_io_err(b->bm_pages[idx]);
dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
/* to allocate from that set */
extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
+/* And a bio_set for cloning */
+extern struct bio_set *drbd_io_bio_set;
+
extern struct mutex resources_mutex;
extern int conn_lowest_minor(struct drbd_connection *connection);
__release(local);
if (!bio->bi_bdev) {
drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
- bio->bi_error = -ENODEV;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
}
mempool_t *drbd_ee_mempool;
mempool_t *drbd_md_io_page_pool;
struct bio_set *drbd_md_io_bio_set;
+struct bio_set *drbd_io_bio_set;
/* I do not use a standard mempool, because:
1) I want to hand out the pre-allocated objects first.
/* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
+ if (drbd_io_bio_set)
+ bioset_free(drbd_io_bio_set);
if (drbd_md_io_bio_set)
bioset_free(drbd_md_io_bio_set);
if (drbd_md_io_page_pool)
if (drbd_al_ext_cache)
kmem_cache_destroy(drbd_al_ext_cache);
+ drbd_io_bio_set = NULL;
drbd_md_io_bio_set = NULL;
drbd_md_io_page_pool = NULL;
drbd_ee_mempool = NULL;
drbd_pp_pool = NULL;
drbd_md_io_page_pool = NULL;
drbd_md_io_bio_set = NULL;
+ drbd_io_bio_set = NULL;
/* caches */
drbd_request_cache = kmem_cache_create(
goto Enomem;
/* mempools */
- drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
+ drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
+ if (drbd_io_bio_set == NULL)
+ goto Enomem;
+
+ drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
+ BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER);
if (drbd_md_io_bio_set == NULL)
goto Enomem;
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
- blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
q->queue_lock = &resource->req_lock;
device->md_io.page = alloc_page(GFP_KERNEL);
static enum drbd_ret_code
check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
{
- static enum drbd_ret_code rv;
+ enum drbd_ret_code rv;
struct drbd_peer_device *peer_device;
int i;
struct drbd_device *device = octx->device;
struct issue_flush_context *ctx = octx->ctx;
- if (bio->bi_error) {
- ctx->error = bio->bi_error;
- drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error);
+ if (bio->bi_status) {
+ ctx->error = blk_status_to_errno(bio->bi_status);
+ drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
}
kfree(octx);
bio_put(bio);
void complete_master_bio(struct drbd_device *device,
struct bio_and_error *m)
{
- m->bio->bi_error = m->error;
+ m->bio->bi_status = errno_to_blk_status(m->error);
bio_endio(m->bio);
dec_ap_bio(device);
}
if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
GFP_NOIO, 0))
- req->private_bio->bi_error = -EIO;
+ req->private_bio->bi_status = BLK_STS_IOERR;
bio_endio(req->private_bio);
}
/* only pass the error to the upper layers.
* if user cannot handle io errors, that's not our business. */
drbd_err(device, "could not kmalloc() req\n");
- bio->bi_error = -ENOMEM;
+ bio->bi_status = BLK_STS_RESOURCE;
bio_endio(bio);
return ERR_PTR(-ENOMEM);
}
struct drbd_device *device = (struct drbd_device *) q->queuedata;
unsigned long start_jif;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
start_jif = jiffies;
static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
{
struct bio *bio;
- bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
+ bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
req->private_bio = bio;
struct drbd_device *device;
device = bio->bi_private;
- device->md_io.error = bio->bi_error;
+ device->md_io.error = blk_status_to_errno(bio->bi_status);
/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
* to timeout on the lower level device, and eventually detach from it.
bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
bio_op(bio) == REQ_OP_DISCARD;
- if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
+ if (bio->bi_status && __ratelimit(&drbd_ratelimit_state))
drbd_warn(device, "%s: error=%d s=%llus\n",
is_write ? (is_discard ? "discard" : "write")
- : "read", bio->bi_error,
+ : "read", bio->bi_status,
(unsigned long long)peer_req->i.sector);
- if (bio->bi_error)
+ if (bio->bi_status)
set_bit(__EE_WAS_ERROR, &peer_req->flags);
bio_put(bio); /* no need for the bio anymore */
if (__ratelimit(&drbd_ratelimit_state))
drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
- if (!bio->bi_error)
+ if (!bio->bi_status)
drbd_panic_after_delayed_completion_of_aborted_request(device);
}
/* to avoid recursion in __req_mod */
- if (unlikely(bio->bi_error)) {
+ if (unlikely(bio->bi_status)) {
switch (bio_op(bio)) {
case REQ_OP_WRITE_ZEROES:
case REQ_OP_DISCARD:
- if (bio->bi_error == -EOPNOTSUPP)
+ if (bio->bi_status == BLK_STS_NOTSUPP)
what = DISCARD_COMPLETED_NOTSUPP;
else
what = DISCARD_COMPLETED_WITH_ERROR;
}
bio_put(req->private_bio);
- req->private_bio = ERR_PTR(bio->bi_error);
+ req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
/* not req_mod(), we need irqsave here! */
spin_lock_irqsave(&device->resource->req_lock, flags);
* =============================
*/
-static void floppy_end_request(struct request *req, int error)
+static void floppy_end_request(struct request *req, blk_status_t error)
{
unsigned int nr_sectors = current_count_sectors;
unsigned int drive = (unsigned long)req->rq_disk->private_data;
DRWE->last_error_generation = DRS->generation;
}
spin_lock_irqsave(q->queue_lock, flags);
- floppy_end_request(req, -EIO);
+ floppy_end_request(req, BLK_STS_IOERR);
spin_unlock_irqrestore(q->queue_lock, flags);
}
}
struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
int drive = cbdata->drive;
- if (bio->bi_error) {
+ if (bio->bi_status) {
pr_info("floppy: error %d while reading block 0\n",
- bio->bi_error);
+ bio->bi_status);
set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
}
complete(&cbdata->complete);
goto out_put_disk;
}
+ blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH);
blk_queue_max_hw_sectors(disks[drive]->queue, 64);
disks[drive]->major = FLOPPY_MAJOR;
disks[drive]->first_minor = TOMINOR(drive);
}
static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
+ loff_t logical_blocksize)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
sector_t x = (sector_t)size;
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
+ if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
+ lo->lo_logical_blocksize = logical_blocksize;
+ blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
+ blk_queue_logical_block_size(lo->lo_queue,
+ lo->lo_logical_blocksize);
+ }
set_capacity(lo->lo_disk, x);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
zero_fill_bio(bio);
}
- blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0);
+ blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
}
static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
struct file *file = lo->lo_backing_file;
struct inode *inode = file->f_mapping->host;
struct request_queue *q = lo->lo_queue;
+ int lo_bits = 9;
/*
* We use punch hole to reclaim the free space used by the
q->limits.discard_granularity = inode->i_sb->s_blocksize;
q->limits.discard_alignment = 0;
- blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
- blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
+ if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
+ lo_bits = blksize_bits(lo->lo_logical_blocksize);
+
+ blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
+ blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
kthread_stop(lo->worker_task);
}
+static int loop_kthread_worker_fn(void *worker_ptr)
+{
+ current->flags |= PF_LESS_THROTTLE;
+ return kthread_worker_fn(worker_ptr);
+}
+
static int loop_prepare_queue(struct loop_device *lo)
{
kthread_init_worker(&lo->worker);
- lo->worker_task = kthread_run(kthread_worker_fn,
+ lo->worker_task = kthread_run(loop_kthread_worker_fn,
&lo->worker, "loop%d", lo->lo_number);
if (IS_ERR(lo->worker_task))
return -ENOMEM;
lo->use_dio = false;
lo->lo_blocksize = lo_blocksize;
+ lo->lo_logical_blocksize = 512;
lo->lo_device = bdev;
lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
+ int lo_flags = lo->lo_flags;
if (lo->lo_encrypt_key_size &&
!uid_eq(lo->lo_key_owner, uid) &&
if (err)
goto exit;
+ if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
+ if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
+ lo->lo_logical_blocksize = 512;
+ lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
+ if (LO_INFO_BLOCKSIZE(info) != 512 &&
+ LO_INFO_BLOCKSIZE(info) != 1024 &&
+ LO_INFO_BLOCKSIZE(info) != 2048 &&
+ LO_INFO_BLOCKSIZE(info) != 4096)
+ return -EINVAL;
+ if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
+ return -EINVAL;
+ }
+
if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit)
- if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
+ lo->lo_sizelimit != info->lo_sizelimit ||
+ lo->lo_flags != lo_flags ||
+ ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
+ lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
+ if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
+ LO_INFO_BLOCKSIZE(info))) {
err = -EFBIG;
goto exit;
}
+ }
loop_config_discard(lo);
return err;
}
-static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
+static int loop_set_capacity(struct loop_device *lo)
{
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+ return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
+ lo->lo_logical_blocksize);
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
case LOOP_SET_CAPACITY:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
- err = loop_set_capacity(lo, bdev);
+ err = loop_set_capacity(lo);
break;
case LOOP_SET_DIRECT_IO:
err = -EPERM;
EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);
-static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
blk_mq_start_request(bd->rq);
if (lo->lo_state != Lo_bound)
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
switch (req_op(cmd->rq)) {
case REQ_OP_FLUSH:
kthread_queue_work(&lo->worker, &cmd->work);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void loop_handle_cmd(struct loop_cmd *cmd)
struct file * lo_backing_file;
struct block_device *lo_device;
unsigned lo_blocksize;
+ unsigned lo_logical_blocksize;
void *key_data;
gfp_t old_gfp_mask;
static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
struct smart_attr *attrib);
-static void mtip_complete_command(struct mtip_cmd *cmd, int status)
+static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
- mtip_complete_command(cmd, -EIO);
+ mtip_complete_command(cmd, BLK_STS_IOERR);
return;
}
tag,
fail_reason != NULL ?
fail_reason : "unknown");
- mtip_complete_command(cmd, -ENODATA);
+ mtip_complete_command(cmd, BLK_STS_MEDIUM);
continue;
}
}
dev_warn(&port->dd->pdev->dev,
"retiring tag %d\n", tag);
- mtip_complete_command(cmd, -EIO);
+ mtip_complete_command(cmd, BLK_STS_IOERR);
}
}
print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
/* insert request and run queue */
blk_execute_rq(rq->q, NULL, rq, true);
- rv = int_cmd->status;
- if (rv < 0) {
- if (rv == -ERESTARTSYS) { /* interrupted */
- dev_err(&dd->pdev->dev,
- "Internal command [%02X] was interrupted after %u ms\n",
- fis->command,
- jiffies_to_msecs(jiffies - start));
- rv = -EINTR;
- goto exec_ic_exit;
- } else if (rv == 0) /* timeout */
- dev_err(&dd->pdev->dev,
- "Internal command did not complete [%02X] within timeout of %lu ms\n",
- fis->command, timeout);
- else
- dev_err(&dd->pdev->dev,
- "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
- fis->command, rv, timeout);
+ if (int_cmd->status) {
+ dev_err(&dd->pdev->dev, "Internal command [%02X] failed %d\n",
+ fis->command, int_cmd->status);
+ rv = -EIO;
if (mtip_check_surprise_removal(dd->pdev) ||
test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
clear_bit(req->tag, dd->port->cmds_to_issue);
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
mtip_softirq_done_fn(req);
}
int err;
err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
- blk_mq_end_request(rq, err);
+ blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK);
return 0;
}
return false;
}
-static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
- struct request *rq)
+static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
+ struct request *rq)
{
struct driver_data *dd = hctx->queue->queuedata;
struct mtip_int_cmd *icmd = rq->special;
struct mtip_cmd_sg *command_sg;
if (mtip_commands_active(dd->port))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
/* Populate the SG list */
cmd->command_header->opts =
blk_mq_start_request(rq);
mtip_issue_non_ncq_command(dd->port, rq->tag);
- return BLK_MQ_RQ_QUEUE_OK;
+ return 0;
}
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
return mtip_issue_reserved_cmd(hctx, rq);
if (unlikely(mtip_check_unal_depth(hctx, rq)))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
blk_mq_start_request(rq);
ret = mtip_submit_request(hctx, rq);
if (likely(!ret))
- return BLK_MQ_RQ_QUEUE_OK;
-
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_OK;
+ return BLK_STS_IOERR;
}
static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
if (reserved) {
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
- cmd->status = -ETIME;
+ cmd->status = BLK_STS_TIMEOUT;
return BLK_EH_HANDLED;
}
{
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
- cmd->status = -ENODEV;
+ cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(rq);
}
int retries; /* The number of retries left for this command. */
int direction; /* Data transfer direction */
- int status;
+ blk_status_t status;
};
/* Structure used to describe a port. */
int index;
int cookie;
struct completion send_complete;
- int status;
+ blk_status_t status;
};
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct nbd_config *config;
if (!refcount_inc_not_zero(&nbd->config_refs)) {
- cmd->status = -EIO;
+ cmd->status = BLK_STS_TIMEOUT;
return BLK_EH_HANDLED;
}
"Connection timed out\n");
}
set_bit(NBD_TIMEDOUT, &config->runtime_flags);
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
sock_shutdown(nbd);
nbd_config_put(nbd);
unsigned long size = blk_rq_bytes(req);
struct bio *bio;
u32 type;
+ u32 nbd_cmd_flags = 0;
u32 tag = blk_mq_unique_tag(req);
int sent = nsock->sent, skip = 0;
return -EIO;
}
+ if (req->cmd_flags & REQ_FUA)
+ nbd_cmd_flags |= NBD_CMD_FLAG_FUA;
+
/* We did a partial send previously, and we at least sent the whole
* request struct, so just go and send the rest of the pages in the
* request.
}
cmd->index = index;
cmd->cookie = nsock->cookie;
- request.type = htonl(type);
+ request.type = htonl(type | nbd_cmd_flags);
if (type != NBD_CMD_FLUSH) {
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
request.len = htonl(size);
nsock->pending = req;
nsock->sent = sent;
}
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Send control failed (result %d)\n", result);
*/
nsock->pending = req;
nsock->sent = sent;
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
dev_err(disk_to_dev(nbd->disk),
"Send data failed (result %d)\n",
if (ntohl(reply.error)) {
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
ntohl(reply.error));
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
return cmd;
}
*/
if (nbd_disconnected(config) ||
config->num_connections <= 1) {
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
return cmd;
}
return ERR_PTR(-EIO);
if (!blk_mq_request_started(req))
return;
cmd = blk_mq_rq_to_pdu(req);
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(req);
}
nbd_config_put(nbd);
return -EINVAL;
}
- cmd->status = 0;
+ cmd->status = BLK_STS_OK;
again:
nsock = config->socks[index];
mutex_lock(&nsock->tx_lock);
return ret;
}
-static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
* appropriate.
*/
ret = nbd_handle_cmd(cmd, hctx->queue_num);
- if (ret < 0)
- ret = BLK_MQ_RQ_QUEUE_ERROR;
- if (!ret)
- ret = BLK_MQ_RQ_QUEUE_OK;
complete(&cmd->send_complete);
- return ret;
+ return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK;
}
static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
continue;
}
sk_set_memalloc(sock->sk);
+ sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
atomic_inc(&config->recv_threads);
refcount_inc(&nbd->config_refs);
old = nsock->sock;
set_disk_ro(nbd->disk, false);
if (config->flags & NBD_FLAG_SEND_TRIM)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
- if (config->flags & NBD_FLAG_SEND_FLUSH)
- blk_queue_write_cache(nbd->disk->queue, true, false);
+ if (config->flags & NBD_FLAG_SEND_FLUSH) {
+ if (config->flags & NBD_FLAG_SEND_FUA)
+ blk_queue_write_cache(nbd->disk->queue, true, true);
+ else
+ blk_queue_write_cache(nbd->disk->queue, true, false);
+ }
else
blk_queue_write_cache(nbd->disk->queue, false, false);
}
return -ENOMEM;
}
sk_set_memalloc(config->socks[i]->sock->sk);
+ config->socks[i]->sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
atomic_inc(&config->recv_threads);
refcount_inc(&nbd->config_refs);
INIT_WORK(&args->work, recv_work);
seq_puts(s, "NBD_FLAG_READ_ONLY\n");
if (flags & NBD_FLAG_SEND_FLUSH)
seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
+ if (flags & NBD_FLAG_SEND_FUA)
+ seq_puts(s, "NBD_FLAG_SEND_FUA\n");
if (flags & NBD_FLAG_SEND_TRIM)
seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
struct request_queue *q;
struct gendisk *disk;
struct nvm_dev *ndev;
- struct blk_mq_tag_set tag_set;
+ struct blk_mq_tag_set *tag_set;
+ struct blk_mq_tag_set __tag_set;
struct hrtimer timer;
unsigned int queue_depth;
spinlock_t lock;
static int null_major;
static int nullb_indexes;
static struct kmem_cache *ppa_cache;
+static struct blk_mq_tag_set tag_set;
enum {
NULL_IRQ_NONE = 0,
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");
-static int nr_devices = 2;
+static int nr_devices = 1;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
module_param(blocking, bool, S_IRUGO);
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
+static bool shared_tags;
+module_param(shared_tags, bool, S_IRUGO);
+MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
+
static int irqmode = NULL_IRQ_SOFTIRQ;
static int null_set_irqmode(const char *str, const struct kernel_param *kp)
switch (queue_mode) {
case NULL_Q_MQ:
- blk_mq_end_request(cmd->rq, 0);
+ blk_mq_end_request(cmd->rq, BLK_STS_OK);
return;
case NULL_Q_RQ:
INIT_LIST_HEAD(&cmd->rq->queuelist);
- blk_end_request_all(cmd->rq, 0);
+ blk_end_request_all(cmd->rq, BLK_STS_OK);
break;
case NULL_Q_BIO:
bio_endio(cmd->bio);
}
}
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
blk_mq_start_request(bd->rq);
null_handle_cmd(cmd);
- return BLK_MQ_RQ_QUEUE_OK;
-}
-
-static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
-{
- BUG_ON(!nullb);
- BUG_ON(!nq);
-
- init_waitqueue_head(&nq->wait);
- nq->queue_depth = nullb->queue_depth;
-}
-
-static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
- unsigned int index)
-{
- struct nullb *nullb = data;
- struct nullb_queue *nq = &nullb->queues[index];
-
- hctx->driver_data = nq;
- null_init_queue(nullb, nq);
- nullb->nr_queues++;
-
- return 0;
+ return BLK_STS_OK;
}
static const struct blk_mq_ops null_mq_ops = {
.queue_rq = null_queue_rq,
- .init_hctx = null_init_hctx,
.complete = null_softirq_done_fn,
};
#ifdef CONFIG_NVM
-static void null_lnvm_end_io(struct request *rq, int error)
+static void null_lnvm_end_io(struct request *rq, blk_status_t status)
{
struct nvm_rq *rqd = rq->end_io_data;
- rqd->error = error;
+ /* XXX: lighnvm core seems to expect NVM_RSP_* values here.. */
+ rqd->error = status ? -EIO : 0;
nvm_end_io(rqd);
blk_put_request(rq);
else
del_gendisk(nullb->disk);
blk_cleanup_queue(nullb->q);
- if (queue_mode == NULL_Q_MQ)
- blk_mq_free_tag_set(&nullb->tag_set);
+ if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ blk_mq_free_tag_set(nullb->tag_set);
if (!use_lightnvm)
put_disk(nullb->disk);
cleanup_queues(nullb);
.release = null_release,
};
+static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
+{
+ BUG_ON(!nullb);
+ BUG_ON(!nq);
+
+ init_waitqueue_head(&nq->wait);
+ nq->queue_depth = nullb->queue_depth;
+}
+
+static void null_init_queues(struct nullb *nullb)
+{
+ struct request_queue *q = nullb->q;
+ struct blk_mq_hw_ctx *hctx;
+ struct nullb_queue *nq;
+ int i;
+
+ queue_for_each_hw_ctx(q, hctx, i) {
+ if (!hctx->nr_ctx || !hctx->tags)
+ continue;
+ nq = &nullb->queues[i];
+ hctx->driver_data = nq;
+ null_init_queue(nullb, nq);
+ nullb->nr_queues++;
+ }
+}
+
static int setup_commands(struct nullb_queue *nq)
{
struct nullb_cmd *cmd;
return 0;
}
+static int null_init_tag_set(struct blk_mq_tag_set *set)
+{
+ set->ops = &null_mq_ops;
+ set->nr_hw_queues = submit_queues;
+ set->queue_depth = hw_queue_depth;
+ set->numa_node = home_node;
+ set->cmd_size = sizeof(struct nullb_cmd);
+ set->flags = BLK_MQ_F_SHOULD_MERGE;
+ set->driver_data = NULL;
+
+ if (blocking)
+ set->flags |= BLK_MQ_F_BLOCKING;
+
+ return blk_mq_alloc_tag_set(set);
+}
+
static int null_add_dev(void)
{
struct nullb *nullb;
goto out_free_nullb;
if (queue_mode == NULL_Q_MQ) {
- nullb->tag_set.ops = &null_mq_ops;
- nullb->tag_set.nr_hw_queues = submit_queues;
- nullb->tag_set.queue_depth = hw_queue_depth;
- nullb->tag_set.numa_node = home_node;
- nullb->tag_set.cmd_size = sizeof(struct nullb_cmd);
- nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
- nullb->tag_set.driver_data = nullb;
-
- if (blocking)
- nullb->tag_set.flags |= BLK_MQ_F_BLOCKING;
-
- rv = blk_mq_alloc_tag_set(&nullb->tag_set);
+ if (shared_tags) {
+ nullb->tag_set = &tag_set;
+ rv = 0;
+ } else {
+ nullb->tag_set = &nullb->__tag_set;
+ rv = null_init_tag_set(nullb->tag_set);
+ }
+
if (rv)
goto out_cleanup_queues;
- nullb->q = blk_mq_init_queue(&nullb->tag_set);
+ nullb->q = blk_mq_init_queue(nullb->tag_set);
if (IS_ERR(nullb->q)) {
rv = -ENOMEM;
goto out_cleanup_tags;
}
+ null_init_queues(nullb);
} else if (queue_mode == NULL_Q_BIO) {
nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
if (!nullb->q) {
out_cleanup_blk_queue:
blk_cleanup_queue(nullb->q);
out_cleanup_tags:
- if (queue_mode == NULL_Q_MQ)
- blk_mq_free_tag_set(&nullb->tag_set);
+ if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ blk_mq_free_tag_set(nullb->tag_set);
out_cleanup_queues:
cleanup_queues(nullb);
out_free_nullb:
queue_mode = NULL_Q_MQ;
}
+ if (queue_mode == NULL_Q_MQ && shared_tags)
+ null_init_tag_set(&tag_set);
+
if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
if (submit_queues < nr_online_nodes) {
pr_warn("null_blk: submit_queues param is set to %u.",
}
mutex_unlock(&lock);
+ if (queue_mode == NULL_Q_MQ && shared_tags)
+ blk_mq_free_tag_set(&tag_set);
+
kmem_cache_destroy(ppa_cache);
}
put_disk(disk);
continue;
}
+ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
cd->disk = disk;
cd->pi = &cd->pia;
cd->present = 0;
ps_set_intr(do_pcd_read, NULL, 0, nice);
return;
} else {
- __blk_end_request_all(pcd_req, -EIO);
+ __blk_end_request_all(pcd_req, BLK_STS_IOERR);
pcd_req = NULL;
}
}
pcd_request();
}
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
{
unsigned long saved_flags;
if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
pcd_bufblk = -1;
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
return;
}
pcd_bufblk = -1;
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
phase = NULL;
spin_lock_irqsave(&pd_lock, saved_flags);
if (!__blk_end_request_cur(pd_req,
- res == Ok ? 0 : -EIO)) {
+ res == Ok ? 0 : BLK_STS_IOERR)) {
if (!set_next_request())
stop = 1;
}
return;
}
blk_queue_max_hw_sectors(p->queue, cluster);
+ blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH);
if (disk->drive == -1) {
for (disk->drive = 0; disk->drive <= 1; disk->drive++)
return;
}
blk_queue_max_segments(disk->queue, cluster);
+ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
pf->disk = disk;
pf->pi = &pf->pia;
pf->media_status = PF_NM;
return pf_req != NULL;
}
-static void pf_end_request(int err)
+static void pf_end_request(blk_status_t err)
{
if (pf_req && !__blk_end_request_cur(pf_req, err))
pf_req = NULL;
pf_count = blk_rq_cur_sectors(pf_req);
if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
- pf_end_request(-EIO);
+ pf_end_request(BLK_STS_IOERR);
goto repeat;
}
pi_do_claimed(pf_current->pi, do_pf_write);
else {
pf_busy = 0;
- pf_end_request(-EIO);
+ pf_end_request(BLK_STS_IOERR);
goto repeat;
}
}
return 0;
}
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
{
unsigned long saved_flags;
pi_do_claimed(pf_current->pi, do_pf_read_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pf_mask = STAT_DRQ;
pi_do_claimed(pf_current->pi, do_pf_read_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pi_read_block(pf_current->pi, pf_buf, 512);
pi_do_claimed(pf_current->pi, do_pf_write_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pi_do_claimed(pf_current->pi, do_pf_write_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pi_write_block(pf_current->pi, pf_buf, 512);
pi_do_claimed(pf_current->pi, do_pf_write_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pi_disconnect(pf_current->pi);
static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */
static mempool_t *psd_pool;
+static struct bio_set *pkt_bio_set;
static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */
static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
if (IS_ERR(rq))
return PTR_ERR(rq);
- scsi_req_init(rq);
if (cgc->buflen) {
ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
bio, (unsigned long long)pkt->sector,
- (unsigned long long)bio->bi_iter.bi_sector, bio->bi_error);
+ (unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
- if (bio->bi_error)
+ if (bio->bi_status)
atomic_inc(&pkt->io_errors);
if (atomic_dec_and_test(&pkt->io_wait)) {
atomic_inc(&pkt->run_sm);
struct pktcdvd_device *pd = pkt->pd;
BUG_ON(!pd);
- pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error);
+ pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
pd->stats.pkt_ended++;
pkt_queue_bio(pd, pkt->w_bio);
}
-static void pkt_finish_packet(struct packet_data *pkt, int error)
+static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
{
struct bio *bio;
- if (error)
+ if (status)
pkt->cache_valid = 0;
/* Finish all bios corresponding to this packet */
while ((bio = bio_list_pop(&pkt->orig_bios))) {
- bio->bi_error = error;
+ bio->bi_status = status;
bio_endio(bio);
}
}
if (atomic_read(&pkt->io_wait) > 0)
return;
- if (!pkt->w_bio->bi_error) {
+ if (!pkt->w_bio->bi_status) {
pkt_set_state(pkt, PACKET_FINISHED_STATE);
} else {
pkt_set_state(pkt, PACKET_RECOVERY_STATE);
break;
case PACKET_FINISHED_STATE:
- pkt_finish_packet(pkt, pkt->w_bio->bi_error);
+ pkt_finish_packet(pkt, pkt->w_bio->bi_status);
return;
default:
struct packet_stacked_data *psd = bio->bi_private;
struct pktcdvd_device *pd = psd->pd;
- psd->bio->bi_error = bio->bi_error;
+ psd->bio->bi_status = bio->bi_status;
bio_put(bio);
bio_endio(psd->bio);
mempool_free(psd, psd_pool);
static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
{
- struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
+ struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
psd->pd = pd;
char b[BDEVNAME_SIZE];
struct bio *split;
- blk_queue_bounce(q, &bio);
-
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
pd = q->queuedata;
if (!pd) {
split = bio_split(bio, last_zone -
bio->bi_iter.bi_sector,
- GFP_NOIO, fs_bio_set);
+ GFP_NOIO, pkt_bio_set);
bio_chain(split, bio);
} else {
split = bio;
bdev = bdget(dev);
if (!bdev)
return -ENOMEM;
+ if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
+ WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
+ bdput(bdev);
+ return -EINVAL;
+ }
ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
if (ret)
return ret;
sizeof(struct packet_stacked_data));
if (!psd_pool)
return -ENOMEM;
+ pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!pkt_bio_set) {
+ mempool_destroy(psd_pool);
+ return -ENOMEM;
+ }
ret = register_blkdev(pktdev_major, DRIVER_NAME);
if (ret < 0) {
unregister_blkdev(pktdev_major, DRIVER_NAME);
out2:
mempool_destroy(psd_pool);
+ bioset_free(pkt_bio_set);
return ret;
}
unregister_blkdev(pktdev_major, DRIVER_NAME);
mempool_destroy(psd_pool);
+ bioset_free(pkt_bio_set);
}
MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
if (res) {
dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
__LINE__, op, res);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return 0;
}
if (res) {
dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
__func__, __LINE__, res);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return 0;
}
break;
default:
blk_dump_rq_flags(req, DEVICE_NAME " bad request");
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
}
}
struct ps3_storage_device *dev = data;
struct ps3disk_private *priv;
struct request *req;
- int res, read, error;
+ int res, read;
+ blk_status_t error;
u64 tag, status;
const char *op;
if (status) {
dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__,
__LINE__, op, status);
- error = -EIO;
+ error = BLK_STS_IOERR;
} else {
dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__,
__LINE__, op);
kfree(priv->cache.tags);
}
-static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
+static blk_status_t ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
size_t len, size_t *retlen, u_char *buf)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
(unsigned int)from, len);
if (from >= priv->size)
- return -EIO;
+ return BLK_STS_IOERR;
if (len > priv->size - from)
len = priv->size - from;
return 0;
}
-static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
+static blk_status_t ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
size_t len, size_t *retlen, const u_char *buf)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
unsigned int cached, count;
if (to >= priv->size)
- return -EIO;
+ return BLK_STS_IOERR;
if (len > priv->size - to)
len = priv->size - to;
int write = bio_data_dir(bio) == WRITE;
const char *op = write ? "write" : "read";
loff_t offset = bio->bi_iter.bi_sector << 9;
- int error = 0;
+ blk_status_t error = 0;
struct bio_vec bvec;
struct bvec_iter iter;
struct bio *next;
if (retlen != len) {
dev_err(&dev->core, "Short %s\n", op);
- error = -EIO;
+ error = BLK_STS_IOERR;
goto out;
}
next = bio_list_peek(&priv->list);
spin_unlock_irq(&priv->lock);
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
return next;
}
dev_dbg(&dev->core, "%s\n", __func__);
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
spin_lock_irq(&priv->lock);
busy = !bio_list_empty(&priv->list);
static struct kmem_cache *rbd_img_request_cache;
static struct kmem_cache *rbd_obj_request_cache;
+static struct bio_set *rbd_bio_clone;
+
static int rbd_major;
static DEFINE_IDA(rbd_dev_id_ida);
{
struct bio *bio;
- bio = bio_clone(bio_src, gfpmask);
+ bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone);
if (!bio)
return NULL; /* ENOMEM */
rbd_assert(img_request->obj_request != NULL);
more = obj_request->which < img_request->obj_request_count - 1;
} else {
+ blk_status_t status = errno_to_blk_status(result);
+
rbd_assert(img_request->rq != NULL);
- more = blk_update_request(img_request->rq, result, xferred);
+ more = blk_update_request(img_request->rq, status, xferred);
if (!more)
- __blk_mq_end_request(img_request->rq, result);
+ __blk_mq_end_request(img_request->rq, status);
}
return more;
obj_op_name(op_type), length, offset, result);
ceph_put_snap_context(snapc);
err:
- blk_mq_end_request(rq, result);
+ blk_mq_end_request(rq, errno_to_blk_status(result));
}
-static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
struct work_struct *work = blk_mq_rq_to_pdu(rq);
queue_work(rbd_wq, work);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void rbd_free_disk(struct rbd_device *rbd_dev)
if (!rbd_obj_request_cache)
goto out_err;
+ rbd_assert(!rbd_bio_clone);
+ rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!rbd_bio_clone)
+ goto out_err_clone;
+
return 0;
+out_err_clone:
+ kmem_cache_destroy(rbd_obj_request_cache);
+ rbd_obj_request_cache = NULL;
out_err:
kmem_cache_destroy(rbd_img_request_cache);
rbd_img_request_cache = NULL;
rbd_assert(rbd_img_request_cache);
kmem_cache_destroy(rbd_img_request_cache);
rbd_img_request_cache = NULL;
+
+ rbd_assert(rbd_bio_clone);
+ bioset_free(rbd_bio_clone);
+ rbd_bio_clone = NULL;
}
static int __init rbd_init(void)
{
struct rsxx_cardinfo *card = q->queuedata;
struct rsxx_bio_meta *bio_meta;
- int st = -EINVAL;
+ blk_status_t st = BLK_STS_IOERR;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
might_sleep();
if (bio_end_sector(bio) > get_capacity(card->gendisk))
goto req_err;
- if (unlikely(card->halt)) {
- st = -EFAULT;
+ if (unlikely(card->halt))
goto req_err;
- }
- if (unlikely(card->dma_fault)) {
- st = (-EFAULT);
+ if (unlikely(card->dma_fault))
goto req_err;
- }
if (bio->bi_iter.bi_size == 0) {
dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
if (!bio_meta) {
- st = -ENOMEM;
+ st = BLK_STS_RESOURCE;
goto req_err;
}
kmem_cache_free(bio_meta_pool, bio_meta);
req_err:
if (st)
- bio->bi_error = st;
+ bio->bi_status = st;
bio_endio(bio);
return BLK_QC_T_NONE;
}
}
blk_queue_make_request(card->queue, rsxx_make_request);
- blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
mutex_unlock(&ctrl->work_lock);
}
-static int rsxx_queue_discard(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_discard(struct rsxx_cardinfo *card,
struct list_head *q,
unsigned int laddr,
rsxx_dma_cb cb,
dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
if (!dma)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
dma->cmd = HW_CMD_BLK_DISCARD;
dma->laddr = laddr;
return 0;
}
-static int rsxx_queue_dma(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_dma(struct rsxx_cardinfo *card,
struct list_head *q,
int dir,
unsigned int dma_off,
dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
if (!dma)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
dma->laddr = laddr;
return 0;
}
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
struct bio *bio,
atomic_t *n_dmas,
rsxx_dma_cb cb,
unsigned int dma_len;
int dma_cnt[RSXX_MAX_TARGETS];
int tgt;
- int st;
+ blk_status_t st;
int i;
addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
for (i = 0; i < card->n_targets; i++)
rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
FREE_DMA);
-
return st;
}
void rsxx_dma_cleanup(void);
void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
int rsxx_dma_configure(struct rsxx_cardinfo *card);
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
struct bio *bio,
atomic_t *n_dmas,
rsxx_dma_cb cb,
struct skd_special_context *skspcl);
static void skd_request_fn(struct request_queue *rq);
static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, int error);
-static int skd_preop_sg_list(struct skd_device *skdev,
+ struct skd_request_context *skreq, blk_status_t status);
+static bool skd_preop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq);
static void skd_postop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq);
if (req == NULL)
break;
blk_start_request(req);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
}
struct request *req = NULL;
struct skd_scsi_request *scsi_req;
unsigned long io_flags;
- int error;
u32 lba;
u32 count;
int data_dir;
if (!req->bio)
goto skip_sg;
- error = skd_preop_sg_list(skdev, skreq);
-
- if (error != 0) {
+ if (!skd_preop_sg_list(skdev, skreq)) {
/*
* Complete the native request with error.
* Note that the request context is still at the
*/
pr_debug("%s:%s:%d error Out\n",
skdev->name, __func__, __LINE__);
- skd_end_request(skdev, skreq, error);
+ skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
continue;
}
}
static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, int error)
+ struct skd_request_context *skreq, blk_status_t error)
{
if (unlikely(error)) {
struct request *req = skreq->req;
__blk_end_request_all(skreq->req, error);
}
-static int skd_preop_sg_list(struct skd_device *skdev,
+static bool skd_preop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq)
{
struct request *req = skreq->req;
n_sg = blk_rq_map_sg(skdev->queue, req, sg);
if (n_sg <= 0)
- return -EINVAL;
+ return false;
/*
* Map scatterlist to PCI bus addresses.
*/
n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
if (n_sg <= 0)
- return -EINVAL;
+ return false;
SKD_ASSERT(n_sg <= skdev->sgs_per_request);
}
}
- return 0;
+ return true;
}
static void skd_postop_sg_list(struct skd_device *skdev,
switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
case SKD_CHECK_STATUS_REPORT_GOOD:
case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
- skd_end_request(skdev, skreq, 0);
+ skd_end_request(skdev, skreq, BLK_STS_OK);
break;
case SKD_CHECK_STATUS_BUSY_IMMINENT:
case SKD_CHECK_STATUS_REPORT_ERROR:
default:
- skd_end_request(skdev, skreq, -EIO);
+ skd_end_request(skdev, skreq, BLK_STS_IOERR);
break;
}
}
* native request.
*/
if (likely(cmp_status == SAM_STAT_GOOD))
- skd_end_request(skdev, skreq, 0);
+ skd_end_request(skdev, skreq, BLK_STS_OK);
else
skd_resolve_req_exception(skdev, skreq);
}
SKD_MAX_RETRIES)
blk_requeue_request(skdev->queue, skreq->req);
else
- skd_end_request(skdev, skreq, -EIO);
+ skd_end_request(skdev, skreq, BLK_STS_IOERR);
skreq->req = NULL;
rc = -ENOMEM;
goto err_out;
}
+ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
skdev->queue = q;
disk->queue = q;
rqe->req = NULL;
- __blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
+ __blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size);
vdc_blk_queue_start(port);
}
struct request *req;
while ((req = blk_fetch_request(port->disk->queue)) != NULL)
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
static void vdc_ldc_reset_timer(unsigned long _arg)
return ret;
}
-static int floppy_read_sectors(struct floppy_state *fs,
+static blk_status_t floppy_read_sectors(struct floppy_state *fs,
int req_sector, int sectors_nb,
unsigned char *buffer)
{
ret = swim_read_sector(fs, side, track, sector,
buffer);
if (try-- == 0)
- return -EIO;
+ return BLK_STS_IOERR;
} while (ret != 512);
buffer += ret;
req = swim_next_request(swd);
while (req) {
- int err = -EIO;
+ blk_status_t err = BLK_STS_IOERR;
fs = req->rq_disk->private_data;
if (blk_rq_pos(req) >= fs->total_secs)
put_disk(swd->unit[drive].disk);
goto exit_put_disks;
}
+ blk_queue_bounce_limit(swd->unit[drive].disk->queue,
+ BLK_BOUNCE_HIGH);
swd->unit[drive].disk->queue->queuedata = swd;
swd->unit[drive].swd = swd;
}
unsigned int clearing);
static int floppy_revalidate(struct gendisk *disk);
-static bool swim3_end_request(struct floppy_state *fs, int err, unsigned int nr_bytes)
+static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes)
{
struct request *req = fs->cur_req;
int rc;
if (fs->mdev->media_bay &&
check_media_bay(fs->mdev->media_bay) != MB_FD) {
swim3_dbg("%s", " media bay absent, dropping req\n");
- swim3_end_request(fs, -ENODEV, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
continue;
}
if (blk_rq_pos(req) >= fs->total_secs) {
swim3_dbg(" pos out of bounds (%ld, max is %ld)\n",
(long)blk_rq_pos(req), (long)fs->total_secs);
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
continue;
}
if (fs->ejected) {
swim3_dbg("%s", " disk ejected\n");
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
continue;
}
fs->write_prot = swim3_readbit(fs, WRITE_PROT);
if (fs->write_prot) {
swim3_dbg("%s", " try to write, disk write protected\n");
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
continue;
}
}
if (fs->retries > 5) {
swim3_err("Wrong cylinder in transfer, want: %d got %d\n",
fs->req_cyl, fs->cur_cyl);
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
return;
}
out_8(&sw->intr_enable, 0);
fs->cur_cyl = -1;
if (fs->retries > 5) {
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
} else {
out_8(&sw->select, RELAX);
out_8(&sw->intr_enable, 0);
swim3_err("%s", "Seek timeout\n");
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
spin_unlock_irqrestore(&swim3_lock, flags);
goto unlock;
}
swim3_err("%s", "Seek settle timeout\n");
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
unlock:
swim3_err("Timeout %sing sector %ld\n",
(rq_data_dir(fs->cur_req)==WRITE? "writ": "read"),
(long)blk_rq_pos(fs->cur_req));
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
spin_unlock_irqrestore(&swim3_lock, flags);
swim3_err("%s", "Seen sector but cyl=ff?\n");
fs->cur_cyl = -1;
if (fs->retries > 5) {
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
} else {
swim3_err("Error %sing block %ld (err=%x)\n",
rq_data_dir(req) == WRITE? "writ": "read",
(long)blk_rq_pos(req), err);
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
}
} else {
swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid);
swim3_err(" state=%d, dir=%x, intr=%x, err=%x\n",
fs->state, rq_data_dir(req), intr, err);
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
break;
put_disk(disk);
return -ENOMEM;
}
+ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
disk->queue->queuedata = &floppy_states[index];
if (index == 0) {
return 0;
}
-static struct of_device_id swim3_match[] =
+static const struct of_device_id swim3_match[] =
{
{
.name = "swim3",
static inline void carm_end_request_queued(struct carm_host *host,
struct carm_request *crq,
- int error)
+ blk_status_t error)
{
struct request *req = crq->rq;
int rc;
}
static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
- int error)
+ blk_status_t error)
{
carm_end_request_queued(host, crq, error);
if (max_queue == 1)
sg = &crq->sg[0];
n_elem = blk_rq_map_sg(q, rq, sg);
if (n_elem <= 0) {
- carm_end_rq(host, crq, -EIO);
+ carm_end_rq(host, crq, BLK_STS_IOERR);
return; /* request with no s/g entries? */
}
/* map scatterlist to PCI bus addresses */
n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
if (n_elem <= 0) {
- carm_end_rq(host, crq, -EIO);
+ carm_end_rq(host, crq, BLK_STS_IOERR);
return; /* request with no s/g entries? */
}
crq->n_elem = n_elem;
static void carm_handle_array_info(struct carm_host *host,
struct carm_request *crq, u8 *mem,
- int error)
+ blk_status_t error)
{
struct carm_port *port;
u8 *msg_data = mem + sizeof(struct carm_array_info);
static void carm_handle_scan_chan(struct carm_host *host,
struct carm_request *crq, u8 *mem,
- int error)
+ blk_status_t error)
{
u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
unsigned int i, dev_count = 0;
}
static void carm_handle_generic(struct carm_host *host,
- struct carm_request *crq, int error,
+ struct carm_request *crq, blk_status_t error,
int cur_state, int next_state)
{
DPRINTK("ENTER\n");
}
static inline void carm_handle_rw(struct carm_host *host,
- struct carm_request *crq, int error)
+ struct carm_request *crq, blk_status_t error)
{
int pci_dir;
u32 handle = le32_to_cpu(ret_handle_le);
unsigned int msg_idx;
struct carm_request *crq;
- int error = (status == RMSG_OK) ? 0 : -EIO;
+ blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
u8 *mem;
VPRINTK("ENTER, handle == 0x%x\n", handle);
err_out:
printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
- carm_end_rq(host, crq, -EIO);
+ carm_end_rq(host, crq, BLK_STS_IOERR);
}
static inline void carm_handle_responses(struct carm_host *host)
PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
if (control & DMASCR_HARD_ERROR) {
/* error */
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
dev_printk(KERN_WARNING, &card->dev->dev,
"I/O error on sector %d/%d\n",
le32_to_cpu(desc->local_addr)>>9,
(unsigned long long)bio->bi_iter.bi_sector,
bio->bi_iter.bi_size);
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
spin_lock_irq(&card->lock);
*card->biotail = bio;
struct scatterlist sg[];
};
-static inline int virtblk_result(struct virtblk_req *vbr)
+static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
{
switch (vbr->status) {
case VIRTIO_BLK_S_OK:
- return 0;
+ return BLK_STS_OK;
case VIRTIO_BLK_S_UNSUPP:
- return -ENOTTY;
+ return BLK_STS_NOTSUPP;
default:
- return -EIO;
+ return BLK_STS_IOERR;
}
}
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct virtio_blk *vblk = hctx->queue->queuedata;
break;
default:
WARN_ON_ONCE(1);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
/* Out of mem doesn't actually happen, since we fall back
* to direct descriptors */
if (err == -ENOMEM || err == -ENOSPC)
- return BLK_MQ_RQ_QUEUE_BUSY;
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_RESOURCE;
+ return BLK_STS_IOERR;
}
if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
if (notify)
virtqueue_notify(vblk->vqs[qid].vq);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
/* return id (s/n) string for *disk to *id_str
goto out;
blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
- err = virtblk_result(blk_mq_rq_to_pdu(req));
+ err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
out:
blk_put_request(req);
return err;
/* We can handle whatever the host told us to handle. */
blk_queue_max_segments(q, vblk->sg_elems-2);
- /* No need to bounce any requests */
- blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
-
/* No real sector limit. */
blk_queue_max_hw_sectors(q, -1U);
unsigned long timeout;
int ret;
- xen_blkif_get(blkif);
-
set_freezable();
while (!kthread_should_stop()) {
if (try_to_freeze())
print_stats(ring);
ring->xenblkd = NULL;
- xen_blkif_put(blkif);
return 0;
}
atomic_set(&blkif->drain, 0);
}
-/*
- * Completion callback on the bio's. Called as bh->b_end_io()
- */
-
-static void __end_block_io_op(struct pending_req *pending_req, int error)
+static void __end_block_io_op(struct pending_req *pending_req,
+ blk_status_t error)
{
/* An error fails the entire request. */
- if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
- (error == -EOPNOTSUPP)) {
+ if (pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE &&
+ error == BLK_STS_NOTSUPP) {
pr_debug("flush diskcache op failed, not supported\n");
xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
- } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
- (error == -EOPNOTSUPP)) {
+ } else if (pending_req->operation == BLKIF_OP_WRITE_BARRIER &&
+ error == BLK_STS_NOTSUPP) {
pr_debug("write barrier op failed, not supported\n");
xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
*/
static void end_block_io_op(struct bio *bio)
{
- __end_block_io_op(bio->bi_private, bio->bi_error);
+ __end_block_io_op(bio->bi_private, bio->bi_status);
bio_put(bio);
}
for (i = 0; i < nbio; i++)
bio_put(biolist[i]);
atomic_set(&pending_req->pendcnt, 1);
- __end_block_io_op(pending_req, -EINVAL);
+ __end_block_io_op(pending_req, BLK_STS_RESOURCE);
msleep(1); /* back off a bit */
return -EIO;
}
static void make_response(struct xen_blkif_ring *ring, u64 id,
unsigned short op, int st)
{
- struct blkif_response resp;
+ struct blkif_response *resp;
unsigned long flags;
union blkif_back_rings *blk_rings;
int notify;
- resp.id = id;
- resp.operation = op;
- resp.status = st;
-
spin_lock_irqsave(&ring->blk_ring_lock, flags);
blk_rings = &ring->blk_rings;
/* Place on the response ring for the relevant domain. */
switch (ring->blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
- memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
- &resp, sizeof(resp));
+ resp = RING_GET_RESPONSE(&blk_rings->native,
+ blk_rings->native.rsp_prod_pvt);
break;
case BLKIF_PROTOCOL_X86_32:
- memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
- &resp, sizeof(resp));
+ resp = RING_GET_RESPONSE(&blk_rings->x86_32,
+ blk_rings->x86_32.rsp_prod_pvt);
break;
case BLKIF_PROTOCOL_X86_64:
- memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
- &resp, sizeof(resp));
+ resp = RING_GET_RESPONSE(&blk_rings->x86_64,
+ blk_rings->x86_64.rsp_prod_pvt);
break;
default:
BUG();
}
+
+ resp->id = id;
+ resp->operation = op;
+ resp->status = st;
+
blk_rings->common.rsp_prod_pvt++;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
struct blkif_common_request {
char dummy;
};
-struct blkif_common_response {
- char dummy;
-};
+
+/* i386 protocol version */
struct blkif_x86_32_request_rw {
uint8_t nr_segments; /* number of segments */
} u;
} __attribute__((__packed__));
-/* i386 protocol version */
-#pragma pack(push, 4)
-struct blkif_x86_32_response {
- uint64_t id; /* copied from request */
- uint8_t operation; /* copied from request */
- int16_t status; /* BLKIF_RSP_??? */
-};
-#pragma pack(pop)
/* x86_64 protocol version */
struct blkif_x86_64_request_rw {
} u;
} __attribute__((__packed__));
-struct blkif_x86_64_response {
- uint64_t __attribute__((__aligned__(8))) id;
- uint8_t operation; /* copied from request */
- int16_t status; /* BLKIF_RSP_??? */
-};
-
DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
- struct blkif_common_response);
+ struct blkif_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
- struct blkif_x86_32_response);
+ struct blkif_response __packed);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
- struct blkif_x86_64_response);
+ struct blkif_response);
union blkif_back_rings {
struct blkif_back_ring native;
wait_queue_head_t wq;
atomic_t inflight;
+ bool active;
/* One thread per blkif ring. */
struct task_struct *xenblkd;
unsigned int waiting_reqs;
init_waitqueue_head(&ring->shutdown_wq);
ring->blkif = blkif;
ring->st_print = jiffies;
- xen_blkif_get(blkif);
+ ring->active = true;
}
return 0;
struct xen_blkif_ring *ring = &blkif->rings[r];
unsigned int i = 0;
+ if (!ring->active)
+ continue;
+
if (ring->xenblkd) {
kthread_stop(ring->xenblkd);
wake_up(&ring->shutdown_wq);
- ring->xenblkd = NULL;
}
/* The above kthread_stop() guarantees that at this point we
BUG_ON(ring->free_pages_num != 0);
BUG_ON(ring->persistent_gnt_c != 0);
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
- xen_blkif_put(blkif);
+ ring->active = false;
}
blkif->nr_ring_pages = 0;
/*
static void xen_blkif_free(struct xen_blkif *blkif)
{
-
- xen_blkif_disconnect(blkif);
+ WARN_ON(xen_blkif_disconnect(blkif));
xen_vbd_free(&blkif->vbd);
+ kfree(blkif->be->mode);
+ kfree(blkif->be);
/* Make sure everything is drained before shutting down */
kmem_cache_free(xen_blkif_cachep, blkif);
xen_blkif_put(be->blkif);
}
- kfree(be->mode);
- kfree(be);
return 0;
}
unsigned long associated_id;
};
-struct split_bio {
- struct bio *bio;
- atomic_t pending;
-};
-
struct blkif_req {
int error;
};
!info->feature_fua));
}
-static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *qd)
{
unsigned long flags;
flush_requests(rinfo);
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_err:
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
out_busy:
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
blk_mq_stop_hw_queue(hctx);
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
static void blkif_complete_rq(struct request *rq)
/* Make sure buffer addresses are sector-aligned. */
blk_queue_dma_alignment(rq, 511);
-
- /* Make sure we don't use bounce buffers. */
- blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
}
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
continue;
}
- blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+ if (bret->status == BLKIF_RSP_OKAY)
+ blkif_req(req)->error = BLK_STS_OK;
+ else
+ blkif_req(req)->error = BLK_STS_IOERR;
+
switch (bret->operation) {
case BLKIF_OP_DISCARD:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
struct request_queue *rq = info->rq;
printk(KERN_WARNING "blkfront: %s: %s op failed\n",
info->gd->disk_name, op_name(bret->operation));
- blkif_req(req)->error = -EOPNOTSUPP;
+ blkif_req(req)->error = BLK_STS_NOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
info->gd->disk_name, op_name(bret->operation));
- blkif_req(req)->error = -EOPNOTSUPP;
+ blkif_req(req)->error = BLK_STS_NOTSUPP;
}
if (unlikely(blkif_req(req)->error)) {
- if (blkif_req(req)->error == -EOPNOTSUPP)
- blkif_req(req)->error = 0;
+ if (blkif_req(req)->error == BLK_STS_NOTSUPP)
+ blkif_req(req)->error = BLK_STS_OK;
info->feature_fua = 0;
info->feature_flush = 0;
xlvbd_flush(info);
return 0;
}
-static void split_bio_end(struct bio *bio)
-{
- struct split_bio *split_bio = bio->bi_private;
-
- if (atomic_dec_and_test(&split_bio->pending)) {
- split_bio->bio->bi_phys_segments = 0;
- split_bio->bio->bi_error = bio->bi_error;
- bio_endio(split_bio->bio);
- kfree(split_bio);
- }
- bio_put(bio);
-}
-
static int blkif_recover(struct blkfront_info *info)
{
- unsigned int i, r_index;
+ unsigned int r_index;
struct request *req, *n;
int rc;
- struct bio *bio, *cloned_bio;
- unsigned int segs, offset;
- int pending, size;
- struct split_bio *split_bio;
+ struct bio *bio;
+ unsigned int segs;
blkfront_gather_backend_features(info);
/* Reset limits changed by blk_mq_update_nr_hw_queues(). */
while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
/* Traverse the list of pending bios and re-queue them */
- if (bio_segments(bio) > segs) {
- /*
- * This bio has more segments than what we can
- * handle, we have to split it.
- */
- pending = (bio_segments(bio) + segs - 1) / segs;
- split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
- BUG_ON(split_bio == NULL);
- atomic_set(&split_bio->pending, pending);
- split_bio->bio = bio;
- for (i = 0; i < pending; i++) {
- offset = (i * segs * XEN_PAGE_SIZE) >> 9;
- size = min((unsigned int)(segs * XEN_PAGE_SIZE) >> 9,
- (unsigned int)bio_sectors(bio) - offset);
- cloned_bio = bio_clone(bio, GFP_NOIO);
- BUG_ON(cloned_bio == NULL);
- bio_trim(cloned_bio, offset, size);
- cloned_bio->bi_private = split_bio;
- cloned_bio->bi_end_io = split_bio_end;
- submit_bio(cloned_bio);
- }
- /*
- * Now we have to wait for all those smaller bios to
- * end, so we can also end the "parent" bio.
- */
- continue;
- }
- /* We don't need to split this bio */
submit_bio(bio);
}
merge_bio.tail = shadow[j].request->biotail;
bio_list_merge(&info->bio_list, &merge_bio);
shadow[j].request->bio = NULL;
- blk_mq_end_request(shadow[j].request, 0);
+ blk_mq_end_request(shadow[j].request, BLK_STS_OK);
}
}
if (!blk_rq_is_passthrough(req))
break;
blk_start_request(req);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
return req;
}
/* Drop all in-flight and pending requests */
if (ace->req) {
- __blk_end_request_all(ace->req, -EIO);
+ __blk_end_request_all(ace->req, BLK_STS_IOERR);
ace->req = NULL;
}
while ((req = blk_fetch_request(ace->queue)) != NULL)
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
/* Drop back to IDLE state and notify waiters */
ace->fsm_state = ACE_FSM_STATE_IDLE;
}
/* bio finished; is there another one? */
- if (__blk_end_request_cur(ace->req, 0)) {
+ if (__blk_end_request_cur(ace->req, BLK_STS_OK)) {
/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
* blk_rq_sectors(ace->req),
* blk_rq_cur_sectors(ace->req));
if (ace->queue == NULL)
goto err_blk_initq;
blk_queue_logical_block_size(ace->queue, 512);
+ blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH);
/*
* Allocate and initialize GD structure
while (req) {
unsigned long start = blk_rq_pos(req) << 9;
unsigned long len = blk_rq_cur_bytes(req);
- int err = 0;
+ blk_status_t err = BLK_STS_OK;
if (start + len > z2ram_size) {
pr_err(DEVICE_NAME ": bad access: block=%llu, "
"count=%u\n",
(unsigned long long)blk_rq_pos(req),
blk_rq_cur_sectors(req));
- err = -EIO;
+ err = BLK_STS_IOERR;
goto done;
}
while (len) {
if (!q)
return -ENXIO;
+ if (!blk_queue_scsi_passthrough(q)) {
+ WARN_ONCE(true,
+ "Attempt read CDDA info through a non-SCSI queue\n");
+ return -EINVAL;
+ }
+
cdi->last_sense = 0;
while (nframes) {
break;
}
req = scsi_req(rq);
- scsi_req_init(rq);
ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
if (ret) {
*/
static void gdrom_readdisk_dma(struct work_struct *work)
{
- int err, block, block_cnt;
+ int block, block_cnt;
+ blk_status_t err;
struct packet_command *read_command;
struct list_head *elem, *next;
struct request *req;
__raw_writeb(1, GDROM_DMA_STATUS_REG);
wait_event_interruptible_timeout(request_queue,
gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
- err = gd.transfer ? -EIO : 0;
+ err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK;
gd.transfer = 0;
gd.pending = 0;
/* now seek to take the request spinlock
break;
case REQ_OP_WRITE:
pr_notice("Read only device - write request ignored\n");
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
break;
default:
printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
break;
}
}
err = -ENOMEM;
goto probe_fail_requestq;
}
+ blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH);
err = probe_gdrom_setupqueue();
if (err)
p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp;
cp++; crng_init_cnt++; len--;
}
+ spin_unlock_irqrestore(&primary_crng.lock, flags);
if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
invalidate_batched_entropy();
crng_init = 1;
wake_up_interruptible(&crng_init_wait);
pr_notice("random: fast init done\n");
}
- spin_unlock_irqrestore(&primary_crng.lock, flags);
return 1;
}
}
memzero_explicit(&buf, sizeof(buf));
crng->init_time = jiffies;
+ spin_unlock_irqrestore(&primary_crng.lock, flags);
if (crng == &primary_crng && crng_init < 2) {
invalidate_batched_entropy();
crng_init = 2;
wake_up_interruptible(&crng_init_wait);
pr_notice("random: crng init done\n");
}
- spin_unlock_irqrestore(&primary_crng.lock, flags);
}
static inline void crng_wait_ready(void)
u64 get_random_u64(void)
{
u64 ret;
- bool use_lock = crng_init < 2;
- unsigned long flags;
+ bool use_lock = READ_ONCE(crng_init) < 2;
+ unsigned long flags = 0;
struct batched_entropy *batch;
#if BITS_PER_LONG == 64
u32 get_random_u32(void)
{
u32 ret;
- bool use_lock = crng_init < 2;
- unsigned long flags;
+ bool use_lock = READ_ONCE(crng_init) < 2;
+ unsigned long flags = 0;
struct batched_entropy *batch;
if (arch_get_random_int(&ret))
#define ACPI_SIG_TPM2 "TPM2"
-static const u8 CRB_ACPI_START_UUID[] = {
- /* 0000 */ 0xAB, 0x6C, 0xBF, 0x6B, 0x63, 0x54, 0x14, 0x47,
- /* 0008 */ 0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4
-};
+static const guid_t crb_acpi_start_guid =
+ GUID_INIT(0x6BBF6CAB, 0x5463, 0x4714,
+ 0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4);
enum crb_defaults {
CRB_ACPI_START_REVISION_ID = 1,
int rc;
obj = acpi_evaluate_dsm(chip->acpi_dev_handle,
- CRB_ACPI_START_UUID,
+ &crb_acpi_start_guid,
CRB_ACPI_START_REVISION_ID,
CRB_ACPI_START_INDEX,
NULL);
#define PPI_VS_REQ_START 128
#define PPI_VS_REQ_END 255
-static const u8 tpm_ppi_uuid[] = {
- 0xA6, 0xFA, 0xDD, 0x3D,
- 0x1B, 0x36,
- 0xB4, 0x4E,
- 0xA4, 0x24,
- 0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53
-};
+static const guid_t tpm_ppi_guid =
+ GUID_INIT(0x3DDDFAA6, 0x361B, 0x4EB4,
+ 0xA4, 0x24, 0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53);
static inline union acpi_object *
tpm_eval_dsm(acpi_handle ppi_handle, int func, acpi_object_type type,
union acpi_object *argv4)
{
BUG_ON(!ppi_handle);
- return acpi_evaluate_dsm_typed(ppi_handle, tpm_ppi_uuid,
+ return acpi_evaluate_dsm_typed(ppi_handle, &tpm_ppi_guid,
TPM_PPI_REVISION_ID,
func, argv4, type);
}
* is updated with function index from SUBREQ to SUBREQ2 since PPI
* version 1.1
*/
- if (acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid,
+ if (acpi_check_dsm(chip->acpi_dev_handle, &tpm_ppi_guid,
TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_SUBREQ2))
func = TPM_PPI_FN_SUBREQ2;
"User not required",
};
- if (!acpi_check_dsm(dev_handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID,
+ if (!acpi_check_dsm(dev_handle, &tpm_ppi_guid, TPM_PPI_REVISION_ID,
1 << TPM_PPI_FN_GETOPR))
return -EPERM;
if (!chip->acpi_dev_handle)
return;
- if (!acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid,
+ if (!acpi_check_dsm(chip->acpi_dev_handle, &tpm_ppi_guid,
TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_VERSION))
return;
/* Cache PPI version string. */
- obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, tpm_ppi_uuid,
+ obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, &tpm_ppi_guid,
TPM_PPI_REVISION_ID, TPM_PPI_FN_VERSION,
NULL, ACPI_TYPE_STRING);
if (obj) {
config COMMON_CLK_GXBB
bool
depends on COMMON_CLK_AMLOGIC
+ select RESET_CONTROLLER
help
Support for the clock controller on AmLogic S905 devices, aka gxbb.
Say Y if you want peripherals and CPU frequency scaling to work.
bool "Support for Allwinner SoCs' PRCM CCUs"
select SUNXI_CCU_DIV
select SUNXI_CCU_GATE
+ select SUNXI_CCU_MP
default MACH_SUN8I || (ARCH_SUNXI && ARM64)
endif
#define CLK_PLL_VIDEO0_2X 8
#define CLK_PLL_VE 9
#define CLK_PLL_DDR0 10
-#define CLK_PLL_PERIPH0 11
+
+/* PLL_PERIPH0 exported for PRCM */
+
#define CLK_PLL_PERIPH0_2X 12
#define CLK_PLL_PERIPH1 13
#define CLK_PLL_PERIPH1_2X 14
static SUNXI_CCU_GATE(ahb_dma_clk, "ahb-dma", "ahb",
0x060, BIT(6), 0);
static SUNXI_CCU_GATE(ahb_bist_clk, "ahb-bist", "ahb",
- 0x060, BIT(6), 0);
+ 0x060, BIT(7), 0);
static SUNXI_CCU_GATE(ahb_mmc0_clk, "ahb-mmc0", "ahb",
0x060, BIT(8), 0);
static SUNXI_CCU_GATE(ahb_mmc1_clk, "ahb-mmc1", "ahb",
0x12c, 0, 4, 24, 3, BIT(31),
CLK_SET_RATE_PARENT);
static SUNXI_CCU_M_WITH_MUX_GATE(lcd1_ch1_clk, "lcd1-ch1", lcd_ch1_parents,
- 0x12c, 0, 4, 24, 3, BIT(31),
+ 0x130, 0, 4, 24, 3, BIT(31),
CLK_SET_RATE_PARENT);
static const char * const csi_sclk_parents[] = { "pll-video0", "pll-video1",
#define CLK_PLL_VIDEO 6
#define CLK_PLL_VE 7
#define CLK_PLL_DDR 8
-#define CLK_PLL_PERIPH0 9
+
+/* PLL_PERIPH0 exported for PRCM */
+
#define CLK_PLL_PERIPH0_2X 10
#define CLK_PLL_GPU 11
#define CLK_PLL_PERIPH1 12
[RST_BUS_EMAC] = { 0x2c0, BIT(17) },
[RST_BUS_HSTIMER] = { 0x2c0, BIT(19) },
[RST_BUS_SPI0] = { 0x2c0, BIT(20) },
- [RST_BUS_OTG] = { 0x2c0, BIT(23) },
+ [RST_BUS_OTG] = { 0x2c0, BIT(24) },
[RST_BUS_EHCI0] = { 0x2c0, BIT(26) },
[RST_BUS_OHCI0] = { 0x2c0, BIT(29) },
return 0;
}
- rate = readl_relaxed(frame + CNTFRQ);
+ rate = readl_relaxed(base + CNTFRQ);
- iounmap(frame);
+ iounmap(base);
return rate;
}
#include <linux/clk.h>
#include <linux/interrupt.h>
#include <linux/clockchips.h>
+#include <linux/clocksource.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/slab.h>
#include <linux/clk.h>
#include <linux/clockchips.h>
+#include <linux/clocksource.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
int ret;
ret = sscanf(buf, "%u", &input);
- /* cannot be lower than 11 otherwise freq will not fall */
- if (ret != 1 || input < 11 || input > 100 ||
+ /* cannot be lower than 1 otherwise freq will not fall */
+ if (ret != 1 || input < 1 || input > 100 ||
input >= dbs_data->up_threshold)
return -EINVAL;
if (!state_node)
break;
- if (!of_device_is_available(state_node))
+ if (!of_device_is_available(state_node)) {
+ of_node_put(state_node);
continue;
+ }
if (!idle_state_valid(state_node, i, cpumask)) {
pr_warn("%s idle state not valid, bailing out\n",
}
platform_set_drvdata(pdev, nocp);
- clk_prepare_enable(nocp->clk);
+ ret = clk_prepare_enable(nocp->clk);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to prepare ppmu clock\n");
+ return ret;
+ }
pr_info("exynos-nocp: new NoC Probe device registered: %s\n",
dev_name(dev));
{ "ppmu-event2-"#name, PPMU_PMNCNT2 }, \
{ "ppmu-event3-"#name, PPMU_PMNCNT3 }
-struct __exynos_ppmu_events {
+static struct __exynos_ppmu_events {
char *name;
int id;
} ppmu_events[] = {
dev_name(&pdev->dev), desc[i].name);
}
- clk_prepare_enable(info->ppmu.clk);
+ ret = clk_prepare_enable(info->ppmu.clk);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to prepare ppmu clock\n");
+ return ret;
+ }
return 0;
}
static unsigned long get_total_mem(void)
{
struct device_node *np = NULL;
- const unsigned int *reg, *reg_end;
- int len, sw, aw;
- unsigned long start, size, total_mem = 0;
+ struct resource res;
+ int ret;
+ unsigned long total_mem = 0;
for_each_node_by_type(np, "memory") {
- aw = of_n_addr_cells(np);
- sw = of_n_size_cells(np);
- reg = (const unsigned int *)of_get_property(np, "reg", &len);
- reg_end = reg + (len / sizeof(u32));
-
- total_mem = 0;
- do {
- start = of_read_number(reg, aw);
- reg += aw;
- size = of_read_number(reg, sw);
- reg += sw;
- total_mem += size;
- } while (reg < reg_end);
+ ret = of_address_to_resource(np, 0, &res);
+ if (ret)
+ continue;
+
+ total_mem += resource_size(&res);
}
edac_dbg(0, "total_mem 0x%lx\n", total_mem);
return total_mem;
return 0;
}
-static struct irq_domain_ops a10_eccmgr_ic_ops = {
+static const struct irq_domain_ops a10_eccmgr_ic_ops = {
.map = a10_eccmgr_irqdomain_map,
.xlate = irq_domain_xlate_twocell,
};
#define NREC_RDWR(x) (((x)>>11) & 1)
#define NREC_RANK(x) (((x)>>8) & 0x7)
#define NRECMEMB 0xC0
-#define NREC_CAS(x) (((x)>>16) & 0xFFFFFF)
+#define NREC_CAS(x) (((x)>>16) & 0xFFF)
#define NREC_RAS(x) ((x) & 0x7FFF)
#define NRECFGLOG 0xC4
#define NREEECFBDA 0xC8
/* These registers are input ONLY if there was a
* Non-Recoverable Error */
u16 nrecmema; /* Non-Recoverable Mem log A */
- u16 nrecmemb; /* Non-Recoverable Mem log B */
+ u32 nrecmemb; /* Non-Recoverable Mem log B */
};
NERR_FAT_FBD, &info->nerr_fat_fbd);
pci_read_config_word(pvt->branchmap_werrors,
NRECMEMA, &info->nrecmema);
- pci_read_config_word(pvt->branchmap_werrors,
+ pci_read_config_dword(pvt->branchmap_werrors,
NRECMEMB, &info->nrecmemb);
/* Clear the error bits, by writing them back */
/* These registers are input ONLY if there was a Non-Rec Error */
u16 nrecmema; /* Non-Recoverable Mem log A */
- u16 nrecmemb; /* Non-Recoverable Mem log B */
+ u32 nrecmemb; /* Non-Recoverable Mem log B */
};
NERR_FAT_FBD, &info->nerr_fat_fbd);
pci_read_config_word(pvt->branchmap_werrors,
NRECMEMA, &info->nrecmema);
- pci_read_config_word(pvt->branchmap_werrors,
+ pci_read_config_dword(pvt->branchmap_werrors,
NRECMEMB, &info->nrecmemb);
/* Clear the error bits, by writing them back */
* 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller
* 0c08: Xeon E3-1200 v3 Processor DRAM Controller
* 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers
+ * 5918: Xeon E3-1200 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
*
* Based on Intel specification:
* http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
* http://www.intel.com/content/www/us/en/processors/xeon/xeon-e3-1200-family-vol-2-datasheet.html
+ * http://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-h-processor-lines-datasheet-vol-2.html
*
* According to the above datasheet (p.16):
* "
#define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04
#define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08
#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x5918
#define IE31200_DIMMS 4
#define IE31200_RANKS 8
void __iomem *window;
struct ie31200_priv *priv;
u32 addr_decode, mad_offset;
- bool skl = (pdev->device == PCI_DEVICE_ID_INTEL_IE31200_HB_8);
+
+ /*
+ * Kaby Lake seems to work like Skylake. Please re-visit this logic
+ * when adding new CPU support.
+ */
+ bool skl = (pdev->device >= PCI_DEVICE_ID_INTEL_IE31200_HB_8);
edac_dbg(0, "MC:\n");
PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
IE31200},
{
+ PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+ IE31200},
+ {
0,
} /* 0 terminated list. */
};
"Sys Read data error thread 0",
"Sys read data error thread 1",
"DC tag error type 2",
- "DC data error type 1 (poison comsumption)",
+ "DC data error type 1 (poison consumption)",
"DC data error type 2",
"DC data error type 3",
"DC tag error type 4",
struct mv64x60_pci_pdata *pdata = pci->pvt_info;
u32 cause;
- cause = in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
+ cause = readl(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
if (!cause)
return;
printk(KERN_ERR "Error in PCI %d Interface\n", pdata->pci_hose);
printk(KERN_ERR "Cause register: 0x%08x\n", cause);
printk(KERN_ERR "Address Low: 0x%08x\n",
- in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_LO));
+ readl(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_LO));
printk(KERN_ERR "Address High: 0x%08x\n",
- in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_HI));
+ readl(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_HI));
printk(KERN_ERR "Attribute: 0x%08x\n",
- in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ATTR));
+ readl(pdata->pci_vbase + MV64X60_PCI_ERROR_ATTR));
printk(KERN_ERR "Command: 0x%08x\n",
- in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CMD));
- out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, ~cause);
+ readl(pdata->pci_vbase + MV64X60_PCI_ERROR_CMD));
+ writel(~cause, pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
if (cause & MV64X60_PCI_PE_MASK)
edac_pci_handle_pe(pci, pci->ctl_name);
struct mv64x60_pci_pdata *pdata = pci->pvt_info;
u32 val;
- val = in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
+ val = readl(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
if (!val)
return IRQ_NONE;
if (!pci_serr)
return -ENOMEM;
- out_le32(pci_serr, in_le32(pci_serr) & ~0x1);
+ writel(readl(pci_serr) & ~0x1, pci_serr);
iounmap(pci_serr);
return 0;
pdata = pci->pvt_info;
pdata->pci_hose = pdev->id;
- pdata->name = "mpc85xx_pci_err";
+ pdata->name = "mv64x60_pci_err";
platform_set_drvdata(pdev, pci);
pci->dev = &pdev->dev;
pci->dev_name = dev_name(&pdev->dev);
goto err;
}
- out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0);
- out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0);
- out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK,
- MV64X60_PCIx_ERR_MASK_VAL);
+ writel(0, pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
+ writel(0, pdata->pci_vbase + MV64X60_PCI_ERROR_MASK);
+ writel(MV64X60_PCIx_ERR_MASK_VAL,
+ pdata->pci_vbase + MV64X60_PCI_ERROR_MASK);
if (edac_pci_add_device(pci, pdata->edac_idx) > 0) {
edac_dbg(3, "failed edac_pci_add_device()\n");
struct mv64x60_sram_pdata *pdata = edac_dev->pvt_info;
u32 cause;
- cause = in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
+ cause = readl(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
if (!cause)
return;
printk(KERN_ERR "Error in internal SRAM\n");
printk(KERN_ERR "Cause register: 0x%08x\n", cause);
printk(KERN_ERR "Address Low: 0x%08x\n",
- in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_LO));
+ readl(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_LO));
printk(KERN_ERR "Address High: 0x%08x\n",
- in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_HI));
+ readl(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_HI));
printk(KERN_ERR "Data Low: 0x%08x\n",
- in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_LO));
+ readl(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_LO));
printk(KERN_ERR "Data High: 0x%08x\n",
- in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_HI));
+ readl(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_HI));
printk(KERN_ERR "Parity: 0x%08x\n",
- in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_PARITY));
- out_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE, 0);
+ readl(pdata->sram_vbase + MV64X60_SRAM_ERR_PARITY));
+ writel(0, pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}
struct mv64x60_sram_pdata *pdata = edac_dev->pvt_info;
u32 cause;
- cause = in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
+ cause = readl(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
if (!cause)
return IRQ_NONE;
}
/* setup SRAM err registers */
- out_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE, 0);
+ writel(0, pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
edac_dev->mod_name = EDAC_MOD_STR;
edac_dev->ctl_name = pdata->name;
struct mv64x60_cpu_pdata *pdata = edac_dev->pvt_info;
u32 cause;
- cause = in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
+ cause = readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
MV64x60_CPU_CAUSE_MASK;
if (!cause)
return;
printk(KERN_ERR "Error on CPU interface\n");
printk(KERN_ERR "Cause register: 0x%08x\n", cause);
printk(KERN_ERR "Address Low: 0x%08x\n",
- in_le32(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_LO));
+ readl(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_LO));
printk(KERN_ERR "Address High: 0x%08x\n",
- in_le32(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_HI));
+ readl(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_HI));
printk(KERN_ERR "Data Low: 0x%08x\n",
- in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_LO));
+ readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_LO));
printk(KERN_ERR "Data High: 0x%08x\n",
- in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_HI));
+ readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_HI));
printk(KERN_ERR "Parity: 0x%08x\n",
- in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_PARITY));
- out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE, 0);
+ readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_PARITY));
+ writel(0, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE);
edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}
struct mv64x60_cpu_pdata *pdata = edac_dev->pvt_info;
u32 cause;
- cause = in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
+ cause = readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
MV64x60_CPU_CAUSE_MASK;
if (!cause)
return IRQ_NONE;
}
/* setup CPU err registers */
- out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE, 0);
- out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK, 0);
- out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK, 0x000000ff);
+ writel(0, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE);
+ writel(0, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK);
+ writel(0x000000ff, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK);
edac_dev->mod_name = EDAC_MOD_STR;
edac_dev->ctl_name = pdata->name;
u32 comp_ecc;
u32 syndrome;
- reg = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
+ reg = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
if (!reg)
return;
err_addr = reg & ~0x3;
- sdram_ecc = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_RCVD);
- comp_ecc = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CALC);
+ sdram_ecc = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_RCVD);
+ comp_ecc = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CALC);
syndrome = sdram_ecc ^ comp_ecc;
/* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
mci->ctl_name, "");
/* clear the error */
- out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0);
+ writel(0, pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
}
static irqreturn_t mv64x60_mc_isr(int irq, void *dev_id)
struct mv64x60_mc_pdata *pdata = mci->pvt_info;
u32 reg;
- reg = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
+ reg = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
if (!reg)
return IRQ_NONE;
get_total_mem(pdata);
- ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
+ ctl = readl(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
csrow = mci->csrows[0];
dimm = csrow->channels[0]->dimm;
goto err;
}
- ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
+ ctl = readl(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
if (!(ctl & MV64X60_SDRAM_ECC)) {
/* Non-ECC RAM? */
printk(KERN_WARNING "%s: No ECC DIMMs discovered\n", __func__);
mv64x60_init_csrows(mci, pdata);
/* setup MC registers */
- out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0);
- ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
+ writel(0, pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
+ ctl = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
ctl = (ctl & 0xff00ffff) | 0x10000;
- out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL, ctl);
+ writel(ctl, pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
res = edac_mc_add_mc(mci);
if (res) {
static int __init mv64x60_edac_init(void)
{
- int ret = 0;
printk(KERN_INFO "Marvell MV64x60 EDAC driver " MV64x60_REVISION "\n");
printk(KERN_INFO "\t(C) 2006-2007 MontaVista Software\n");
+
/* make sure error reporting method is sane */
switch (edac_op_state) {
case EDAC_OPSTATE_POLL:
#ifdef CONFIG_X86_INTEL_SBI_APL
#include "linux/platform_data/sbi_apl.h"
-int sbi_send(int port, int off, int op, u32 *data)
+static int sbi_send(int port, int off, int op, u32 *data)
{
struct sbi_apl_message sbi_arg;
int ret, read = 0;
return ret;
}
#else
-int sbi_send(int port, int off, int op, u32 *data)
+static int sbi_send(int port, int off, int op, u32 *data)
{
return -EUNATCH;
}
static int apl_rd_reg(int port, int off, int op, void *data, size_t sz, char *name)
{
- int ret = 0;
+ int ret = 0;
edac_dbg(2, "Read %s port=%x off=%x op=%x\n", name, port, off, op);
switch (sz) {
case 8:
ret = sbi_send(port, off + 4, op, (u32 *)(data + 4));
+ /* fall through */
case 4:
- ret = sbi_send(port, off, op, (u32 *)data);
+ ret |= sbi_send(port, off, op, (u32 *)data);
pnd2_printk(KERN_DEBUG, "%s=%x%08x ret=%d\n", name,
sz == 8 ? *((u32 *)(data + 4)) : 0, *((u32 *)data), ret);
break;
static int apl_get_registers(void)
{
+ int ret = -ENODEV;
int i;
if (RD_REG(&asym_2way, b_cr_asym_2way_mem_region_mchbar))
return -ENODEV;
+ /*
+ * RD_REGP() will fail for unpopulated or non-existent
+ * DIMM slots. Return success if we find at least one DIMM.
+ */
for (i = 0; i < APL_NUM_CHANNELS; i++)
- if (RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i]))
- return -ENODEV;
+ if (!RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i]))
+ ret = 0;
- return 0;
+ return ret;
}
static int dnv_get_registers(void)
/*
* Alter this version for the module when modifications are made
*/
-#define SBRIDGE_REVISION " Ver: 1.1.1 "
+#define SBRIDGE_REVISION " Ver: 1.1.2 "
#define EDAC_MOD_STR "sbridge_edac"
/*
* sbridge structs
*/
-#define NUM_CHANNELS 8 /* 2MC per socket, four chan per MC */
+#define NUM_CHANNELS 4 /* Max channels per MC */
#define MAX_DIMMS 3 /* Max DIMMS per channel */
#define KNL_MAX_CHAS 38 /* KNL max num. of Cache Home Agents */
#define KNL_MAX_CHANNELS 6 /* KNL max num. of PCI channels */
KNIGHTS_LANDING,
};
+enum domain {
+ IMC0 = 0,
+ IMC1,
+ SOCK,
+};
+
struct sbridge_pvt;
struct sbridge_info {
enum type type;
struct pci_id_descr {
int dev_id;
int optional;
+ enum domain dom;
};
struct pci_id_table {
const struct pci_id_descr *descr;
- int n_devs;
+ int n_devs_per_imc;
+ int n_devs_per_sock;
+ int n_imcs_per_sock;
enum type type;
};
u8 bus, mc;
u8 node_id, source_id;
struct pci_dev **pdev;
+ enum domain dom;
int n_devs;
+ int i_devs;
struct mem_ctl_info *mci;
};
};
struct sbridge_pvt {
- struct pci_dev *pci_ta, *pci_ddrio, *pci_ras;
+ /* Devices per socket */
+ struct pci_dev *pci_ddrio;
struct pci_dev *pci_sad0, *pci_sad1;
- struct pci_dev *pci_ha0, *pci_ha1;
struct pci_dev *pci_br0, *pci_br1;
- struct pci_dev *pci_ha1_ta;
+ /* Devices per memory controller */
+ struct pci_dev *pci_ha, *pci_ta, *pci_ras;
struct pci_dev *pci_tad[NUM_CHANNELS];
struct sbridge_dev *sbridge_dev;
struct knl_pvt knl;
};
-#define PCI_DESCR(device_id, opt) \
+#define PCI_DESCR(device_id, opt, domain) \
.dev_id = (device_id), \
- .optional = opt
+ .optional = opt, \
+ .dom = domain
static const struct pci_id_descr pci_dev_descr_sbridge[] = {
/* Processor Home Agent */
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0, IMC0) },
/* Memory controller */
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1, SOCK) },
/* System Address Decoder */
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0, SOCK) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0, SOCK) },
/* Broadcast Registers */
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0, SOCK) },
};
-#define PCI_ID_TABLE_ENTRY(A, T) { \
+#define PCI_ID_TABLE_ENTRY(A, N, M, T) { \
.descr = A, \
- .n_devs = ARRAY_SIZE(A), \
+ .n_devs_per_imc = N, \
+ .n_devs_per_sock = ARRAY_SIZE(A), \
+ .n_imcs_per_sock = M, \
.type = T \
}
static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
- PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, SANDY_BRIDGE),
+ PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, ARRAY_SIZE(pci_dev_descr_sbridge), 1, SANDY_BRIDGE),
{0,} /* 0 terminated list. */
};
static const struct pci_id_descr pci_dev_descr_ibridge[] = {
/* Processor Home Agent */
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0, IMC0) },
/* Memory controller */
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0, IMC0) },
+
+ /* Optional, mode 2HA */
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3, 1, IMC1) },
+
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1, SOCK) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1, SOCK) },
/* System Address Decoder */
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_SAD, 0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_SAD, 0, SOCK) },
/* Broadcast Registers */
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR0, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR1, 0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR0, 1, SOCK) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR1, 0, SOCK) },
- /* Optional, mode 2HA */
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1) },
-#if 0
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1) },
-#endif
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3, 1) },
-
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1) },
};
static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
- PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, IVY_BRIDGE),
+ PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, 12, 2, IVY_BRIDGE),
{0,} /* 0 terminated list. */
};
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0 0x2fa0
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1 0x2f60
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA 0x2fa8
-#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL 0x2f71
+#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM 0x2f71
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA 0x2f68
-#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL 0x2f79
+#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM 0x2f79
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0 0x2ffc
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1 0x2ffd
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0 0x2faa
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3 0x2fbb
static const struct pci_id_descr pci_dev_descr_haswell[] = {
/* first item must be the HA */
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0, 0) },
-
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1, 0) },
-
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1, 1) },
-
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1) },
-
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3, 1) },
-
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3, 1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1, 1, IMC1) },
+
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2, 1, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1, IMC0) },
+
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3, 1, IMC1) },
+
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0, 0, SOCK) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1, 0, SOCK) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0, 1, SOCK) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1, 1, SOCK) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2, 1, SOCK) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3, 1, SOCK) },
};
static const struct pci_id_table pci_dev_descr_haswell_table[] = {
- PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, HASWELL),
+ PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, 13, 2, HASWELL),
{0,} /* 0 terminated list. */
};
/* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */
#define PCI_DEVICE_ID_INTEL_KNL_IMC_MC 0x7840
/* DRAM channel stuff; bank addrs, dimmmtr, etc.. 2-8-2 - 2-9-4 (6 of these) */
-#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL 0x7843
+#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN 0x7843
/* kdrwdbu TAD limits/offsets, MCMTR - 2-10-1, 2-11-1 (2 of these) */
#define PCI_DEVICE_ID_INTEL_KNL_IMC_TA 0x7844
/* CHA broadcast registers, dram rules - 1-29-0 (1 of these) */
*/
static const struct pci_id_descr pci_dev_descr_knl[] = {
- [0] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0, 0) },
- [1] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1, 0) },
- [2 ... 3] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_MC, 0)},
- [4 ... 41] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHA, 0) },
- [42 ... 47] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL, 0) },
- [48] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TA, 0) },
- [49] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM, 0) },
+ [0 ... 1] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_MC, 0, IMC0)},
+ [2 ... 7] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN, 0, IMC0) },
+ [8] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TA, 0, IMC0) },
+ [9] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM, 0, IMC0) },
+ [10] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0, 0, SOCK) },
+ [11] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1, 0, SOCK) },
+ [12 ... 49] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHA, 0, SOCK) },
};
static const struct pci_id_table pci_dev_descr_knl_table[] = {
- PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, KNIGHTS_LANDING),
+ PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, ARRAY_SIZE(pci_dev_descr_knl), 1, KNIGHTS_LANDING),
{0,}
};
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0 0x6fa0
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1 0x6f60
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA 0x6fa8
-#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL 0x6f71
+#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM 0x6f71
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA 0x6f68
-#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_THERMAL 0x6f79
+#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM 0x6f79
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0 0x6ffc
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1 0x6ffd
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0 0x6faa
static const struct pci_id_descr pci_dev_descr_broadwell[] = {
/* first item must be the HA */
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0, 0) },
-
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1, 0) },
-
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1, 1) },
-
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1, 0) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3, 1) },
-
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0, 1) },
-
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_THERMAL, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2, 1) },
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3, 1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1, 1, IMC1) },
+
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2, 1, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3, 1, IMC0) },
+
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2, 1, IMC1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3, 1, IMC1) },
+
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0, 0, SOCK) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1, 0, SOCK) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0, 1, SOCK) },
};
static const struct pci_id_table pci_dev_descr_broadwell_table[] = {
- PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, BROADWELL),
+ PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, 10, 2, BROADWELL),
{0,} /* 0 terminated list. */
};
return 1 << cols;
}
-static struct sbridge_dev *get_sbridge_dev(u8 bus, int multi_bus)
+static struct sbridge_dev *get_sbridge_dev(u8 bus, enum domain dom, int multi_bus,
+ struct sbridge_dev *prev)
{
struct sbridge_dev *sbridge_dev;
struct sbridge_dev, list);
}
- list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
- if (sbridge_dev->bus == bus)
+ sbridge_dev = list_entry(prev ? prev->list.next
+ : sbridge_edac_list.next, struct sbridge_dev, list);
+
+ list_for_each_entry_from(sbridge_dev, &sbridge_edac_list, list) {
+ if (sbridge_dev->bus == bus && (dom == SOCK || dom == sbridge_dev->dom))
return sbridge_dev;
}
return NULL;
}
-static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
- const struct pci_id_table *table)
+static struct sbridge_dev *alloc_sbridge_dev(u8 bus, enum domain dom,
+ const struct pci_id_table *table)
{
struct sbridge_dev *sbridge_dev;
if (!sbridge_dev)
return NULL;
- sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs,
- GFP_KERNEL);
+ sbridge_dev->pdev = kcalloc(table->n_devs_per_imc,
+ sizeof(*sbridge_dev->pdev),
+ GFP_KERNEL);
if (!sbridge_dev->pdev) {
kfree(sbridge_dev);
return NULL;
}
sbridge_dev->bus = bus;
- sbridge_dev->n_devs = table->n_devs;
+ sbridge_dev->dom = dom;
+ sbridge_dev->n_devs = table->n_devs_per_imc;
list_add_tail(&sbridge_dev->list, &sbridge_edac_list);
return sbridge_dev;
return idx;
}
-/****************************************************************************
- Memory check routines
- ****************************************************************************/
-static struct pci_dev *get_pdev_same_bus(u8 bus, u32 id)
-{
- struct pci_dev *pdev = NULL;
-
- do {
- pdev = pci_get_device(PCI_VENDOR_ID_INTEL, id, pdev);
- if (pdev && pdev->bus->number == bus)
- break;
- } while (pdev);
-
- return pdev;
-}
-
-/**
- * check_if_ecc_is_active() - Checks if ECC is active
- * @bus: Device bus
- * @type: Memory controller type
- * returns: 0 in case ECC is active, -ENODEV if it can't be determined or
- * disabled
- */
-static int check_if_ecc_is_active(const u8 bus, enum type type)
-{
- struct pci_dev *pdev = NULL;
- u32 mcmtr, id;
-
- switch (type) {
- case IVY_BRIDGE:
- id = PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA;
- break;
- case HASWELL:
- id = PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA;
- break;
- case SANDY_BRIDGE:
- id = PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA;
- break;
- case BROADWELL:
- id = PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA;
- break;
- case KNIGHTS_LANDING:
- /*
- * KNL doesn't group things by bus the same way
- * SB/IB/Haswell does.
- */
- id = PCI_DEVICE_ID_INTEL_KNL_IMC_TA;
- break;
- default:
- return -ENODEV;
- }
-
- if (type != KNIGHTS_LANDING)
- pdev = get_pdev_same_bus(bus, id);
- else
- pdev = pci_get_device(PCI_VENDOR_ID_INTEL, id, 0);
-
- if (!pdev) {
- sbridge_printk(KERN_ERR, "Couldn't find PCI device "
- "%04x:%04x! on bus %02d\n",
- PCI_VENDOR_ID_INTEL, id, bus);
- return -ENODEV;
- }
-
- pci_read_config_dword(pdev,
- type == KNIGHTS_LANDING ? KNL_MCMTR : MCMTR, &mcmtr);
- if (!IS_ECC_ENABLED(mcmtr)) {
- sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
- return -ENODEV;
- }
- return 0;
-}
-
/* Low bits of TAD limit, and some metadata. */
static const u32 knl_tad_dram_limit_lo[] = {
0x400, 0x500, 0x600, 0x700,
return 0;
}
-static int get_dimm_config(struct mem_ctl_info *mci)
+static void get_source_id(struct mem_ctl_info *mci)
{
struct sbridge_pvt *pvt = mci->pvt_info;
- struct dimm_info *dimm;
- unsigned i, j, banks, ranks, rows, cols, npages;
- u64 size;
u32 reg;
- enum edac_type mode;
- enum mem_type mtype;
- int channels = pvt->info.type == KNIGHTS_LANDING ?
- KNL_MAX_CHANNELS : NUM_CHANNELS;
- u64 knl_mc_sizes[KNL_MAX_CHANNELS];
- if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) {
- pci_read_config_dword(pvt->pci_ha0, HASWELL_HASYSDEFEATURE2, ®);
- pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21);
- }
if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL ||
- pvt->info.type == KNIGHTS_LANDING)
+ pvt->info.type == KNIGHTS_LANDING)
pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, ®);
else
pci_read_config_dword(pvt->pci_br0, SAD_TARGET, ®);
pvt->sbridge_dev->source_id = SOURCE_ID_KNL(reg);
else
pvt->sbridge_dev->source_id = SOURCE_ID(reg);
+}
- pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt);
- edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
- pvt->sbridge_dev->mc,
- pvt->sbridge_dev->node_id,
- pvt->sbridge_dev->source_id);
-
- /* KNL doesn't support mirroring or lockstep,
- * and is always closed page
- */
- if (pvt->info.type == KNIGHTS_LANDING) {
- mode = EDAC_S4ECD4ED;
- pvt->is_mirrored = false;
-
- if (knl_get_dimm_capacity(pvt, knl_mc_sizes) != 0)
- return -1;
- } else {
- pci_read_config_dword(pvt->pci_ras, RASENABLES, ®);
- if (IS_MIRROR_ENABLED(reg)) {
- edac_dbg(0, "Memory mirror is enabled\n");
- pvt->is_mirrored = true;
- } else {
- edac_dbg(0, "Memory mirror is disabled\n");
- pvt->is_mirrored = false;
- }
-
- pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
- if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
- edac_dbg(0, "Lockstep is enabled\n");
- mode = EDAC_S8ECD8ED;
- pvt->is_lockstep = true;
- } else {
- edac_dbg(0, "Lockstep is disabled\n");
- mode = EDAC_S4ECD4ED;
- pvt->is_lockstep = false;
- }
- if (IS_CLOSE_PG(pvt->info.mcmtr)) {
- edac_dbg(0, "address map is on closed page mode\n");
- pvt->is_close_pg = true;
- } else {
- edac_dbg(0, "address map is on open page mode\n");
- pvt->is_close_pg = false;
- }
- }
+static int __populate_dimms(struct mem_ctl_info *mci,
+ u64 knl_mc_sizes[KNL_MAX_CHANNELS],
+ enum edac_type mode)
+{
+ struct sbridge_pvt *pvt = mci->pvt_info;
+ int channels = pvt->info.type == KNIGHTS_LANDING ? KNL_MAX_CHANNELS
+ : NUM_CHANNELS;
+ unsigned int i, j, banks, ranks, rows, cols, npages;
+ struct dimm_info *dimm;
+ enum mem_type mtype;
+ u64 size;
mtype = pvt->info.get_memory_type(pvt);
if (mtype == MEM_RDDR3 || mtype == MEM_RDDR4)
}
for (j = 0; j < max_dimms_per_channel; j++) {
- dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
- i, j, 0);
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0);
if (pvt->info.type == KNIGHTS_LANDING) {
pci_read_config_dword(pvt->knl.pci_channel[i],
knl_mtr_reg, &mtr);
}
edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr);
if (IS_DIMM_PRESENT(mtr)) {
+ if (!IS_ECC_ENABLED(pvt->info.mcmtr)) {
+ sbridge_printk(KERN_ERR, "CPU SrcID #%d, Ha #%d, Channel #%d has DIMMs, but ECC is disabled\n",
+ pvt->sbridge_dev->source_id,
+ pvt->sbridge_dev->dom, i);
+ return -ENODEV;
+ }
pvt->channel[i].dimms++;
ranks = numrank(pvt->info.type, mtr);
npages = MiB_TO_PAGES(size);
edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
- pvt->sbridge_dev->mc, i/4, i%4, j,
+ pvt->sbridge_dev->mc, pvt->sbridge_dev->dom, i, j,
size, npages,
banks, ranks, rows, cols);
dimm->mtype = mtype;
dimm->edac_mode = mode;
snprintf(dimm->label, sizeof(dimm->label),
- "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u",
- pvt->sbridge_dev->source_id, i/4, i%4, j);
+ "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u",
+ pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom, i, j);
}
}
}
return 0;
}
+static int get_dimm_config(struct mem_ctl_info *mci)
+{
+ struct sbridge_pvt *pvt = mci->pvt_info;
+ u64 knl_mc_sizes[KNL_MAX_CHANNELS];
+ enum edac_type mode;
+ u32 reg;
+
+ if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) {
+ pci_read_config_dword(pvt->pci_ha, HASWELL_HASYSDEFEATURE2, ®);
+ pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21);
+ }
+ pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt);
+ edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
+ pvt->sbridge_dev->mc,
+ pvt->sbridge_dev->node_id,
+ pvt->sbridge_dev->source_id);
+
+ /* KNL doesn't support mirroring or lockstep,
+ * and is always closed page
+ */
+ if (pvt->info.type == KNIGHTS_LANDING) {
+ mode = EDAC_S4ECD4ED;
+ pvt->is_mirrored = false;
+
+ if (knl_get_dimm_capacity(pvt, knl_mc_sizes) != 0)
+ return -1;
+ pci_read_config_dword(pvt->pci_ta, KNL_MCMTR, &pvt->info.mcmtr);
+ } else {
+ pci_read_config_dword(pvt->pci_ras, RASENABLES, ®);
+ if (IS_MIRROR_ENABLED(reg)) {
+ edac_dbg(0, "Memory mirror is enabled\n");
+ pvt->is_mirrored = true;
+ } else {
+ edac_dbg(0, "Memory mirror is disabled\n");
+ pvt->is_mirrored = false;
+ }
+
+ pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
+ if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
+ edac_dbg(0, "Lockstep is enabled\n");
+ mode = EDAC_S8ECD8ED;
+ pvt->is_lockstep = true;
+ } else {
+ edac_dbg(0, "Lockstep is disabled\n");
+ mode = EDAC_S4ECD4ED;
+ pvt->is_lockstep = false;
+ }
+ if (IS_CLOSE_PG(pvt->info.mcmtr)) {
+ edac_dbg(0, "address map is on closed page mode\n");
+ pvt->is_close_pg = true;
+ } else {
+ edac_dbg(0, "address map is on open page mode\n");
+ pvt->is_close_pg = false;
+ }
+ }
+
+ return __populate_dimms(mci, knl_mc_sizes, mode);
+}
+
static void get_memory_layout(const struct mem_ctl_info *mci)
{
struct sbridge_pvt *pvt = mci->pvt_info;
*/
prv = 0;
for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
- pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
- ®);
+ pci_read_config_dword(pvt->pci_ha, tad_dram_rule[n_tads], ®);
limit = TAD_LIMIT(reg);
if (limit <= prv)
break;
}
}
-static struct mem_ctl_info *get_mci_for_node_id(u8 node_id)
+static struct mem_ctl_info *get_mci_for_node_id(u8 node_id, u8 ha)
{
struct sbridge_dev *sbridge_dev;
list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
- if (sbridge_dev->node_id == node_id)
+ if (sbridge_dev->node_id == node_id && sbridge_dev->dom == ha)
return sbridge_dev->mci;
}
return NULL;
int interleave_mode, shiftup = 0;
unsigned sad_interleave[pvt->info.max_interleave];
u32 reg, dram_rule;
- u8 ch_way, sck_way, pkg, sad_ha = 0, ch_add = 0;
+ u8 ch_way, sck_way, pkg, sad_ha = 0;
u32 tad_offset;
u32 rir_way;
u32 mb, gb;
pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx);
*socket = sad_pkg_socket(pkg);
sad_ha = sad_pkg_ha(pkg);
- if (sad_ha)
- ch_add = 4;
if (a7mode) {
/* MCChanShiftUpEnable */
- pci_read_config_dword(pvt->pci_ha0,
- HASWELL_HASYSDEFEATURE2, ®);
+ pci_read_config_dword(pvt->pci_ha, HASWELL_HASYSDEFEATURE2, ®);
shiftup = GET_BITFIELD(reg, 22, 22);
}
pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx);
*socket = sad_pkg_socket(pkg);
sad_ha = sad_pkg_ha(pkg);
- if (sad_ha)
- ch_add = 4;
edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %d\n",
idx, *socket, sad_ha);
}
* Move to the proper node structure, in order to access the
* right PCI registers
*/
- new_mci = get_mci_for_node_id(*socket);
+ new_mci = get_mci_for_node_id(*socket, sad_ha);
if (!new_mci) {
sprintf(msg, "Struct for socket #%u wasn't initialized",
*socket);
* Step 2) Get memory channel
*/
prv = 0;
- if (pvt->info.type == SANDY_BRIDGE)
- pci_ha = pvt->pci_ha0;
- else {
- if (sad_ha)
- pci_ha = pvt->pci_ha1;
- else
- pci_ha = pvt->pci_ha0;
- }
+ pci_ha = pvt->pci_ha;
for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
pci_read_config_dword(pci_ha, tad_dram_rule[n_tads], ®);
limit = TAD_LIMIT(reg);
}
*channel_mask = 1 << base_ch;
- pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
- tad_ch_nilv_offset[n_tads],
- &tad_offset);
+ pci_read_config_dword(pvt->pci_tad[base_ch], tad_ch_nilv_offset[n_tads], &tad_offset);
if (pvt->is_mirrored) {
*channel_mask |= 1 << ((base_ch + 2) % 4);
* Step 3) Decode rank
*/
for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
- pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
- rir_way_limit[n_rir],
- ®);
+ pci_read_config_dword(pvt->pci_tad[base_ch], rir_way_limit[n_rir], ®);
if (!IS_RIR_VALID(reg))
continue;
idx = (ch_addr >> 13); /* FIXME: Datasheet says to shift by 15 */
idx %= 1 << rir_way;
- pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
- rir_offset[n_rir][idx],
- ®);
+ pci_read_config_dword(pvt->pci_tad[base_ch], rir_offset[n_rir][idx], ®);
*rank = RIR_RNK_TGT(pvt->info.type, reg);
edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
const unsigned devno,
const int multi_bus)
{
- struct sbridge_dev *sbridge_dev;
+ struct sbridge_dev *sbridge_dev = NULL;
const struct pci_id_descr *dev_descr = &table->descr[devno];
struct pci_dev *pdev = NULL;
u8 bus = 0;
+ int i = 0;
sbridge_printk(KERN_DEBUG,
"Seeking for: PCI ID %04x:%04x\n",
}
bus = pdev->bus->number;
- sbridge_dev = get_sbridge_dev(bus, multi_bus);
+next_imc:
+ sbridge_dev = get_sbridge_dev(bus, dev_descr->dom, multi_bus, sbridge_dev);
if (!sbridge_dev) {
- sbridge_dev = alloc_sbridge_dev(bus, table);
+
+ if (dev_descr->dom == SOCK)
+ goto out_imc;
+
+ sbridge_dev = alloc_sbridge_dev(bus, dev_descr->dom, table);
if (!sbridge_dev) {
pci_dev_put(pdev);
return -ENOMEM;
(*num_mc)++;
}
- if (sbridge_dev->pdev[devno]) {
+ if (sbridge_dev->pdev[sbridge_dev->i_devs]) {
sbridge_printk(KERN_ERR,
"Duplicated device for %04x:%04x\n",
PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
return -ENODEV;
}
- sbridge_dev->pdev[devno] = pdev;
+ sbridge_dev->pdev[sbridge_dev->i_devs++] = pdev;
+
+ /* pdev belongs to more than one IMC, do extra gets */
+ if (++i > 1)
+ pci_dev_get(pdev);
+ if (dev_descr->dom == SOCK && i < table->n_imcs_per_sock)
+ goto next_imc;
+
+out_imc:
/* Be sure that the device is enabled */
if (unlikely(pci_enable_device(pdev) < 0)) {
sbridge_printk(KERN_ERR,
if (table->type == KNIGHTS_LANDING)
allow_dups = multi_bus = 1;
while (table && table->descr) {
- for (i = 0; i < table->n_devs; i++) {
+ for (i = 0; i < table->n_devs_per_sock; i++) {
if (!allow_dups || i == 0 ||
table->descr[i].dev_id !=
table->descr[i-1].dev_id) {
table, i, multi_bus);
if (rc < 0) {
if (i == 0) {
- i = table->n_devs;
+ i = table->n_devs_per_sock;
break;
}
sbridge_put_all_devices();
return 0;
}
+/*
+ * Device IDs for {SBRIDGE,IBRIDGE,HASWELL,BROADWELL}_IMC_HA0_TAD0 are in
+ * the format: XXXa. So we can convert from a device to the corresponding
+ * channel like this
+ */
+#define TAD_DEV_TO_CHAN(dev) (((dev) & 0xf) - 0xa)
+
static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
struct sbridge_dev *sbridge_dev)
{
pvt->pci_br0 = pdev;
break;
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0:
- pvt->pci_ha0 = pdev;
+ pvt->pci_ha = pdev;
break;
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA:
pvt->pci_ta = pdev;
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2:
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3:
{
- int id = pdev->device - PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0;
+ int id = TAD_DEV_TO_CHAN(pdev->device);
pvt->pci_tad[id] = pdev;
saw_chan_mask |= 1 << id;
}
}
/* Check if everything were registered */
- if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
+ if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha ||
!pvt->pci_ras || !pvt->pci_ta)
goto enodev;
switch (pdev->device) {
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0:
- pvt->pci_ha0 = pdev;
+ case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1:
+ pvt->pci_ha = pdev;
break;
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
+ case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA:
pvt->pci_ta = pdev;
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS:
+ case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS:
pvt->pci_ras = pdev;
break;
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0:
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1:
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2:
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3:
+ case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0:
+ case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1:
+ case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2:
+ case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3:
{
- int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0;
+ int id = TAD_DEV_TO_CHAN(pdev->device);
pvt->pci_tad[id] = pdev;
saw_chan_mask |= 1 << id;
}
case PCI_DEVICE_ID_INTEL_IBRIDGE_BR1:
pvt->pci_br1 = pdev;
break;
- case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1:
- pvt->pci_ha1 = pdev;
- break;
- case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0:
- case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1:
- case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2:
- case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3:
- {
- int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 + 4;
- pvt->pci_tad[id] = pdev;
- saw_chan_mask |= 1 << id;
- }
- break;
default:
goto error;
}
}
/* Check if everything were registered */
- if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_br0 ||
+ if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_br0 ||
!pvt->pci_br1 || !pvt->pci_ras || !pvt->pci_ta)
goto enodev;
- if (saw_chan_mask != 0x0f && /* -EN */
- saw_chan_mask != 0x33 && /* -EP */
- saw_chan_mask != 0xff) /* -EX */
+ if (saw_chan_mask != 0x0f && /* -EN/-EX */
+ saw_chan_mask != 0x03) /* -EP */
goto enodev;
return 0;
pvt->pci_sad1 = pdev;
break;
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0:
- pvt->pci_ha0 = pdev;
+ case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1:
+ pvt->pci_ha = pdev;
break;
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA:
+ case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA:
pvt->pci_ta = pdev;
break;
- case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL:
+ case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM:
+ case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM:
pvt->pci_ras = pdev;
break;
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0:
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1:
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2:
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3:
- {
- int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0;
-
- pvt->pci_tad[id] = pdev;
- saw_chan_mask |= 1 << id;
- }
- break;
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0:
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1:
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2:
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3:
{
- int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0 + 4;
-
+ int id = TAD_DEV_TO_CHAN(pdev->device);
pvt->pci_tad[id] = pdev;
saw_chan_mask |= 1 << id;
}
if (!pvt->pci_ddrio)
pvt->pci_ddrio = pdev;
break;
- case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1:
- pvt->pci_ha1 = pdev;
- break;
- case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA:
- pvt->pci_ha1_ta = pdev;
- break;
default:
break;
}
}
/* Check if everything were registered */
- if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_sad1 ||
+ if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_sad1 ||
!pvt->pci_ras || !pvt->pci_ta || !pvt->info.pci_vtd)
goto enodev;
- if (saw_chan_mask != 0x0f && /* -EN */
- saw_chan_mask != 0x33 && /* -EP */
- saw_chan_mask != 0xff) /* -EX */
+ if (saw_chan_mask != 0x0f && /* -EN/-EX */
+ saw_chan_mask != 0x03) /* -EP */
goto enodev;
return 0;
pvt->pci_sad1 = pdev;
break;
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0:
- pvt->pci_ha0 = pdev;
+ case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1:
+ pvt->pci_ha = pdev;
break;
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA:
+ case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA:
pvt->pci_ta = pdev;
break;
- case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL:
+ case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM:
+ case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM:
pvt->pci_ras = pdev;
break;
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0:
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1:
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2:
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3:
- {
- int id = pdev->device - PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0;
- pvt->pci_tad[id] = pdev;
- saw_chan_mask |= 1 << id;
- }
- break;
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0:
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1:
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2:
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3:
{
- int id = pdev->device - PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0 + 4;
+ int id = TAD_DEV_TO_CHAN(pdev->device);
pvt->pci_tad[id] = pdev;
saw_chan_mask |= 1 << id;
}
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0:
pvt->pci_ddrio = pdev;
break;
- case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1:
- pvt->pci_ha1 = pdev;
- break;
- case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA:
- pvt->pci_ha1_ta = pdev;
- break;
default:
break;
}
}
/* Check if everything were registered */
- if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_sad1 ||
+ if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_sad1 ||
!pvt->pci_ras || !pvt->pci_ta || !pvt->info.pci_vtd)
goto enodev;
- if (saw_chan_mask != 0x0f && /* -EN */
- saw_chan_mask != 0x33 && /* -EP */
- saw_chan_mask != 0xff) /* -EX */
+ if (saw_chan_mask != 0x0f && /* -EN/-EX */
+ saw_chan_mask != 0x03) /* -EP */
goto enodev;
return 0;
pvt->knl.pci_cha[devidx] = pdev;
break;
- case PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL:
+ case PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN:
devidx = -1;
/*
if (rc < 0)
goto err_parsing;
- new_mci = get_mci_for_node_id(socket);
+ new_mci = get_mci_for_node_id(socket, ha);
if (!new_mci) {
strcpy(msg, "Error: socket got corrupted!");
goto err_parsing;
/* Call the helper to output message */
edac_mc_handle_error(tp_event, mci, core_err_cnt,
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
- 4*ha+channel, dimm, -1,
+ channel, dimm, -1,
optype, msg);
return;
err_parsing:
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
- mci = get_mci_for_node_id(mce->socketid);
+ mci = get_mci_for_node_id(mce->socketid, IMC0);
if (!mci)
return NOTIFY_DONE;
pvt = mci->pvt_info;
struct pci_dev *pdev = sbridge_dev->pdev[0];
int rc;
- /* Check the number of active and not disabled channels */
- rc = check_if_ecc_is_active(sbridge_dev->bus, type);
- if (unlikely(rc < 0))
- return rc;
-
/* allocate a new MC control structure */
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = type == KNIGHTS_LANDING ?
MEM_FLAG_DDR4 : MEM_FLAG_DDR3;
mci->edac_ctl_cap = EDAC_FLAG_NONE;
mci->edac_cap = EDAC_FLAG_NONE;
- mci->mod_name = "sbridge_edac.c";
+ mci->mod_name = "sb_edac.c";
mci->mod_ver = SBRIDGE_REVISION;
mci->dev_name = pci_name(pdev);
mci->ctl_page_to_phys = NULL;
pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
pvt->info.interleave_pkg = ibridge_interleave_pkg;
pvt->info.get_width = ibridge_get_width;
- mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge Socket#%d", mci->mc_idx);
/* Store pci devices at mci for faster access */
rc = ibridge_mci_bind_devs(mci, sbridge_dev);
if (unlikely(rc < 0))
goto fail0;
+ get_source_id(mci);
+ mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge SrcID#%d_Ha#%d",
+ pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
break;
case SANDY_BRIDGE:
pvt->info.rankcfgr = SB_RANK_CFG_A;
pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list);
pvt->info.interleave_pkg = sbridge_interleave_pkg;
pvt->info.get_width = sbridge_get_width;
- mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
/* Store pci devices at mci for faster access */
rc = sbridge_mci_bind_devs(mci, sbridge_dev);
if (unlikely(rc < 0))
goto fail0;
+ get_source_id(mci);
+ mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge SrcID#%d_Ha#%d",
+ pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
break;
case HASWELL:
/* rankcfgr isn't used */
pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
pvt->info.interleave_pkg = ibridge_interleave_pkg;
pvt->info.get_width = ibridge_get_width;
- mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell Socket#%d", mci->mc_idx);
/* Store pci devices at mci for faster access */
rc = haswell_mci_bind_devs(mci, sbridge_dev);
if (unlikely(rc < 0))
goto fail0;
+ get_source_id(mci);
+ mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell SrcID#%d_Ha#%d",
+ pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
break;
case BROADWELL:
/* rankcfgr isn't used */
pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
pvt->info.interleave_pkg = ibridge_interleave_pkg;
pvt->info.get_width = broadwell_get_width;
- mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell Socket#%d", mci->mc_idx);
/* Store pci devices at mci for faster access */
rc = broadwell_mci_bind_devs(mci, sbridge_dev);
if (unlikely(rc < 0))
goto fail0;
+ get_source_id(mci);
+ mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell SrcID#%d_Ha#%d",
+ pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
break;
case KNIGHTS_LANDING:
/* pvt->info.rankcfgr == ??? */
pvt->info.max_interleave = ARRAY_SIZE(knl_interleave_list);
pvt->info.interleave_pkg = ibridge_interleave_pkg;
pvt->info.get_width = knl_get_width;
- mci->ctl_name = kasprintf(GFP_KERNEL,
- "Knights Landing Socket#%d", mci->mc_idx);
rc = knl_mci_bind_devs(mci, sbridge_dev);
if (unlikely(rc < 0))
goto fail0;
+ get_source_id(mci);
+ mci->ctl_name = kasprintf(GFP_KERNEL, "Knights Landing SrcID#%d_Ha#%d",
+ pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
break;
}
/* Get dimm basic config and the memory layout */
- get_dimm_config(mci);
+ rc = get_dimm_config(mci);
+ if (rc < 0) {
+ edac_dbg(0, "MC: failed to get_dimm_config()\n");
+ goto fail;
+ }
get_memory_layout(mci);
/* record ptr to the generic device */
if (unlikely(edac_mc_add_mc(mci))) {
edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
rc = -EINVAL;
- goto fail0;
+ goto fail;
}
return 0;
-fail0:
+fail:
kfree(mci->ctl_name);
+fail0:
edac_mc_free(mci);
sbridge_dev->mci = NULL;
return rc;
if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
- thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
+ ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
l2c, dfs_entries);
if (ret != dfs_entries) {
DEFINE_DMI_ATTR_WITH_SHOW(product_version, 0444, DMI_PRODUCT_VERSION);
DEFINE_DMI_ATTR_WITH_SHOW(product_serial, 0400, DMI_PRODUCT_SERIAL);
DEFINE_DMI_ATTR_WITH_SHOW(product_uuid, 0400, DMI_PRODUCT_UUID);
-DEFINE_DMI_ATTR_WITH_SHOW(product_family, 0400, DMI_PRODUCT_FAMILY);
+DEFINE_DMI_ATTR_WITH_SHOW(product_family, 0444, DMI_PRODUCT_FAMILY);
DEFINE_DMI_ATTR_WITH_SHOW(board_vendor, 0444, DMI_BOARD_VENDOR);
DEFINE_DMI_ATTR_WITH_SHOW(board_name, 0444, DMI_BOARD_NAME);
DEFINE_DMI_ATTR_WITH_SHOW(board_version, 0444, DMI_BOARD_VERSION);
ADD_DMI_ATTR(product_version, DMI_PRODUCT_VERSION);
ADD_DMI_ATTR(product_serial, DMI_PRODUCT_SERIAL);
ADD_DMI_ATTR(product_uuid, DMI_PRODUCT_UUID);
- ADD_DMI_ATTR(product_family, DMI_PRODUCT_FAMILY);
+ ADD_DMI_ATTR(product_family, DMI_PRODUCT_FAMILY);
ADD_DMI_ATTR(board_vendor, DMI_BOARD_VENDOR);
ADD_DMI_ATTR(board_name, DMI_BOARD_NAME);
ADD_DMI_ATTR(board_version, DMI_BOARD_VERSION);
buf = dmi_early_remap(dmi_base, orig_dmi_len);
if (buf == NULL)
- return -1;
+ return -ENOMEM;
dmi_decode_table(buf, decode, NULL);
const char *d = (const char *) dm;
const char *p;
- if (dmi_ident[slot])
+ if (dmi_ident[slot] || dm->length <= string)
return;
p = dmi_string(dm, d[string]);
static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
int index)
{
- const u8 *d = (u8 *) dm + index;
+ const u8 *d;
char *s;
int is_ff = 1, is_00 = 1, i;
- if (dmi_ident[slot])
+ if (dmi_ident[slot] || dm->length <= index + 16)
return;
+ d = (u8 *) dm + index;
for (i = 0; i < 16 && (is_ff || is_00); i++) {
if (d[i] != 0x00)
is_00 = 0;
static void __init dmi_save_type(const struct dmi_header *dm, int slot,
int index)
{
- const u8 *d = (u8 *) dm + index;
+ const u8 *d;
char *s;
- if (dmi_ident[slot])
+ if (dmi_ident[slot] || dm->length <= index)
return;
s = dmi_alloc(4);
if (!s)
return;
+ d = (u8 *) dm + index;
sprintf(s, "%u", *d & 0x7F);
dmi_ident[slot] = s;
}
static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm)
{
- int i, count = *(u8 *)(dm + 1);
+ int i, count;
struct dmi_device *dev;
+ if (dm->length < 0x05)
+ return;
+
+ count = *(u8 *)(dm + 1);
for (i = 1; i <= count; i++) {
const char *devname = dmi_string(dm, i);
const char *name;
const u8 *d = (u8 *)dm;
+ if (dm->length < 0x0B)
+ return;
+
/* Skip disabled device */
if ((d[0x5] & 0x80) == 0)
return;
const char *d = (const char *)dm;
static int nr;
- if (dm->type != DMI_ENTRY_MEM_DEVICE)
+ if (dm->type != DMI_ENTRY_MEM_DEVICE || dm->length < 0x12)
return;
if (nr >= dmi_memdev_nr) {
pr_warn(FW_BUG "Too many DIMM entries in SMBIOS table\n");
goto error;
/*
+ * Same logic as above, look for a 64-bit entry point
+ * first, and if not found, fall back to 32-bit entry point.
+ */
+ memcpy_fromio(buf, p, 16);
+ for (q = p + 16; q < p + 0x10000; q += 16) {
+ memcpy_fromio(buf + 16, q, 16);
+ if (!dmi_smbios3_present(buf)) {
+ dmi_available = 1;
+ dmi_early_unmap(p, 0x10000);
+ goto out;
+ }
+ memcpy(buf, buf + 16, 16);
+ }
+
+ /*
* Iterate over all possible DMI header addresses q.
* Maintain the 32 bytes around q in buf. On the
* first iteration, substitute zero for the
memset(buf, 0, 16);
for (q = p; q < p + 0x10000; q += 16) {
memcpy_fromio(buf + 16, q, 16);
- if (!dmi_smbios3_present(buf) || !dmi_present(buf)) {
+ if (!dmi_present(buf)) {
dmi_available = 1;
dmi_early_unmap(p, 0x10000);
goto out;
* @decode: Callback function
* @private_data: Private data to be passed to the callback function
*
- * Returns -1 when the DMI table can't be reached, 0 on success.
+ * Returns 0 on success, -ENXIO if DMI is not selected or not present,
+ * or a different negative error code if DMI walking fails.
*/
int dmi_walk(void (*decode)(const struct dmi_header *, void *),
void *private_data)
u8 *buf;
if (!dmi_available)
- return -1;
+ return -ENXIO;
buf = dmi_remap(dmi_base, dmi_len);
if (buf == NULL)
- return -1;
+ return -ENOMEM;
dmi_decode_table(buf, decode, private_data);
u32 set;
if (!of_device_is_compatible(mvchip->chip.of_node,
- "marvell,armada-370-xp-gpio"))
+ "marvell,armada-370-gpio"))
return 0;
if (IS_ERR(mvchip->clk))
.data = (void *) MVEBU_GPIO_SOC_VARIANT_ARMADAXP,
},
{
- .compatible = "marvell,armada-370-xp-gpio",
+ .compatible = "marvell,armada-370-gpio",
.data = (void *) MVEBU_GPIO_SOC_VARIANT_ORION,
},
{
mvchip);
}
- /* Armada 370/XP has simple PWM support for GPIO lines */
+ /* Some MVEBU SoCs have simple PWM support for GPIO lines */
if (IS_ENABLED(CONFIG_PWM))
return mvebu_pwm_probe(pdev, mvchip, id);
handler = acpi_gpio_irq_handler_evt;
}
if (!handler)
- return AE_BAD_PARAMETER;
+ return AE_OK;
pin = acpi_gpiochip_pin_to_gpio_offset(chip->gpiodev, pin);
if (pin < 0)
ge.timestamp = ktime_get_real_ns();
- if (le->eflags & GPIOEVENT_REQUEST_BOTH_EDGES) {
+ if (le->eflags & GPIOEVENT_REQUEST_RISING_EDGE
+ && le->eflags & GPIOEVENT_REQUEST_FALLING_EDGE) {
int level = gpiod_get_value_cansleep(le->desc);
if (level)
DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n",
adev->clock.default_dispclk / 100);
adev->clock.default_dispclk = 60000;
+ } else if (adev->clock.default_dispclk <= 60000) {
+ DRM_INFO("Changing default dispclk from %dMhz to 625Mhz\n",
+ adev->clock.default_dispclk / 100);
+ adev->clock.default_dispclk = 62500;
}
adev->clock.dp_extclk =
le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
{0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
/* Vega 10 */
{0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
- ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+ ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
memset(&args, 0, sizeof(args));
void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
{
int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
- ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+ ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
memset(&args, 0, sizeof(args));
u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
if (amdgpu_crtc->base.enabled && num_heads && mode) {
- active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
- line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min(line_time, (u32)65535);
/* watermark for high clocks */
if (adev->pm.dpm_enabled) {
u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
if (amdgpu_crtc->base.enabled && num_heads && mode) {
- active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
- line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min(line_time, (u32)65535);
/* watermark for high clocks */
if (adev->pm.dpm_enabled) {
fixed20_12 a, b, c;
if (amdgpu_crtc->base.enabled && num_heads && mode) {
- active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
- line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min(line_time, (u32)65535);
priority_a_cnt = 0;
priority_b_cnt = 0;
u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
if (amdgpu_crtc->base.enabled && num_heads && mode) {
- active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
- line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min(line_time, (u32)65535);
/* watermark for high clocks */
if (adev->pm.dpm_enabled) {
config DRM_DW_HDMI
tristate
select DRM_KMS_HELPER
+ select REGMAP_MMIO
config DRM_DW_HDMI_AHB_AUDIO
tristate "Synopsys Designware AHB Audio interface"
if (!connector)
return -ENOENT;
- drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
- encoder = drm_connector_get_encoder(connector);
- if (encoder)
- out_resp->encoder_id = encoder->base.id;
- else
- out_resp->encoder_id = 0;
-
- ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
- (uint32_t __user *)(unsigned long)(out_resp->props_ptr),
- (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
- &out_resp->count_props);
- drm_modeset_unlock(&dev->mode_config.connection_mutex);
- if (ret)
- goto out_unref;
-
for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++)
if (connector->encoder_ids[i] != 0)
encoders_count++;
if (put_user(connector->encoder_ids[i],
encoder_ptr + copied)) {
ret = -EFAULT;
- goto out_unref;
+ goto out;
}
copied++;
}
if (copy_to_user(mode_ptr + copied,
&u_mode, sizeof(u_mode))) {
ret = -EFAULT;
+ mutex_unlock(&dev->mode_config.mutex);
+
goto out;
}
copied++;
}
}
out_resp->count_modes = mode_count;
-out:
mutex_unlock(&dev->mode_config.mutex);
-out_unref:
+
+ drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+ encoder = drm_connector_get_encoder(connector);
+ if (encoder)
+ out_resp->encoder_id = encoder->base.id;
+ else
+ out_resp->encoder_id = 0;
+
+ /* Only grab properties after probing, to make sure EDID and other
+ * properties reflect the latest status. */
+ ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
+ (uint32_t __user *)(unsigned long)(out_resp->props_ptr),
+ (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
+ &out_resp->count_props);
+ drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+out:
drm_connector_put(connector);
return ret;
struct etnaviv_gpu *gpu;
struct ww_acquire_ctx ticket;
struct dma_fence *fence;
+ u32 flags;
unsigned int nr_bos;
struct etnaviv_gem_submit_bo bos[0];
- u32 flags;
+ /* No new members here, the previous one is variable-length! */
};
int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
for (i = 0; i < submit->nr_bos; i++) {
struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
- bool explicit = !(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
+ bool explicit = !!(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write,
explicit);
struct file_stats *stats = data;
struct i915_vma *vma;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
stats->count++;
stats->total += obj->base.size;
if (!obj->bind_count)
struct drm_i915_gem_request *request;
struct task_struct *task;
+ mutex_lock(&dev->struct_mutex);
+
memset(&stats, 0, sizeof(stats));
stats.file_priv = file->driver_priv;
spin_lock(&file->table_lock);
* still alive (e.g. get_pid(current) => fork() => exit()).
* Therefore, we need to protect this ->comm access using RCU.
*/
- mutex_lock(&dev->struct_mutex);
request = list_first_entry_or_null(&file_priv->mm.request_list,
struct drm_i915_gem_request,
client_link);
PIDTYPE_PID);
print_file_stats(m, task ? task->comm : "<unknown>", stats);
rcu_read_unlock();
+
mutex_unlock(&dev->struct_mutex);
}
mutex_unlock(&dev->filelist_mutex);
struct page *page;
unsigned long last_pfn = 0; /* suppress gcc warning */
unsigned int max_segment;
+ gfp_t noreclaim;
int ret;
- gfp_t gfp;
/* Assert that the object is not currently in any GPU domain. As it
* wasn't in the GTT, there shouldn't be any way it could have been in
* Fail silently without starting the shrinker
*/
mapping = obj->base.filp->f_mapping;
- gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
- gfp |= __GFP_NORETRY | __GFP_NOWARN;
+ noreclaim = mapping_gfp_constraint(mapping,
+ ~(__GFP_IO | __GFP_RECLAIM));
+ noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
sg = st->sgl;
st->nents = 0;
for (i = 0; i < page_count; i++) {
- page = shmem_read_mapping_page_gfp(mapping, i, gfp);
- if (unlikely(IS_ERR(page))) {
- i915_gem_shrink(dev_priv,
- page_count,
- I915_SHRINK_BOUND |
- I915_SHRINK_UNBOUND |
- I915_SHRINK_PURGEABLE);
+ const unsigned int shrink[] = {
+ I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
+ 0,
+ }, *s = shrink;
+ gfp_t gfp = noreclaim;
+
+ do {
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
- }
- if (unlikely(IS_ERR(page))) {
- gfp_t reclaim;
+ if (likely(!IS_ERR(page)))
+ break;
+
+ if (!*s) {
+ ret = PTR_ERR(page);
+ goto err_sg;
+ }
+
+ i915_gem_shrink(dev_priv, 2 * page_count, *s++);
+ cond_resched();
/* We've tried hard to allocate the memory by reaping
* our own buffer, now let the real VM do its job and
* defer the oom here by reporting the ENOMEM back
* to userspace.
*/
- reclaim = mapping_gfp_mask(mapping);
- reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
-
- page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
- if (IS_ERR(page)) {
- ret = PTR_ERR(page);
- goto err_sg;
+ if (!*s) {
+ /* reclaim and warn, but no oom */
+ gfp = mapping_gfp_mask(mapping);
+
+ /* Our bo are always dirty and so we require
+ * kswapd to reclaim our pages (direct reclaim
+ * does not effectively begin pageout of our
+ * buffers on its own). However, direct reclaim
+ * only waits for kswapd when under allocation
+ * congestion. So as a result __GFP_RECLAIM is
+ * unreliable and fails to actually reclaim our
+ * dirty pages -- unless you try over and over
+ * again with !__GFP_NORETRY. However, we still
+ * want to fail this allocation rather than
+ * trigger the out-of-memory killer and for
+ * this we want the future __GFP_MAYFAIL.
+ */
}
- }
+ } while (1);
+
if (!i ||
sg->length >= max_segment ||
page_to_pfn(page) != last_pfn + 1) {
mapping = obj->base.filp->f_mapping;
mapping_set_gfp_mask(mapping, mask);
+ GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
i915_gem_object_init(obj, &i915_gem_object_ops);
}
static int
-i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
+i915_gem_execbuffer_relocate_entry(struct i915_vma *vma,
struct eb_vmas *eb,
struct drm_i915_gem_relocation_entry *reloc,
struct reloc_cache *cache)
{
+ struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct drm_gem_object *target_obj;
struct drm_i915_gem_object *target_i915_obj;
return -EINVAL;
}
+ /*
+ * If we write into the object, we need to force the synchronisation
+ * barrier, either with an asynchronous clflush or if we executed the
+ * patching using the GPU (though that should be serialised by the
+ * timeline). To be completely sure, and since we are required to
+ * do relocations we are already stalling, disable the user's opt
+ * of our synchronisation.
+ */
+ vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+
ret = relocate_entry(obj, reloc, cache, target_offset);
if (ret)
return ret;
do {
u64 offset = r->presumed_offset;
- ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
+ ret = i915_gem_execbuffer_relocate_entry(vma, eb, r, &cache);
if (ret)
goto out;
reloc_cache_init(&cache, eb->i915);
for (i = 0; i < entry->relocation_count; i++) {
- ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
+ ret = i915_gem_execbuffer_relocate_entry(vma, eb, &relocs[i], &cache);
if (ret)
break;
}
* GPU processing the request, we never over-estimate the
* position of the head.
*/
- req->head = req->ring->tail;
+ req->head = req->ring->emit;
/* Check that we didn't interrupt ourselves with a new request */
GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
GEM_BUG_ON(freespace < wqi_size);
/* The GuC firmware wants the tail index in QWords, not bytes */
- tail = rq->tail;
- assert_ring_tail_valid(rq->ring, rq->tail);
- tail >>= 3;
+ tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
#define VGT_VERSION_MAJOR 1
#define VGT_VERSION_MINOR 0
-#define INTEL_VGT_IF_VERSION_ENCODE(major, minor) ((major) << 16 | (minor))
-#define INTEL_VGT_IF_VERSION \
- INTEL_VGT_IF_VERSION_ENCODE(VGT_VERSION_MAJOR, VGT_VERSION_MINOR)
-
/*
* notifications from guest to vgpu device model
*/
struct vgt_if {
u64 magic; /* VGT_MAGIC */
- uint16_t version_major;
- uint16_t version_minor;
+ u16 version_major;
+ u16 version_minor;
u32 vgt_id; /* ID of vGT instance */
u32 rsv1[12]; /* pad to offset 0x40 */
/*
*/
void i915_check_vgpu(struct drm_i915_private *dev_priv)
{
- uint64_t magic;
- uint32_t version;
+ u64 magic;
+ u16 version_major;
BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
if (magic != VGT_MAGIC)
return;
- version = INTEL_VGT_IF_VERSION_ENCODE(
- __raw_i915_read16(dev_priv, vgtif_reg(version_major)),
- __raw_i915_read16(dev_priv, vgtif_reg(version_minor)));
- if (version != INTEL_VGT_IF_VERSION) {
+ version_major = __raw_i915_read16(dev_priv, vgtif_reg(version_major));
+ if (version_major < VGT_VERSION_MAJOR) {
DRM_INFO("VGT interface version mismatch!\n");
return;
}
break;
}
+ if (!ret) {
+ ret = i915_gem_active_retire(&vma->last_fence,
+ &vma->vm->i915->drm.struct_mutex);
+ }
+
__i915_vma_unpin(vma);
if (ret)
return ret;
acpi_handle dhandle;
} intel_dsm_priv;
-static const u8 intel_dsm_guid[] = {
- 0xd3, 0x73, 0xd8, 0x7e,
- 0xd0, 0xc2,
- 0x4f, 0x4e,
- 0xa8, 0x54,
- 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c
-};
+static const guid_t intel_dsm_guid =
+ GUID_INIT(0x7ed873d3, 0xc2d0, 0x4e4f,
+ 0xa8, 0x54, 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c);
static char *intel_dsm_port_name(u8 id)
{
int i;
union acpi_object *pkg, *connector_count;
- pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, intel_dsm_guid,
+ pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, &intel_dsm_guid,
INTEL_DSM_REVISION_ID, INTEL_DSM_FN_PLATFORM_MUX_INFO,
NULL, ACPI_TYPE_PACKAGE);
if (!pkg) {
if (!dhandle)
return false;
- if (!acpi_check_dsm(dhandle, intel_dsm_guid, INTEL_DSM_REVISION_ID,
+ if (!acpi_check_dsm(dhandle, &intel_dsm_guid, INTEL_DSM_REVISION_ID,
1 << INTEL_DSM_FN_PLATFORM_MUX_INFO)) {
DRM_DEBUG_KMS("no _DSM method for intel device\n");
return false;
static void skylake_pfit_enable(struct intel_crtc *crtc);
static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force);
static void ironlake_pfit_enable(struct intel_crtc *crtc);
-static void intel_modeset_setup_hw_state(struct drm_device *dev);
+static void intel_modeset_setup_hw_state(struct drm_device *dev,
+ struct drm_modeset_acquire_ctx *ctx);
static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
struct intel_limit {
struct drm_crtc *crtc;
int i, ret;
- intel_modeset_setup_hw_state(dev);
+ intel_modeset_setup_hw_state(dev, ctx);
i915_redisable_vga(to_i915(dev));
if (!state)
static int
skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
- unsigned scaler_user, int *scaler_id, unsigned int rotation,
+ unsigned int scaler_user, int *scaler_id,
int src_w, int src_h, int dst_w, int dst_h)
{
struct intel_crtc_scaler_state *scaler_state =
to_intel_crtc(crtc_state->base.crtc);
int need_scaling;
- need_scaling = drm_rotation_90_or_270(rotation) ?
- (src_h != dst_w || src_w != dst_h):
- (src_w != dst_w || src_h != dst_h);
+ /*
+ * Src coordinates are already rotated by 270 degrees for
+ * the 90/270 degree plane rotation cases (to match the
+ * GTT mapping), hence no need to account for rotation here.
+ */
+ need_scaling = src_w != dst_w || src_h != dst_h;
/*
* if plane is being disabled or scaler is no more required or force detach
const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode;
return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
- &state->scaler_state.scaler_id, DRM_ROTATE_0,
+ &state->scaler_state.scaler_id,
state->pipe_src_w, state->pipe_src_h,
adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay);
}
ret = skl_update_scaler(crtc_state, force_detach,
drm_plane_index(&intel_plane->base),
&plane_state->scaler_id,
- plane_state->base.rotation,
drm_rect_width(&plane_state->base.src) >> 16,
drm_rect_height(&plane_state->base.src) >> 16,
drm_rect_width(&plane_state->base.dst),
intel_update_watermarks(intel_crtc);
}
-static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
+static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
+ struct drm_modeset_acquire_ctx *ctx)
{
struct intel_encoder *encoder;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
return;
}
- state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
+ state->acquire_ctx = ctx;
/* Everything's already locked, -EDEADLK can't happen. */
crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
intel_setup_outputs(dev_priv);
drm_modeset_lock_all(dev);
- intel_modeset_setup_hw_state(dev);
+ intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx);
drm_modeset_unlock_all(dev);
for_each_intel_crtc(dev, crtc) {
return 0;
}
-static void intel_enable_pipe_a(struct drm_device *dev)
+static void intel_enable_pipe_a(struct drm_device *dev,
+ struct drm_modeset_acquire_ctx *ctx)
{
struct intel_connector *connector;
struct drm_connector_list_iter conn_iter;
struct drm_connector *crt = NULL;
struct intel_load_detect_pipe load_detect_temp;
- struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx;
int ret;
/* We can't just switch on the pipe A, we need to set things up with a
(HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
}
-static void intel_sanitize_crtc(struct intel_crtc *crtc)
+static void intel_sanitize_crtc(struct intel_crtc *crtc,
+ struct drm_modeset_acquire_ctx *ctx)
{
struct drm_device *dev = crtc->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
plane = crtc->plane;
crtc->base.primary->state->visible = true;
crtc->plane = !plane;
- intel_crtc_disable_noatomic(&crtc->base);
+ intel_crtc_disable_noatomic(&crtc->base, ctx);
crtc->plane = plane;
}
* resume. Force-enable the pipe to fix this, the update_dpms
* call below we restore the pipe to the right state, but leave
* the required bits on. */
- intel_enable_pipe_a(dev);
+ intel_enable_pipe_a(dev, ctx);
}
/* Adjust the state of the output pipe according to whether we
* have active connectors/encoders. */
if (crtc->active && !intel_crtc_has_encoders(crtc))
- intel_crtc_disable_noatomic(&crtc->base);
+ intel_crtc_disable_noatomic(&crtc->base, ctx);
if (crtc->active || HAS_GMCH_DISPLAY(dev_priv)) {
/*
* and sanitizes it to the current state
*/
static void
-intel_modeset_setup_hw_state(struct drm_device *dev)
+intel_modeset_setup_hw_state(struct drm_device *dev,
+ struct drm_modeset_acquire_ctx *ctx)
{
struct drm_i915_private *dev_priv = to_i915(dev);
enum pipe pipe;
for_each_pipe(dev_priv, pipe) {
crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
- intel_sanitize_crtc(crtc);
+ intel_sanitize_crtc(crtc, ctx);
intel_dump_pipe_config(crtc, crtc->config,
"[setup_hw_state]");
}
struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
struct intel_panel *panel = &connector->panel;
- intel_dp_aux_enable_backlight(connector);
-
if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT)
panel->backlight.max = 0xFFFF;
else
rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
u32 *reg_state = ce->lrc_reg_state;
- assert_ring_tail_valid(rq->ring, rq->tail);
- reg_state[CTX_RING_TAIL+1] = rq->tail;
+ reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
/* True 32b PPGTT with dynamic page allocation: update PDP
* registers and point the unallocated PDPs to scratch page.
ce->state->obj->mm.dirty = true;
i915_gem_object_unpin_map(ce->state->obj);
- ce->ring->head = ce->ring->tail = 0;
- intel_ring_update_space(ce->ring);
+ intel_ring_reset(ce->ring, 0);
}
}
}
/* n.b., src is 16.16 fixed point, dst is whole integer */
if (plane->id == PLANE_CURSOR) {
+ /*
+ * Cursors only support 0/180 degree rotation,
+ * hence no need to account for rotation here.
+ */
src_w = pstate->base.src_w;
src_h = pstate->base.src_h;
dst_w = pstate->base.crtc_w;
dst_h = pstate->base.crtc_h;
} else {
+ /*
+ * Src coordinates are already rotated by 270 degrees for
+ * the 90/270 degree plane rotation cases (to match the
+ * GTT mapping), hence no need to account for rotation here.
+ */
src_w = drm_rect_width(&pstate->base.src);
src_h = drm_rect_height(&pstate->base.src);
dst_w = drm_rect_width(&pstate->base.dst);
dst_h = drm_rect_height(&pstate->base.dst);
}
- if (drm_rotation_90_or_270(pstate->base.rotation))
- swap(dst_w, dst_h);
-
downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
if (y && format != DRM_FORMAT_NV12)
return 0;
+ /*
+ * Src coordinates are already rotated by 270 degrees for
+ * the 90/270 degree plane rotation cases (to match the
+ * GTT mapping), hence no need to account for rotation here.
+ */
width = drm_rect_width(&intel_pstate->base.src) >> 16;
height = drm_rect_height(&intel_pstate->base.src) >> 16;
- if (drm_rotation_90_or_270(pstate->rotation))
- swap(width, height);
-
/* for planar format */
if (format == DRM_FORMAT_NV12) {
if (y) /* y-plane data rate */
fb->modifier != I915_FORMAT_MOD_Yf_TILED)
return 8;
+ /*
+ * Src coordinates are already rotated by 270 degrees for
+ * the 90/270 degree plane rotation cases (to match the
+ * GTT mapping), hence no need to account for rotation here.
+ */
src_w = drm_rect_width(&intel_pstate->base.src) >> 16;
src_h = drm_rect_height(&intel_pstate->base.src) >> 16;
- if (drm_rotation_90_or_270(pstate->rotation))
- swap(src_w, src_h);
-
/* Halve UV plane width and height for NV12 */
if (fb->format->format == DRM_FORMAT_NV12 && !y) {
src_w /= 2;
width = intel_pstate->base.crtc_w;
height = intel_pstate->base.crtc_h;
} else {
+ /*
+ * Src coordinates are already rotated by 270 degrees for
+ * the 90/270 degree plane rotation cases (to match the
+ * GTT mapping), hence no need to account for rotation here.
+ */
width = drm_rect_width(&intel_pstate->base.src) >> 16;
height = drm_rect_height(&intel_pstate->base.src) >> 16;
}
- if (drm_rotation_90_or_270(pstate->rotation))
- swap(width, height);
-
cpp = fb->format->cpp[0];
plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
void intel_ring_update_space(struct intel_ring *ring)
{
- ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+ ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
}
static int
i915_gem_request_submit(request);
- assert_ring_tail_valid(request->ring, request->tail);
- I915_WRITE_TAIL(request->engine, request->tail);
+ I915_WRITE_TAIL(request->engine,
+ intel_ring_set_tail(request->ring, request->tail));
}
static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
return PTR_ERR(addr);
}
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+ GEM_BUG_ON(!list_empty(&ring->request_list));
+ ring->tail = tail;
+ ring->head = tail;
+ ring->emit = tail;
+ intel_ring_update_space(ring);
+}
+
void intel_ring_unpin(struct intel_ring *ring)
{
GEM_BUG_ON(!ring->vma);
GEM_BUG_ON(!ring->vaddr);
+ /* Discard any unused bytes beyond that submitted to hw. */
+ intel_ring_reset(ring, ring->tail);
+
if (i915_vma_is_map_and_fenceable(ring->vma))
i915_vma_unpin_iomap(ring->vma);
else
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ /* Restart from the beginning of the rings for convenience */
for_each_engine(engine, dev_priv, id)
- engine->buffer->head = engine->buffer->tail;
+ intel_ring_reset(engine->buffer, 0);
}
static int ring_request_alloc(struct drm_i915_gem_request *request)
unsigned space;
/* Would completion of this request free enough space? */
- space = __intel_ring_space(target->postfix, ring->tail,
+ space = __intel_ring_space(target->postfix, ring->emit,
ring->size);
if (space >= bytes)
break;
u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
{
struct intel_ring *ring = req->ring;
- int remain_actual = ring->size - ring->tail;
- int remain_usable = ring->effective_size - ring->tail;
+ int remain_actual = ring->size - ring->emit;
+ int remain_usable = ring->effective_size - ring->emit;
int bytes = num_dwords * sizeof(u32);
int total_bytes, wait_bytes;
bool need_wrap = false;
if (unlikely(need_wrap)) {
GEM_BUG_ON(remain_actual > ring->space);
- GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+ GEM_BUG_ON(ring->emit + remain_actual > ring->size);
/* Fill the tail with MI_NOOP */
- memset(ring->vaddr + ring->tail, 0, remain_actual);
- ring->tail = 0;
+ memset(ring->vaddr + ring->emit, 0, remain_actual);
+ ring->emit = 0;
ring->space -= remain_actual;
}
- GEM_BUG_ON(ring->tail > ring->size - bytes);
- cs = ring->vaddr + ring->tail;
- ring->tail += bytes;
+ GEM_BUG_ON(ring->emit > ring->size - bytes);
+ cs = ring->vaddr + ring->emit;
+ ring->emit += bytes;
ring->space -= bytes;
GEM_BUG_ON(ring->space < 0);
int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
{
int num_dwords =
- (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+ (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
u32 *cs;
if (num_dwords == 0)
u32 head;
u32 tail;
+ u32 emit;
int space;
int size;
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+void intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);
* reserved for the command packet (i.e. the value passed to
* intel_ring_begin()).
*/
- GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+ GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
}
static inline u32
GEM_BUG_ON(tail >= ring->size);
}
-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+ /* Whilst writes to the tail are strictly order, there is no
+ * serialisation between readers and the writers. The tail may be
+ * read by i915_gem_request_retire() just as it is being updated
+ * by execlists, as although the breadcrumb is complete, the context
+ * switch hasn't been seen.
+ */
+ assert_ring_tail_valid(ring, tail);
+ ring->tail = tail;
+ return tail;
+}
void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
if (IS_G200_SE(mdev)) {
- if (mdev->unique_rev_id >= 0x02) {
+ if (mdev->unique_rev_id >= 0x04) {
+ WREG8(MGAREG_CRTCEXT_INDEX, 0x06);
+ WREG8(MGAREG_CRTCEXT_DATA, 0);
+ } else if (mdev->unique_rev_id >= 0x02) {
u8 hi_pri_lvl;
u32 bpp;
u32 mb;
if (mga_vga_calculate_mode_bandwidth(mode, bpp)
> (30100 * 1024))
return MODE_BANDWIDTH;
+ } else {
+ if (mga_vga_calculate_mode_bandwidth(mode, bpp)
+ > (55000 * 1024))
+ return MODE_BANDWIDTH;
}
} else if (mdev->type == G200_WB) {
if (mode->hdisplay > 1280)
#include "mxsfb_drv.h"
#include "mxsfb_regs.h"
+#define MXS_SET_ADDR 0x4
+#define MXS_CLR_ADDR 0x8
+#define MODULE_CLKGATE BIT(30)
+#define MODULE_SFTRST BIT(31)
+/* 1 second delay should be plenty of time for block reset */
+#define RESET_TIMEOUT 1000000
+
static u32 set_hsync_pulse_width(struct mxsfb_drm_private *mxsfb, u32 val)
{
return (val & mxsfb->devdata->hs_wdth_mask) <<
clk_disable_unprepare(mxsfb->clk_disp_axi);
}
+/*
+ * Clear the bit and poll it cleared. This is usually called with
+ * a reset address and mask being either SFTRST(bit 31) or CLKGATE
+ * (bit 30).
+ */
+static int clear_poll_bit(void __iomem *addr, u32 mask)
+{
+ u32 reg;
+
+ writel(mask, addr + MXS_CLR_ADDR);
+ return readl_poll_timeout(addr, reg, !(reg & mask), 0, RESET_TIMEOUT);
+}
+
+static int mxsfb_reset_block(void __iomem *reset_addr)
+{
+ int ret;
+
+ ret = clear_poll_bit(reset_addr, MODULE_SFTRST);
+ if (ret)
+ return ret;
+
+ writel(MODULE_CLKGATE, reset_addr + MXS_CLR_ADDR);
+
+ ret = clear_poll_bit(reset_addr, MODULE_SFTRST);
+ if (ret)
+ return ret;
+
+ return clear_poll_bit(reset_addr, MODULE_CLKGATE);
+}
+
static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
{
struct drm_display_mode *m = &mxsfb->pipe.crtc.state->adjusted_mode;
*/
mxsfb_enable_axi_clk(mxsfb);
+ /* Mandatory eLCDIF reset as per the Reference Manual */
+ err = mxsfb_reset_block(mxsfb->base);
+ if (err)
+ return;
+
/* Clear the FIFOs */
writel(CTRL1_FIFO_CLEAR, mxsfb->base + LCDC_CTRL1 + REG_SET);
}
#ifdef CONFIG_VGA_SWITCHEROO
-static const char nouveau_dsm_muid[] = {
- 0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
- 0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4,
-};
+static const guid_t nouveau_dsm_muid =
+ GUID_INIT(0x9D95A0A0, 0x0060, 0x4D48,
+ 0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4);
-static const char nouveau_op_dsm_muid[] = {
- 0xF8, 0xD8, 0x86, 0xA4, 0xDA, 0x0B, 0x1B, 0x47,
- 0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0,
-};
+static const guid_t nouveau_op_dsm_muid =
+ GUID_INIT(0xA486D8F8, 0x0BDA, 0x471B,
+ 0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0);
static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t *result)
{
args_buff[i] = (arg >> i * 8) & 0xFF;
*result = 0;
- obj = acpi_evaluate_dsm_typed(handle, nouveau_op_dsm_muid, 0x00000100,
+ obj = acpi_evaluate_dsm_typed(handle, &nouveau_op_dsm_muid, 0x00000100,
func, &argv4, ACPI_TYPE_BUFFER);
if (!obj) {
acpi_handle_info(handle, "failed to evaluate _DSM\n");
.integer.value = arg,
};
- obj = acpi_evaluate_dsm_typed(handle, nouveau_dsm_muid, 0x00000102,
+ obj = acpi_evaluate_dsm_typed(handle, &nouveau_dsm_muid, 0x00000102,
func, &argv4, ACPI_TYPE_INTEGER);
if (!obj) {
acpi_handle_info(handle, "failed to evaluate _DSM\n");
if (!acpi_has_method(dhandle, "_DSM"))
return;
- supports_mux = acpi_check_dsm(dhandle, nouveau_dsm_muid, 0x00000102,
+ supports_mux = acpi_check_dsm(dhandle, &nouveau_dsm_muid, 0x00000102,
1 << NOUVEAU_DSM_POWER);
optimus_funcs = nouveau_dsm_get_optimus_functions(dhandle);
{
struct nvkm_subdev *subdev = &mxm->subdev;
struct nvkm_device *device = subdev->device;
- static char muid[] = {
- 0x00, 0xA4, 0x04, 0x40, 0x7D, 0x91, 0xF2, 0x4C,
- 0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65
- };
+ static guid_t muid =
+ GUID_INIT(0x4004A400, 0x917D, 0x4CF2,
+ 0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65);
u32 mxms_args[] = { 0x00000000 };
union acpi_object argv4 = {
.buffer.type = ACPI_TYPE_BUFFER,
* unless you pass in exactly the version it supports..
*/
rev = (version & 0xf0) << 4 | (version & 0x0f);
- obj = acpi_evaluate_dsm(handle, muid, rev, 0x00000010, &argv4);
+ obj = acpi_evaluate_dsm(handle, &muid, rev, 0x00000010, &argv4);
if (!obj) {
nvkm_debug(subdev, "DSM MXMS failed\n");
return false;
u32 tmp, wm_mask;
if (radeon_crtc->base.enabled && num_heads && mode) {
- active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
- line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min(line_time, (u32)65535);
/* watermark for high clocks */
if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
fixed20_12 a, b, c;
if (radeon_crtc->base.enabled && num_heads && mode) {
- active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
- line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min(line_time, (u32)65535);
priority_a_cnt = 0;
priority_b_cnt = 0;
dram_channels = evergreen_get_number_of_dram_channels(rdev);
rdev->pdev->subsystem_vendor == 0x103c &&
rdev->pdev->subsystem_device == 0x280a)
return;
+ /* quirk for rs4xx Toshiba Sattellite L20-183 latop to make it resume
+ * - it hangs on resume inside the dynclk 1 table.
+ */
+ if (rdev->family == CHIP_RS400 &&
+ rdev->pdev->subsystem_vendor == 0x1179 &&
+ rdev->pdev->subsystem_device == 0xff31)
+ return;
/* DYN CLK 1 */
table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE);
* https://bugzilla.kernel.org/show_bug.cgi?id=51381
*/
{ PCI_VENDOR_ID_ATI, 0x6840, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
+ /* Asus K53TK laptop with AMD A6-3420M APU and Radeon 7670m GPU
+ * https://bugs.freedesktop.org/show_bug.cgi?id=101491
+ */
+ { PCI_VENDOR_ID_ATI, 0x6741, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
/* macbook pro 8.2 */
{ PCI_VENDOR_ID_ATI, 0x6741, PCI_VENDOR_ID_APPLE, 0x00e2, RADEON_PX_QUIRK_LONG_WAKEUP },
{ 0, 0, 0, 0, 0 },
}
/* TODO: is this still necessary on NI+ ? */
- if ((cmd == 0 || cmd == 1 || cmd == 0x3) &&
+ if ((cmd == 0 || cmd == 0x3) &&
(start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
start, end);
fixed20_12 a, b, c;
if (radeon_crtc->base.enabled && num_heads && mode) {
- active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
- line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min(line_time, (u32)65535);
priority_a_cnt = 0;
priority_b_cnt = 0;
#ifdef CONFIG_DRM_TEGRA_STAGING
-static struct tegra_drm_context *
-tegra_drm_file_get_context(struct tegra_drm_file *file, u32 id)
-{
- struct tegra_drm_context *context;
-
- mutex_lock(&file->lock);
- context = idr_find(&file->contexts, id);
- mutex_unlock(&file->lock);
-
- return context;
-}
-
static int tegra_gem_create(struct drm_device *drm, void *data,
struct drm_file *file)
{
if (err < 0)
return err;
- err = idr_alloc(&fpriv->contexts, context, 0, 0, GFP_KERNEL);
+ err = idr_alloc(&fpriv->contexts, context, 1, 0, GFP_KERNEL);
if (err < 0) {
client->ops->close_channel(context);
return err;
mutex_lock(&fpriv->lock);
- context = tegra_drm_file_get_context(fpriv, args->context);
+ context = idr_find(&fpriv->contexts, args->context);
if (!context) {
err = -EINVAL;
goto unlock;
mutex_lock(&fpriv->lock);
- context = tegra_drm_file_get_context(fpriv, args->context);
+ context = idr_find(&fpriv->contexts, args->context);
if (!context) {
err = -ENODEV;
goto unlock;
mutex_lock(&fpriv->lock);
- context = tegra_drm_file_get_context(fpriv, args->context);
+ context = idr_find(&fpriv->contexts, args->context);
if (!context) {
err = -ENODEV;
goto unlock;
mutex_lock(&fpriv->lock);
- context = tegra_drm_file_get_context(fpriv, args->context);
+ context = idr_find(&fpriv->contexts, args->context);
if (!context) {
err = -ENODEV;
goto unlock;
list_for_each_entry_safe(entry, next, &man->list, head)
vmw_cmdbuf_res_free(man, entry);
+ drm_ht_remove(&man->resources);
kfree(man);
}
host->rst = devm_reset_control_get(&pdev->dev, "host1x");
if (IS_ERR(host->rst)) {
- err = PTR_ERR(host->clk);
+ err = PTR_ERR(host->rst);
dev_err(&pdev->dev, "failed to get reset: %d\n", err);
return err;
}
* hid-rmi should take care of them,
* not hid-generic
*/
- if (IS_ENABLED(CONFIG_HID_RMI))
- hid->group = HID_GROUP_RMI;
+ hid->group = HID_GROUP_RMI;
break;
}
+ /* fall back to generic driver in case specific driver doesn't exist */
+ switch (hid->group) {
+ case HID_GROUP_MULTITOUCH_WIN_8:
+ /* fall-through */
+ case HID_GROUP_MULTITOUCH:
+ if (!IS_ENABLED(CONFIG_HID_MULTITOUCH))
+ hid->group = HID_GROUP_GENERIC;
+ break;
+ case HID_GROUP_SENSOR_HUB:
+ if (!IS_ENABLED(CONFIG_HID_SENSOR_HUB))
+ hid->group = HID_GROUP_GENERIC;
+ break;
+ case HID_GROUP_RMI:
+ if (!IS_ENABLED(CONFIG_HID_RMI))
+ hid->group = HID_GROUP_GENERIC;
+ break;
+ case HID_GROUP_WACOM:
+ if (!IS_ENABLED(CONFIG_HID_WACOM))
+ hid->group = HID_GROUP_GENERIC;
+ break;
+ case HID_GROUP_LOGITECH_DJ_DEVICE:
+ if (!IS_ENABLED(CONFIG_HID_LOGITECH_DJ))
+ hid->group = HID_GROUP_GENERIC;
+ break;
+ }
vfree(parser);
return 0;
}
* used as a driver. See hid_scan_report().
*/
static const struct hid_device_id hid_have_special_driver[] = {
+#if IS_ENABLED(CONFIG_HID_A4TECH)
{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) },
{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_X5_005D) },
{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_RP_649) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ACCUTOUCH)
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ACRUX)
{ HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0x0802) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0xf705) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ALPS)
{ HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_APPLE)
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MIGHTYMOUSE) },
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS) },
- { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) },
- { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL2) },
- { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL3) },
- { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
- { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
+#endif
+#if IS_ENABLED(CONFIG_HID_APPLEIR)
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL3) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ASUS)
{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD) },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_AUREAL)
{ HID_USB_DEVICE(USB_VENDOR_ID_AUREAL, USB_DEVICE_ID_AUREAL_W01RN) },
+#endif
+#if IS_ENABLED(CONFIG_HID_BELKIN)
{ HID_USB_DEVICE(USB_VENDOR_ID_BELKIN, USB_DEVICE_ID_FLIP_KVM) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_BETOP_FF)
{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185BFM, 0x2208) },
{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185PC, 0x5506) },
{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2PC, 0x1850) },
{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2BFM, 0x5500) },
- { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE) },
- { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CHERRY)
{ HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CHICONY)
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) },
- { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CMEDIA)
+ { HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM6533) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CORSAIR)
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
- { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CP2112)
{ HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CYPRESS)
{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_3) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_4) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE) },
- { HID_USB_DEVICE(USB_VENDOR_ID_DELCOM, USB_DEVICE_ID_DELCOM_VISUAL_IND) },
+#endif
+#if IS_ENABLED(CONFIG_HID_DRAGONRISE)
{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0006) },
{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0011) },
-#if IS_ENABLED(CONFIG_HID_MAYFLASH)
- { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
- { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR) },
- { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE1) },
- { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE2) },
#endif
- { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_WN) },
- { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_FA) },
+#if IS_ENABLED(CONFIG_HID_ELECOM)
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ELO)
{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0030) },
- { HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) },
+#endif
+#if IS_ENABLED(CONFIG_HID_EMS_FF)
{ HID_USB_DEVICE(USB_VENDOR_ID_EMS, USB_DEVICE_ID_EMS_TRIO_LINKER_PLUS_II) },
+#endif
+#if IS_ENABLED(CONFIG_HID_EZKEY)
{ HID_USB_DEVICE(USB_VENDOR_ID_EZKEY, USB_DEVICE_ID_BTC_8193) },
- { HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
- { HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GEMBIRD)
{ HID_USB_DEVICE(USB_VENDOR_ID_GEMBIRD, USB_DEVICE_ID_GEMBIRD_JPD_DUALFORCE2) },
- { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0003) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GFRM)
+ { HID_BLUETOOTH_DEVICE(0x58, 0x2000) },
+ { HID_BLUETOOTH_DEVICE(0x471, 0x2210) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GREENASIA)
{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0012) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GT683R)
+ { HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GYRATION)
{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE_2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE_3) },
+#endif
+#if IS_ENABLED(CONFIG_HID_HOLTEK)
{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK, USB_DEVICE_ID_HOLTEK_ON_LINE_GRIP) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A072) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) },
- { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
- { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
- { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ICADE)
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_KENSINGTON)
{ HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_KEYTOUCH)
{ HID_USB_DEVICE(USB_VENDOR_ID_KEYTOUCH, USB_DEVICE_ID_KEYTOUCH_IEC) },
+#endif
+#if IS_ENABLED(CONFIG_HID_KYE)
{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_MANTICORE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GX_IMPERATOR) },
{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) },
{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912) },
- { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LCPOWER)
{ HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000 ) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LED)
+ { HID_USB_DEVICE(USB_VENDOR_ID_DELCOM, USB_DEVICE_ID_DELCOM_VISUAL_IND) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_WN) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_FA) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_LUXAFOR) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_RISO_KAGAKU, USB_DEVICE_ID_RI_KA_WEBMAIL) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_THINGM, USB_DEVICE_ID_BLINK1) },
+#endif
#if IS_ENABLED(CONFIG_HID_LENOVO)
{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPKBD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPPRODOCK) },
#endif
- { HID_USB_DEVICE(USB_VENDOR_ID_LG, USB_DEVICE_ID_LG_MELFAS_MT) },
+#if IS_ENABLED(CONFIG_HID_LOGITECH)
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER_2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RECEIVER) },
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) },
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_T651) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_DESKTOP) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_EDGE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_MINI) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2_2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G29_WHEEL) },
- { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G920_WHEEL) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_F3D) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG ) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FORCE3D_PRO) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DFGT_WHEEL) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G25_WHEEL) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G27_WHEEL) },
-#if IS_ENABLED(CONFIG_HID_LOGITECH_DJ)
- { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER) },
- { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER_2) },
-#endif
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WII_WHEEL) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACETRAVELLER) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACENAVIGATOR) },
- { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) },
- { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) },
- { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_LUXAFOR) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LOGITECH_HIDPP)
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_T651) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G920_WHEEL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LOGITECH_DJ)
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER_2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MAGICMOUSE)
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MAYFLASH)
+ { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE1) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MICROSOFT)
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_MOUSE_4500) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_KEYBOARD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_SIDEWINDER_GV) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_600) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MONTEREY)
{ HID_USB_DEVICE(USB_VENDOR_ID_MONTEREY, USB_DEVICE_ID_GENIUS_KB29E) },
- { HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MULTITOUCH)
+ { HID_USB_DEVICE(USB_VENDOR_ID_LG, USB_DEVICE_ID_LG_MELFAS_MT) },
+#endif
+#if IS_ENABLED(CONFIG_HID_WIIMOTE)
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_NTI)
{ HID_USB_DEVICE(USB_VENDOR_ID_NTI, USB_DEVICE_ID_USB_SUN) },
+#endif
+#if IS_ENABLED(CONFIG_HID_NTRIG)
{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN) },
{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_1) },
{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_16) },
{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_17) },
{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_18) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ORTEK)
{ HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_PKB1700) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_WKB2000) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SKYCABLE, USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PANTHERLORD)
+ { HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0003) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PENMOUNT)
{ HID_USB_DEVICE(USB_VENDOR_ID_PENMOUNT, USB_DEVICE_ID_PENMOUNT_6000) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PETALYNX)
{ HID_USB_DEVICE(USB_VENDOR_ID_PETALYNX, USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PICOLCD)
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PLANTRONICS)
{ HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS, HID_ANY_ID) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PRIMAX)
{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_KEYBOARD) },
- { HID_USB_DEVICE(USB_VENDOR_ID_RISO_KAGAKU, USB_DEVICE_ID_RI_KA_WEBMAIL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PRODIKEYS)
+ { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
+#endif
+#if IS_ENABLED(CONFIG_HID_RMI)
+ { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_COVER) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLADE_14) },
+#endif
#if IS_ENABLED(CONFIG_HID_ROCCAT)
{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ARVO) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ISKU) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT5) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT9) },
#endif
+#if IS_ENABLED(CONFIG_HID_SAMSUNG)
{ HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE) },
- { HID_USB_DEVICE(USB_VENDOR_ID_SKYCABLE, USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SMARTJOYPLUS)
+ { HID_USB_DEVICE(USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_3_PRO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_DUAL_BOX_PRO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_5_PRO) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SONY)
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_PS3_BDREMOTE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_BUZZ_CONTROLLER) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_WIRELESS_BUZZ_CONTROLLER) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGP_MOUSE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SINO_LITE, USB_DEVICE_ID_SINO_LITE_CONTROLLER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SPEEDLINK)
+ { HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_STEELSERIES)
{ HID_USB_DEVICE(USB_VENDOR_ID_STEELSERIES, USB_DEVICE_ID_STEELSERIES_SRWS1) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SUNPLUS)
{ HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) },
- { HID_USB_DEVICE(USB_VENDOR_ID_THINGM, USB_DEVICE_ID_BLINK1) },
+#endif
+#if IS_ENABLED(CONFIG_HID_THRUSTMASTER)
{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) },
{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) },
{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb323) },
{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb653) },
{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) },
{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb65a) },
+#endif
+#if IS_ENABLED(CONFIG_HID_TIVO)
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_BT) },
{ HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_PRO) },
+#endif
+#if IS_ENABLED(CONFIG_HID_TOPSEED)
+ { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS) },
{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, USB_DEVICE_ID_TOPSEED2_RF_COMBO) },
+#endif
+#if IS_ENABLED(CONFIG_HID_TWINHAN)
{ HID_USB_DEVICE(USB_VENDOR_ID_TWINHAN, USB_DEVICE_ID_TWINHAN_IR_REMOTE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_UCLOGIC)
+ { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP1062) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_WIRELESS_TABLET_TWHL850) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) },
- { HID_USB_DEVICE(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_YIYNOVA_TABLET) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_81) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_45) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) },
- { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) },
- { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
- { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) },
- { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) },
- { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_3_PRO) },
- { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_DUAL_BOX_PRO) },
- { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_5_PRO) },
- { HID_USB_DEVICE(USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
+#endif
+#if IS_ENABLED(CONFIG_HID_UDRAW_PS3)
+ { HID_USB_DEVICE(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW) },
+#endif
+#if IS_ENABLED(CONFIG_HID_WALTOP)
{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH) },
{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_12_1_INCH) },
{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_Q_PAD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH) },
{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH) },
{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) },
- { HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_XINMO)
{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ZEROPLUS)
{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ZYDACRON)
{ HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) },
-
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) },
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE) },
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE2) },
- { HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLADE_14) },
- { HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM6533) },
- { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_COVER) },
+#endif
{ }
};
#define USB_VENDOR_ID_DELCOM 0x0fc5
#define USB_DEVICE_ID_DELCOM_VISUAL_IND 0xb080
+#define USB_VENDOR_ID_DELL 0x413c
+#define USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE 0x301a
+
#define USB_VENDOR_ID_DELORME 0x1163
#define USB_DEVICE_ID_DELORME_EARTHMATE 0x0100
#define USB_DEVICE_ID_DELORME_EM_LT20 0x0200
if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
magicmouse_emit_buttons(msc, clicks & 3);
- input_mt_report_pointer_emulation(input, true);
input_report_rel(input, REL_X, x);
input_report_rel(input, REL_Y, y);
} else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
__clear_bit(BTN_RIGHT, input->keybit);
__clear_bit(BTN_MIDDLE, input->keybit);
__set_bit(BTN_MOUSE, input->keybit);
+ __set_bit(BTN_TOOL_FINGER, input->keybit);
+ __set_bit(BTN_TOOL_DOUBLETAP, input->keybit);
+ __set_bit(BTN_TOOL_TRIPLETAP, input->keybit);
+ __set_bit(BTN_TOOL_QUADTAP, input->keybit);
+ __set_bit(BTN_TOOL_QUINTTAP, input->keybit);
+ __set_bit(BTN_TOUCH, input->keybit);
+ __set_bit(INPUT_PROP_POINTER, input->propbit);
__set_bit(INPUT_PROP_BUTTONPAD, input->propbit);
}
- __set_bit(BTN_TOOL_FINGER, input->keybit);
- __set_bit(BTN_TOOL_DOUBLETAP, input->keybit);
- __set_bit(BTN_TOOL_TRIPLETAP, input->keybit);
- __set_bit(BTN_TOOL_QUADTAP, input->keybit);
- __set_bit(BTN_TOOL_QUINTTAP, input->keybit);
- __set_bit(BTN_TOUCH, input->keybit);
- __set_bit(INPUT_PROP_POINTER, input->propbit);
__set_bit(EV_ABS, input->evbit);
static int i2c_hid_acpi_pdata(struct i2c_client *client,
struct i2c_hid_platform_data *pdata)
{
- static u8 i2c_hid_guid[] = {
- 0xF7, 0xF6, 0xDF, 0x3C, 0x67, 0x42, 0x55, 0x45,
- 0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE,
- };
+ static guid_t i2c_hid_guid =
+ GUID_INIT(0x3CDFF6F7, 0x4267, 0x4555,
+ 0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE);
union acpi_object *obj;
struct acpi_device *adev;
acpi_handle handle;
if (!handle || acpi_bus_get_device(handle, &adev))
return -ENODEV;
- obj = acpi_evaluate_dsm_typed(handle, i2c_hid_guid, 1, 1, NULL,
+ obj = acpi_evaluate_dsm_typed(handle, &i2c_hid_guid, 1, 1, NULL,
ACPI_TYPE_INTEGER);
if (!obj) {
dev_err(&client->dev, "device _DSM execution failed\n");
{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
+ { USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3, HID_QUIRK_MULTI_INPUT },
dev->addr_len = 1;
dev->tx_queue_len = SSIP_TXQUEUE_LEN;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->header_ops = &phonet_header_ops;
}
* the first read operation, otherwise the first read cost
* one extra clock cycle.
*/
- temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+ temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
temp |= I2CR_MTX;
- writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+ imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
}
msgs->buf[msgs->len-1] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
* the first read operation, otherwise the first read cost
* one extra clock cycle.
*/
- temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+ temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
temp |= I2CR_MTX;
- writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+ imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
}
} else if (i == (msgs->len - 2)) {
dev_dbg(&i2c_imx->adapter.dev,
/* unmap the data buffer */
if (dma_size != 0)
- dma_unmap_single(&adap->dev, dma_addr, dma_size, dma_direction);
+ dma_unmap_single(dev, dma_addr, dma_size, dma_direction);
if (unlikely(!time_left)) {
dev_err(dev, "completion wait timed out\n");
rcar_i2c_write(priv, ICFBSCR, TCYC06);
dma_unmap_single(chan->device->dev, sg_dma_address(&priv->sg),
- priv->msg->len, priv->dma_direction);
+ sg_dma_len(&priv->sg), priv->dma_direction);
priv->dma_direction = DMA_NONE;
}
int error;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
rq->special = (char *)pc;
memset(sense, 0, sizeof(*sense));
blk_rq_init(rq->q, sense_rq);
- scsi_req_init(sense_rq);
+ scsi_req_init(req);
err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
GFP_NOIO);
ide_requeue_and_plug(drive, failed_rq);
if (ide_queue_sense_rq(drive, pc)) {
blk_start_request(failed_rq);
- ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq));
+ ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq));
}
}
EXPORT_SYMBOL_GPL(ide_retry_pc);
/* No more interrupts */
if ((stat & ATA_DRQ) == 0) {
- int uptodate, error;
+ int uptodate;
+ blk_status_t error;
debug_log("Packet command completed, %d bytes transferred\n",
blk_rq_bytes(rq));
if (ata_misc_request(rq)) {
scsi_req(rq)->result = 0;
- error = 0;
+ error = BLK_STS_OK;
} else {
if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
scsi_req(rq)->result = -EIO;
}
- error = uptodate ? 0 : -EIO;
+ error = uptodate ? BLK_STS_OK : BLK_STS_IOERR;
}
ide_complete_rq(drive, error, blk_rq_bytes(rq));
scsi_req(failed)->sense_len = scsi_req(rq)->sense_len;
cdrom_analyze_sense_data(drive, failed);
- if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed)))
+ if (ide_end_rq(drive, failed, BLK_STS_IOERR, blk_rq_bytes(failed)))
BUG();
} else
cdrom_analyze_sense_data(drive, NULL);
rq = blk_get_request(drive->queue,
write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
ide_req(rq)->type = ATA_PRIV_PC;
rq->rq_flags |= rq_flags;
nr_bytes -= cmd->last_xfer_len;
if (nr_bytes > 0) {
- ide_complete_rq(drive, 0, nr_bytes);
+ ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
return true;
}
out_end:
if (blk_rq_is_scsi(rq) && rc == 0) {
scsi_req(rq)->resid_len = 0;
- blk_end_request_all(rq, 0);
+ blk_end_request_all(rq, BLK_STS_OK);
hwif->rq = NULL;
} else {
if (sense && uptodate)
scsi_req(rq)->resid_len += cmd->last_xfer_len;
}
- ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq));
+ ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, blk_rq_bytes(rq));
if (sense && rc == 2)
ide_error(drive, "request sense failure", stat);
if (nsectors == 0)
nsectors = 1;
- ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9);
+ ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, nsectors << 9);
return ide_stopped;
}
int ret;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
rq->rq_flags = RQF_QUIET;
blk_execute_rq(drive->queue, cd->disk, rq, 0);
return setting->set(drive, arg);
rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
scsi_req(rq)->cmd_len = 5;
scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
return -EBUSY;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_TASKFILE;
drive->mult_req = arg;
if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
ide_finish_cmd(drive, cmd, stat);
else
- ide_complete_rq(drive, 0,
+ ide_complete_rq(drive, BLK_STS_OK,
blk_rq_sectors(cmd->rq) << 9);
return ide_stopped;
}
return ide_stopped;
}
scsi_req(rq)->result = err;
- ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
return ide_stopped;
}
}
EXPORT_SYMBOL_GPL(ide_error);
-static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
+static inline void ide_complete_drive_reset(ide_drive_t *drive, blk_status_t err)
{
struct request *rq = drive->hwif->rq;
scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
if (err <= 0 && scsi_req(rq)->result == 0)
scsi_req(rq)->result = -EIO;
- ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, err, blk_rq_bytes(rq));
}
}
}
/* done polling */
hwif->polling = 0;
- ide_complete_drive_reset(drive, 0);
+ ide_complete_drive_reset(drive, BLK_STS_OK);
return ide_stopped;
}
ide_hwif_t *hwif = drive->hwif;
const struct ide_port_ops *port_ops = hwif->port_ops;
u8 tmp;
- int err = 0;
+ blk_status_t err = BLK_STS_OK;
if (port_ops && port_ops->reset_poll) {
err = port_ops->reset_poll(drive);
printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n",
hwif->name, tmp);
drive->failures++;
- err = -EIO;
+ err = BLK_STS_IOERR;
} else {
tmp = ide_read_error(drive);
} else {
ide_reset_report_error(hwif, tmp);
drive->failures++;
- err = -EIO;
+ err = BLK_STS_IOERR;
}
}
out:
if (io_ports->ctl_addr == 0) {
spin_unlock_irqrestore(&hwif->lock, flags);
- ide_complete_drive_reset(drive, -ENXIO);
+ ide_complete_drive_reset(drive, BLK_STS_IOERR);
return ide_stopped;
}
drive->failed_pc = NULL;
drive->pc_callback(drive, 0);
- ide_complete_rq(drive, -EIO, done);
+ ide_complete_rq(drive, BLK_STS_IOERR, done);
return ide_stopped;
}
if (ata_misc_request(rq)) {
scsi_req(rq)->result = 0;
- ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
return ide_stopped;
} else
goto out_end;
drive->failed_pc = NULL;
if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
scsi_req(rq)->result = -EIO;
- ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
return ide_stopped;
}
#include <linux/uaccess.h>
#include <asm/io.h>
-int ide_end_rq(ide_drive_t *drive, struct request *rq, int error,
+int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
unsigned int nr_bytes)
{
/*
}
}
-int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
+int ide_complete_rq(ide_drive_t *drive, blk_status_t error, unsigned int nr_bytes)
{
ide_hwif_t *hwif = drive->hwif;
struct request *rq = hwif->rq;
* if failfast is set on a request, override number of sectors
* and complete the whole request right now
*/
- if (blk_noretry_request(rq) && error <= 0)
+ if (blk_noretry_request(rq) && error)
nr_bytes = blk_rq_sectors(rq) << 9;
rc = ide_end_rq(drive, rq, error, nr_bytes);
scsi_req(rq)->result = -EIO;
}
- ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
}
static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
printk("%s: DRIVE_CMD (null)\n", drive->name);
#endif
scsi_req(rq)->result = 0;
- ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
return ide_stopped;
}
struct request *rq;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_TASKFILE;
blk_execute_rq(drive->queue, NULL, rq, 0);
err = scsi_req(rq)->result ? -EIO : 0;
int ret = 0;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
scsi_req(rq)->cmd_len = 1;
scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
spin_unlock_irq(&hwif->lock);
rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
scsi_req(rq)->cmd_len = 1;
ide_req(rq)->type = ATA_PRIV_MISC;
* timeout has expired, so power management will be reenabled.
*/
rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
- scsi_req_init(rq);
if (IS_ERR(rq))
goto out;
memset(&rqpm, 0, sizeof(rqpm));
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
rq->special = &rqpm;
rqpm.pm_step = IDE_PM_START_SUSPEND;
return ret;
}
-static void ide_end_sync_rq(struct request *rq, int error)
+static void ide_end_sync_rq(struct request *rq, blk_status_t error)
{
complete(rq->end_io_data);
}
if (unlikely(blk_queue_dying(q))) {
rq->rq_flags |= RQF_QUIET;
scsi_req(rq)->result = -ENXIO;
- __blk_end_request_all(rq, 0);
+ __blk_end_request_all(rq, BLK_STS_OK);
spin_unlock_irq(q->queue_lock);
return -ENXIO;
}
memset(&rqpm, 0, sizeof(rqpm));
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_PM_RESUME;
rq->rq_flags |= RQF_PREEMPT;
rq->special = &rqpm;
drive->hwif->rq = NULL;
- if (blk_end_request(rq, 0, 0))
+ if (blk_end_request(rq, BLK_STS_OK, 0))
BUG();
}
}
}
-static int ide_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
+static void ide_initialize_rq(struct request *rq)
{
struct ide_request *req = blk_mq_rq_to_pdu(rq);
+ scsi_req_init(&req->sreq);
req->sreq.sense = req->sense;
- return 0;
}
/*
return 1;
q->request_fn = do_ide_request;
- q->init_rq_fn = ide_init_rq;
+ q->initialize_rq_fn = ide_initialize_rq;
q->cmd_size = sizeof(struct ide_request);
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
if (blk_init_allocated_queue(q) < 0) {
blk_cleanup_queue(q);
return 1;
drive->failed_pc = NULL;
drive->pc_callback(drive, 0);
- ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
return ide_stopped;
}
ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries,
BUG_ON(size < 0 || size % tape->blk_size);
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
scsi_req(rq)->cmd[13] = cmd;
rq->rq_disk = tape->disk;
}
if (nr_bytes > 0)
- ide_complete_rq(drive, 0, nr_bytes);
+ ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
}
}
ide_driveid_update(drive);
}
- ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
}
/*
if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
ide_finish_cmd(drive, cmd, stat);
else
- ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9);
+ ide_complete_rq(drive, BLK_STS_OK, blk_rq_sectors(cmd->rq) << 9);
return ide_stopped;
out_err:
ide_error_cmd(drive, cmd);
rq = blk_get_request(drive->queue,
(cmd->tf_flags & IDE_TFLAG_WRITE) ?
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_TASKFILE;
/*
* yet.
*/
-static int sil_sata_reset_poll(ide_drive_t *drive)
+static blk_status_t sil_sata_reset_poll(ide_drive_t *drive)
{
ide_hwif_t *hwif = drive->hwif;
void __iomem *sata_status_addr
if ((sata_stat & 0x03) != 0x03) {
printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n",
hwif->name, sata_stat);
- return -ENXIO;
+ return BLK_STS_IOERR;
}
}
- return 0;
+ return BLK_STS_OK;
}
/**
static void meson_sar_adc_clear_fifo(struct iio_dev *indio_dev)
{
struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
- int count;
+ unsigned int count, tmp;
for (count = 0; count < MESON_SAR_ADC_MAX_FIFO_SIZE; count++) {
if (!meson_sar_adc_get_fifo_count(indio_dev))
break;
- regmap_read(priv->regmap, MESON_SAR_ADC_FIFO_RD, 0);
+ regmap_read(priv->regmap, MESON_SAR_ADC_FIFO_RD, &tmp);
}
}
adc->dev = dev;
iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!iores)
+ return -EINVAL;
+
adc->base = devm_ioremap(dev, iores->start, resource_size(iores));
- if (IS_ERR(adc->base))
- return PTR_ERR(adc->base);
+ if (!adc->base)
+ return -ENOMEM;
init_completion(&adc->completion);
spin_lock_init(&adc->lock);
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/iio/buffer.h>
+#include <linux/iio/buffer_impl.h>
#include <linux/iio/buffer-dma.h>
#include <linux/dma-mapping.h>
#include <linux/sizes.h>
#include <linux/iio/iio.h>
#include <linux/iio/buffer.h>
+#include <linux/iio/buffer_impl.h>
#include <linux/iio/buffer-dma.h>
#include <linux/iio/buffer-dmaengine.h>
static const struct inv_mpu6050_reg_map reg_set_6500 = {
.sample_rate_div = INV_MPU6050_REG_SAMPLE_RATE_DIV,
.lpf = INV_MPU6050_REG_CONFIG,
+ .accel_lpf = INV_MPU6500_REG_ACCEL_CONFIG_2,
.user_ctrl = INV_MPU6050_REG_USER_CTRL,
.fifo_en = INV_MPU6050_REG_FIFO_EN,
.gyro_config = INV_MPU6050_REG_GYRO_CONFIG,
EXPORT_SYMBOL_GPL(inv_mpu6050_set_power_itg);
/**
+ * inv_mpu6050_set_lpf_regs() - set low pass filter registers, chip dependent
+ *
+ * MPU60xx/MPU9150 use only 1 register for accelerometer + gyroscope
+ * MPU6500 and above have a dedicated register for accelerometer
+ */
+static int inv_mpu6050_set_lpf_regs(struct inv_mpu6050_state *st,
+ enum inv_mpu6050_filter_e val)
+{
+ int result;
+
+ result = regmap_write(st->map, st->reg->lpf, val);
+ if (result)
+ return result;
+
+ switch (st->chip_type) {
+ case INV_MPU6050:
+ case INV_MPU6000:
+ case INV_MPU9150:
+ /* old chips, nothing to do */
+ result = 0;
+ break;
+ default:
+ /* set accel lpf */
+ result = regmap_write(st->map, st->reg->accel_lpf, val);
+ break;
+ }
+
+ return result;
+}
+
+/**
* inv_mpu6050_init_config() - Initialize hardware, disable FIFO.
*
* Initial configuration:
if (result)
return result;
- d = INV_MPU6050_FILTER_20HZ;
- result = regmap_write(st->map, st->reg->lpf, d);
+ result = inv_mpu6050_set_lpf_regs(st, INV_MPU6050_FILTER_20HZ);
if (result)
return result;
* would be alising. This function basically search for the
* correct low pass parameters based on the fifo rate, e.g,
* sampling frequency.
+ *
+ * lpf is set automatically when setting sampling rate to avoid any aliases.
*/
static int inv_mpu6050_set_lpf(struct inv_mpu6050_state *st, int rate)
{
while ((h < hz[i]) && (i < ARRAY_SIZE(d) - 1))
i++;
data = d[i];
- result = regmap_write(st->map, st->reg->lpf, data);
+ result = inv_mpu6050_set_lpf_regs(st, data);
if (result)
return result;
st->chip_config.lpf = data;
* struct inv_mpu6050_reg_map - Notable registers.
* @sample_rate_div: Divider applied to gyro output rate.
* @lpf: Configures internal low pass filter.
+ * @accel_lpf: Configures accelerometer low pass filter.
* @user_ctrl: Enables/resets the FIFO.
* @fifo_en: Determines which data will appear in FIFO.
* @gyro_config: gyro config register.
struct inv_mpu6050_reg_map {
u8 sample_rate_div;
u8 lpf;
+ u8 accel_lpf;
u8 user_ctrl;
u8 fifo_en;
u8 gyro_config;
#define INV_MPU6050_FIFO_THRESHOLD 500
/* mpu6500 registers */
+#define INV_MPU6500_REG_ACCEL_CONFIG_2 0x1D
#define INV_MPU6500_REG_ACCEL_OFFSET 0x77
/* delay time in milliseconds */
return ret;
rt = (struct rt6_info *)dst;
- if (ipv6_addr_any(&fl6.saddr)) {
- ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
- &fl6.daddr, 0, &fl6.saddr);
- if (ret)
- goto put;
-
+ if (ipv6_addr_any(&src_in->sin6_addr)) {
src_in->sin6_family = AF_INET6;
src_in->sin6_addr = fl6.saddr;
}
*pdst = dst;
return 0;
-put:
- dst_release(dst);
- return ret;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
+#define BNXT_RE_UD_QP_HW_STALL 0x400000
+
+#define BNXT_RE_RQ_WQE_THRESHOLD 32
+
struct bnxt_re_work {
struct work_struct work;
unsigned long event;
#include "ib_verbs.h"
#include <rdma/bnxt_re-abi.h>
+static int __from_ib_access_flags(int iflags)
+{
+ int qflags = 0;
+
+ if (iflags & IB_ACCESS_LOCAL_WRITE)
+ qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_READ)
+ qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
+ if (iflags & IB_ACCESS_REMOTE_WRITE)
+ qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+ qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
+ if (iflags & IB_ACCESS_MW_BIND)
+ qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
+ if (iflags & IB_ZERO_BASED)
+ qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
+ if (iflags & IB_ACCESS_ON_DEMAND)
+ qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
+ return qflags;
+};
+
+static enum ib_access_flags __to_ib_access_flags(int qflags)
+{
+ enum ib_access_flags iflags = 0;
+
+ if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
+ iflags |= IB_ACCESS_LOCAL_WRITE;
+ if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
+ iflags |= IB_ACCESS_REMOTE_WRITE;
+ if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
+ iflags |= IB_ACCESS_REMOTE_READ;
+ if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
+ iflags |= IB_ACCESS_REMOTE_ATOMIC;
+ if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
+ iflags |= IB_ACCESS_MW_BIND;
+ if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
+ iflags |= IB_ZERO_BASED;
+ if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
+ iflags |= IB_ACCESS_ON_DEMAND;
+ return iflags;
+};
+
static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list,
struct bnxt_qplib_sge *sg_list, int num)
{
ib_attr->max_total_mcast_qp_attach = 0;
ib_attr->max_ah = dev_attr->max_ah;
- ib_attr->max_fmr = dev_attr->max_fmr;
- ib_attr->max_map_per_fmr = 1; /* ? */
+ ib_attr->max_fmr = 0;
+ ib_attr->max_map_per_fmr = 0;
ib_attr->max_srq = dev_attr->max_srq;
ib_attr->max_srq_wr = dev_attr->max_srq_wqes;
return IB_LINK_LAYER_ETHERNET;
}
+#define BNXT_RE_FENCE_PBL_SIZE DIV_ROUND_UP(BNXT_RE_FENCE_BYTES, PAGE_SIZE)
+
+static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd)
+{
+ struct bnxt_re_fence_data *fence = &pd->fence;
+ struct ib_mr *ib_mr = &fence->mr->ib_mr;
+ struct bnxt_qplib_swqe *wqe = &fence->bind_wqe;
+
+ memset(wqe, 0, sizeof(*wqe));
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW;
+ wqe->wr_id = BNXT_QPLIB_FENCE_WRID;
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ wqe->bind.zero_based = false;
+ wqe->bind.parent_l_key = ib_mr->lkey;
+ wqe->bind.va = (u64)(unsigned long)fence->va;
+ wqe->bind.length = fence->size;
+ wqe->bind.access_cntl = __from_ib_access_flags(IB_ACCESS_REMOTE_READ);
+ wqe->bind.mw_type = SQ_BIND_MW_TYPE_TYPE1;
+
+ /* Save the initial rkey in fence structure for now;
+ * wqe->bind.r_key will be set at (re)bind time.
+ */
+ fence->bind_rkey = ib_inc_rkey(fence->mw->rkey);
+}
+
+static int bnxt_re_bind_fence_mw(struct bnxt_qplib_qp *qplib_qp)
+{
+ struct bnxt_re_qp *qp = container_of(qplib_qp, struct bnxt_re_qp,
+ qplib_qp);
+ struct ib_pd *ib_pd = qp->ib_qp.pd;
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_fence_data *fence = &pd->fence;
+ struct bnxt_qplib_swqe *fence_wqe = &fence->bind_wqe;
+ struct bnxt_qplib_swqe wqe;
+ int rc;
+
+ memcpy(&wqe, fence_wqe, sizeof(wqe));
+ wqe.bind.r_key = fence->bind_rkey;
+ fence->bind_rkey = ib_inc_rkey(fence->bind_rkey);
+
+ dev_dbg(rdev_to_dev(qp->rdev),
+ "Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n",
+ wqe.bind.r_key, qp->qplib_qp.id, pd);
+ rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+ if (rc) {
+ dev_err(rdev_to_dev(qp->rdev), "Failed to bind fence-WQE\n");
+ return rc;
+ }
+ bnxt_qplib_post_send_db(&qp->qplib_qp);
+
+ return rc;
+}
+
+static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd)
+{
+ struct bnxt_re_fence_data *fence = &pd->fence;
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct device *dev = &rdev->en_dev->pdev->dev;
+ struct bnxt_re_mr *mr = fence->mr;
+
+ if (fence->mw) {
+ bnxt_re_dealloc_mw(fence->mw);
+ fence->mw = NULL;
+ }
+ if (mr) {
+ if (mr->ib_mr.rkey)
+ bnxt_qplib_dereg_mrw(&rdev->qplib_res, &mr->qplib_mr,
+ true);
+ if (mr->ib_mr.lkey)
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ kfree(mr);
+ fence->mr = NULL;
+ }
+ if (fence->dma_addr) {
+ dma_unmap_single(dev, fence->dma_addr, BNXT_RE_FENCE_BYTES,
+ DMA_BIDIRECTIONAL);
+ fence->dma_addr = 0;
+ }
+}
+
+static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
+{
+ int mr_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_MW_BIND;
+ struct bnxt_re_fence_data *fence = &pd->fence;
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct device *dev = &rdev->en_dev->pdev->dev;
+ struct bnxt_re_mr *mr = NULL;
+ dma_addr_t dma_addr = 0;
+ struct ib_mw *mw;
+ u64 pbl_tbl;
+ int rc;
+
+ dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
+ DMA_BIDIRECTIONAL);
+ rc = dma_mapping_error(dev, dma_addr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to dma-map fence-MR-mem\n");
+ rc = -EIO;
+ fence->dma_addr = 0;
+ goto fail;
+ }
+ fence->dma_addr = dma_addr;
+
+ /* Allocate a MR */
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ fence->mr = mr;
+ mr->rdev = rdev;
+ mr->qplib_mr.pd = &pd->qplib_pd;
+ mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+ mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to alloc fence-HW-MR\n");
+ goto fail;
+ }
+
+ /* Register MR */
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ mr->qplib_mr.va = (u64)(unsigned long)fence->va;
+ mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
+ pbl_tbl = dma_addr;
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl,
+ BNXT_RE_FENCE_PBL_SIZE, false);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n");
+ goto fail;
+ }
+ mr->ib_mr.rkey = mr->qplib_mr.rkey;
+
+ /* Create a fence MW only for kernel consumers */
+ mw = bnxt_re_alloc_mw(&pd->ib_pd, IB_MW_TYPE_1, NULL);
+ if (!mw) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to create fence-MW for PD: %p\n", pd);
+ rc = -EINVAL;
+ goto fail;
+ }
+ fence->mw = mw;
+
+ bnxt_re_create_fence_wqe(pd);
+ return 0;
+
+fail:
+ bnxt_re_destroy_fence_mr(pd);
+ return rc;
+}
+
/* Protection Domains */
int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
{
struct bnxt_re_dev *rdev = pd->rdev;
int rc;
+ bnxt_re_destroy_fence_mr(pd);
if (ib_pd->uobject && pd->dpi.dbr) {
struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
struct bnxt_re_ucontext *ucntx;
}
}
+ if (!udata)
+ if (bnxt_re_create_fence_mr(pd))
+ dev_warn(rdev_to_dev(rdev),
+ "Failed to create Fence-MR\n");
return &pd->ib_pd;
dbfail:
(void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
/* Shadow QP SQ depth should be same as QP1 RQ depth */
qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
qp->qplib_qp.sq.max_sge = 2;
+ /* Q full delta can be 1 since it is internal QP */
+ qp->qplib_qp.sq.q_full_delta = 1;
qp->qplib_qp.scq = qp1_qp->scq;
qp->qplib_qp.rcq = qp1_qp->rcq;
qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
+ /* Q full delta can be 1 since it is internal QP */
+ qp->qplib_qp.rq.q_full_delta = 1;
qp->qplib_qp.mtu = qp1_qp->mtu;
qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type ==
IB_SIGNAL_ALL_WR) ? true : false);
- entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
- qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
- dev_attr->max_qp_wqes + 1);
-
qp->qplib_qp.sq.max_sge = qp_init_attr->cap.max_send_sge;
if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
qp->qplib_qp.rq.max_wqe = min_t(u32, entries,
dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+ qp_init_attr->cap.max_recv_wr;
+
qp->qplib_qp.rq.max_sge = qp_init_attr->cap.max_recv_sge;
if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
if (qp_init_attr->qp_type == IB_QPT_GSI) {
+ /* Allocate 1 more than what's provided */
+ entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
+ qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+ qp_init_attr->cap.max_send_wr;
qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
}
} else {
+ /* Allocate 128 + 1 more than what's provided */
+ entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr +
+ BNXT_QPLIB_RESERVED_QP_WRS + 1);
+ qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes +
+ BNXT_QPLIB_RESERVED_QP_WRS + 1);
+ qp->qplib_qp.sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
+
+ /*
+ * Reserving one slot for Phantom WQE. Application can
+ * post one extra entry in this case. But allowing this to avoid
+ * unexpected Queue full condition
+ */
+
+ qp->qplib_qp.sq.q_full_delta -= 1;
+
qp->qplib_qp.max_rd_atomic = dev_attr->max_qp_rd_atom;
qp->qplib_qp.max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
if (udata) {
qp->ib_qp.qp_num = qp->qplib_qp.id;
spin_lock_init(&qp->sq_lock);
+ spin_lock_init(&qp->rq_lock);
if (udata) {
struct bnxt_re_qp_resp resp;
}
}
-static int __from_ib_access_flags(int iflags)
-{
- int qflags = 0;
-
- if (iflags & IB_ACCESS_LOCAL_WRITE)
- qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
- if (iflags & IB_ACCESS_REMOTE_READ)
- qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
- if (iflags & IB_ACCESS_REMOTE_WRITE)
- qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
- if (iflags & IB_ACCESS_REMOTE_ATOMIC)
- qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
- if (iflags & IB_ACCESS_MW_BIND)
- qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
- if (iflags & IB_ZERO_BASED)
- qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
- if (iflags & IB_ACCESS_ON_DEMAND)
- qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
- return qflags;
-};
-
-static enum ib_access_flags __to_ib_access_flags(int qflags)
-{
- enum ib_access_flags iflags = 0;
-
- if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
- iflags |= IB_ACCESS_LOCAL_WRITE;
- if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
- iflags |= IB_ACCESS_REMOTE_WRITE;
- if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
- iflags |= IB_ACCESS_REMOTE_READ;
- if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
- iflags |= IB_ACCESS_REMOTE_ATOMIC;
- if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
- iflags |= IB_ACCESS_MW_BIND;
- if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
- iflags |= IB_ZERO_BASED;
- if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
- iflags |= IB_ACCESS_ON_DEMAND;
- return iflags;
-};
-
static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
struct bnxt_re_qp *qp1_qp,
int qp_attr_mask)
entries = roundup_pow_of_two(qp_attr->cap.max_send_wr);
qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+ qp_attr->cap.max_send_wr;
+ /*
+ * Reserving one slot for Phantom WQE. Some application can
+ * post one extra entry in this case. Allowing this to avoid
+ * unexpected Queue full condition
+ */
+ qp->qplib_qp.sq.q_full_delta -= 1;
qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge;
if (qp->qplib_qp.rq.max_wqe) {
entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr);
qp->qplib_qp.rq.max_wqe =
min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+ qp_attr->cap.max_recv_wr;
qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge;
} else {
/* SRQ was used prior, just ignore the RQ caps */
return payload_sz;
}
+static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp)
+{
+ if ((qp->ib_qp.qp_type == IB_QPT_UD ||
+ qp->ib_qp.qp_type == IB_QPT_GSI ||
+ qp->ib_qp.qp_type == IB_QPT_RAW_ETHERTYPE) &&
+ qp->qplib_qp.wqe_cnt == BNXT_RE_UD_QP_HW_STALL) {
+ int qp_attr_mask;
+ struct ib_qp_attr qp_attr;
+
+ qp_attr_mask = IB_QP_STATE;
+ qp_attr.qp_state = IB_QPS_RTS;
+ bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, qp_attr_mask, NULL);
+ qp->qplib_qp.wqe_cnt = 0;
+ }
+}
+
static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
struct bnxt_re_qp *qp,
struct ib_send_wr *wr)
wr = wr->next;
}
bnxt_qplib_post_send_db(&qp->qplib_qp);
+ bnxt_ud_qp_hw_stall_workaround(qp);
spin_unlock_irqrestore(&qp->sq_lock, flags);
return rc;
}
wr = wr->next;
}
bnxt_qplib_post_send_db(&qp->qplib_qp);
+ bnxt_ud_qp_hw_stall_workaround(qp);
spin_unlock_irqrestore(&qp->sq_lock, flags);
return rc;
struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
struct bnxt_qplib_swqe wqe;
int rc = 0, payload_sz = 0;
+ unsigned long flags;
+ u32 count = 0;
+ spin_lock_irqsave(&qp->rq_lock, flags);
while (wr) {
/* House keeping */
memset(&wqe, 0, sizeof(wqe));
*bad_wr = wr;
break;
}
+
+ /* Ring DB if the RQEs posted reaches a threshold value */
+ if (++count >= BNXT_RE_RQ_WQE_THRESHOLD) {
+ bnxt_qplib_post_recv_db(&qp->qplib_qp);
+ count = 0;
+ }
+
wr = wr->next;
}
- bnxt_qplib_post_recv_db(&qp->qplib_qp);
+
+ if (count)
+ bnxt_qplib_post_recv_db(&qp->qplib_qp);
+
+ spin_unlock_irqrestore(&qp->rq_lock, flags);
+
return rc;
}
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
}
+static int send_phantom_wqe(struct bnxt_re_qp *qp)
+{
+ struct bnxt_qplib_qp *lib_qp = &qp->qplib_qp;
+ unsigned long flags;
+ int rc = 0;
+
+ spin_lock_irqsave(&qp->sq_lock, flags);
+
+ rc = bnxt_re_bind_fence_mw(lib_qp);
+ if (!rc) {
+ lib_qp->sq.phantom_wqe_cnt++;
+ dev_dbg(&lib_qp->sq.hwq.pdev->dev,
+ "qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n",
+ lib_qp->id, lib_qp->sq.hwq.prod,
+ HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq),
+ lib_qp->sq.phantom_wqe_cnt);
+ }
+
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+ return rc;
+}
+
int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
{
struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
struct bnxt_re_qp *qp;
struct bnxt_qplib_cqe *cqe;
int i, ncqe, budget;
+ struct bnxt_qplib_q *sq;
+ struct bnxt_qplib_qp *lib_qp;
u32 tbl_idx;
struct bnxt_re_sqp_entries *sqp_entry = NULL;
unsigned long flags;
}
cqe = &cq->cql[0];
while (budget) {
- ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget);
+ lib_qp = NULL;
+ ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget, &lib_qp);
+ if (lib_qp) {
+ sq = &lib_qp->sq;
+ if (sq->send_phantom) {
+ qp = container_of(lib_qp,
+ struct bnxt_re_qp, qplib_qp);
+ if (send_phantom_wqe(qp) == -ENOMEM)
+ dev_err(rdev_to_dev(cq->rdev),
+ "Phantom failed! Scheduled to send again\n");
+ else
+ sq->send_phantom = false;
+ }
+ }
+
if (!ncqe)
break;
struct bnxt_re_dev *rdev = mr->rdev;
int rc;
+ rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc);
+ return rc;
+ }
+
if (mr->npages && mr->pages) {
rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
&mr->qplib_frpl);
mr->npages = 0;
mr->pages = NULL;
}
- rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
-
if (!IS_ERR_OR_NULL(mr->ib_umem))
ib_umem_release(mr->ib_umem);
return ERR_PTR(rc);
}
-/* Fast Memory Regions */
-struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *ib_pd, int mr_access_flags,
- struct ib_fmr_attr *fmr_attr)
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+ struct ib_udata *udata)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
- struct bnxt_re_fmr *fmr;
+ struct bnxt_re_mw *mw;
int rc;
- if (fmr_attr->max_pages > MAX_PBL_LVL_2_PGS ||
- fmr_attr->max_maps > rdev->dev_attr.max_map_per_fmr) {
- dev_err(rdev_to_dev(rdev), "Allocate FMR exceeded Max limit");
+ mw = kzalloc(sizeof(*mw), GFP_KERNEL);
+ if (!mw)
return ERR_PTR(-ENOMEM);
- }
- fmr = kzalloc(sizeof(*fmr), GFP_KERNEL);
- if (!fmr)
- return ERR_PTR(-ENOMEM);
-
- fmr->rdev = rdev;
- fmr->qplib_fmr.pd = &pd->qplib_pd;
- fmr->qplib_fmr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+ mw->rdev = rdev;
+ mw->qplib_mw.pd = &pd->qplib_pd;
- rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
- if (rc)
+ mw->qplib_mw.type = (type == IB_MW_TYPE_1 ?
+ CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 :
+ CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B);
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mw->qplib_mw);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Allocate MW failed!");
goto fail;
+ }
+ mw->ib_mw.rkey = mw->qplib_mw.rkey;
- fmr->qplib_fmr.flags = __from_ib_access_flags(mr_access_flags);
- fmr->ib_fmr.lkey = fmr->qplib_fmr.lkey;
- fmr->ib_fmr.rkey = fmr->ib_fmr.lkey;
+ atomic_inc(&rdev->mw_count);
+ return &mw->ib_mw;
- atomic_inc(&rdev->mr_count);
- return &fmr->ib_fmr;
fail:
- kfree(fmr);
+ kfree(mw);
return ERR_PTR(rc);
}
-int bnxt_re_map_phys_fmr(struct ib_fmr *ib_fmr, u64 *page_list, int list_len,
- u64 iova)
+int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
{
- struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
- ib_fmr);
- struct bnxt_re_dev *rdev = fmr->rdev;
+ struct bnxt_re_mw *mw = container_of(ib_mw, struct bnxt_re_mw, ib_mw);
+ struct bnxt_re_dev *rdev = mw->rdev;
int rc;
- fmr->qplib_fmr.va = iova;
- fmr->qplib_fmr.total_size = list_len * PAGE_SIZE;
-
- rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &fmr->qplib_fmr, page_list,
- list_len, true);
- if (rc)
- dev_err(rdev_to_dev(rdev), "Failed to map FMR for lkey = 0x%x!",
- fmr->ib_fmr.lkey);
- return rc;
-}
-
-int bnxt_re_unmap_fmr(struct list_head *fmr_list)
-{
- struct bnxt_re_dev *rdev;
- struct bnxt_re_fmr *fmr;
- struct ib_fmr *ib_fmr;
- int rc = 0;
-
- /* Validate each FMRs inside the fmr_list */
- list_for_each_entry(ib_fmr, fmr_list, list) {
- fmr = container_of(ib_fmr, struct bnxt_re_fmr, ib_fmr);
- rdev = fmr->rdev;
-
- if (rdev) {
- rc = bnxt_qplib_dereg_mrw(&rdev->qplib_res,
- &fmr->qplib_fmr, true);
- if (rc)
- break;
- }
+ rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mw->qplib_mw);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Free MW failed: %#x\n", rc);
+ return rc;
}
- return rc;
-}
-
-int bnxt_re_dealloc_fmr(struct ib_fmr *ib_fmr)
-{
- struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
- ib_fmr);
- struct bnxt_re_dev *rdev = fmr->rdev;
- int rc;
- rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
- if (rc)
- dev_err(rdev_to_dev(rdev), "Failed to free FMR");
-
- kfree(fmr);
- atomic_dec(&rdev->mr_count);
+ kfree(mw);
+ atomic_dec(&rdev->mw_count);
return rc;
}
u32 refcnt;
};
+#define BNXT_RE_FENCE_BYTES 64
+struct bnxt_re_fence_data {
+ u32 size;
+ u8 va[BNXT_RE_FENCE_BYTES];
+ dma_addr_t dma_addr;
+ struct bnxt_re_mr *mr;
+ struct ib_mw *mw;
+ struct bnxt_qplib_swqe bind_wqe;
+ u32 bind_rkey;
+};
+
struct bnxt_re_pd {
struct bnxt_re_dev *rdev;
struct ib_pd ib_pd;
struct bnxt_qplib_pd qplib_pd;
struct bnxt_qplib_dpi dpi;
+ struct bnxt_re_fence_data fence;
};
struct bnxt_re_ah {
struct bnxt_re_dev *rdev;
struct ib_qp ib_qp;
spinlock_t sq_lock; /* protect sq */
+ spinlock_t rq_lock; /* protect rq */
struct bnxt_qplib_qp qplib_qp;
struct ib_umem *sumem;
struct ib_umem *rumem;
struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
u32 max_num_sg);
int bnxt_re_dereg_mr(struct ib_mr *mr);
-struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
- struct ib_fmr_attr *fmr_attr);
-int bnxt_re_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len,
- u64 iova);
-int bnxt_re_unmap_fmr(struct list_head *fmr_list);
-int bnxt_re_dealloc_fmr(struct ib_fmr *fmr);
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+ struct ib_udata *udata);
+int bnxt_re_dealloc_mw(struct ib_mw *mw);
struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
ibdev->dereg_mr = bnxt_re_dereg_mr;
ibdev->alloc_mr = bnxt_re_alloc_mr;
ibdev->map_mr_sg = bnxt_re_map_mr_sg;
- ibdev->alloc_fmr = bnxt_re_alloc_fmr;
- ibdev->map_phys_fmr = bnxt_re_map_phys_fmr;
- ibdev->unmap_fmr = bnxt_re_unmap_fmr;
- ibdev->dealloc_fmr = bnxt_re_dealloc_fmr;
ibdev->reg_user_mr = bnxt_re_reg_user_mr;
ibdev->alloc_ucontext = bnxt_re_alloc_ucontext;
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_create_qp1 req;
- struct creq_create_qp1_resp *resp;
+ struct creq_create_qp1_resp resp;
struct bnxt_qplib_pbl *pbl;
struct bnxt_qplib_q *sq = &qp->sq;
struct bnxt_qplib_q *rq = &qp->rq;
req.pd_id = cpu_to_le32(qp->pd->id);
- resp = (struct creq_create_qp1_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 0);
- if (!resp) {
- dev_err(&res->pdev->dev, "QPLIB: FP: CREATE_QP1 send failed");
- rc = -EINVAL;
- goto fail;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 timed out");
- rc = -ETIMEDOUT;
- goto fail;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 failed ");
- dev_err(&rcfw->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- rc = -EINVAL;
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
goto fail;
- }
- qp->id = le32_to_cpu(resp->xid);
+
+ qp->id = le32_to_cpu(resp.xid);
qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
sq->flush_in_progress = false;
rq->flush_in_progress = false;
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
struct cmdq_create_qp req;
- struct creq_create_qp_resp *resp;
+ struct creq_create_qp_resp resp;
struct bnxt_qplib_pbl *pbl;
struct sq_psn_search **psn_search_ptr;
unsigned long int psn_search, poff = 0;
}
req.pd_id = cpu_to_le32(qp->pd->id);
- resp = (struct creq_create_qp_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 0);
- if (!resp) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP send failed");
- rc = -EINVAL;
- goto fail;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP timed out");
- rc = -ETIMEDOUT;
- goto fail;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP failed ");
- dev_err(&rcfw->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- rc = -EINVAL;
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
goto fail;
- }
- qp->id = le32_to_cpu(resp->xid);
+
+ qp->id = le32_to_cpu(resp.xid);
qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
sq->flush_in_progress = false;
rq->flush_in_progress = false;
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_modify_qp req;
- struct creq_modify_qp_resp *resp;
+ struct creq_modify_qp_resp resp;
u16 cmd_flags = 0, pkey;
u32 temp32[4];
u32 bmask;
+ int rc;
RCFW_CMD_PREP(req, MODIFY_QP, cmd_flags);
req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id);
- resp = (struct creq_modify_qp_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 0);
- if (!resp) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP failed ");
- dev_err(&rcfw->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
- }
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
qp->cur_qp_state = qp->state;
return 0;
}
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_query_qp req;
- struct creq_query_qp_resp *resp;
+ struct creq_query_qp_resp resp;
+ struct bnxt_qplib_rcfw_sbuf *sbuf;
struct creq_query_qp_resp_sb *sb;
u16 cmd_flags = 0;
u32 temp32[4];
- int i;
+ int i, rc = 0;
RCFW_CMD_PREP(req, QUERY_QP, cmd_flags);
+ sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+ if (!sbuf)
+ return -ENOMEM;
+ sb = sbuf->sb;
+
req.qp_cid = cpu_to_le32(qp->id);
req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
- resp = (struct creq_query_qp_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void **)&sb, 0);
- if (!resp) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP failed ");
- dev_err(&rcfw->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
- }
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ (void *)sbuf, 0);
+ if (rc)
+ goto bail;
/* Extract the context from the side buffer */
qp->state = sb->en_sqd_async_notify_state &
CREQ_QUERY_QP_RESP_SB_STATE_MASK;
qp->dest_qpn = le32_to_cpu(sb->dest_qp_id);
memcpy(qp->smac, sb->src_mac, 6);
qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id);
- return 0;
+bail:
+ bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ return rc;
}
static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_destroy_qp req;
- struct creq_destroy_qp_resp *resp;
+ struct creq_destroy_qp_resp resp;
unsigned long flags;
u16 cmd_flags = 0;
+ int rc;
RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags);
req.qp_cid = cpu_to_le32(qp->id);
- resp = (struct creq_destroy_qp_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 0);
- if (!resp) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP failed ");
- dev_err(&rcfw->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
- }
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
/* Must walk the associated CQs to nullified the QP ptr */
spin_lock_irqsave(&qp->scq->hwq.lock, flags);
rc = -EINVAL;
goto done;
}
- if (HWQ_CMP((sq->hwq.prod + 1), &sq->hwq) ==
- HWQ_CMP(sq->hwq.cons, &sq->hwq)) {
+
+ if (bnxt_qplib_queue_full(sq)) {
+ dev_err(&sq->hwq.pdev->dev,
+ "QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x",
+ sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
+ sq->q_full_delta);
rc = -ENOMEM;
goto done;
}
}
sq->hwq.prod++;
+
+ qp->wqe_cnt++;
+
done:
return rc;
}
rc = -EINVAL;
goto done;
}
- if (HWQ_CMP((rq->hwq.prod + 1), &rq->hwq) ==
- HWQ_CMP(rq->hwq.cons, &rq->hwq)) {
+ if (bnxt_qplib_queue_full(rq)) {
dev_err(&rq->hwq.pdev->dev,
"QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
rc = -EINVAL;
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_create_cq req;
- struct creq_create_cq_resp *resp;
+ struct creq_create_cq_resp resp;
struct bnxt_qplib_pbl *pbl;
u16 cmd_flags = 0;
int rc;
(cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) <<
CMDQ_CREATE_CQ_CNQ_ID_SFT);
- resp = (struct creq_create_cq_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 0);
- if (!resp) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ timed out");
- rc = -ETIMEDOUT;
- goto fail;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ failed ");
- dev_err(&rcfw->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- rc = -EINVAL;
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
goto fail;
- }
- cq->id = le32_to_cpu(resp->xid);
+
+ cq->id = le32_to_cpu(resp.xid);
cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
cq->period = BNXT_QPLIB_QUEUE_START_PERIOD;
init_waitqueue_head(&cq->waitq);
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_destroy_cq req;
- struct creq_destroy_cq_resp *resp;
+ struct creq_destroy_cq_resp resp;
u16 cmd_flags = 0;
+ int rc;
RCFW_CMD_PREP(req, DESTROY_CQ, cmd_flags);
req.cq_cid = cpu_to_le32(cq->id);
- resp = (struct creq_destroy_cq_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 0);
- if (!resp) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ failed ");
- dev_err(&rcfw->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
- }
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
return 0;
}
return rc;
}
+/* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive)
+ * CQE is track from sw_cq_cons to max_element but valid only if VALID=1
+ */
+static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
+ u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons)
+{
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct bnxt_qplib_swq *swq;
+ u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
+ struct cq_base *peek_hwcqe, **peek_hw_cqe_ptr;
+ struct cq_req *peek_req_hwcqe;
+ struct bnxt_qplib_qp *peek_qp;
+ struct bnxt_qplib_q *peek_sq;
+ int i, rc = 0;
+
+ /* Normal mode */
+ /* Check for the psn_search marking before completing */
+ swq = &sq->swq[sw_sq_cons];
+ if (swq->psn_search &&
+ le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) {
+ /* Unmark */
+ swq->psn_search->flags_next_psn = cpu_to_le32
+ (le32_to_cpu(swq->psn_search->flags_next_psn)
+ & ~0x80000000);
+ dev_dbg(&cq->hwq.pdev->dev,
+ "FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n",
+ cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+ sq->condition = true;
+ sq->send_phantom = true;
+
+ /* TODO: Only ARM if the previous SQE is ARMALL */
+ bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ_ARMALL);
+
+ rc = -EAGAIN;
+ goto out;
+ }
+ if (sq->condition) {
+ /* Peek at the completions */
+ peek_raw_cq_cons = cq->hwq.cons;
+ peek_sw_cq_cons = cq_cons;
+ i = cq->hwq.max_elements;
+ while (i--) {
+ peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq);
+ peek_hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
+ peek_hwcqe = &peek_hw_cqe_ptr[CQE_PG(peek_sw_cq_cons)]
+ [CQE_IDX(peek_sw_cq_cons)];
+ /* If the next hwcqe is VALID */
+ if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons,
+ cq->hwq.max_elements)) {
+ /* If the next hwcqe is a REQ */
+ if ((peek_hwcqe->cqe_type_toggle &
+ CQ_BASE_CQE_TYPE_MASK) ==
+ CQ_BASE_CQE_TYPE_REQ) {
+ peek_req_hwcqe = (struct cq_req *)
+ peek_hwcqe;
+ peek_qp = (struct bnxt_qplib_qp *)
+ ((unsigned long)
+ le64_to_cpu
+ (peek_req_hwcqe->qp_handle));
+ peek_sq = &peek_qp->sq;
+ peek_sq_cons_idx = HWQ_CMP(le16_to_cpu(
+ peek_req_hwcqe->sq_cons_idx) - 1
+ , &sq->hwq);
+ /* If the hwcqe's sq's wr_id matches */
+ if (peek_sq == sq &&
+ sq->swq[peek_sq_cons_idx].wr_id ==
+ BNXT_QPLIB_FENCE_WRID) {
+ /*
+ * Unbreak only if the phantom
+ * comes back
+ */
+ dev_dbg(&cq->hwq.pdev->dev,
+ "FP:Got Phantom CQE");
+ sq->condition = false;
+ sq->single = true;
+ rc = 0;
+ goto out;
+ }
+ }
+ /* Valid but not the phantom, so keep looping */
+ } else {
+ /* Not valid yet, just exit and wait */
+ rc = -EINVAL;
+ goto out;
+ }
+ peek_sw_cq_cons++;
+ peek_raw_cq_cons++;
+ }
+ dev_err(&cq->hwq.pdev->dev,
+ "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x",
+ cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+ rc = -EINVAL;
+ }
+out:
+ return rc;
+}
+
static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
struct cq_req *hwcqe,
- struct bnxt_qplib_cqe **pcqe, int *budget)
+ struct bnxt_qplib_cqe **pcqe, int *budget,
+ u32 cq_cons, struct bnxt_qplib_qp **lib_qp)
{
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *sq;
struct bnxt_qplib_cqe *cqe;
- u32 sw_cons, cqe_cons;
+ u32 sw_sq_cons, cqe_sq_cons;
+ struct bnxt_qplib_swq *swq;
int rc = 0;
qp = (struct bnxt_qplib_qp *)((unsigned long)
}
sq = &qp->sq;
- cqe_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
- if (cqe_cons > sq->hwq.max_elements) {
+ cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
+ if (cqe_sq_cons > sq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process req reported ");
dev_err(&cq->hwq.pdev->dev,
"QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
- cqe_cons, sq->hwq.max_elements);
+ cqe_sq_cons, sq->hwq.max_elements);
return -EINVAL;
}
/* If we were in the middle of flushing the SQ, continue */
/* Require to walk the sq's swq to fabricate CQEs for all previously
* signaled SWQEs due to CQE aggregation from the current sq cons
- * to the cqe_cons
+ * to the cqe_sq_cons
*/
cqe = *pcqe;
while (*budget) {
- sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
- if (sw_cons == cqe_cons)
+ sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
+ if (sw_sq_cons == cqe_sq_cons)
+ /* Done */
break;
+
+ swq = &sq->swq[sw_sq_cons];
memset(cqe, 0, sizeof(*cqe));
cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
cqe->qp_handle = (u64)(unsigned long)qp;
cqe->src_qp = qp->id;
- cqe->wr_id = sq->swq[sw_cons].wr_id;
- cqe->type = sq->swq[sw_cons].type;
+ cqe->wr_id = swq->wr_id;
+ if (cqe->wr_id == BNXT_QPLIB_FENCE_WRID)
+ goto skip;
+ cqe->type = swq->type;
/* For the last CQE, check for status. For errors, regardless
* of the request being signaled or not, it must complete with
* the hwcqe error status
*/
- if (HWQ_CMP((sw_cons + 1), &sq->hwq) == cqe_cons &&
+ if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons &&
hwcqe->status != CQ_REQ_STATUS_OK) {
cqe->status = hwcqe->status;
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Processed Req ");
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
- sw_cons, cqe->wr_id, cqe->status);
+ sw_sq_cons, cqe->wr_id, cqe->status);
cqe++;
(*budget)--;
sq->flush_in_progress = true;
/* Must block new posting of SQ and RQ */
qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ sq->condition = false;
+ sq->single = false;
} else {
- if (sq->swq[sw_cons].flags &
- SQ_SEND_FLAGS_SIGNAL_COMP) {
+ if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+ /* Before we complete, do WA 9060 */
+ if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
+ cqe_sq_cons)) {
+ *lib_qp = qp;
+ goto out;
+ }
cqe->status = CQ_REQ_STATUS_OK;
cqe++;
(*budget)--;
}
}
+skip:
sq->hwq.cons++;
+ if (sq->single)
+ break;
}
+out:
*pcqe = cqe;
- if (!*budget && HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_cons) {
+ if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) {
/* Out of budget */
rc = -EAGAIN;
goto done;
}
+ /*
+ * Back to normal completion mode only after it has completed all of
+ * the WC for this CQE
+ */
+ sq->single = false;
if (!sq->flush_in_progress)
goto done;
flush:
}
int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
- int num_cqes)
+ int num_cqes, struct bnxt_qplib_qp **lib_qp)
{
struct cq_base *hw_cqe, **hw_cqe_ptr;
unsigned long flags;
case CQ_BASE_CQE_TYPE_REQ:
rc = bnxt_qplib_cq_process_req(cq,
(struct cq_req *)hw_cqe,
- &cqe, &budget);
+ &cqe, &budget,
+ sw_cons, lib_qp);
break;
case CQ_BASE_CQE_TYPE_RES_RC:
rc = bnxt_qplib_cq_process_res_rc(cq,
struct bnxt_qplib_swqe {
/* General */
+#define BNXT_QPLIB_FENCE_WRID 0x46454E43 /* "FENC" */
u64 wr_id;
u8 reqs_type;
u8 type;
struct scatterlist *sglist;
u32 nmap;
u32 max_wqe;
+ u16 q_full_delta;
u16 max_sge;
u32 psn;
bool flush_in_progress;
+ bool condition;
+ bool single;
+ bool send_phantom;
+ u32 phantom_wqe_cnt;
+ u32 phantom_cqe_cnt;
+ u32 next_cq_cons;
};
struct bnxt_qplib_qp {
u8 timeout;
u8 retry_cnt;
u8 rnr_retry;
+ u64 wqe_cnt;
u32 min_rnr_timer;
u32 max_rd_atomic;
u32 max_dest_rd_atomic;
(!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \
!((raw_cons) & (cp_bit)))
+static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q)
+{
+ return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta),
+ &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons,
+ &qplib_q->hwq);
+}
+
struct bnxt_qplib_cqe {
u8 status;
u8 type;
int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
- int num);
+ int num, struct bnxt_qplib_qp **qp);
void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/prefetch.h>
+#include <linux/delay.h>
+
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_rcfw.h"
static void bnxt_qplib_service_creq(unsigned long data);
/* Hardware communication channel */
-int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
{
u16 cbit;
int rc;
- cookie &= RCFW_MAX_COOKIE_VALUE;
cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
- if (!test_bit(cbit, rcfw->cmdq_bitmap))
- dev_warn(&rcfw->pdev->dev,
- "QPLIB: CMD bit %d for cookie 0x%x is not set?",
- cbit, cookie);
-
rc = wait_event_timeout(rcfw->waitq,
!test_bit(cbit, rcfw->cmdq_bitmap),
msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
- if (!rc) {
- dev_warn(&rcfw->pdev->dev,
- "QPLIB: Bono Error: timeout %d msec, msg {0x%x}\n",
- RCFW_CMD_WAIT_TIME_MS, cookie);
- }
-
- return rc;
+ return rc ? 0 : -ETIMEDOUT;
};
-int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
{
- u32 count = -1;
+ u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT;
u16 cbit;
- cookie &= RCFW_MAX_COOKIE_VALUE;
cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
if (!test_bit(cbit, rcfw->cmdq_bitmap))
goto done;
do {
+ mdelay(1); /* 1m sec */
bnxt_qplib_service_creq((unsigned long)rcfw);
} while (test_bit(cbit, rcfw->cmdq_bitmap) && --count);
done:
- return count;
+ return count ? 0 : -ETIMEDOUT;
};
-void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
- struct cmdq_base *req, void **crsbe,
- u8 is_block)
+static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
+ struct creq_base *resp, void *sb, u8 is_block)
{
- struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
- struct bnxt_qplib_hwq *crsb = &rcfw->crsb;
- struct bnxt_qplib_crsqe *crsqe = NULL;
- struct bnxt_qplib_crsbe **crsb_ptr;
+ struct bnxt_qplib_crsq *crsqe;
u32 sw_prod, cmdq_prod;
- u8 retry_cnt = 0xFF;
- dma_addr_t dma_addr;
unsigned long flags;
u32 size, opcode;
u16 cookie, cbit;
int pg, idx;
u8 *preq;
-retry:
opcode = req->opcode;
if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
(opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
dev_err(&rcfw->pdev->dev,
"QPLIB: RCFW not initialized, reject opcode 0x%x",
opcode);
- return NULL;
+ return -EINVAL;
}
if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
- return NULL;
+ return -EINVAL;
}
/* Cmdq are in 16-byte units, each request can consume 1 or more
* cmdqe
*/
spin_lock_irqsave(&cmdq->lock, flags);
- if (req->cmd_size > cmdq->max_elements -
- ((HWQ_CMP(cmdq->prod, cmdq) - HWQ_CMP(cmdq->cons, cmdq)) &
- (cmdq->max_elements - 1))) {
+ if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
spin_unlock_irqrestore(&cmdq->lock, flags);
-
- if (!retry_cnt--)
- return NULL;
- goto retry;
+ return -EAGAIN;
}
- retry_cnt = 0xFF;
- cookie = atomic_inc_return(&rcfw->seq_num) & RCFW_MAX_COOKIE_VALUE;
+ cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE;
cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
if (is_block)
cookie |= RCFW_CMD_IS_BLOCKING;
+
+ set_bit(cbit, rcfw->cmdq_bitmap);
req->cookie = cpu_to_le16(cookie);
- if (test_and_set_bit(cbit, rcfw->cmdq_bitmap)) {
- dev_err(&rcfw->pdev->dev,
- "QPLIB: RCFW MAX outstanding cmd reached!");
- atomic_dec(&rcfw->seq_num);
+ crsqe = &rcfw->crsqe_tbl[cbit];
+ if (crsqe->resp) {
spin_unlock_irqrestore(&cmdq->lock, flags);
-
- if (!retry_cnt--)
- return NULL;
- goto retry;
+ return -EBUSY;
}
- /* Reserve a resp buffer slot if requested */
- if (req->resp_size && crsbe) {
- spin_lock(&crsb->lock);
- sw_prod = HWQ_CMP(crsb->prod, crsb);
- crsb_ptr = (struct bnxt_qplib_crsbe **)crsb->pbl_ptr;
- *crsbe = (void *)&crsb_ptr[get_crsb_pg(sw_prod)]
- [get_crsb_idx(sw_prod)];
- bnxt_qplib_crsb_dma_next(crsb->pbl_dma_ptr, sw_prod, &dma_addr);
- req->resp_addr = cpu_to_le64(dma_addr);
- crsb->prod++;
- spin_unlock(&crsb->lock);
-
- req->resp_size = (sizeof(struct bnxt_qplib_crsbe) +
- BNXT_QPLIB_CMDQE_UNITS - 1) /
- BNXT_QPLIB_CMDQE_UNITS;
+ memset(resp, 0, sizeof(*resp));
+ crsqe->resp = (struct creq_qp_event *)resp;
+ crsqe->resp->cookie = req->cookie;
+ crsqe->req_size = req->cmd_size;
+ if (req->resp_size && sb) {
+ struct bnxt_qplib_rcfw_sbuf *sbuf = sb;
+
+ req->resp_addr = cpu_to_le64(sbuf->dma_addr);
+ req->resp_size = (sbuf->size + BNXT_QPLIB_CMDQE_UNITS - 1) /
+ BNXT_QPLIB_CMDQE_UNITS;
}
+
cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr;
preq = (u8 *)req;
size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS;
preq += min_t(u32, size, sizeof(*cmdqe));
size -= min_t(u32, size, sizeof(*cmdqe));
cmdq->prod++;
+ rcfw->seq_num++;
} while (size > 0);
+ rcfw->seq_num++;
+
cmdq_prod = cmdq->prod;
if (rcfw->flags & FIRMWARE_FIRST_FLAG) {
- /* The very first doorbell write is required to set this flag
- * which prompts the FW to reset its internal pointers
+ /* The very first doorbell write
+ * is required to set this flag
+ * which prompts the FW to reset
+ * its internal pointers
*/
cmdq_prod |= FIRMWARE_FIRST_FLAG;
rcfw->flags &= ~FIRMWARE_FIRST_FLAG;
}
- sw_prod = HWQ_CMP(crsq->prod, crsq);
- crsqe = &crsq->crsq[sw_prod];
- memset(crsqe, 0, sizeof(*crsqe));
- crsq->prod++;
- crsqe->req_size = req->cmd_size;
/* ring CMDQ DB */
+ wmb();
writel(cmdq_prod, rcfw->cmdq_bar_reg_iomem +
rcfw->cmdq_bar_reg_prod_off);
writel(RCFW_CMDQ_TRIG_VAL, rcfw->cmdq_bar_reg_iomem +
done:
spin_unlock_irqrestore(&cmdq->lock, flags);
/* Return the CREQ response pointer */
- return crsqe ? &crsqe->qp_event : NULL;
+ return 0;
}
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct cmdq_base *req,
+ struct creq_base *resp,
+ void *sb, u8 is_block)
+{
+ struct creq_qp_event *evnt = (struct creq_qp_event *)resp;
+ u16 cookie;
+ u8 opcode, retry_cnt = 0xFF;
+ int rc = 0;
+
+ do {
+ opcode = req->opcode;
+ rc = __send_message(rcfw, req, resp, sb, is_block);
+ cookie = le16_to_cpu(req->cookie) & RCFW_MAX_COOKIE_VALUE;
+ if (!rc)
+ break;
+
+ if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
+ /* send failed */
+ dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed",
+ cookie, opcode);
+ return rc;
+ }
+ is_block ? mdelay(1) : usleep_range(500, 1000);
+
+ } while (retry_cnt--);
+
+ if (is_block)
+ rc = __block_for_resp(rcfw, cookie);
+ else
+ rc = __wait_for_resp(rcfw, cookie);
+ if (rc) {
+ /* timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
+ cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
+ return rc;
+ }
+
+ if (evnt->status) {
+ /* failed with status */
+ dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x",
+ cookie, opcode, evnt->status);
+ rc = -EFAULT;
+ }
+
+ return rc;
+}
/* Completions */
static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
struct creq_func_event *func_event)
static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
struct creq_qp_event *qp_event)
{
- struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
- struct bnxt_qplib_crsqe *crsqe;
- u16 cbit, cookie, blocked = 0;
+ struct bnxt_qplib_crsq *crsqe;
unsigned long flags;
- u32 sw_cons;
+ u16 cbit, blocked = 0;
+ u16 cookie;
+ __le16 mcookie;
switch (qp_event->event) {
case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
default:
/* Command Response */
spin_lock_irqsave(&cmdq->lock, flags);
- sw_cons = HWQ_CMP(crsq->cons, crsq);
- crsqe = &crsq->crsq[sw_cons];
- crsq->cons++;
- memcpy(&crsqe->qp_event, qp_event, sizeof(crsqe->qp_event));
-
- cookie = le16_to_cpu(crsqe->qp_event.cookie);
+ cookie = le16_to_cpu(qp_event->cookie);
+ mcookie = qp_event->cookie;
blocked = cookie & RCFW_CMD_IS_BLOCKING;
cookie &= RCFW_MAX_COOKIE_VALUE;
cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ crsqe = &rcfw->crsqe_tbl[cbit];
+ if (crsqe->resp &&
+ crsqe->resp->cookie == mcookie) {
+ memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+ crsqe->resp = NULL;
+ } else {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x",
+ crsqe->resp ? "mismatch" : "collision",
+ crsqe->resp ? crsqe->resp->cookie : 0, mcookie);
+ }
if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
dev_warn(&rcfw->pdev->dev,
"QPLIB: CMD bit %d was not requested", cbit);
-
cmdq->cons += crsqe->req_size;
- spin_unlock_irqrestore(&cmdq->lock, flags);
+ crsqe->req_size = 0;
+
if (!blocked)
wake_up(&rcfw->waitq);
- break;
+ spin_unlock_irqrestore(&cmdq->lock, flags);
}
return 0;
}
struct creq_base *creqe, **creq_ptr;
u32 sw_cons, raw_cons;
unsigned long flags;
- u32 type;
+ u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
- /* Service the CREQ until empty */
+ /* Service the CREQ until budget is over */
spin_lock_irqsave(&creq->lock, flags);
raw_cons = creq->cons;
- while (1) {
+ while (budget > 0) {
sw_cons = HWQ_CMP(raw_cons, creq);
creq_ptr = (struct creq_base **)creq->pbl_ptr;
creqe = &creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)];
type = creqe->type & CREQ_BASE_TYPE_MASK;
switch (type) {
case CREQ_BASE_TYPE_QP_EVENT:
- if (!bnxt_qplib_process_qp_event
- (rcfw, (struct creq_qp_event *)creqe))
- rcfw->creq_qp_event_processed++;
- else {
- dev_warn(&rcfw->pdev->dev, "QPLIB: crsqe with");
- dev_warn(&rcfw->pdev->dev,
- "QPLIB: type = 0x%x not handled",
- type);
- }
+ bnxt_qplib_process_qp_event
+ (rcfw, (struct creq_qp_event *)creqe);
+ rcfw->creq_qp_event_processed++;
break;
case CREQ_BASE_TYPE_FUNC_EVENT:
if (!bnxt_qplib_process_func_event
break;
}
raw_cons++;
+ budget--;
}
+
if (creq->cons != raw_cons) {
creq->cons = raw_cons;
CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, raw_cons,
/* RCFW */
int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw)
{
- struct creq_deinitialize_fw_resp *resp;
struct cmdq_deinitialize_fw req;
+ struct creq_deinitialize_fw_resp resp;
u16 cmd_flags = 0;
+ int rc;
RCFW_CMD_PREP(req, DEINITIALIZE_FW, cmd_flags);
- resp = (struct creq_deinitialize_fw_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 0);
- if (!resp)
- return -EINVAL;
-
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie)))
- return -ETIMEDOUT;
-
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie))
- return -EFAULT;
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ NULL, 0);
+ if (rc)
+ return rc;
clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
return 0;
int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx, int is_virtfn)
{
- struct creq_initialize_fw_resp *resp;
struct cmdq_initialize_fw req;
+ struct creq_initialize_fw_resp resp;
u16 cmd_flags = 0, level;
+ int rc;
RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
skip_ctx_setup:
req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
- resp = (struct creq_initialize_fw_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 0);
- if (!resp) {
- dev_err(&rcfw->pdev->dev,
- "QPLIB: RCFW: INITIALIZE_FW send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&rcfw->pdev->dev,
- "QPLIB: RCFW: INITIALIZE_FW timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev,
- "QPLIB: RCFW: INITIALIZE_FW failed");
- return -EINVAL;
- }
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ NULL, 0);
+ if (rc)
+ return rc;
set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
return 0;
}
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
{
- bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->crsb);
- kfree(rcfw->crsq.crsq);
+ kfree(rcfw->crsqe_tbl);
bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq);
bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq);
-
rcfw->pdev = NULL;
}
goto fail;
}
- rcfw->crsq.max_elements = rcfw->cmdq.max_elements;
- rcfw->crsq.crsq = kcalloc(rcfw->crsq.max_elements,
- sizeof(*rcfw->crsq.crsq), GFP_KERNEL);
- if (!rcfw->crsq.crsq)
+ rcfw->crsqe_tbl = kcalloc(rcfw->cmdq.max_elements,
+ sizeof(*rcfw->crsqe_tbl), GFP_KERNEL);
+ if (!rcfw->crsqe_tbl)
goto fail;
- rcfw->crsb.max_elements = BNXT_QPLIB_CRSBE_MAX_CNT;
- if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->crsb, NULL, 0,
- &rcfw->crsb.max_elements,
- BNXT_QPLIB_CRSBE_UNITS, 0, PAGE_SIZE,
- HWQ_TYPE_CTX)) {
- dev_err(&rcfw->pdev->dev,
- "QPLIB: HW channel CRSB allocation failed");
- goto fail;
- }
return 0;
fail:
int rc;
/* General */
- atomic_set(&rcfw->seq_num, 0);
+ rcfw->seq_num = 0;
rcfw->flags = FIRMWARE_FIRST_FLAG;
bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD *
sizeof(unsigned long));
rcfw->cmdq_bar_reg_trig_off = RCFW_COMM_TRIG_OFFSET;
- /* CRSQ */
- rcfw->crsq.prod = 0;
- rcfw->crsq.cons = 0;
-
/* CREQ */
rcfw->creq_bar_reg = RCFW_COMM_CONS_PCI_BAR_REGION;
res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
__iowrite32_copy(rcfw->cmdq_bar_reg_iomem, &init, sizeof(init) / 4);
return 0;
}
+
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+ struct bnxt_qplib_rcfw *rcfw,
+ u32 size)
+{
+ struct bnxt_qplib_rcfw_sbuf *sbuf;
+
+ sbuf = kzalloc(sizeof(*sbuf), GFP_ATOMIC);
+ if (!sbuf)
+ return NULL;
+
+ sbuf->size = size;
+ sbuf->sb = dma_zalloc_coherent(&rcfw->pdev->dev, sbuf->size,
+ &sbuf->dma_addr, GFP_ATOMIC);
+ if (!sbuf->sb)
+ goto bail;
+
+ return sbuf;
+bail:
+ kfree(sbuf);
+ return NULL;
+}
+
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_rcfw_sbuf *sbuf)
+{
+ if (sbuf->sb)
+ dma_free_coherent(&rcfw->pdev->dev, sbuf->size,
+ sbuf->sb, sbuf->dma_addr);
+ kfree(sbuf);
+}
#define RCFW_MAX_OUTSTANDING_CMD BNXT_QPLIB_CMDQE_MAX_CNT
#define RCFW_MAX_COOKIE_VALUE 0x7FFF
#define RCFW_CMD_IS_BLOCKING 0x8000
+#define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20
/* Cmdq contains a fix number of a 16-Byte slots */
struct bnxt_qplib_cmdqe {
u8 data[1024];
};
-/* CRSQ SB */
-#define BNXT_QPLIB_CRSBE_MAX_CNT 4
-#define BNXT_QPLIB_CRSBE_UNITS sizeof(struct bnxt_qplib_crsbe)
-#define BNXT_QPLIB_CRSBE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CRSBE_UNITS)
-
-#define MAX_CRSB_IDX (BNXT_QPLIB_CRSBE_MAX_CNT - 1)
-#define MAX_CRSB_IDX_PER_PG (BNXT_QPLIB_CRSBE_CNT_PER_PG - 1)
-
-static inline u32 get_crsb_pg(u32 val)
-{
- return (val & ~MAX_CRSB_IDX_PER_PG) / BNXT_QPLIB_CRSBE_CNT_PER_PG;
-}
-
-static inline u32 get_crsb_idx(u32 val)
-{
- return val & MAX_CRSB_IDX_PER_PG;
-}
-
-static inline void bnxt_qplib_crsb_dma_next(dma_addr_t *pg_map_arr,
- u32 prod, dma_addr_t *dma_addr)
-{
- *dma_addr = pg_map_arr[(prod) / BNXT_QPLIB_CRSBE_CNT_PER_PG];
- *dma_addr += ((prod) % BNXT_QPLIB_CRSBE_CNT_PER_PG) *
- BNXT_QPLIB_CRSBE_UNITS;
-}
-
/* CREQ */
/* Allocate 1 per QP for async error notification for now */
#define BNXT_QPLIB_CREQE_MAX_CNT (64 * 1024)
#define CREQ_DB(db, raw_cons, cp_bit) \
writel(CREQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
+#define CREQ_ENTRY_POLL_BUDGET 0x100
+
/* HWQ */
-struct bnxt_qplib_crsqe {
- struct creq_qp_event qp_event;
+
+struct bnxt_qplib_crsq {
+ struct creq_qp_event *resp;
u32 req_size;
};
-struct bnxt_qplib_crsq {
- struct bnxt_qplib_crsqe *crsq;
- u32 prod;
- u32 cons;
- u32 max_elements;
+struct bnxt_qplib_rcfw_sbuf {
+ void *sb;
+ dma_addr_t dma_addr;
+ u32 size;
};
/* RCFW Communication Channels */
wait_queue_head_t waitq;
int (*aeq_handler)(struct bnxt_qplib_rcfw *,
struct creq_func_event *);
- atomic_t seq_num;
+ u32 seq_num;
/* Bar region info */
void __iomem *cmdq_bar_reg_iomem;
/* Actual Cmd and Resp Queues */
struct bnxt_qplib_hwq cmdq;
- struct bnxt_qplib_crsq crsq;
- struct bnxt_qplib_hwq crsb;
+ struct bnxt_qplib_crsq *crsqe_tbl;
};
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
(struct bnxt_qplib_rcfw *,
struct creq_func_event *));
-int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
-int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
-void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
- struct cmdq_base *req, void **crsbe,
- u8 is_block);
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+ struct bnxt_qplib_rcfw *rcfw,
+ u32 size);
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_rcfw_sbuf *sbuf);
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct cmdq_base *req, struct creq_base *resp,
+ void *sbuf, u8 is_block);
int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
#define HWQ_CMP(idx, hwq) ((idx) & ((hwq)->max_elements - 1))
+#define HWQ_FREE_SLOTS(hwq) (hwq->max_elements - \
+ ((HWQ_CMP(hwq->prod, hwq)\
+ - HWQ_CMP(hwq->cons, hwq))\
+ & (hwq->max_elements - 1)))
enum bnxt_qplib_hwq_type {
HWQ_TYPE_CTX,
HWQ_TYPE_QUEUE,
struct bnxt_qplib_dev_attr *attr)
{
struct cmdq_query_func req;
- struct creq_query_func_resp *resp;
+ struct creq_query_func_resp resp;
+ struct bnxt_qplib_rcfw_sbuf *sbuf;
struct creq_query_func_resp_sb *sb;
u16 cmd_flags = 0;
u32 temp;
u8 *tqm_alloc;
- int i;
+ int i, rc = 0;
RCFW_CMD_PREP(req, QUERY_FUNC, cmd_flags);
- req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
- resp = (struct creq_query_func_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void **)&sb,
- 0);
- if (!resp) {
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC failed ");
+ sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+ if (!sbuf) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
+ "QPLIB: SP: QUERY_FUNC alloc side buffer failed");
+ return -ENOMEM;
}
+
+ sb = sbuf->sb;
+ req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ (void *)sbuf, 0);
+ if (rc)
+ goto bail;
+
/* Extract the context from the side buffer */
attr->max_qp = le32_to_cpu(sb->max_qp);
attr->max_qp_rd_atom =
sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr);
+ /*
+ * 128 WQEs needs to be reserved for the HW (8916). Prevent
+ * reporting the max number
+ */
+ attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS;
attr->max_qp_sges = sb->max_sge;
attr->max_cq = le32_to_cpu(sb->max_cq);
attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc);
attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
}
- return 0;
+
+bail:
+ bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ return rc;
}
/* SGID */
/* Remove GID from the SGID table */
if (update) {
struct cmdq_delete_gid req;
- struct creq_delete_gid_resp *resp;
+ struct creq_delete_gid_resp resp;
u16 cmd_flags = 0;
+ int rc;
RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
if (sgid_tbl->hw_id[index] == 0xFFFF) {
return -EINVAL;
}
req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
- resp = (struct creq_delete_gid_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL,
- 0);
- if (!resp) {
- dev_err(&res->pdev->dev,
- "QPLIB: SP: DELETE_GID send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
- le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&res->pdev->dev,
- "QPLIB: SP: DELETE_GID timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&res->pdev->dev,
- "QPLIB: SP: DELETE_GID failed ");
- dev_err(&res->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
- }
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
}
memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
sizeof(bnxt_qplib_gid_zero));
struct bnxt_qplib_res,
sgid_tbl);
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- int i, free_idx, rc = 0;
+ int i, free_idx;
if (!sgid_tbl) {
dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
}
if (update) {
struct cmdq_add_gid req;
- struct creq_add_gid_resp *resp;
+ struct creq_add_gid_resp resp;
u16 cmd_flags = 0;
u32 temp32[4];
u16 temp16[3];
+ int rc;
RCFW_CMD_PREP(req, ADD_GID, cmd_flags);
req.src_mac[1] = cpu_to_be16(temp16[1]);
req.src_mac[2] = cpu_to_be16(temp16[2]);
- resp = (struct creq_add_gid_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 0);
- if (!resp) {
- dev_err(&res->pdev->dev,
- "QPLIB: SP: ADD_GID send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
- le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&res->pdev->dev,
- "QPIB: SP: ADD_GID timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&res->pdev->dev, "QPLIB: SP: ADD_GID failed ");
- dev_err(&res->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
- }
- sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp->xid);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
+ sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp.xid);
}
/* Add GID to the sgid_tbl */
memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
*index = free_idx;
/* unlock */
- return rc;
+ return 0;
}
/* pkeys */
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_create_ah req;
- struct creq_create_ah_resp *resp;
+ struct creq_create_ah_resp resp;
u16 cmd_flags = 0;
u32 temp32[4];
u16 temp16[3];
+ int rc;
RCFW_CMD_PREP(req, CREATE_AH, cmd_flags);
req.dest_mac[1] = cpu_to_le16(temp16[1]);
req.dest_mac[2] = cpu_to_le16(temp16[2]);
- resp = (struct creq_create_ah_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 1);
- if (!resp) {
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH failed ");
- dev_err(&rcfw->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
- }
- ah->id = le32_to_cpu(resp->xid);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ NULL, 1);
+ if (rc)
+ return rc;
+
+ ah->id = le32_to_cpu(resp.xid);
return 0;
}
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_destroy_ah req;
- struct creq_destroy_ah_resp *resp;
+ struct creq_destroy_ah_resp resp;
u16 cmd_flags = 0;
+ int rc;
/* Clean up the AH table in the device */
RCFW_CMD_PREP(req, DESTROY_AH, cmd_flags);
req.ah_cid = cpu_to_le32(ah->id);
- resp = (struct creq_destroy_ah_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 1);
- if (!resp) {
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH failed ");
- dev_err(&rcfw->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
- }
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ NULL, 1);
+ if (rc)
+ return rc;
return 0;
}
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_deallocate_key req;
- struct creq_deallocate_key_resp *resp;
+ struct creq_deallocate_key_resp resp;
u16 cmd_flags = 0;
+ int rc;
if (mrw->lkey == 0xFFFFFFFF) {
dev_info(&res->pdev->dev,
else
req.key = cpu_to_le32(mrw->lkey);
- resp = (struct creq_deallocate_key_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 0);
- if (!resp) {
- dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR failed ");
- dev_err(&res->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
- }
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ NULL, 0);
+ if (rc)
+ return rc;
+
/* Free the qplib's MRW memory */
if (mrw->hwq.max_elements)
bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_allocate_mrw req;
- struct creq_allocate_mrw_resp *resp;
+ struct creq_allocate_mrw_resp resp;
u16 cmd_flags = 0;
unsigned long tmp;
+ int rc;
RCFW_CMD_PREP(req, ALLOCATE_MRW, cmd_flags);
tmp = (unsigned long)mrw;
req.mrw_handle = cpu_to_le64(tmp);
- resp = (struct creq_allocate_mrw_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, 0);
- if (!resp) {
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW send failed");
- return -EINVAL;
- }
- if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
- /* Cmd timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW failed ");
- dev_err(&rcfw->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
- }
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
+
if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1) ||
(mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
(mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
- mrw->rkey = le32_to_cpu(resp->xid);
+ mrw->rkey = le32_to_cpu(resp.xid);
else
- mrw->lkey = le32_to_cpu(resp->xid);
+ mrw->lkey = le32_to_cpu(resp.xid);
return 0;
}
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_deregister_mr req;
- struct creq_deregister_mr_resp *resp;
+ struct creq_deregister_mr_resp resp;
u16 cmd_flags = 0;
int rc;
RCFW_CMD_PREP(req, DEREGISTER_MR, cmd_flags);
req.lkey = cpu_to_le32(mrw->lkey);
- resp = (struct creq_deregister_mr_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, block);
- if (!resp) {
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR send failed");
- return -EINVAL;
- }
- if (block)
- rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
- le16_to_cpu(req.cookie));
- else
- rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
- le16_to_cpu(req.cookie));
- if (!rc) {
- /* Cmd timed out */
- dev_err(&res->pdev->dev, "QPLIB: SP: DEREG_MR timed out");
- return -ETIMEDOUT;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR failed ");
- dev_err(&rcfw->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
- }
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, block);
+ if (rc)
+ return rc;
/* Free the qplib's MR memory */
if (mrw->hwq.max_elements) {
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_register_mr req;
- struct creq_register_mr_resp *resp;
+ struct creq_register_mr_resp resp;
u16 cmd_flags = 0, level;
int pg_ptrs, pages, i, rc;
dma_addr_t **pbl_ptr;
req.key = cpu_to_le32(mr->lkey);
req.mr_size = cpu_to_le64(mr->total_size);
- resp = (struct creq_register_mr_resp *)
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- NULL, block);
- if (!resp) {
- dev_err(&res->pdev->dev, "SP: REG_MR send failed");
- rc = -EINVAL;
- goto fail;
- }
- if (block)
- rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
- le16_to_cpu(req.cookie));
- else
- rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
- le16_to_cpu(req.cookie));
- if (!rc) {
- /* Cmd timed out */
- dev_err(&res->pdev->dev, "SP: REG_MR timed out");
- rc = -ETIMEDOUT;
- goto fail;
- }
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&res->pdev->dev, "QPLIB: SP: REG_MR failed ");
- dev_err(&res->pdev->dev,
- "QPLIB: SP: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- rc = -EINVAL;
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, block);
+ if (rc)
goto fail;
- }
+
return 0;
fail:
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_map_tc_to_cos req;
- struct creq_map_tc_to_cos_resp *resp;
+ struct creq_map_tc_to_cos_resp resp;
u16 cmd_flags = 0;
- int tleft;
+ int rc = 0;
RCFW_CMD_PREP(req, MAP_TC_TO_COS, cmd_flags);
req.cos0 = cpu_to_le16(cids[0]);
req.cos1 = cpu_to_le16(cids[1]);
- resp = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL, 0);
- if (!resp) {
- dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS send failed");
- return -EINVAL;
- }
-
- tleft = bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie));
- if (!tleft) {
- dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS timed out");
- return -ETIMEDOUT;
- }
-
- if (resp->status ||
- le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
- dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS failed ");
- dev_err(&res->pdev->dev,
- "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
- resp->status, le16_to_cpu(req.cookie),
- le16_to_cpu(resp->cookie));
- return -EINVAL;
- }
-
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
return 0;
}
#ifndef __BNXT_QPLIB_SP_H__
#define __BNXT_QPLIB_SP_H__
+#define BNXT_QPLIB_RESERVED_QP_WRS 128
+
struct bnxt_qplib_dev_attr {
char fw_ver[32];
u16 max_sgid;
kfree(entry);
}
- list_for_each_safe(pos, nxt, &uctx->qpids) {
+ list_for_each_safe(pos, nxt, &uctx->cqids) {
entry = list_entry(pos, struct c4iw_qid_list, entry);
list_del_init(&entry->entry);
kfree(entry);
rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free");
if (!rdev->free_workq) {
err = -ENOMEM;
- goto err_free_status_page;
+ goto err_free_status_page_and_wr_log;
}
rdev->status_page->db_off = 0;
return 0;
-err_free_status_page:
+err_free_status_page_and_wr_log:
+ if (c4iw_wr_log && rdev->wr_log)
+ kfree(rdev->wr_log);
free_page((unsigned long)rdev->status_page);
destroy_ocqp_pool:
c4iw_ocqp_pool_destroy(rdev);
{
destroy_workqueue(rdev->free_workq);
kfree(rdev->wr_log);
+ c4iw_release_dev_ucontext(rdev, &rdev->uctx);
free_page((unsigned long)rdev->status_page);
c4iw_pblpool_destroy(rdev);
c4iw_rqtpool_destroy(rdev);
+ c4iw_ocqp_pool_destroy(rdev);
c4iw_destroy_resource(&rdev->resource);
}
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
- dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;
- dev->ib_dev.free_rdma_netdev = mlx5_ib_free_rdma_netdev;
+ if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
+ dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;
+ dev->ib_dev.free_rdma_netdev = mlx5_ib_free_rdma_netdev;
+ }
if (mlx5_core_is_pf(mdev)) {
dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
#define QEDR_MSG_QP " QP"
#define QEDR_MSG_GSI " GSI"
-#define QEDR_CQ_MAGIC_NUMBER (0x11223344)
+#define QEDR_CQ_MAGIC_NUMBER (0x11223344)
+
+#define FW_PAGE_SIZE (RDMA_RING_PAGE_SIZE)
+#define FW_PAGE_SHIFT (12)
struct qedr_dev;
static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
struct qedr_pbl *pbl,
- struct qedr_pbl_info *pbl_info)
+ struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+ u32 fw_pg_cnt, fw_pg_per_umem_pg;
struct qedr_pbl *pbl_tbl;
struct scatterlist *sg;
struct regpair *pbe;
+ u64 pg_addr;
int entry;
- u32 addr;
if (!pbl_info->num_pbes)
return;
shift = umem->page_shift;
+ fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
+
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
pages = sg_dma_len(sg) >> shift;
+ pg_addr = sg_dma_address(sg);
for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
- /* store the page address in pbe */
- pbe->lo = cpu_to_le32(sg_dma_address(sg) +
- (pg_cnt << shift));
- addr = upper_32_bits(sg_dma_address(sg) +
- (pg_cnt << shift));
- pbe->hi = cpu_to_le32(addr);
- pbe_cnt++;
- total_num_pbes++;
- pbe++;
-
- if (total_num_pbes == pbl_info->num_pbes)
- return;
-
- /* If the given pbl is full storing the pbes,
- * move to next pbl.
- */
- if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct regpair *)pbl_tbl->va;
- pbe_cnt = 0;
+ for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
+ pbe->lo = cpu_to_le32(pg_addr);
+ pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
+
+ pg_addr += BIT(pg_shift);
+ pbe_cnt++;
+ total_num_pbes++;
+ pbe++;
+
+ if (total_num_pbes == pbl_info->num_pbes)
+ return;
+
+ /* If the given pbl is full storing the pbes,
+ * move to next pbl.
+ */
+ if (pbe_cnt ==
+ (pbl_info->pbl_size / sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct regpair *)pbl_tbl->va;
+ pbe_cnt = 0;
+ }
+
+ fw_pg_cnt++;
}
}
}
u64 buf_addr, size_t buf_len,
int access, int dmasync)
{
- int page_cnt;
+ u32 fw_pages;
int rc;
q->buf_addr = buf_addr;
return PTR_ERR(q->umem);
}
- page_cnt = ib_umem_page_count(q->umem);
- rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
+ fw_pages = ib_umem_page_count(q->umem) <<
+ (q->umem->page_shift - FW_PAGE_SHIFT);
+
+ rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
if (rc)
goto err0;
goto err0;
}
- qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
+ qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
+ FW_PAGE_SHIFT);
return 0;
goto err1;
qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
- &mr->info.pbl_info);
+ &mr->info.pbl_info, mr->umem->page_shift);
rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
if (rc) {
case IB_WC_REG_MR:
qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
break;
+ case IB_WC_RDMA_READ:
+ case IB_WC_SEND:
+ wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
+ break;
default:
break;
}
static inline u32 rxe_crc32(struct rxe_dev *rxe,
u32 crc, void *next, size_t len)
{
+ u32 retval;
int err;
SHASH_DESC_ON_STACK(shash, rxe->tfm);
return crc32_le(crc, next, len);
}
- return *(u32 *)shash_desc_ctx(shash);
+ retval = *(u32 *)shash_desc_ctx(shash);
+ barrier_data(shash_desc_ctx(shash));
+ return retval;
}
int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
sge = ibwr->sg_list;
for (i = 0; i < num_sge; i++, sge++) {
- if (qp->is_user && copy_from_user(p, (__user void *)
- (uintptr_t)sge->addr, sge->length))
- return -EFAULT;
-
- else if (!qp->is_user)
- memcpy(p, (void *)(uintptr_t)sge->addr,
- sge->length);
+ memcpy(p, (void *)(uintptr_t)sge->addr,
+ sge->length);
p += sge->length;
}
set_bit(IPOIB_STOP_REAPER, &priv->flags);
cancel_delayed_work(&priv->ah_reap_task);
set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
- napi_enable(&priv->napi);
ipoib_ib_dev_stop(dev);
return -1;
}
ipoib_transport_dev_cleanup(dev);
+ netif_napi_del(&priv->napi);
+
ipoib_cm_dev_cleanup(dev);
kfree(priv->rx_ring);
kfree(priv->rx_ring);
out:
+ netif_napi_del(&priv->napi);
return -ENOMEM;
}
device_init_failed:
free_netdev(priv->dev);
+ kfree(priv);
alloc_mem_failed:
return ERR_PTR(result);
static void ipoib_remove_one(struct ib_device *device, void *client_data)
{
- struct ipoib_dev_priv *priv, *tmp;
+ struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
struct list_head *dev_list = client_data;
if (!dev_list)
flush_workqueue(priv->wq);
unregister_netdev(priv->dev);
- free_netdev(priv->dev);
+ if (device->free_rdma_netdev)
+ device->free_rdma_netdev(priv->dev);
+ else
+ free_netdev(priv->dev);
+
+ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
+ kfree(cpriv);
+
kfree(priv);
}
snprintf(intf_name, sizeof intf_name, "%s.%04x",
ppriv->dev->name, pkey);
+ if (!rtnl_trylock())
+ return restart_syscall();
+
priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (!priv)
return -ENOMEM;
- if (!rtnl_trylock())
- return restart_syscall();
-
down_write(&ppriv->vlan_rwsem);
/*
rtnl_unlock();
- if (result)
+ if (result) {
free_netdev(priv->dev);
+ kfree(priv);
+ }
return result;
}
if (dev) {
free_netdev(dev);
+ kfree(priv);
return 0;
}
if (!btns_desc) {
dev_err(dev, "ACPI Button Descriptors not found\n");
- return ERR_PTR(-ENODEV);
+ button_info = ERR_PTR(-ENODEV);
+ goto out;
}
/* The first package describes the collection */
}
if (collection_uid == -1) {
dev_err(dev, "Invalid Button Collection Descriptor\n");
- return ERR_PTR(-ENODEV);
+ button_info = ERR_PTR(-ENODEV);
+ goto out;
}
/* There are package.count - 1 buttons + 1 terminating empty entry */
button_info = devm_kcalloc(dev, btns_desc->package.count,
sizeof(*button_info), GFP_KERNEL);
- if (!button_info)
- return ERR_PTR(-ENOMEM);
+ if (!button_info) {
+ button_info = ERR_PTR(-ENOMEM);
+ goto out;
+ }
/* Parse the button descriptors */
for (i = 1, btn = 0; i < btns_desc->package.count; i++, btn++) {
if (soc_button_parse_btn_desc(dev,
&btns_desc->package.elements[i],
collection_uid,
- &button_info[btn]))
- return ERR_PTR(-ENODEV);
+ &button_info[btn])) {
+ button_info = ERR_PTR(-ENODEV);
+ goto out;
+ }
}
+out:
+ kfree(buf.pointer);
return button_info;
}
#define F54_GET_REPORT 1
#define F54_FORCE_CAL 2
-/* Fixed sizes of reports */
-#define F54_QUERY_LEN 27
-
/* F54 capabilities */
#define F54_CAP_BASELINE (1 << 2)
#define F54_CAP_IMAGE8 (1 << 3)
struct f54_data {
struct rmi_function *fn;
- u8 qry[F54_QUERY_LEN];
u8 num_rx_electrodes;
u8 num_tx_electrodes;
u8 capabilities;
{
int error;
struct f54_data *f54;
+ u8 buf[6];
f54 = dev_get_drvdata(&fn->dev);
error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr,
- &f54->qry, sizeof(f54->qry));
+ buf, sizeof(buf));
if (error) {
dev_err(&fn->dev, "%s: Failed to query F54 properties\n",
__func__);
return error;
}
- f54->num_rx_electrodes = f54->qry[0];
- f54->num_tx_electrodes = f54->qry[1];
- f54->capabilities = f54->qry[2];
- f54->clock_rate = f54->qry[3] | (f54->qry[4] << 8);
- f54->family = f54->qry[5];
+ f54->num_rx_electrodes = buf[0];
+ f54->num_tx_electrodes = buf[1];
+ f54->capabilities = buf[2];
+ f54->clock_rate = buf[3] | (buf[4] << 8);
+ f54->family = buf[5];
rmi_dbg(RMI_DEBUG_FN, &fn->dev, "F54 num_rx_electrodes: %d\n",
f54->num_rx_electrodes);
DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"),
},
},
+ {
+ /* Fujitsu UH554 laptop */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"),
+ },
+ },
{ }
};
u8 vector, u32 dest_apicid, int devid)
{
struct irte_ga *irte = (struct irte_ga *) entry;
- struct iommu_dev_data *dev_data = search_dev_data(devid);
irte->lo.val = 0;
irte->hi.val = 0;
- irte->lo.fields_remap.guest_mode = dev_data ? dev_data->use_vapic : 0;
irte->lo.fields_remap.int_type = delivery_mode;
irte->lo.fields_remap.dm = dest_mode;
irte->hi.fields.vector = vector;
struct irte_ga *irte = (struct irte_ga *) entry;
struct iommu_dev_data *dev_data = search_dev_data(devid);
- if (!dev_data || !dev_data->use_vapic) {
+ if (!dev_data || !dev_data->use_vapic ||
+ !irte->lo.fields_remap.guest_mode) {
irte->hi.fields.vector = vector;
irte->lo.fields_remap.destination = dest_apicid;
- irte->lo.fields_remap.guest_mode = 0;
modify_irte_ga(devid, index, irte, NULL);
}
}
* for Directed-IO Architecture Specifiction, Rev 2.2, Section 8.8
* "Remapping Hardware Unit Hot Plug".
*/
-static u8 dmar_hp_uuid[] = {
- /* 0000 */ 0xA6, 0xA3, 0xC1, 0xD8, 0x9B, 0xBE, 0x9B, 0x4C,
- /* 0008 */ 0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF
-};
+static guid_t dmar_hp_guid =
+ GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
+ 0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
/*
* Currently there's only one revision and BIOS will not check the revision id,
static inline bool dmar_detect_dsm(acpi_handle handle, int func)
{
- return acpi_check_dsm(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, 1 << func);
+ return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
}
static int dmar_walk_dsm_resource(acpi_handle handle, int func,
if (!dmar_detect_dsm(handle, func))
return 0;
- obj = acpi_evaluate_dsm_typed(handle, dmar_hp_uuid, DMAR_DSM_REV_ID,
+ obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
func, NULL, ACPI_TYPE_BUFFER);
if (!obj)
return -ENODEV;
}
#ifdef CONFIG_CLKSRC_MIPS_GIC
-u64 gic_read_count(void)
+u64 notrace gic_read_count(void)
{
unsigned int hi, hi2, lo;
return bits;
}
-void gic_write_compare(u64 cnt)
+void notrace gic_write_compare(u64 cnt)
{
if (mips_cm_is64) {
gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE), cnt);
}
}
-void gic_write_cpu_compare(u64 cnt, int cpu)
+void notrace gic_write_cpu_compare(u64 cnt, int cpu)
{
unsigned long flags;
int __init xtensa_mx_init_legacy(struct device_node *interrupt_parent)
{
struct irq_domain *root_domain =
- irq_domain_add_legacy(NULL, NR_IRQS, 0, 0,
+ irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
&xtensa_mx_irq_domain_ops,
&xtensa_mx_irq_chip);
irq_set_default_host(root_domain);
int __init xtensa_pic_init_legacy(struct device_node *interrupt_parent)
{
struct irq_domain *root_domain =
- irq_domain_add_legacy(NULL, NR_IRQS, 0, 0,
+ irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
&xtensa_irq_domain_ops, &xtensa_irq_chip);
irq_set_default_host(root_domain);
return 0;
spin_lock_irqsave(lock, flags);
val = bcm6328_led_read(addr);
- val |= (BIT(reg) << (((sel % 4) * 4) + 16));
+ val |= (BIT(reg % 4) << (((sel % 4) * 4) + 16));
bcm6328_led_write(addr, val);
spin_unlock_irqrestore(lock, flags);
}
spin_lock_irqsave(lock, flags);
val = bcm6328_led_read(addr);
- val |= (BIT(reg) << ((sel % 4) * 4));
+ val |= (BIT(reg % 4) << ((sel % 4) * 4));
bcm6328_led_write(addr, val);
spin_unlock_irqrestore(lock, flags);
}
#include <linux/sched/loadavg.h>
#include <linux/leds.h>
#include <linux/reboot.h>
-#include <linux/suspend.h>
#include "../leds.h"
static int panic_heartbeats;
.deactivate = heartbeat_trig_deactivate,
};
-static int heartbeat_pm_notifier(struct notifier_block *nb,
- unsigned long pm_event, void *unused)
-{
- int rc;
-
- switch (pm_event) {
- case PM_SUSPEND_PREPARE:
- case PM_HIBERNATION_PREPARE:
- case PM_RESTORE_PREPARE:
- led_trigger_unregister(&heartbeat_led_trigger);
- break;
- case PM_POST_SUSPEND:
- case PM_POST_HIBERNATION:
- case PM_POST_RESTORE:
- rc = led_trigger_register(&heartbeat_led_trigger);
- if (rc)
- pr_err("could not re-register heartbeat trigger\n");
- break;
- default:
- break;
- }
- return NOTIFY_DONE;
-}
-
static int heartbeat_reboot_notifier(struct notifier_block *nb,
unsigned long code, void *unused)
{
return NOTIFY_DONE;
}
-static struct notifier_block heartbeat_pm_nb = {
- .notifier_call = heartbeat_pm_notifier,
-};
-
static struct notifier_block heartbeat_reboot_nb = {
.notifier_call = heartbeat_reboot_notifier,
};
atomic_notifier_chain_register(&panic_notifier_list,
&heartbeat_panic_nb);
register_reboot_notifier(&heartbeat_reboot_nb);
- register_pm_notifier(&heartbeat_pm_nb);
}
return rc;
}
static void __exit heartbeat_trig_exit(void)
{
- unregister_pm_notifier(&heartbeat_pm_nb);
unregister_reboot_notifier(&heartbeat_reboot_nb);
atomic_notifier_chain_unregister(&panic_notifier_list,
&heartbeat_panic_nb);
}
mutex_unlock(&dev->mlock);
- if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end))
- return -ENOMEM;
+ ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end);
+ if (ret)
+ return ret;
t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
if (!t) {
int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
{
struct nvm_dev *dev = tgt_dev->parent;
+ int ret;
if (!dev->ops->submit_io)
return -ENODEV;
nvm_rq_tgt_to_dev(tgt_dev, rqd);
rqd->dev = tgt_dev;
- return dev->ops->submit_io(dev, rqd);
+
+ /* In case of error, fail with right address format */
+ ret = dev->ops->submit_io(dev, rqd);
+ if (ret)
+ nvm_rq_dev_to_tgt(tgt_dev, rqd);
+ return ret;
}
EXPORT_SYMBOL(nvm_submit_io);
*/
retry:
ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
- if (ret == NVM_IO_REQUEUE) {
+ switch (ret) {
+ case NVM_IO_REQUEUE:
io_schedule();
goto retry;
+ case NVM_IO_ERR:
+ pblk_pipeline_stop(pblk);
+ goto out;
}
if (unlikely(!bio_has_data(bio)))
atomic_long_add(nr_entries, &pblk->req_writes);
#endif
+ pblk_rl_inserted(&pblk->rl, nr_entries);
+
out:
pblk_write_should_kick(pblk);
return ret;
*/
#include "pblk.h"
-#include <linux/time.h>
static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
struct ppa_addr *ppa)
pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
line->id, pos);
- pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
+ pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq);
}
static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
*ppa = rqd->ppa_addr;
pblk_mark_bb(pblk, line, ppa);
}
+
+ atomic_dec(&pblk->inflight_io);
}
/* Erase completion assumes that only one block is erased at the time */
{
struct pblk *pblk = rqd->private;
- up(&pblk->erase_sem);
__pblk_end_io_erase(pblk, rqd);
- mempool_free(rqd, pblk->r_rq_pool);
+ mempool_free(rqd, pblk->g_rq_pool);
}
-static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+ u64 paddr)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
spin_unlock(&line->lock);
return;
}
- line->vsc--;
+ le32_add_cpu(line->vsc, -1);
if (line->state == PBLK_LINESTATE_CLOSED)
move_list = pblk_line_gc_list(pblk, line);
__pblk_map_invalidate(pblk, line, paddr);
}
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
-{
- __pblk_map_invalidate(pblk, line, paddr);
-
- pblk_rb_sync_init(&pblk->rwb, NULL);
- line->left_ssecs--;
- if (!line->left_ssecs)
- pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
- pblk_rb_sync_end(&pblk->rwb, NULL);
-}
-
static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
unsigned int nr_secs)
{
pool = pblk->w_rq_pool;
rq_size = pblk_w_rq_size;
} else {
- pool = pblk->r_rq_pool;
- rq_size = pblk_r_rq_size;
+ pool = pblk->g_rq_pool;
+ rq_size = pblk_g_rq_size;
}
rqd = mempool_alloc(pool, GFP_KERNEL);
if (rw == WRITE)
pool = pblk->w_rq_pool;
else
- pool = pblk->r_rq_pool;
+ pool = pblk->g_rq_pool;
mempool_free(rqd, pool);
}
complete(waiting);
}
-void pblk_flush_writer(struct pblk *pblk)
+void pblk_wait_for_meta(struct pblk *pblk)
{
- struct bio *bio;
- int ret;
- DECLARE_COMPLETION_ONSTACK(wait);
-
- bio = bio_alloc(GFP_KERNEL, 1);
- if (!bio)
- return;
-
- bio->bi_iter.bi_sector = 0; /* internal bio */
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
- bio->bi_private = &wait;
- bio->bi_end_io = pblk_end_bio_sync;
+ do {
+ if (!atomic_read(&pblk->inflight_io))
+ break;
- ret = pblk_write_to_cache(pblk, bio, 0);
- if (ret == NVM_IO_OK) {
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: flush cache timed out\n");
- }
- } else if (ret != NVM_IO_DONE) {
- pr_err("pblk: tear down bio failed\n");
- }
+ schedule();
+ } while (1);
+}
- if (bio->bi_error)
- pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
+static void pblk_flush_writer(struct pblk *pblk)
+{
+ pblk_rb_flush(&pblk->rwb);
+ do {
+ if (!pblk_rb_sync_count(&pblk->rwb))
+ break;
- bio_put(bio);
+ pblk_write_kick(pblk);
+ schedule();
+ } while (1);
}
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
+ int vsc = le32_to_cpu(*line->vsc);
- if (!line->vsc) {
+ lockdep_assert_held(&line->lock);
+
+ if (!vsc) {
if (line->gc_group != PBLK_LINEGC_FULL) {
line->gc_group = PBLK_LINEGC_FULL;
move_list = &l_mg->gc_full_list;
}
- } else if (line->vsc < lm->mid_thrs) {
+ } else if (vsc < lm->high_thrs) {
if (line->gc_group != PBLK_LINEGC_HIGH) {
line->gc_group = PBLK_LINEGC_HIGH;
move_list = &l_mg->gc_high_list;
}
- } else if (line->vsc < lm->high_thrs) {
+ } else if (vsc < lm->mid_thrs) {
if (line->gc_group != PBLK_LINEGC_MID) {
line->gc_group = PBLK_LINEGC_MID;
move_list = &l_mg->gc_mid_list;
}
- } else if (line->vsc < line->sec_in_line) {
+ } else if (vsc < line->sec_in_line) {
if (line->gc_group != PBLK_LINEGC_LOW) {
line->gc_group = PBLK_LINEGC_LOW;
move_list = &l_mg->gc_low_list;
}
- } else if (line->vsc == line->sec_in_line) {
+ } else if (vsc == line->sec_in_line) {
if (line->gc_group != PBLK_LINEGC_EMPTY) {
line->gc_group = PBLK_LINEGC_EMPTY;
move_list = &l_mg->gc_empty_list;
line->gc_group = PBLK_LINEGC_NONE;
move_list = &l_mg->corrupt_list;
pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
- line->id, line->vsc,
+ line->id, vsc,
line->sec_in_line,
lm->high_thrs, lm->mid_thrs);
}
#endif
}
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
+{
+ pblk->sec_per_write = sec_per_write;
+}
+
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
{
struct nvm_tgt_dev *dev = pblk->dev;
}
}
#endif
+
+ atomic_inc(&pblk->inflight_io);
+
return nvm_submit_io(dev, rqd);
}
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
unsigned int nr_secs, unsigned int len,
- gfp_t gfp_mask)
+ int alloc_type, gfp_t gfp_mask)
{
struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
void *kaddr = data;
struct page *page;
struct bio *bio;
int i, ret;
- if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
+ if (alloc_type == PBLK_KMALLOC_META)
return bio_map_kern(dev->q, kaddr, len, gfp_mask);
bio = bio_kmalloc(gfp_mask, nr_secs);
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
unsigned long secs_to_flush)
{
- int max = pblk->max_write_pgs;
+ int max = pblk->sec_per_write;
int min = pblk->min_write_pgs;
int secs_to_sync = 0;
return secs_to_sync;
}
-static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
- int nr_secs)
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
+{
+ u64 addr;
+ int i;
+
+ addr = find_next_zero_bit(line->map_bitmap,
+ pblk->lm.sec_per_line, line->cur_sec);
+ line->cur_sec = addr - nr_secs;
+
+ for (i = 0; i < nr_secs; i++, line->cur_sec--)
+ WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
+}
+
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
u64 addr;
int i;
+ lockdep_assert_held(&line->lock);
+
/* logic error: ppa out-of-bounds. Prevent generating bad address */
if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
WARN(1, "pblk: page allocation out of bounds\n");
return addr;
}
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
+{
+ u64 paddr;
+
+ spin_lock(&line->lock);
+ paddr = find_next_zero_bit(line->map_bitmap,
+ pblk->lm.sec_per_line, line->cur_sec);
+ spin_unlock(&line->lock);
+
+ return paddr;
+}
+
/*
* Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
* taking the per LUN semaphore.
*/
static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
- u64 paddr, int dir)
+ void *emeta_buf, u64 paddr, int dir)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
+ void *ppa_list, *meta_list;
struct bio *bio;
struct nvm_rq rqd;
- struct ppa_addr *ppa_list;
- dma_addr_t dma_ppa_list;
- void *emeta = line->emeta;
+ dma_addr_t dma_ppa_list, dma_meta_list;
int min = pblk->min_write_pgs;
- int left_ppas = lm->emeta_sec;
+ int left_ppas = lm->emeta_sec[0];
int id = line->id;
int rq_ppas, rq_len;
int cmd_op, bio_op;
- int flags;
int i, j;
int ret;
DECLARE_COMPLETION_ONSTACK(wait);
if (dir == WRITE) {
bio_op = REQ_OP_WRITE;
cmd_op = NVM_OP_PWRITE;
- flags = pblk_set_progr_mode(pblk, WRITE);
} else if (dir == READ) {
bio_op = REQ_OP_READ;
cmd_op = NVM_OP_PREAD;
- flags = pblk_set_read_mode(pblk);
} else
return -EINVAL;
- ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
- if (!ppa_list)
+ meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &dma_meta_list);
+ if (!meta_list)
return -ENOMEM;
+ ppa_list = meta_list + pblk_dma_meta_size;
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
next_rq:
memset(&rqd, 0, sizeof(struct nvm_rq));
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
rq_len = rq_ppas * geo->sec_size;
- bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
+ bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
+ l_mg->emeta_alloc_type, GFP_KERNEL);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
goto free_rqd_dma;
bio_set_op_attrs(bio, bio_op, 0);
rqd.bio = bio;
- rqd.opcode = cmd_op;
- rqd.flags = flags;
- rqd.nr_ppas = rq_ppas;
+ rqd.meta_list = meta_list;
rqd.ppa_list = ppa_list;
+ rqd.dma_meta_list = dma_meta_list;
rqd.dma_ppa_list = dma_ppa_list;
+ rqd.opcode = cmd_op;
+ rqd.nr_ppas = rq_ppas;
rqd.end_io = pblk_end_io_sync;
rqd.private = &wait;
if (dir == WRITE) {
+ struct pblk_sec_meta *meta_list = rqd.meta_list;
+
+ rqd.flags = pblk_set_progr_mode(pblk, WRITE);
for (i = 0; i < rqd.nr_ppas; ) {
spin_lock(&line->lock);
paddr = __pblk_alloc_page(pblk, line, min);
spin_unlock(&line->lock);
- for (j = 0; j < min; j++, i++, paddr++)
+ for (j = 0; j < min; j++, i++, paddr++) {
+ meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
rqd.ppa_list[i] =
addr_to_gen_ppa(pblk, paddr, id);
+ }
}
} else {
for (i = 0; i < rqd.nr_ppas; ) {
struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
int pos = pblk_dev_ppa_to_pos(geo, ppa);
+ int read_type = PBLK_READ_RANDOM;
+
+ if (pblk_io_aligned(pblk, rq_ppas))
+ read_type = PBLK_READ_SEQUENTIAL;
+ rqd.flags = pblk_set_read_mode(pblk, read_type);
while (test_bit(pos, line->blk_bitmap)) {
paddr += min;
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: emeta I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
- bio_put(bio);
+ if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
+ bio_put(bio);
if (rqd.error) {
if (dir == WRITE)
pblk_log_read_err(pblk, &rqd);
}
- emeta += rq_len;
+ emeta_buf += rq_len;
left_ppas -= rq_ppas;
if (left_ppas)
goto next_rq;
free_rqd_dma:
- nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return ret;
}
bio_op = REQ_OP_WRITE;
cmd_op = NVM_OP_PWRITE;
flags = pblk_set_progr_mode(pblk, WRITE);
- lba_list = pblk_line_emeta_to_lbas(line->emeta);
+ lba_list = emeta_to_lbas(pblk, line->emeta->buf);
} else if (dir == READ) {
bio_op = REQ_OP_READ;
cmd_op = NVM_OP_PREAD;
- flags = pblk_set_read_mode(pblk);
+ flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
} else
return -EINVAL;
memset(&rqd, 0, sizeof(struct nvm_rq));
- rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd.dma_ppa_list);
- if (!rqd.ppa_list)
+ rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd.dma_meta_list);
+ if (!rqd.meta_list)
return -ENOMEM;
+ rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+ rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
+
bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
rqd.private = &wait;
for (i = 0; i < lm->smeta_sec; i++, paddr++) {
+ struct pblk_sec_meta *meta_list = rqd.meta_list;
+
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
- if (dir == WRITE)
- lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+
+ if (dir == WRITE) {
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+ meta_list[i].lba = lba_list[paddr] = addr_empty;
+ }
}
/*
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: smeta I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
if (rqd.error) {
if (dir == WRITE)
}
free_ppa_list:
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return ret;
}
return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
}
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+ void *emeta_buf)
{
- return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
+ return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
+ line->emeta_ssec, READ);
}
static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
{
struct nvm_rq rqd;
- int ret;
+ int ret = 0;
DECLARE_COMPLETION_ONSTACK(wait);
memset(&rqd, 0, sizeof(struct nvm_rq));
rqd.private = pblk;
__pblk_end_io_erase(pblk, &rqd);
- return 0;
+ return ret;
}
int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_meta *lm = &pblk->lm;
struct ppa_addr ppa;
- int bit = -1;
+ int ret, bit = -1;
/* Erase only good blocks, one at a time */
do {
WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
spin_unlock(&line->lock);
- if (pblk_blk_erase_sync(pblk, ppa)) {
+ ret = pblk_blk_erase_sync(pblk, ppa);
+ if (ret) {
pr_err("pblk: failed to erase line %d\n", line->id);
- return -ENOMEM;
+ return ret;
}
} while (1);
return 0;
}
+static void pblk_line_setup_metadata(struct pblk_line *line,
+ struct pblk_line_mgmt *l_mg,
+ struct pblk_line_meta *lm)
+{
+ int meta_line;
+
+ lockdep_assert_held(&l_mg->free_lock);
+
+retry_meta:
+ meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+ if (meta_line == PBLK_DATA_LINES) {
+ spin_unlock(&l_mg->free_lock);
+ io_schedule();
+ spin_lock(&l_mg->free_lock);
+ goto retry_meta;
+ }
+
+ set_bit(meta_line, &l_mg->meta_bitmap);
+ line->meta_line = meta_line;
+
+ line->smeta = l_mg->sline_meta[meta_line];
+ line->emeta = l_mg->eline_meta[meta_line];
+
+ memset(line->smeta, 0, lm->smeta_len);
+ memset(line->emeta->buf, 0, lm->emeta_len[0]);
+
+ line->emeta->mem = 0;
+ atomic_set(&line->emeta->sync, 0);
+}
+
/* For now lines are always assumed full lines. Thus, smeta former and current
* lun bitmaps are omitted.
*/
-static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
+static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
struct pblk_line *cur)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct line_smeta *smeta = line->smeta;
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
+ struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
int nr_blk_line;
/* After erasing the line, new bad blocks might appear and we risk
}
/* Run-time metadata */
- line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
+ line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
/* Mark LUNs allocated in this line (all for now) */
bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
- smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
- memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
- smeta->header.id = cpu_to_le32(line->id);
- smeta->header.type = cpu_to_le16(line->type);
- smeta->header.version = cpu_to_le16(1);
+ smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
+ memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
+ smeta_buf->header.id = cpu_to_le32(line->id);
+ smeta_buf->header.type = cpu_to_le16(line->type);
+ smeta_buf->header.version = cpu_to_le16(1);
/* Start metadata */
- smeta->seq_nr = cpu_to_le64(line->seq_nr);
- smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
+ smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+ smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);
/* Fill metadata among lines */
if (cur) {
memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
- smeta->prev_id = cpu_to_le32(cur->id);
- cur->emeta->next_id = cpu_to_le32(line->id);
+ smeta_buf->prev_id = cpu_to_le32(cur->id);
+ cur->emeta->buf->next_id = cpu_to_le32(line->id);
} else {
- smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
+ smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
}
/* All smeta must be set at this point */
- smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
- smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
+ smeta_buf->header.crc = cpu_to_le32(
+ pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
+ smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
/* End metadata */
- memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
- emeta->seq_nr = cpu_to_le64(line->seq_nr);
- emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
- emeta->nr_valid_lbas = cpu_to_le64(0);
- emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
- emeta->crc = cpu_to_le32(0);
- emeta->prev_id = smeta->prev_id;
+ memcpy(&emeta_buf->header, &smeta_buf->header,
+ sizeof(struct line_header));
+ emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+ emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
+ emeta_buf->nr_valid_lbas = cpu_to_le64(0);
+ emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
+ emeta_buf->crc = cpu_to_le32(0);
+ emeta_buf->prev_id = smeta_buf->prev_id;
return 1;
}
/* Mark smeta metadata sectors as bad sectors */
bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
off = bit * geo->sec_per_pl;
-retry_smeta:
bitmap_set(line->map_bitmap, off, lm->smeta_sec);
line->sec_in_line -= lm->smeta_sec;
line->smeta_ssec = off;
if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
pr_debug("pblk: line smeta I/O failed. Retry\n");
- off += geo->sec_per_pl;
- goto retry_smeta;
+ return 1;
}
bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
* blocks to make sure that there are enough sectors to store emeta
*/
bit = lm->sec_per_line;
- off = lm->sec_per_line - lm->emeta_sec;
- bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
+ off = lm->sec_per_line - lm->emeta_sec[0];
+ bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
while (nr_bb) {
off -= geo->sec_per_pl;
if (!test_bit(off, line->invalid_bitmap)) {
}
}
- line->sec_in_line -= lm->emeta_sec;
+ line->sec_in_line -= lm->emeta_sec[0];
line->emeta_ssec = off;
- line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
+ line->nr_valid_lbas = 0;
+ line->left_msecs = line->sec_in_line;
+ *line->vsc = cpu_to_le32(line->sec_in_line);
if (lm->sec_per_line - line->sec_in_line !=
bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
spin_lock(&line->lock);
if (line->state != PBLK_LINESTATE_FREE) {
+ mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+ mempool_free(line->map_bitmap, pblk->line_meta_pool);
spin_unlock(&line->lock);
- WARN(1, "pblk: corrupted line state\n");
- return -EINTR;
+ WARN(1, "pblk: corrupted line %d, state %d\n",
+ line->id, line->state);
+ return -EAGAIN;
}
+
line->state = PBLK_LINESTATE_OPEN;
atomic_set(&line->left_eblks, blk_in_line);
atomic_set(&line->left_seblks, blk_in_line);
+
+ line->meta_distance = lm->meta_distance;
spin_unlock(&line->lock);
/* Bad blocks do not need to be erased */
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *line = NULL;
- int bit;
+ struct pblk_line *line;
+ int ret, bit;
lockdep_assert_held(&l_mg->free_lock);
-retry_get:
+retry:
if (list_empty(&l_mg->free_list)) {
pr_err("pblk: no free lines\n");
- goto out;
+ return NULL;
}
line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
list_add_tail(&line->list, &l_mg->bad_list);
pr_debug("pblk: line %d is bad\n", line->id);
- goto retry_get;
+ goto retry;
}
- if (pblk_line_prepare(pblk, line)) {
- pr_err("pblk: failed to prepare line %d\n", line->id);
- list_add(&line->list, &l_mg->free_list);
- return NULL;
+ ret = pblk_line_prepare(pblk, line);
+ if (ret) {
+ if (ret == -EAGAIN) {
+ list_add(&line->list, &l_mg->corrupt_list);
+ goto retry;
+ } else {
+ pr_err("pblk: failed to prepare line %d\n", line->id);
+ list_add(&line->list, &l_mg->free_list);
+ l_mg->nr_free_lines++;
+ return NULL;
+ }
}
-out:
return line;
}
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *retry_line;
+retry:
spin_lock(&l_mg->free_lock);
retry_line = pblk_line_get(pblk);
if (!retry_line) {
l_mg->data_line = retry_line;
spin_unlock(&l_mg->free_lock);
- if (pblk_line_erase(pblk, retry_line)) {
- spin_lock(&l_mg->free_lock);
- l_mg->data_line = NULL;
- spin_unlock(&l_mg->free_lock);
- return NULL;
- }
-
pblk_rl_free_lines_dec(&pblk->rl, retry_line);
+ if (pblk_line_erase(pblk, retry_line))
+ goto retry;
+
return retry_line;
}
+static void pblk_set_space_limit(struct pblk *pblk)
+{
+ struct pblk_rl *rl = &pblk->rl;
+
+ atomic_set(&rl->rb_space, 0);
+}
+
struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line;
- int meta_line;
int is_next = 0;
spin_lock(&l_mg->free_lock);
line->type = PBLK_LINETYPE_DATA;
l_mg->data_line = line;
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- set_bit(meta_line, &l_mg->meta_bitmap);
- line->smeta = l_mg->sline_meta[meta_line].meta;
- line->emeta = l_mg->eline_meta[meta_line].meta;
- line->meta_line = meta_line;
+ pblk_line_setup_metadata(line, l_mg, &pblk->lm);
/* Allocate next line for preparation */
l_mg->data_next = pblk_line_get(pblk);
- if (l_mg->data_next) {
+ if (!l_mg->data_next) {
+ /* If we cannot get a new line, we need to stop the pipeline.
+ * Only allow as many writes in as we can store safely and then
+ * fail gracefully
+ */
+ pblk_set_space_limit(pblk);
+
+ l_mg->data_next = NULL;
+ } else {
l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
l_mg->data_next->type = PBLK_LINETYPE_DATA;
is_next = 1;
}
spin_unlock(&l_mg->free_lock);
+ if (pblk_line_erase(pblk, line)) {
+ line = pblk_line_retry(pblk, line);
+ if (!line)
+ return NULL;
+ }
+
pblk_rl_free_lines_dec(&pblk->rl, line);
if (is_next)
pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
- if (pblk_line_erase(pblk, line))
- return NULL;
-
retry_setup:
- if (!pblk_line_set_metadata(pblk, line, NULL)) {
+ if (!pblk_line_init_metadata(pblk, line, NULL)) {
line = pblk_line_retry(pblk, line);
if (!line)
return NULL;
return line;
}
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
+static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
+{
+ lockdep_assert_held(&pblk->l_mg.free_lock);
+
+ pblk_set_space_limit(pblk);
+ pblk->state = PBLK_STATE_STOPPING;
+}
+
+void pblk_pipeline_stop(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ int ret;
+
+ spin_lock(&l_mg->free_lock);
+ if (pblk->state == PBLK_STATE_RECOVERING ||
+ pblk->state == PBLK_STATE_STOPPED) {
+ spin_unlock(&l_mg->free_lock);
+ return;
+ }
+ pblk->state = PBLK_STATE_RECOVERING;
+ spin_unlock(&l_mg->free_lock);
+
+ pblk_flush_writer(pblk);
+ pblk_wait_for_meta(pblk);
+
+ ret = pblk_recov_pad(pblk);
+ if (ret) {
+ pr_err("pblk: could not close data on teardown(%d)\n", ret);
+ return;
+ }
+
+ flush_workqueue(pblk->bb_wq);
+ pblk_line_close_meta_sync(pblk);
+
+ spin_lock(&l_mg->free_lock);
+ pblk->state = PBLK_STATE_STOPPED;
+ l_mg->data_line = NULL;
+ l_mg->data_next = NULL;
+ spin_unlock(&l_mg->free_lock);
+}
+
+void pblk_line_replace_data(struct pblk *pblk)
{
- struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *cur, *new;
unsigned int left_seblks;
- int meta_line;
int is_next = 0;
cur = l_mg->data_line;
new = l_mg->data_next;
if (!new)
- return NULL;
+ return;
l_mg->data_line = new;
-retry_line:
+ spin_lock(&l_mg->free_lock);
+ if (pblk->state != PBLK_STATE_RUNNING) {
+ l_mg->data_line = NULL;
+ l_mg->data_next = NULL;
+ spin_unlock(&l_mg->free_lock);
+ return;
+ }
+
+ pblk_line_setup_metadata(new, l_mg, &pblk->lm);
+ spin_unlock(&l_mg->free_lock);
+
+retry_erase:
left_seblks = atomic_read(&new->left_seblks);
if (left_seblks) {
/* If line is not fully erased, erase it */
if (atomic_read(&new->left_eblks)) {
if (pblk_line_erase(pblk, new))
- return NULL;
+ return;
} else {
io_schedule();
}
- goto retry_line;
+ goto retry_erase;
}
- spin_lock(&l_mg->free_lock);
- /* Allocate next line for preparation */
- l_mg->data_next = pblk_line_get(pblk);
- if (l_mg->data_next) {
- l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
- l_mg->data_next->type = PBLK_LINETYPE_DATA;
- is_next = 1;
- }
-
-retry_meta:
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- if (meta_line == PBLK_DATA_LINES) {
- spin_unlock(&l_mg->free_lock);
- io_schedule();
- spin_lock(&l_mg->free_lock);
- goto retry_meta;
- }
-
- set_bit(meta_line, &l_mg->meta_bitmap);
- new->smeta = l_mg->sline_meta[meta_line].meta;
- new->emeta = l_mg->eline_meta[meta_line].meta;
- new->meta_line = meta_line;
-
- memset(new->smeta, 0, lm->smeta_len);
- memset(new->emeta, 0, lm->emeta_len);
- spin_unlock(&l_mg->free_lock);
-
- if (is_next)
- pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
-
retry_setup:
- if (!pblk_line_set_metadata(pblk, new, cur)) {
+ if (!pblk_line_init_metadata(pblk, new, cur)) {
new = pblk_line_retry(pblk, new);
if (!new)
- return NULL;
+ return;
goto retry_setup;
}
if (!pblk_line_init_bb(pblk, new, 1)) {
new = pblk_line_retry(pblk, new);
if (!new)
- return NULL;
+ return;
goto retry_setup;
}
- return new;
+ /* Allocate next line for preparation */
+ spin_lock(&l_mg->free_lock);
+ l_mg->data_next = pblk_line_get(pblk);
+ if (!l_mg->data_next) {
+ /* If we cannot get a new line, we need to stop the pipeline.
+ * Only allow as many writes in as we can store safely and then
+ * fail gracefully
+ */
+ pblk_stop_writes(pblk, new);
+ l_mg->data_next = NULL;
+ } else {
+ l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+ l_mg->data_next->type = PBLK_LINETYPE_DATA;
+ is_next = 1;
+ }
+ spin_unlock(&l_mg->free_lock);
+
+ if (is_next)
+ pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
}
void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
if (line->invalid_bitmap)
mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+ *line->vsc = cpu_to_le32(EMPTY_ENTRY);
+
line->map_bitmap = NULL;
line->invalid_bitmap = NULL;
line->smeta = NULL;
struct nvm_rq *rqd;
int err;
- rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
- memset(rqd, 0, pblk_r_rq_size);
+ rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
+ memset(rqd, 0, pblk_g_rq_size);
pblk_setup_e_rq(pblk, rqd, ppa);
return pblk->l_mg.data_line;
}
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
+/* For now, always erase next line */
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
{
return pblk->l_mg.data_next;
}
return (line->left_msecs == 0);
}
+void pblk_line_close_meta_sync(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line *line, *tline;
+ LIST_HEAD(list);
+
+ spin_lock(&l_mg->close_lock);
+ if (list_empty(&l_mg->emeta_list)) {
+ spin_unlock(&l_mg->close_lock);
+ return;
+ }
+
+ list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
+ spin_unlock(&l_mg->close_lock);
+
+ list_for_each_entry_safe(line, tline, &list, list) {
+ struct pblk_emeta *emeta = line->emeta;
+
+ while (emeta->mem < lm->emeta_len[0]) {
+ int ret;
+
+ ret = pblk_submit_meta_io(pblk, line);
+ if (ret) {
+ pr_err("pblk: sync meta line %d failed (%d)\n",
+ line->id, ret);
+ return;
+ }
+ }
+ }
+
+ pblk_wait_for_meta(pblk);
+ flush_workqueue(pblk->close_wq);
+}
+
+static void pblk_line_should_sync_meta(struct pblk *pblk)
+{
+ if (pblk_rl_is_limit(&pblk->rl))
+ pblk_line_close_meta_sync(pblk);
+}
+
void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list;
- line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
-
- if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
- pr_err("pblk: line %d close I/O failed\n", line->id);
+#ifdef CONFIG_NVM_DEBUG
+ struct pblk_line_meta *lm = &pblk->lm;
- WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
+ WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
"pblk: corrupt closed line %d\n", line->id);
+#endif
spin_lock(&l_mg->free_lock);
WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
spin_unlock(&line->lock);
spin_unlock(&l_mg->gc_lock);
+
+ pblk_gc_should_kick(pblk);
+}
+
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
+
+ /* No need for exact vsc value; avoid a big line lock and take aprox. */
+ memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
+ memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
+
+ emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
+ emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
+
+ spin_lock(&l_mg->close_lock);
+ spin_lock(&line->lock);
+ list_add_tail(&line->list, &l_mg->emeta_list);
+ spin_unlock(&line->lock);
+ spin_unlock(&l_mg->close_lock);
+
+ pblk_line_should_sync_meta(pblk);
}
void pblk_line_close_ws(struct work_struct *work)
}
void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *))
+ void (*work)(struct work_struct *),
+ struct workqueue_struct *wq)
{
struct pblk_line_ws *line_ws;
line_ws->priv = priv;
INIT_WORK(&line_ws->ws, work);
- queue_work(pblk->kw_wq, &line_ws->ws);
+ queue_work(wq, &line_ws->ws);
}
void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_lun *rlun;
- int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
+ int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
int ret;
/*
/* If the LUN has been locked for this same request, do no attempt to
* lock it again
*/
- if (test_and_set_bit(lun_id, lun_bitmap))
+ if (test_and_set_bit(pos, lun_bitmap))
return;
- rlun = &pblk->luns[lun_id];
+ rlun = &pblk->luns[pos];
ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
if (ret) {
switch (ret) {
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
- kfree(gc_rq->data);
- kfree(gc_rq->lba_list);
+ vfree(gc_rq->data);
kfree(gc_rq);
}
return 1;
}
- list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
- list_move_tail(&gc_rq->list, &w_list);
- gc->w_entries--;
- }
+ list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
+ gc->w_entries = 0;
spin_unlock(&gc->w_lock);
list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
gc_rq->nr_secs, gc_rq->secs_to_gc,
gc_rq->line, PBLK_IOTYPE_GC);
- kref_put(&gc_rq->line->ref, pblk_line_put);
-
list_del(&gc_rq->list);
+ kref_put(&gc_rq->line->ref, pblk_line_put);
pblk_gc_free_gc_rq(gc_rq);
}
* Responsible for managing all memory related to a gc request. Also in case of
* failure
*/
-static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
- u64 *lba_list, unsigned int nr_secs)
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_gc *gc = &pblk->gc;
- struct pblk_gc_rq *gc_rq;
+ struct pblk_line *line = gc_rq->line;
void *data;
unsigned int secs_to_gc;
- int ret = NVM_IO_OK;
+ int ret = 0;
- data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
+ data = vmalloc(gc_rq->nr_secs * geo->sec_size);
if (!data) {
- ret = NVM_IO_ERR;
- goto free_lba_list;
+ ret = -ENOMEM;
+ goto out;
}
/* Read from GC victim block */
- if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+ if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
&secs_to_gc, line)) {
- ret = NVM_IO_ERR;
+ ret = -EFAULT;
goto free_data;
}
if (!secs_to_gc)
- goto free_data;
-
- gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
- if (!gc_rq) {
- ret = NVM_IO_ERR;
- goto free_data;
- }
+ goto free_rq;
- gc_rq->line = line;
gc_rq->data = data;
- gc_rq->lba_list = lba_list;
- gc_rq->nr_secs = nr_secs;
gc_rq->secs_to_gc = secs_to_gc;
- kref_get(&line->ref);
-
retry:
spin_lock(&gc->w_lock);
- if (gc->w_entries > 256) {
+ if (gc->w_entries >= PBLK_GC_W_QD) {
spin_unlock(&gc->w_lock);
- usleep_range(256, 1024);
+ pblk_gc_writer_kick(&pblk->gc);
+ usleep_range(128, 256);
goto retry;
}
gc->w_entries++;
pblk_gc_writer_kick(&pblk->gc);
- return NVM_IO_OK;
+ return 0;
+free_rq:
+ kfree(gc_rq);
free_data:
- kfree(data);
-free_lba_list:
- kfree(lba_list);
-
+ vfree(data);
+out:
+ kref_put(&line->ref, pblk_line_put);
return ret;
}
static void pblk_gc_line_ws(struct work_struct *work)
{
+ struct pblk_line_ws *line_rq_ws = container_of(work,
+ struct pblk_line_ws, ws);
+ struct pblk *pblk = line_rq_ws->pblk;
+ struct pblk_gc *gc = &pblk->gc;
+ struct pblk_line *line = line_rq_ws->line;
+ struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
+
+ up(&gc->gc_sem);
+
+ if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
+ pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
+ line->id, *line->vsc,
+ gc_rq->nr_secs);
+ }
+
+ mempool_free(line_rq_ws, pblk->line_ws_pool);
+}
+
+static void pblk_gc_line_prepare_ws(struct work_struct *work)
+{
struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
ws);
struct pblk *pblk = line_ws->pblk;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line = line_ws->line;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- __le64 *lba_list = line_ws->priv;
- u64 *gc_list;
- int sec_left;
- int nr_ppas, bit;
- int put_line = 1;
+ struct pblk_gc *gc = &pblk->gc;
+ struct line_emeta *emeta_buf;
+ struct pblk_line_ws *line_rq_ws;
+ struct pblk_gc_rq *gc_rq;
+ __le64 *lba_list;
+ int sec_left, nr_secs, bit;
+ int ret;
- pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+ emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
+ GFP_KERNEL);
+ if (!emeta_buf) {
+ pr_err("pblk: cannot use GC emeta\n");
+ return;
+ }
- spin_lock(&line->lock);
- sec_left = line->vsc;
- if (!sec_left) {
- /* Lines are erased before being used (l_mg->data_/log_next) */
- spin_unlock(&line->lock);
- goto out;
+ ret = pblk_line_read_emeta(pblk, line, emeta_buf);
+ if (ret) {
+ pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
+ goto fail_free_emeta;
+ }
+
+ /* If this read fails, it means that emeta is corrupted. For now, leave
+ * the line untouched. TODO: Implement a recovery routine that scans and
+ * moves all sectors on the line.
+ */
+ lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
+ if (!lba_list) {
+ pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
+ goto fail_free_emeta;
}
- spin_unlock(&line->lock);
+ sec_left = pblk_line_vsc(line);
if (sec_left < 0) {
pr_err("pblk: corrupted GC line (%d)\n", line->id);
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
+ goto fail_free_emeta;
}
bit = -1;
next_rq:
- gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
- if (!gc_list) {
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
- }
+ gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+ if (!gc_rq)
+ goto fail_free_emeta;
- nr_ppas = 0;
+ nr_secs = 0;
do {
bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
bit + 1);
if (bit > line->emeta_ssec)
break;
- gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
- } while (nr_ppas < pblk->max_write_pgs);
+ gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
+ } while (nr_secs < pblk->max_write_pgs);
- if (unlikely(!nr_ppas)) {
- kfree(gc_list);
+ if (unlikely(!nr_secs)) {
+ kfree(gc_rq);
goto out;
}
- if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
- pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
- line->id, line->vsc,
- nr_ppas, nr_ppas);
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
- }
+ gc_rq->nr_secs = nr_secs;
+ gc_rq->line = line;
+
+ line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+ if (!line_rq_ws)
+ goto fail_free_gc_rq;
- sec_left -= nr_ppas;
+ line_rq_ws->pblk = pblk;
+ line_rq_ws->line = line;
+ line_rq_ws->priv = gc_rq;
+
+ down(&gc->gc_sem);
+ kref_get(&line->ref);
+
+ INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
+ queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);
+
+ sec_left -= nr_secs;
if (sec_left > 0)
goto next_rq;
out:
- pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
mempool_free(line_ws, pblk->line_ws_pool);
- atomic_dec(&pblk->gc.inflight_gc);
- if (put_line)
- kref_put(&line->ref, pblk_line_put);
+
+ kref_put(&line->ref, pblk_line_put);
+ atomic_dec(&gc->inflight_gc);
+
+ return;
+
+fail_free_gc_rq:
+ kfree(gc_rq);
+fail_free_emeta:
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+ pblk_put_line_back(pblk, line);
+ kref_put(&line->ref, pblk_line_put);
+ mempool_free(line_ws, pblk->line_ws_pool);
+ atomic_dec(&gc->inflight_gc);
+
+ pr_err("pblk: Failed to GC line %d\n", line->id);
}
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_gc *gc = &pblk->gc;
struct pblk_line_ws *line_ws;
- __le64 *lba_list;
- int ret;
- line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
- line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
- GFP_KERNEL);
- if (!line->emeta) {
- pr_err("pblk: cannot use GC emeta\n");
- goto fail_free_ws;
- }
-
- ret = pblk_line_read_emeta(pblk, line);
- if (ret) {
- pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
- goto fail_free_emeta;
- }
+ pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
- /* If this read fails, it means that emeta is corrupted. For now, leave
- * the line untouched. TODO: Implement a recovery routine that scans and
- * moves all sectors on the line.
- */
- lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
- if (!lba_list) {
- pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
- goto fail_free_emeta;
- }
+ line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+ if (!line_ws)
+ return -ENOMEM;
line_ws->pblk = pblk;
line_ws->line = line;
- line_ws->priv = lba_list;
- INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
- queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+ INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
+ queue_work(gc->gc_reader_wq, &line_ws->ws);
return 0;
+}
-fail_free_emeta:
- pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
-fail_free_ws:
- mempool_free(line_ws, pblk->line_ws_pool);
- pblk_put_line_back(pblk, line);
+static int pblk_gc_read(struct pblk *pblk)
+{
+ struct pblk_gc *gc = &pblk->gc;
+ struct pblk_line *line;
+
+ spin_lock(&gc->r_lock);
+ if (list_empty(&gc->r_list)) {
+ spin_unlock(&gc->r_lock);
+ return 1;
+ }
+
+ line = list_first_entry(&gc->r_list, struct pblk_line, list);
+ list_del(&line->list);
+ spin_unlock(&gc->r_lock);
+
+ pblk_gc_kick(pblk);
- return 1;
+ if (pblk_gc_line(pblk, line))
+ pr_err("pblk: failed to GC line %d\n", line->id);
+
+ return 0;
}
-static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
- struct pblk_line *line, *tline;
+ wake_up_process(gc->gc_reader_ts);
+}
- list_for_each_entry_safe(line, tline, gc_list, list) {
- if (pblk_gc_line(pblk, line))
- pr_err("pblk: failed to GC line %d\n", line->id);
- list_del(&line->list);
+static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
+ struct list_head *group_list)
+{
+ struct pblk_line *line, *victim;
+ int line_vsc, victim_vsc;
+
+ victim = list_first_entry(group_list, struct pblk_line, list);
+ list_for_each_entry(line, group_list, list) {
+ line_vsc = le32_to_cpu(*line->vsc);
+ victim_vsc = le32_to_cpu(*victim->vsc);
+ if (line_vsc < victim_vsc)
+ victim = line;
}
+
+ return victim;
+}
+
+static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
+{
+ unsigned int nr_blocks_free, nr_blocks_need;
+
+ nr_blocks_need = pblk_rl_high_thrs(rl);
+ nr_blocks_free = pblk_rl_nr_free_blks(rl);
+
+ /* This is not critical, no need to take lock here */
+ return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
}
/*
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line, *tline;
- unsigned int nr_blocks_free, nr_blocks_need;
+ struct pblk_line *line;
struct list_head *group_list;
- int run_gc, gc_group = 0;
- int prev_gc = 0;
- int inflight_gc = atomic_read(&gc->inflight_gc);
- LIST_HEAD(gc_list);
+ bool run_gc;
+ int inflight_gc, gc_group = 0, prev_group = 0;
+
+ do {
+ spin_lock(&l_mg->gc_lock);
+ if (list_empty(&l_mg->gc_full_list)) {
+ spin_unlock(&l_mg->gc_lock);
+ break;
+ }
+
+ line = list_first_entry(&l_mg->gc_full_list,
+ struct pblk_line, list);
- spin_lock(&l_mg->gc_lock);
- list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
spin_unlock(&line->lock);
list_del(&line->list);
+ spin_unlock(&l_mg->gc_lock);
+
kref_put(&line->ref, pblk_line_put);
- }
- spin_unlock(&l_mg->gc_lock);
+ } while (1);
- nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
- nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
- run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+ run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+ if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
+ return;
next_gc_group:
group_list = l_mg->gc_lists[gc_group++];
- spin_lock(&l_mg->gc_lock);
- while (run_gc && !list_empty(group_list)) {
- /* No need to queue up more GC lines than we can handle */
- if (!run_gc || inflight_gc > gc->gc_jobs_active) {
+
+ do {
+ spin_lock(&l_mg->gc_lock);
+ if (list_empty(group_list)) {
spin_unlock(&l_mg->gc_lock);
- pblk_gc_lines(pblk, &gc_list);
- return;
+ break;
}
- line = list_first_entry(group_list, struct pblk_line, list);
- nr_blocks_free += atomic_read(&line->blk_in_line);
+ line = pblk_gc_get_victim_line(pblk, group_list);
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
- list_move_tail(&line->list, &gc_list);
- atomic_inc(&gc->inflight_gc);
- inflight_gc++;
spin_unlock(&line->lock);
- prev_gc = 1;
- run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
- }
- spin_unlock(&l_mg->gc_lock);
+ list_del(&line->list);
+ spin_unlock(&l_mg->gc_lock);
+
+ spin_lock(&gc->r_lock);
+ list_add_tail(&line->list, &gc->r_list);
+ spin_unlock(&gc->r_lock);
- pblk_gc_lines(pblk, &gc_list);
+ inflight_gc = atomic_inc_return(&gc->inflight_gc);
+ pblk_gc_reader_kick(gc);
- if (!prev_gc && pblk->rl.rb_state > gc_group &&
- gc_group < PBLK_NR_GC_LISTS)
+ prev_group = 1;
+
+ /* No need to queue up more GC lines than we can handle */
+ run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+ if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
+ break;
+ } while (1);
+
+ if (!prev_group && pblk->rl.rb_state > gc_group &&
+ gc_group < PBLK_GC_NR_LISTS)
goto next_gc_group;
}
-
-static void pblk_gc_kick(struct pblk *pblk)
+void pblk_gc_kick(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
wake_up_process(gc->gc_ts);
pblk_gc_writer_kick(gc);
+ pblk_gc_reader_kick(gc);
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}
return 0;
}
-static void pblk_gc_start(struct pblk *pblk)
+static int pblk_gc_reader_ts(void *data)
{
- pblk->gc.gc_active = 1;
+ struct pblk *pblk = data;
- pr_debug("pblk: gc start\n");
+ while (!kthread_should_stop()) {
+ if (!pblk_gc_read(pblk))
+ continue;
+ set_current_state(TASK_INTERRUPTIBLE);
+ io_schedule();
+ }
+
+ return 0;
}
-int pblk_gc_status(struct pblk *pblk)
+static void pblk_gc_start(struct pblk *pblk)
{
- struct pblk_gc *gc = &pblk->gc;
- int ret;
-
- spin_lock(&gc->lock);
- ret = gc->gc_active;
- spin_unlock(&gc->lock);
-
- return ret;
+ pblk->gc.gc_active = 1;
+ pr_debug("pblk: gc start\n");
}
-static void __pblk_gc_should_start(struct pblk *pblk)
+void pblk_gc_should_start(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
- lockdep_assert_held(&gc->lock);
-
if (gc->gc_enabled && !gc->gc_active)
pblk_gc_start(pblk);
-}
-void pblk_gc_should_start(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- spin_lock(&gc->lock);
- __pblk_gc_should_start(pblk);
- spin_unlock(&gc->lock);
+ pblk_gc_kick(pblk);
}
/*
*/
static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
{
- spin_lock(&pblk->gc.lock);
pblk->gc.gc_active = 0;
- spin_unlock(&pblk->gc.lock);
-
pr_debug("pblk: gc stop\n");
}
spin_unlock(&gc->lock);
}
-void pblk_gc_sysfs_force(struct pblk *pblk, int force)
+int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
struct pblk_gc *gc = &pblk->gc;
- int rsv = 0;
+
+ if (force < 0 || force > 1)
+ return -EINVAL;
spin_lock(&gc->lock);
- if (force) {
- gc->gc_enabled = 1;
- rsv = 64;
- }
- pblk_rl_set_gc_rsc(&pblk->rl, rsv);
gc->gc_forced = force;
- __pblk_gc_should_start(pblk);
+
+ if (force)
+ gc->gc_enabled = 1;
+ else
+ gc->gc_enabled = 0;
spin_unlock(&gc->lock);
+
+ pblk_gc_should_start(pblk);
+
+ return 0;
}
int pblk_gc_init(struct pblk *pblk)
goto fail_free_main_kthread;
}
+ gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
+ "pblk-gc-reader-ts");
+ if (IS_ERR(gc->gc_reader_ts)) {
+ pr_err("pblk: could not allocate GC reader kthread\n");
+ ret = PTR_ERR(gc->gc_reader_ts);
+ goto fail_free_writer_kthread;
+ }
+
setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
gc->gc_active = 0;
gc->gc_forced = 0;
gc->gc_enabled = 1;
- gc->gc_jobs_active = 8;
gc->w_entries = 0;
atomic_set(&gc->inflight_gc, 0);
- gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
+ /* Workqueue that reads valid sectors from a line and submit them to the
+ * GC writer to be recycled.
+ */
+ gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
+ if (!gc->gc_line_reader_wq) {
+ pr_err("pblk: could not allocate GC line reader workqueue\n");
+ ret = -ENOMEM;
+ goto fail_free_reader_kthread;
+ }
+
+ /* Workqueue that prepare lines for GC */
+ gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
if (!gc->gc_reader_wq) {
pr_err("pblk: could not allocate GC reader workqueue\n");
ret = -ENOMEM;
- goto fail_free_writer_kthread;
+ goto fail_free_reader_line_wq;
}
spin_lock_init(&gc->lock);
spin_lock_init(&gc->w_lock);
+ spin_lock_init(&gc->r_lock);
+
+ sema_init(&gc->gc_sem, 128);
+
INIT_LIST_HEAD(&gc->w_list);
+ INIT_LIST_HEAD(&gc->r_list);
return 0;
+fail_free_reader_line_wq:
+ destroy_workqueue(gc->gc_line_reader_wq);
+fail_free_reader_kthread:
+ kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
struct pblk_gc *gc = &pblk->gc;
flush_workqueue(gc->gc_reader_wq);
+ flush_workqueue(gc->gc_line_reader_wq);
del_timer(&gc->gc_timer);
pblk_gc_stop(pblk, 1);
if (gc->gc_ts)
kthread_stop(gc->gc_ts);
- if (pblk->gc.gc_reader_wq)
- destroy_workqueue(pblk->gc.gc_reader_wq);
+ if (gc->gc_reader_wq)
+ destroy_workqueue(gc->gc_reader_wq);
+
+ if (gc->gc_line_reader_wq)
+ destroy_workqueue(gc->gc_line_reader_wq);
if (gc->gc_writer_ts)
kthread_stop(gc->gc_writer_ts);
+
+ if (gc->gc_reader_ts)
+ kthread_stop(gc->gc_reader_ts);
}
#include "pblk.h"
-static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache,
- *pblk_w_rq_cache, *pblk_line_meta_cache;
+static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
+ *pblk_w_rq_cache, *pblk_line_meta_cache;
static DECLARE_RWSEM(pblk_lock);
+struct bio_set *pblk_bio_set;
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
struct bio *bio)
* constraint. Writes can be of arbitrary size.
*/
if (bio_data_dir(bio) == READ) {
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
ret = pblk_submit_read(pblk, bio);
if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
bio_put(bio);
* available for user I/O.
*/
if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
}
return -ENOMEM;
}
- pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size,
+ pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
0, 0, NULL);
- if (!pblk_r_rq_cache) {
+ if (!pblk_g_rq_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
up_write(&pblk_lock);
if (!pblk_w_rq_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
up_write(&pblk_lock);
return -ENOMEM;
}
if (!pblk_line_meta_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
kmem_cache_destroy(pblk_w_rq_cache);
up_write(&pblk_lock);
return -ENOMEM;
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- int max_write_ppas;
- int mod;
- pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
- max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
- pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
- max_write_ppas : nvm_max_phys_sects(dev);
pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
geo->nr_planes * geo->nr_luns;
- if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
- pr_err("pblk: cannot support device max_phys_sect\n");
- return -EINVAL;
- }
-
- div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
- if (mod) {
- pr_err("pblk: bad configuration of sectors/pages\n");
- return -EINVAL;
- }
-
if (pblk_init_global_caches(pblk))
return -ENOMEM;
if (!pblk->page_pool)
return -ENOMEM;
- pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns,
+ pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
pblk_blk_ws_cache);
if (!pblk->line_ws_pool)
goto free_page_pool;
if (!pblk->rec_pool)
goto free_blk_ws_pool;
- pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache);
- if (!pblk->r_rq_pool)
+ pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
+ pblk_g_rq_cache);
+ if (!pblk->g_rq_pool)
goto free_rec_pool;
- pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache);
+ pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
+ pblk_w_rq_cache);
if (!pblk->w_rq_pool)
- goto free_r_rq_pool;
+ goto free_g_rq_pool;
pblk->line_meta_pool =
- mempool_create_slab_pool(16, pblk_line_meta_cache);
+ mempool_create_slab_pool(PBLK_META_POOL_SIZE,
+ pblk_line_meta_cache);
if (!pblk->line_meta_pool)
goto free_w_rq_pool;
- pblk->kw_wq = alloc_workqueue("pblk-aux-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
- if (!pblk->kw_wq)
+ pblk->close_wq = alloc_workqueue("pblk-close-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
+ if (!pblk->close_wq)
goto free_line_meta_pool;
+ pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+ if (!pblk->bb_wq)
+ goto free_close_wq;
+
if (pblk_set_ppaf(pblk))
- goto free_kw_wq;
+ goto free_bb_wq;
if (pblk_rwb_init(pblk))
- goto free_kw_wq;
+ goto free_bb_wq;
INIT_LIST_HEAD(&pblk->compl_list);
return 0;
-free_kw_wq:
- destroy_workqueue(pblk->kw_wq);
+free_bb_wq:
+ destroy_workqueue(pblk->bb_wq);
+free_close_wq:
+ destroy_workqueue(pblk->close_wq);
free_line_meta_pool:
mempool_destroy(pblk->line_meta_pool);
free_w_rq_pool:
mempool_destroy(pblk->w_rq_pool);
-free_r_rq_pool:
- mempool_destroy(pblk->r_rq_pool);
+free_g_rq_pool:
+ mempool_destroy(pblk->g_rq_pool);
free_rec_pool:
mempool_destroy(pblk->rec_pool);
free_blk_ws_pool:
static void pblk_core_free(struct pblk *pblk)
{
- if (pblk->kw_wq)
- destroy_workqueue(pblk->kw_wq);
+ if (pblk->close_wq)
+ destroy_workqueue(pblk->close_wq);
+
+ if (pblk->bb_wq)
+ destroy_workqueue(pblk->bb_wq);
mempool_destroy(pblk->page_pool);
mempool_destroy(pblk->line_ws_pool);
mempool_destroy(pblk->rec_pool);
- mempool_destroy(pblk->r_rq_pool);
+ mempool_destroy(pblk->g_rq_pool);
mempool_destroy(pblk->w_rq_pool);
mempool_destroy(pblk->line_meta_pool);
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
kmem_cache_destroy(pblk_w_rq_cache);
kmem_cache_destroy(pblk_line_meta_cache);
}
kfree(pblk->luns);
}
+static void pblk_free_line_bitmaps(struct pblk_line *line)
+{
+ kfree(line->blk_bitmap);
+ kfree(line->erase_bitmap);
+}
+
static void pblk_lines_free(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
line = &pblk->lines[i];
pblk_line_free(pblk, line);
- kfree(line->blk_bitmap);
- kfree(line->erase_bitmap);
+ pblk_free_line_bitmaps(line);
}
spin_unlock(&l_mg->free_lock);
}
kfree(l_mg->bb_template);
kfree(l_mg->bb_aux);
+ kfree(l_mg->vsc_list);
+ spin_lock(&l_mg->free_lock);
for (i = 0; i < PBLK_DATA_LINES; i++) {
- pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
- pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+ kfree(l_mg->sline_meta[i]);
+ pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
+ kfree(l_mg->eline_meta[i]);
}
+ spin_unlock(&l_mg->free_lock);
kfree(pblk->lines);
}
return ret;
}
-static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
+static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
+ int blk_per_line)
{
- struct pblk_line_meta *lm = &pblk->lm;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
struct pblk_lun *rlun;
int bb_cnt = 0;
int i;
+ for (i = 0; i < blk_per_line; i++) {
+ rlun = &pblk->luns[i];
+ if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
+ continue;
+
+ set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
+ bb_cnt++;
+ }
+
+ return bb_cnt;
+}
+
+static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+
line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
if (!line->blk_bitmap)
return -ENOMEM;
return -ENOMEM;
}
- for (i = 0; i < lm->blk_per_line; i++) {
- rlun = &pblk->luns[i];
- if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
- continue;
-
- set_bit(i, line->blk_bitmap);
- bb_cnt++;
- }
-
- return bb_cnt;
+ return 0;
}
static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
}
/* See comment over struct line_emeta definition */
-static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm)
+static unsigned int calc_emeta_len(struct pblk *pblk)
{
- return (sizeof(struct line_emeta) +
- ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) +
- (pblk->l_mg.nr_lines * sizeof(u32)) +
- lm->blk_bitmap_len);
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+
+ /* Round to sector size so that lba_list starts on its own sector */
+ lm->emeta_sec[1] = DIV_ROUND_UP(
+ sizeof(struct line_emeta) + lm->blk_bitmap_len,
+ geo->sec_size);
+ lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
+
+ /* Round to sector size so that vsc_list starts on its own sector */
+ lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
+ lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
+ geo->sec_size);
+ lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;
+
+ lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
+ geo->sec_size);
+ lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;
+
+ lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
+
+ return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
atomic_set(&pblk->rl.free_blocks, nr_free_blks);
}
+static int pblk_lines_alloc_metadata(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ int i;
+
+ /* smeta is always small enough to fit on a kmalloc memory allocation,
+ * emeta depends on the number of LUNs allocated to the pblk instance
+ */
+ for (i = 0; i < PBLK_DATA_LINES; i++) {
+ l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
+ if (!l_mg->sline_meta[i])
+ goto fail_free_smeta;
+ }
+
+ /* emeta allocates three different buffers for managing metadata with
+ * in-memory and in-media layouts
+ */
+ for (i = 0; i < PBLK_DATA_LINES; i++) {
+ struct pblk_emeta *emeta;
+
+ emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
+ if (!emeta)
+ goto fail_free_emeta;
+
+ if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
+ l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
+
+ emeta->buf = vmalloc(lm->emeta_len[0]);
+ if (!emeta->buf) {
+ kfree(emeta);
+ goto fail_free_emeta;
+ }
+
+ emeta->nr_entries = lm->emeta_sec[0];
+ l_mg->eline_meta[i] = emeta;
+ } else {
+ l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
+
+ emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
+ if (!emeta->buf) {
+ kfree(emeta);
+ goto fail_free_emeta;
+ }
+
+ emeta->nr_entries = lm->emeta_sec[0];
+ l_mg->eline_meta[i] = emeta;
+ }
+ }
+
+ l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
+ if (!l_mg->vsc_list)
+ goto fail_free_emeta;
+
+ for (i = 0; i < l_mg->nr_lines; i++)
+ l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
+
+ return 0;
+
+fail_free_emeta:
+ while (--i >= 0) {
+ vfree(l_mg->eline_meta[i]->buf);
+ kfree(l_mg->eline_meta[i]);
+ }
+
+fail_free_smeta:
+ for (i = 0; i < PBLK_DATA_LINES; i++)
+ kfree(l_mg->sline_meta[i]);
+
+ return -ENOMEM;
+}
+
static int pblk_lines_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line *line;
unsigned int smeta_len, emeta_len;
- long nr_bad_blks, nr_meta_blks, nr_free_blks;
- int bb_distance;
- int i;
- int ret;
+ long nr_bad_blks, nr_free_blks;
+ int bb_distance, max_write_ppas, mod;
+ int i, ret;
+
+ pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
+ max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
+ pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
+ max_write_ppas : nvm_max_phys_sects(dev);
+ pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
+
+ if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
+ pr_err("pblk: cannot support device max_phys_sect\n");
+ return -EINVAL;
+ }
+
+ div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
+ if (mod) {
+ pr_err("pblk: bad configuration of sectors/pages\n");
+ return -EINVAL;
+ }
+
+ l_mg->nr_lines = geo->blks_per_lun;
+ l_mg->log_line = l_mg->data_line = NULL;
+ l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
+ l_mg->nr_free_lines = 0;
+ bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
lm->blk_per_line = geo->nr_luns;
lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
lm->high_thrs = lm->sec_per_line / 2;
lm->mid_thrs = lm->sec_per_line / 4;
+ lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
/* Calculate necessary pages for smeta. See comment over struct
* line_smeta definition
*/
- lm->smeta_len = sizeof(struct line_smeta) +
- PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
-
i = 1;
add_smeta_page:
lm->smeta_sec = i * geo->sec_per_pl;
lm->smeta_len = lm->smeta_sec * geo->sec_size;
- smeta_len = sizeof(struct line_smeta) +
- PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
+ smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
if (smeta_len > lm->smeta_len) {
i++;
goto add_smeta_page;
*/
i = 1;
add_emeta_page:
- lm->emeta_sec = i * geo->sec_per_pl;
- lm->emeta_len = lm->emeta_sec * geo->sec_size;
+ lm->emeta_sec[0] = i * geo->sec_per_pl;
+ lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;
- emeta_len = calc_emeta_len(pblk, lm);
- if (emeta_len > lm->emeta_len) {
+ emeta_len = calc_emeta_len(pblk);
+ if (emeta_len > lm->emeta_len[0]) {
i++;
goto add_emeta_page;
}
- lm->emeta_bb = geo->nr_luns - i;
-
- nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
- (geo->sec_per_blk / 2)) / geo->sec_per_blk;
- lm->min_blk_line = nr_meta_blks + 1;
-
- l_mg->nr_lines = geo->blks_per_lun;
- l_mg->log_line = l_mg->data_line = NULL;
- l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
- l_mg->nr_free_lines = 0;
- bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- /* smeta is always small enough to fit on a kmalloc memory allocation,
- * emeta depends on the number of LUNs allocated to the pblk instance
- */
- l_mg->smeta_alloc_type = PBLK_KMALLOC_META;
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL);
- if (!l_mg->sline_meta[i].meta)
- while (--i >= 0) {
- kfree(l_mg->sline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
+ lm->emeta_bb = geo->nr_luns - i;
+ lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
+ geo->sec_per_blk);
+ if (lm->min_blk_line > lm->blk_per_line) {
+ pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
+ lm->blk_per_line);
+ ret = -EINVAL;
+ goto fail;
}
- if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) {
- l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
-
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
- if (!l_mg->eline_meta[i].meta)
- while (--i >= 0) {
- vfree(l_mg->eline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
- }
- } else {
- l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
-
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->eline_meta[i].meta =
- kmalloc(lm->emeta_len, GFP_KERNEL);
- if (!l_mg->eline_meta[i].meta)
- while (--i >= 0) {
- kfree(l_mg->eline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
- }
- }
+ ret = pblk_lines_alloc_metadata(pblk);
+ if (ret)
+ goto fail;
l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
if (!l_mg->bb_template) {
INIT_LIST_HEAD(&l_mg->gc_low_list);
INIT_LIST_HEAD(&l_mg->gc_empty_list);
+ INIT_LIST_HEAD(&l_mg->emeta_list);
+
l_mg->gc_lists[0] = &l_mg->gc_high_list;
l_mg->gc_lists[1] = &l_mg->gc_mid_list;
l_mg->gc_lists[2] = &l_mg->gc_low_list;
spin_lock_init(&l_mg->free_lock);
+ spin_lock_init(&l_mg->close_lock);
spin_lock_init(&l_mg->gc_lock);
pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
line->type = PBLK_LINETYPE_FREE;
line->state = PBLK_LINESTATE_FREE;
line->gc_group = PBLK_LINEGC_NONE;
+ line->vsc = &l_mg->vsc_list[i];
spin_lock_init(&line->lock);
- nr_bad_blks = pblk_bb_line(pblk, line);
+ ret = pblk_alloc_line_bitmaps(pblk, line);
+ if (ret)
+ goto fail_free_lines;
+
+ nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
+ pblk_free_line_bitmaps(line);
ret = -EINVAL;
goto fail_free_lines;
}
pblk_set_provision(pblk, nr_free_blks);
- sema_init(&pblk->erase_sem, 1);
-
/* Cleanup per-LUN bad block lists - managed within lines on run-time */
for (i = 0; i < geo->nr_luns; i++)
kfree(pblk->luns[i].bb_list);
return 0;
fail_free_lines:
- kfree(pblk->lines);
+ while (--i >= 0)
+ pblk_free_line_bitmaps(&pblk->lines[i]);
fail_free_bb_aux:
kfree(l_mg->bb_aux);
fail_free_bb_template:
kfree(l_mg->bb_template);
fail_free_meta:
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
- pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
- }
+ pblk_line_meta_free(pblk);
fail:
for (i = 0; i < geo->nr_luns; i++)
kfree(pblk->luns[i].bb_list);
static void pblk_writer_stop(struct pblk *pblk)
{
+ /* The pipeline must be stopped and the write buffer emptied before the
+ * write thread is stopped
+ */
+ WARN(pblk_rb_read_count(&pblk->rwb),
+ "Stopping not fully persisted write buffer\n");
+
+ WARN(pblk_rb_sync_count(&pblk->rwb),
+ "Stopping not fully synced write buffer\n");
+
if (pblk->writer_ts)
kthread_stop(pblk->writer_ts);
del_timer(&pblk->wtimer);
static void pblk_tear_down(struct pblk *pblk)
{
- pblk_flush_writer(pblk);
+ pblk_pipeline_stop(pblk);
pblk_writer_stop(pblk);
pblk_rb_sync_l2p(&pblk->rwb);
- pblk_recov_pad(pblk);
pblk_rwb_free(pblk);
pblk_rl_free(&pblk->rl);
pblk->dev = dev;
pblk->disk = tdisk;
+ pblk->state = PBLK_STATE_RUNNING;
spin_lock_init(&pblk->trans_lock);
spin_lock_init(&pblk->lock);
atomic_long_set(&pblk->req_writes, 0);
atomic_long_set(&pblk->sub_writes, 0);
atomic_long_set(&pblk->sync_writes, 0);
- atomic_long_set(&pblk->compl_writes, 0);
atomic_long_set(&pblk->inflight_reads, 0);
+ atomic_long_set(&pblk->cache_reads, 0);
atomic_long_set(&pblk->sync_reads, 0);
atomic_long_set(&pblk->recov_writes, 0);
atomic_long_set(&pblk->recov_writes, 0);
static int __init pblk_module_init(void)
{
- return nvm_register_tgt_type(&tt_pblk);
+ int ret;
+
+ pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!pblk_bio_set)
+ return -ENOMEM;
+ ret = nvm_register_tgt_type(&tt_pblk);
+ if (ret)
+ bioset_free(pblk_bio_set);
+ return ret;
}
static void pblk_module_exit(void)
{
+ bioset_free(pblk_bio_set);
nvm_unregister_tgt_type(&tt_pblk);
}
unsigned int valid_secs)
{
struct pblk_line *line = pblk_line_get_data(pblk);
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
struct pblk_w_ctx *w_ctx;
- __le64 *lba_list = pblk_line_emeta_to_lbas(emeta);
+ __le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
u64 paddr;
int nr_secs = pblk->min_write_pgs;
int i;
w_ctx->ppa = ppa_list[i];
meta_list[i].lba = cpu_to_le64(w_ctx->lba);
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
- le64_add_cpu(&line->emeta->nr_valid_lbas, 1);
+ line->nr_valid_lbas++;
} else {
- meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
- lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
- pblk_map_pad_invalidate(pblk, line, paddr);
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+ lba_list[paddr] = meta_list[i].lba = addr_empty;
+ __pblk_map_invalidate(pblk, line, paddr);
}
}
if (pblk_line_is_full(line)) {
- line = pblk_line_replace_data(pblk);
- if (!line)
- return;
+ struct pblk_line *prev_line = line;
+
+ pblk_line_replace_data(pblk);
+ pblk_line_close_meta(pblk, prev_line);
}
pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+ struct pblk_line_meta *lm = &pblk->lm;
struct pblk_sec_meta *meta_list = rqd->meta_list;
+ struct pblk_line *e_line, *d_line;
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i, erase_lun;
pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
lun_bitmap, &meta_list[i], map_secs);
- erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
- rqd->ppa_list[i].g.ch;
+ erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
- if (!test_bit(erase_lun, e_line->erase_bitmap)) {
- if (down_trylock(&pblk->erase_sem))
- continue;
+ /* line can change after page map. We might also be writing the
+ * last line.
+ */
+ e_line = pblk_line_get_erase(pblk);
+ if (!e_line)
+ return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
+ valid_secs, i + min);
+ spin_lock(&e_line->lock);
+ if (!test_bit(erase_lun, e_line->erase_bitmap)) {
set_bit(erase_lun, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
+
*erase_ppa = rqd->ppa_list[i];
erase_ppa->g.blk = e_line->id;
+ spin_unlock(&e_line->lock);
+
/* Avoid evaluating e_line->left_eblks */
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
}
+ spin_unlock(&e_line->lock);
}
- /* Erase blocks that are bad in this line but might not be in next */
- if (unlikely(ppa_empty(*erase_ppa))) {
- struct pblk_line_meta *lm = &pblk->lm;
+ d_line = pblk_line_get_data(pblk);
+
+ /* line can change after page map. We might also be writing the
+ * last line.
+ */
+ e_line = pblk_line_get_erase(pblk);
+ if (!e_line)
+ return;
- i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line);
- if (i == lm->blk_per_line)
+ /* Erase blocks that are bad in this line but might not be in next */
+ if (unlikely(ppa_empty(*erase_ppa)) &&
+ bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
+ int bit = -1;
+
+retry:
+ bit = find_next_bit(d_line->blk_bitmap,
+ lm->blk_per_line, bit + 1);
+ if (bit >= lm->blk_per_line)
return;
- set_bit(i, e_line->erase_bitmap);
+ spin_lock(&e_line->lock);
+ if (test_bit(bit, e_line->erase_bitmap)) {
+ spin_unlock(&e_line->lock);
+ goto retry;
+ }
+ spin_unlock(&e_line->lock);
+
+ set_bit(bit, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
- *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
+ *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
erase_ppa->g.blk = e_line->id;
}
}
/* Release flags on context. Protect from writes and reads */
smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
pblk_ppa_set_empty(&w_ctx->ppa);
+ w_ctx->lba = ADDR_EMPTY;
}
#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
return pblk_rb_ring_count(mem, subm, rb->nr_entries);
}
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
+{
+ unsigned int mem = READ_ONCE(rb->mem);
+ unsigned int sync = READ_ONCE(rb->sync);
+
+ return pblk_rb_ring_count(mem, sync, rb->nr_entries);
+}
+
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
{
unsigned int subm;
struct pblk_line *line;
struct pblk_rb_entry *entry;
struct pblk_w_ctx *w_ctx;
+ unsigned int user_io = 0, gc_io = 0;
unsigned int i;
+ int flags;
for (i = 0; i < to_update; i++) {
entry = &rb->entries[*l2p_upd];
w_ctx = &entry->w_ctx;
+ flags = READ_ONCE(entry->w_ctx.flags);
+ if (flags & PBLK_IOTYPE_USER)
+ user_io++;
+ else if (flags & PBLK_IOTYPE_GC)
+ gc_io++;
+ else
+ WARN(1, "pblk: unknown IO type\n");
+
pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
entry->cacheline);
*l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
}
+ pblk_rl_out(&pblk->rl, user_io, gc_io);
+
return 0;
}
/* Protect syncs */
smp_store_release(&rb->sync_point, sync_point);
+ if (!bio)
+ return 0;
+
spin_lock_irq(&rb->s_lock);
bio_list_add(&entry->w_ctx.bios, bio);
spin_unlock_irq(&rb->s_lock);
return 1;
}
+void pblk_rb_flush(struct pblk_rb *rb)
+{
+ struct pblk *pblk = container_of(rb, struct pblk, rwb);
+ unsigned int mem = READ_ONCE(rb->mem);
+
+ if (pblk_rb_sync_point_set(rb, NULL, mem))
+ return;
+
+ pblk_write_should_kick(pblk);
+}
+
static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
unsigned int *pos, struct bio *bio,
int *io_ret)
unsigned int nr_entries, unsigned int *pos)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
- int flush_done;
+ int io_ret;
spin_lock(&rb->w_lock);
- if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) {
+ io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
+ if (io_ret) {
spin_unlock(&rb->w_lock);
- return NVM_IO_REQUEUE;
+ return io_ret;
}
- if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) {
+ if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
spin_unlock(&rb->w_lock);
return NVM_IO_REQUEUE;
}
pblk_rl_user_in(&pblk->rl, nr_entries);
spin_unlock(&rb->w_lock);
- return flush_done;
+ return io_ret;
}
/*
* This function is used by the write thread to form the write bio that will
* persist data on the write buffer to the media.
*/
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
- struct pblk_c_ctx *c_ctx,
- unsigned int pos,
- unsigned int nr_entries,
- unsigned int count)
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+ struct bio *bio, unsigned int pos,
+ unsigned int nr_entries, unsigned int count)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
+ struct request_queue *q = pblk->dev->q;
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
struct pblk_rb_entry *entry;
struct page *page;
- unsigned int pad = 0, read = 0, to_read = nr_entries;
- unsigned int user_io = 0, gc_io = 0;
+ unsigned int pad = 0, to_read = nr_entries;
unsigned int i;
int flags;
- int ret;
if (count < nr_entries) {
pad = nr_entries - count;
*/
try:
flags = READ_ONCE(entry->w_ctx.flags);
- if (!(flags & PBLK_WRITTEN_DATA))
+ if (!(flags & PBLK_WRITTEN_DATA)) {
+ io_schedule();
goto try;
-
- if (flags & PBLK_IOTYPE_USER)
- user_io++;
- else if (flags & PBLK_IOTYPE_GC)
- gc_io++;
- else
- WARN(1, "pblk: unknown IO type\n");
+ }
page = virt_to_page(entry->data);
if (!page) {
flags |= PBLK_SUBMITTED_ENTRY;
/* Release flags on context. Protect from writes */
smp_store_release(&entry->w_ctx.flags, flags);
- goto out;
+ return NVM_IO_ERR;
}
- ret = bio_add_page(bio, page, rb->seg_size, 0);
- if (ret != rb->seg_size) {
+ if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
+ rb->seg_size) {
pr_err("pblk: could not add page to write bio\n");
flags &= ~PBLK_WRITTEN_DATA;
flags |= PBLK_SUBMITTED_ENTRY;
/* Release flags on context. Protect from writes */
smp_store_release(&entry->w_ctx.flags, flags);
- goto out;
+ return NVM_IO_ERR;
}
if (flags & PBLK_FLUSH_ENTRY) {
pos = (pos + 1) & (rb->nr_entries - 1);
}
- read = to_read;
- pblk_rl_out(&pblk->rl, user_io, gc_io);
+ if (pad) {
+ if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
+ pr_err("pblk: could not pad page in write bio\n");
+ return NVM_IO_ERR;
+ }
+ }
+
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(pad, &((struct pblk *)
(container_of(rb, struct pblk, rwb)))->padded_writes);
#endif
-out:
- return read;
+
+ return NVM_IO_OK;
}
/*
* be directed to disk.
*/
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- u64 pos, int bio_iter)
+ struct ppa_addr ppa, int bio_iter)
{
+ struct pblk *pblk = container_of(rb, struct pblk, rwb);
struct pblk_rb_entry *entry;
struct pblk_w_ctx *w_ctx;
+ struct ppa_addr l2p_ppa;
+ u64 pos = pblk_addr_to_cacheline(ppa);
void *data;
int flags;
int ret = 1;
- spin_lock(&rb->w_lock);
#ifdef CONFIG_NVM_DEBUG
/* Caller must ensure that the access will not cause an overflow */
w_ctx = &entry->w_ctx;
flags = READ_ONCE(w_ctx->flags);
+ spin_lock(&rb->w_lock);
+ spin_lock(&pblk->trans_lock);
+ l2p_ppa = pblk_trans_map_get(pblk, lba);
+ spin_unlock(&pblk->trans_lock);
+
/* Check if the entry has been overwritten or is scheduled to be */
- if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) {
+ if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
+ flags & PBLK_WRITABLE_ENTRY) {
ret = 0;
goto out;
}
BUG_ON(!pblk_addr_in_cache(ppa));
#endif
- return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba,
- pblk_addr_to_cacheline(ppa), bio_iter);
+ return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, bio_iter);
}
static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
}
WARN_ON(test_and_set_bit(i, read_bitmap));
advanced_bio = 1;
+#ifdef CONFIG_NVM_DEBUG
+ atomic_long_inc(&pblk->cache_reads);
+#endif
} else {
/* Read from media non-cached sectors */
rqd->ppa_list[j++] = p;
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
+ if (pblk_io_aligned(pblk, nr_secs))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(nr_secs, &pblk->inflight_reads);
#endif
{
int err;
- rqd->flags = pblk_set_read_mode(pblk);
-
err = pblk_submit_io(pblk, rqd);
if (err)
return NVM_IO_ERR;
{
struct pblk *pblk = rqd->private;
struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct bio *bio = rqd->bio;
if (rqd->error)
pblk_log_read_err(pblk, rqd);
#ifdef CONFIG_NVM_DEBUG
else
- WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
+ WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
#endif
- if (rqd->nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
bio_put(bio);
- if (r_ctx->orig_bio) {
+ if (r_ctx->private) {
+ struct bio *orig_bio = r_ctx->private;
+
#ifdef CONFIG_NVM_DEBUG
- WARN_ONCE(r_ctx->orig_bio->bi_error,
- "pblk: corrupted read bio\n");
+ WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n");
#endif
- bio_endio(r_ctx->orig_bio);
- bio_put(r_ctx->orig_bio);
+ bio_endio(orig_bio);
+ bio_put(orig_bio);
}
#ifdef CONFIG_NVM_DEBUG
#endif
pblk_free_rqd(pblk, rqd, READ);
+ atomic_dec(&pblk->inflight_io);
}
static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
rqd->bio = new_bio;
rqd->nr_ppas = nr_holes;
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
rqd->end_io = NULL;
if (unlikely(nr_secs > 1 && nr_holes == 1)) {
goto retry;
}
WARN_ON(test_and_set_bit(0, read_bitmap));
+#ifdef CONFIG_NVM_DEBUG
+ atomic_long_inc(&pblk->cache_reads);
+#endif
} else {
rqd->ppa_addr = ppa;
}
+
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
}
int pblk_submit_read(struct pblk *pblk, struct bio *bio)
*/
bio_init_idx = pblk_get_bi_idx(bio);
+ rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd->dma_meta_list);
+ if (!rqd->meta_list) {
+ pr_err("pblk: not able to allocate ppa list\n");
+ goto fail_rqd_free;
+ }
+
if (nr_secs > 1) {
- rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd->dma_ppa_list);
- if (!rqd->ppa_list) {
- pr_err("pblk: not able to allocate ppa list\n");
- goto fail_rqd_free;
- }
+ rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
+ rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
} else {
bio_get(bio);
if (bitmap_full(&read_bitmap, nr_secs)) {
bio_endio(bio);
+ atomic_inc(&pblk->inflight_io);
pblk_end_io_read(rqd);
return NVM_IO_OK;
}
/* All sectors are to be read from the device */
if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
struct bio *int_bio = NULL;
- struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
/* Clone read bio to deal with read errors internally */
- int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
+ int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
if (!int_bio) {
pr_err("pblk: could not clone read bio\n");
return NVM_IO_ERR;
}
rqd->bio = int_bio;
- r_ctx->orig_bio = bio;
+ r_ctx->private = bio;
ret = pblk_submit_read_io(pblk, rqd);
if (ret) {
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct request_queue *q = dev->q;
struct bio *bio;
struct nvm_rq rqd;
int ret, data_len;
memset(&rqd, 0, sizeof(struct nvm_rq));
+ rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd.dma_meta_list);
+ if (!rqd.meta_list)
+ return NVM_IO_ERR;
+
if (nr_secs > 1) {
- rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd.dma_ppa_list);
- if (!rqd.ppa_list)
- return NVM_IO_ERR;
+ rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+ rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
*secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
nr_secs);
- if (*secs_to_gc == 1) {
- struct ppa_addr ppa;
-
- ppa = rqd.ppa_list[0];
- nvm_dev_dma_free(dev->parent, rqd.ppa_list,
- rqd.dma_ppa_list);
- rqd.ppa_addr = ppa;
- }
+ if (*secs_to_gc == 1)
+ rqd.ppa_addr = rqd.ppa_list[0];
} else {
*secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
}
goto out;
data_len = (*secs_to_gc) * geo->sec_size;
- bio = bio_map_kern(q, data, data_len, GFP_KERNEL);
+ bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len,
+ PBLK_KMALLOC_META, GFP_KERNEL);
if (IS_ERR(bio)) {
pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
goto err_free_dma;
rqd.end_io = pblk_end_io_sync;
rqd.private = &wait;
rqd.nr_ppas = *secs_to_gc;
+ rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
rqd.bio = bio;
ret = pblk_submit_read_io(pblk, &rqd);
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: GC read I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
if (rqd.error) {
atomic_long_inc(&pblk->read_failed_gc);
#endif
out:
- if (rqd.nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return NVM_IO_OK;
err_free_dma:
- if (rqd.nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return NVM_IO_ERR;
}
return 0;
}
-__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta)
+__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf)
{
u32 crc;
- crc = pblk_calc_emeta_crc(pblk, emeta);
- if (le32_to_cpu(emeta->crc) != crc)
+ crc = pblk_calc_emeta_crc(pblk, emeta_buf);
+ if (le32_to_cpu(emeta_buf->crc) != crc)
return NULL;
- if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC)
+ if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
return NULL;
- return pblk_line_emeta_to_lbas(emeta);
+ return emeta_to_lbas(pblk, emeta_buf);
}
static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
__le64 *lba_list;
int data_start;
int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
int i;
- lba_list = pblk_recov_get_lba_list(pblk, emeta);
+ lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
if (!lba_list)
return 1;
data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
- nr_data_lbas = lm->sec_per_line - lm->emeta_sec;
- nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas);
+ nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0];
+ nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
struct ppa_addr ppa;
if (test_and_set_bit(i, line->invalid_bitmap))
WARN_ONCE(1, "pblk: rec. double invalidate:\n");
else
- line->vsc--;
+ le32_add_cpu(line->vsc, -1);
spin_unlock(&line->lock);
continue;
if (nr_valid_lbas != nr_lbas)
pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
- line->id, line->emeta->nr_valid_lbas, nr_lbas);
+ line->id, emeta_buf->nr_valid_lbas, nr_lbas);
line->left_msecs = 0;
struct pblk_line_meta *lm = &pblk->lm;
int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
- return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec -
+ return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
nr_bb * geo->sec_per_blk;
}
r_ptr_int = r_ptr;
next_read_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
pr_err("pblk: L2P recovery read timed out\n");
return -EINTR;
}
-
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* At this point, the read should not fail. If it does, it is a problem
return 0;
}
+static void pblk_recov_complete(struct kref *ref)
+{
+ struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
+
+ complete(&pad_rq->wait);
+}
+
+static void pblk_end_io_recov(struct nvm_rq *rqd)
+{
+ struct pblk_pad_rq *pad_rq = rqd->private;
+ struct pblk *pblk = pad_rq->pblk;
+ struct nvm_tgt_dev *dev = pblk->dev;
+
+ kref_put(&pad_rq->ref, pblk_recov_complete);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
+ pblk_free_rqd(pblk, rqd, WRITE);
+}
+
static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
- struct pblk_recov_alloc p, int left_ppas)
+ int left_ppas)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct ppa_addr *ppa_list;
struct pblk_sec_meta *meta_list;
+ struct pblk_pad_rq *pad_rq;
struct nvm_rq *rqd;
struct bio *bio;
void *data;
dma_addr_t dma_ppa_list, dma_meta_list;
- __le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta);
+ __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
u64 w_ptr = line->cur_sec;
- int left_line_ppas = line->left_msecs;
- int rq_ppas, rq_len;
+ int left_line_ppas, rq_ppas, rq_len;
int i, j;
int ret = 0;
- DECLARE_COMPLETION_ONSTACK(wait);
- ppa_list = p.ppa_list;
- meta_list = p.meta_list;
- rqd = p.rqd;
- data = p.data;
- dma_ppa_list = p.dma_ppa_list;
- dma_meta_list = p.dma_meta_list;
+ spin_lock(&line->lock);
+ left_line_ppas = line->left_msecs;
+ spin_unlock(&line->lock);
+
+ pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
+ if (!pad_rq)
+ return -ENOMEM;
+
+ data = vzalloc(pblk->max_write_pgs * geo->sec_size);
+ if (!data) {
+ ret = -ENOMEM;
+ goto free_rq;
+ }
+
+ pad_rq->pblk = pblk;
+ init_completion(&pad_rq->wait);
+ kref_init(&pad_rq->ref);
next_pad_rq:
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
- if (!rq_ppas)
- rq_ppas = pblk->min_write_pgs;
+ if (rq_ppas < pblk->min_write_pgs) {
+ pr_err("pblk: corrupted pad line %d\n", line->id);
+ goto free_rq;
+ }
+
rq_len = rq_ppas * geo->sec_size;
+ meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
+ if (!meta_list) {
+ ret = -ENOMEM;
+ goto free_data;
+ }
+
+ ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
+ rqd = pblk_alloc_rqd(pblk, WRITE);
+ if (IS_ERR(rqd)) {
+ ret = PTR_ERR(rqd);
+ goto fail_free_meta;
+ }
+ memset(rqd, 0, pblk_w_rq_size);
+
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
- if (IS_ERR(bio))
- return PTR_ERR(bio);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ goto fail_free_rqd;
+ }
bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- memset(rqd, 0, pblk_r_rq_size);
-
rqd->bio = bio;
rqd->opcode = NVM_OP_PWRITE;
rqd->flags = pblk_set_progr_mode(pblk, WRITE);
rqd->ppa_list = ppa_list;
rqd->dma_ppa_list = dma_ppa_list;
rqd->dma_meta_list = dma_meta_list;
- rqd->end_io = pblk_end_io_sync;
- rqd->private = &wait;
+ rqd->end_io = pblk_end_io_recov;
+ rqd->private = pad_rq;
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
struct ppa_addr dev_ppa;
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
pblk_map_invalidate(pblk, dev_ppa);
- meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
- lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
+ lba_list[w_ptr] = meta_list[i].lba = addr_empty;
rqd->ppa_list[i] = dev_ppa;
}
}
+ kref_get(&pad_rq->ref);
+
ret = pblk_submit_io(pblk, rqd);
if (ret) {
pr_err("pblk: I/O submission failed: %d\n", ret);
- return ret;
+ goto free_data;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: L2P recovery write timed out\n");
- }
- reinit_completion(&wait);
+ atomic_dec(&pblk->inflight_io);
left_line_ppas -= rq_ppas;
left_ppas -= rq_ppas;
- if (left_ppas > 0 && left_line_ppas)
+ if (left_ppas && left_line_ppas)
goto next_pad_rq;
- return 0;
+ kref_put(&pad_rq->ref, pblk_recov_complete);
+
+ if (!wait_for_completion_io_timeout(&pad_rq->wait,
+ msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+ pr_err("pblk: pad write timed out\n");
+ ret = -ETIME;
+ }
+
+free_rq:
+ kfree(pad_rq);
+free_data:
+ vfree(data);
+ return ret;
+
+fail_free_rqd:
+ pblk_free_rqd(pblk, rqd, WRITE);
+fail_free_meta:
+ nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+ kfree(pad_rq);
+ return ret;
}
/* When this function is called, it means that not all upper pages have been
rec_round = 0;
next_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: L2P recovery read timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* This should not happen since the read failed during normal recovery,
if (pad_secs > line->left_msecs)
pad_secs = line->left_msecs;
- ret = pblk_recov_pad_oob(pblk, line, p, pad_secs);
+ ret = pblk_recov_pad_oob(pblk, line, pad_secs);
if (ret)
pr_err("pblk: OOB padding failed (err:%d)\n", ret);
if (ret)
pr_err("pblk: OOB read failed (err:%d)\n", ret);
- line->left_ssecs = line->left_msecs;
left_ppas = 0;
}
*done = 1;
next_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: L2P recovery read timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* Reached the end of the written line */
/* Roll back failed sectors */
line->cur_sec -= nr_error_bits;
line->left_msecs += nr_error_bits;
- line->left_ssecs = line->left_msecs;
bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
left_ppas = 0;
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line, *tline, *data_line = NULL;
- struct line_smeta *smeta;
- struct line_emeta *emeta;
+ struct pblk_smeta *smeta;
+ struct pblk_emeta *emeta;
+ struct line_smeta *smeta_buf;
int found_lines = 0, recovered_lines = 0, open_lines = 0;
int is_next = 0;
int meta_line;
spin_lock(&l_mg->free_lock);
meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
set_bit(meta_line, &l_mg->meta_bitmap);
- smeta = l_mg->sline_meta[meta_line].meta;
- emeta = l_mg->eline_meta[meta_line].meta;
+ smeta = l_mg->sline_meta[meta_line];
+ emeta = l_mg->eline_meta[meta_line];
+ smeta_buf = (struct line_smeta *)smeta;
spin_unlock(&l_mg->free_lock);
/* Order data lines using their sequence number */
memset(smeta, 0, lm->smeta_len);
line->smeta = smeta;
- line->lun_bitmap = ((void *)(smeta)) +
+ line->lun_bitmap = ((void *)(smeta_buf)) +
sizeof(struct line_smeta);
/* Lines that cannot be read are assumed as not written here */
if (pblk_line_read_smeta(pblk, line))
continue;
- crc = pblk_calc_smeta_crc(pblk, smeta);
- if (le32_to_cpu(smeta->crc) != crc)
+ crc = pblk_calc_smeta_crc(pblk, smeta_buf);
+ if (le32_to_cpu(smeta_buf->crc) != crc)
continue;
- if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC)
+ if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
continue;
- if (le16_to_cpu(smeta->header.version) != 1) {
+ if (le16_to_cpu(smeta_buf->header.version) != 1) {
pr_err("pblk: found incompatible line version %u\n",
- smeta->header.version);
+ smeta_buf->header.version);
return ERR_PTR(-EINVAL);
}
/* The first valid instance uuid is used for initialization */
if (!valid_uuid) {
- memcpy(pblk->instance_uuid, smeta->header.uuid, 16);
+ memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
valid_uuid = 1;
}
- if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) {
+ if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
pr_debug("pblk: ignore line %u due to uuid mismatch\n",
i);
continue;
/* Update line metadata */
spin_lock(&line->lock);
- line->id = le32_to_cpu(line->smeta->header.id);
- line->type = le16_to_cpu(line->smeta->header.type);
- line->seq_nr = le64_to_cpu(line->smeta->seq_nr);
+ line->id = le32_to_cpu(smeta_buf->header.id);
+ line->type = le16_to_cpu(smeta_buf->header.type);
+ line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
spin_unlock(&line->lock);
/* Update general metadata */
pblk_recov_line_add_ordered(&recov_list, line);
found_lines++;
pr_debug("pblk: recovering data line %d, seq:%llu\n",
- line->id, smeta->seq_nr);
+ line->id, smeta_buf->seq_nr);
}
if (!found_lines) {
recovered_lines++;
/* Calculate where emeta starts based on the line bb */
- off = lm->sec_per_line - lm->emeta_sec;
+ off = lm->sec_per_line - lm->emeta_sec[0];
nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
off -= nr_bb * geo->sec_per_pl;
- memset(emeta, 0, lm->emeta_len);
- line->emeta = emeta;
line->emeta_ssec = off;
+ line->emeta = emeta;
+ memset(line->emeta->buf, 0, lm->emeta_len[0]);
- if (pblk_line_read_emeta(pblk, line)) {
+ if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
pblk_recov_l2p_from_oob(pblk, line);
goto next;
}
}
/*
- * Pad until smeta can be read on current data line
+ * Pad current line
*/
-void pblk_recov_pad(struct pblk *pblk)
+int pblk_recov_pad(struct pblk *pblk)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
struct pblk_line *line;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct nvm_rq *rqd;
- struct pblk_recov_alloc p;
- struct ppa_addr *ppa_list;
- struct pblk_sec_meta *meta_list;
- void *data;
- dma_addr_t dma_ppa_list, dma_meta_list;
+ int left_msecs;
+ int ret = 0;
spin_lock(&l_mg->free_lock);
line = l_mg->data_line;
+ left_msecs = line->left_msecs;
spin_unlock(&l_mg->free_lock);
- rqd = pblk_alloc_rqd(pblk, READ);
- if (IS_ERR(rqd))
- return;
-
- meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
- if (!meta_list)
- goto free_rqd;
-
- ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
- dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
-
- data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
- if (!data)
- goto free_meta_list;
-
- p.ppa_list = ppa_list;
- p.meta_list = meta_list;
- p.rqd = rqd;
- p.data = data;
- p.dma_ppa_list = dma_ppa_list;
- p.dma_meta_list = dma_meta_list;
-
- if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) {
- pr_err("pblk: Tear down padding failed\n");
- goto free_data;
+ ret = pblk_recov_pad_oob(pblk, line, left_msecs);
+ if (ret) {
+ pr_err("pblk: Tear down padding failed (%d)\n", ret);
+ return ret;
}
- pblk_line_close(pblk, line);
-
-free_data:
- kfree(data);
-free_meta_list:
- nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-free_rqd:
- pblk_free_rqd(pblk, rqd, READ);
+ pblk_line_close_meta(pblk, line);
+ return ret;
}
mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
}
+int pblk_rl_is_limit(struct pblk_rl *rl)
+{
+ int rb_space;
+
+ rb_space = atomic_read(&rl->rb_space);
+
+ return (rb_space == 0);
+}
+
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
{
int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
+ int rb_space = atomic_read(&rl->rb_space);
- return (!(rb_user_cnt + nr_entries > rl->rb_user_max));
+ if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
+ return NVM_IO_ERR;
+
+ if (rb_user_cnt >= rl->rb_user_max)
+ return NVM_IO_REQUEUE;
+
+ return NVM_IO_OK;
+}
+
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
+{
+ int rb_space = atomic_read(&rl->rb_space);
+
+ if (unlikely(rb_space >= 0))
+ atomic_sub(nr_entries, &rl->rb_space);
}
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
/* If there is no user I/O let GC take over space on the write buffer */
rb_user_active = READ_ONCE(rl->rb_user_active);
- return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active));
+ return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
}
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
if (free_blocks >= rl->high) {
- rl->rb_user_max = max - rl->rb_gc_rsv;
- rl->rb_gc_max = rl->rb_gc_rsv;
+ rl->rb_user_max = max;
+ rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_HIGH;
} else if (free_blocks < rl->high) {
int shift = rl->high_pw - rl->rb_windows_pw;
int user_windows = free_blocks >> shift;
int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
- int gc_max;
rl->rb_user_max = user_max;
- gc_max = max - rl->rb_user_max;
- rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
-
- if (free_blocks > rl->low)
- rl->rb_state = PBLK_RL_MID;
- else
- rl->rb_state = PBLK_RL_LOW;
+ rl->rb_gc_max = max - user_max;
+
+ if (free_blocks <= rl->rsv_blocks) {
+ rl->rb_user_max = 0;
+ rl->rb_gc_max = max;
+ }
+
+ /* In the worst case, we will need to GC lines in the low list
+ * (high valid sector count). If there are lines to GC on high
+ * or mid lists, these will be prioritized
+ */
+ rl->rb_state = PBLK_RL_LOW;
}
return rl->rb_state;
}
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
-{
- rl->rb_gc_rsv = rl->rb_gc_max = rsv;
-}
-
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
{
struct pblk *pblk = container_of(rl, struct pblk, rl);
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
{
- struct pblk *pblk = container_of(rl, struct pblk, rl);
int blk_in_line = atomic_read(&line->blk_in_line);
- int ret;
atomic_sub(blk_in_line, &rl->free_blocks);
+}
+
+void pblk_gc_should_kick(struct pblk *pblk)
+{
+ struct pblk_rl *rl = &pblk->rl;
+ int ret;
/* Rates will not change that often - no need to lock update */
ret = pblk_rl_update_rates(rl, rl->rb_budget);
pblk_gc_should_stop(pblk);
}
-int pblk_rl_gc_thrs(struct pblk_rl *rl)
+int pblk_rl_high_thrs(struct pblk_rl *rl)
{
return rl->high;
}
+int pblk_rl_low_thrs(struct pblk_rl *rl)
+{
+ return rl->low;
+}
+
int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
{
return rl->rb_user_max;
void pblk_rl_init(struct pblk_rl *rl, int budget)
{
+ struct pblk *pblk = container_of(rl, struct pblk, rl);
+ struct pblk_line_meta *lm = &pblk->lm;
+ int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
unsigned int rb_windows;
rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
- rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
rl->high_pw = get_count_order(rl->high);
+ rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
+ if (rl->low < min_blocks)
+ rl->low = min_blocks;
+
+ rl->rsv_blocks = min_blocks;
+
/* This will always be a power-of-2 */
rb_windows = budget / PBLK_MAX_REQ_ADDRS;
- rl->rb_windows_pw = get_count_order(rb_windows) + 1;
+ rl->rb_windows_pw = get_count_order(rb_windows);
/* To start with, all buffer is available to user I/O writers */
rl->rb_budget = budget;
rl->rb_user_max = budget;
- atomic_set(&rl->rb_user_cnt, 0);
rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_HIGH;
+
+ atomic_set(&rl->rb_user_cnt, 0);
atomic_set(&rl->rb_gc_cnt, 0);
+ atomic_set(&rl->rb_space, -1);
setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
+
rl->rb_user_active = 0;
+ rl->rb_gc_active = 0;
}
static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
int free_blocks, total_blocks;
int rb_user_max, rb_user_cnt;
- int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state;
+ int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
free_blocks = atomic_read(&pblk->rl.free_blocks);
rb_user_max = pblk->rl.rb_user_max;
rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
rb_gc_max = pblk->rl.rb_gc_max;
- rb_gc_rsv = pblk->rl.rb_gc_rsv;
rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
rb_budget = pblk->rl.rb_budget;
rb_state = pblk->rl.rb_state;
- total_blocks = geo->blks_per_lun * geo->nr_luns;
+ total_blocks = pblk->rl.total_blocks;
return snprintf(page, PAGE_SIZE,
- "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+ "u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
rb_user_cnt,
rb_user_max,
rb_gc_cnt,
rb_gc_max,
- rb_gc_rsv,
rb_state,
rb_budget,
pblk->rl.low,
ssize_t sz = 0;
int nr_free_lines;
int cur_data, cur_log;
- int free_line_cnt = 0, closed_line_cnt = 0;
+ int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
int d_line_cnt = 0, l_line_cnt = 0;
int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
- int free = 0, bad = 0, cor = 0;
- int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
+ int bad = 0, cor = 0;
+ int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
int map_weight = 0, meta_weight = 0;
spin_lock(&l_mg->free_lock);
free_line_cnt++;
spin_unlock(&l_mg->free_lock);
+ spin_lock(&l_mg->close_lock);
+ list_for_each_entry(line, &l_mg->emeta_list, list)
+ emeta_line_cnt++;
+ spin_unlock(&l_mg->close_lock);
+
spin_lock(&l_mg->gc_lock);
list_for_each_entry(line, &l_mg->gc_full_list, list) {
if (line->type == PBLK_LINETYPE_DATA)
gc_empty++;
}
- list_for_each_entry(line, &l_mg->free_list, list)
- free++;
list_for_each_entry(line, &l_mg->bad_list, list)
bad++;
list_for_each_entry(line, &l_mg->corrupt_list, list)
if (l_mg->data_line) {
cur_sec = l_mg->data_line->cur_sec;
msecs = l_mg->data_line->left_msecs;
- ssecs = l_mg->data_line->left_ssecs;
- vsc = l_mg->data_line->vsc;
+ vsc = le32_to_cpu(*l_mg->data_line->vsc);
sec_in_line = l_mg->data_line->sec_in_line;
meta_weight = bitmap_weight(&l_mg->meta_bitmap,
PBLK_DATA_LINES);
spin_unlock(&l_mg->free_lock);
if (nr_free_lines != free_line_cnt)
- pr_err("pblk: corrupted free line list\n");
+ pr_err("pblk: corrupted free line list:%d/%d\n",
+ nr_free_lines, free_line_cnt);
sz = snprintf(page, PAGE_SIZE - sz,
"line: nluns:%d, nblks:%d, nsecs:%d\n",
geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
sz += snprintf(page + sz, PAGE_SIZE - sz,
- "lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n",
+ "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
cur_data, cur_log,
- free, nr_free_lines, bad, cor,
+ nr_free_lines,
+ emeta_line_cnt, meta_weight,
closed_line_cnt,
+ bad, cor,
d_line_cnt, l_line_cnt,
l_mg->nr_lines);
atomic_read(&pblk->gc.inflight_gc));
sz += snprintf(page + sz, PAGE_SIZE - sz,
- "data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
- cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line,
- map_weight, lm->sec_per_line, meta_weight);
+ "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
+ cur_data, cur_sec, msecs, vsc, sec_in_line,
+ map_weight, lm->sec_per_line,
+ atomic_read(&pblk->inflight_io));
return sz;
}
lm->smeta_len, lm->smeta_sec);
sz += snprintf(page + sz, PAGE_SIZE - sz,
"emeta - len:%d, sec:%d, bb_start:%d\n",
- lm->emeta_len, lm->emeta_sec,
+ lm->emeta_len[0], lm->emeta_sec[0],
lm->emeta_bb);
sz += snprintf(page + sz, PAGE_SIZE - sz,
"bitmap lengths: sec:%d, blk:%d, lun:%d\n",
return sz;
}
+static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
+}
+
#ifdef CONFIG_NVM_DEBUG
static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
{
atomic_long_read(&pblk->padded_wb),
atomic_long_read(&pblk->sub_writes),
atomic_long_read(&pblk->sync_writes),
- atomic_long_read(&pblk->compl_writes),
atomic_long_read(&pblk->recov_writes),
atomic_long_read(&pblk->recov_gc_writes),
atomic_long_read(&pblk->recov_gc_reads),
+ atomic_long_read(&pblk->cache_reads),
atomic_long_read(&pblk->sync_reads));
}
#endif
-static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
- size_t len)
+static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
+ size_t len)
{
- struct pblk_gc *gc = &pblk->gc;
size_t c_len;
- int value;
+ int force;
c_len = strcspn(page, "\n");
if (c_len >= len)
return -EINVAL;
- if (kstrtouint(page, 0, &value))
+ if (kstrtouint(page, 0, &force))
return -EINVAL;
- spin_lock(&gc->lock);
- pblk_rl_set_gc_rsc(&pblk->rl, value);
- spin_unlock(&gc->lock);
+ pblk_gc_sysfs_force(pblk, force);
return len;
}
-static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
- size_t len)
+static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
+ const char *page, size_t len)
{
size_t c_len;
- int force;
+ int sec_per_write;
c_len = strcspn(page, "\n");
if (c_len >= len)
return -EINVAL;
- if (kstrtouint(page, 0, &force))
+ if (kstrtouint(page, 0, &sec_per_write))
return -EINVAL;
- if (force < 0 || force > 1)
+ if (sec_per_write < pblk->min_write_pgs
+ || sec_per_write > pblk->max_write_pgs
+ || sec_per_write % pblk->min_write_pgs != 0)
return -EINVAL;
- pblk_gc_sysfs_force(pblk, force);
+ pblk_set_sec_per_write(pblk, sec_per_write);
return len;
}
.mode = 0200,
};
-static struct attribute sys_gc_rl_max = {
- .name = "gc_rl_max",
- .mode = 0200,
+static struct attribute sys_max_sec_per_write = {
+ .name = "max_sec_per_write",
+ .mode = 0644,
};
#ifdef CONFIG_NVM_DEBUG
&sys_errors_attr,
&sys_gc_state,
&sys_gc_force,
- &sys_gc_rl_max,
+ &sys_max_sec_per_write,
&sys_rb_attr,
&sys_stats_ppaf_attr,
&sys_lines_attr,
return pblk_sysfs_lines(pblk, buf);
else if (strcmp(attr->name, "lines_info") == 0)
return pblk_sysfs_lines_info(pblk, buf);
+ else if (strcmp(attr->name, "max_sec_per_write") == 0)
+ return pblk_sysfs_get_sec_per_write(pblk, buf);
#ifdef CONFIG_NVM_DEBUG
else if (strcmp(attr->name, "stats") == 0)
return pblk_sysfs_stats_debug(pblk, buf);
{
struct pblk *pblk = container_of(kobj, struct pblk, kobj);
- if (strcmp(attr->name, "gc_rl_max") == 0)
- return pblk_sysfs_rate_store(pblk, buf, len);
- else if (strcmp(attr->name, "gc_force") == 0)
+ if (strcmp(attr->name, "gc_force") == 0)
return pblk_sysfs_gc_force(pblk, buf, len);
+ else if (strcmp(attr->name, "max_sec_per_write") == 0)
+ return pblk_sysfs_set_sec_per_write(pblk, buf, len);
return 0;
}
#include "pblk.h"
-static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
-{
-#ifdef CONFIG_NVM_DEBUG
- atomic_long_inc(&pblk->sync_writes);
-#endif
-
- /* Counter protected by rb sync lock */
- line->left_ssecs--;
- if (!line->left_ssecs)
- pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
-}
-
static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx)
{
for (i = 0; i < c_ctx->nr_valid; i++) {
struct pblk_w_ctx *w_ctx;
- struct ppa_addr p;
- struct pblk_line *line;
w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
-
- p = rqd->ppa_list[i];
- line = &pblk->lines[pblk_dev_ppa_to_line(p)];
- pblk_sync_line(pblk, line);
-
while ((original_bio = bio_list_pop(&w_ctx->bios)))
bio_endio(original_bio);
}
#ifdef CONFIG_NVM_DEBUG
- atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes);
+ atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes);
#endif
ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
}
INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
- queue_work(pblk->kw_wq, &recovery->ws_rec);
+ queue_work(pblk->close_wq, &recovery->ws_rec);
out:
pblk_complete_write(pblk, rqd, c_ctx);
}
#ifdef CONFIG_NVM_DEBUG
else
- WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
+ WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
#endif
pblk_complete_write(pblk, rqd, c_ctx);
+ atomic_dec(&pblk->inflight_io);
+}
+
+static void pblk_end_io_write_meta(struct nvm_rq *rqd)
+{
+ struct pblk *pblk = rqd->private;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_line *line = m_ctx->private;
+ struct pblk_emeta *emeta = line->emeta;
+ int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
+ struct pblk_lun *rlun = &pblk->luns[pos];
+ int sync;
+
+ up(&rlun->wr_sem);
+
+ if (rqd->error) {
+ pblk_log_write_err(pblk, rqd);
+ pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
+ }
+#ifdef CONFIG_NVM_DEBUG
+ else
+ WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
+#endif
+
+ sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
+ if (sync == emeta->nr_entries)
+ pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws,
+ pblk->close_wq);
+
+ bio_put(rqd->bio);
+ pblk_free_rqd(pblk, rqd, READ);
+
+ atomic_dec(&pblk->inflight_io);
}
static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int nr_secs)
+ unsigned int nr_secs,
+ nvm_end_io_fn(*end_io))
{
struct nvm_tgt_dev *dev = pblk->dev;
rqd->nr_ppas = nr_secs;
rqd->flags = pblk_set_progr_mode(pblk, WRITE);
rqd->private = pblk;
- rqd->end_io = pblk_end_io_write;
+ rqd->end_io = end_io;
rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
&rqd->dma_meta_list);
}
static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx)
+ struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
{
struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *e_line = pblk_line_get_data_next(pblk);
- struct ppa_addr erase_ppa;
+ struct pblk_line *e_line = pblk_line_get_erase(pblk);
unsigned int valid = c_ctx->nr_valid;
unsigned int padded = c_ctx->nr_padded;
unsigned int nr_secs = valid + padded;
int ret = 0;
lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
- if (!lun_bitmap) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!lun_bitmap)
+ return -ENOMEM;
c_ctx->lun_bitmap = lun_bitmap;
- ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
+ ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
if (ret) {
kfree(lun_bitmap);
- goto out;
+ return ret;
}
- ppa_set_empty(&erase_ppa);
if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
else
pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
- valid, &erase_ppa);
-
-out:
- if (unlikely(e_line && !ppa_empty(erase_ppa))) {
- if (pblk_blk_erase_async(pblk, erase_ppa)) {
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int bit;
-
- atomic_inc(&e_line->left_eblks);
- bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
- WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
- up(&pblk->erase_sem);
- }
- }
+ valid, erase_ppa);
- return ret;
+ return 0;
}
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
c_ctx->lun_bitmap = lun_bitmap;
- ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas);
+ ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
if (ret)
return ret;
return secs_to_sync;
}
+static inline int pblk_valid_meta_ppa(struct pblk *pblk,
+ struct pblk_line *meta_line,
+ struct ppa_addr *ppa_list, int nr_ppas)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line *data_line;
+ struct ppa_addr ppa, ppa_opt;
+ u64 paddr;
+ int i;
+
+ data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
+ paddr = pblk_lookup_page(pblk, meta_line);
+ ppa = addr_to_gen_ppa(pblk, paddr, 0);
+
+ if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
+ return 1;
+
+ /* Schedule a metadata I/O that is half the distance from the data I/O
+ * with regards to the number of LUNs forming the pblk instance. This
+ * balances LUN conflicts across every I/O.
+ *
+ * When the LUN configuration changes (e.g., due to GC), this distance
+ * can align, which would result on a LUN deadlock. In this case, modify
+ * the distance to not be optimal, but allow metadata I/Os to succeed.
+ */
+ ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+ if (unlikely(ppa_opt.ppa == ppa.ppa)) {
+ data_line->meta_distance--;
+ return 0;
+ }
+
+ for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+ if (ppa_list[i].g.ch == ppa_opt.g.ch &&
+ ppa_list[i].g.lun == ppa_opt.g.lun)
+ return 1;
+
+ if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
+ for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+ if (ppa_list[i].g.ch == ppa.g.ch &&
+ ppa_list[i].g.lun == ppa.g.lun)
+ return 0;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_emeta *emeta = meta_line->emeta;
+ struct pblk_g_ctx *m_ctx;
+ struct pblk_lun *rlun;
+ struct bio *bio;
+ struct nvm_rq *rqd;
+ void *data;
+ u64 paddr;
+ int rq_ppas = pblk->min_write_pgs;
+ int id = meta_line->id;
+ int rq_len;
+ int i, j;
+ int ret;
+
+ rqd = pblk_alloc_rqd(pblk, READ);
+ if (IS_ERR(rqd)) {
+ pr_err("pblk: cannot allocate write req.\n");
+ return PTR_ERR(rqd);
+ }
+ m_ctx = nvm_rq_to_pdu(rqd);
+ m_ctx->private = meta_line;
+
+ rq_len = rq_ppas * geo->sec_size;
+ data = ((void *)emeta->buf) + emeta->mem;
+
+ bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+ l_mg->emeta_alloc_type, GFP_KERNEL);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ goto fail_free_rqd;
+ }
+ bio->bi_iter.bi_sector = 0; /* internal bio */
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ rqd->bio = bio;
+
+ ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
+ if (ret)
+ goto fail_free_bio;
+
+ for (i = 0; i < rqd->nr_ppas; ) {
+ spin_lock(&meta_line->lock);
+ paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
+ spin_unlock(&meta_line->lock);
+ for (j = 0; j < rq_ppas; j++, i++, paddr++)
+ rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
+ }
+
+ rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
+ ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+ if (ret) {
+ pr_err("pblk: lun semaphore timed out (%d)\n", ret);
+ goto fail_free_bio;
+ }
+
+ emeta->mem += rq_len;
+ if (emeta->mem >= lm->emeta_len[0]) {
+ spin_lock(&l_mg->close_lock);
+ list_del(&meta_line->list);
+ WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line),
+ "pblk: corrupt meta line %d\n", meta_line->id);
+ spin_unlock(&l_mg->close_lock);
+ }
+
+ ret = pblk_submit_io(pblk, rqd);
+ if (ret) {
+ pr_err("pblk: emeta I/O submission failed: %d\n", ret);
+ goto fail_rollback;
+ }
+
+ return NVM_IO_OK;
+
+fail_rollback:
+ spin_lock(&l_mg->close_lock);
+ pblk_dealloc_page(pblk, meta_line, rq_ppas);
+ list_add(&meta_line->list, &meta_line->list);
+ spin_unlock(&l_mg->close_lock);
+fail_free_bio:
+ if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
+ bio_put(bio);
+fail_free_rqd:
+ pblk_free_rqd(pblk, rqd, READ);
+ return ret;
+}
+
+static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
+ int prev_n)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line *meta_line;
+
+ spin_lock(&l_mg->close_lock);
+retry:
+ if (list_empty(&l_mg->emeta_list)) {
+ spin_unlock(&l_mg->close_lock);
+ return 0;
+ }
+ meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
+ if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line))
+ goto retry;
+ spin_unlock(&l_mg->close_lock);
+
+ if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
+ return 0;
+
+ return pblk_submit_meta_io(pblk, meta_line);
+}
+
+static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+ struct ppa_addr erase_ppa;
+ int err;
+
+ ppa_set_empty(&erase_ppa);
+
+ /* Assign lbas to ppas and populate request structure */
+ err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+ if (err) {
+ pr_err("pblk: could not setup write request: %d\n", err);
+ return NVM_IO_ERR;
+ }
+
+ if (likely(ppa_empty(erase_ppa))) {
+ /* Submit metadata write for previous data line */
+ err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
+ if (err) {
+ pr_err("pblk: metadata I/O submission failed: %d", err);
+ return NVM_IO_ERR;
+ }
+
+ /* Submit data write for current data line */
+ err = pblk_submit_io(pblk, rqd);
+ if (err) {
+ pr_err("pblk: data I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
+ } else {
+ /* Submit data write for current data line */
+ err = pblk_submit_io(pblk, rqd);
+ if (err) {
+ pr_err("pblk: data I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
+
+ /* Submit available erase for next data line */
+ if (pblk_blk_erase_async(pblk, erase_ppa)) {
+ struct pblk_line *e_line = pblk_line_get_erase(pblk);
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int bit;
+
+ atomic_inc(&e_line->left_eblks);
+ bit = pblk_ppa_to_pos(geo, erase_ppa);
+ WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
+ }
+ }
+
+ return NVM_IO_OK;
+}
+
+static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+ struct bio *bio = rqd->bio;
+
+ if (c_ctx->nr_padded)
+ pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded);
+}
+
static int pblk_submit_write(struct pblk *pblk)
{
struct bio *bio;
struct nvm_rq *rqd;
- struct pblk_c_ctx *c_ctx;
- unsigned int pgs_read;
unsigned int secs_avail, secs_to_sync, secs_to_com;
unsigned int secs_to_flush;
unsigned long pos;
- int err;
/* If there are no sectors in the cache, flushes (bios without data)
* will be cleared on the cache threads
pr_err("pblk: cannot allocate write req.\n");
return 1;
}
- c_ctx = nvm_rq_to_pdu(rqd);
bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
if (!bio) {
secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
- pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
- secs_to_sync, secs_avail);
- if (!pgs_read) {
+ if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
+ secs_avail)) {
pr_err("pblk: corrupted write bio\n");
goto fail_put_bio;
}
- if (c_ctx->nr_padded)
- if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
- goto fail_put_bio;
-
- /* Assign lbas to ppas and populate request structure */
- err = pblk_setup_w_rq(pblk, rqd, c_ctx);
- if (err) {
- pr_err("pblk: could not setup write request\n");
- goto fail_free_bio;
- }
-
- err = pblk_submit_io(pblk, rqd);
- if (err) {
- pr_err("pblk: I/O submission failed: %d\n", err);
+ if (pblk_submit_io_set(pblk, rqd))
goto fail_free_bio;
- }
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(secs_to_sync, &pblk->sub_writes);
return 0;
fail_free_bio:
- if (c_ctx->nr_padded)
- pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
+ pblk_free_write_rqd(pblk, rqd);
fail_put_bio:
bio_put(bio);
fail_free_rqd:
#define PBLK_MAX_REQ_ADDRS (64)
#define PBLK_MAX_REQ_ADDRS_PW (6)
+#define PBLK_WS_POOL_SIZE (128)
+#define PBLK_META_POOL_SIZE (128)
+#define PBLK_READ_REQ_POOL_SIZE (1024)
+
+#define PBLK_NR_CLOSE_JOBS (4)
+
#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
#define PBLK_COMMAND_TIMEOUT_MS 30000
PBLK_BLK_ST_CLOSED = 0x2,
};
+struct pblk_sec_meta {
+ u64 reserved;
+ __le64 lba;
+};
+
/* The number of GC lists and the rate-limiter states go together. This way the
* rate-limiter can dictate how much GC is needed based on resource utilization.
*/
-#define PBLK_NR_GC_LISTS 3
-#define PBLK_MAX_GC_JOBS 32
+#define PBLK_GC_NR_LISTS 3
enum {
PBLK_RL_HIGH = 1,
PBLK_RL_LOW = 3,
};
-struct pblk_sec_meta {
- u64 reserved;
- __le64 lba;
-};
-
#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
-/* write completion context */
+/* write buffer completion context */
struct pblk_c_ctx {
struct list_head list; /* Head for out-of-order completion */
unsigned int nr_padded;
};
-/* Read context */
-struct pblk_r_ctx {
- struct bio *orig_bio;
+/* generic context */
+struct pblk_g_ctx {
+ void *private;
+};
+
+/* Pad context */
+struct pblk_pad_rq {
+ struct pblk *pblk;
+ struct completion wait;
+ struct kref ref;
};
/* Recovery context */
struct pblk_gc_rq {
struct pblk_line *line;
void *data;
- u64 *lba_list;
+ u64 lba_list[PBLK_MAX_REQ_ADDRS];
int nr_secs;
int secs_to_gc;
struct list_head list;
};
struct pblk_gc {
+ /* These states are not protected by a lock since (i) they are in the
+ * fast path, and (ii) they are not critical.
+ */
int gc_active;
int gc_enabled;
int gc_forced;
- int gc_jobs_active;
- atomic_t inflight_gc;
struct task_struct *gc_ts;
struct task_struct *gc_writer_ts;
+ struct task_struct *gc_reader_ts;
+
+ struct workqueue_struct *gc_line_reader_wq;
struct workqueue_struct *gc_reader_wq;
+
struct timer_list gc_timer;
+ struct semaphore gc_sem;
+ atomic_t inflight_gc;
int w_entries;
+
struct list_head w_list;
+ struct list_head r_list;
spinlock_t lock;
spinlock_t w_lock;
+ spinlock_t r_lock;
};
struct pblk_rl {
*/
unsigned int high_pw; /* High rounded up as a power of 2 */
-#define PBLK_USER_HIGH_THRS 2 /* Begin write limit at 50 percent
- * available blks
- */
-#define PBLK_USER_LOW_THRS 20 /* Aggressive GC at 5% available blocks */
+#define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */
+#define PBLK_USER_LOW_THRS 10 /* Aggressive GC at 10% available blocks */
int rb_windows_pw; /* Number of rate windows in the write buffer
* given as a power-of-2. This guarantees that
*/
int rb_budget; /* Total number of entries available for I/O */
int rb_user_max; /* Max buffer entries available for user I/O */
- atomic_t rb_user_cnt; /* User I/O buffer counter */
int rb_gc_max; /* Max buffer entries available for GC I/O */
int rb_gc_rsv; /* Reserved buffer entries for GC I/O */
int rb_state; /* Rate-limiter current state */
+
+ atomic_t rb_user_cnt; /* User I/O buffer counter */
atomic_t rb_gc_cnt; /* GC I/O buffer counter */
+ atomic_t rb_space; /* Space limit in case of reaching capacity */
+
+ int rsv_blocks; /* Reserved blocks for GC */
int rb_user_active;
+ int rb_gc_active;
+
struct timer_list u_timer;
unsigned long long nr_secs;
atomic_t free_blocks;
};
-#define PBLK_LINE_NR_LUN_BITMAP 2
-#define PBLK_LINE_NR_SEC_BITMAP 2
#define PBLK_LINE_EMPTY (~0U)
enum {
__le32 window_wr_lun; /* Number of parallel LUNs to write */
__le32 rsvd[2];
+
+ __le64 lun_bitmap[];
};
/*
- * Metadata Layout:
- * 1. struct pblk_emeta
- * 2. nr_lbas u64 forming lba list
- * 3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line)
- * 4. nr_luns bits (u64 format) forming line bad block bitmap
- *
- * 3. and 4. will be part of FTL log
+ * Metadata layout in media:
+ * First sector:
+ * 1. struct line_emeta
+ * 2. bad block bitmap (u64 * window_wr_lun)
+ * Mid sectors (start at lbas_sector):
+ * 3. nr_lbas (u64) forming lba list
+ * Last sectors (start at vsc_sector):
+ * 4. u32 valid sector count (vsc) for all lines (~0U: free line)
*/
struct line_emeta {
struct line_header header;
__le32 next_id; /* Line id for next line */
__le64 nr_lbas; /* Number of lbas mapped in line */
__le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
+ __le64 bb_bitmap[]; /* Updated bad block bitmap for line */
+};
+
+struct pblk_emeta {
+ struct line_emeta *buf; /* emeta buffer in media format */
+ int mem; /* Write offset - points to next
+ * writable entry in memory
+ */
+ atomic_t sync; /* Synced - backpointer that signals the
+ * last entry that has been successfully
+ * persisted to media
+ */
+ unsigned int nr_entries; /* Number of emeta entries */
+};
+
+struct pblk_smeta {
+ struct line_smeta *buf; /* smeta buffer in persistent format */
};
struct pblk_line {
unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
- struct line_smeta *smeta; /* Start metadata */
- struct line_emeta *emeta; /* End metadata */
+ struct pblk_smeta *smeta; /* Start metadata */
+ struct pblk_emeta *emeta; /* End medatada */
+
int meta_line; /* Metadata line id */
+ int meta_distance; /* Distance between data and metadata */
+
u64 smeta_ssec; /* Sector where smeta starts */
u64 emeta_ssec; /* Sector where emeta starts */
atomic_t left_seblks; /* Blocks left for sync erasing */
int left_msecs; /* Sectors left for mapping */
- int left_ssecs; /* Sectors left to sync */
unsigned int cur_sec; /* Sector map pointer */
- unsigned int vsc; /* Valid sector count in line */
+ unsigned int nr_valid_lbas; /* Number of valid lbas in line */
+
+ __le32 *vsc; /* Valid sector count in line */
struct kref ref; /* Write buffer L2P references */
#define PBLK_DATA_LINES 4
-enum{
+enum {
PBLK_KMALLOC_META = 1,
PBLK_VMALLOC_META = 2,
};
-struct pblk_line_metadata {
- void *meta;
+enum {
+ PBLK_EMETA_TYPE_HEADER = 1, /* struct line_emeta first sector */
+ PBLK_EMETA_TYPE_LLBA = 2, /* lba list - type: __le64 */
+ PBLK_EMETA_TYPE_VSC = 3, /* vsc list - type: __le32 */
};
struct pblk_line_mgmt {
struct list_head bad_list; /* Full lines bad */
/* GC lists - use gc_lock */
- struct list_head *gc_lists[PBLK_NR_GC_LISTS];
+ struct list_head *gc_lists[PBLK_GC_NR_LISTS];
struct list_head gc_high_list; /* Full lines ready to GC, high isc */
struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
struct list_head gc_low_list; /* Full lines ready to GC, low isc */
struct pblk_line *log_next; /* Next FTL log line */
struct pblk_line *data_next; /* Next data line */
+ struct list_head emeta_list; /* Lines queued to schedule emeta */
+
+ __le32 *vsc_list; /* Valid sector counts for all lines */
+
/* Metadata allocation type: VMALLOC | KMALLOC */
- int smeta_alloc_type;
int emeta_alloc_type;
/* Pre-allocated metadata for data lines */
- struct pblk_line_metadata sline_meta[PBLK_DATA_LINES];
- struct pblk_line_metadata eline_meta[PBLK_DATA_LINES];
+ struct pblk_smeta *sline_meta[PBLK_DATA_LINES];
+ struct pblk_emeta *eline_meta[PBLK_DATA_LINES];
unsigned long meta_bitmap;
/* Helpers for fast bitmap calculations */
unsigned long l_seq_nr; /* Log line unique sequence number */
spinlock_t free_lock;
+ spinlock_t close_lock;
spinlock_t gc_lock;
};
struct pblk_line_meta {
unsigned int smeta_len; /* Total length for smeta */
- unsigned int smeta_sec; /* Sectors needed for smeta*/
- unsigned int emeta_len; /* Total length for emeta */
- unsigned int emeta_sec; /* Sectors needed for emeta*/
+ unsigned int smeta_sec; /* Sectors needed for smeta */
+
+ unsigned int emeta_len[4]; /* Lengths for emeta:
+ * [0]: Total length
+ * [1]: struct line_emeta length
+ * [2]: L2P portion length
+ * [3]: vsc list length
+ */
+ unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout
+ * as emeta_len
+ */
+
unsigned int emeta_bb; /* Boundary for bb that affects emeta */
+
+ unsigned int vsc_list_len; /* Length for vsc list */
unsigned int sec_bitmap_len; /* Length for sector bitmap in line */
unsigned int blk_bitmap_len; /* Length for block bitmap in line */
unsigned int lun_bitmap_len; /* Length for lun bitmap in line */
unsigned int blk_per_line; /* Number of blocks in a full line */
unsigned int sec_per_line; /* Number of sectors in a line */
+ unsigned int dsec_per_line; /* Number of data sectors in a line */
unsigned int min_blk_line; /* Min. number of good blocks in line */
unsigned int mid_thrs; /* Threshold for GC mid list */
unsigned int high_thrs; /* Threshold for GC high list */
+
+ unsigned int meta_distance; /* Distance between data and metadata */
};
struct pblk_addr_format {
u8 sec_offset;
};
+enum {
+ PBLK_STATE_RUNNING = 0,
+ PBLK_STATE_STOPPING = 1,
+ PBLK_STATE_RECOVERING = 2,
+ PBLK_STATE_STOPPED = 3,
+};
+
struct pblk {
struct nvm_tgt_dev *dev;
struct gendisk *disk;
struct pblk_rb rwb;
+ int state; /* pblk line state */
+
int min_write_pgs; /* Minimum amount of pages required by controller */
int max_write_pgs; /* Maximum amount of pages supported by controller */
int pgs_in_buffer; /* Number of pages that need to be held in buffer to
/* pblk provisioning values. Used by rate limiter */
struct pblk_rl rl;
- struct semaphore erase_sem;
+ int sec_per_write;
unsigned char instance_uuid[16];
#ifdef CONFIG_NVM_DEBUG
atomic_long_t req_writes; /* Sectors stored on write buffer */
atomic_long_t sub_writes; /* Sectors submitted from buffer */
atomic_long_t sync_writes; /* Sectors synced to media */
- atomic_long_t compl_writes; /* Sectors completed in write bio */
atomic_long_t inflight_reads; /* Inflight sector read requests */
+ atomic_long_t cache_reads; /* Read requests that hit the cache */
atomic_long_t sync_reads; /* Completed sector read requests */
atomic_long_t recov_writes; /* Sectors submitted from recovery */
atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */
atomic_long_t write_failed;
atomic_long_t erase_failed;
+ atomic_t inflight_io; /* General inflight I/O counter */
+
struct task_struct *writer_ts;
/* Simple translation map of logical addresses to physical addresses.
mempool_t *page_pool;
mempool_t *line_ws_pool;
mempool_t *rec_pool;
- mempool_t *r_rq_pool;
+ mempool_t *g_rq_pool;
mempool_t *w_rq_pool;
mempool_t *line_meta_pool;
- struct workqueue_struct *kw_wq;
+ struct workqueue_struct *close_wq;
+ struct workqueue_struct *bb_wq;
+
struct timer_list wtimer;
struct pblk_gc gc;
struct work_struct ws;
};
-#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx))
+#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx))
#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
/*
struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
unsigned int pos);
struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
+void pblk_rb_flush(struct pblk_rb *rb);
void pblk_rb_sync_l2p(struct pblk_rb *rb);
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
- struct pblk_c_ctx *c_ctx,
- unsigned int pos,
- unsigned int nr_entries,
- unsigned int count);
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+ struct bio *bio, unsigned int pos,
+ unsigned int nr_entries, unsigned int count);
unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
struct list_head *list,
unsigned int max);
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- u64 pos, int bio_iter);
+ struct ppa_addr ppa, int bio_iter);
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
unsigned int pblk_rb_read_count(struct pblk_rb *rb);
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
int pblk_rb_tear_down_check(struct pblk_rb *rb);
* pblk core
*/
struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx);
void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
-void pblk_flush_writer(struct pblk *pblk);
+void pblk_wait_for_meta(struct pblk *pblk);
struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
void pblk_discard(struct pblk *pblk, struct bio *bio);
void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
unsigned int nr_secs, unsigned int len,
- gfp_t gfp_mask);
+ int alloc_type, gfp_t gfp_mask);
struct pblk_line *pblk_line_get(struct pblk *pblk);
struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
+void pblk_line_replace_data(struct pblk *pblk);
int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
struct pblk_line *pblk_line_get_data(struct pblk *pblk);
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk);
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
int pblk_line_is_full(struct pblk_line *line);
void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_close_ws(struct work_struct *work);
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_close_meta_sync(struct pblk *pblk);
+void pblk_line_close_ws(struct work_struct *work);
+void pblk_pipeline_stop(struct pblk *pblk);
void pblk_line_mark_bb(struct work_struct *work);
void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *));
+ void (*work)(struct work_struct *),
+ struct workqueue_struct *wq);
u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+ void *emeta_buf);
int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
void pblk_line_put(struct kref *ref);
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
unsigned long secs_to_flush);
void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
void pblk_end_io_sync(struct nvm_rq *rqd);
int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
int nr_pages);
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr);
void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
int nr_pages);
void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+ u64 paddr);
void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
struct ppa_addr ppa);
/*
* pblk read path
*/
+extern struct bio_set *pblk_bio_set;
int pblk_submit_read(struct pblk *pblk, struct bio *bio);
int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
unsigned int nr_secs, unsigned int *secs_to_gc,
*/
void pblk_submit_rec(struct work_struct *work);
struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
-void pblk_recov_pad(struct pblk *pblk);
+int pblk_recov_pad(struct pblk *pblk);
__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
struct pblk_rec_ctx *recovery, u64 *comp_bits,
/*
* pblk gc
*/
-#define PBLK_GC_TRIES 3
+#define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */
+#define PBLK_GC_W_QD 128 /* Queue depth for inflight GC write I/Os */
+#define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */
+#define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */
int pblk_gc_init(struct pblk *pblk);
void pblk_gc_exit(struct pblk *pblk);
void pblk_gc_should_start(struct pblk *pblk);
void pblk_gc_should_stop(struct pblk *pblk);
-int pblk_gc_status(struct pblk *pblk);
+void pblk_gc_should_kick(struct pblk *pblk);
+void pblk_gc_kick(struct pblk *pblk);
void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
int *gc_active);
-void pblk_gc_sysfs_force(struct pblk *pblk, int force);
+int pblk_gc_sysfs_force(struct pblk *pblk, int force);
/*
* pblk rate limiter
*/
void pblk_rl_init(struct pblk_rl *rl, int budget);
void pblk_rl_free(struct pblk_rl *rl);
-int pblk_rl_gc_thrs(struct pblk_rl *rl);
+int pblk_rl_high_thrs(struct pblk_rl *rl);
+int pblk_rl_low_thrs(struct pblk_rl *rl);
unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
+void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left);
+int pblk_rl_is_limit(struct pblk_rl *rl);
/*
* pblk sysfs
return c_ctx - sizeof(struct nvm_rq);
}
-static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta)
+static inline void *emeta_to_bb(struct line_emeta *emeta)
+{
+ return emeta->bb_bitmap;
+}
+
+static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
+{
+ return ((void *)emeta + pblk->lm.emeta_len[1]);
+}
+
+static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
{
- return (emeta) + 1;
+ return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
+}
+
+static inline int pblk_line_vsc(struct pblk_line *line)
+{
+ int vsc;
+
+ spin_lock(&line->lock);
+ vsc = le32_to_cpu(*line->vsc);
+ spin_unlock(&line->lock);
+
+ return vsc;
}
#define NVM_MEM_PAGE_WRITE (8)
ppa_addr->ppa = ADDR_EMPTY;
}
+static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa)
+{
+ if (lppa.ppa == rppa.ppa)
+ return true;
+
+ return false;
+}
+
static inline int pblk_addr_in_cache(struct ppa_addr ppa)
{
return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
}
static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
- struct line_smeta *smeta)
+ struct line_header *header)
{
u32 crc = ~(u32)0;
- crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc),
+ crc = crc32_le(crc, (unsigned char *)header + sizeof(crc),
sizeof(struct line_header) - sizeof(crc));
return crc;
crc = crc32_le(crc, (unsigned char *)emeta +
sizeof(struct line_header) + sizeof(crc),
- lm->emeta_len -
+ lm->emeta_len[0] -
sizeof(struct line_header) - sizeof(crc));
return crc;
return flags;
}
-static inline int pblk_set_read_mode(struct pblk *pblk)
+enum {
+ PBLK_READ_RANDOM = 0,
+ PBLK_READ_SEQUENTIAL = 1,
+};
+
+static inline int pblk_set_read_mode(struct pblk *pblk, int type)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int flags;
+
+ flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+ if (type == PBLK_READ_SEQUENTIAL)
+ flags |= geo->plane_mode >> 1;
+
+ return flags;
+}
+
+static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
{
- return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+ return !(nr_secs % pblk->min_write_pgs);
}
#ifdef CONFIG_NVM_DEBUG
{
struct completion *waiting = bio->bi_private;
- if (bio->bi_error)
- pr_err("nvm: gc request failed (%u).\n", bio->bi_error);
+ if (bio->bi_status)
+ pr_err("nvm: gc request failed (%u).\n", bio->bi_status);
complete(waiting);
}
goto finished;
}
wait_for_completion_io(&wait);
- if (bio->bi_error) {
+ if (bio->bi_status) {
rrpc_inflight_laddr_release(rrpc, rqd);
goto finished;
}
wait_for_completion_io(&wait);
rrpc_inflight_laddr_release(rrpc, rqd);
- if (bio->bi_error)
+ if (bio->bi_status)
goto finished;
bio_reset(bio);
struct nvm_rq *rqd;
int err;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (bio_op(bio) == REQ_OP_DISCARD) {
rrpc_discard(rrpc, bio);
/* Forward declarations */
-void bch_count_io_errors(struct cache *, int, const char *);
+void bch_count_io_errors(struct cache *, blk_status_t, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- int, const char *);
-void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
+ blk_status_t, const char *);
+void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
+ const char *);
void bch_bbio_free(struct bio *, struct cache_set *);
struct bio *bch_bbio_alloc(struct cache_set *);
bch_submit_bbio(bio, b->c, &b->key, 0);
closure_sync(&cl);
- if (bio->bi_error)
+ if (bio->bi_status)
set_btree_node_io_error(b);
bch_bbio_free(bio, b->c);
struct closure *cl = bio->bi_private;
struct btree *b = container_of(cl, struct btree, io);
- if (bio->bi_error)
+ if (bio->bi_status)
set_btree_node_io_error(b);
- bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree");
+ bch_bbio_count_io_errors(b->c, bio, bio->bi_status, "writing btree");
closure_put(cl);
}
struct bio_vec bv, cbv;
struct bvec_iter iter, citer = { 0 };
- check = bio_clone(bio, GFP_NOIO);
+ check = bio_clone_kmalloc(bio, GFP_NOIO);
if (!check)
return;
check->bi_opf = REQ_OP_READ;
/* IO errors */
-void bch_count_io_errors(struct cache *ca, int error, const char *m)
+void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
{
/*
* The halflife of an error is:
}
void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
- int error, const char *m)
+ blk_status_t error, const char *m)
{
struct bbio *b = container_of(bio, struct bbio, bio);
struct cache *ca = PTR_CACHE(c, &b->key, 0);
}
void bch_bbio_endio(struct cache_set *c, struct bio *bio,
- int error, const char *m)
+ blk_status_t error, const char *m)
{
struct closure *cl = bio->bi_private;
{
struct journal_write *w = bio->bi_private;
- cache_set_err_on(bio->bi_error, w->c, "journal io error");
+ cache_set_err_on(bio->bi_status, w->c, "journal io error");
closure_put(&w->c->journal.io);
}
struct moving_io *io = container_of(bio->bi_private,
struct moving_io, cl);
- if (bio->bi_error)
- io->op.error = bio->bi_error;
+ if (bio->bi_status)
+ io->op.status = bio->bi_status;
else if (!KEY_DIRTY(&b->key) &&
ptr_stale(io->op.c, &b->key, 0)) {
- io->op.error = -EINTR;
+ io->op.status = BLK_STS_IOERR;
}
- bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move");
+ bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move");
}
static void moving_init(struct moving_io *io)
struct moving_io *io = container_of(cl, struct moving_io, cl);
struct data_insert_op *op = &io->op;
- if (!op->error) {
+ if (!op->status) {
moving_init(io);
io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
if (ret == -ESRCH) {
op->replace_collision = true;
} else if (ret) {
- op->error = -ENOMEM;
+ op->status = BLK_STS_RESOURCE;
op->insert_data_done = true;
}
struct closure *cl = bio->bi_private;
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
- if (bio->bi_error) {
+ if (bio->bi_status) {
/* TODO: We could try to recover from this. */
if (op->writeback)
- op->error = bio->bi_error;
+ op->status = bio->bi_status;
else if (!op->replace)
set_closure_fn(cl, bch_data_insert_error, op->wq);
else
set_closure_fn(cl, NULL, NULL);
}
- bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache");
+ bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
}
static void bch_data_insert_start(struct closure *cl)
* from the backing device.
*/
- if (bio->bi_error)
- s->iop.error = bio->bi_error;
+ if (bio->bi_status)
+ s->iop.status = bio->bi_status;
else if (!KEY_DIRTY(&b->key) &&
ptr_stale(s->iop.c, &b->key, 0)) {
atomic_long_inc(&s->iop.c->cache_read_races);
- s->iop.error = -EINTR;
+ s->iop.status = BLK_STS_IOERR;
}
- bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache");
+ bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache");
}
/*
{
struct closure *cl = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
struct search *s = container_of(cl, struct search, cl);
- s->iop.error = bio->bi_error;
+ s->iop.status = bio->bi_status;
/* Only cache read errors are recoverable */
s->recoverable = false;
}
&s->d->disk->part0, s->start_time);
trace_bcache_request_end(s->d, s->orig_bio);
- s->orig_bio->bi_error = s->iop.error;
+ s->orig_bio->bi_status = s->iop.status;
bio_endio(s->orig_bio);
s->orig_bio = NULL;
}
s->iop.inode = d->id;
s->iop.write_point = hash_long((unsigned long) current, 16);
s->iop.write_prio = 0;
- s->iop.error = 0;
+ s->iop.status = 0;
s->iop.flags = 0;
s->iop.flush_journal = op_is_flush(bio->bi_opf);
s->iop.wq = bcache_wq;
/* Retry from the backing device: */
trace_bcache_read_retry(s->orig_bio);
- s->iop.error = 0;
+ s->iop.status = 0;
do_bio_hook(s, s->orig_bio);
/* XXX: invalidate cache */
!s->cache_miss, s->iop.bypass);
trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
- if (s->iop.error)
+ if (s->iop.status)
continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
else if (s->iop.bio || verify(dc, &s->bio.bio))
continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
unsigned inode;
uint16_t write_point;
uint16_t write_prio;
- short error;
+ blk_status_t status;
union {
uint16_t flags;
{
struct cache *ca = bio->bi_private;
- bch_count_io_errors(ca, bio->bi_error, "writing superblock");
+ bch_count_io_errors(ca, bio->bi_status, "writing superblock");
closure_put(&ca->set->sb_write);
}
struct closure *cl = bio->bi_private;
struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
- cache_set_err_on(bio->bi_error, c, "accessing uuids");
+ cache_set_err_on(bio->bi_status, c, "accessing uuids");
bch_bbio_free(bio, c);
closure_put(cl);
}
{
struct cache *ca = bio->bi_private;
- cache_set_err_on(bio->bi_error, ca->set, "accessing priorities");
+ cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
bch_bbio_free(bio, ca->set);
closure_put(&ca->prio);
}
minor *= BCACHE_MINORS;
- if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+ if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+ BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER)) ||
!(d->disk = alloc_disk(BCACHE_MINORS))) {
ida_simple_remove(&bcache_minor, minor);
return -ENOMEM;
sizeof(struct bbio) + sizeof(struct bio_vec) *
bucket_pages(c))) ||
!(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
- !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+ !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+ BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER)) ||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
!(c->moving_gc_wq = alloc_workqueue("bcache_gc",
WQ_MEM_RECLAIM, 0)) ||
struct keybuf_key *w = bio->bi_private;
struct dirty_io *io = w->private;
- if (bio->bi_error)
+ if (bio->bi_status)
SET_KEY_DIRTY(&w->key, false);
closure_put(&io->cl);
struct dirty_io *io = w->private;
bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
- bio->bi_error, "reading dirty data from cache");
+ bio->bi_status, "reading dirty data from cache");
dirty_endio(bio);
}
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
void dm_cell_error(struct dm_bio_prison *prison,
- struct dm_bio_prison_cell *cell, int error)
+ struct dm_bio_prison_cell *cell, blk_status_t error)
{
struct bio_list bios;
struct bio *bio;
dm_cell_release(prison, cell, &bios);
while ((bio = bio_list_pop(&bios))) {
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
}
}
struct dm_bio_prison_cell *cell,
struct bio_list *inmates);
void dm_cell_error(struct dm_bio_prison *prison,
- struct dm_bio_prison_cell *cell, int error);
+ struct dm_bio_prison_cell *cell, blk_status_t error);
/*
* Visits the cell and then releases. Guarantees no new inmates are
enum data_mode data_mode;
unsigned char list_mode; /* LIST_* */
unsigned hold_count;
- int read_error;
- int write_error;
+ blk_status_t read_error;
+ blk_status_t write_error;
unsigned long state;
unsigned long last_accessed;
struct dm_bufio_client *c;
{
struct dm_buffer *b = context;
- b->bio.bi_error = error ? -EIO : 0;
+ b->bio.bi_status = error ? BLK_STS_IOERR : 0;
b->bio.bi_end_io(&b->bio);
}
r = dm_io(&io_req, 1, ®ion, NULL);
if (r) {
- b->bio.bi_error = r;
+ b->bio.bi_status = errno_to_blk_status(r);
end_io(&b->bio);
}
}
static void inline_endio(struct bio *bio)
{
bio_end_io_t *end_fn = bio->bi_private;
- int error = bio->bi_error;
+ blk_status_t status = bio->bi_status;
/*
* Reset the bio to free any attached resources
*/
bio_reset(bio);
- bio->bi_error = error;
+ bio->bi_status = status;
end_fn(bio);
}
{
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
- b->write_error = bio->bi_error;
- if (unlikely(bio->bi_error)) {
+ b->write_error = bio->bi_status;
+ if (unlikely(bio->bi_status)) {
struct dm_bufio_client *c = b->c;
- int error = bio->bi_error;
- (void)cmpxchg(&c->async_write_error, 0, error);
+
+ (void)cmpxchg(&c->async_write_error, 0,
+ blk_status_to_errno(bio->bi_status));
}
BUG_ON(!test_bit(B_WRITING, &b->state));
{
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
- b->read_error = bio->bi_error;
+ b->read_error = bio->bi_status;
BUG_ON(!test_bit(B_READING, &b->state));
wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
if (b->read_error) {
- int error = b->read_error;
+ int error = blk_status_to_errno(b->read_error);
dm_bufio_release(b);
*/
int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
{
- int a, f;
+ blk_status_t a;
+ int f;
unsigned long buffers_processed = 0;
struct dm_buffer *b, *tmp;
*/
struct continuation {
struct work_struct ws;
- int input;
+ blk_status_t input;
};
static inline void init_continuation(struct continuation *k,
/*
* The operation that everyone is waiting for.
*/
- int (*commit_op)(void *context);
+ blk_status_t (*commit_op)(void *context);
void *commit_context;
/*
static void __commit(struct work_struct *_ws)
{
struct batcher *b = container_of(_ws, struct batcher, commit_work);
-
- int r;
+ blk_status_t r;
unsigned long flags;
struct list_head work_items;
struct work_struct *ws, *tmp;
while ((bio = bio_list_pop(&bios))) {
if (r) {
- bio->bi_error = r;
+ bio->bi_status = r;
bio_endio(bio);
} else
b->issue_op(bio, b->issue_context);
}
static void batcher_init(struct batcher *b,
- int (*commit_op)(void *),
+ blk_status_t (*commit_op)(void *),
void *commit_context,
void (*issue_op)(struct bio *bio, void *),
void *issue_context,
dm_unhook_bio(&pb->hook_info, bio);
- if (bio->bi_error) {
+ if (bio->bi_status) {
bio_endio(bio);
return;
}
struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
if (read_err || write_err)
- mg->k.input = -EIO;
+ mg->k.input = BLK_STS_IOERR;
queue_continuation(mg->cache->wq, &mg->k);
}
dm_unhook_bio(&pb->hook_info, bio);
- if (bio->bi_error)
- mg->k.input = bio->bi_error;
+ if (bio->bi_status)
+ mg->k.input = bio->bi_status;
queue_continuation(mg->cache->wq, &mg->k);
}
if (mg->overwrite_bio) {
if (success)
force_set_dirty(cache, cblock);
+ else if (mg->k.input)
+ mg->overwrite_bio->bi_status = mg->k.input;
else
- mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO);
+ mg->overwrite_bio->bi_status = BLK_STS_IOERR;
bio_endio(mg->overwrite_bio);
} else {
if (success)
r = copy(mg, is_policy_promote);
if (r) {
DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
- mg->k.input = -EIO;
+ mg->k.input = BLK_STS_IOERR;
mg_complete(mg, false);
}
}
/*
* Used by the batcher.
*/
-static int commit_op(void *context)
+static blk_status_t commit_op(void *context)
{
struct cache *cache = context;
if (dm_cache_changed_this_transaction(cache->cmd))
- return commit(cache, false);
+ return errno_to_blk_status(commit(cache, false));
return 0;
}
bio_list_init(&cache->deferred_bios);
while ((bio = bio_list_pop(&bios))) {
- bio->bi_error = DM_ENDIO_REQUEUE;
+ bio->bi_status = BLK_STS_DM_REQUEUE;
bio_endio(bio);
}
}
return r;
}
-static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int cache_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct cache *cache = ti->private;
unsigned long flags;
bio_drop_shared_lock(cache, bio);
accounted_complete(cache, bio);
- return 0;
+ return DM_ENDIO_DONE;
}
static int write_dirty_bitset(struct cache *cache)
struct convert_context ctx;
atomic_t io_pending;
- int error;
+ blk_status_t error;
sector_t sector;
struct rb_node rb_node;
/*
* Encrypt / decrypt data from one bio to another one (can be the same one)
*/
-static int crypt_convert(struct crypt_config *cc,
+static blk_status_t crypt_convert(struct crypt_config *cc,
struct convert_context *ctx)
{
unsigned int tag_offset = 0;
*/
case -EBADMSG:
atomic_dec(&ctx->cc_pending);
- return -EILSEQ;
+ return BLK_STS_PROTECTION;
/*
* There was an error while processing the request.
*/
default:
atomic_dec(&ctx->cc_pending);
- return -EIO;
+ return BLK_STS_IOERR;
}
}
{
struct crypt_config *cc = io->cc;
struct bio *base_bio = io->base_bio;
- int error = io->error;
+ blk_status_t error = io->error;
if (!atomic_dec_and_test(&io->io_pending))
return;
else
kfree(io->integrity_metadata);
- base_bio->bi_error = error;
+ base_bio->bi_status = error;
bio_endio(base_bio);
}
struct dm_crypt_io *io = clone->bi_private;
struct crypt_config *cc = io->cc;
unsigned rw = bio_data_dir(clone);
- int error;
+ blk_status_t error;
/*
* free the processed pages
if (rw == WRITE)
crypt_free_buffer_pages(cc, clone);
- error = clone->bi_error;
+ error = clone->bi_status;
bio_put(clone);
if (rw == READ && !error) {
crypt_inc_pending(io);
if (kcryptd_io_read(io, GFP_NOIO))
- io->error = -ENOMEM;
+ io->error = BLK_STS_RESOURCE;
crypt_dec_pending(io);
}
sector_t sector;
struct rb_node **rbp, *parent;
- if (unlikely(io->error < 0)) {
+ if (unlikely(io->error)) {
crypt_free_buffer_pages(cc, clone);
bio_put(clone);
crypt_dec_pending(io);
struct bio *clone;
int crypt_finished;
sector_t sector = io->sector;
- int r;
+ blk_status_t r;
/*
* Prevent io from disappearing until this function completes.
clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
if (unlikely(!clone)) {
- io->error = -EIO;
+ io->error = BLK_STS_IOERR;
goto dec;
}
crypt_inc_pending(io);
r = crypt_convert(cc, &io->ctx);
- if (r < 0)
+ if (r)
io->error = r;
crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
- int r = 0;
+ blk_status_t r;
crypt_inc_pending(io);
io->sector);
r = crypt_convert(cc, &io->ctx);
- if (r < 0)
+ if (r)
io->error = r;
if (atomic_dec_and_test(&io->ctx.cc_pending))
if (error == -EBADMSG) {
DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu",
(unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)));
- io->error = -EILSEQ;
+ io->error = BLK_STS_PROTECTION;
} else if (error < 0)
- io->error = -EIO;
+ io->error = BLK_STS_IOERR;
crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
goto bad;
}
- cc->bs = bioset_create(MIN_IOS, 0);
+ cc->bs = bioset_create(MIN_IOS, 0, (BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER));
if (!cc->bs) {
ti->error = "Cannot allocate crypt bioset";
goto bad;
* and is aligned to this size as defined in IO hints.
*/
if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0))
- return -EIO;
+ return DM_MAPIO_KILL;
if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1)))
- return -EIO;
+ return DM_MAPIO_KILL;
io = dm_per_bio_data(bio, cc->per_bio_data_size);
crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
if (bio_data_dir(bio) == READ) {
if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) &&
!test_bit(ERROR_WRITES, &fc->flags))
- return -EIO;
+ return DM_MAPIO_KILL;
goto map_bio;
}
/*
* By default, error all I/O.
*/
- return -EIO;
+ return DM_MAPIO_KILL;
}
map_bio:
return DM_MAPIO_REMAPPED;
}
-static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct flakey_c *fc = ti->private;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
- if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
+ if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
all_corrupt_bio_flags_match(bio, fc)) {
/*
* Error read during the down_interval if drop_writes
* and error_writes were not configured.
*/
- return -EIO;
+ *error = BLK_STS_IOERR;
}
}
- return error;
+ return DM_ENDIO_DONE;
}
static void flakey_status(struct dm_target *ti, status_type_t type,
unsigned metadata_offset;
atomic_t in_flight;
- int bi_error;
+ blk_status_t bi_status;
struct completion *completion;
static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
{
struct bio *bio;
- spin_lock_irq(&ic->endio_wait.lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ic->endio_wait.lock, flags);
bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
bio_list_add(&ic->flush_bio_list, bio);
- spin_unlock_irq(&ic->endio_wait.lock);
+ spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
+
queue_work(ic->commit_wq, &ic->commit_work);
}
static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
{
int r = dm_integrity_failed(ic);
- if (unlikely(r) && !bio->bi_error)
- bio->bi_error = r;
+ if (unlikely(r) && !bio->bi_status)
+ bio->bi_status = errno_to_blk_status(r);
bio_endio(bio);
}
{
struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
- if (unlikely(dio->fua) && likely(!bio->bi_error) && likely(!dm_integrity_failed(ic)))
+ if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
submit_flush_bio(ic, dio);
else
do_endio(ic, bio);
bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
- if (unlikely(dio->bi_error) && !bio->bi_error)
- bio->bi_error = dio->bi_error;
- if (likely(!bio->bi_error) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
+ if (unlikely(dio->bi_status) && !bio->bi_status)
+ bio->bi_status = dio->bi_status;
+ if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
dio->range.logical_sector += dio->range.n_sectors;
bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
INIT_WORK(&dio->work, integrity_bio_wait);
dec_in_flight(dio);
return;
error:
- dio->bi_error = r;
+ dio->bi_status = errno_to_blk_status(r);
dec_in_flight(dio);
}
sector_t area, offset;
dio->ic = ic;
- dio->bi_error = 0;
+ dio->bi_status = 0;
if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
submit_flush_bio(ic, dio);
DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
(unsigned long long)dio->range.logical_sector, bio_sectors(bio),
(unsigned long long)ic->provided_data_sectors);
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
ic->sectors_per_block,
(unsigned long long)dio->range.logical_sector, bio_sectors(bio));
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (ic->sectors_per_block > 1) {
if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
bv.bv_offset, bv.bv_len, ic->sectors_per_block);
- return -EIO;
+ return DM_MAPIO_KILL;
}
}
}
wanted_tag_size *= ic->tag_size;
if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
- return -EIO;
+ return DM_MAPIO_KILL;
}
}
} else {
if (unlikely(bip != NULL)) {
DMERR("Unexpected integrity data when using internal hash");
- return -EIO;
+ return DM_MAPIO_KILL;
}
}
if (unlikely(ic->mode == 'R') && unlikely(dio->write))
- return -EIO;
+ return DM_MAPIO_KILL;
get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
ti->error = "The device is too small";
goto bad;
}
+ if (ti->len > ic->provided_data_sectors) {
+ r = -EINVAL;
+ ti->error = "Not enough provided sectors for requested mapping size";
+ goto bad;
+ }
if (!buffer_sectors)
buffer_sectors = 1;
if (!client->pool)
goto bad;
- client->bios = bioset_create(min_ios, 0);
+ client->bios = bioset_create(min_ios, 0, (BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER));
if (!client->bios)
goto bad;
fn(error_bits, context);
}
-static void dec_count(struct io *io, unsigned int region, int error)
+static void dec_count(struct io *io, unsigned int region, blk_status_t error)
{
if (error)
set_bit(region, &io->error_bits);
{
struct io *io;
unsigned region;
- int error;
+ blk_status_t error;
- if (bio->bi_error && bio_data_dir(bio) == READ)
+ if (bio->bi_status && bio_data_dir(bio) == READ)
zero_fill_bio(bio);
/*
*/
retrieve_io_and_region_from_bio(bio, &io, ®ion);
- error = bio->bi_error;
+ error = bio->bi_status;
bio_put(bio);
dec_count(io, region, error);
else if (op == REQ_OP_WRITE_SAME)
special_cmd_max_sectors = q->limits.max_write_same_sectors;
if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
- op == REQ_OP_WRITE_SAME) &&
- special_cmd_max_sectors == 0) {
- dec_count(io, region, -EOPNOTSUPP);
+ op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
+ atomic_inc(&io->count);
+ dec_count(io, region, BLK_STS_NOTSUPP);
return;
}
{
struct log_writes_c *lc = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
unsigned long flags;
- DMERR("Error writing log block, error=%d", bio->bi_error);
+ DMERR("Error writing log block, error=%d", bio->bi_status);
spin_lock_irqsave(&lc->blocks_lock, flags);
lc->logging_enabled = false;
spin_unlock_irqrestore(&lc->blocks_lock, flags);
spin_lock_irq(&lc->blocks_lock);
lc->logging_enabled = false;
spin_unlock_irq(&lc->blocks_lock);
- return -ENOMEM;
+ return DM_MAPIO_KILL;
}
INIT_LIST_HEAD(&block->list);
pb->block = block;
spin_lock_irq(&lc->blocks_lock);
lc->logging_enabled = false;
spin_unlock_irq(&lc->blocks_lock);
- return -ENOMEM;
+ return DM_MAPIO_KILL;
}
src = kmap_atomic(bv.bv_page);
return DM_MAPIO_REMAPPED;
}
-static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int normal_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct log_writes_c *lc = ti->private;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
spin_unlock_irqrestore(&lc->blocks_lock, flags);
}
- return error;
+ return DM_ENDIO_DONE;
}
/*
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
return DM_MAPIO_REQUEUE;
dm_report_EIO(m);
- return -EIO;
+ return DM_MAPIO_KILL;
}
mpio->pgpath = pgpath;
mpio->nr_bytes = nr_bytes;
- bio->bi_error = 0;
+ bio->bi_status = 0;
bio->bi_bdev = pgpath->path.dev->bdev;
bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bios))) {
r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
- if (r < 0 || r == DM_MAPIO_REQUEUE) {
- bio->bi_error = r;
+ switch (r) {
+ case DM_MAPIO_KILL:
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ break;
+ case DM_MAPIO_REQUEUE:
+ bio->bi_status = BLK_STS_DM_REQUEUE;
bio_endio(bio);
- } else if (r == DM_MAPIO_REMAPPED)
+ break;
+ case DM_MAPIO_REMAPPED:
generic_make_request(bio);
+ break;
+ }
}
blk_finish_plug(&plug);
}
activate_or_offline_path(pgpath);
}
-static int noretry_error(int error)
+static int noretry_error(blk_status_t error)
{
switch (error) {
- case -EBADE:
- /*
- * EBADE signals an reservation conflict.
- * We shouldn't fail the path here as we can communicate with
- * the target. We should failover to the next path, but in
- * doing so we might be causing a ping-pong between paths.
- * So just return the reservation conflict error.
- */
- case -EOPNOTSUPP:
- case -EREMOTEIO:
- case -EILSEQ:
- case -ENODATA:
- case -ENOSPC:
+ case BLK_STS_NOTSUPP:
+ case BLK_STS_NOSPC:
+ case BLK_STS_TARGET:
+ case BLK_STS_NEXUS:
+ case BLK_STS_MEDIUM:
+ case BLK_STS_RESOURCE:
return 1;
}
}
static int multipath_end_io(struct dm_target *ti, struct request *clone,
- int error, union map_info *map_context)
+ blk_status_t error, union map_info *map_context)
{
struct dm_mpath_io *mpio = get_mpio(map_context);
struct pgpath *pgpath = mpio->pgpath;
if (atomic_read(&m->nr_valid_paths) == 0 &&
!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
- if (error == -EIO)
+ if (error == BLK_STS_IOERR)
dm_report_EIO(m);
/* complete with the original error */
r = DM_ENDIO_DONE;
return r;
}
-static int do_end_io_bio(struct multipath *m, struct bio *clone,
- int error, struct dm_mpath_io *mpio)
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
+ blk_status_t *error)
{
+ struct multipath *m = ti->private;
+ struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
+ struct pgpath *pgpath = mpio->pgpath;
unsigned long flags;
+ int r = DM_ENDIO_DONE;
- if (!error)
- return 0; /* I/O complete */
-
- if (noretry_error(error))
- return error;
+ if (!*error || noretry_error(*error))
+ goto done;
- if (mpio->pgpath)
- fail_path(mpio->pgpath);
+ if (pgpath)
+ fail_path(pgpath);
if (atomic_read(&m->nr_valid_paths) == 0 &&
!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
dm_report_EIO(m);
- return -EIO;
+ *error = BLK_STS_IOERR;
+ goto done;
}
/* Queue for the daemon to resubmit */
if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
queue_work(kmultipathd, &m->process_queued_bios);
- return DM_ENDIO_INCOMPLETE;
-}
-
-static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
-{
- struct multipath *m = ti->private;
- struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
- struct pgpath *pgpath;
- struct path_selector *ps;
- int r;
-
- BUG_ON(!mpio);
-
- r = do_end_io_bio(m, clone, error, mpio);
- pgpath = mpio->pgpath;
+ r = DM_ENDIO_INCOMPLETE;
+done:
if (pgpath) {
- ps = &pgpath->pg->ps;
+ struct path_selector *ps = &pgpath->pg->ps;
+
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
}
/********************************************************************
* BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
*
- * FEATURE_FLAG_SUPPORTS_V190 in the features member indicates that those exist
+ * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
*/
__le32 flags; /* Flags defining array states for reshaping */
sb->layout = cpu_to_le32(mddev->layout);
sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
+ /********************************************************************
+ * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
+ *
+ * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
+ */
sb->new_level = cpu_to_le32(mddev->new_level);
sb->new_layout = cpu_to_le32(mddev->new_layout);
sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors);
mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
- /* Retrieve device size stored in superblock to be prepared for shrink */
- rdev->sectors = le64_to_cpu(sb->sectors);
+ /*
+ * Retrieve rdev size stored in superblock to be prepared for shrink.
+ * Check extended superblock members are present otherwise the size
+ * will not be set!
+ */
+ if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190)
+ rdev->sectors = le64_to_cpu(sb->sectors);
+
rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
if (rdev->recovery_offset == MaxSector)
set_bit(In_sync, &rdev->flags);
struct dm_raid1_bio_record {
struct mirror *m;
+ /* if details->bi_bdev == NULL, details were not saved */
struct dm_bio_details details;
region_t write_region;
};
* If device is suspended, complete the bio.
*/
if (dm_noflush_suspending(ms->ti))
- bio->bi_error = DM_ENDIO_REQUEUE;
+ bio->bi_status = BLK_STS_DM_REQUEUE;
else
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
* degrade the array.
*/
if (bio_op(bio) == REQ_OP_DISCARD) {
- bio->bi_error = -EOPNOTSUPP;
+ bio->bi_status = BLK_STS_NOTSUPP;
bio_endio(bio);
return;
}
struct dm_raid1_bio_record *bio_record =
dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
+ bio_record->details.bi_bdev = NULL;
+
if (rw == WRITE) {
/* Save region for mirror_end_io() handler */
bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
if (r < 0 && r != -EWOULDBLOCK)
- return r;
+ return DM_MAPIO_KILL;
/*
* If region is not in-sync queue the bio.
*/
if (!r || (r == -EWOULDBLOCK)) {
if (bio->bi_opf & REQ_RAHEAD)
- return -EWOULDBLOCK;
+ return DM_MAPIO_KILL;
queue_bio(ms, bio, rw);
return DM_MAPIO_SUBMITTED;
*/
m = choose_mirror(ms, bio->bi_iter.bi_sector);
if (unlikely(!m))
- return -EIO;
+ return DM_MAPIO_KILL;
dm_bio_record(&bio_record->details, bio);
bio_record->m = m;
return DM_MAPIO_REMAPPED;
}
-static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
int rw = bio_data_dir(bio);
struct mirror_set *ms = (struct mirror_set *) ti->private;
if (!(bio->bi_opf & REQ_PREFLUSH) &&
bio_op(bio) != REQ_OP_DISCARD)
dm_rh_dec(ms->rh, bio_record->write_region);
- return error;
+ return DM_ENDIO_DONE;
}
- if (error == -EOPNOTSUPP)
- return error;
+ if (*error == BLK_STS_NOTSUPP)
+ goto out;
+
+ if (bio->bi_opf & REQ_RAHEAD)
+ goto out;
- if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
- return error;
+ if (unlikely(*error)) {
+ if (!bio_record->details.bi_bdev) {
+ /*
+ * There wasn't enough memory to record necessary
+ * information for a retry or there was no other
+ * mirror in-sync.
+ */
+ DMERR_LIMIT("Mirror read failed.");
+ return DM_ENDIO_DONE;
+ }
- if (unlikely(error)) {
m = bio_record->m;
DMERR("Mirror read failed from %s. Trying alternative device.",
bd = &bio_record->details;
dm_bio_restore(bd, bio);
- bio->bi_error = 0;
+ bio_record->details.bi_bdev = NULL;
+ bio->bi_status = 0;
queue_bio(ms, bio, rw);
return DM_ENDIO_INCOMPLETE;
DMERR("All replicated volumes dead, failing I/O");
}
- return error;
+out:
+ bio_record->details.bi_bdev = NULL;
+
+ return DM_ENDIO_DONE;
}
static void mirror_presuspend(struct dm_target *ti)
static void dm_mq_start_queue(struct request_queue *q)
{
- blk_mq_start_stopped_hw_queues(q, true);
+ blk_mq_unquiesce_queue(q);
blk_mq_kick_requeue_list(q);
}
struct dm_rq_target_io *tio = info->tio;
struct bio *bio = info->orig;
unsigned int nr_bytes = info->orig->bi_iter.bi_size;
- int error = clone->bi_error;
+ blk_status_t error = clone->bi_status;
bio_put(clone);
* Do not use blk_end_request() here, because it may complete
* the original request before the clone, and break the ordering.
*/
- blk_update_request(tio->orig, 0, nr_bytes);
+ blk_update_request(tio->orig, BLK_STS_OK, nr_bytes);
}
static struct dm_rq_target_io *tio_from_request(struct request *rq)
* Must be called without clone's queue lock held,
* see end_clone_request() for more details.
*/
-static void dm_end_request(struct request *clone, int error)
+static void dm_end_request(struct request *clone, blk_status_t error)
{
int rw = rq_data_dir(clone);
struct dm_rq_target_io *tio = clone->end_io_data;
rq_completed(md, rw, false);
}
-static void dm_done(struct request *clone, int error, bool mapped)
+static void dm_done(struct request *clone, blk_status_t error, bool mapped)
{
int r = DM_ENDIO_DONE;
struct dm_rq_target_io *tio = clone->end_io_data;
r = rq_end_io(tio->ti, clone, error, &tio->info);
}
- if (unlikely(error == -EREMOTEIO)) {
+ if (unlikely(error == BLK_STS_TARGET)) {
if (req_op(clone) == REQ_OP_WRITE_SAME &&
!clone->q->limits.max_write_same_sectors)
disable_write_same(tio->md);
* Complete the clone and the original request with the error status
* through softirq context.
*/
-static void dm_complete_request(struct request *rq, int error)
+static void dm_complete_request(struct request *rq, blk_status_t error)
{
struct dm_rq_target_io *tio = tio_from_request(rq);
* Target's rq_end_io() function isn't called.
* This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
*/
-static void dm_kill_unmapped_request(struct request *rq, int error)
+static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
{
rq->rq_flags |= RQF_FAILED;
dm_complete_request(rq, error);
/*
* Called with the clone's queue lock held (in the case of .request_fn)
*/
-static void end_clone_request(struct request *clone, int error)
+static void end_clone_request(struct request *clone, blk_status_t error)
{
struct dm_rq_target_io *tio = clone->end_io_data;
static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
- int r;
+ blk_status_t r;
if (blk_queue_io_stat(clone->q))
clone->rq_flags |= RQF_IO_STAT;
break;
case DM_MAPIO_KILL:
/* The target wants to complete the I/O */
- dm_kill_unmapped_request(rq, -EIO);
+ dm_kill_unmapped_request(rq, BLK_STS_IOERR);
break;
default:
DMWARN("unimplemented target map return value: %d", r);
return __dm_rq_init_rq(set->driver_data, rq);
}
-static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
}
if (ti->type->busy && ti->type->busy(ti))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
dm_start_request(md, rq);
rq_end_stats(md, rq);
rq_completed(md, rq_data_dir(rq), false);
blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static const struct blk_mq_ops dm_mq_ops = {
struct dm_target *ti;
struct request *orig, *clone;
struct kthread_work work;
- int error;
+ blk_status_t error;
union map_info info;
struct dm_stats_aux stats_aux;
unsigned long duration_jiffies;
{
void *callback_data = bio->bi_private;
- dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 1 : 0);
+ dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
}
static void start_full_bio(struct dm_snap_pending_exception *pe,
/* Full snapshots are not usable */
/* To get here the table must be live so s->active is always set. */
if (!s->valid)
- return -EIO;
+ return DM_MAPIO_KILL;
/* FIXME: should only take write lock if we need
* to copy an exception */
if (!s->valid || (unlikely(s->snapshot_overflowed) &&
bio_data_dir(bio) == WRITE)) {
- r = -EIO;
+ r = DM_MAPIO_KILL;
goto out_unlock;
}
if (!s->valid || s->snapshot_overflowed) {
free_pending_exception(pe);
- r = -EIO;
+ r = DM_MAPIO_KILL;
goto out_unlock;
}
DMERR("Snapshot overflowed: Unable to allocate exception.");
} else
__invalidate_snapshot(s, -ENOMEM);
- r = -EIO;
+ r = DM_MAPIO_KILL;
goto out_unlock;
}
}
return r;
}
-static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct dm_snapshot *s = ti->private;
if (is_bio_tracked(bio))
stop_tracking_chunk(s, bio);
- return 0;
+ return DM_ENDIO_DONE;
}
static void snapshot_merge_presuspend(struct dm_target *ti)
}
}
-static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int stripe_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
unsigned i;
char major_minor[16];
struct stripe_c *sc = ti->private;
- if (!error)
- return 0; /* I/O complete */
+ if (!*error)
+ return DM_ENDIO_DONE; /* I/O complete */
- if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
- return error;
+ if (bio->bi_opf & REQ_RAHEAD)
+ return DM_ENDIO_DONE;
- if (error == -EOPNOTSUPP)
- return error;
+ if (*error == BLK_STS_NOTSUPP)
+ return DM_ENDIO_DONE;
memset(major_minor, 0, sizeof(major_minor));
sprintf(major_minor, "%d:%d",
schedule_work(&sc->trigger_event);
}
- return error;
+ return DM_ENDIO_DONE;
}
static int stripe_iterate_devices(struct dm_target *ti,
static int io_err_map(struct dm_target *tt, struct bio *bio)
{
- return -EIO;
+ return DM_MAPIO_KILL;
}
static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
* Even if r is set, there could be sub discards in flight that we
* need to wait for.
*/
- if (r && !op->parent_bio->bi_error)
- op->parent_bio->bi_error = r;
+ if (r && !op->parent_bio->bi_status)
+ op->parent_bio->bi_status = errno_to_blk_status(r);
bio_endio(op->parent_bio);
}
}
static void cell_error_with_code(struct pool *pool,
- struct dm_bio_prison_cell *cell, int error_code)
+ struct dm_bio_prison_cell *cell, blk_status_t error_code)
{
dm_cell_error(pool->prison, cell, error_code);
dm_bio_prison_free_cell(pool->prison, cell);
}
-static int get_pool_io_error_code(struct pool *pool)
+static blk_status_t get_pool_io_error_code(struct pool *pool)
{
- return pool->out_of_data_space ? -ENOSPC : -EIO;
+ return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
}
static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
{
- int error = get_pool_io_error_code(pool);
-
- cell_error_with_code(pool, cell, error);
+ cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
}
static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
{
- cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE);
+ cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
}
/*----------------------------------------------------------------*/
bio_list_init(master);
}
-static void error_bio_list(struct bio_list *bios, int error)
+static void error_bio_list(struct bio_list *bios, blk_status_t error)
{
struct bio *bio;
while ((bio = bio_list_pop(bios))) {
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
}
}
-static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
+static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
+ blk_status_t error)
{
struct bio_list bios;
unsigned long flags;
__merge_bio_list(&bios, &tc->retry_on_resume_list);
spin_unlock_irqrestore(&tc->lock, flags);
- error_bio_list(&bios, DM_ENDIO_REQUEUE);
+ error_bio_list(&bios, BLK_STS_DM_REQUEUE);
requeue_deferred_cells(tc);
}
-static void error_retry_list_with_code(struct pool *pool, int error)
+static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
{
struct thin_c *tc;
static void error_retry_list(struct pool *pool)
{
- int error = get_pool_io_error_code(pool);
-
- error_retry_list_with_code(pool, error);
+ error_retry_list_with_code(pool, get_pool_io_error_code(pool));
}
/*
*/
atomic_t prepare_actions;
- int err;
+ blk_status_t status;
struct thin_c *tc;
dm_block_t virt_begin, virt_end;
dm_block_t data_block;
{
struct dm_thin_new_mapping *m = context;
- m->err = read_err || write_err ? -EIO : 0;
+ m->status = read_err || write_err ? BLK_STS_IOERR : 0;
complete_mapping_preparation(m);
}
bio->bi_end_io = m->saved_bi_end_io;
- m->err = bio->bi_error;
+ m->status = bio->bi_status;
complete_mapping_preparation(m);
}
struct bio *bio = m->bio;
int r;
- if (m->err) {
+ if (m->status) {
cell_error(pool, m->cell);
goto out;
}
return;
}
+ /*
+ * Increment the unmapped blocks. This prevents a race between the
+ * passdown io and reallocation of freed blocks.
+ */
+ r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
+ if (r) {
+ metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
+ bio_io_error(m->bio);
+ cell_defer_no_holder(tc, m->cell);
+ mempool_free(m, pool->mapping_pool);
+ return;
+ }
+
discard_parent = bio_alloc(GFP_NOIO, 1);
if (!discard_parent) {
DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
end_discard(&op, r);
}
}
-
- /*
- * Increment the unmapped blocks. This prevents a race between the
- * passdown io and reallocation of freed blocks.
- */
- r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
- if (r) {
- metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
- bio_io_error(m->bio);
- cell_defer_no_holder(tc, m->cell);
- mempool_free(m, pool->mapping_pool);
- return;
- }
}
static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
spin_unlock_irqrestore(&tc->lock, flags);
}
-static int should_error_unserviceable_bio(struct pool *pool)
+static blk_status_t should_error_unserviceable_bio(struct pool *pool)
{
enum pool_mode m = get_pool_mode(pool);
case PM_WRITE:
/* Shouldn't get here */
DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
- return -EIO;
+ return BLK_STS_IOERR;
case PM_OUT_OF_DATA_SPACE:
- return pool->pf.error_if_no_space ? -ENOSPC : 0;
+ return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
case PM_READ_ONLY:
case PM_FAIL:
- return -EIO;
+ return BLK_STS_IOERR;
default:
/* Shouldn't get here */
DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
- return -EIO;
+ return BLK_STS_IOERR;
}
}
static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
{
- int error = should_error_unserviceable_bio(pool);
+ blk_status_t error = should_error_unserviceable_bio(pool);
if (error) {
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
} else
retry_on_resume(bio);
{
struct bio *bio;
struct bio_list bios;
- int error;
+ blk_status_t error;
error = should_error_unserviceable_bio(pool);
if (error) {
unsigned count = 0;
if (tc->requeue_mode) {
- error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE);
+ error_thin_bio_list(tc, &tc->deferred_bio_list,
+ BLK_STS_DM_REQUEUE);
return;
}
if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
pool->pf.error_if_no_space = true;
notify_of_pool_mode_change_to_oods(pool);
- error_retry_list_with_code(pool, -ENOSPC);
+ error_retry_list_with_code(pool, BLK_STS_NOSPC);
}
}
thin_hook_bio(tc, bio);
if (tc->requeue_mode) {
- bio->bi_error = DM_ENDIO_REQUEUE;
+ bio->bi_status = BLK_STS_DM_REQUEUE;
bio_endio(bio);
return DM_MAPIO_SUBMITTED;
}
return thin_bio_map(ti, bio);
}
-static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
+static int thin_endio(struct dm_target *ti, struct bio *bio,
+ blk_status_t *err)
{
unsigned long flags;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
if (h->cell)
cell_defer_no_holder(h->tc, h->cell);
- return 0;
+ return DM_ENDIO_DONE;
}
static void thin_presuspend(struct dm_target *ti)
/*
* End one "io" structure with a given error.
*/
-static void verity_finish_io(struct dm_verity_io *io, int error)
+static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
{
struct dm_verity *v = io->v;
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
bio->bi_end_io = io->orig_bi_end_io;
- bio->bi_error = error;
+ bio->bi_status = status;
verity_fec_finish_io(io);
{
struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
- verity_finish_io(io, verity_verify_io(io));
+ verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
}
static void verity_end_io(struct bio *bio)
{
struct dm_verity_io *io = bio->bi_private;
- if (bio->bi_error && !verity_fec_is_enabled(io->v)) {
- verity_finish_io(io, bio->bi_error);
+ if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
+ verity_finish_io(io, bio->bi_status);
return;
}
if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
DMERR_LIMIT("unaligned io");
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (bio_end_sector(bio) >>
(v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
DMERR_LIMIT("io out of range");
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (bio_data_dir(bio) == WRITE)
- return -EIO;
+ return DM_MAPIO_KILL;
io = dm_per_bio_data(bio, ti->per_io_data_size);
io->v = v;
case REQ_OP_READ:
if (bio->bi_opf & REQ_RAHEAD) {
/* readahead of null bytes only wastes buffer cache */
- return -EIO;
+ return DM_MAPIO_KILL;
}
zero_fill_bio(bio);
break;
/* writes get silently dropped */
break;
default:
- return -EIO;
+ return DM_MAPIO_KILL;
}
bio_endio(bio);
*/
struct dm_io {
struct mapped_device *md;
- int error;
+ blk_status_t status;
atomic_t io_count;
struct bio *bio;
unsigned long start_time;
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
*/
-static void dec_pending(struct dm_io *io, int error)
+static void dec_pending(struct dm_io *io, blk_status_t error)
{
unsigned long flags;
- int io_error;
+ blk_status_t io_error;
struct bio *bio;
struct mapped_device *md = io->md;
/* Push-back supersedes any I/O errors */
if (unlikely(error)) {
spin_lock_irqsave(&io->endio_lock, flags);
- if (!(io->error > 0 && __noflush_suspending(md)))
- io->error = error;
+ if (!(io->status == BLK_STS_DM_REQUEUE &&
+ __noflush_suspending(md)))
+ io->status = error;
spin_unlock_irqrestore(&io->endio_lock, flags);
}
if (atomic_dec_and_test(&io->io_count)) {
- if (io->error == DM_ENDIO_REQUEUE) {
+ if (io->status == BLK_STS_DM_REQUEUE) {
/*
* Target requested pushing back the I/O.
*/
bio_list_add_head(&md->deferred, io->bio);
else
/* noflush suspend was interrupted. */
- io->error = -EIO;
+ io->status = BLK_STS_IOERR;
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
- io_error = io->error;
+ io_error = io->status;
bio = io->bio;
end_io_acct(io);
free_io(md, io);
- if (io_error == DM_ENDIO_REQUEUE)
+ if (io_error == BLK_STS_DM_REQUEUE)
return;
if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
queue_io(md, bio);
} else {
/* done with normal IO or empty flush */
- bio->bi_error = io_error;
+ bio->bi_status = io_error;
bio_endio(bio);
}
}
static void clone_endio(struct bio *bio)
{
- int error = bio->bi_error;
- int r = error;
+ blk_status_t error = bio->bi_status;
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
- if (endio) {
- r = endio(tio->ti, bio, error);
- if (r < 0 || r == DM_ENDIO_REQUEUE)
- /*
- * error and requeue request are handled
- * in dec_pending().
- */
- error = r;
- else if (r == DM_ENDIO_INCOMPLETE)
- /* The target will handle the io */
- return;
- else if (r) {
- DMWARN("unimplemented target endio return value: %d", r);
- BUG();
- }
- }
-
- if (unlikely(r == -EREMOTEIO)) {
+ if (unlikely(error == BLK_STS_TARGET)) {
if (bio_op(bio) == REQ_OP_WRITE_SAME &&
!bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
disable_write_same(md);
disable_write_zeroes(md);
}
+ if (endio) {
+ int r = endio(tio->ti, bio, &error);
+ switch (r) {
+ case DM_ENDIO_REQUEUE:
+ error = BLK_STS_DM_REQUEUE;
+ /*FALLTHRU*/
+ case DM_ENDIO_DONE:
+ break;
+ case DM_ENDIO_INCOMPLETE:
+ /* The target will handle the io */
+ return;
+ default:
+ DMWARN("unimplemented target endio return value: %d", r);
+ BUG();
+ }
+ }
+
free_tio(tio);
dec_pending(io, error);
}
while ((bio = bio_list_pop(&list))) {
struct bio_set *bs = bio->bi_pool;
- if (unlikely(!bs) || bs == fs_bio_set) {
+ if (unlikely(!bs) || bs == fs_bio_set ||
+ !bs->rescue_workqueue) {
bio_list_add(¤t->bio_list[i], bio);
continue;
}
r = ti->type->map(ti, clone);
dm_offload_end(&o);
- if (r == DM_MAPIO_REMAPPED) {
+ switch (r) {
+ case DM_MAPIO_SUBMITTED:
+ break;
+ case DM_MAPIO_REMAPPED:
/* the bio has been remapped so dispatch it */
-
trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
tio->io->bio->bi_bdev->bd_dev, sector);
-
generic_make_request(clone);
- } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
- /* error the io and bail out, or requeue it if needed */
- dec_pending(tio->io, r);
+ break;
+ case DM_MAPIO_KILL:
+ dec_pending(tio->io, BLK_STS_IOERR);
+ free_tio(tio);
+ break;
+ case DM_MAPIO_REQUEUE:
+ dec_pending(tio->io, BLK_STS_DM_REQUEUE);
free_tio(tio);
- } else if (r != DM_MAPIO_SUBMITTED) {
+ break;
+ default:
DMWARN("unimplemented target map return value: %d", r);
BUG();
}
ci.map = map;
ci.md = md;
ci.io = alloc_io(md);
- ci.io->error = 0;
+ ci.io->status = 0;
atomic_set(&ci.io->io_count, 1);
ci.io->bio = bio;
ci.io->md = md;
* Initialize aspects of queue that aren't relevant for blk-mq
*/
md->queue->backing_dev_info->congested_fn = dm_any_congested;
- blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
}
static void cleanup_mapped_device(struct mapped_device *md)
BUG();
}
- pools->bs = bioset_create_nobvec(pool_size, front_pad);
+ pools->bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);
if (!pools->bs)
goto out;
static bool create_on_open = true;
/* bio_clone_mddev
- * like bio_clone, but with a local bio set
+ * like bio_clone_bioset, but with a local bio set
*/
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
unsigned int sectors;
int cpu;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (mddev == NULL || mddev->pers == NULL) {
bio_io_error(bio);
}
if (mddev->ro == 1 && unlikely(rw == WRITE)) {
if (bio_sectors(bio) != 0)
- bio->bi_error = -EROFS;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return BLK_QC_T_NONE;
}
struct md_rdev *rdev = bio->bi_private;
struct mddev *mddev = rdev->mddev;
- if (bio->bi_error) {
- pr_err("md: super_written gets error=%d\n", bio->bi_error);
+ if (bio->bi_status) {
+ pr_err("md: super_written gets error=%d\n", bio->bi_status);
md_error(mddev, rdev);
if (!test_bit(Faulty, &rdev->flags)
&& (bio->bi_opf & MD_FAILFAST)) {
submit_bio_wait(bio);
- ret = !bio->bi_error;
+ ret = !bio->bi_status;
bio_put(bio);
return ret;
}
return -EINVAL;
}
-static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
return sb1->set_uuid0 == sb2->set_uuid0 &&
sb1->set_uuid1 == sb2->set_uuid1 &&
sb1->set_uuid3 == sb2->set_uuid3;
}
-static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
int ret;
mdp_super_t *tmp1, *tmp2;
} else {
__u64 ev1, ev2;
mdp_super_t *refsb = page_address(refdev->sb_page);
- if (!uuid_equal(refsb, sb)) {
+ if (!md_uuid_equal(refsb, sb)) {
pr_warn("md: %s has different UUID to %s\n",
b, bdevname(refdev->bdev,b2));
goto abort;
}
- if (!sb_equal(refsb, sb)) {
+ if (!md_sb_equal(refsb, sb)) {
pr_warn("md: %s has same UUID but different superblock to %s\n",
b, bdevname(refdev->bdev, b2));
goto abort;
}
if (mddev->bio_set == NULL) {
- mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
+ mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!mddev->bio_set)
return -ENOMEM;
}
* operation and are ready to return a success/failure code to the buffer
* cache layer.
*/
-static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err)
+static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
{
struct bio *bio = mp_bh->master_bio;
struct mpconf *conf = mp_bh->mddev->private;
- bio->bi_error = err;
+ bio->bi_status = status;
bio_endio(bio);
mempool_free(mp_bh, conf->pool);
}
struct mpconf *conf = mp_bh->mddev->private;
struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
- if (!bio->bi_error)
+ if (!bio->bi_status)
multipath_end_bh_io(mp_bh, 0);
else if (!(bio->bi_opf & REQ_RAHEAD)) {
/*
(unsigned long long)bio->bi_iter.bi_sector);
multipath_reschedule_retry(mp_bh);
} else
- multipath_end_bh_io(mp_bh, bio->bi_error);
+ multipath_end_bh_io(mp_bh, bio->bi_status);
rdev_dec_pending(rdev, conf->mddev);
}
pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
bdevname(bio->bi_bdev,b),
(unsigned long long)bio->bi_iter.bi_sector);
- multipath_end_bh_io(mp_bh, -EIO);
+ multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
} else {
pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
bdevname(bio->bi_bdev,b),
struct r1conf *conf = r1_bio->mddev->private;
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
/*
static void raid1_end_read_request(struct bio *bio)
{
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct r1bio *r1_bio = bio->bi_private;
struct r1conf *conf = r1_bio->mddev->private;
struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
struct md_rdev *rdev = conf->mirrors[mirror].rdev;
bool discard_error;
- discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+ discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
/*
* 'one mirror IO has finished' event handler:
*/
- if (bio->bi_error && !discard_error) {
+ if (bio->bi_status && !discard_error) {
set_bit(WriteErrorSeen, &rdev->flags);
if (!test_and_set_bit(WantReplacement, &rdev->flags))
set_bit(MD_RECOVERY_NEEDED, &
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
* or re-read if the read failed.
* We don't do much here, just schedule handling by raid1d
*/
- if (!bio->bi_error)
+ if (!bio->bi_status)
set_bit(R1BIO_Uptodate, &r1_bio->state);
if (atomic_dec_and_test(&r1_bio->remaining))
static void end_sync_write(struct bio *bio)
{
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct r1bio *r1_bio = get_resync_r1bio(bio);
struct mddev *mddev = r1_bio->mddev;
struct r1conf *conf = mddev->private;
idx ++;
}
set_bit(R1BIO_Uptodate, &r1_bio->state);
- bio->bi_error = 0;
+ bio->bi_status = 0;
return 1;
}
for (i = 0; i < conf->raid_disks * 2; i++) {
int j;
int size;
- int error;
+ blk_status_t status;
struct bio_vec *bi;
struct bio *b = r1_bio->bios[i];
struct resync_pages *rp = get_resync_pages(b);
if (b->bi_end_io != end_sync_read)
continue;
/* fixup the bio for reuse, but preserve errno */
- error = b->bi_error;
+ status = b->bi_status;
bio_reset(b);
- b->bi_error = error;
+ b->bi_status = status;
b->bi_vcnt = vcnt;
b->bi_iter.bi_size = r1_bio->sectors << 9;
b->bi_iter.bi_sector = r1_bio->sector +
}
for (primary = 0; primary < conf->raid_disks * 2; primary++)
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
- !r1_bio->bios[primary]->bi_error) {
+ !r1_bio->bios[primary]->bi_status) {
r1_bio->bios[primary]->bi_end_io = NULL;
rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
break;
int j;
struct bio *pbio = r1_bio->bios[primary];
struct bio *sbio = r1_bio->bios[i];
- int error = sbio->bi_error;
+ blk_status_t status = sbio->bi_status;
struct page **ppages = get_resync_pages(pbio)->pages;
struct page **spages = get_resync_pages(sbio)->pages;
struct bio_vec *bi;
if (sbio->bi_end_io != end_sync_read)
continue;
/* Now we can 'fixup' the error value */
- sbio->bi_error = 0;
+ sbio->bi_status = 0;
bio_for_each_segment_all(bi, sbio, j)
page_len[j] = bi->bv_len;
- if (!error) {
+ if (!status) {
for (j = vcnt; j-- ; ) {
if (memcmp(page_address(ppages[j]),
page_address(spages[j]),
if (j >= 0)
atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
- && !error)) {
+ && !status)) {
/* No need to write to this device. */
sbio->bi_end_io = NULL;
rdev_dec_pending(conf->mirrors[i].rdev, mddev);
struct bio *bio = r1_bio->bios[m];
if (bio->bi_end_io == NULL)
continue;
- if (!bio->bi_error &&
+ if (!bio->bi_status &&
test_bit(R1BIO_MadeGood, &r1_bio->state)) {
rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
}
- if (bio->bi_error &&
+ if (bio->bi_status &&
test_bit(R1BIO_WriteError, &r1_bio->state)) {
if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
md_error(conf->mddev, rdev);
if (!conf->r1bio_pool)
goto abort;
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!conf->bio_split)
goto abort;
struct r10conf *conf = r10_bio->mddev->private;
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
/*
static void raid10_end_read_request(struct bio *bio)
{
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct r10bio *r10_bio = bio->bi_private;
int slot, dev;
struct md_rdev *rdev;
struct bio *to_put = NULL;
bool discard_error;
- discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+ discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
- if (bio->bi_error && !discard_error) {
+ if (bio->bi_status && !discard_error) {
if (repl)
/* Never record new bad blocks to replacement,
* just fail it.
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
{
struct r10conf *conf = r10_bio->mddev->private;
- if (!bio->bi_error)
+ if (!bio->bi_status)
set_bit(R10BIO_Uptodate, &r10_bio->state);
else
/* The write handler will notice the lack of
else
rdev = conf->mirrors[d].rdev;
- if (bio->bi_error) {
+ if (bio->bi_status) {
if (repl)
md_error(mddev, rdev);
else {
/* find the first device with a block */
for (i=0; i<conf->copies; i++)
- if (!r10_bio->devs[i].bio->bi_error)
+ if (!r10_bio->devs[i].bio->bi_status)
break;
if (i == conf->copies)
tpages = get_resync_pages(tbio)->pages;
d = r10_bio->devs[i].devnum;
rdev = conf->mirrors[d].rdev;
- if (!r10_bio->devs[i].bio->bi_error) {
+ if (!r10_bio->devs[i].bio->bi_status) {
/* We know that the bi_io_vec layout is the same for
* both 'first' and 'i', so we just compare them.
* All vec entries are PAGE_SIZE;
rdev = conf->mirrors[dev].rdev;
if (r10_bio->devs[m].bio == NULL)
continue;
- if (!r10_bio->devs[m].bio->bi_error) {
+ if (!r10_bio->devs[m].bio->bi_status) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
if (r10_bio->devs[m].repl_bio == NULL)
continue;
- if (!r10_bio->devs[m].repl_bio->bi_error) {
+ if (!r10_bio->devs[m].repl_bio->bi_status) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
r10_bio->devs[m].addr,
r10_bio->sectors, 0);
rdev_dec_pending(rdev, conf->mddev);
- } else if (bio != NULL && bio->bi_error) {
+ } else if (bio != NULL && bio->bi_status) {
fail = true;
if (!narrow_write_error(r10_bio, m)) {
md_error(conf->mddev, rdev);
r10_bio->devs[i].repl_bio->bi_end_io = NULL;
bio = r10_bio->devs[i].bio;
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
rcu_read_lock();
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
/* Need to set up for writing to the replacement */
bio = r10_bio->devs[i].repl_bio;
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
sector = r10_bio->devs[i].addr;
bio->bi_next = biolist;
if (bio->bi_end_io == end_sync_read) {
md_sync_acct(bio->bi_bdev, nr_sectors);
- bio->bi_error = 0;
+ bio->bi_status = 0;
generic_make_request(bio);
}
}
if (!conf->r10bio_pool)
goto out;
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!conf->bio_split)
goto out;
read_bio->bi_end_io = end_reshape_read;
bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
- read_bio->bi_error = 0;
+ read_bio->bi_status = 0;
read_bio->bi_vcnt = 0;
read_bio->bi_iter.bi_size = 0;
r10_bio->master_bio = read_bio;
rdev = conf->mirrors[d].rdev;
}
- if (bio->bi_error) {
+ if (bio->bi_status) {
/* FIXME should record badblock */
md_error(mddev, rdev);
}
struct r5l_log *log = io->log;
unsigned long flags;
- if (bio->bi_error)
+ if (bio->bi_status)
md_error(log->rdev->mddev, log->rdev);
bio_put(bio);
unsigned long flags;
struct r5l_io_unit *io;
- if (bio->bi_error)
+ if (bio->bi_status)
md_error(log->rdev->mddev, log->rdev);
spin_lock_irqsave(&log->io_list_lock, flags);
if (!log->io_pool)
goto io_pool;
- log->bs = bioset_create(R5L_POOL_SIZE, 0);
+ log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!log->bs)
goto io_bs;
pr_debug("%s: seq: %llu\n", __func__, io->seq);
- if (bio->bi_error)
+ if (bio->bi_status)
md_error(ppl_conf->mddev, log->rdev);
list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
goto err;
}
- ppl_conf->bs = bioset_create(conf->raid_disks, 0);
+ ppl_conf->bs = bioset_create(conf->raid_disks, 0, 0);
if (!ppl_conf->bs) {
ret = -ENOMEM;
goto err;
pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
- bi->bi_error);
+ bi->bi_status);
if (i == disks) {
bio_reset(bi);
BUG();
s = sh->sector + rdev->new_data_offset;
else
s = sh->sector + rdev->data_offset;
- if (!bi->bi_error) {
+ if (!bi->bi_status) {
set_bit(R5_UPTODATE, &sh->dev[i].flags);
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
/* Note that this cannot happen on a
}
pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
- bi->bi_error);
+ bi->bi_status);
if (i == disks) {
bio_reset(bi);
BUG();
}
if (replacement) {
- if (bi->bi_error)
+ if (bi->bi_status)
md_error(conf->mddev, rdev);
else if (is_badblock(rdev, sh->sector,
STRIPE_SECTORS,
&first_bad, &bad_sectors))
set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
} else {
- if (bi->bi_error) {
+ if (bi->bi_status) {
set_bit(STRIPE_DEGRADED, &sh->state);
set_bit(WriteErrorSeen, &rdev->flags);
set_bit(R5_WriteError, &sh->dev[i].flags);
}
rdev_dec_pending(rdev, conf->mddev);
- if (sh->batch_head && bi->bi_error && !replacement)
+ if (sh->batch_head && bi->bi_status && !replacement)
set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
bio_reset(bi);
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
md_write_end(conf->mddev);
bio_endio(bi);
bi = nextbi;
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
md_write_end(conf->mddev);
bio_endio(bi);
bi = bi2;
struct bio *nextbi =
r5_next_bio(bi, sh->dev[i].sector);
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
bio_endio(bi);
bi = nextbi;
}
struct mddev *mddev;
struct r5conf *conf;
struct md_rdev *rdev;
- int error = bi->bi_error;
+ blk_status_t error = bi->bi_status;
bio_put(bi);
release_stripe_plug(mddev, sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
break;
}
}
goto abort;
}
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!conf->bio_split)
goto abort;
conf->mddev = mddev;
config MEDIA_CEC_RC
bool "HDMI CEC RC integration"
depends on CEC_CORE && RC_CORE
+ depends on CEC_CORE=m || RC_CORE=y
---help---
Pass on CEC remote control messages to the RC framework.
bool block, struct cec_msg __user *parg)
{
struct cec_msg msg = {};
- long err = 0;
+ long err;
if (copy_from_user(&msg, parg, sizeof(msg)))
return -EFAULT;
- mutex_lock(&adap->lock);
- if (!adap->is_configured && fh->mode_follower < CEC_MODE_MONITOR)
- err = -ENONET;
- mutex_unlock(&adap->lock);
- if (err)
- return err;
err = cec_receive_msg(fh, &msg, block);
if (err)
static void i2c_wr8_and_or(struct v4l2_subdev *sd, u16 reg,
u8 mask, u8 val)
{
- i2c_wrreg(sd, reg, (i2c_rdreg(sd, reg, 2) & mask) | val, 2);
+ i2c_wrreg(sd, reg, (i2c_rdreg(sd, reg, 1) & mask) | val, 1);
}
static u16 i2c_rd16(struct v4l2_subdev *sd, u16 reg)
static unsigned long delt;
unsigned long deltintr;
unsigned long flags;
+ int counter = 0;
int iir, lsr;
while ((iir = inb(io + UART_IIR) & UART_IIR_ID)) {
+ if (++counter > 256) {
+ dev_err(&sir_ir_dev->dev, "Trapped in interrupt");
+ break;
+ }
+
switch (iir & UART_IIR_ID) { /* FIXME toto treba preriedit */
case UART_IIR_MSI:
(void)inb(io + UART_MSR);
serio_set_drvdata(serio, rain);
INIT_WORK(&rain->work, rain_irq_work_handler);
mutex_init(&rain->write_lock);
+ spin_lock_init(&rain->buf_lock);
err = serio_open(serio, drv);
if (err)
void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no)
{
- if (plane_no > vb->num_planes || !vb->planes[plane_no].mem_priv)
+ if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv)
return NULL;
return call_ptr_memop(vb, vaddr, vb->planes[plane_no].mem_priv);
spin_lock_irqsave(&msb->q_lock, flags);
if (len)
- if (!__blk_end_request(msb->req, 0, len))
+ if (!__blk_end_request(msb->req, BLK_STS_OK, len))
msb->req = NULL;
if (error && msb->req) {
+ blk_status_t ret = errno_to_blk_status(error);
dbg_verbose("IO: ending one sector of the request with error");
- if (!__blk_end_request(msb->req, error, msb->page_size))
+ if (!__blk_end_request(msb->req, ret, msb->page_size))
msb->req = NULL;
}
WARN_ON(!msb->io_queue_stopped);
while ((req = blk_fetch_request(q)) != NULL)
- __blk_end_request_all(req, -ENODEV);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return;
}
msb->req_sg);
if (!msb->seg_count) {
- chunk = __blk_end_request_cur(msb->block_req, -ENOMEM);
+ chunk = __blk_end_request_cur(msb->block_req,
+ BLK_STS_RESOURCE);
continue;
}
if (error && !t_len)
t_len = blk_rq_cur_bytes(msb->block_req);
- chunk = __blk_end_request(msb->block_req, error, t_len);
+ chunk = __blk_end_request(msb->block_req,
+ errno_to_blk_status(error), t_len);
error = mspro_block_issue_req(card, chunk);
if (msb->eject) {
while ((req = blk_fetch_request(q)) != NULL)
- __blk_end_request_all(req, -ENODEV);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return;
}
int ret;
ret = regmap_read_poll_timeout(arizona->regmap,
- ARIZONA_INTERRUPT_RAW_STATUS_5, val,
- ((val & mask) == target),
+ reg, val, ((val & mask) == target),
ARIZONA_REG_POLL_DELAY_US,
timeout_ms * 1000);
if (ret)
mutex_init(&ctx->mapping_lock);
ctx->mapping = NULL;
- if (cxl_is_psl8(afu)) {
+ if (cxl_is_power8()) {
spin_lock_init(&ctx->sste_lock);
/*
if (start + len > ctx->afu->adapter->ps_size)
return -EINVAL;
- if (cxl_is_psl9(ctx->afu)) {
+ if (cxl_is_power9()) {
/*
* Make sure there is a valid problem state
* area space for this AFU.
{
struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu);
- if (cxl_is_psl8(ctx->afu))
+ if (cxl_is_power8())
free_page((u64)ctx->sstp);
if (ctx->ff_page)
__free_page(ctx->ff_page);
#define CXL_PSL9_DSISR_An_PF_RGP 0x0000000000000090ULL /* PTE not found (Radix Guest (parent)) 0b10010000 */
#define CXL_PSL9_DSISR_An_PF_HRH 0x0000000000000094ULL /* PTE not found (HPT/Radix Host) 0b10010100 */
#define CXL_PSL9_DSISR_An_PF_STEG 0x000000000000009CULL /* PTE not found (STEG VA) 0b10011100 */
+#define CXL_PSL9_DSISR_An_URTCH 0x00000000000000B4ULL /* Unsupported Radix Tree Configuration 0b10110100 */
/****** CXL_PSL_TFC_An ******************************************************/
#define CXL_PSL_TFC_An_A (1ull << (63-28)) /* Acknowledge non-translation fault */
static inline bool cxl_is_power9(void)
{
- /* intermediate solution */
- if (!cxl_is_power8() &&
- (cpu_has_feature(CPU_FTRS_POWER9) ||
- cpu_has_feature(CPU_FTR_POWER9_DD1)))
+ if (pvr_version_is(PVR_POWER9))
return true;
return false;
}
-static inline bool cxl_is_psl8(struct cxl_afu *afu)
+static inline bool cxl_is_power9_dd1(void)
{
- if (afu->adapter->caia_major == 1)
- return true;
- return false;
-}
-
-static inline bool cxl_is_psl9(struct cxl_afu *afu)
-{
- if (afu->adapter->caia_major == 2)
+ if ((pvr_version_is(PVR_POWER9)) &&
+ cpu_has_feature(CPU_FTR_POWER9_DD1))
return true;
return false;
}
static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
{
- if ((cxl_is_psl8(ctx->afu)) && (dsisr & CXL_PSL_DSISR_An_DS))
+ if ((cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS)))
return true;
return false;
static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr)
{
- if ((cxl_is_psl8(ctx->afu)) && (dsisr & CXL_PSL_DSISR_An_DM))
- return true;
+ u64 crs; /* Translation Checkout Response Status */
- if ((cxl_is_psl9(ctx->afu)) &&
- ((dsisr & CXL_PSL9_DSISR_An_CO_MASK) &
- (CXL_PSL9_DSISR_An_PF_SLR | CXL_PSL9_DSISR_An_PF_RGC |
- CXL_PSL9_DSISR_An_PF_RGP | CXL_PSL9_DSISR_An_PF_HRH |
- CXL_PSL9_DSISR_An_PF_STEG)))
+ if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM))
return true;
+ if (cxl_is_power9()) {
+ crs = (dsisr & CXL_PSL9_DSISR_An_CO_MASK);
+ if ((crs == CXL_PSL9_DSISR_An_PF_SLR) ||
+ (crs == CXL_PSL9_DSISR_An_PF_RGC) ||
+ (crs == CXL_PSL9_DSISR_An_PF_RGP) ||
+ (crs == CXL_PSL9_DSISR_An_PF_HRH) ||
+ (crs == CXL_PSL9_DSISR_An_PF_STEG) ||
+ (crs == CXL_PSL9_DSISR_An_URTCH)) {
+ return true;
+ }
+ }
+
return false;
}
cxl_debugfs_init();
- if ((rc = register_cxl_calls(&cxl_calls)))
- goto err;
+ /*
+ * we don't register the callback on P9. slb callack is only
+ * used for the PSL8 MMU and CX4.
+ */
+ if (cxl_is_power8()) {
+ rc = register_cxl_calls(&cxl_calls);
+ if (rc)
+ goto err;
+ }
if (cpu_has_feature(CPU_FTR_HVMODE)) {
cxl_ops = &cxl_native_ops;
return 0;
err1:
- unregister_cxl_calls(&cxl_calls);
+ if (cxl_is_power8())
+ unregister_cxl_calls(&cxl_calls);
err:
cxl_debugfs_exit();
cxl_file_exit();
cxl_debugfs_exit();
cxl_file_exit();
- unregister_cxl_calls(&cxl_calls);
+ if (cxl_is_power8())
+ unregister_cxl_calls(&cxl_calls);
idr_destroy(&cxl_adapter_idr);
}
CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK,
false);
- /* Re-enable any masked interrupts */
- serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
- serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
- cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
-
+ /*
+ * Re-enable any masked interrupts when the AFU is not
+ * activated to avoid side effects after attaching a process
+ * in dedicated mode.
+ */
+ if (afu->current_mode == 0) {
+ serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+ serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
+ cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+ }
return rc;
}
pr_devel("PSL purge request\n");
- if (cxl_is_psl8(afu))
+ if (cxl_is_power8())
trans_fault = CXL_PSL_DSISR_TRANS;
- if (cxl_is_psl9(afu))
+ if (cxl_is_power9())
trans_fault = CXL_PSL9_DSISR_An_TF;
if (!cxl_ops->link_ok(afu->adapter, afu)) {
if (!test_tsk_thread_flag(current, TIF_32BIT))
sr |= CXL_PSL_SR_An_SF;
}
- if (cxl_is_psl9(ctx->afu)) {
+ if (cxl_is_power9()) {
if (radix_enabled())
sr |= CXL_PSL_SR_An_XLAT_ror;
else
static bool cxl_is_translation_fault(struct cxl_afu *afu, u64 dsisr)
{
- if ((cxl_is_psl8(afu)) && (dsisr & CXL_PSL_DSISR_TRANS))
+ if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_TRANS))
return true;
- if ((cxl_is_psl9(afu)) && (dsisr & CXL_PSL9_DSISR_An_TF))
+ if ((cxl_is_power9()) && (dsisr & CXL_PSL9_DSISR_An_TF))
return true;
return false;
if (ph != ctx->pe)
return;
dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An);
- if (cxl_is_psl8(ctx->afu) &&
+ if (cxl_is_power8() &&
((dsisr & CXL_PSL_DSISR_PENDING) == 0))
return;
- if (cxl_is_psl9(ctx->afu) &&
+ if (cxl_is_power9() &&
((dsisr & CXL_PSL9_DSISR_PENDING) == 0))
return;
/*
/* nMMU_ID Defaults to: b’000001001’*/
xsl_dsnctl |= ((u64)0x09 << (63-28));
- if (cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ if (!(cxl_is_power9_dd1())) {
/*
* Used to identify CAPI packets which should be sorted into
* the Non-Blocking queues by the PHB. This field should match
cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000003FFFF0000ULL);
/* Disable vc dd1 fix */
- if ((cxl_is_power9() && cpu_has_feature(CPU_FTR_POWER9_DD1)))
+ if (cxl_is_power9_dd1())
cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0400000000000001ULL);
return 0;
* The adapter is about to be reset, so ignore errors.
* Not supported on P9 DD1
*/
- if ((cxl_is_power8()) ||
- ((cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1))))
+ if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
cxl_data_cache_flush(adapter);
/* pcie_warm_reset requests a fundamental pci reset which includes a
.debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl9,
.debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl9,
.psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9,
- .err_irq_dump_registers = cxl_native_err_irq_dump_regs,
.debugfs_stop_trace = cxl_stop_trace_psl9,
.write_timebase_ctrl = write_timebase_ctrl_psl9,
.timebase_read = timebase_read_psl9,
* Flush adapter datacache as its about to be removed.
* Not supported on P9 DD1.
*/
- if ((cxl_is_power8()) ||
- ((cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1))))
+ if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
cxl_data_cache_flush(adapter);
cxl_deconfigure_adapter(adapter);
struct mmc_card *card = md->queue.card;
unsigned int from, nr, arg;
int err = 0, type = MMC_BLK_DISCARD;
+ blk_status_t status = BLK_STS_OK;
if (!mmc_can_erase(card)) {
- err = -EOPNOTSUPP;
+ status = BLK_STS_NOTSUPP;
goto fail;
}
if (!err)
err = mmc_erase(card, from, nr, arg);
} while (err == -EIO && !mmc_blk_reset(md, card->host, type));
- if (!err)
+ if (err)
+ status = BLK_STS_IOERR;
+ else
mmc_blk_reset_success(md, type);
fail:
- blk_end_request(req, err, blk_rq_bytes(req));
+ blk_end_request(req, status, blk_rq_bytes(req));
}
static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
struct mmc_card *card = md->queue.card;
unsigned int from, nr, arg;
int err = 0, type = MMC_BLK_SECDISCARD;
+ blk_status_t status = BLK_STS_OK;
if (!(mmc_can_secure_erase_trim(card))) {
- err = -EOPNOTSUPP;
+ status = BLK_STS_NOTSUPP;
goto out;
}
err = mmc_erase(card, from, nr, arg);
if (err == -EIO)
goto out_retry;
- if (err)
+ if (err) {
+ status = BLK_STS_IOERR;
goto out;
+ }
if (arg == MMC_SECURE_TRIM1_ARG) {
if (card->quirks & MMC_QUIRK_INAND_CMD38) {
err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG);
if (err == -EIO)
goto out_retry;
- if (err)
+ if (err) {
+ status = BLK_STS_IOERR;
goto out;
+ }
}
out_retry:
if (!err)
mmc_blk_reset_success(md, type);
out:
- blk_end_request(req, err, blk_rq_bytes(req));
+ blk_end_request(req, status, blk_rq_bytes(req));
}
static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
int ret = 0;
ret = mmc_flush_cache(card);
- if (ret)
- ret = -EIO;
-
- blk_end_request_all(req, ret);
+ blk_end_request_all(req, ret ? BLK_STS_IOERR : BLK_STS_OK);
}
/*
{
if (mmc_card_removed(card))
req->rq_flags |= RQF_QUIET;
- while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req)));
+ while (blk_end_request(req, BLK_STS_IOERR, blk_rq_cur_bytes(req)));
mmc_queue_req_free(mq, mqrq);
}
*/
if (mmc_card_removed(mq->card)) {
req->rq_flags |= RQF_QUIET;
- blk_end_request_all(req, -EIO);
+ blk_end_request_all(req, BLK_STS_IOERR);
mmc_queue_req_free(mq, mqrq);
return;
}
*/
mmc_blk_reset_success(md, type);
- req_pending = blk_end_request(old_req, 0,
+ req_pending = blk_end_request(old_req, BLK_STS_OK,
brq->data.bytes_xfered);
/*
* If the blk_end_request function returns non-zero even
* time, so we only reach here after trying to
* read a single sector.
*/
- req_pending = blk_end_request(old_req, -EIO,
+ req_pending = blk_end_request(old_req, BLK_STS_IOERR,
brq->data.blksz);
if (!req_pending) {
mmc_queue_req_free(mq, mq_rq);
ret = mmc_blk_part_switch(card, md);
if (ret) {
if (req) {
- blk_end_request_all(req, -EIO);
+ blk_end_request_all(req, BLK_STS_IOERR);
}
goto out;
}
if (!mq) {
while ((req = blk_fetch_request(q)) != NULL) {
req->rq_flags |= RQF_QUIET;
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
return;
}
mmc_queue_setup_discard(mq->queue, card);
if (card->bouncesz) {
- blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512);
blk_queue_max_segments(mq->queue, card->bouncesz / 512);
blk_queue_max_segment_size(mq->queue, card->bouncesz);
int i;
bool use_desc_chain_mode = true;
+ /*
+ * Broken SDIO with AP6255-based WiFi on Khadas VIM Pro has been
+ * reported. For some strange reason this occurs in descriptor
+ * chain mode only. So let's fall back to bounce buffer mode
+ * for command SD_IO_RW_EXTENDED.
+ */
+ if (mrq->cmd->opcode == SD_IO_RW_EXTENDED)
+ return;
+
for_each_sg(data->sg, sg, data->sg_len, i)
/* check for 8 byte alignment */
if (sg->offset & 7) {
bool d3_retune;
};
-const u8 intel_dsm_uuid[] = {
- 0xA5, 0x3E, 0xC1, 0xF6, 0xCD, 0x65, 0x1F, 0x46,
- 0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61,
-};
+static const guid_t intel_dsm_guid =
+ GUID_INIT(0xF6C13EA5, 0x65CD, 0x461F,
+ 0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61);
static int __intel_dsm(struct intel_host *intel_host, struct device *dev,
unsigned int fn, u32 *result)
int err = 0;
size_t len;
- obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), intel_dsm_uuid, 0, fn, NULL);
+ obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), &intel_dsm_guid, 0, fn, NULL);
if (!obj)
return -EOPNOTSUPP;
}
-static int do_blktrans_request(struct mtd_blktrans_ops *tr,
+static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
struct mtd_blktrans_dev *dev,
struct request *req)
{
nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
buf = bio_data(req->bio);
- if (req_op(req) == REQ_OP_FLUSH)
- return tr->flush(dev);
+ if (req_op(req) == REQ_OP_FLUSH) {
+ if (tr->flush(dev))
+ return BLK_STS_IOERR;
+ return BLK_STS_OK;
+ }
if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
get_capacity(req->rq_disk))
- return -EIO;
+ return BLK_STS_IOERR;
switch (req_op(req)) {
case REQ_OP_DISCARD:
- return tr->discard(dev, block, nsect);
+ if (tr->discard(dev, block, nsect))
+ return BLK_STS_IOERR;
+ return BLK_STS_OK;
case REQ_OP_READ:
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->readsect(dev, block, buf))
- return -EIO;
+ return BLK_STS_IOERR;
rq_flush_dcache_pages(req);
- return 0;
+ return BLK_STS_OK;
case REQ_OP_WRITE:
if (!tr->writesect)
- return -EIO;
+ return BLK_STS_IOERR;
rq_flush_dcache_pages(req);
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->writesect(dev, block, buf))
- return -EIO;
- return 0;
+ return BLK_STS_IOERR;
default:
- return -EIO;
+ return BLK_STS_IOERR;
}
}
spin_lock_irq(rq->queue_lock);
while (1) {
- int res;
+ blk_status_t res;
dev->bg_stop = false;
if (!req && !(req = blk_fetch_request(rq))) {
if (!dev)
while ((req = blk_fetch_request(rq)) != NULL)
- __blk_end_request_all(req, -ENODEV);
+ __blk_end_request_all(req, BLK_STS_IOERR);
else
queue_work(dev->wq, &dev->work);
}
new->rq->queuedata = new;
blk_queue_logical_block_size(new->rq, tr->blksize);
+ blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq);
ret = ubiblock_read(pdu);
rq_flush_dcache_pages(req);
- blk_mq_end_request(req, ret);
+ blk_mq_end_request(req, errno_to_blk_status(ret));
}
-static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *req = bd->rq;
case REQ_OP_READ:
ubi_sgl_init(&pdu->usgl);
queue_work(dev->wq, &pdu->work);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
default:
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
}
struct net_device *dev = dev_id;
struct arcnet_local *lp;
int recbuf, status, diagstatus, didsomething, boguscount;
+ unsigned long flags;
int retval = IRQ_NONE;
arc_printk(D_DURING, dev, "\n");
lp = netdev_priv(dev);
BUG_ON(!lp);
- spin_lock(&lp->lock);
+ spin_lock_irqsave(&lp->lock, flags);
/* RESET flag was enabled - if device is not running, we must
* clear it right away (but nothing else).
if (lp->hw.status(dev) & RESETflag)
lp->hw.command(dev, CFLAGScmd | RESETclear);
lp->hw.intmask(dev, 0);
- spin_unlock(&lp->lock);
+ spin_unlock_irqrestore(&lp->lock, flags);
return retval;
}
udelay(1);
lp->hw.intmask(dev, lp->intmask);
- spin_unlock(&lp->lock);
+ spin_unlock_irqrestore(&lp->lock, flags);
return retval;
}
EXPORT_SYMBOL(arcnet_interrupt);
ackpkt->soft.cap.proto = 0; /* using protocol 0 for acknowledge */
ackpkt->soft.cap.mes.ack = acked;
- arc_printk(D_PROTO, dev, "Ackknowledge for cap packet %x.\n",
+ arc_printk(D_PROTO, dev, "Acknowledge for cap packet %x.\n",
*((int *)&ackpkt->soft.cap.cookie[0]));
ackskb->protocol = cpu_to_be16(ETH_P_ARCNET);
for (i = 0; i < ci->devcount; i++) {
struct com20020_pci_channel_map *cm = &ci->chan_map_tbl[i];
struct com20020_dev *card;
+ int dev_id_mask = 0xf;
dev = alloc_arcdev(device);
if (!dev) {
arcnet_outb(0x00, ioaddr, COM20020_REG_W_COMMAND);
arcnet_inb(ioaddr, COM20020_REG_R_DIAGSTAT);
+ SET_NETDEV_DEV(dev, &pdev->dev);
dev->base_addr = ioaddr;
dev->dev_addr[0] = node;
dev->irq = pdev->irq;
/* Get the dev_id from the PLX rotary coder */
if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15))
- dev->dev_id = 0xc;
- dev->dev_id ^= inb(priv->misc + ci->rotary) >> 4;
+ dev_id_mask = 0x3;
+ dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask;
snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i);
return -ENODEV;
}
- dev->base_addr = ioaddr;
-
arc_printk(D_NORMAL, dev, "%s: station %02Xh found at %03lXh, IRQ %d.\n",
lp->card_name, dev->dev_addr[0], dev->base_addr, dev->irq);
AD_LINK_SPEED_100MBPS,
AD_LINK_SPEED_1000MBPS,
AD_LINK_SPEED_2500MBPS,
+ AD_LINK_SPEED_5000MBPS,
AD_LINK_SPEED_10000MBPS,
+ AD_LINK_SPEED_14000MBPS,
AD_LINK_SPEED_20000MBPS,
AD_LINK_SPEED_25000MBPS,
AD_LINK_SPEED_40000MBPS,
+ AD_LINK_SPEED_50000MBPS,
AD_LINK_SPEED_56000MBPS,
AD_LINK_SPEED_100000MBPS,
};
* %AD_LINK_SPEED_100MBPS,
* %AD_LINK_SPEED_1000MBPS,
* %AD_LINK_SPEED_2500MBPS,
+ * %AD_LINK_SPEED_5000MBPS,
* %AD_LINK_SPEED_10000MBPS
+ * %AD_LINK_SPEED_14000MBPS,
* %AD_LINK_SPEED_20000MBPS
* %AD_LINK_SPEED_25000MBPS
* %AD_LINK_SPEED_40000MBPS
+ * %AD_LINK_SPEED_50000MBPS
* %AD_LINK_SPEED_56000MBPS
* %AD_LINK_SPEED_100000MBPS
*/
speed = AD_LINK_SPEED_2500MBPS;
break;
+ case SPEED_5000:
+ speed = AD_LINK_SPEED_5000MBPS;
+ break;
+
case SPEED_10000:
speed = AD_LINK_SPEED_10000MBPS;
break;
+ case SPEED_14000:
+ speed = AD_LINK_SPEED_14000MBPS;
+ break;
+
case SPEED_20000:
speed = AD_LINK_SPEED_20000MBPS;
break;
speed = AD_LINK_SPEED_40000MBPS;
break;
+ case SPEED_50000:
+ speed = AD_LINK_SPEED_50000MBPS;
+ break;
+
case SPEED_56000:
speed = AD_LINK_SPEED_56000MBPS;
break;
case AD_LINK_SPEED_2500MBPS:
bandwidth = nports * 2500;
break;
+ case AD_LINK_SPEED_5000MBPS:
+ bandwidth = nports * 5000;
+ break;
case AD_LINK_SPEED_10000MBPS:
bandwidth = nports * 10000;
break;
+ case AD_LINK_SPEED_14000MBPS:
+ bandwidth = nports * 14000;
+ break;
case AD_LINK_SPEED_20000MBPS:
bandwidth = nports * 20000;
break;
case AD_LINK_SPEED_40000MBPS:
bandwidth = nports * 40000;
break;
+ case AD_LINK_SPEED_50000MBPS:
+ bandwidth = nports * 50000;
+ break;
case AD_LINK_SPEED_56000MBPS:
bandwidth = nports * 56000;
break;
struct bonding *bond = netdev_priv(bond_dev);
if (bond->wq)
destroy_workqueue(bond->wq);
- free_netdev(bond_dev);
}
void bond_setup(struct net_device *bond_dev)
bond_dev->netdev_ops = &bond_netdev_ops;
bond_dev->ethtool_ops = &bond_ethtool_ops;
- bond_dev->destructor = bond_destructor;
+ bond_dev->needs_free_netdev = true;
+ bond_dev->priv_destructor = bond_destructor;
SET_NETDEV_DEVTYPE(bond_dev, &bond_type);
rtnl_unlock();
if (res < 0)
- bond_destructor(bond_dev);
+ free_netdev(bond_dev);
return res;
}
dev->flags = IFF_POINTOPOINT | IFF_NOARP;
dev->mtu = CFHSI_MAX_CAIF_FRAME_SZ;
dev->priv_flags |= IFF_NO_QUEUE;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->netdev_ops = &cfhsi_netdevops;
for (i = 0; i < CFHSI_PRIO_LAST; ++i)
skb_queue_head_init(&cfhsi->qhead[i]);
dev->flags = IFF_POINTOPOINT | IFF_NOARP;
dev->mtu = CAIF_MAX_MTU;
dev->priv_flags |= IFF_NO_QUEUE;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
skb_queue_head_init(&serdev->head);
serdev->common.link_select = CAIF_LINK_LOW_LATENCY;
serdev->common.use_frag = true;
dev->flags = IFF_NOARP | IFF_POINTOPOINT;
dev->priv_flags |= IFF_NO_QUEUE;
dev->mtu = SPI_MAX_PAYLOAD_SIZE;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
skb_queue_head_init(&cfspi->qhead);
skb_queue_head_init(&cfspi->chead);
cfspi->cfdev.link_select = CAIF_LINK_HIGH_BANDW;
netdev->tx_queue_len = 100;
netdev->flags = IFF_POINTOPOINT | IFF_NOARP;
netdev->mtu = CFV_DEF_MTU_SIZE;
- netdev->destructor = free_netdev;
+ netdev->needs_free_netdev = true;
}
/* Create debugfs counters for the device */
can_update_state_error_stats(dev, new_state);
priv->state = new_state;
+ if (!cf)
+ return;
+
if (unlikely(new_state == CAN_STATE_BUS_OFF)) {
cf->can_id |= CAN_ERR_BUSOFF;
return;
struct pucan_rx_msg *msg_list, int msg_count)
{
void *msg_ptr = msg_list;
- int i, msg_size;
+ int i, msg_size = 0;
for (i = 0; i < msg_count; i++) {
msg_size = peak_canfd_handle_msg(priv, msg_ptr);
static void slc_free_netdev(struct net_device *dev)
{
int i = dev->base_addr;
- free_netdev(dev);
+
slcan_devs[i] = NULL;
}
static void slc_setup(struct net_device *dev)
{
dev->netdev_ops = &slc_netdev_ops;
- dev->destructor = slc_free_netdev;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = slc_free_netdev;
dev->hard_header_len = 0;
dev->addr_len = 0;
if (sl->tty) {
printk(KERN_ERR "%s: tty discipline still running\n",
dev->name);
- /* Intentionally leak the control block. */
- dev->destructor = NULL;
}
unregister_netdev(dev);
sizeof(*dm),
1000);
+ kfree(dm);
+
return rc;
}
const struct peak_usb_adapter *peak_usb_adapter = NULL;
int i, err = -ENOMEM;
- usb_dev = interface_to_usbdev(intf);
-
/* get corresponding PCAN-USB adapter */
for (i = 0; i < ARRAY_SIZE(peak_usb_adapters_list); i++)
if (peak_usb_adapters_list[i]->device_id == usb_id_product) {
if (!peak_usb_adapter) {
/* should never come except device_id bad usage in this file */
pr_err("%s: didn't find device id. 0x%x in devices list\n",
- PCAN_USB_DRIVER_NAME, usb_dev->descriptor.idProduct);
+ PCAN_USB_DRIVER_NAME, usb_id_product);
return -ENODEV;
}
static void vcan_setup(struct net_device *dev)
{
dev->type = ARPHRD_CAN;
- dev->mtu = CAN_MTU;
+ dev->mtu = CANFD_MTU;
dev->hard_header_len = 0;
dev->addr_len = 0;
dev->tx_queue_len = 0;
dev->flags |= IFF_ECHO;
dev->netdev_ops = &vcan_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
}
static struct rtnl_link_ops vcan_link_ops __read_mostly = {
static void vxcan_setup(struct net_device *dev)
{
dev->type = ARPHRD_CAN;
- dev->mtu = CAN_MTU;
+ dev->mtu = CANFD_MTU;
dev->hard_header_len = 0;
dev->addr_len = 0;
dev->tx_queue_len = 0;
dev->flags = (IFF_NOARP|IFF_ECHO);
dev->netdev_ops = &vxcan_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
}
/* forward declaration for rtnl_create_link() */
struct dummy_priv *priv = netdev_priv(dev);
kfree(priv->vfinfo);
- free_netdev(dev);
}
static void dummy_setup(struct net_device *dev)
/* Initialize the device structure. */
dev->netdev_ops = &dummy_netdev_ops;
dev->ethtool_ops = &dummy_ethtool_ops;
- dev->destructor = dummy_free_netdev;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = dummy_free_netdev;
/* Fill in device structure with ethernet-generic values. */
dev->flags |= IFF_NOARP;
#define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF
+#define ENA_REGS_ADMIN_INTR_MASK 1
+
/*****************************************************************************/
/*****************************************************************************/
/*****************************************************************************/
tail_masked = admin_queue->sq.tail & queue_size_mask;
/* In case of queue FULL */
- cnt = admin_queue->sq.tail - admin_queue->sq.head;
+ cnt = atomic_read(&admin_queue->outstanding_cmds);
if (cnt >= admin_queue->q_depth) {
- pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n",
- admin_queue->sq.tail, admin_queue->sq.head,
- admin_queue->q_depth);
+ pr_debug("admin queue is full.\n");
admin_queue->stats.out_of_space++;
return ERR_PTR(-ENOSPC);
}
static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
struct ena_com_admin_queue *admin_queue)
{
- unsigned long flags;
- u32 start_time;
+ unsigned long flags, timeout;
int ret;
- start_time = ((u32)jiffies_to_usecs(jiffies));
+ timeout = jiffies + ADMIN_CMD_TIMEOUT_US;
+
+ while (1) {
+ spin_lock_irqsave(&admin_queue->q_lock, flags);
+ ena_com_handle_admin_completion(admin_queue);
+ spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+ if (comp_ctx->status != ENA_CMD_SUBMITTED)
+ break;
- while (comp_ctx->status == ENA_CMD_SUBMITTED) {
- if ((((u32)jiffies_to_usecs(jiffies)) - start_time) >
- ADMIN_CMD_TIMEOUT_US) {
+ if (time_is_before_jiffies(timeout)) {
pr_err("Wait for completion (polling) timeout\n");
/* ENA didn't have any completion */
spin_lock_irqsave(&admin_queue->q_lock, flags);
goto err;
}
- spin_lock_irqsave(&admin_queue->q_lock, flags);
- ena_com_handle_admin_completion(admin_queue);
- spin_unlock_irqrestore(&admin_queue->q_lock, flags);
-
msleep(100);
}
void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling)
{
+ u32 mask_value = 0;
+
+ if (polling)
+ mask_value = ENA_REGS_ADMIN_INTR_MASK;
+
+ writel(mask_value, ena_dev->reg_bar + ENA_REGS_INTR_MASK_OFF);
ena_dev->admin_queue.polling = polling;
}
ENA_STAT_TX_ENTRY(tx_poll),
ENA_STAT_TX_ENTRY(doorbells),
ENA_STAT_TX_ENTRY(prepare_ctx_err),
- ENA_STAT_TX_ENTRY(missing_tx_comp),
ENA_STAT_TX_ENTRY(bad_req_id),
};
ENA_STAT_RX_ENTRY(dma_mapping_err),
ENA_STAT_RX_ENTRY(bad_desc_num),
ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
+ ENA_STAT_RX_ENTRY(empty_rx_ring),
};
static const struct ena_stats ena_stats_ena_com_strings[] = {
rxr->sgl_size = adapter->max_rx_sgl_size;
rxr->smoothed_interval =
ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+ rxr->empty_rx_queue = 0;
}
}
rx_ring->per_napi_bytes = 0;
}
+static inline void ena_unmask_interrupt(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring)
+{
+ struct ena_eth_io_intr_reg intr_reg;
+
+ /* Update intr register: rx intr delay,
+ * tx intr delay and interrupt unmask
+ */
+ ena_com_update_intr_reg(&intr_reg,
+ rx_ring->smoothed_interval,
+ tx_ring->smoothed_interval,
+ true);
+
+ /* It is a shared MSI-X.
+ * Tx and Rx CQ have pointer to it.
+ * So we use one of them to reach the intr reg
+ */
+ ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+}
+
static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
struct ena_ring *rx_ring)
{
{
struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
struct ena_ring *tx_ring, *rx_ring;
- struct ena_eth_io_intr_reg intr_reg;
u32 tx_work_done;
u32 rx_work_done;
if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
ena_adjust_intr_moderation(rx_ring, tx_ring);
- /* Update intr register: rx intr delay,
- * tx intr delay and interrupt unmask
- */
- ena_com_update_intr_reg(&intr_reg,
- rx_ring->smoothed_interval,
- tx_ring->smoothed_interval,
- true);
-
- /* It is a shared MSI-X.
- * Tx and Rx CQ have pointer to it.
- * So we use one of them to reach the intr reg
- */
- ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+ ena_unmask_interrupt(tx_ring, rx_ring);
}
-
ena_update_ring_numa_node(tx_ring, rx_ring);
ret = rx_work_done;
ena_napi_enable_all(adapter);
+ /* Enable completion queues interrupt */
+ for (i = 0; i < adapter->num_queues; i++)
+ ena_unmask_interrupt(&adapter->tx_ring[i],
+ &adapter->rx_ring[i]);
+
/* schedule napi in case we had pending packets
* from the last time we disable napi
*/
"Failed to get TX queue handlers. TX queue num %d rc: %d\n",
qid, rc);
ena_com_destroy_io_queue(ena_dev, ena_qid);
+ return rc;
}
ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
"Failed to get RX queue handlers. RX queue num %d rc: %d\n",
qid, rc);
ena_com_destroy_io_queue(ena_dev, ena_qid);
+ return rc;
}
ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
tx_info->tx_descs = nb_hw_desc;
tx_info->last_jiffies = jiffies;
+ tx_info->print_once = 0;
tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
tx_ring->ring_size);
"Reset attempt failed. Can not reset the device\n");
}
-static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+static int check_missing_comp_in_queue(struct ena_adapter *adapter,
+ struct ena_ring *tx_ring)
{
struct ena_tx_buffer *tx_buf;
unsigned long last_jiffies;
+ u32 missed_tx = 0;
+ int i;
+
+ for (i = 0; i < tx_ring->ring_size; i++) {
+ tx_buf = &tx_ring->tx_buffer_info[i];
+ last_jiffies = tx_buf->last_jiffies;
+ if (unlikely(last_jiffies &&
+ time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
+ if (!tx_buf->print_once)
+ netif_notice(adapter, tx_err, adapter->netdev,
+ "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
+ tx_ring->qid, i);
+
+ tx_buf->print_once = 1;
+ missed_tx++;
+
+ if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
+ netif_err(adapter, tx_err, adapter->netdev,
+ "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
+ missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
+ set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ return -EIO;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+{
struct ena_ring *tx_ring;
- int i, j, budget;
- u32 missed_tx;
+ int i, budget, rc;
/* Make sure the driver doesn't turn the device in other process */
smp_rmb();
for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
tx_ring = &adapter->tx_ring[i];
- for (j = 0; j < tx_ring->ring_size; j++) {
- tx_buf = &tx_ring->tx_buffer_info[j];
- last_jiffies = tx_buf->last_jiffies;
- if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
- netif_notice(adapter, tx_err, adapter->netdev,
- "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
- tx_ring->qid, j);
-
- u64_stats_update_begin(&tx_ring->syncp);
- missed_tx = tx_ring->tx_stats.missing_tx_comp++;
- u64_stats_update_end(&tx_ring->syncp);
-
- /* Clear last jiffies so the lost buffer won't
- * be counted twice.
- */
- tx_buf->last_jiffies = 0;
-
- if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
- netif_err(adapter, tx_err, adapter->netdev,
- "The number of lost tx completion is above the threshold (%d > %d). Reset the device\n",
- missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
- }
- }
- }
+ rc = check_missing_comp_in_queue(adapter, tx_ring);
+ if (unlikely(rc))
+ return;
budget--;
if (!budget)
adapter->last_monitored_tx_qid = i % adapter->num_queues;
}
+/* trigger napi schedule after 2 consecutive detections */
+#define EMPTY_RX_REFILL 2
+/* For the rare case where the device runs out of Rx descriptors and the
+ * napi handler failed to refill new Rx descriptors (due to a lack of memory
+ * for example).
+ * This case will lead to a deadlock:
+ * The device won't send interrupts since all the new Rx packets will be dropped
+ * The napi handler won't allocate new Rx descriptors so the device will be
+ * able to send new packets.
+ *
+ * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
+ * It is recommended to have at least 512MB, with a minimum of 128MB for
+ * constrained environment).
+ *
+ * When such a situation is detected - Reschedule napi
+ */
+static void check_for_empty_rx_ring(struct ena_adapter *adapter)
+{
+ struct ena_ring *rx_ring;
+ int i, refill_required;
+
+ if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+ return;
+
+ if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+ return;
+
+ for (i = 0; i < adapter->num_queues; i++) {
+ rx_ring = &adapter->rx_ring[i];
+
+ refill_required =
+ ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
+ if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
+ rx_ring->empty_rx_queue++;
+
+ if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
+ u64_stats_update_begin(&rx_ring->syncp);
+ rx_ring->rx_stats.empty_rx_ring++;
+ u64_stats_update_end(&rx_ring->syncp);
+
+ netif_err(adapter, drv, adapter->netdev,
+ "trigger refill for ring %d\n", i);
+
+ napi_schedule(rx_ring->napi);
+ rx_ring->empty_rx_queue = 0;
+ }
+ } else {
+ rx_ring->empty_rx_queue = 0;
+ }
+ }
+}
+
/* Check for keep alive expiration */
static void check_for_missing_keep_alive(struct ena_adapter *adapter)
{
check_for_missing_tx_completions(adapter);
+ check_for_empty_rx_ring(adapter);
+
if (debug_area)
ena_dump_stats_to_buf(adapter, debug_area);
{
int release_bars;
+ if (ena_dev->mem_bar)
+ devm_iounmap(&pdev->dev, ena_dev->mem_bar);
+
+ devm_iounmap(&pdev->dev, ena_dev->reg_bar);
+
release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
pci_release_selected_regions(pdev, release_bars);
}
goto err_free_ena_dev;
}
- ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR),
- pci_resource_len(pdev, ENA_REG_BAR));
+ ena_dev->reg_bar = devm_ioremap(&pdev->dev,
+ pci_resource_start(pdev, ENA_REG_BAR),
+ pci_resource_len(pdev, ENA_REG_BAR));
if (!ena_dev->reg_bar) {
dev_err(&pdev->dev, "failed to remap regs bar\n");
rc = -EFAULT;
ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
- ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR),
- pci_resource_len(pdev, ENA_MEM_BAR));
+ ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
+ pci_resource_start(pdev, ENA_MEM_BAR),
+ pci_resource_len(pdev, ENA_MEM_BAR));
if (!ena_dev->mem_bar) {
rc = -EFAULT;
goto err_device_destroy;
#define DRV_MODULE_VER_MAJOR 1
#define DRV_MODULE_VER_MINOR 1
-#define DRV_MODULE_VER_SUBMINOR 2
+#define DRV_MODULE_VER_SUBMINOR 7
#define DRV_MODULE_NAME "ena"
#ifndef DRV_MODULE_VERSION
u32 tx_descs;
/* num of buffers used by this skb */
u32 num_of_bufs;
- /* Save the last jiffies to detect missing tx packets */
+
+ /* Used for detect missing tx packets to limit the number of prints */
+ u32 print_once;
+ /* Save the last jiffies to detect missing tx packets
+ *
+ * sets to non zero value on ena_start_xmit and set to zero on
+ * napi and timer_Service_routine.
+ *
+ * while this value is not protected by lock,
+ * a given packet is not expected to be handled by ena_start_xmit
+ * and by napi/timer_service at the same time.
+ */
unsigned long last_jiffies;
struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
} ____cacheline_aligned;
u64 napi_comp;
u64 tx_poll;
u64 doorbells;
- u64 missing_tx_comp;
u64 bad_req_id;
};
u64 dma_mapping_err;
u64 bad_desc_num;
u64 rx_copybreak_pkt;
+ u64 empty_rx_ring;
};
struct ena_ring {
struct ena_stats_tx tx_stats;
struct ena_stats_rx rx_stats;
};
+ int empty_rx_queue;
} ____cacheline_aligned;
struct ena_stats_dev {
struct aq_hw_caps_s *aq_hw_caps,
u32 *regs_buff);
-int hw_atl_utils_hw_get_settings(struct aq_hw_s *self,
- struct ethtool_cmd *cmd);
-
int hw_atl_utils_hw_set_power(struct aq_hw_s *self,
unsigned int power_state);
/* when transmitting in a vf, start bd must hold the ethertype
* for fw to enforce it
*/
+ u16 vlan_tci = 0;
#ifndef BNX2X_STOP_ON_ERROR
- if (IS_VF(bp))
+ if (IS_VF(bp)) {
#endif
- tx_start_bd->vlan_or_ethertype =
- cpu_to_le16(ntohs(eth->h_proto));
+ /* Still need to consider inband vlan for enforced */
+ if (__vlan_get_tag(skb, &vlan_tci)) {
+ tx_start_bd->vlan_or_ethertype =
+ cpu_to_le16(ntohs(eth->h_proto));
+ } else {
+ tx_start_bd->bd_flags.as_bitfield |=
+ (X_ETH_INBAND_VLAN <<
+ ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
+ tx_start_bd->vlan_or_ethertype =
+ cpu_to_le16(vlan_tci);
+ }
#ifndef BNX2X_STOP_ON_ERROR
- else
+ } else {
/* used by FW for packet accounting */
tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
+ }
#endif
}
} else {
/* If no mc addresses are required, flush the configuration */
rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
- if (rc)
+ if (rc < 0)
BNX2X_ERR("Failed to clear multicast configuration %d\n",
rc);
}
/* release VF resources */
bnx2x_vf_free_resc(bp, vf);
+ vf->malicious = false;
+
/* re-open the mailbox */
bnx2x_vf_enable_mbx(bp, vf->abs_vfid);
return;
vf->abs_vfid, qidx);
bnx2x_vf_handle_rss_update_eqe(bp, vf);
case EVENT_RING_OPCODE_VF_FLR:
- case EVENT_RING_OPCODE_MALICIOUS_VF:
/* Do nothing for now */
return 0;
+ case EVENT_RING_OPCODE_MALICIOUS_VF:
+ vf->malicious = true;
+ return 0;
}
return 0;
continue;
}
+ if (vf->malicious) {
+ DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+ "vf %d malicious so no stats for it\n",
+ vf->abs_vfid);
+ continue;
+ }
+
DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
"add addresses for vf %d\n", vf->abs_vfid);
for_each_vfq(vf, j) {
{
BNX2X_PCI_FREE(bp->vf2pf_mbox, bp->vf2pf_mbox_mapping,
sizeof(struct bnx2x_vf_mbx_msg));
- BNX2X_PCI_FREE(bp->vf2pf_mbox, bp->pf2vf_bulletin_mapping,
+ BNX2X_PCI_FREE(bp->pf2vf_bulletin, bp->pf2vf_bulletin_mapping,
sizeof(union pf_vf_bulletin));
}
#define VF_RESET 3 /* VF FLR'd, pending cleanup */
bool flr_clnup_stage; /* true during flr cleanup */
+ bool malicious; /* true if FW indicated so, until FLR */
/* dma */
dma_addr_t fw_stat_map;
cp_cons = NEXT_CMP(cp_cons);
}
- if (unlikely(agg_bufs > MAX_SKB_FRAGS)) {
+ if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) {
bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
- netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
- agg_bufs, (int)MAX_SKB_FRAGS);
+ if (agg_bufs > MAX_SKB_FRAGS)
+ netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
+ agg_bufs, (int)MAX_SKB_FRAGS);
return NULL;
}
return rc;
}
+/* In netpoll mode, if we are using a combined completion ring, we need to
+ * discard the rx packets and recycle the buffers.
+ */
+static int bnxt_force_rx_discard(struct bnxt *bp, struct bnxt_napi *bnapi,
+ u32 *raw_cons, u8 *event)
+{
+ struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+ u32 tmp_raw_cons = *raw_cons;
+ struct rx_cmp_ext *rxcmp1;
+ struct rx_cmp *rxcmp;
+ u16 cp_cons;
+ u8 cmp_type;
+
+ cp_cons = RING_CMP(tmp_raw_cons);
+ rxcmp = (struct rx_cmp *)
+ &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+ tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
+ cp_cons = RING_CMP(tmp_raw_cons);
+ rxcmp1 = (struct rx_cmp_ext *)
+ &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+ if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
+ return -EBUSY;
+
+ cmp_type = RX_CMP_TYPE(rxcmp);
+ if (cmp_type == CMP_TYPE_RX_L2_CMP) {
+ rxcmp1->rx_cmp_cfa_code_errors_v2 |=
+ cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);
+ } else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
+ struct rx_tpa_end_cmp_ext *tpa_end1;
+
+ tpa_end1 = (struct rx_tpa_end_cmp_ext *)rxcmp1;
+ tpa_end1->rx_tpa_end_cmp_errors_v2 |=
+ cpu_to_le32(RX_TPA_END_CMP_ERRORS);
+ }
+ return bnxt_rx_pkt(bp, bnapi, raw_cons, event);
+}
+
#define BNXT_GET_EVENT_PORT(data) \
((data) & \
ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK)
if (unlikely(tx_pkts > bp->tx_wake_thresh))
rx_pkts = budget;
} else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
- rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+ if (likely(budget))
+ rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+ else
+ rc = bnxt_force_rx_discard(bp, bnapi, &raw_cons,
+ &event);
if (likely(rc >= 0))
rx_pkts += rc;
else if (rc == -EBUSY) /* partial completion */
struct bnxt *bp = netdev_priv(dev);
int i;
- for (i = 0; i < bp->cp_nr_rings; i++) {
- struct bnxt_irq *irq = &bp->irq_tbl[i];
+ /* Only process tx rings/combined rings in netpoll mode. */
+ for (i = 0; i < bp->tx_nr_rings; i++) {
+ struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
- disable_irq(irq->vector);
- irq->handler(irq->vector, bp->bnapi[i]);
- enable_irq(irq->vector);
+ napi_schedule(&txr->bnapi->napi);
}
}
#endif
__le32 rx_tpa_end_cmp_errors_v2;
#define RX_TPA_END_CMP_V2 (0x1 << 0)
- #define RX_TPA_END_CMP_ERRORS (0x7fff << 1)
+ #define RX_TPA_END_CMP_ERRORS (0x3 << 1)
#define RX_TPA_END_CMPL_ERRORS_SHIFT 1
u32 rx_tpa_end_cmp_start_opaque;
};
+#define TPA_END_ERRORS(rx_tpa_end_ext) \
+ ((rx_tpa_end_ext)->rx_tpa_end_cmp_errors_v2 & \
+ cpu_to_le32(RX_TPA_END_CMP_ERRORS))
+
#define DB_IDX_MASK 0xffffff
#define DB_IDX_VALID (0x1 << 26)
#define DB_IRQ_DIS (0x1 << 27)
{
int err;
+ mutex_lock(&uld_mutex);
err = setup_sge_queues(adap);
if (err)
- goto out;
+ goto rel_lock;
err = setup_rss(adap);
if (err)
goto freeq;
goto irq_err;
}
- mutex_lock(&uld_mutex);
enable_rx(adap);
t4_sge_start(adap);
t4_intr_enable(adap);
#endif
/* Initialize hash mac addr list*/
INIT_LIST_HEAD(&adap->mac_hlist);
- out:
return err;
+
irq_err:
dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
freeq:
t4_free_sge_resources(adap);
- goto out;
+ rel_lock:
+ mutex_unlock(&uld_mutex);
+ return err;
}
static void cxgb_down(struct adapter *adapter)
/* Initialize the device structure. */
dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
}
static int config_mgmt_dev(struct pci_dev *pdev)
priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */
/* device used for DMA mapping */
- arch_setup_dma_ops(dev, 0, 0, NULL, false);
+ set_dma_ops(dev, get_dma_ops(&pdev->dev));
err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40));
if (err) {
dev_err(dev, "dma_coerce_mask_and_coherent() failed\n");
tristate "FMan support"
depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST
select GENERIC_ALLOCATOR
+ depends on HAS_DMA
select PHYLIB
default n
help
goto no_mem;
}
+ set_dma_ops(&pdev->dev, get_dma_ops(priv->dev));
+
ret = platform_device_add_data(pdev, &data, sizeof(data));
if (ret)
goto err;
HNS_ROCE_RESET_FUNC = 0x7,
};
-const u8 hns_dsaf_acpi_dsm_uuid[] = {
- 0x1A, 0xAA, 0x85, 0x1A, 0x93, 0xE2, 0x5E, 0x41,
- 0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A
-};
+static const guid_t hns_dsaf_acpi_dsm_guid =
+ GUID_INIT(0x1A85AA1A, 0xE293, 0x415E,
+ 0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A);
static void dsaf_write_sub(struct dsaf_device *dsaf_dev, u32 reg, u32 val)
{
argv4.package.elements = obj_args;
obj = acpi_evaluate_dsm(ACPI_HANDLE(dsaf_dev->dev),
- hns_dsaf_acpi_dsm_uuid, 0, op_type, &argv4);
+ &hns_dsaf_acpi_dsm_guid, 0, op_type, &argv4);
if (!obj) {
dev_warn(dsaf_dev->dev, "reset port_type%d port%d fail!",
port_type, port);
argv4.package.elements = &obj_args,
obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
- hns_dsaf_acpi_dsm_uuid, 0,
+ &hns_dsaf_acpi_dsm_guid, 0,
HNS_OP_GET_PORT_TYPE_FUNC, &argv4);
if (!obj || obj->type != ACPI_TYPE_INTEGER)
argv4.package.elements = &obj_args,
obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
- hns_dsaf_acpi_dsm_uuid, 0,
+ &hns_dsaf_acpi_dsm_guid, 0,
HNS_OP_GET_SFP_STAT_FUNC, &argv4);
if (!obj || obj->type != ACPI_TYPE_INTEGER)
argv4.package.elements = obj_args;
obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dsaf_dev->dev),
- hns_dsaf_acpi_dsm_uuid, 0,
+ &hns_dsaf_acpi_dsm_guid, 0,
HNS_OP_SERDES_LP_FUNC, &argv4);
if (!obj) {
dev_warn(mac_cb->dsaf_dev->dev, "set port%d serdes lp fail!",
/* Force 1000M Link, Default is 0x0200 */
phy_write(phy_dev, 7, 0x20C);
- phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
- /* Enable PHY loop-back */
+ /* Powerup Fiber */
+ phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
+ val = phy_read(phy_dev, COPPER_CONTROL_REG);
+ val &= ~PHY_POWER_DOWN;
+ phy_write(phy_dev, COPPER_CONTROL_REG, val);
+
+ /* Enable Phy Loopback */
+ phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
val = phy_read(phy_dev, COPPER_CONTROL_REG);
val |= PHY_LOOP_BACK;
val &= ~PHY_POWER_DOWN;
phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA);
phy_write(phy_dev, 1, 0x400);
phy_write(phy_dev, 7, 0x200);
+
+ phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
+ val = phy_read(phy_dev, COPPER_CONTROL_REG);
+ val |= PHY_POWER_DOWN;
+ phy_write(phy_dev, COPPER_CONTROL_REG, val);
+
phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
phy_write(phy_dev, 9, 0xF00);
{
struct emac_regs __iomem *p = dev->emacp;
int n = 20;
+ bool __maybe_unused try_internal_clock = false;
DBG(dev, "reset" NL);
}
#ifdef CONFIG_PPC_DCR_NATIVE
+do_retry:
/*
* PPC460EX/GT Embedded Processor Advanced User's Manual
* section 28.10.1 Mode Register 0 (EMACx_MR0) states:
* of the EMAC. If none is present, select the internal clock
* (SDR0_ETH_CFG[EMACx_PHY_CLK] = 1).
* After a soft reset, select the external clock.
+ *
+ * The AR8035-A PHY Meraki MR24 does not provide a TX Clk if the
+ * ethernet cable is not attached. This causes the reset to timeout
+ * and the PHY detection code in emac_init_phy() is unable to
+ * communicate and detect the AR8035-A PHY. As a result, the emac
+ * driver bails out early and the user has no ethernet.
+ * In order to stay compatible with existing configurations, the
+ * driver will temporarily switch to the internal clock, after
+ * the first reset fails.
*/
if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) {
- if (dev->phy_address == 0xffffffff &&
- dev->phy_map == 0xffffffff) {
+ if (try_internal_clock || (dev->phy_address == 0xffffffff &&
+ dev->phy_map == 0xffffffff)) {
/* No PHY: select internal loop clock before reset */
dcri_clrset(SDR0, SDR0_ETH_CFG,
0, SDR0_ETH_CFG_ECS << dev->cell_index);
#ifdef CONFIG_PPC_DCR_NATIVE
if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) {
- if (dev->phy_address == 0xffffffff &&
- dev->phy_map == 0xffffffff) {
+ if (!n && !try_internal_clock) {
+ /* first attempt has timed out. */
+ n = 20;
+ try_internal_clock = true;
+ goto do_retry;
+ }
+
+ if (try_internal_clock || (dev->phy_address == 0xffffffff &&
+ dev->phy_map == 0xffffffff)) {
/* No PHY: restore external clock source after reset */
dcri_clrset(SDR0, SDR0_ETH_CFG,
SDR0_ETH_CFG_ECS << dev->cell_index, 0);
return emac_reset(dev);
}
+static int emac_mdio_phy_start_aneg(struct mii_phy *phy,
+ struct phy_device *phy_dev)
+{
+ phy_dev->autoneg = phy->autoneg;
+ phy_dev->speed = phy->speed;
+ phy_dev->duplex = phy->duplex;
+ phy_dev->advertising = phy->advertising;
+ return phy_start_aneg(phy_dev);
+}
+
static int emac_mdio_setup_aneg(struct mii_phy *phy, u32 advertise)
{
struct net_device *ndev = phy->dev;
struct emac_instance *dev = netdev_priv(ndev);
- dev->phy.autoneg = AUTONEG_ENABLE;
- dev->phy.speed = SPEED_1000;
- dev->phy.duplex = DUPLEX_FULL;
- dev->phy.advertising = advertise;
phy->autoneg = AUTONEG_ENABLE;
- phy->speed = dev->phy.speed;
- phy->duplex = dev->phy.duplex;
phy->advertising = advertise;
- return phy_start_aneg(dev->phy_dev);
+ return emac_mdio_phy_start_aneg(phy, dev->phy_dev);
}
static int emac_mdio_setup_forced(struct mii_phy *phy, int speed, int fd)
struct net_device *ndev = phy->dev;
struct emac_instance *dev = netdev_priv(ndev);
- dev->phy.autoneg = AUTONEG_DISABLE;
- dev->phy.speed = speed;
- dev->phy.duplex = fd;
phy->autoneg = AUTONEG_DISABLE;
phy->speed = speed;
phy->duplex = fd;
- return phy_start_aneg(dev->phy_dev);
+ return emac_mdio_phy_start_aneg(phy, dev->phy_dev);
}
static int emac_mdio_poll_link(struct mii_phy *phy)
{
struct net_device *ndev = phy->dev;
struct emac_instance *dev = netdev_priv(ndev);
+ struct phy_device *phy_dev = dev->phy_dev;
int res;
- res = phy_read_status(dev->phy_dev);
+ res = phy_read_status(phy_dev);
if (res)
return res;
- dev->phy.speed = phy->speed;
- dev->phy.duplex = phy->duplex;
- dev->phy.pause = phy->pause;
- dev->phy.asym_pause = phy->asym_pause;
+ phy->speed = phy_dev->speed;
+ phy->duplex = phy_dev->duplex;
+ phy->pause = phy_dev->pause;
+ phy->asym_pause = phy_dev->asym_pause;
return 0;
}
struct emac_instance *dev = netdev_priv(ndev);
phy_start(dev->phy_dev);
- dev->phy.autoneg = phy->autoneg;
- dev->phy.speed = phy->speed;
- dev->phy.duplex = phy->duplex;
- dev->phy.advertising = phy->advertising;
- dev->phy.pause = phy->pause;
- dev->phy.asym_pause = phy->asym_pause;
-
return phy_init_hw(dev->phy_dev);
}
}
#endif
+static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ return -EOPNOTSUPP;
+}
+
static const struct net_device_ops ibmvnic_netdev_ops = {
.ndo_open = ibmvnic_open,
.ndo_stop = ibmvnic_close,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = ibmvnic_netpoll_controller,
#endif
+ .ndo_change_mtu = ibmvnic_change_mtu,
};
/* ethtool functions */
#define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(1)
#define I40E_FLAG_MSI_ENABLED BIT_ULL(2)
#define I40E_FLAG_MSIX_ENABLED BIT_ULL(3)
+#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT_ULL(4)
#define I40E_FLAG_RSS_ENABLED BIT_ULL(6)
#define I40E_FLAG_VMDQ_ENABLED BIT_ULL(7)
#define I40E_FLAG_IWARP_ENABLED BIT_ULL(10)
I40E_PRIV_FLAG("LinkPolling", I40E_FLAG_LINK_POLLING_ENABLED, 0),
I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
- I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_CAPABLE, 0),
+ I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
};
/* Only allow ATR evict on hardware that is capable of handling it */
if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
- pf->flags &= ~I40E_FLAG_HW_ATR_EVICT_CAPABLE;
+ pf->flags &= ~I40E_FLAG_HW_ATR_EVICT_ENABLED;
if (changed_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT) {
u16 sw_flags = 0, valid_flags = 0;
(pf->hw.aq.api_min_ver > 4))) {
/* Supported in FW API version higher than 1.4 */
pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
- pf->flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
- } else {
- pf->flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
}
+ /* Enable HW ATR eviction if possible */
+ if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
+ pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED;
+
pf->eeprom_version = 0xDEAD;
pf->lan_veb = I40E_NO_VEB;
pf->lan_vsi = I40E_NO_VSI;
/* Due to lack of space, no more new filters can be programmed */
if (th->syn && (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED))
return;
- if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
+ if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) {
/* HW ATR eviction will take care of removing filters on FIN
* and RST packets.
*/
I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
- if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
+ if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED)
dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
VLAN_VID_MASK));
}
+ spin_unlock_bh(&vsi->mac_filter_hash_lock);
if (vlan_id || qos)
ret = i40e_vsi_add_pvid(vsi, vlanprio);
else
i40e_vsi_remove_pvid(vsi);
+ spin_lock_bh(&vsi->mac_filter_hash_lock);
if (vlan_id) {
dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n",
dma_addr_t *dma_addr,
phys_addr_t *phys_addr)
{
- int cpu = smp_processor_id();
+ int cpu = get_cpu();
*dma_addr = mvpp2_percpu_read(priv, cpu,
MVPP2_BM_PHY_ALLOC_REG(bm_pool->id));
if (sizeof(phys_addr_t) == 8)
*phys_addr |= (u64)phys_addr_highbits << 32;
}
+
+ put_cpu();
}
/* Free all buffers from the pool */
return bm;
}
-/* Get pool number from a BM cookie */
-static inline int mvpp2_bm_cookie_pool_get(unsigned long cookie)
-{
- return (cookie >> MVPP2_BM_COOKIE_POOL_OFFS) & 0xFF;
-}
-
/* Release buffer to BM */
static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
dma_addr_t buf_dma_addr,
phys_addr_t buf_phys_addr)
{
- int cpu = smp_processor_id();
+ int cpu = get_cpu();
if (port->priv->hw_version == MVPP22) {
u32 val = 0;
MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
mvpp2_percpu_write(port->priv, cpu,
MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
+
+ put_cpu();
}
/* Refill BM pool */
-static void mvpp2_pool_refill(struct mvpp2_port *port, u32 bm,
+static void mvpp2_pool_refill(struct mvpp2_port *port, int pool,
dma_addr_t dma_addr,
phys_addr_t phys_addr)
{
- int pool = mvpp2_bm_cookie_pool_get(bm);
-
mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
}
{
u32 val;
- return;
-
/* Only GOP port 0 has an XLG MAC */
if (port->gop_id == 0) {
val = readl(port->base + MVPP22_XLG_CTRL3_REG);
mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(prxq), val);
}
-/* Obtain BM cookie information from descriptor */
-static u32 mvpp2_bm_cookie_build(struct mvpp2_port *port,
- struct mvpp2_rx_desc *rx_desc)
-{
- int cpu = smp_processor_id();
- int pool;
-
- pool = (mvpp2_rxdesc_status_get(port, rx_desc) &
- MVPP2_RXD_BM_POOL_ID_MASK) >>
- MVPP2_RXD_BM_POOL_ID_OFFS;
-
- return ((pool & 0xFF) << MVPP2_BM_COOKIE_POOL_OFFS) |
- ((cpu & 0xFF) << MVPP2_BM_COOKIE_CPU_OFFS);
-}
-
/* Tx descriptors helper methods */
/* Get pointer to next Tx descriptor to be processed (send) by HW */
static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
struct mvpp2_rx_queue *rxq)
{
- int cpu = smp_processor_id();
+ int cpu = get_cpu();
if (rxq->pkts_coal > MVPP2_OCCUPIED_THRESH_MASK)
rxq->pkts_coal = MVPP2_OCCUPIED_THRESH_MASK;
mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_THRESH_REG,
rxq->pkts_coal);
+
+ put_cpu();
}
static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz)
mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
/* Set Rx descriptors queue starting address - indirect access */
- cpu = smp_processor_id();
+ cpu = get_cpu();
mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
if (port->priv->hw_version == MVPP21)
rxq_dma = rxq->descs_dma;
mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, rxq_dma);
mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, rxq->size);
mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_INDEX_REG, 0);
+ put_cpu();
/* Set Offset */
mvpp2_rxq_offset_set(port, rxq->id, NET_SKB_PAD);
for (i = 0; i < rx_received; i++) {
struct mvpp2_rx_desc *rx_desc = mvpp2_rxq_next_desc_get(rxq);
- u32 bm = mvpp2_bm_cookie_build(port, rx_desc);
+ u32 status = mvpp2_rxdesc_status_get(port, rx_desc);
+ int pool;
+
+ pool = (status & MVPP2_RXD_BM_POOL_ID_MASK) >>
+ MVPP2_RXD_BM_POOL_ID_OFFS;
- mvpp2_pool_refill(port, bm,
+ mvpp2_pool_refill(port, pool,
mvpp2_rxdesc_dma_addr_get(port, rx_desc),
mvpp2_rxdesc_cookie_get(port, rx_desc));
}
* free descriptor number
*/
mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
- cpu = smp_processor_id();
+ cpu = get_cpu();
mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, 0);
mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, 0);
+ put_cpu();
}
/* Create and initialize a Tx queue */
txq->last_desc = txq->size - 1;
/* Set Tx descriptors queue starting address - indirect access */
- cpu = smp_processor_id();
+ cpu = get_cpu();
mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG,
txq->descs_dma);
mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG,
MVPP2_PREF_BUF_PTR(desc) | MVPP2_PREF_BUF_SIZE_16 |
MVPP2_PREF_BUF_THRESH(desc_per_txq / 2));
+ put_cpu();
/* WRR / EJP configuration - indirect access */
tx_port_num = mvpp2_egress_port(port);
mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(txq->id), 0);
/* Set Tx descriptors queue starting address and size */
- cpu = smp_processor_id();
+ cpu = get_cpu();
mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG, 0);
mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_SIZE_REG, 0);
+ put_cpu();
}
/* Cleanup Tx ports */
int delay, pending, cpu;
u32 val;
- cpu = smp_processor_id();
+ cpu = get_cpu();
mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
val = mvpp2_percpu_read(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG);
val |= MVPP2_TXQ_DRAIN_EN_MASK;
val &= ~MVPP2_TXQ_DRAIN_EN_MASK;
mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG, val);
+ put_cpu();
for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
/* Reuse skb if possible, or allocate a new skb and add it to BM pool */
static int mvpp2_rx_refill(struct mvpp2_port *port,
- struct mvpp2_bm_pool *bm_pool, u32 bm)
+ struct mvpp2_bm_pool *bm_pool, int pool)
{
dma_addr_t dma_addr;
phys_addr_t phys_addr;
if (!buf)
return -ENOMEM;
- mvpp2_pool_refill(port, bm, dma_addr, phys_addr);
+ mvpp2_pool_refill(port, pool, dma_addr, phys_addr);
return 0;
}
unsigned int frag_size;
dma_addr_t dma_addr;
phys_addr_t phys_addr;
- u32 bm, rx_status;
+ u32 rx_status;
int pool, rx_bytes, err;
void *data;
phys_addr = mvpp2_rxdesc_cookie_get(port, rx_desc);
data = (void *)phys_to_virt(phys_addr);
- bm = mvpp2_bm_cookie_build(port, rx_desc);
- pool = mvpp2_bm_cookie_pool_get(bm);
+ pool = (rx_status & MVPP2_RXD_BM_POOL_ID_MASK) >>
+ MVPP2_RXD_BM_POOL_ID_OFFS;
bm_pool = &port->priv->bm_pools[pool];
/* In case of an error, release the requested buffer pointer
dev->stats.rx_errors++;
mvpp2_rx_error(port, rx_desc);
/* Return the buffer to the pool */
- mvpp2_pool_refill(port, bm, dma_addr, phys_addr);
+ mvpp2_pool_refill(port, pool, dma_addr, phys_addr);
continue;
}
goto err_drop_frame;
}
- err = mvpp2_rx_refill(port, bm_pool, bm);
+ err = mvpp2_rx_refill(port, bm_pool, pool);
if (err) {
netdev_err(port->dev, "failed to refill BM pools\n");
goto err_drop_frame;
struct mlx5e_rx_am_stats {
int ppms; /* packets per msec */
+ int bpms; /* bytes per msec */
int epms; /* events per msec */
};
struct mlx5e_rx_am_sample {
- ktime_t time;
- unsigned int pkt_ctr;
- u16 event_ctr;
+ ktime_t time;
+ u32 pkt_ctr;
+ u32 byte_ctr;
+ u16 event_ctr;
};
struct mlx5e_rx_am { /* Adaptive Moderation */
SOF_TIMESTAMPING_RX_HARDWARE |
SOF_TIMESTAMPING_RAW_HARDWARE;
- info->tx_types = (BIT(1) << HWTSTAMP_TX_OFF) |
- (BIT(1) << HWTSTAMP_TX_ON);
+ info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+ BIT(HWTSTAMP_TX_ON);
- info->rx_filters = (BIT(1) << HWTSTAMP_FILTER_NONE) |
- (BIT(1) << HWTSTAMP_FILTER_ALL);
+ info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+ BIT(HWTSTAMP_FILTER_ALL);
return 0;
}
return netdev;
err_cleanup_nic:
- profile->cleanup(priv);
+ if (profile->cleanup)
+ profile->cleanup(priv);
free_netdev(netdev);
return NULL;
params->tx_max_inline = mlx5e_get_max_inline_cap(mdev);
params->num_tc = 1;
params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+
+ mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode);
}
static void mlx5e_build_rep_netdev(struct net_device *netdev)
mlx5e_am_step(am);
}
+#define IS_SIGNIFICANT_DIFF(val, ref) \
+ (((100 * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */
+
static int mlx5e_am_stats_compare(struct mlx5e_rx_am_stats *curr,
struct mlx5e_rx_am_stats *prev)
{
- int diff;
-
- if (!prev->ppms)
- return curr->ppms ? MLX5E_AM_STATS_BETTER :
+ if (!prev->bpms)
+ return curr->bpms ? MLX5E_AM_STATS_BETTER :
MLX5E_AM_STATS_SAME;
- diff = curr->ppms - prev->ppms;
- if (((100 * abs(diff)) / prev->ppms) > 10) /* more than 10% diff */
- return (diff > 0) ? MLX5E_AM_STATS_BETTER :
- MLX5E_AM_STATS_WORSE;
+ if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
+ return (curr->bpms > prev->bpms) ? MLX5E_AM_STATS_BETTER :
+ MLX5E_AM_STATS_WORSE;
- if (!prev->epms)
- return curr->epms ? MLX5E_AM_STATS_WORSE :
- MLX5E_AM_STATS_SAME;
+ if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
+ return (curr->ppms > prev->ppms) ? MLX5E_AM_STATS_BETTER :
+ MLX5E_AM_STATS_WORSE;
- diff = curr->epms - prev->epms;
- if (((100 * abs(diff)) / prev->epms) > 10) /* more than 10% diff */
- return (diff < 0) ? MLX5E_AM_STATS_BETTER :
- MLX5E_AM_STATS_WORSE;
+ if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
+ return (curr->epms < prev->epms) ? MLX5E_AM_STATS_BETTER :
+ MLX5E_AM_STATS_WORSE;
return MLX5E_AM_STATS_SAME;
}
{
s->time = ktime_get();
s->pkt_ctr = rq->stats.packets;
+ s->byte_ctr = rq->stats.bytes;
s->event_ctr = rq->cq.event_ctr;
}
#define MLX5E_AM_NEVENTS 64
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
+#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))
static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start,
struct mlx5e_rx_am_sample *end,
{
/* u32 holds up to 71 minutes, should be enough */
u32 delta_us = ktime_us_delta(end->time, start->time);
- unsigned int npkts = end->pkt_ctr - start->pkt_ctr;
+ u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
+ u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
+ start->byte_ctr);
if (!delta_us)
return;
- curr_stats->ppms = (npkts * USEC_PER_MSEC) / delta_us;
- curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us;
+ curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
+ curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
+ curr_stats->epms = DIV_ROUND_UP(MLX5E_AM_NEVENTS * USEC_PER_MSEC,
+ delta_us);
}
void mlx5e_rx_am_work(struct work_struct *work)
switch (am->state) {
case MLX5E_AM_MEASURE_IN_PROGRESS:
- nevents = rq->cq.event_ctr - am->start_sample.event_ctr;
+ nevents = BIT_GAP(BITS_PER_TYPE(u16), rq->cq.event_ctr,
+ am->start_sample.event_ctr);
if (nevents < MLX5E_AM_NEVENTS)
break;
mlx5e_am_sample(rq, &end_sample);
};
static const struct counter_desc mlx5e_pme_status_desc[] = {
- { "module_plug", 0 },
{ "module_unplug", 8 },
};
static const struct counter_desc mlx5e_pme_error_desc[] = {
- { "module_pwr_budget_exd", 0 }, /* power budget exceed */
- { "module_long_range", 8 }, /* long range for non MLNX cable */
- { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */
- { "module_no_eeprom", 24 }, /* no eeprom/retry time out */
- { "module_enforce_part", 32 }, /* enforce part number list */
- { "module_unknown_id", 40 }, /* unknown identifier */
- { "module_high_temp", 48 }, /* high temperature */
+ { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */
+ { "module_high_temp", 48 }, /* high temperature */
{ "module_bad_shorted", 56 }, /* bad or shorted cable/module */
- { "module_unknown_status", 64 },
};
#endif /* __MLX5_EN_STATS_H__ */
{MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_source[4])},
{MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE, 2, offsetof(struct pedit_headers, eth.h_proto)},
- {MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)},
{MLX5_ACTION_IN_FIELD_OUT_IP_TTL, 1, offsetof(struct pedit_headers, ip4.ttl)},
{MLX5_ACTION_IN_FIELD_OUT_SIPV4, 4, offsetof(struct pedit_headers, ip4.saddr)},
{MLX5_ACTION_IN_FIELD_OUT_DIPV4, 4, offsetof(struct pedit_headers, ip4.daddr)},
return 0;
}
-int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+static int mlx5_devlink_eswitch_check(struct devlink *devlink)
{
- struct mlx5_core_dev *dev;
- u16 cur_mlx5_mode, mlx5_mode = 0;
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
- dev = devlink_priv(devlink);
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return -EOPNOTSUPP;
if (!MLX5_CAP_GEN(dev, vport_group_manager))
return -EOPNOTSUPP;
- cur_mlx5_mode = dev->priv.eswitch->mode;
-
- if (cur_mlx5_mode == SRIOV_NONE)
+ if (dev->priv.eswitch->mode == SRIOV_NONE)
return -EOPNOTSUPP;
+ return 0;
+}
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u16 cur_mlx5_mode, mlx5_mode = 0;
+ int err;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ cur_mlx5_mode = dev->priv.eswitch->mode;
+
if (esw_mode_from_devlink(mode, &mlx5_mode))
return -EINVAL;
int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
{
- struct mlx5_core_dev *dev;
-
- dev = devlink_priv(devlink);
-
- if (!MLX5_CAP_GEN(dev, vport_group_manager))
- return -EOPNOTSUPP;
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ int err;
- if (dev->priv.eswitch->mode == SRIOV_NONE)
- return -EOPNOTSUPP;
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
}
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw = dev->priv.eswitch;
- int num_vports = esw->enabled_vports;
int err, vport;
u8 mlx5_mode;
- if (!MLX5_CAP_GEN(dev, vport_group_manager))
- return -EOPNOTSUPP;
-
- if (esw->mode == SRIOV_NONE)
- return -EOPNOTSUPP;
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
if (err)
goto out;
- for (vport = 1; vport < num_vports; vport++) {
+ for (vport = 1; vport < esw->enabled_vports; vport++) {
err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
if (err) {
esw_warn(dev, "Failed to set min inline on vport %d\n",
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw = dev->priv.eswitch;
+ int err;
- if (!MLX5_CAP_GEN(dev, vport_group_manager))
- return -EOPNOTSUPP;
-
- if (esw->mode == SRIOV_NONE)
- return -EOPNOTSUPP;
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
}
struct mlx5_eswitch *esw = dev->priv.eswitch;
int err;
- if (!MLX5_CAP_GEN(dev, vport_group_manager))
- return -EOPNOTSUPP;
-
- if (esw->mode == SRIOV_NONE)
- return -EOPNOTSUPP;
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
(!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) ||
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw = dev->priv.eswitch;
+ int err;
- if (!MLX5_CAP_GEN(dev, vport_group_manager))
- return -EOPNOTSUPP;
-
- if (esw->mode == SRIOV_NONE)
- return -EOPNOTSUPP;
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
*encap = esw->offloads.encap;
return 0;
ft_attr.level = level;
ft_attr.prio = prio;
- return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, 0);
+ return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport);
}
struct mlx5_flow_table*
struct mlx5_core_health *health = &dev->priv.health;
u32 count;
- if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
- mod_timer(&health->timer, get_next_poll_jiffies());
- return;
- }
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto out;
count = ioread32be(health->health_counter);
if (count == health->prev)
if (health->miss_counter == MAX_MISSES) {
dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
print_health_info(dev);
- } else {
- mod_timer(&health->timer, get_next_poll_jiffies());
}
if (in_fatal(dev) && !health->sick) {
"new health works are not permitted at this stage\n");
spin_unlock(&health->wq_lock);
}
+
+out:
+ mod_timer(&health->timer, get_next_poll_jiffies());
}
void mlx5_start_health_poll(struct mlx5_core_dev *dev)
},
};
-#define FW_INIT_TIMEOUT_MILI 2000
-#define FW_INIT_WAIT_MS 2
+#define FW_INIT_TIMEOUT_MILI 2000
+#define FW_INIT_WAIT_MS 2
+#define FW_PRE_INIT_TIMEOUT_MILI 10000
static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
{
/* disable cmdif checksum */
MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
- /* If the HCA supports 4K UARs use it */
- if (MLX5_CAP_GEN_MAX(dev, uar_4k))
+ /* Enable 4K UAR only when HCA supports it and page size is bigger
+ * than 4K.
+ */
+ if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);
MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
*/
dev->state = MLX5_DEVICE_STATE_UP;
+ /* wait for firmware to accept initialization segments configurations
+ */
+ err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
+ if (err) {
+ dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n",
+ FW_PRE_INIT_TIMEOUT_MILI);
+ goto out;
+ }
+
err = mlx5_cmd_init(dev);
if (err) {
dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
u16 vid = vlan_dev_vlan_id(vlan_dev);
+ if (netif_is_bridge_port(vlan_dev))
+ return 0;
+
if (mlxsw_sp_port_dev_check(real_dev))
return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event,
vid);
qed_wr(p_hwfn,
p_ptt,
s_storm_defs[storm_id].cm_ctx_wr_addr,
- BIT(9) | lid);
+ (i << 9) | lid);
*(dump_buf + offset) = qed_rd(p_hwfn,
p_ptt,
rd_reg_addr);
*index = entry->index;
resolved = false;
} else if (removing) {
- ofdpa_neigh_del(trans, found);
*index = found->index;
+ ofdpa_neigh_del(trans, found);
} else if (updating) {
ofdpa_neigh_update(found, trans, NULL, false);
resolved = !is_zero_ether_addr(found->eth_dst);
* recipients
*/
if (is_mc_recip) {
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
unsigned int depth, i;
memset(inbuf, 0, sizeof(inbuf));
efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
} else {
efx_mcdi_display_error(efx, MC_CMD_FILTER_OP,
- MC_CMD_FILTER_OP_IN_LEN,
+ MC_CMD_FILTER_OP_EXT_IN_LEN,
NULL, 0, rc);
}
}
struct efx_filter_spec *spec)
{
struct efx_ef10_filter_table *table = efx->filter_state;
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
struct efx_filter_spec *saved_spec;
unsigned int hash, i, depth = 1;
bool replacing = false;
static void efx_ef10_filter_table_remove(struct efx_nic *efx)
{
struct efx_ef10_filter_table *table = efx->filter_state;
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
struct efx_filter_spec *spec;
unsigned int filter_idx;
int rc;
/* Insert/renew filters */
for (i = 0; i < addr_count; i++) {
+ EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID);
efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr);
rc = efx_ef10_filter_insert(efx, &spec, true);
}
return rc;
} else {
- /* mark as not inserted, and carry on */
- rc = EFX_EF10_FILTER_ID_INVALID;
+ /* keep invalid ID, and carry on */
}
+ } else {
+ ids[i] = efx_ef10_filter_get_unsafe_id(rc);
}
- ids[i] = efx_ef10_filter_get_unsafe_id(rc);
}
if (multicast && rollback) {
up_write(&vf->efx->filter_sem);
mutex_unlock(&vf->efx->mac_lock);
- up_write(&vf->efx->filter_sem);
-
rc2 = efx_net_open(vf->efx->net_dev);
if (rc2)
goto reset_nic;
{
/* Context type from W/B descriptor must be zero */
if (le32_to_cpu(p->des3) & TDES3_CONTEXT_TYPE)
- return -EINVAL;
+ return 0;
/* Tx Timestamp Status is 1 so des0 and des1'll have valid values */
if (le32_to_cpu(p->des3) & TDES3_TIMESTAMP_STATUS)
- return 0;
+ return 1;
- return 1;
+ return 0;
}
static inline u64 dwmac4_get_timestamp(void *desc, u32 ats)
}
}
exit:
- return ret;
+ if (likely(ret == 0))
+ return 1;
+
+ return 0;
}
static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
return;
/* check tx tstamp status */
- if (!priv->hw->desc->get_tx_timestamp_status(p)) {
+ if (priv->hw->desc->get_tx_timestamp_status(p)) {
/* get the valid tstamp */
ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
shhwtstamp.hwtstamp = ns_to_ktime(ns);
- netdev_info(priv->dev, "get valid TX hw timestamp %llu\n", ns);
+ netdev_dbg(priv->dev, "get valid TX hw timestamp %llu\n", ns);
/* pass tstamp to stack */
skb_tstamp_tx(skb, &shhwtstamp);
}
return;
/* Check if timestamp is available */
- if (!priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) {
+ if (priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) {
/* For GMAC4, the valid timestamp is from CTX next desc. */
if (priv->plat->has_gmac4)
ns = priv->hw->desc->get_timestamp(np, priv->adv_ts);
else
ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
- netdev_info(priv->dev, "get valid RX hw timestamp %llu\n", ns);
+ netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
shhwtstamp = skb_hwtstamps(skb);
memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
shhwtstamp->hwtstamp = ns_to_ktime(ns);
} else {
- netdev_err(priv->dev, "cannot get RX hw timestamp\n");
+ netdev_dbg(priv->dev, "cannot get RX hw timestamp\n");
}
}
/* PTP v1, UDP, any kind of event packet */
config.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
/* take time stamp for all event messages */
- snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+ if (priv->plat->has_gmac4)
+ snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
+ else
+ snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
ptp_v2 = PTP_TCR_TSVER2ENA;
/* take time stamp for all event messages */
- snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+ if (priv->plat->has_gmac4)
+ snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
+ else
+ snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
ptp_v2 = PTP_TCR_TSVER2ENA;
/* take time stamp for all event messages */
- snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+ if (priv->plat->has_gmac4)
+ snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
+ else
+ snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
tx_q->tx_skbuff_dma[first_entry].buf = des;
tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
- tx_q->tx_skbuff[first_entry] = skb;
first->des0 = cpu_to_le32(des);
tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true;
+ /* Only the last descriptor gets to point to the skb. */
+ tx_q->tx_skbuff[tx_q->cur_tx] = skb;
+
+ /* We've used all descriptors we need for this skb, however,
+ * advance cur_tx so that it references a fresh descriptor.
+ * ndo_start_xmit will fill this descriptor the next time it's
+ * called and stmmac_tx_clean may clean up to this descriptor.
+ */
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
if (unlikely(stmmac_tx_avail(priv, queue) <= (MAX_SKB_FRAGS + 1))) {
first = desc;
- tx_q->tx_skbuff[first_entry] = skb;
-
enh_desc = priv->plat->enh_desc;
/* To program the descriptors according to the size of the frame */
if (enh_desc)
skb->len);
}
- entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+ /* Only the last descriptor gets to point to the skb. */
+ tx_q->tx_skbuff[entry] = skb;
+ /* We've used all descriptors we need for this skb, however,
+ * advance cur_tx so that it references a fresh descriptor.
+ * ndo_start_xmit will fill this descriptor the next time it's
+ * called and stmmac_tx_clean may clean up to this descriptor.
+ */
+ entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
tx_q->cur_tx = entry;
if (netif_msg_pktdata(priv)) {
/* Enable Snapshot for Messages Relevant to Master */
#define PTP_TCR_TSMSTRENA BIT(15)
/* Select PTP packets for Taking Snapshots */
-#define PTP_TCR_SNAPTYPSEL_1 GENMASK(17, 16)
+#define PTP_TCR_SNAPTYPSEL_1 BIT(16)
+#define PTP_GMAC4_TCR_SNAPTYPSEL_1 GENMASK(17, 16)
/* Enable MAC address for PTP Frame Filtering */
#define PTP_TCR_TSENMACADDR BIT(18)
if (of_device_is_compatible(dev->of_node, "ti,dm816-emac"))
return cpsw_am33xx_cm_get_macid(dev, 0x30, slave, mac_addr);
- if (of_machine_is_compatible("ti,am4372"))
+ if (of_machine_is_compatible("ti,am43"))
return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
if (of_machine_is_compatible("ti,dra7"))
dev->netdev_ops = &geneve_netdev_ops;
dev->ethtool_ops = &geneve_ethtool_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
SET_NETDEV_DEVTYPE(dev, &geneve_type);
static void gtp_link_setup(struct net_device *dev)
{
dev->netdev_ops = >p_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->hard_header_len = 0;
dev->addr_len = 0;
{
/* Finish setting up the DEVICE info. */
dev->netdev_ops = &sp_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->mtu = SIXP_MTU;
dev->hard_header_len = AX25_MAX_HEADER_LEN;
dev->header_ops = &ax25_header_ops;
static void bpq_setup(struct net_device *dev)
{
dev->netdev_ops = &bpq_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN);
memcpy(dev->dev_addr, &ax25_defaddr, AX25_ADDR_LEN);
spinlock_t request_lock;
struct list_head req_list;
+ struct work_struct mcast_work;
+
u8 hw_mac_adr[ETH_ALEN];
u8 rss_key[NETVSC_HASH_KEYLEN];
u16 ind_table[ITAB_NUM];
int rndis_filter_close(struct netvsc_device *nvdev);
int rndis_filter_device_add(struct hv_device *dev,
struct netvsc_device_info *info);
+void rndis_filter_update(struct netvsc_device *nvdev);
void rndis_filter_device_remove(struct hv_device *dev,
struct netvsc_device *nvdev);
int rndis_filter_set_rss_param(struct rndis_device *rdev,
struct vmbus_channel *channel,
void *data, u32 buflen);
-int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter);
int rndis_filter_set_device_mac(struct net_device *ndev, char *mac);
void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
/* list protection */
spinlock_t lock;
- struct work_struct work;
u32 msg_enable; /* debug level */
u32 tx_checksum_mask;
module_param(debug, int, S_IRUGO);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
-static void do_set_multicast(struct work_struct *w)
-{
- struct net_device_context *ndevctx =
- container_of(w, struct net_device_context, work);
- struct hv_device *device_obj = ndevctx->device_ctx;
- struct net_device *ndev = hv_get_drvdata(device_obj);
- struct netvsc_device *nvdev = rcu_dereference(ndevctx->nvdev);
- struct rndis_device *rdev;
-
- if (!nvdev)
- return;
-
- rdev = nvdev->extension;
- if (rdev == NULL)
- return;
-
- if (ndev->flags & IFF_PROMISC)
- rndis_filter_set_packet_filter(rdev,
- NDIS_PACKET_TYPE_PROMISCUOUS);
- else
- rndis_filter_set_packet_filter(rdev,
- NDIS_PACKET_TYPE_BROADCAST |
- NDIS_PACKET_TYPE_ALL_MULTICAST |
- NDIS_PACKET_TYPE_DIRECTED);
-}
-
static void netvsc_set_multicast_list(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
- schedule_work(&net_device_ctx->work);
+ rndis_filter_update(nvdev);
}
static int netvsc_open(struct net_device *net)
netif_tx_disable(net);
- /* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
- cancel_work_sync(&net_device_ctx->work);
ret = rndis_filter_close(nvdev);
if (ret != 0) {
netdev_err(net, "unable to close device (ret %d).\n", ret);
channels->rx_count || channels->tx_count || channels->other_count)
return -EINVAL;
- if (count > net->num_tx_queues || count > net->num_rx_queues)
+ if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX)
return -EINVAL;
if (!nvdev || nvdev->destroy)
static int netvsc_get_sset_count(struct net_device *dev, int string_set)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
if (!nvdev)
return -ENODEV;
}
#ifdef CONFIG_NET_POLL_CONTROLLER
-static void netvsc_poll_controller(struct net_device *net)
+static void netvsc_poll_controller(struct net_device *dev)
{
- /* As netvsc_start_xmit() works synchronous we don't have to
- * trigger anything here.
- */
+ struct net_device_context *ndc = netdev_priv(dev);
+ struct netvsc_device *ndev;
+ int i;
+
+ rcu_read_lock();
+ ndev = rcu_dereference(ndc->nvdev);
+ if (ndev) {
+ for (i = 0; i < ndev->num_chn; i++) {
+ struct netvsc_channel *nvchan = &ndev->chan_table[i];
+
+ napi_schedule(&nvchan->napi);
+ }
+ }
+ rcu_read_unlock();
}
#endif
rndis_dev = ndev->extension;
if (indir) {
for (i = 0; i < ITAB_NUM; i++)
- if (indir[i] >= dev->num_rx_queues)
+ if (indir[i] >= VRSS_CHANNEL_MAX)
return -EINVAL;
for (i = 0; i < ITAB_NUM; i++)
hv_set_drvdata(dev, net);
INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
- INIT_WORK(&net_device_ctx->work, do_set_multicast);
spin_lock_init(&net_device_ctx->lock);
INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
netif_device_detach(net);
cancel_delayed_work_sync(&ndev_ctx->dwork);
- cancel_work_sync(&ndev_ctx->work);
/*
* Call to the vsc driver to let it know that the device is being
#include "hyperv_net.h"
+static void rndis_set_multicast(struct work_struct *w);
#define RNDIS_EXT_LEN PAGE_SIZE
struct rndis_request {
spin_lock_init(&device->request_lock);
INIT_LIST_HEAD(&device->req_list);
+ INIT_WORK(&device->mcast_work, rndis_set_multicast);
device->state = RNDIS_DEV_UNINITIALIZED;
return ret;
}
-int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
+static int rndis_filter_set_packet_filter(struct rndis_device *dev,
+ u32 new_filter)
{
struct rndis_request *request;
struct rndis_set_request *set;
return ret;
}
+static void rndis_set_multicast(struct work_struct *w)
+{
+ struct rndis_device *rdev
+ = container_of(w, struct rndis_device, mcast_work);
+
+ if (rdev->ndev->flags & IFF_PROMISC)
+ rndis_filter_set_packet_filter(rdev,
+ NDIS_PACKET_TYPE_PROMISCUOUS);
+ else
+ rndis_filter_set_packet_filter(rdev,
+ NDIS_PACKET_TYPE_BROADCAST |
+ NDIS_PACKET_TYPE_ALL_MULTICAST |
+ NDIS_PACKET_TYPE_DIRECTED);
+}
+
+void rndis_filter_update(struct netvsc_device *nvdev)
+{
+ struct rndis_device *rdev = nvdev->extension;
+
+ schedule_work(&rdev->mcast_work);
+}
+
static int rndis_filter_init_device(struct rndis_device *dev)
{
struct rndis_request *request;
if (dev->state != RNDIS_DEV_DATAINITIALIZED)
return 0;
+ /* Make sure rndis_set_multicast doesn't re-enable filter! */
+ cancel_work_sync(&dev->mcast_work);
+
ret = rndis_filter_set_packet_filter(dev, 0);
if (ret == -ENODEV)
ret = 0;
__skb_queue_purge(&txp->tq);
}
kfree(dp->tx_private);
- free_netdev(dev);
}
static void ifb_setup(struct net_device *dev)
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
netif_keep_dst(dev);
eth_hw_addr_random(dev);
- dev->destructor = ifb_dev_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = ifb_dev_free;
}
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
dev->netdev_ops = &ipvlan_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->header_ops = &ipvlan_header_ops;
dev->ethtool_ops = &ipvlan_ethtool_ops;
}
{
dev_net(dev)->loopback_dev = NULL;
free_percpu(dev->lstats);
- free_netdev(dev);
}
static const struct net_device_ops loopback_ops = {
dev->ethtool_ops = &loopback_ethtool_ops;
dev->header_ops = ð_header_ops;
dev->netdev_ops = &loopback_ops;
- dev->destructor = loopback_dev_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = loopback_dev_free;
}
/* Setup and register the loopback device. */
free_percpu(macsec->secy.tx_sc.stats);
dev_put(real_dev);
- free_netdev(dev);
}
static void macsec_setup(struct net_device *dev)
dev->max_mtu = ETH_MAX_MTU;
dev->priv_flags |= IFF_NO_QUEUE;
dev->netdev_ops = &macsec_netdev_ops;
- dev->destructor = macsec_free_netdev;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = macsec_free_netdev;
SET_NETDEV_DEVTYPE(dev, &macsec_type);
eth_zero_addr(dev->broadcast);
#define MACVLAN_HASH_SIZE (1<<MACVLAN_HASH_BITS)
#define MACVLAN_BC_QUEUE_LEN 1000
+#define MACVLAN_F_PASSTHRU 1
+#define MACVLAN_F_ADDRCHANGE 2
+
struct macvlan_port {
struct net_device *dev;
struct hlist_head vlan_hash[MACVLAN_HASH_SIZE];
struct list_head vlans;
struct sk_buff_head bc_queue;
struct work_struct bc_work;
- bool passthru;
+ u32 flags;
int count;
struct hlist_head vlan_source_hash[MACVLAN_HASH_SIZE];
DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
+ unsigned char perm_addr[ETH_ALEN];
};
struct macvlan_source_entry {
static void macvlan_port_destroy(struct net_device *dev);
+static inline bool macvlan_passthru(const struct macvlan_port *port)
+{
+ return port->flags & MACVLAN_F_PASSTHRU;
+}
+
+static inline void macvlan_set_passthru(struct macvlan_port *port)
+{
+ port->flags |= MACVLAN_F_PASSTHRU;
+}
+
+static inline bool macvlan_addr_change(const struct macvlan_port *port)
+{
+ return port->flags & MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_set_addr_change(struct macvlan_port *port)
+{
+ port->flags |= MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_clear_addr_change(struct macvlan_port *port)
+{
+ port->flags &= ~MACVLAN_F_ADDRCHANGE;
+}
+
/* Hash Ethernet address */
static u32 macvlan_eth_hash(const unsigned char *addr)
{
static bool macvlan_addr_busy(const struct macvlan_port *port,
const unsigned char *addr)
{
- /* Test to see if the specified multicast address is
+ /* Test to see if the specified address is
* currently in use by the underlying device or
* another macvlan.
*/
- if (ether_addr_equal_64bits(port->dev->dev_addr, addr))
+ if (!macvlan_passthru(port) && !macvlan_addr_change(port) &&
+ ether_addr_equal_64bits(port->dev->dev_addr, addr))
return true;
if (macvlan_hash_lookup(port, addr))
}
macvlan_forward_source(skb, port, eth->h_source);
- if (port->passthru)
+ if (macvlan_passthru(port))
vlan = list_first_or_null_rcu(&port->vlans,
struct macvlan_dev, list);
else
struct net_device *lowerdev = vlan->lowerdev;
int err;
- if (vlan->port->passthru) {
+ if (macvlan_passthru(vlan->port)) {
if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) {
err = dev_set_promiscuity(lowerdev, 1);
if (err < 0)
dev_uc_unsync(lowerdev, dev);
dev_mc_unsync(lowerdev, dev);
- if (vlan->port->passthru) {
+ if (macvlan_passthru(vlan->port)) {
if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC))
dev_set_promiscuity(lowerdev, -1);
goto hash_del;
{
struct macvlan_dev *vlan = netdev_priv(dev);
struct net_device *lowerdev = vlan->lowerdev;
+ struct macvlan_port *port = vlan->port;
int err;
if (!(dev->flags & IFF_UP)) {
if (macvlan_addr_busy(vlan->port, addr))
return -EBUSY;
- if (!vlan->port->passthru) {
+ if (!macvlan_passthru(port)) {
err = dev_uc_add(lowerdev, addr);
if (err)
return err;
macvlan_hash_change_addr(vlan, addr);
}
+ if (macvlan_passthru(port) && !macvlan_addr_change(port)) {
+ /* Since addr_change isn't set, we are here due to lower
+ * device change. Save the lower-dev address so we can
+ * restore it later.
+ */
+ ether_addr_copy(vlan->port->perm_addr,
+ lowerdev->dev_addr);
+ }
+ macvlan_clear_addr_change(port);
return 0;
}
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
+ /* If the addresses are the same, this is a no-op */
+ if (ether_addr_equal(dev->dev_addr, addr->sa_data))
+ return 0;
+
if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
+ macvlan_set_addr_change(vlan->port);
dev_set_mac_address(vlan->lowerdev, addr);
return 0;
}
/* Support unicast filter only on passthru devices.
* Multicast filter should be allowed on all devices.
*/
- if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+ if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
return -EOPNOTSUPP;
if (flags & NLM_F_REPLACE)
/* Support unicast filter only on passthru devices.
* Multicast filter should be allowed on all devices.
*/
- if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+ if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
return -EOPNOTSUPP;
if (is_unicast_ether_addr(addr))
netif_keep_dst(dev);
dev->priv_flags |= IFF_UNICAST_FLT;
dev->netdev_ops = &macvlan_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->header_ops = &macvlan_hard_header_ops;
dev->ethtool_ops = &macvlan_ethtool_ops;
}
if (port == NULL)
return -ENOMEM;
- port->passthru = false;
port->dev = dev;
+ ether_addr_copy(port->perm_addr, dev->dev_addr);
INIT_LIST_HEAD(&port->vlans);
for (i = 0; i < MACVLAN_HASH_SIZE; i++)
INIT_HLIST_HEAD(&port->vlan_hash[i]);
kfree_skb(skb);
}
+ /* If the lower device address has been changed by passthru
+ * macvlan, put it back.
+ */
+ if (macvlan_passthru(port) &&
+ !ether_addr_equal(port->dev->dev_addr, port->perm_addr)) {
+ struct sockaddr sa;
+
+ sa.sa_family = port->dev->type;
+ memcpy(&sa.sa_data, port->perm_addr, port->dev->addr_len);
+ dev_set_mac_address(port->dev, &sa);
+ }
+
kfree(port);
}
port = macvlan_port_get_rtnl(lowerdev);
/* Only 1 macvlan device can be created in passthru mode */
- if (port->passthru) {
+ if (macvlan_passthru(port)) {
/* The macvlan port must be not created this time,
* still goto destroy_macvlan_port for readability.
*/
err = -EINVAL;
goto destroy_macvlan_port;
}
- port->passthru = true;
+ macvlan_set_passthru(port);
eth_hw_addr_inherit(dev, lowerdev);
}
if (data && data[IFLA_MACVLAN_FLAGS]) {
__u16 flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
bool promisc = (flags ^ vlan->flags) & MACVLAN_FLAG_NOPROMISC;
- if (vlan->port->passthru && promisc) {
+ if (macvlan_passthru(vlan->port) && promisc) {
int err;
if (flags & MACVLAN_FLAG_NOPROMISC)
}
break;
case NETDEV_CHANGEADDR:
- if (!port->passthru)
+ if (!macvlan_passthru(port))
return NOTIFY_DONE;
vlan = list_first_entry_or_null(&port->vlans,
if (err)
goto out_unlock;
- pr_info("netconsole: network logging started\n");
+ pr_info("network logging started\n");
} else { /* false */
/* We need to disable the netconsole before cleaning it up
* otherwise we might end up in write_msg() with
dev->netdev_ops = &nlmon_ops;
dev->ethtool_ops = &nlmon_ethtool_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
NETIF_F_HIGHDMA | NETIF_F_LLTX;
tristate "ThunderX SOCs MDIO buses"
depends on 64BIT
depends on PCI
+ depends on !(MDIO_DEVICE=y && PHYLIB=m)
select MDIO_CAVIUM
help
This driver supports the MDIO interfaces found on Cavium
if (overflow) {
pr_debug("tx timestamp queue overflow, count %d\n", overflow);
while (skb) {
- skb_complete_tx_timestamp(skb, NULL);
+ kfree_skb(skb);
skb = skb_dequeue(&dp83640->tx_queue);
}
return;
if ((regval & 0xFF) == 0xFF) {
phy_init_hw(phydev);
phydev->link = 0;
+ if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
+ phydev->drv->config_intr(phydev);
}
return 0;
return "5Gbps";
case SPEED_10000:
return "10Gbps";
+ case SPEED_14000:
+ return "14Gbps";
case SPEED_20000:
return "20Gbps";
case SPEED_25000:
static void sl_free_netdev(struct net_device *dev)
{
int i = dev->base_addr;
- free_netdev(dev);
+
slip_devs[i] = NULL;
}
static void sl_setup(struct net_device *dev)
{
dev->netdev_ops = &sl_netdev_ops;
- dev->destructor = sl_free_netdev;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = sl_free_netdev;
dev->hard_header_len = 0;
dev->addr_len = 0;
if (sl->tty) {
printk(KERN_ERR "%s: tty discipline still running\n",
dev->name);
- /* Intentionally leak the control block. */
- dev->destructor = NULL;
}
unregister_netdev(dev);
struct team *team = netdev_priv(dev);
free_percpu(team->pcpu_stats);
- free_netdev(dev);
}
static int team_open(struct net_device *dev)
dev->netdev_ops = &team_netdev_ops;
dev->ethtool_ops = &team_ethtool_ops;
- dev->destructor = team_destructor;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = team_destructor;
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
dev->priv_flags |= IFF_NO_QUEUE;
dev->priv_flags |= IFF_TEAM;
free_percpu(tun->pcpu_stats);
tun_flow_uninit(tun);
security_tun_dev_free_security(tun->security);
- free_netdev(dev);
}
static void tun_setup(struct net_device *dev)
tun->group = INVALID_GID;
dev->ethtool_ops = &tun_ethtool_ops;
- dev->destructor = tun_free_netdev;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = tun_free_netdev;
/* We prefer our own queue length */
dev->tx_queue_len = TUN_READQ_SIZE;
}
.tx_fixup = ax88179_tx_fixup,
};
+static const struct driver_info belkin_info = {
+ .description = "Belkin USB Ethernet Adapter",
+ .bind = ax88179_bind,
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+ .reset = ax88179_reset,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+ .tx_fixup = ax88179_tx_fixup,
+};
+
static const struct usb_device_id products[] = {
{
/* ASIX AX88179 10/100/1000 */
/* Lenovo OneLinkDock Gigabit LAN */
USB_DEVICE(0x17ef, 0x304b),
.driver_info = (unsigned long)&lenovo_info,
+}, {
+ /* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */
+ USB_DEVICE(0x050d, 0x0128),
+ .driver_info = (unsigned long)&belkin_info,
},
{ },
};
dev->addr_len = 1;
dev->tx_queue_len = 3;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
}
/*
dev->addr_len = 0;
dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
dev->netdev_ops = &qmimux_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
}
static struct net_device *qmimux_find_dev(struct usbnet *dev, u8 mux_id)
{QMI_FIXED_INTF(0x1199, 0x9056, 8)}, /* Sierra Wireless Modem */
{QMI_FIXED_INTF(0x1199, 0x9057, 8)},
{QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */
+ {QMI_FIXED_INTF(0x1199, 0x9063, 8)}, /* Sierra Wireless EM7305 */
+ {QMI_FIXED_INTF(0x1199, 0x9063, 10)}, /* Sierra Wireless EM7305 */
{QMI_FIXED_INTF(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx */
{QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx */
{QMI_FIXED_INTF(0x1199, 0x9079, 8)}, /* Sierra Wireless EM74xx */
{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */
{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */
+ {QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */
+ {QMI_FIXED_INTF(0x1c9e, 0x9803, 4)}, /* Telewell TW-3G HSPA+ */
{QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */
{QMI_FIXED_INTF(0x0b3c, 0xc000, 4)}, /* Olivetti Olicard 100 */
{QMI_FIXED_INTF(0x0b3c, 0xc001, 4)}, /* Olivetti Olicard 120 */
break;
}
+ dev_dbg(&intf->dev, "Detected version 0x%04x\n", version);
+
return version;
}
static void veth_dev_free(struct net_device *dev)
{
free_percpu(dev->vstats);
- free_netdev(dev);
}
#ifdef CONFIG_NET_POLL_CONTROLLER
NETIF_F_HW_VLAN_STAG_TX |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_STAG_RX);
- dev->destructor = veth_dev_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = veth_dev_free;
dev->max_mtu = ETH_MAX_MTU;
dev->hw_features = VETH_FEATURES;
tbp = tb;
}
- if (tbp[IFLA_IFNAME]) {
+ if (ifmp && tbp[IFLA_IFNAME]) {
nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
name_assign_type = NET_NAME_USER;
} else {
return PTR_ERR(peer);
}
- if (tbp[IFLA_ADDRESS] == NULL)
+ if (!ifmp || !tbp[IFLA_ADDRESS])
eth_hw_addr_random(peer);
if (ifmp && (dev->ifindex != 0))
flush_work(&vi->config_work);
netif_device_detach(vi->dev);
+ netif_tx_disable(vi->dev);
cancel_delayed_work_sync(&vi->refill);
if (netif_running(vi->dev)) {
#include <net/addrconf.h>
#include <net/l3mdev.h>
#include <net/fib_rules.h>
+#include <net/netns/generic.h>
#define DRV_NAME "vrf"
#define DRV_VERSION "1.0"
#define FIB_RULE_PREF 1000 /* default preference for FIB rules */
-static bool add_fib_rules = true;
+
+static unsigned int vrf_net_id;
struct net_vrf {
struct rtable __rcu *rth;
dev->netdev_ops = &vrf_netdev_ops;
dev->l3mdev_ops = &vrf_l3mdev_ops;
dev->ethtool_ops = &vrf_ethtool_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
/* Fill in device structure with ethernet-generic values. */
eth_hw_addr_random(dev);
struct nlattr *tb[], struct nlattr *data[])
{
struct net_vrf *vrf = netdev_priv(dev);
+ bool *add_fib_rules;
+ struct net *net;
int err;
if (!data || !data[IFLA_VRF_TABLE])
if (err)
goto out;
- if (add_fib_rules) {
+ net = dev_net(dev);
+ add_fib_rules = net_generic(net, vrf_net_id);
+ if (*add_fib_rules) {
err = vrf_add_fib_rules(dev);
if (err) {
unregister_netdevice(dev);
goto out;
}
- add_fib_rules = false;
+ *add_fib_rules = false;
}
out:
.notifier_call = vrf_device_event,
};
+/* Initialize per network namespace state */
+static int __net_init vrf_netns_init(struct net *net)
+{
+ bool *add_fib_rules = net_generic(net, vrf_net_id);
+
+ *add_fib_rules = true;
+
+ return 0;
+}
+
+static struct pernet_operations vrf_net_ops __net_initdata = {
+ .init = vrf_netns_init,
+ .id = &vrf_net_id,
+ .size = sizeof(bool),
+};
+
static int __init vrf_init_module(void)
{
int rc;
register_netdevice_notifier(&vrf_notifier_block);
- rc = rtnl_link_register(&vrf_link_ops);
+ rc = register_pernet_subsys(&vrf_net_ops);
if (rc < 0)
goto error;
+ rc = rtnl_link_register(&vrf_link_ops);
+ if (rc < 0) {
+ unregister_pernet_subsys(&vrf_net_ops);
+ goto error;
+ }
+
return 0;
error:
dev->netdev_ops = &vsockmon_ops;
dev->ethtool_ops = &vsockmon_ethtool_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
NETIF_F_HIGHDMA | NETIF_F_LLTX;
eth_hw_addr_random(dev);
ether_setup(dev);
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
SET_NETDEV_DEVTYPE(dev, &vxlan_type);
dev->features |= NETIF_F_LLTX;
dev->flags = 0;
dev->header_ops = &dlci_header_ops;
dev->netdev_ops = &dlci_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dlp->receive = dlci_receive;
return -EIO;
}
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
*get_dev_p(pvc, type) = dev;
if (!used) {
state(hdlc)->dce_changed = 1;
static void lapbeth_setup(struct net_device *dev)
{
dev->netdev_ops = &lapbeth_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->type = ARPHRD_X25;
dev->hard_header_len = 3;
dev->mtu = 1000;
struct ath6kl *ar = ath6kl_priv(dev);
dev->netdev_ops = &ath6kl_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->watchdog_timeo = ATH6KL_TX_TIMEOUT;
dev->needed_headroom = ETH_HLEN;
if (vif)
brcmf_free_vif(vif);
- free_netdev(ndev);
}
static bool brcmf_is_linkup(const struct brcmf_event_msg *e)
if (!ndev)
return ERR_PTR(-ENOMEM);
- ndev->destructor = brcmf_cfg80211_free_netdev;
+ ndev->needs_free_netdev = true;
+ ndev->priv_destructor = brcmf_cfg80211_free_netdev;
ifp = netdev_priv(ndev);
ifp->ndev = ndev;
/* store mapping ifidx to bsscfgidx */
const char *nvram_name;
u16 domain_nr;
u16 bus_nr;
- void (*done)(struct device *dev, const struct firmware *fw,
+ void (*done)(struct device *dev, int err, const struct firmware *fw,
void *nvram_image, u32 nvram_len);
};
if (!nvram && !(fwctx->flags & BRCMF_FW_REQ_NV_OPTIONAL))
goto fail;
- fwctx->done(fwctx->dev, fwctx->code, nvram, nvram_length);
+ fwctx->done(fwctx->dev, 0, fwctx->code, nvram, nvram_length);
kfree(fwctx);
return;
fail:
brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
release_firmware(fwctx->code);
- device_release_driver(fwctx->dev);
+ fwctx->done(fwctx->dev, -ENOENT, NULL, NULL, 0);
kfree(fwctx);
}
static void brcmf_fw_request_code_done(const struct firmware *fw, void *ctx)
{
struct brcmf_fw *fwctx = ctx;
- int ret;
+ int ret = 0;
brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(fwctx->dev));
- if (!fw)
+ if (!fw) {
+ ret = -ENOENT;
goto fail;
-
- /* only requested code so done here */
- if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM)) {
- fwctx->done(fwctx->dev, fw, NULL, 0);
- kfree(fwctx);
- return;
}
+ /* only requested code so done here */
+ if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM))
+ goto done;
+
fwctx->code = fw;
ret = request_firmware_nowait(THIS_MODULE, true, fwctx->nvram_name,
fwctx->dev, GFP_KERNEL, fwctx,
brcmf_fw_request_nvram_done);
- if (!ret)
- return;
-
- brcmf_fw_request_nvram_done(NULL, fwctx);
+ /* pass NULL to nvram callback for bcm47xx fallback */
+ if (ret)
+ brcmf_fw_request_nvram_done(NULL, fwctx);
return;
fail:
brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
- device_release_driver(fwctx->dev);
+done:
+ fwctx->done(fwctx->dev, ret, fw, NULL, 0);
kfree(fwctx);
}
int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
const char *code, const char *nvram,
- void (*fw_cb)(struct device *dev,
+ void (*fw_cb)(struct device *dev, int err,
const struct firmware *fw,
void *nvram_image, u32 nvram_len),
u16 domain_nr, u16 bus_nr)
int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
const char *code, const char *nvram,
- void (*fw_cb)(struct device *dev,
+ void (*fw_cb)(struct device *dev, int err,
const struct firmware *fw,
void *nvram_image, u32 nvram_len))
{
*/
int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
const char *code, const char *nvram,
- void (*fw_cb)(struct device *dev,
+ void (*fw_cb)(struct device *dev, int err,
const struct firmware *fw,
void *nvram_image, u32 nvram_len),
u16 domain_nr, u16 bus_nr);
int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
const char *code, const char *nvram,
- void (*fw_cb)(struct device *dev,
+ void (*fw_cb)(struct device *dev, int err,
const struct firmware *fw,
void *nvram_image, u32 nvram_len));
struct brcmf_fws_info *fws = drvr_to_fws(ifp->drvr);
struct brcmf_fws_mac_descriptor *entry;
- if (!ifp->ndev || fws->fcmode == BRCMF_FWS_FCMODE_NONE)
+ if (!ifp->ndev || !brcmf_fws_queue_skbs(fws))
return;
entry = &fws->desc.iface[ifp->ifidx];
.write32 = brcmf_pcie_buscore_write32,
};
-static void brcmf_pcie_setup(struct device *dev, const struct firmware *fw,
+static void brcmf_pcie_setup(struct device *dev, int ret,
+ const struct firmware *fw,
void *nvram, u32 nvram_len)
{
- struct brcmf_bus *bus = dev_get_drvdata(dev);
- struct brcmf_pciedev *pcie_bus_dev = bus->bus_priv.pcie;
- struct brcmf_pciedev_info *devinfo = pcie_bus_dev->devinfo;
+ struct brcmf_bus *bus;
+ struct brcmf_pciedev *pcie_bus_dev;
+ struct brcmf_pciedev_info *devinfo;
struct brcmf_commonring **flowrings;
- int ret;
u32 i;
+ /* check firmware loading result */
+ if (ret)
+ goto fail;
+
+ bus = dev_get_drvdata(dev);
+ pcie_bus_dev = bus->bus_priv.pcie;
+ devinfo = pcie_bus_dev->devinfo;
brcmf_pcie_attach(devinfo);
/* Some of the firmwares have the size of the memory of the device
.get_memdump = brcmf_sdio_bus_get_memdump,
};
-static void brcmf_sdio_firmware_callback(struct device *dev,
+static void brcmf_sdio_firmware_callback(struct device *dev, int err,
const struct firmware *code,
void *nvram, u32 nvram_len)
{
- struct brcmf_bus *bus_if = dev_get_drvdata(dev);
- struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
- struct brcmf_sdio *bus = sdiodev->bus;
- int err = 0;
+ struct brcmf_bus *bus_if;
+ struct brcmf_sdio_dev *sdiodev;
+ struct brcmf_sdio *bus;
u8 saveclk;
- brcmf_dbg(TRACE, "Enter: dev=%s\n", dev_name(dev));
+ brcmf_dbg(TRACE, "Enter: dev=%s, err=%d\n", dev_name(dev), err);
+ bus_if = dev_get_drvdata(dev);
+ sdiodev = bus_if->bus_priv.sdio;
+ if (err)
+ goto fail;
if (!bus_if->drvr)
return;
+ bus = sdiodev->bus;
+
/* try to download image and nvram to the dongle */
bus->alp_only = true;
err = brcmf_sdio_download_firmware(bus, code, nvram, nvram_len);
fail:
brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err);
device_release_driver(dev);
+ device_release_driver(&sdiodev->func[2]->dev);
}
struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
return ret;
}
-static void brcmf_usb_probe_phase2(struct device *dev,
+static void brcmf_usb_probe_phase2(struct device *dev, int ret,
const struct firmware *fw,
void *nvram, u32 nvlen)
{
struct brcmf_bus *bus = dev_get_drvdata(dev);
- struct brcmf_usbdev_info *devinfo;
- int ret;
+ struct brcmf_usbdev_info *devinfo = bus->bus_priv.usb->devinfo;
+
+ if (ret)
+ goto error;
brcmf_dbg(USB, "Start fw downloading\n");
- devinfo = bus->bus_priv.usb->devinfo;
ret = check_file(fw->data);
if (ret < 0) {
brcmf_err("invalid firmware\n");
dev->mem_end = mdev->mem_end;
hostap_setup_dev(dev, local, type);
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
sprintf(dev->name, "%s%s", prefix, name);
if (!rtnl_locked)
static void hwsim_mon_setup(struct net_device *dev)
{
dev->netdev_ops = &hwsim_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
ether_setup(dev);
dev->priv_flags |= IFF_NO_QUEUE;
dev->type = ARPHRD_IEEE80211_RADIOTAP;
struct net_device *dev)
{
dev->netdev_ops = &mwifiex_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
/* Initialize private structure */
priv->current_key_index = 0;
priv->media_connected = false;
unsigned long remaining_credit;
struct timer_list credit_timeout;
u64 credit_window_start;
+ bool rate_limited;
/* Statistics */
struct xenvif_stats stats;
if (work_done < budget) {
napi_complete_done(napi, work_done);
- xenvif_napi_schedule_or_enable_events(queue);
+ /* If the queue is rate-limited, it shall be
+ * rescheduled in the timer callback.
+ */
+ if (likely(!queue->rate_limited))
+ xenvif_napi_schedule_or_enable_events(queue);
}
return work_done;
max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
queue->remaining_credit = min(max_credit, max_burst);
+ queue->rate_limited = false;
}
void xenvif_tx_credit_callback(unsigned long data)
msecs_to_jiffies(queue->credit_usec / 1000);
/* Timer could already be pending in rare cases. */
- if (timer_pending(&queue->credit_timeout))
+ if (timer_pending(&queue->credit_timeout)) {
+ queue->rate_limited = true;
return true;
+ }
/* Passed the point where we can replenish credit? */
if (time_after_eq64(now, next_credit)) {
mod_timer(&queue->credit_timeout,
next_credit);
queue->credit_window_start = next_credit;
+ queue->rate_limited = true;
return true;
}
.link_is_up = xeon_link_is_up,
.db_ioread = skx_db_ioread,
.db_iowrite = skx_db_iowrite,
- .db_size = sizeof(u64),
+ .db_size = sizeof(u32),
.ntb_ctl = SKX_NTBCNTL_OFFSET,
.mw_bar = {2, 4},
};
u64 rx_err_ver;
u64 rx_memcpy;
u64 rx_async;
- u64 dma_rx_prep_err;
u64 tx_bytes;
u64 tx_pkts;
u64 tx_ring_full;
u64 tx_err_no_buf;
u64 tx_memcpy;
u64 tx_async;
- u64 dma_tx_prep_err;
};
struct ntb_transport_mw {
#define QP_TO_MW(nt, qp) ((qp) % nt->mw_count)
#define NTB_QP_DEF_NUM_ENTRIES 100
#define NTB_LINK_DOWN_TIMEOUT 10
-#define DMA_RETRIES 20
-#define DMA_OUT_RESOURCE_TO msecs_to_jiffies(50)
static void ntb_transport_rxc_db(unsigned long data);
static const struct ntb_ctx_ops ntb_transport_ops;
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"free tx - \t%u\n",
ntb_transport_tx_free_entry(qp));
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "DMA tx prep err - \t%llu\n",
- qp->dma_tx_prep_err);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "DMA rx prep err - \t%llu\n",
- qp->dma_rx_prep_err);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"\n");
if (!mw->virt_addr)
return -ENOMEM;
- if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count)
+ if (mw_num < qp_count % mw_count)
num_qps_mw = qp_count / mw_count + 1;
else
num_qps_mw = qp_count / mw_count;
qp->tx_err_no_buf = 0;
qp->tx_memcpy = 0;
qp->tx_async = 0;
- qp->dma_tx_prep_err = 0;
- qp->dma_rx_prep_err = 0;
}
static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
qp->event_handler = NULL;
ntb_qp_link_down_reset(qp);
- if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count)
+ if (mw_num < qp_count % mw_count)
num_qps_mw = qp_count / mw_count + 1;
else
num_qps_mw = qp_count / mw_count;
qp_count = ilog2(qp_bitmap);
if (max_num_clients && max_num_clients < qp_count)
qp_count = max_num_clients;
- else if (mw_count < qp_count)
- qp_count = mw_count;
+ else if (nt->mw_count < qp_count)
+ qp_count = nt->mw_count;
qp_bitmap &= BIT_ULL(qp_count) - 1;
struct dmaengine_unmap_data *unmap;
dma_cookie_t cookie;
void *buf = entry->buf;
- int retries = 0;
len = entry->len;
device = chan->device;
unmap->from_cnt = 1;
- for (retries = 0; retries < DMA_RETRIES; retries++) {
- txd = device->device_prep_dma_memcpy(chan,
- unmap->addr[1],
- unmap->addr[0], len,
- DMA_PREP_INTERRUPT);
- if (txd)
- break;
-
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(DMA_OUT_RESOURCE_TO);
- }
-
- if (!txd) {
- qp->dma_rx_prep_err++;
+ txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+ unmap->addr[0], len,
+ DMA_PREP_INTERRUPT);
+ if (!txd)
goto err_get_unmap;
- }
txd->callback_result = ntb_rx_copy_callback;
txd->callback_param = entry;
struct dmaengine_unmap_data *unmap;
dma_addr_t dest;
dma_cookie_t cookie;
- int retries = 0;
device = chan->device;
dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index;
unmap->to_cnt = 1;
- for (retries = 0; retries < DMA_RETRIES; retries++) {
- txd = device->device_prep_dma_memcpy(chan, dest,
- unmap->addr[0], len,
- DMA_PREP_INTERRUPT);
- if (txd)
- break;
-
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(DMA_OUT_RESOURCE_TO);
- }
-
- if (!txd) {
- qp->dma_tx_prep_err++;
+ txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
+ DMA_PREP_INTERRUPT);
+ if (!txd)
goto err_get_unmap;
- }
txd->callback_result = ntb_tx_copy_callback;
txd->callback_param = entry;
static unsigned int seg_order = 19; /* 512K */
module_param(seg_order, uint, 0644);
-MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing");
+MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");
static unsigned int run_order = 32; /* 4G */
module_param(run_order, uint, 0644);
-MODULE_PARM_DESC(run_order, "size order [n^2] of total data to transfer");
+MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");
static bool use_dma; /* default to 0 */
module_param(use_dma, bool, 0644);
* another kernel subsystem, and we just pass it through.
*/
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
goto out;
}
"io error in %s sector %lld, len %d,\n",
(rw == READ) ? "READ" : "WRITE",
(unsigned long long) iter.bi_sector, len);
- bio->bi_error = err;
+ bio->bi_status = errno_to_blk_status(err);
break;
}
}
blk_queue_make_request(q, nd_blk_make_request);
blk_queue_max_hw_sectors(q, UINT_MAX);
- blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
q->queuedata = nsblk;
* another kernel subsystem, and we just pass it through.
*/
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
goto out;
}
(op_is_write(bio_op(bio))) ? "WRITE" :
"READ",
(unsigned long long) iter.bi_sector, len);
- bio->bi_error = err;
+ bio->bi_status = errno_to_blk_status(err);
break;
}
}
blk_queue_make_request(btt->btt_queue, btt_make_request);
blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
- blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
btt->btt_queue->queuedata = btt;
return dev;
}
-static bool uuid_is_null(u8 *uuid)
-{
- static const u8 null_uuid[16];
-
- return (memcmp(uuid, null_uuid, 16) == 0);
-}
-
/**
* nd_btt_arena_is_valid - check if the metadata layout is valid
* @nd_btt: device with BTT geometry and backing device info
if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0)
return false;
- if (!uuid_is_null(super->parent_uuid))
+ if (!guid_is_null((guid_t *)&super->parent_uuid))
if (memcmp(super->parent_uuid, parent_uuid, 16) != 0)
return false;
return to_nd_region(to_dev(pmem)->parent);
}
-static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
- unsigned int len)
+static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
+ phys_addr_t offset, unsigned int len)
{
struct device *dev = to_dev(pmem);
sector_t sector;
long cleared;
- int rc = 0;
+ blk_status_t rc = BLK_STS_OK;
sector = (offset - pmem->data_offset) / 512;
cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
if (cleared < len)
- rc = -EIO;
+ rc = BLK_STS_IOERR;
if (cleared > 0 && cleared / 512) {
cleared /= 512;
dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
kunmap_atomic(mem);
}
-static int read_pmem(struct page *page, unsigned int off,
+static blk_status_t read_pmem(struct page *page, unsigned int off,
void *pmem_addr, unsigned int len)
{
int rc;
rc = memcpy_mcsafe(mem + off, pmem_addr, len);
kunmap_atomic(mem);
if (rc)
- return -EIO;
- return 0;
+ return BLK_STS_IOERR;
+ return BLK_STS_OK;
}
-static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
+static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
unsigned int len, unsigned int off, bool is_write,
sector_t sector)
{
- int rc = 0;
+ blk_status_t rc = BLK_STS_OK;
bool bad_pmem = false;
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
void *pmem_addr = pmem->virt_addr + pmem_off;
if (!is_write) {
if (unlikely(bad_pmem))
- rc = -EIO;
+ rc = BLK_STS_IOERR;
else {
rc = read_pmem(page, off, pmem_addr, len);
flush_dcache_page(page);
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
- int rc = 0;
+ blk_status_t rc = 0;
bool do_acct;
unsigned long start;
struct bio_vec bvec;
bvec.bv_offset, op_is_write(bio_op(bio)),
iter.bi_sector);
if (rc) {
- bio->bi_error = rc;
+ bio->bi_status = rc;
break;
}
}
struct page *page, bool is_write)
{
struct pmem_device *pmem = bdev->bd_queue->queuedata;
- int rc;
+ blk_status_t rc;
rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector);
if (rc == 0)
page_endio(page, is_write, 0);
- return rc;
+ return blk_status_to_errno(rc);
}
/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_max_hw_sectors(q, UINT_MAX);
- blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
q->queuedata = pmem;
To compile this driver as a module, choose M here: the
module will be called nvme.
-config BLK_DEV_NVME_SCSI
- bool "SCSI emulation for NVMe device nodes"
- depends on NVME_CORE
- ---help---
- This adds support for the SG_IO ioctl on the NVMe character
- and block devices nodes, as well as a translation for a small
- number of selected SCSI commands to NVMe commands to the NVMe
- driver. If you don't know what this means you probably want
- to say N here, unless you run a distro that abuses the SCSI
- emulation to provide stable device names for mount by id, like
- some OpenSuSE and SLES versions.
-
config NVME_FABRICS
tristate
obj-$(CONFIG_NVME_FC) += nvme-fc.o
nvme-core-y := core.o
-nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-y += pci.o
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
#include <linux/pm_qos.h>
-#include <scsi/sg.h>
#include <asm/unaligned.h>
#include "nvme.h"
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
EXPORT_SYMBOL_GPL(nvme_io_timeout);
-unsigned char shutdown_timeout = 5;
+static unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
module_param(force_apst, bool, 0644);
MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
+static bool streams;
+module_param(streams, bool, 0644);
+MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
+
+struct workqueue_struct *nvme_wq;
+EXPORT_SYMBOL_GPL(nvme_wq);
+
static LIST_HEAD(nvme_ctrl_list);
static DEFINE_SPINLOCK(dev_list_lock);
static struct class *nvme_class;
-static int nvme_error_status(struct request *req)
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
+{
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+ return -EBUSY;
+ if (!queue_work(nvme_wq, &ctrl->reset_work))
+ return -EBUSY;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
+
+static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+{
+ int ret;
+
+ ret = nvme_reset_ctrl(ctrl);
+ if (!ret)
+ flush_work(&ctrl->reset_work);
+ return ret;
+}
+
+static blk_status_t nvme_error_status(struct request *req)
{
switch (nvme_req(req)->status & 0x7ff) {
case NVME_SC_SUCCESS:
- return 0;
+ return BLK_STS_OK;
case NVME_SC_CAP_EXCEEDED:
- return -ENOSPC;
- default:
- return -EIO;
-
- /*
- * XXX: these errors are a nasty side-band protocol to
- * drivers/md/dm-mpath.c:noretry_error() that aren't documented
- * anywhere..
- */
- case NVME_SC_CMD_SEQ_ERROR:
- return -EILSEQ;
+ return BLK_STS_NOSPC;
case NVME_SC_ONCS_NOT_SUPPORTED:
- return -EOPNOTSUPP;
+ return BLK_STS_NOTSUPP;
case NVME_SC_WRITE_FAULT:
case NVME_SC_READ_ERROR:
case NVME_SC_UNWRITTEN_BLOCK:
- return -ENODATA;
+ return BLK_STS_MEDIUM;
+ default:
+ return BLK_STS_IOERR;
}
}
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
- case NVME_CTRL_RECONNECTING:
changed = true;
/* FALLTHRU */
default:
}
EXPORT_SYMBOL_GPL(nvme_alloc_request);
+static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+
+ c.directive.opcode = nvme_admin_directive_send;
+ c.directive.nsid = cpu_to_le32(0xffffffff);
+ c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
+ c.directive.dtype = NVME_DIR_IDENTIFY;
+ c.directive.tdtype = NVME_DIR_STREAMS;
+ c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
+
+ return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
+}
+
+static int nvme_disable_streams(struct nvme_ctrl *ctrl)
+{
+ return nvme_toggle_streams(ctrl, false);
+}
+
+static int nvme_enable_streams(struct nvme_ctrl *ctrl)
+{
+ return nvme_toggle_streams(ctrl, true);
+}
+
+static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
+ struct streams_directive_params *s, u32 nsid)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+ memset(s, 0, sizeof(*s));
+
+ c.directive.opcode = nvme_admin_directive_recv;
+ c.directive.nsid = cpu_to_le32(nsid);
+ c.directive.numd = sizeof(*s);
+ c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
+ c.directive.dtype = NVME_DIR_STREAMS;
+
+ return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
+}
+
+static int nvme_configure_directives(struct nvme_ctrl *ctrl)
+{
+ struct streams_directive_params s;
+ int ret;
+
+ if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
+ return 0;
+ if (!streams)
+ return 0;
+
+ ret = nvme_enable_streams(ctrl);
+ if (ret)
+ return ret;
+
+ ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
+ if (ret)
+ return ret;
+
+ ctrl->nssa = le16_to_cpu(s.nssa);
+ if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
+ dev_info(ctrl->device, "too few streams (%u) available\n",
+ ctrl->nssa);
+ nvme_disable_streams(ctrl);
+ return 0;
+ }
+
+ ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
+ dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
+ return 0;
+}
+
+/*
+ * Check if 'req' has a write hint associated with it. If it does, assign
+ * a valid namespace stream to the write.
+ */
+static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
+ struct request *req, u16 *control,
+ u32 *dsmgmt)
+{
+ enum rw_hint streamid = req->write_hint;
+
+ if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
+ streamid = 0;
+ else {
+ streamid--;
+ if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
+ return;
+
+ *control |= NVME_RW_DTYPE_STREAMS;
+ *dsmgmt |= streamid << 16;
+ }
+
+ if (streamid < ARRAY_SIZE(req->q->write_hints))
+ req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
+}
+
static inline void nvme_setup_flush(struct nvme_ns *ns,
struct nvme_command *cmnd)
{
cmnd->common.nsid = cpu_to_le32(ns->ns_id);
}
-static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
+static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd)
{
unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
if (!range)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
__rq_for_each_bio(bio, req) {
u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
if (WARN_ON_ONCE(n != segments)) {
kfree(range);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
memset(cmnd, 0, sizeof(*cmnd));
req->special_vec.bv_len = sizeof(*range) * segments;
req->rq_flags |= RQF_SPECIAL_PAYLOAD;
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
-static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
- struct nvme_command *cmnd)
+static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
+ struct request *req, struct nvme_command *cmnd)
{
+ struct nvme_ctrl *ctrl = ns->ctrl;
u16 control = 0;
u32 dsmgmt = 0;
+ /*
+ * If formated with metadata, require the block layer provide a buffer
+ * unless this namespace is formated such that the metadata can be
+ * stripped/generated by the controller with PRACT=1.
+ */
+ if (ns && ns->ms &&
+ (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
+ !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
+ return BLK_STS_NOTSUPP;
+
if (req->cmd_flags & REQ_FUA)
control |= NVME_RW_FUA;
if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
+ nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
+
if (ns->ms) {
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE3:
cmnd->rw.control = cpu_to_le16(control);
cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+ return 0;
}
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmd)
{
- int ret = BLK_MQ_RQ_QUEUE_OK;
+ blk_status_t ret = BLK_STS_OK;
if (!(req->rq_flags & RQF_DONTPREP)) {
nvme_req(req)->retries = 0;
break;
case REQ_OP_READ:
case REQ_OP_WRITE:
- nvme_setup_rw(ns, req, cmd);
+ ret = nvme_setup_rw(ns, req, cmd);
break;
default:
WARN_ON_ONCE(1);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
cmd->common.command_id = req->tag;
result, timeout);
}
-static void nvme_keep_alive_end_io(struct request *rq, int error)
+static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
{
struct nvme_ctrl *ctrl = rq->end_io_data;
blk_mq_free_request(rq);
- if (error) {
+ if (status) {
dev_err(ctrl->device,
- "failed nvme_keep_alive_end_io error=%d\n", error);
+ "failed nvme_keep_alive_end_io error=%d\n",
+ status);
return;
}
if (nvme_keep_alive(ctrl)) {
/* allocation failure, reset the controller */
dev_err(ctrl->device, "keep-alive failed\n");
- ctrl->ops->reset_ctrl(ctrl);
+ nvme_reset_ctrl(ctrl);
return;
}
}
}
EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
struct nvme_command c = { };
int error;
return error;
}
+static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
+{
+ struct nvme_command c = { };
+ int status;
+ void *data;
+ int pos;
+ int len;
+
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.nsid = cpu_to_le32(nsid);
+ c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
+
+ data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data,
+ NVME_IDENTIFY_DATA_SIZE);
+ if (status)
+ goto free_data;
+
+ for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
+ struct nvme_ns_id_desc *cur = data + pos;
+
+ if (cur->nidl == 0)
+ break;
+
+ switch (cur->nidt) {
+ case NVME_NIDT_EUI64:
+ if (cur->nidl != NVME_NIDT_EUI64_LEN) {
+ dev_warn(ns->ctrl->device,
+ "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
+ cur->nidl);
+ goto free_data;
+ }
+ len = NVME_NIDT_EUI64_LEN;
+ memcpy(ns->eui, data + pos + sizeof(*cur), len);
+ break;
+ case NVME_NIDT_NGUID:
+ if (cur->nidl != NVME_NIDT_NGUID_LEN) {
+ dev_warn(ns->ctrl->device,
+ "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
+ cur->nidl);
+ goto free_data;
+ }
+ len = NVME_NIDT_NGUID_LEN;
+ memcpy(ns->nguid, data + pos + sizeof(*cur), len);
+ break;
+ case NVME_NIDT_UUID:
+ if (cur->nidl != NVME_NIDT_UUID_LEN) {
+ dev_warn(ns->ctrl->device,
+ "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
+ cur->nidl);
+ goto free_data;
+ }
+ len = NVME_NIDT_UUID_LEN;
+ uuid_copy(&ns->uuid, data + pos + sizeof(*cur));
+ break;
+ default:
+ /* Skip unnkown types */
+ len = cur->nidl;
+ break;
+ }
+
+ len += sizeof(*cur);
+ }
+free_data:
+ kfree(data);
+ return status;
+}
+
static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
{
struct nvme_command c = { };
return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
+static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
struct nvme_id_ns **id)
{
struct nvme_command c = { };
return error;
}
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
- void *buffer, size_t buflen, u32 *result)
-{
- struct nvme_command c;
- union nvme_result res;
- int ret;
-
- memset(&c, 0, sizeof(c));
- c.features.opcode = nvme_admin_get_features;
- c.features.nsid = cpu_to_le32(nsid);
- c.features.fid = cpu_to_le32(fid);
-
- ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0,
- NVME_QID_ANY, 0, 0);
- if (ret >= 0 && result)
- *result = le32_to_cpu(res.u32);
- return ret;
-}
-
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
void *buffer, size_t buflen, u32 *result)
{
struct nvme_command c;
return ret;
}
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
-{
- struct nvme_command c = { };
- int error;
-
- c.common.opcode = nvme_admin_get_log_page,
- c.common.nsid = cpu_to_le32(0xFFFFFFFF),
- c.common.cdw10[0] = cpu_to_le32(
- (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
- NVME_LOG_SMART),
-
- *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
- if (!*log)
- return -ENOMEM;
-
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
- sizeof(struct nvme_smart_log));
- if (error)
- kfree(*log);
- return error;
-}
-
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
{
u32 q_count = (*count - 1) | ((*count - 1) << 16);
* access to the admin queue, as that might be only way to fix them up.
*/
if (status > 0) {
- dev_err(ctrl->dev, "Could not set queue count (%d)\n", status);
+ dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
*count = 0;
} else {
nr_io_queues = min(result & 0xffff, result >> 16) + 1;
return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
case NVME_IOCTL_SUBMIT_IO:
return nvme_submit_io(ns, (void __user *)arg);
-#ifdef CONFIG_BLK_DEV_NVME_SCSI
- case SG_GET_VERSION_NUM:
- return nvme_sg_get_version_num((void __user *)arg);
- case SG_IO:
- return nvme_sg_io(ns, (void __user *)arg);
-#endif
default:
#ifdef CONFIG_NVM
if (ns->ndev)
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
- switch (cmd) {
- case SG_IO:
- return -ENOIOCTLCMD;
- }
return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+static void nvme_set_chunk_size(struct nvme_ns *ns)
+{
+ u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
+ blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
+}
+
static void nvme_config_discard(struct nvme_ns *ns)
{
struct nvme_ctrl *ctrl = ns->ctrl;
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
- ns->queue->limits.discard_alignment = logical_block_size;
- ns->queue->limits.discard_granularity = logical_block_size;
+ if (ctrl->nr_streams && ns->sws && ns->sgs) {
+ unsigned int sz = logical_block_size * ns->sws * ns->sgs;
+
+ ns->queue->limits.discard_alignment = sz;
+ ns->queue->limits.discard_granularity = sz;
+ } else {
+ ns->queue->limits.discard_alignment = logical_block_size;
+ ns->queue->limits.discard_granularity = logical_block_size;
+ }
blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
- memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid));
+ memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid));
+ if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) {
+ /* Don't treat error as fatal we potentially
+ * already have a NGUID or EUI-64
+ */
+ if (nvme_identify_ns_descs(ns, ns->ns_id))
+ dev_warn(ns->ctrl->device,
+ "%s: Identify Descriptors failed\n", __func__);
+ }
return 0;
}
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
struct nvme_ns *ns = disk->private_data;
+ struct nvme_ctrl *ctrl = ns->ctrl;
u16 bs;
/*
if (ns->lba_shift == 0)
ns->lba_shift = 9;
bs = 1 << ns->lba_shift;
+ ns->noiob = le16_to_cpu(id->noiob);
blk_mq_freeze_queue(disk->queue);
- if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+ if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
nvme_prep_integrity(disk, id, bs);
blk_queue_logical_block_size(ns->queue, bs);
+ if (ns->noiob)
+ nvme_set_chunk_size(ns);
if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
nvme_init_integrity(ns);
if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
else
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
- if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+ if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
nvme_config_discard(ns);
blk_mq_unfreeze_queue(disk->queue);
}
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
- unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
+ unsigned long timeout = jiffies + (shutdown_timeout * HZ);
u32 csts;
int ret;
if (!table)
return;
- if (ctrl->ps_max_latency_us == 0) {
+ if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
/* Turn off APST. */
apste = 0;
dev_dbg(ctrl->device, "APST disabled\n");
string_matches(id->fr, q->fr, sizeof(id->fr));
}
+static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+ size_t nqnlen;
+ int off;
+
+ nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
+ if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
+ strcpy(ctrl->subnqn, id->subnqn);
+ return;
+ }
+
+ if (ctrl->vs >= NVME_VS(1, 2, 1))
+ dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
+
+ /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
+ off = snprintf(ctrl->subnqn, NVMF_NQN_SIZE,
+ "nqn.2014.08.org.nvmexpress:%4x%4x",
+ le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
+ memcpy(ctrl->subnqn + off, id->sn, sizeof(id->sn));
+ off += sizeof(id->sn);
+ memcpy(ctrl->subnqn + off, id->mn, sizeof(id->mn));
+ off += sizeof(id->mn);
+ memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off);
+}
+
/*
* Initialize the cached copies of the Identify data and various controller
* register in our nvme_ctrl structure. This should be called as soon as
u64 cap;
int ret, page_shift;
u32 max_hw_sectors;
- u8 prev_apsta;
+ bool prev_apst_enabled;
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
if (ret) {
return -EIO;
}
+ nvme_init_subnqn(ctrl, id);
+
if (!ctrl->identified) {
/*
* Check for quirks. Quirk can depend on firmware version,
}
if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
- dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
+ dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
}
ctrl->kas = le16_to_cpu(id->kas);
ctrl->npss = id->npss;
- prev_apsta = ctrl->apsta;
+ ctrl->apsta = id->apsta;
+ prev_apst_enabled = ctrl->apst_enabled;
if (ctrl->quirks & NVME_QUIRK_NO_APST) {
if (force_apst && id->apsta) {
- dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
- ctrl->apsta = 1;
+ dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
+ ctrl->apst_enabled = true;
} else {
- ctrl->apsta = 0;
+ ctrl->apst_enabled = false;
}
} else {
- ctrl->apsta = id->apsta;
+ ctrl->apst_enabled = id->apsta;
}
memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
ret = -EINVAL;
if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
- dev_err(ctrl->dev,
+ dev_err(ctrl->device,
"keep-alive support is mandatory for fabrics\n");
ret = -EINVAL;
}
} else {
ctrl->cntlid = le16_to_cpu(id->cntlid);
+ ctrl->hmpre = le32_to_cpu(id->hmpre);
+ ctrl->hmmin = le32_to_cpu(id->hmmin);
}
kfree(id);
- if (ctrl->apsta && !prev_apsta)
+ if (ctrl->apst_enabled && !prev_apst_enabled)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
- else if (!ctrl->apsta && prev_apsta)
+ else if (!ctrl->apst_enabled && prev_apst_enabled)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
nvme_configure_apst(ctrl);
+ nvme_configure_directives(ctrl);
ctrl->identified = true;
return nvme_dev_user_cmd(ctrl, argp);
case NVME_IOCTL_RESET:
dev_warn(ctrl->device, "resetting controller\n");
- return ctrl->ops->reset_ctrl(ctrl);
+ return nvme_reset_ctrl_sync(ctrl);
case NVME_IOCTL_SUBSYS_RESET:
return nvme_reset_subsystem(ctrl);
case NVME_IOCTL_RESCAN:
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
int ret;
- ret = ctrl->ops->reset_ctrl(ctrl);
+ ret = nvme_reset_ctrl_sync(ctrl);
if (ret < 0)
return ret;
return count;
int serial_len = sizeof(ctrl->serial);
int model_len = sizeof(ctrl->model);
- if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
- return sprintf(buf, "eui.%16phN\n", ns->uuid);
+ if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+ return sprintf(buf, "eui.%16phN\n", ns->nguid);
if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
return sprintf(buf, "eui.%8phN\n", ns->eui);
}
static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
+static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+ return sprintf(buf, "%pU\n", ns->nguid);
+}
+static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL);
+
static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
- return sprintf(buf, "%pU\n", ns->uuid);
+
+ /* For backward compatibility expose the NGUID to userspace if
+ * we have no UUID set
+ */
+ if (uuid_is_null(&ns->uuid)) {
+ printk_ratelimited(KERN_WARNING
+ "No UUID available providing old NGUID\n");
+ return sprintf(buf, "%pU\n", ns->nguid);
+ }
+ return sprintf(buf, "%pU\n", &ns->uuid);
}
static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
static struct attribute *nvme_ns_attrs[] = {
&dev_attr_wwid.attr,
&dev_attr_uuid.attr,
+ &dev_attr_nguid.attr,
&dev_attr_eui.attr,
&dev_attr_nsid.attr,
NULL,
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
if (a == &dev_attr_uuid.attr) {
- if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
+ if (uuid_is_null(&ns->uuid) ||
+ !memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+ return 0;
+ }
+ if (a == &dev_attr_nguid.attr) {
+ if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
return 0;
}
if (a == &dev_attr_eui.attr) {
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- return snprintf(buf, PAGE_SIZE, "%s\n",
- ctrl->ops->get_subsysnqn(ctrl));
+ return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subnqn);
}
static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
NULL
};
-#define CHECK_ATTR(ctrl, a, name) \
- if ((a) == &dev_attr_##name.attr && \
- !(ctrl)->ops->get_##name) \
- return 0
-
static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n)
{
struct device *dev = container_of(kobj, struct device, kobj);
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- if (a == &dev_attr_delete_controller.attr) {
- if (!ctrl->ops->delete_ctrl)
- return 0;
- }
-
- CHECK_ATTR(ctrl, a, subsysnqn);
- CHECK_ATTR(ctrl, a, address);
+ if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
+ return 0;
+ if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
+ return 0;
return a->mode;
}
return ret;
}
+static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+ struct streams_directive_params s;
+ int ret;
+
+ if (!ctrl->nr_streams)
+ return 0;
+
+ ret = nvme_get_stream_params(ctrl, &s, ns->ns_id);
+ if (ret)
+ return ret;
+
+ ns->sws = le32_to_cpu(s.sws);
+ ns->sgs = le16_to_cpu(s.sgs);
+
+ if (ns->sws) {
+ unsigned int bs = 1 << ns->lba_shift;
+
+ blk_queue_io_min(ns->queue, bs * ns->sws);
+ if (ns->sgs)
+ blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
+ }
+
+ return 0;
+}
+
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
nvme_set_queue_limits(ctrl, ns->queue);
+ nvme_setup_streams_ns(ctrl, ns);
sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
if (nvme_nvm_ns_supported(ns, id) &&
nvme_nvm_register(ns, disk_name, node)) {
- dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__);
+ dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
goto out_free_id;
}
* removal.
*/
if (ctrl->state == NVME_CTRL_LIVE)
- schedule_work(&ctrl->scan_work);
+ queue_work(nvme_wq, &ctrl->scan_work);
}
EXPORT_SYMBOL_GPL(nvme_queue_scan);
/*FALLTHRU*/
case NVME_SC_ABORT_REQ:
++ctrl->event_limit;
- schedule_work(&ctrl->async_event_work);
+ queue_work(nvme_wq, &ctrl->async_event_work);
break;
default:
break;
void nvme_queue_async_events(struct nvme_ctrl *ctrl)
{
ctrl->event_limit = NVME_NR_AERS;
- schedule_work(&ctrl->async_event_work);
+ queue_work(nvme_wq, &ctrl->async_event_work);
}
EXPORT_SYMBOL_GPL(nvme_queue_async_events);
mutex_lock(&ctrl->namespaces_mutex);
+ /* Forcibly unquiesce queues to avoid blocking dispatch */
+ blk_mq_unquiesce_queue(ctrl->admin_q);
+
/* Forcibly start all queues to avoid having stuck requests */
blk_mq_start_hw_queues(ctrl->admin_q);
revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
+ /* Forcibly unquiesce queues to avoid blocking dispatch */
+ blk_mq_unquiesce_queue(ns->queue);
+
/*
* Forcibly start all queues to avoid having stuck requests.
* Note that we must ensure the queues are not stopped
mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list) {
- blk_mq_start_stopped_hw_queues(ns->queue, true);
+ blk_mq_unquiesce_queue(ns->queue);
blk_mq_kick_requeue_list(ns->queue);
}
mutex_unlock(&ctrl->namespaces_mutex);
{
int result;
+ nvme_wq = alloc_workqueue("nvme-wq",
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ if (!nvme_wq)
+ return -ENOMEM;
+
result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
&nvme_dev_fops);
if (result < 0)
- return result;
+ goto destroy_wq;
else if (result > 0)
nvme_char_major = result;
return 0;
- unregister_chrdev:
+unregister_chrdev:
__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+destroy_wq:
+ destroy_workqueue(nvme_wq);
return result;
}
{
class_destroy(nvme_class);
__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+ destroy_workqueue(nvme_wq);
}
MODULE_LICENSE("GPL");
kref_init(&host->ref);
memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
- uuid_be_gen(&host->id);
list_add_tail(&host->list, &nvmf_hosts);
out_unlock:
return NULL;
kref_init(&host->ref);
- uuid_be_gen(&host->id);
snprintf(host->nqn, NVMF_NQN_SIZE,
"nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
EXPORT_SYMBOL_GPL(nvmf_get_address);
/**
- * nvmf_get_subsysnqn() - Get subsystem NQN
- * @ctrl: Host NVMe controller instance which we got the NQN
- */
-const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl)
-{
- return ctrl->opts->subsysnqn;
-}
-EXPORT_SYMBOL_GPL(nvmf_get_subsysnqn);
-
-/**
* nvmf_reg_read32() - NVMe Fabrics "Property Get" API function.
* @ctrl: Host NVMe controller instance maintaining the admin
* queue used to submit the property read command to
}
}
break;
+
+ case NVME_SC_CONNECT_INVALID_HOST:
+ dev_err(ctrl->device,
+ "Connect for subsystem %s is not allowed, hostnqn: %s\n",
+ data->subsysnqn, data->hostnqn);
+ break;
+
+ case NVME_SC_CONNECT_CTRL_BUSY:
+ dev_err(ctrl->device,
+ "Connect command failed: controller is busy or not available\n");
+ break;
+
+ case NVME_SC_CONNECT_FORMAT:
+ dev_err(ctrl->device,
+ "Connect incompatible format: %d",
+ cmd->connect.recfmt);
+ break;
+
default:
dev_err(ctrl->device,
"Connect command failed, error wo/DNR bit: %d\n",
cmd.connect.opcode = nvme_fabrics_command;
cmd.connect.fctype = nvme_fabrics_type_connect;
cmd.connect.qid = 0;
-
- /*
- * fabrics spec sets a minimum of depth 32 for admin queue,
- * so set the queue with this depth always until
- * justification otherwise.
- */
- cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+ cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
/*
* Set keep-alive timeout in seconds granularity (ms * 1000)
if (!data)
return -ENOMEM;
- memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
+ uuid_copy(&data->hostid, &ctrl->opts->host->id);
data->cntlid = cpu_to_le16(0xffff);
strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
if (!data)
return -ENOMEM;
- memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
+ uuid_copy(&data->hostid, &ctrl->opts->host->id);
data->cntlid = cpu_to_le16(ctrl->cntlid);
strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
{
if (ctrl->opts->max_reconnects != -1 &&
- ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects)
+ ctrl->nr_reconnects < ctrl->opts->max_reconnects)
return true;
return false;
{ NVMF_OPT_KATO, "keep_alive_tmo=%d" },
{ NVMF_OPT_HOSTNQN, "hostnqn=%s" },
{ NVMF_OPT_HOST_TRADDR, "host_traddr=%s" },
+ { NVMF_OPT_HOST_ID, "hostid=%s" },
{ NVMF_OPT_ERR, NULL }
};
int token, ret = 0;
size_t nqnlen = 0;
int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
+ uuid_t hostid;
/* Set defaults */
opts->queue_size = NVMF_DEF_QUEUE_SIZE;
if (!options)
return -ENOMEM;
+ uuid_gen(&hostid);
+
while ((p = strsep(&o, ",\n")) != NULL) {
if (!*p)
continue;
}
opts->host_traddr = p;
break;
+ case NVMF_OPT_HOST_ID:
+ p = match_strdup(args);
+ if (!p) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (uuid_parse(p, &hostid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
default:
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
p);
opts->host = nvmf_default_host;
}
+ uuid_copy(&opts->host->id, &hostid);
+
out:
if (!opts->discovery_nqn && !opts->kato)
opts->kato = NVME_DEFAULT_KATO;
#define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
- NVMF_OPT_KATO | NVMF_OPT_HOSTNQN)
+ NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
+ NVMF_OPT_HOST_ID)
static struct nvme_ctrl *
nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
goto out_unlock;
}
+ if (strcmp(ctrl->subnqn, opts->subsysnqn)) {
+ dev_warn(ctrl->device,
+ "controller returned incorrect NQN: \"%s\".\n",
+ ctrl->subnqn);
+ mutex_unlock(&nvmf_transports_mutex);
+ ctrl->ops->delete_ctrl(ctrl);
+ return ERR_PTR(-EINVAL);
+ }
+
mutex_unlock(&nvmf_transports_mutex);
return ctrl;
struct kref ref;
struct list_head list;
char nqn[NVMF_NQN_SIZE];
- uuid_be id;
+ uuid_t id;
};
/**
NVMF_OPT_RECONNECT_DELAY = 1 << 9,
NVMF_OPT_HOST_TRADDR = 1 << 10,
NVMF_OPT_CTRL_LOSS_TMO = 1 << 11,
+ NVMF_OPT_HOST_ID = 1 << 12,
};
/**
* @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
* @kato: Keep-alive timeout.
* @host: Virtual NVMe host, contains the NQN and Host ID.
- * @nr_reconnects: number of reconnect attempted since the last ctrl failure
* @max_reconnects: maximum number of allowed reconnect attempts before removing
* the controller, (-1) means reconnect forever, zero means remove
* immediately;
bool discovery_nqn;
unsigned int kato;
struct nvmf_host *host;
- int nr_reconnects;
int max_reconnects;
};
int nvmf_register_transport(struct nvmf_transport_ops *ops);
void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
-const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
*/
#define NVME_FC_NR_AEN_COMMANDS 1
#define NVME_FC_AQ_BLKMQ_DEPTH \
- (NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
+ (NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
#define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1)
enum nvme_fc_queue_flags {
struct blk_mq_tag_set tag_set;
struct work_struct delete_work;
- struct work_struct reset_work;
struct delayed_work connect_work;
struct kref ref;
u32 flags;
u32 iocnt;
+ wait_queue_head_t ioabort_wait;
struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS];
static DEFINE_IDA(nvme_fc_local_port_cnt);
static DEFINE_IDA(nvme_fc_ctrl_cnt);
-static struct workqueue_struct *nvme_fc_wq;
assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize);
/* Linux supports only Dynamic controllers */
assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
- memcpy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id,
- min_t(size_t, FCNVME_ASSOC_HOSTID_LEN, sizeof(uuid_be)));
+ uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn,
min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE));
strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn,
spin_lock_irqsave(&ctrl->lock, flags);
if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
- if (ctrl->flags & FCCTRL_TERMIO)
- ctrl->iocnt--;
+ if (ctrl->flags & FCCTRL_TERMIO) {
+ if (!--ctrl->iocnt)
+ wake_up(&ctrl->ioabort_wait);
+ }
}
if (op->flags & FCOP_FLAGS_RELEASED)
complete_rq = true;
{
struct nvme_fc_ctrl *ctrl = set->driver_data;
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
- struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1];
-
- return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
-}
-
-static int
-nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq,
- unsigned int hctx_idx, unsigned int numa_node)
-{
- struct nvme_fc_ctrl *ctrl = set->driver_data;
- struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
- struct nvme_fc_queue *queue = &ctrl->queues[0];
+ int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
+ struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
}
static void
nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
{
+ /* only proceed if in LIVE state - e.g. on first error */
+ if (ctrl->ctrl.state != NVME_CTRL_LIVE)
+ return;
+
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: transport association error detected: %s\n",
ctrl->cnum, errmsg);
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
- /* stop the queues on error, cleanup is in reset thread */
- if (ctrl->queue_count > 1)
- nvme_stop_queues(&ctrl->ctrl);
-
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: error_recovery: Couldn't change state "
return;
}
- if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
- dev_err(ctrl->ctrl.device,
- "NVME-FC{%d}: error_recovery: Failed to schedule "
- "reset work\n", ctrl->cnum);
+ nvme_reset_ctrl(&ctrl->ctrl);
}
static enum blk_eh_timer_return
* level FC exchange resource that is also outstanding. This must be
* considered in all cleanup operations.
*/
-static int
+static blk_status_t
nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
struct nvme_fc_fcp_op *op, u32 data_len,
enum nvmefc_fcp_datadir io_dir)
* the target device is present
*/
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
if (!nvme_fc_ctrl_get(ctrl))
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
/* format the FC-NVME CMD IU and fcp_req */
cmdiu->connection_id = cpu_to_be64(queue->connection_id);
if (ret < 0) {
nvme_cleanup_cmd(op->rq);
nvme_fc_ctrl_put(ctrl);
- return (ret == -ENOMEM || ret == -EAGAIN) ?
- BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+ if (ret == -ENOMEM || ret == -EAGAIN)
+ return BLK_STS_RESOURCE;
+ return BLK_STS_IOERR;
}
}
queue->lldd_handle, &op->fcp_req);
if (ret) {
- if (op->rq) { /* normal request */
+ if (op->rq) /* normal request */
nvme_fc_unmap_data(ctrl, op->rq, op);
- nvme_cleanup_cmd(op->rq);
- }
/* else - aen. no cleanup needed */
nvme_fc_ctrl_put(ctrl);
if (ret != -EBUSY)
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
if (op->rq) {
blk_mq_stop_hw_queues(op->rq->q);
blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
}
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
-static int
+static blk_status_t
nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_command *sqe = &cmdiu->sqe;
enum nvmefc_fcp_datadir io_dir;
u32 data_len;
- int ret;
+ blk_status_t ret;
ret = nvme_setup_cmd(ns, rq, sqe);
if (ret)
struct nvme_fc_fcp_op *aen_op;
unsigned long flags;
bool terminating = false;
- int ret;
+ blk_status_t ret;
if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
return;
op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
FCOP_FLAGS_COMPLETE);
- nvme_cleanup_cmd(rq);
nvme_fc_unmap_data(ctrl, rq, op);
nvme_complete_rq(rq);
nvme_fc_ctrl_put(ctrl);
int ret;
bool changed;
- ++ctrl->ctrl.opts->nr_reconnects;
+ ++ctrl->ctrl.nr_reconnects;
/*
* Create the admin queue
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
- ctrl->ctrl.opts->nr_reconnects = 0;
+ ctrl->ctrl.nr_reconnects = 0;
if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
/* wait for all io that had to be aborted */
spin_lock_irqsave(&ctrl->lock, flags);
- while (ctrl->iocnt) {
- spin_unlock_irqrestore(&ctrl->lock, flags);
- msleep(1000);
- spin_lock_irqsave(&ctrl->lock, flags);
- }
+ wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
ctrl->flags &= ~FCCTRL_TERMIO;
spin_unlock_irqrestore(&ctrl->lock, flags);
struct nvme_fc_ctrl *ctrl =
container_of(work, struct nvme_fc_ctrl, delete_work);
- cancel_work_sync(&ctrl->reset_work);
+ cancel_work_sync(&ctrl->ctrl.reset_work);
cancel_delayed_work_sync(&ctrl->connect_work);
/*
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return true;
- if (!queue_work(nvme_fc_wq, &ctrl->delete_work))
+ if (!queue_work(nvme_wq, &ctrl->delete_work))
return true;
return false;
ret = __nvme_fc_del_ctrl(ctrl);
if (!ret)
- flush_workqueue(nvme_fc_wq);
+ flush_workqueue(nvme_wq);
nvme_put_ctrl(&ctrl->ctrl);
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
- queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
+ queue_delayed_work(nvme_wq, &ctrl->connect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Max reconnect attempts (%d) "
"reached. Removing controller\n",
- ctrl->cnum, ctrl->ctrl.opts->nr_reconnects);
+ ctrl->cnum, ctrl->ctrl.nr_reconnects);
WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
}
}
nvme_fc_reset_ctrl_work(struct work_struct *work)
{
struct nvme_fc_ctrl *ctrl =
- container_of(work, struct nvme_fc_ctrl, reset_work);
+ container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
int ret;
/* will block will waiting for io to terminate */
"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
}
-/*
- * called by the nvme core layer, for sysfs interface that requests
- * a reset of the nvme controller
- */
-static int
-nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
-{
- struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
-
- dev_info(ctrl->ctrl.device,
- "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
-
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
- return -EBUSY;
-
- if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
- return -EBUSY;
-
- flush_work(&ctrl->reset_work);
-
- return 0;
-}
-
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.name = "fc",
.module = THIS_MODULE,
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .reset_ctrl = nvme_fc_reset_nvme_ctrl,
.free_ctrl = nvme_fc_nvme_ctrl_freed,
.submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_del_nvme_ctrl,
- .get_subsysnqn = nvmf_get_subsysnqn,
.get_address = nvmf_get_address,
};
static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
.queue_rq = nvme_fc_queue_rq,
.complete = nvme_fc_complete_rq,
- .init_request = nvme_fc_init_admin_request,
+ .init_request = nvme_fc_init_request,
.exit_request = nvme_fc_exit_request,
.reinit_request = nvme_fc_reinit_request,
.init_hctx = nvme_fc_init_admin_hctx,
kref_init(&ctrl->ref);
INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
- INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
+ INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
spin_lock_init(&ctrl->lock);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
+ /* Remove core ctrl ref. */
+ nvme_put_ctrl(&ctrl->ctrl);
+
/* as we're past the point where we transition to the ref
* counting teardown path, if we return a bad pointer here,
* the calling routine, thinking it's prior to the
static int __init nvme_fc_init_module(void)
{
- int ret;
-
- nvme_fc_wq = create_workqueue("nvme_fc_wq");
- if (!nvme_fc_wq)
- return -ENOMEM;
-
- ret = nvmf_register_transport(&nvme_fc_transport);
- if (ret)
- goto err;
-
- return 0;
-err:
- destroy_workqueue(nvme_fc_wq);
- return ret;
+ return nvmf_register_transport(&nvme_fc_transport);
}
static void __exit nvme_fc_exit_module(void)
nvmf_unregister_transport(&nvme_fc_transport);
- destroy_workqueue(nvme_fc_wq);
-
ida_destroy(&nvme_fc_local_port_cnt);
ida_destroy(&nvme_fc_ctrl_cnt);
}
BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
}
rqd->bio->bi_iter.bi_sector));
}
-static void nvme_nvm_end_io(struct request *rq, int error)
+static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
{
struct nvm_rq *rqd = rq->end_io_data;
rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
if (IS_ERR(rq)) {
kfree(cmd);
- return -ENOMEM;
+ return PTR_ERR(rq);
}
rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
.max_phys_sect = 64,
};
-static void nvme_nvm_end_user_vio(struct request *rq, int error)
-{
- struct completion *waiting = rq->end_io_data;
-
- complete(waiting);
-}
-
static int nvme_nvm_submit_user_cmd(struct request_queue *q,
struct nvme_ns *ns,
struct nvme_nvm_command *vcmd,
rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
- rq->end_io_data = &wait;
if (ppa_buf && ppa_len) {
ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
}
submit:
- blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio);
-
- wait_for_completion_io(&wait);
+ blk_execute_rq(q, NULL, rq, 0);
if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
ret = -EINTR;
extern unsigned char admin_timeout;
#define ADMIN_TIMEOUT (admin_timeout * HZ)
-extern unsigned char shutdown_timeout;
-#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
-
#define NVME_DEFAULT_KATO 5
#define NVME_KATO_GRACE 10
+extern struct workqueue_struct *nvme_wq;
+
enum {
NVME_NS_LBA = 0,
NVME_NS_LIGHTNVM = 1,
struct device *device; /* char device */
struct list_head node;
struct ida ns_ida;
+ struct work_struct reset_work;
struct opal_dev *opal_dev;
char serial[20];
char model[40];
char firmware_rev[8];
+ char subnqn[NVMF_NQN_SIZE];
u16 cntlid;
u32 ctrl_config;
u16 oncs;
u16 vid;
u16 oacs;
+ u16 nssa;
+ u16 nr_streams;
atomic_t abort_limit;
u8 event_limit;
u8 vwc;
/* Power saving configuration */
u64 ps_max_latency_us;
+ bool apst_enabled;
+
+ u32 hmpre;
+ u32 hmmin;
/* Fabrics only */
u16 sqsize;
u32 iorcsz;
u16 icdoff;
u16 maxcmd;
+ int nr_reconnects;
struct nvmf_ctrl_options *opts;
};
-/*
- * An NVM Express namespace is equivalent to a SCSI LUN
- */
struct nvme_ns {
struct list_head list;
int instance;
u8 eui[8];
- u8 uuid[16];
+ u8 nguid[16];
+ uuid_t uuid;
unsigned ns_id;
int lba_shift;
u16 ms;
+ u16 sgs;
+ u32 sws;
bool ext;
u8 pi_type;
unsigned long flags;
+ u16 noiob;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
- int (*reset_ctrl)(struct nvme_ctrl *ctrl);
void (*free_ctrl)(struct nvme_ctrl *ctrl);
void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
int (*delete_ctrl)(struct nvme_ctrl *ctrl);
- const char *(*get_subsysnqn)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
};
#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, unsigned int flags, int qid);
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmd);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
void __user *ubuffer, unsigned bufflen,
void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
u32 *result, unsigned timeout);
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
- struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
- void *buffer, size_t buflen, u32 *result);
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
- void *buffer, size_t buflen, u32 *result);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
-
-struct sg_io_hdr;
-
-int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
-int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
-int nvme_sg_get_version_num(int __user *ip);
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
#ifdef CONFIG_NVM
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-pci.h>
-#include <linux/cpu.h>
-#include <linux/delay.h>
#include <linux/dmi.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/hdreg.h>
-#include <linux/idr.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/poison.h>
-#include <linux/ptrace.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/t10-pi.h>
#include <linux/timer.h>
#include <linux/types.h>
#include "nvme.h"
#define NVME_Q_DEPTH 1024
-#define NVME_AQ_DEPTH 256
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
module_param(use_cmb_sqes, bool, 0644);
MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
-static struct workqueue_struct *nvme_workq;
+static unsigned int max_host_mem_size_mb = 128;
+module_param(max_host_mem_size_mb, uint, 0444);
+MODULE_PARM_DESC(max_host_mem_size_mb,
+ "Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
struct nvme_dev;
struct nvme_queue;
-static int nvme_reset(struct nvme_dev *dev);
static void nvme_process_cq(struct nvme_queue *nvmeq);
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
int q_depth;
u32 db_stride;
void __iomem *bar;
- struct work_struct reset_work;
+ unsigned long bar_mapped_size;
struct work_struct remove_work;
- struct timer_list watchdog_timer;
struct mutex shutdown_lock;
bool subsystem;
void __iomem *cmb;
u32 cmbloc;
struct nvme_ctrl ctrl;
struct completion ioq_wait;
+
+ /* shadow doorbell buffer support: */
u32 *dbbuf_dbs;
dma_addr_t dbbuf_dbs_dma_addr;
u32 *dbbuf_eis;
dma_addr_t dbbuf_eis_dma_addr;
+
+ /* host memory buffer support: */
+ u64 host_mem_size;
+ u32 nr_host_mem_descs;
+ struct nvme_host_mem_buf_desc *host_mem_descs;
+ void **host_mem_desc_bufs;
};
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
- BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
nvmeq->tags = NULL;
}
-static int nvme_admin_init_request(struct blk_mq_tag_set *set,
- struct request *req, unsigned int hctx_idx,
- unsigned int numa_node)
-{
- struct nvme_dev *dev = set->driver_data;
- struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = dev->queues[0];
-
- BUG_ON(!nvmeq);
- iod->nvmeq = nvmeq;
- return 0;
-}
-
static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
{
struct nvme_dev *dev = set->driver_data;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
+ int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
+ struct nvme_queue *nvmeq = dev->queues[queue_idx];
BUG_ON(!nvmeq);
iod->nvmeq = nvmeq;
return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
}
-static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
+static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
int nseg = blk_rq_nr_phys_segments(rq);
if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
if (!iod->sg)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
} else {
iod->sg = iod->inline_sg;
}
iod->nents = 0;
iod->length = size;
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
return true;
}
-static int nvme_map_data(struct nvme_dev *dev, struct request *req,
+static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct request_queue *q = req->q;
enum dma_data_direction dma_dir = rq_data_dir(req) ?
DMA_TO_DEVICE : DMA_FROM_DEVICE;
- int ret = BLK_MQ_RQ_QUEUE_ERROR;
+ blk_status_t ret = BLK_STS_IOERR;
sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
iod->nents = blk_rq_map_sg(q, req, iod->sg);
if (!iod->nents)
goto out;
- ret = BLK_MQ_RQ_QUEUE_BUSY;
+ ret = BLK_STS_RESOURCE;
if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
DMA_ATTR_NO_WARN))
goto out;
if (!nvme_setup_prps(dev, req))
goto out_unmap;
- ret = BLK_MQ_RQ_QUEUE_ERROR;
+ ret = BLK_STS_IOERR;
if (blk_integrity_rq(req)) {
if (blk_rq_count_integrity_sg(q, req->bio) != 1)
goto out_unmap;
cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma);
if (blk_integrity_rq(req))
cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_unmap:
dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
/*
* NOTE: ns is NULL when called on the admin queue.
*/
-static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
struct nvme_dev *dev = nvmeq->dev;
struct request *req = bd->rq;
struct nvme_command cmnd;
- int ret = BLK_MQ_RQ_QUEUE_OK;
-
- /*
- * If formated with metadata, require the block layer provide a buffer
- * unless this namespace is formated such that the metadata can be
- * stripped/generated by the controller with PRACT=1.
- */
- if (ns && ns->ms && !blk_integrity_rq(req)) {
- if (!(ns->pi_type && ns->ms == 8) &&
- !blk_rq_is_passthrough(req)) {
- blk_mq_end_request(req, -EFAULT);
- return BLK_MQ_RQ_QUEUE_OK;
- }
- }
+ blk_status_t ret;
ret = nvme_setup_cmd(ns, req, &cmnd);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
return ret;
ret = nvme_init_iod(req, dev);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
goto out_free_cmd;
- if (blk_rq_nr_phys_segments(req))
+ if (blk_rq_nr_phys_segments(req)) {
ret = nvme_map_data(dev, req, &cmnd);
-
- if (ret != BLK_MQ_RQ_QUEUE_OK)
- goto out_cleanup_iod;
+ if (ret)
+ goto out_cleanup_iod;
+ }
blk_mq_start_request(req);
spin_lock_irq(&nvmeq->q_lock);
if (unlikely(nvmeq->cq_vector < 0)) {
- ret = BLK_MQ_RQ_QUEUE_ERROR;
+ ret = BLK_STS_IOERR;
spin_unlock_irq(&nvmeq->q_lock);
goto out_cleanup_iod;
}
__nvme_submit_cmd(nvmeq, &cmnd);
nvme_process_cq(nvmeq);
spin_unlock_irq(&nvmeq->q_lock);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_cleanup_iod:
nvme_free_iod(dev, req);
out_free_cmd:
return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
}
-static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
+static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
{
- u16 head, phase;
-
- head = nvmeq->cq_head;
- phase = nvmeq->cq_phase;
-
- while (nvme_cqe_valid(nvmeq, head, phase)) {
- struct nvme_completion cqe = nvmeq->cqes[head];
- struct request *req;
-
- if (++head == nvmeq->q_depth) {
- head = 0;
- phase = !phase;
- }
-
- if (tag && *tag == cqe.command_id)
- *tag = -1;
+ u16 head = nvmeq->cq_head;
- if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
- dev_warn(nvmeq->dev->ctrl.device,
- "invalid id %d completed on queue %d\n",
- cqe.command_id, le16_to_cpu(cqe.sq_id));
- continue;
- }
+ if (likely(nvmeq->cq_vector >= 0)) {
+ if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
+ nvmeq->dbbuf_cq_ei))
+ writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+ }
+}
- /*
- * AEN requests are special as they don't time out and can
- * survive any kind of queue freeze and often don't respond to
- * aborts. We don't even bother to allocate a struct request
- * for them but rather special case them here.
- */
- if (unlikely(nvmeq->qid == 0 &&
- cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
- nvme_complete_async_event(&nvmeq->dev->ctrl,
- cqe.status, &cqe.result);
- continue;
- }
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
+ struct nvme_completion *cqe)
+{
+ struct request *req;
- req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
- nvme_end_request(req, cqe.status, cqe.result);
+ if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
+ dev_warn(nvmeq->dev->ctrl.device,
+ "invalid id %d completed on queue %d\n",
+ cqe->command_id, le16_to_cpu(cqe->sq_id));
+ return;
}
- if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
+ /*
+ * AEN requests are special as they don't time out and can
+ * survive any kind of queue freeze and often don't respond to
+ * aborts. We don't even bother to allocate a struct request
+ * for them but rather special case them here.
+ */
+ if (unlikely(nvmeq->qid == 0 &&
+ cqe->command_id >= NVME_AQ_BLKMQ_DEPTH)) {
+ nvme_complete_async_event(&nvmeq->dev->ctrl,
+ cqe->status, &cqe->result);
return;
+ }
- if (likely(nvmeq->cq_vector >= 0))
- if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
- nvmeq->dbbuf_cq_ei))
- writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
- nvmeq->cq_head = head;
- nvmeq->cq_phase = phase;
+ req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
+ nvme_end_request(req, cqe->status, cqe->result);
+}
- nvmeq->cqe_seen = 1;
+static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
+ struct nvme_completion *cqe)
+{
+ if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
+ *cqe = nvmeq->cqes[nvmeq->cq_head];
+
+ if (++nvmeq->cq_head == nvmeq->q_depth) {
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = !nvmeq->cq_phase;
+ }
+ return true;
+ }
+ return false;
}
static void nvme_process_cq(struct nvme_queue *nvmeq)
{
- __nvme_process_cq(nvmeq, NULL);
+ struct nvme_completion cqe;
+ int consumed = 0;
+
+ while (nvme_read_cqe(nvmeq, &cqe)) {
+ nvme_handle_cqe(nvmeq, &cqe);
+ consumed++;
+ }
+
+ if (consumed) {
+ nvme_ring_cq_doorbell(nvmeq);
+ nvmeq->cqe_seen = 1;
+ }
}
static irqreturn_t nvme_irq(int irq, void *data)
static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
{
- if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
- spin_lock_irq(&nvmeq->q_lock);
- __nvme_process_cq(nvmeq, &tag);
- spin_unlock_irq(&nvmeq->q_lock);
+ struct nvme_completion cqe;
+ int found = 0, consumed = 0;
- if (tag == -1)
- return 1;
- }
+ if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+ return 0;
- return 0;
+ spin_lock_irq(&nvmeq->q_lock);
+ while (nvme_read_cqe(nvmeq, &cqe)) {
+ nvme_handle_cqe(nvmeq, &cqe);
+ consumed++;
+
+ if (tag == cqe.command_id) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (consumed)
+ nvme_ring_cq_doorbell(nvmeq);
+ spin_unlock_irq(&nvmeq->q_lock);
+
+ return found;
}
static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
}
-static void abort_endio(struct request *req, int error)
+static void abort_endio(struct request *req, blk_status_t error)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = iod->nvmeq;
blk_mq_free_request(req);
}
+static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
+{
+
+ /* If true, indicates loss of adapter communication, possibly by a
+ * NVMe Subsystem reset.
+ */
+ bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
+
+ /* If there is a reset ongoing, we shouldn't reset again. */
+ if (dev->ctrl.state == NVME_CTRL_RESETTING)
+ return false;
+
+ /* We shouldn't reset unless the controller is on fatal error state
+ * _or_ if we lost the communication with it.
+ */
+ if (!(csts & NVME_CSTS_CFS) && !nssro)
+ return false;
+
+ /* If PCI error recovery process is happening, we cannot reset or
+ * the recovery mechanism will surely fail.
+ */
+ if (pci_channel_offline(to_pci_dev(dev->dev)))
+ return false;
+
+ return true;
+}
+
+static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
+{
+ /* Read a config register to help see what died. */
+ u16 pci_status;
+ int result;
+
+ result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
+ &pci_status);
+ if (result == PCIBIOS_SUCCESSFUL)
+ dev_warn(dev->ctrl.device,
+ "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
+ csts, pci_status);
+ else
+ dev_warn(dev->ctrl.device,
+ "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
+ csts, result);
+}
+
static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_dev *dev = nvmeq->dev;
struct request *abort_req;
struct nvme_command cmd;
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ /*
+ * Reset immediately if the controller is failed
+ */
+ if (nvme_should_reset(dev, csts)) {
+ nvme_warn_reset(dev, csts);
+ nvme_dev_disable(dev, false);
+ nvme_reset_ctrl(&dev->ctrl);
+ return BLK_EH_HANDLED;
+ }
/*
* Did we miss an interrupt?
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
- nvme_reset(dev);
+ nvme_reset_ctrl(&dev->ctrl);
/*
* Mark the request as handled, since the inline shutdown
.complete = nvme_pci_complete_rq,
.init_hctx = nvme_admin_init_hctx,
.exit_hctx = nvme_admin_exit_hctx,
- .init_request = nvme_admin_init_request,
+ .init_request = nvme_init_request,
.timeout = nvme_timeout,
};
return 0;
}
+static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+{
+ return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
+}
+
+static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (size <= dev->bar_mapped_size)
+ return 0;
+ if (size > pci_resource_len(pdev, 0))
+ return -ENOMEM;
+ if (dev->bar)
+ iounmap(dev->bar);
+ dev->bar = ioremap(pci_resource_start(pdev, 0), size);
+ if (!dev->bar) {
+ dev->bar_mapped_size = 0;
+ return -ENOMEM;
+ }
+ dev->bar_mapped_size = size;
+ dev->dbs = dev->bar + NVME_REG_DBS;
+
+ return 0;
+}
+
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
int result;
u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
struct nvme_queue *nvmeq;
+ result = nvme_remap_bar(dev, db_bar_size(dev, 0));
+ if (result < 0)
+ return result;
+
dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
NVME_CAP_NSSRC(cap) : 0;
return result;
}
-static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
-{
-
- /* If true, indicates loss of adapter communication, possibly by a
- * NVMe Subsystem reset.
- */
- bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
-
- /* If there is a reset ongoing, we shouldn't reset again. */
- if (dev->ctrl.state == NVME_CTRL_RESETTING)
- return false;
-
- /* We shouldn't reset unless the controller is on fatal error state
- * _or_ if we lost the communication with it.
- */
- if (!(csts & NVME_CSTS_CFS) && !nssro)
- return false;
-
- /* If PCI error recovery process is happening, we cannot reset or
- * the recovery mechanism will surely fail.
- */
- if (pci_channel_offline(to_pci_dev(dev->dev)))
- return false;
-
- return true;
-}
-
-static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
-{
- /* Read a config register to help see what died. */
- u16 pci_status;
- int result;
-
- result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
- &pci_status);
- if (result == PCIBIOS_SUCCESSFUL)
- dev_warn(dev->ctrl.device,
- "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
- csts, pci_status);
- else
- dev_warn(dev->ctrl.device,
- "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
- csts, result);
-}
-
-static void nvme_watchdog_timer(unsigned long data)
-{
- struct nvme_dev *dev = (struct nvme_dev *)data;
- u32 csts = readl(dev->bar + NVME_REG_CSTS);
-
- /* Skip controllers under certain specific conditions. */
- if (nvme_should_reset(dev, csts)) {
- if (!nvme_reset(dev))
- nvme_warn_reset(dev, csts);
- return;
- }
-
- mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-}
-
static int nvme_create_io_queues(struct nvme_dev *dev)
{
unsigned i, max;
}
}
-static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
+{
+ size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs);
+ struct nvme_command c;
+ u64 dma_addr;
+ int ret;
+
+ dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev->dev, dma_addr))
+ return -ENOMEM;
+
+ memset(&c, 0, sizeof(c));
+ c.features.opcode = nvme_admin_set_features;
+ c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
+ c.features.dword11 = cpu_to_le32(bits);
+ c.features.dword12 = cpu_to_le32(dev->host_mem_size >>
+ ilog2(dev->ctrl.page_size));
+ c.features.dword13 = cpu_to_le32(lower_32_bits(dma_addr));
+ c.features.dword14 = cpu_to_le32(upper_32_bits(dma_addr));
+ c.features.dword15 = cpu_to_le32(dev->nr_host_mem_descs);
+
+ ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
+ if (ret) {
+ dev_warn(dev->ctrl.device,
+ "failed to set host mem (err %d, flags %#x).\n",
+ ret, bits);
+ }
+ dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE);
+ return ret;
+}
+
+static void nvme_free_host_mem(struct nvme_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->nr_host_mem_descs; i++) {
+ struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
+ size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
+
+ dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
+ le64_to_cpu(desc->addr));
+ }
+
+ kfree(dev->host_mem_desc_bufs);
+ dev->host_mem_desc_bufs = NULL;
+ kfree(dev->host_mem_descs);
+ dev->host_mem_descs = NULL;
+}
+
+static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
{
- return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
+ struct nvme_host_mem_buf_desc *descs;
+ u32 chunk_size, max_entries, i = 0;
+ void **bufs;
+ u64 size, tmp;
+
+ /* start big and work our way down */
+ chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
+retry:
+ tmp = (preferred + chunk_size - 1);
+ do_div(tmp, chunk_size);
+ max_entries = tmp;
+ descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL);
+ if (!descs)
+ goto out;
+
+ bufs = kcalloc(max_entries, sizeof(*bufs), GFP_KERNEL);
+ if (!bufs)
+ goto out_free_descs;
+
+ for (size = 0; size < preferred; size += chunk_size) {
+ u32 len = min_t(u64, chunk_size, preferred - size);
+ dma_addr_t dma_addr;
+
+ bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL,
+ DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
+ if (!bufs[i])
+ break;
+
+ descs[i].addr = cpu_to_le64(dma_addr);
+ descs[i].size = cpu_to_le32(len / dev->ctrl.page_size);
+ i++;
+ }
+
+ if (!size || (min && size < min)) {
+ dev_warn(dev->ctrl.device,
+ "failed to allocate host memory buffer.\n");
+ goto out_free_bufs;
+ }
+
+ dev_info(dev->ctrl.device,
+ "allocated %lld MiB host memory buffer.\n",
+ size >> ilog2(SZ_1M));
+ dev->nr_host_mem_descs = i;
+ dev->host_mem_size = size;
+ dev->host_mem_descs = descs;
+ dev->host_mem_desc_bufs = bufs;
+ return 0;
+
+out_free_bufs:
+ while (--i >= 0) {
+ size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
+
+ dma_free_coherent(dev->dev, size, bufs[i],
+ le64_to_cpu(descs[i].addr));
+ }
+
+ kfree(bufs);
+out_free_descs:
+ kfree(descs);
+out:
+ /* try a smaller chunk size if we failed early */
+ if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
+ chunk_size /= 2;
+ goto retry;
+ }
+ dev->host_mem_descs = NULL;
+ return -ENOMEM;
+}
+
+static void nvme_setup_host_mem(struct nvme_dev *dev)
+{
+ u64 max = (u64)max_host_mem_size_mb * SZ_1M;
+ u64 preferred = (u64)dev->ctrl.hmpre * 4096;
+ u64 min = (u64)dev->ctrl.hmmin * 4096;
+ u32 enable_bits = NVME_HOST_MEM_ENABLE;
+
+ preferred = min(preferred, max);
+ if (min > max) {
+ dev_warn(dev->ctrl.device,
+ "min host memory (%lld MiB) above limit (%d MiB).\n",
+ min >> ilog2(SZ_1M), max_host_mem_size_mb);
+ nvme_free_host_mem(dev);
+ return;
+ }
+
+ /*
+ * If we already have a buffer allocated check if we can reuse it.
+ */
+ if (dev->host_mem_descs) {
+ if (dev->host_mem_size >= min)
+ enable_bits |= NVME_HOST_MEM_RETURN;
+ else
+ nvme_free_host_mem(dev);
+ }
+
+ if (!dev->host_mem_descs) {
+ if (nvme_alloc_host_mem(dev, min, preferred))
+ return;
+ }
+
+ if (nvme_set_host_mem(dev, enable_bits))
+ nvme_free_host_mem(dev);
}
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = dev->queues[0];
struct pci_dev *pdev = to_pci_dev(dev->dev);
- int result, nr_io_queues, size;
+ int result, nr_io_queues;
+ unsigned long size;
nr_io_queues = num_online_cpus();
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
nvme_release_cmb(dev);
}
- size = db_bar_size(dev, nr_io_queues);
- if (size > 8192) {
- iounmap(dev->bar);
- do {
- dev->bar = ioremap(pci_resource_start(pdev, 0), size);
- if (dev->bar)
- break;
- if (!--nr_io_queues)
- return -ENOMEM;
- size = db_bar_size(dev, nr_io_queues);
- } while (1);
- dev->dbs = dev->bar + 4096;
- adminq->q_db = dev->dbs;
- }
+ do {
+ size = db_bar_size(dev, nr_io_queues);
+ result = nvme_remap_bar(dev, size);
+ if (!result)
+ break;
+ if (!--nr_io_queues)
+ return -ENOMEM;
+ } while (1);
+ adminq->q_db = dev->dbs;
/* Deregister the admin queue's interrupt */
pci_free_irq(pdev, 0, adminq);
return nvme_create_io_queues(dev);
}
-static void nvme_del_queue_end(struct request *req, int error)
+static void nvme_del_queue_end(struct request *req, blk_status_t error)
{
struct nvme_queue *nvmeq = req->end_io_data;
complete(&nvmeq->dev->ioq_wait);
}
-static void nvme_del_cq_end(struct request *req, int error)
+static void nvme_del_cq_end(struct request *req, blk_status_t error)
{
struct nvme_queue *nvmeq = req->end_io_data;
bool dead = true;
struct pci_dev *pdev = to_pci_dev(dev->dev);
- del_timer_sync(&dev->watchdog_timer);
-
mutex_lock(&dev->shutdown_lock);
if (pci_is_enabled(pdev)) {
u32 csts = readl(dev->bar + NVME_REG_CSTS);
- if (dev->ctrl.state == NVME_CTRL_LIVE)
+ if (dev->ctrl.state == NVME_CTRL_LIVE ||
+ dev->ctrl.state == NVME_CTRL_RESETTING)
nvme_start_freeze(&dev->ctrl);
dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
pdev->error_state != pci_channel_io_normal);
* Give the controller a chance to complete all entered requests if
* doing a safe shutdown.
*/
- if (!dead && shutdown)
- nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+ if (!dead) {
+ if (shutdown)
+ nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+
+ /*
+ * If the controller is still alive tell it to stop using the
+ * host memory buffer. In theory the shutdown / reset should
+ * make sure that it doesn't access the host memoery anymore,
+ * but I'd rather be safe than sorry..
+ */
+ if (dev->host_mem_descs)
+ nvme_set_host_mem(dev, 0);
+
+ }
nvme_stop_queues(&dev->ctrl);
queues = dev->online_queues - 1;
static void nvme_reset_work(struct work_struct *work)
{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
+ struct nvme_dev *dev =
+ container_of(work, struct nvme_dev, ctrl.reset_work);
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result = -ENODEV;
"unable to allocate dma for dbbuf\n");
}
+ if (dev->ctrl.hmpre)
+ nvme_setup_host_mem(dev);
+
result = nvme_setup_io_queues(dev);
if (result)
goto out;
if (dev->online_queues > 1)
nvme_queue_async_events(&dev->ctrl);
- mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-
/*
* Keep the controller around but remove all namespaces if we don't have
* any working I/O queue.
nvme_put_ctrl(&dev->ctrl);
}
-static int nvme_reset(struct nvme_dev *dev)
-{
- if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
- return -ENODEV;
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
- return -EBUSY;
- if (!queue_work(nvme_workq, &dev->reset_work))
- return -EBUSY;
- return 0;
-}
-
static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
*val = readl(to_nvme_dev(ctrl)->bar + off);
return 0;
}
-static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
-{
- struct nvme_dev *dev = to_nvme_dev(ctrl);
- int ret = nvme_reset(dev);
-
- if (!ret)
- flush_work(&dev->reset_work);
- return ret;
-}
-
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
.reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64,
- .reset_ctrl = nvme_pci_reset_ctrl,
.free_ctrl = nvme_pci_free_ctrl,
.submit_async_event = nvme_pci_submit_async_event,
};
if (pci_request_mem_regions(pdev, "nvme"))
return -ENODEV;
- dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
- if (!dev->bar)
+ if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
goto release;
return 0;
if (result)
goto free;
- INIT_WORK(&dev->reset_work, nvme_reset_work);
+ INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
- setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
- (unsigned long)dev);
mutex_init(&dev->shutdown_lock);
init_completion(&dev->ioq_wait);
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
- queue_work(nvme_workq, &dev->reset_work);
+ queue_work(nvme_wq, &dev->ctrl.reset_work);
return 0;
release_pools:
if (prepare)
nvme_dev_disable(dev, false);
else
- nvme_reset(dev);
+ nvme_reset_ctrl(&dev->ctrl);
}
static void nvme_shutdown(struct pci_dev *pdev)
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
- cancel_work_sync(&dev->reset_work);
+ cancel_work_sync(&dev->ctrl.reset_work);
pci_set_drvdata(pdev, NULL);
if (!pci_device_is_present(pdev)) {
nvme_dev_disable(dev, false);
}
- flush_work(&dev->reset_work);
+ flush_work(&dev->ctrl.reset_work);
nvme_uninit_ctrl(&dev->ctrl);
nvme_dev_disable(dev, true);
+ nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
nvme_release_prp_pools(dev);
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- nvme_reset(ndev);
+ nvme_reset_ctrl(&ndev->ctrl);
return 0;
}
#endif
dev_info(dev->ctrl.device, "restart after slot reset\n");
pci_restore_state(pdev);
- nvme_reset(dev);
+ nvme_reset_ctrl(&dev->ctrl);
return PCI_ERS_RESULT_RECOVERED;
}
static int __init nvme_init(void)
{
- int result;
-
- nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
- if (!nvme_workq)
- return -ENOMEM;
-
- result = pci_register_driver(&nvme_driver);
- if (result)
- destroy_workqueue(nvme_workq);
- return result;
+ return pci_register_driver(&nvme_driver);
}
static void __exit nvme_exit(void)
{
pci_unregister_driver(&nvme_driver);
- destroy_workqueue(nvme_workq);
_nvme_check_size();
}
*/
#define NVME_RDMA_NR_AEN_COMMANDS 1
#define NVME_RDMA_AQ_BLKMQ_DEPTH \
- (NVMF_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
+ (NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
struct nvme_rdma_device {
struct ib_device *dev;
};
enum nvme_rdma_queue_flags {
- NVME_RDMA_Q_CONNECTED = (1 << 0),
- NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
- NVME_RDMA_Q_DELETING = (1 << 2),
- NVME_RDMA_Q_LIVE = (1 << 3),
+ NVME_RDMA_Q_LIVE = 0,
+ NVME_RDMA_Q_DELETING = 1,
};
struct nvme_rdma_queue {
};
struct nvme_rdma_ctrl {
- /* read and written in the hot path */
- spinlock_t lock;
-
/* read only in the hot path */
struct nvme_rdma_queue *queues;
u32 queue_count;
/* other member variables */
struct blk_mq_tag_set tag_set;
struct work_struct delete_work;
- struct work_struct reset_work;
struct work_struct err_work;
struct nvme_rdma_qe async_event_sqe;
static LIST_HEAD(nvme_rdma_ctrl_list);
static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
-static struct workqueue_struct *nvme_rdma_wq;
-
/*
* Disabling this option makes small I/O goes faster, but is fundamentally
* unsafe. With it turned off we will have to register a global rkey that
return ret;
}
-static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
- struct request *rq, unsigned int queue_idx)
+static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
+ struct request *rq, unsigned int hctx_idx)
{
+ struct nvme_rdma_ctrl *ctrl = set->driver_data;
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
struct nvme_rdma_device *dev = queue->device;
DMA_TO_DEVICE);
}
-static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
- struct request *rq, unsigned int hctx_idx)
-{
- return __nvme_rdma_exit_request(set->driver_data, rq, hctx_idx + 1);
-}
-
-static void nvme_rdma_exit_admin_request(struct blk_mq_tag_set *set,
- struct request *rq, unsigned int hctx_idx)
-{
- return __nvme_rdma_exit_request(set->driver_data, rq, 0);
-}
-
-static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl,
- struct request *rq, unsigned int queue_idx)
+static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
+ struct request *rq, unsigned int hctx_idx,
+ unsigned int numa_node)
{
+ struct nvme_rdma_ctrl *ctrl = set->driver_data;
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
return -ENOMEM;
}
-static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
- struct request *rq, unsigned int hctx_idx,
- unsigned int numa_node)
-{
- return __nvme_rdma_init_request(set->driver_data, rq, hctx_idx + 1);
-}
-
-static int nvme_rdma_init_admin_request(struct blk_mq_tag_set *set,
- struct request *rq, unsigned int hctx_idx,
- unsigned int numa_node)
-{
- return __nvme_rdma_init_request(set->driver_data, rq, 0);
-}
-
static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
struct nvme_rdma_device *dev;
struct ib_device *ibdev;
- if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
- return;
-
dev = queue->device;
ibdev = dev->dev;
rdma_destroy_qp(queue->cm_id);
nvme_rdma_dev_put(dev);
}
-static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
- struct nvme_rdma_device *dev)
+static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
{
- struct ib_device *ibdev = dev->dev;
+ struct ib_device *ibdev;
const int send_wr_factor = 3; /* MR, SEND, INV */
const int cq_factor = send_wr_factor + 1; /* + RECV */
int comp_vector, idx = nvme_rdma_queue_idx(queue);
-
int ret;
- queue->device = dev;
+ queue->device = nvme_rdma_find_get_device(queue->cm_id);
+ if (!queue->device) {
+ dev_err(queue->cm_id->device->dev.parent,
+ "no client data found!\n");
+ return -ECONNREFUSED;
+ }
+ ibdev = queue->device->dev;
/*
* The admin queue is barely used once the controller is live, so don't
/* +1 for ib_stop_cq */
- queue->ib_cq = ib_alloc_cq(dev->dev, queue,
- cq_factor * queue->queue_size + 1, comp_vector,
- IB_POLL_SOFTIRQ);
+ queue->ib_cq = ib_alloc_cq(ibdev, queue,
+ cq_factor * queue->queue_size + 1,
+ comp_vector, IB_POLL_SOFTIRQ);
if (IS_ERR(queue->ib_cq)) {
ret = PTR_ERR(queue->ib_cq);
- goto out;
+ goto out_put_dev;
}
ret = nvme_rdma_create_qp(queue, send_wr_factor);
ret = -ENOMEM;
goto out_destroy_qp;
}
- set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);
return 0;
ib_destroy_qp(queue->qp);
out_destroy_ib_cq:
ib_free_cq(queue->ib_cq);
-out:
+out_put_dev:
+ nvme_rdma_dev_put(queue->device);
return ret;
}
}
clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
- set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags);
return 0;
out_destroy_cm_id:
- nvme_rdma_destroy_queue_ib(queue);
rdma_destroy_id(queue->cm_id);
return ret;
}
if (nvmf_should_reconnect(&ctrl->ctrl)) {
dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
ctrl->ctrl.opts->reconnect_delay);
- queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
+ queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
dev_info(ctrl->ctrl.device, "Removing controller...\n");
- queue_work(nvme_rdma_wq, &ctrl->delete_work);
+ queue_work(nvme_wq, &ctrl->delete_work);
}
}
bool changed;
int ret;
- ++ctrl->ctrl.opts->nr_reconnects;
+ ++ctrl->ctrl.nr_reconnects;
if (ctrl->queue_count > 1) {
nvme_rdma_free_io_queues(ctrl);
if (ret)
goto requeue;
- ret = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+ ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
if (ret)
goto requeue;
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
- ctrl->ctrl.opts->nr_reconnects = 0;
+ ctrl->ctrl.nr_reconnects = 0;
if (ctrl->queue_count > 1) {
nvme_queue_scan(&ctrl->ctrl);
requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
- ctrl->ctrl.opts->nr_reconnects);
+ ctrl->ctrl.nr_reconnects);
nvme_rdma_reconnect_or_remove(ctrl);
}
nvme_stop_keep_alive(&ctrl->ctrl);
- for (i = 0; i < ctrl->queue_count; i++) {
- clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
+ for (i = 0; i < ctrl->queue_count; i++)
clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
- }
if (ctrl->queue_count > 1)
nvme_stop_queues(&ctrl->ctrl);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING))
return;
- queue_work(nvme_rdma_wq, &ctrl->err_work);
+ queue_work(nvme_wq, &ctrl->err_work);
}
static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
{
- struct nvme_rdma_device *dev;
int ret;
- dev = nvme_rdma_find_get_device(queue->cm_id);
- if (!dev) {
- dev_err(queue->cm_id->device->dev.parent,
- "no client data found!\n");
- return -ECONNREFUSED;
- }
-
- ret = nvme_rdma_create_queue_ib(queue, dev);
- if (ret) {
- nvme_rdma_dev_put(dev);
- goto out;
- }
+ ret = nvme_rdma_create_queue_ib(queue);
+ if (ret)
+ return ret;
ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
if (ret) {
out_destroy_queue:
nvme_rdma_destroy_queue_ib(queue);
-out:
return ret;
}
* specified by the Fabrics standard.
*/
if (priv.qid == 0) {
- priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH);
- priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+ priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
+ priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
} else {
/*
* current interpretation of the fabrics spec
complete(&queue->cm_done);
return 0;
case RDMA_CM_EVENT_REJECTED:
+ nvme_rdma_destroy_queue_ib(queue);
cm_error = nvme_rdma_conn_rejected(queue, ev);
break;
- case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
+ nvme_rdma_destroy_queue_ib(queue);
+ case RDMA_CM_EVENT_ADDR_ERROR:
dev_dbg(queue->ctrl->ctrl.device,
"CM error event %d\n", ev->event);
cm_error = -ECONNRESET;
/*
* We cannot accept any other command until the Connect command has completed.
*/
-static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
- struct request *rq)
+static inline blk_status_t
+nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
{
if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
struct nvme_command *cmd = nvme_req(rq)->cmd;
* failover.
*/
if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
- return -EIO;
- else
- return -EAGAIN;
+ return BLK_STS_IOERR;
+ return BLK_STS_RESOURCE; /* try again later */
}
}
return 0;
}
-static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
struct nvme_command *c = sqe->data;
bool flush = false;
struct ib_device *dev;
- int ret;
+ blk_status_t ret;
+ int err;
WARN_ON_ONCE(rq->tag < 0);
ret = nvme_rdma_queue_is_ready(queue, rq);
if (unlikely(ret))
- goto err;
+ return ret;
dev = queue->device->dev;
ib_dma_sync_single_for_cpu(dev, sqe->dma,
sizeof(struct nvme_command), DMA_TO_DEVICE);
ret = nvme_setup_cmd(ns, rq, c);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
return ret;
blk_mq_start_request(rq);
- ret = nvme_rdma_map_data(queue, rq, c);
- if (ret < 0) {
+ err = nvme_rdma_map_data(queue, rq, c);
+ if (err < 0) {
dev_err(queue->ctrl->ctrl.device,
- "Failed to map data (%d)\n", ret);
+ "Failed to map data (%d)\n", err);
nvme_cleanup_cmd(rq);
goto err;
}
if (req_op(rq) == REQ_OP_FLUSH)
flush = true;
- ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
+ err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
- if (ret) {
+ if (err) {
nvme_rdma_unmap_data(queue, rq);
goto err;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
err:
- return (ret == -ENOMEM || ret == -EAGAIN) ?
- BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+ if (err == -ENOMEM || err == -EAGAIN)
+ return BLK_STS_RESOURCE;
+ return BLK_STS_IOERR;
}
static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
struct ib_wc wc;
int found = 0;
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
while (ib_poll_cq(cq, 1, &wc) > 0) {
struct ib_cqe *cqe = wc.wr_cqe;
static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
.queue_rq = nvme_rdma_queue_rq,
.complete = nvme_rdma_complete_rq,
- .init_request = nvme_rdma_init_admin_request,
- .exit_request = nvme_rdma_exit_admin_request,
+ .init_request = nvme_rdma_init_request,
+ .exit_request = nvme_rdma_exit_request,
.reinit_request = nvme_rdma_reinit_request,
.init_hctx = nvme_rdma_init_admin_hctx,
.timeout = nvme_rdma_timeout,
{
int error;
- error = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+ error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
if (error)
return error;
nvme_rdma_free_io_queues(ctrl);
}
- if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
+ if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
nvme_shutdown_ctrl(&ctrl->ctrl);
blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return -EBUSY;
- if (!queue_work(nvme_rdma_wq, &ctrl->delete_work))
+ if (!queue_work(nvme_wq, &ctrl->delete_work))
return -EBUSY;
return 0;
static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{
- struct nvme_rdma_ctrl *ctrl = container_of(work,
- struct nvme_rdma_ctrl, reset_work);
+ struct nvme_rdma_ctrl *ctrl =
+ container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
int ret;
bool changed;
del_dead_ctrl:
/* Deleting this dead controller... */
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
- WARN_ON(!queue_work(nvme_rdma_wq, &ctrl->delete_work));
-}
-
-static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl)
-{
- struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
- return -EBUSY;
-
- if (!queue_work(nvme_rdma_wq, &ctrl->reset_work))
- return -EBUSY;
-
- flush_work(&ctrl->reset_work);
-
- return 0;
+ WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work));
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .reset_ctrl = nvme_rdma_reset_ctrl,
.free_ctrl = nvme_rdma_free_ctrl,
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_del_ctrl,
- .get_subsysnqn = nvmf_get_subsysnqn,
.get_address = nvmf_get_address,
};
nvme_rdma_reconnect_ctrl_work);
INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
- INIT_WORK(&ctrl->reset_work, nvme_rdma_reset_ctrl_work);
- spin_lock_init(&ctrl->lock);
+ INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
ctrl->ctrl.sqsize = opts->queue_size - 1;
/* sanity check icdoff */
if (ctrl->ctrl.icdoff) {
dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
+ ret = -EINVAL;
goto out_remove_admin_queue;
}
/* sanity check keyed sgls */
if (!(ctrl->ctrl.sgls & (1 << 20))) {
dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not support\n");
+ ret = -EINVAL;
goto out_remove_admin_queue;
}
}
mutex_unlock(&nvme_rdma_ctrl_mutex);
- flush_workqueue(nvme_rdma_wq);
+ flush_workqueue(nvme_wq);
}
static struct ib_client nvme_rdma_ib_client = {
{
int ret;
- nvme_rdma_wq = create_workqueue("nvme_rdma_wq");
- if (!nvme_rdma_wq)
- return -ENOMEM;
-
ret = ib_register_client(&nvme_rdma_ib_client);
if (ret)
- goto err_destroy_wq;
+ return ret;
ret = nvmf_register_transport(&nvme_rdma_transport);
if (ret)
err_unreg_client:
ib_unregister_client(&nvme_rdma_ib_client);
-err_destroy_wq:
- destroy_workqueue(nvme_rdma_wq);
return ret;
}
{
nvmf_unregister_transport(&nvme_rdma_transport);
ib_unregister_client(&nvme_rdma_ib_client);
- destroy_workqueue(nvme_rdma_wq);
}
module_init(nvme_rdma_init_module);
+++ /dev/null
-/*
- * NVM Express device driver
- * Copyright (c) 2011-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
-
-/*
- * Refer to the SCSI-NVMe Translation spec for details on how
- * each command is translated.
- */
-
-#include <linux/bio.h>
-#include <linux/bitops.h>
-#include <linux/blkdev.h>
-#include <linux/compat.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/idr.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kthread.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/poison.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <asm/unaligned.h>
-#include <scsi/sg.h>
-#include <scsi/scsi.h>
-#include <scsi/scsi_request.h>
-
-#include "nvme.h"
-
-static int sg_version_num = 30534; /* 2 digits for each component */
-
-/* VPD Page Codes */
-#define VPD_SUPPORTED_PAGES 0x00
-#define VPD_SERIAL_NUMBER 0x80
-#define VPD_DEVICE_IDENTIFIERS 0x83
-#define VPD_EXTENDED_INQUIRY 0x86
-#define VPD_BLOCK_LIMITS 0xB0
-#define VPD_BLOCK_DEV_CHARACTERISTICS 0xB1
-
-/* format unit paramter list offsets */
-#define FORMAT_UNIT_SHORT_PARM_LIST_LEN 4
-#define FORMAT_UNIT_LONG_PARM_LIST_LEN 8
-#define FORMAT_UNIT_PROT_INT_OFFSET 3
-#define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET 0
-#define FORMAT_UNIT_PROT_FIELD_USAGE_MASK 0x07
-
-/* Misc. defines */
-#define FIXED_SENSE_DATA 0x70
-#define DESC_FORMAT_SENSE_DATA 0x72
-#define FIXED_SENSE_DATA_ADD_LENGTH 10
-#define LUN_ENTRY_SIZE 8
-#define LUN_DATA_HEADER_SIZE 8
-#define ALL_LUNS_RETURNED 0x02
-#define ALL_WELL_KNOWN_LUNS_RETURNED 0x01
-#define RESTRICTED_LUNS_RETURNED 0x00
-#define DOWNLOAD_SAVE_ACTIVATE 0x05
-#define DOWNLOAD_SAVE_DEFER_ACTIVATE 0x0E
-#define ACTIVATE_DEFERRED_MICROCODE 0x0F
-#define FORMAT_UNIT_IMMED_MASK 0x2
-#define FORMAT_UNIT_IMMED_OFFSET 1
-#define KELVIN_TEMP_FACTOR 273
-#define FIXED_FMT_SENSE_DATA_SIZE 18
-#define DESC_FMT_SENSE_DATA_SIZE 8
-
-/* SCSI/NVMe defines and bit masks */
-#define INQ_STANDARD_INQUIRY_PAGE 0x00
-#define INQ_SUPPORTED_VPD_PAGES_PAGE 0x00
-#define INQ_UNIT_SERIAL_NUMBER_PAGE 0x80
-#define INQ_DEVICE_IDENTIFICATION_PAGE 0x83
-#define INQ_EXTENDED_INQUIRY_DATA_PAGE 0x86
-#define INQ_BDEV_LIMITS_PAGE 0xB0
-#define INQ_BDEV_CHARACTERISTICS_PAGE 0xB1
-#define INQ_SERIAL_NUMBER_LENGTH 0x14
-#define INQ_NUM_SUPPORTED_VPD_PAGES 6
-#define VERSION_SPC_4 0x06
-#define ACA_UNSUPPORTED 0
-#define STANDARD_INQUIRY_LENGTH 36
-#define ADDITIONAL_STD_INQ_LENGTH 31
-#define EXTENDED_INQUIRY_DATA_PAGE_LENGTH 0x3C
-#define RESERVED_FIELD 0
-
-/* Mode Sense/Select defines */
-#define MODE_PAGE_INFO_EXCEP 0x1C
-#define MODE_PAGE_CACHING 0x08
-#define MODE_PAGE_CONTROL 0x0A
-#define MODE_PAGE_POWER_CONDITION 0x1A
-#define MODE_PAGE_RETURN_ALL 0x3F
-#define MODE_PAGE_BLK_DES_LEN 0x08
-#define MODE_PAGE_LLBAA_BLK_DES_LEN 0x10
-#define MODE_PAGE_CACHING_LEN 0x14
-#define MODE_PAGE_CONTROL_LEN 0x0C
-#define MODE_PAGE_POW_CND_LEN 0x28
-#define MODE_PAGE_INF_EXC_LEN 0x0C
-#define MODE_PAGE_ALL_LEN 0x54
-#define MODE_SENSE6_MPH_SIZE 4
-#define MODE_SENSE_PAGE_CONTROL_MASK 0xC0
-#define MODE_SENSE_PAGE_CODE_OFFSET 2
-#define MODE_SENSE_PAGE_CODE_MASK 0x3F
-#define MODE_SENSE_LLBAA_MASK 0x10
-#define MODE_SENSE_LLBAA_SHIFT 4
-#define MODE_SENSE_DBD_MASK 8
-#define MODE_SENSE_DBD_SHIFT 3
-#define MODE_SENSE10_MPH_SIZE 8
-#define MODE_SELECT_CDB_PAGE_FORMAT_MASK 0x10
-#define MODE_SELECT_CDB_SAVE_PAGES_MASK 0x1
-#define MODE_SELECT_6_BD_OFFSET 3
-#define MODE_SELECT_10_BD_OFFSET 6
-#define MODE_SELECT_10_LLBAA_OFFSET 4
-#define MODE_SELECT_10_LLBAA_MASK 1
-#define MODE_SELECT_6_MPH_SIZE 4
-#define MODE_SELECT_10_MPH_SIZE 8
-#define CACHING_MODE_PAGE_WCE_MASK 0x04
-#define MODE_SENSE_BLK_DESC_ENABLED 0
-#define MODE_SENSE_BLK_DESC_COUNT 1
-#define MODE_SELECT_PAGE_CODE_MASK 0x3F
-#define SHORT_DESC_BLOCK 8
-#define LONG_DESC_BLOCK 16
-#define MODE_PAGE_POW_CND_LEN_FIELD 0x26
-#define MODE_PAGE_INF_EXC_LEN_FIELD 0x0A
-#define MODE_PAGE_CACHING_LEN_FIELD 0x12
-#define MODE_PAGE_CONTROL_LEN_FIELD 0x0A
-#define MODE_SENSE_PC_CURRENT_VALUES 0
-
-/* Log Sense defines */
-#define LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE 0x00
-#define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH 0x07
-#define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE 0x2F
-#define LOG_PAGE_TEMPERATURE_PAGE 0x0D
-#define LOG_SENSE_CDB_SP_NOT_ENABLED 0
-#define LOG_SENSE_CDB_PC_MASK 0xC0
-#define LOG_SENSE_CDB_PC_SHIFT 6
-#define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES 1
-#define LOG_SENSE_CDB_PAGE_CODE_MASK 0x3F
-#define REMAINING_INFO_EXCP_PAGE_LENGTH 0x8
-#define LOG_INFO_EXCP_PAGE_LENGTH 0xC
-#define REMAINING_TEMP_PAGE_LENGTH 0xC
-#define LOG_TEMP_PAGE_LENGTH 0x10
-#define LOG_TEMP_UNKNOWN 0xFF
-#define SUPPORTED_LOG_PAGES_PAGE_LENGTH 0x3
-
-/* Read Capacity defines */
-#define READ_CAP_10_RESP_SIZE 8
-#define READ_CAP_16_RESP_SIZE 32
-
-/* NVMe Namespace and Command Defines */
-#define BYTES_TO_DWORDS 4
-#define NVME_MAX_FIRMWARE_SLOT 7
-
-/* Report LUNs defines */
-#define REPORT_LUNS_FIRST_LUN_OFFSET 8
-
-/* SCSI ADDITIONAL SENSE Codes */
-
-#define SCSI_ASC_NO_SENSE 0x00
-#define SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT 0x03
-#define SCSI_ASC_LUN_NOT_READY 0x04
-#define SCSI_ASC_WARNING 0x0B
-#define SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED 0x10
-#define SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED 0x10
-#define SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED 0x10
-#define SCSI_ASC_UNRECOVERED_READ_ERROR 0x11
-#define SCSI_ASC_MISCOMPARE_DURING_VERIFY 0x1D
-#define SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID 0x20
-#define SCSI_ASC_ILLEGAL_COMMAND 0x20
-#define SCSI_ASC_ILLEGAL_BLOCK 0x21
-#define SCSI_ASC_INVALID_CDB 0x24
-#define SCSI_ASC_INVALID_LUN 0x25
-#define SCSI_ASC_INVALID_PARAMETER 0x26
-#define SCSI_ASC_FORMAT_COMMAND_FAILED 0x31
-#define SCSI_ASC_INTERNAL_TARGET_FAILURE 0x44
-
-/* SCSI ADDITIONAL SENSE Code Qualifiers */
-
-#define SCSI_ASCQ_CAUSE_NOT_REPORTABLE 0x00
-#define SCSI_ASCQ_FORMAT_COMMAND_FAILED 0x01
-#define SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED 0x01
-#define SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED 0x02
-#define SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED 0x03
-#define SCSI_ASCQ_FORMAT_IN_PROGRESS 0x04
-#define SCSI_ASCQ_POWER_LOSS_EXPECTED 0x08
-#define SCSI_ASCQ_INVALID_LUN_ID 0x09
-
-/* copied from drivers/usb/gadget/function/storage_common.h */
-static inline u32 get_unaligned_be24(u8 *buf)
-{
- return 0xffffff & (u32) get_unaligned_be32(buf - 1);
-}
-
-/* Struct to gather data that needs to be extracted from a SCSI CDB.
- Not conforming to any particular CDB variant, but compatible with all. */
-
-struct nvme_trans_io_cdb {
- u8 fua;
- u8 prot_info;
- u64 lba;
- u32 xfer_len;
-};
-
-
-/* Internal Helper Functions */
-
-
-/* Copy data to userspace memory */
-
-static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
- unsigned long n)
-{
- int i;
- void *index = from;
- size_t remaining = n;
- size_t xfer_len;
-
- if (hdr->iovec_count > 0) {
- struct sg_iovec sgl;
-
- for (i = 0; i < hdr->iovec_count; i++) {
- if (copy_from_user(&sgl, hdr->dxferp +
- i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec)))
- return -EFAULT;
- xfer_len = min(remaining, sgl.iov_len);
- if (copy_to_user(sgl.iov_base, index, xfer_len))
- return -EFAULT;
-
- index += xfer_len;
- remaining -= xfer_len;
- if (remaining == 0)
- break;
- }
- return 0;
- }
-
- if (copy_to_user(hdr->dxferp, from, n))
- return -EFAULT;
- return 0;
-}
-
-/* Copy data from userspace memory */
-
-static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
- unsigned long n)
-{
- int i;
- void *index = to;
- size_t remaining = n;
- size_t xfer_len;
-
- if (hdr->iovec_count > 0) {
- struct sg_iovec sgl;
-
- for (i = 0; i < hdr->iovec_count; i++) {
- if (copy_from_user(&sgl, hdr->dxferp +
- i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec)))
- return -EFAULT;
- xfer_len = min(remaining, sgl.iov_len);
- if (copy_from_user(index, sgl.iov_base, xfer_len))
- return -EFAULT;
- index += xfer_len;
- remaining -= xfer_len;
- if (remaining == 0)
- break;
- }
- return 0;
- }
-
- if (copy_from_user(to, hdr->dxferp, n))
- return -EFAULT;
- return 0;
-}
-
-/* Status/Sense Buffer Writeback */
-
-static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
- u8 asc, u8 ascq)
-{
- u8 xfer_len;
- u8 resp[DESC_FMT_SENSE_DATA_SIZE];
-
- if (scsi_status_is_good(status)) {
- hdr->status = SAM_STAT_GOOD;
- hdr->masked_status = GOOD;
- hdr->host_status = DID_OK;
- hdr->driver_status = DRIVER_OK;
- hdr->sb_len_wr = 0;
- } else {
- hdr->status = status;
- hdr->masked_status = status >> 1;
- hdr->host_status = DID_OK;
- hdr->driver_status = DRIVER_OK;
-
- memset(resp, 0, DESC_FMT_SENSE_DATA_SIZE);
- resp[0] = DESC_FORMAT_SENSE_DATA;
- resp[1] = sense_key;
- resp[2] = asc;
- resp[3] = ascq;
-
- xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE);
- hdr->sb_len_wr = xfer_len;
- if (copy_to_user(hdr->sbp, resp, xfer_len) > 0)
- return -EFAULT;
- }
-
- return 0;
-}
-
-/*
- * Take a status code from a lowlevel routine, and if it was a positive NVMe
- * error code update the sense data based on it. In either case the passed
- * in value is returned again, unless an -EFAULT from copy_to_user overrides
- * it.
- */
-static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
-{
- u8 status, sense_key, asc, ascq;
- int res;
-
- /* For non-nvme (Linux) errors, simply return the error code */
- if (nvme_sc < 0)
- return nvme_sc;
-
- /* Mask DNR, More, and reserved fields */
- switch (nvme_sc & 0x7FF) {
- /* Generic Command Status */
- case NVME_SC_SUCCESS:
- status = SAM_STAT_GOOD;
- sense_key = NO_SENSE;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_INVALID_OPCODE:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_ILLEGAL_COMMAND;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_INVALID_FIELD:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_INVALID_CDB;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_DATA_XFER_ERROR:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_POWER_LOSS:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_WARNING;
- ascq = SCSI_ASCQ_POWER_LOSS_EXPECTED;
- break;
- case NVME_SC_INTERNAL:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = HARDWARE_ERROR;
- asc = SCSI_ASC_INTERNAL_TARGET_FAILURE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_ABORT_REQ:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_ABORT_QUEUE:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_FUSED_FAIL:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_FUSED_MISSING:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_INVALID_NS:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
- ascq = SCSI_ASCQ_INVALID_LUN_ID;
- break;
- case NVME_SC_LBA_RANGE:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_ILLEGAL_BLOCK;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_CAP_EXCEEDED:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_NS_NOT_READY:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = NOT_READY;
- asc = SCSI_ASC_LUN_NOT_READY;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
-
- /* Command Specific Status */
- case NVME_SC_INVALID_FORMAT:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_FORMAT_COMMAND_FAILED;
- ascq = SCSI_ASCQ_FORMAT_COMMAND_FAILED;
- break;
- case NVME_SC_BAD_ATTRIBUTES:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_INVALID_CDB;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
-
- /* Media Errors */
- case NVME_SC_WRITE_FAULT:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_READ_ERROR:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_UNRECOVERED_READ_ERROR;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_GUARD_CHECK:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED;
- ascq = SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED;
- break;
- case NVME_SC_APPTAG_CHECK:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED;
- ascq = SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED;
- break;
- case NVME_SC_REFTAG_CHECK:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED;
- ascq = SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED;
- break;
- case NVME_SC_COMPARE_FAILED:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MISCOMPARE;
- asc = SCSI_ASC_MISCOMPARE_DURING_VERIFY;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_ACCESS_DENIED:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
- ascq = SCSI_ASCQ_INVALID_LUN_ID;
- break;
-
- /* Unspecified/Default */
- case NVME_SC_CMDID_CONFLICT:
- case NVME_SC_CMD_SEQ_ERROR:
- case NVME_SC_CQ_INVALID:
- case NVME_SC_QID_INVALID:
- case NVME_SC_QUEUE_SIZE:
- case NVME_SC_ABORT_LIMIT:
- case NVME_SC_ABORT_MISSING:
- case NVME_SC_ASYNC_LIMIT:
- case NVME_SC_FIRMWARE_SLOT:
- case NVME_SC_FIRMWARE_IMAGE:
- case NVME_SC_INVALID_VECTOR:
- case NVME_SC_INVALID_LOG_PAGE:
- default:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- }
-
- res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
- return res ? res : nvme_sc;
-}
-
-/* INQUIRY Helper Functions */
-
-static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *inq_response,
- int alloc_len)
-{
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_id_ns *id_ns;
- int res;
- int nvme_sc;
- int xfer_len;
- u8 resp_data_format = 0x02;
- u8 protect;
- u8 cmdque = 0x01 << 1;
- u8 fw_offset = sizeof(ctrl->firmware_rev);
-
- /* nvme ns identify - use DPS value for PROTECT field */
- nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- if (id_ns->dps)
- protect = 0x01;
- else
- protect = 0;
- kfree(id_ns);
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
- inq_response[2] = VERSION_SPC_4;
- inq_response[3] = resp_data_format; /*normaca=0 | hisup=0 */
- inq_response[4] = ADDITIONAL_STD_INQ_LENGTH;
- inq_response[5] = protect; /* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
- inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */
- strncpy(&inq_response[8], "NVMe ", 8);
- strncpy(&inq_response[16], ctrl->model, 16);
-
- while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
- fw_offset--;
- fw_offset -= 4;
- strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
-
- xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *inq_response,
- int alloc_len)
-{
- int xfer_len;
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
- inq_response[1] = INQ_SUPPORTED_VPD_PAGES_PAGE; /* Page Code */
- inq_response[3] = INQ_NUM_SUPPORTED_VPD_PAGES; /* Page Length */
- inq_response[4] = INQ_SUPPORTED_VPD_PAGES_PAGE;
- inq_response[5] = INQ_UNIT_SERIAL_NUMBER_PAGE;
- inq_response[6] = INQ_DEVICE_IDENTIFICATION_PAGE;
- inq_response[7] = INQ_EXTENDED_INQUIRY_DATA_PAGE;
- inq_response[8] = INQ_BDEV_CHARACTERISTICS_PAGE;
- inq_response[9] = INQ_BDEV_LIMITS_PAGE;
-
- xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *inq_response,
- int alloc_len)
-{
- int xfer_len;
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
- inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
- inq_response[3] = INQ_SERIAL_NUMBER_LENGTH; /* Page Length */
- strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
-
- xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *inq_response, int alloc_len)
-{
- struct nvme_id_ns *id_ns;
- int nvme_sc, res;
- size_t len;
- void *eui;
-
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- eui = id_ns->eui64;
- len = sizeof(id_ns->eui64);
-
- if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) {
- if (bitmap_empty(eui, len * 8)) {
- eui = id_ns->nguid;
- len = sizeof(id_ns->nguid);
- }
- }
-
- if (bitmap_empty(eui, len * 8)) {
- res = -EOPNOTSUPP;
- goto out_free_id;
- }
-
- memset(inq_response, 0, alloc_len);
- inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
- inq_response[3] = 4 + len; /* Page Length */
-
- /* Designation Descriptor start */
- inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
- inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */
- inq_response[6] = 0x00; /* Rsvd */
- inq_response[7] = len; /* Designator Length */
- memcpy(&inq_response[8], eui, len);
-
- res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
-out_free_id:
- kfree(id_ns);
- return res;
-}
-
-static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
-{
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_id_ctrl *id_ctrl;
- int nvme_sc, res;
-
- if (alloc_len < 72) {
- return nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
-
- nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- memset(inq_response, 0, alloc_len);
- inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
- inq_response[3] = 0x48; /* Page Length */
-
- /* Designation Descriptor start */
- inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */
- inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */
- inq_response[6] = 0x00; /* Rsvd */
- inq_response[7] = 0x44; /* Designator Length */
-
- sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
- memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
- sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
- memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
-
- res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
- kfree(id_ctrl);
- return res;
-}
-
-static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *resp, int alloc_len)
-{
- int res;
-
- if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) {
- res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
- if (res != -EOPNOTSUPP)
- return res;
- }
-
- return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len);
-}
-
-static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- int alloc_len)
-{
- u8 *inq_response;
- int res;
- int nvme_sc;
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_id_ctrl *id_ctrl;
- struct nvme_id_ns *id_ns;
- int xfer_len;
- u8 microcode = 0x80;
- u8 spt;
- u8 spt_lut[8] = {0, 0, 2, 1, 4, 6, 5, 7};
- u8 grd_chk, app_chk, ref_chk, protect;
- u8 uask_sup = 0x20;
- u8 v_sup;
- u8 luiclr = 0x01;
-
- inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
- if (inq_response == NULL)
- return -ENOMEM;
-
- nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_free_inq;
-
- spt = spt_lut[id_ns->dpc & 0x07] << 3;
- if (id_ns->dps)
- protect = 0x01;
- else
- protect = 0;
- kfree(id_ns);
-
- grd_chk = protect << 2;
- app_chk = protect << 1;
- ref_chk = protect;
-
- nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_free_inq;
-
- v_sup = id_ctrl->vwc;
- kfree(id_ctrl);
-
- memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
- inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE; /* Page Code */
- inq_response[2] = 0x00; /* Page Length MSB */
- inq_response[3] = 0x3C; /* Page Length LSB */
- inq_response[4] = microcode | spt | grd_chk | app_chk | ref_chk;
- inq_response[5] = uask_sup;
- inq_response[6] = v_sup;
- inq_response[7] = luiclr;
- inq_response[8] = 0;
- inq_response[9] = 0;
-
- xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free_inq:
- kfree(inq_response);
- return res;
-}
-
-static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *inq_response, int alloc_len)
-{
- __be32 max_sectors = cpu_to_be32(
- nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
- __be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
- __be32 discard_desc_count = cpu_to_be32(0x100);
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
- inq_response[1] = VPD_BLOCK_LIMITS;
- inq_response[3] = 0x3c; /* Page Length */
- memcpy(&inq_response[8], &max_sectors, sizeof(u32));
- memcpy(&inq_response[20], &max_discard, sizeof(u32));
-
- if (max_discard)
- memcpy(&inq_response[24], &discard_desc_count, sizeof(u32));
-
- return nvme_trans_copy_to_user(hdr, inq_response, 0x3c);
-}
-
-static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- int alloc_len)
-{
- u8 *inq_response;
- int res;
- int xfer_len;
-
- inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
- if (inq_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */
- inq_response[2] = 0x00; /* Page Length MSB */
- inq_response[3] = 0x3C; /* Page Length LSB */
- inq_response[4] = 0x00; /* Medium Rotation Rate MSB */
- inq_response[5] = 0x01; /* Medium Rotation Rate LSB */
- inq_response[6] = 0x00; /* Form Factor */
-
- xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- kfree(inq_response);
- out_mem:
- return res;
-}
-
-/* LOG SENSE Helper Functions */
-
-static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- int alloc_len)
-{
- int res;
- int xfer_len;
- u8 *log_response;
-
- log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
- if (log_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
- /* Subpage=0x00, Page Length MSB=0 */
- log_response[3] = SUPPORTED_LOG_PAGES_PAGE_LENGTH;
- log_response[4] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
- log_response[5] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
- log_response[6] = LOG_PAGE_TEMPERATURE_PAGE;
-
- xfer_len = min(alloc_len, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH);
- res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- kfree(log_response);
- out_mem:
- return res;
-}
-
-static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, int alloc_len)
-{
- int res;
- int xfer_len;
- u8 *log_response;
- struct nvme_smart_log *smart_log;
- u8 temp_c;
- u16 temp_k;
-
- log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
- if (log_response == NULL)
- return -ENOMEM;
-
- res = nvme_get_log_page(ns->ctrl, &smart_log);
- if (res < 0)
- goto out_free_response;
-
- if (res != NVME_SC_SUCCESS) {
- temp_c = LOG_TEMP_UNKNOWN;
- } else {
- temp_k = (smart_log->temperature[1] << 8) +
- (smart_log->temperature[0]);
- temp_c = temp_k - KELVIN_TEMP_FACTOR;
- }
- kfree(smart_log);
-
- log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
- /* Subpage=0x00, Page Length MSB=0 */
- log_response[3] = REMAINING_INFO_EXCP_PAGE_LENGTH;
- /* Informational Exceptions Log Parameter 1 Start */
- /* Parameter Code=0x0000 bytes 4,5 */
- log_response[6] = 0x23; /* DU=0, TSD=1, ETC=0, TMC=0, FMT_AND_LNK=11b */
- log_response[7] = 0x04; /* PARAMETER LENGTH */
- /* Add sense Code and qualifier = 0x00 each */
- /* Use Temperature from NVMe Get Log Page, convert to C from K */
- log_response[10] = temp_c;
-
- xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
- res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- out_free_response:
- kfree(log_response);
- return res;
-}
-
-static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- int alloc_len)
-{
- int res;
- int xfer_len;
- u8 *log_response;
- struct nvme_smart_log *smart_log;
- u32 feature_resp;
- u8 temp_c_cur, temp_c_thresh;
- u16 temp_k;
-
- log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
- if (log_response == NULL)
- return -ENOMEM;
-
- res = nvme_get_log_page(ns->ctrl, &smart_log);
- if (res < 0)
- goto out_free_response;
-
- if (res != NVME_SC_SUCCESS) {
- temp_c_cur = LOG_TEMP_UNKNOWN;
- } else {
- temp_k = (smart_log->temperature[1] << 8) +
- (smart_log->temperature[0]);
- temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
- }
- kfree(smart_log);
-
- /* Get Features for Temp Threshold */
- res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, NULL, 0,
- &feature_resp);
- if (res != NVME_SC_SUCCESS)
- temp_c_thresh = LOG_TEMP_UNKNOWN;
- else
- temp_c_thresh = (feature_resp & 0xFFFF) - KELVIN_TEMP_FACTOR;
-
- log_response[0] = LOG_PAGE_TEMPERATURE_PAGE;
- /* Subpage=0x00, Page Length MSB=0 */
- log_response[3] = REMAINING_TEMP_PAGE_LENGTH;
- /* Temperature Log Parameter 1 (Temperature) Start */
- /* Parameter Code = 0x0000 */
- log_response[6] = 0x01; /* Format and Linking = 01b */
- log_response[7] = 0x02; /* Parameter Length */
- /* Use Temperature from NVMe Get Log Page, convert to C from K */
- log_response[9] = temp_c_cur;
- /* Temperature Log Parameter 2 (Reference Temperature) Start */
- log_response[11] = 0x01; /* Parameter Code = 0x0001 */
- log_response[12] = 0x01; /* Format and Linking = 01b */
- log_response[13] = 0x02; /* Parameter Length */
- /* Use Temperature Thresh from NVMe Get Log Page, convert to C from K */
- log_response[15] = temp_c_thresh;
-
- xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
- res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- out_free_response:
- kfree(log_response);
- return res;
-}
-
-/* MODE SENSE Helper Functions */
-
-static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
- u16 mode_data_length, u16 blk_desc_len)
-{
- /* Quick check to make sure I don't stomp on my own memory... */
- if ((cdb10 && len < 8) || (!cdb10 && len < 4))
- return -EINVAL;
-
- if (cdb10) {
- resp[0] = (mode_data_length & 0xFF00) >> 8;
- resp[1] = (mode_data_length & 0x00FF);
- resp[3] = 0x10 /* DPOFUA */;
- resp[4] = llbaa;
- resp[5] = RESERVED_FIELD;
- resp[6] = (blk_desc_len & 0xFF00) >> 8;
- resp[7] = (blk_desc_len & 0x00FF);
- } else {
- resp[0] = (mode_data_length & 0x00FF);
- resp[2] = 0x10 /* DPOFUA */;
- resp[3] = (blk_desc_len & 0x00FF);
- }
-
- return 0;
-}
-
-static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *resp, int len, u8 llbaa)
-{
- int res;
- int nvme_sc;
- struct nvme_id_ns *id_ns;
- u8 flbas;
- u32 lba_length;
-
- if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN)
- return -EINVAL;
- else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
- return -EINVAL;
-
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- flbas = (id_ns->flbas) & 0x0F;
- lba_length = (1 << (id_ns->lbaf[flbas].ds));
-
- if (llbaa == 0) {
- __be32 tmp_cap = cpu_to_be32(le64_to_cpu(id_ns->ncap));
- /* Byte 4 is reserved */
- __be32 tmp_len = cpu_to_be32(lba_length & 0x00FFFFFF);
-
- memcpy(resp, &tmp_cap, sizeof(u32));
- memcpy(&resp[4], &tmp_len, sizeof(u32));
- } else {
- __be64 tmp_cap = cpu_to_be64(le64_to_cpu(id_ns->ncap));
- __be32 tmp_len = cpu_to_be32(lba_length);
-
- memcpy(resp, &tmp_cap, sizeof(u64));
- /* Bytes 8, 9, 10, 11 are reserved */
- memcpy(&resp[12], &tmp_len, sizeof(u32));
- }
-
- kfree(id_ns);
- return res;
-}
-
-static int nvme_trans_fill_control_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *resp,
- int len)
-{
- if (len < MODE_PAGE_CONTROL_LEN)
- return -EINVAL;
-
- resp[0] = MODE_PAGE_CONTROL;
- resp[1] = MODE_PAGE_CONTROL_LEN_FIELD;
- resp[2] = 0x0E; /* TST=000b, TMF_ONLY=0, DPICZ=1,
- * D_SENSE=1, GLTSD=1, RLEC=0 */
- resp[3] = 0x12; /* Q_ALGO_MODIFIER=1h, NUAR=0, QERR=01b */
- /* Byte 4: VS=0, RAC=0, UA_INT=0, SWP=0 */
- resp[5] = 0x40; /* ATO=0, TAS=1, ATMPE=0, RWWP=0, AUTOLOAD=0 */
- /* resp[6] and [7] are obsolete, thus zero */
- resp[8] = 0xFF; /* Busy timeout period = 0xffff */
- resp[9] = 0xFF;
- /* Bytes 10,11: Extended selftest completion time = 0x0000 */
-
- return 0;
-}
-
-static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr,
- u8 *resp, int len)
-{
- int res = 0;
- int nvme_sc;
- u32 feature_resp;
- u8 vwc;
-
- if (len < MODE_PAGE_CACHING_LEN)
- return -EINVAL;
-
- nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, NULL, 0,
- &feature_resp);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- vwc = feature_resp & 0x00000001;
-
- resp[0] = MODE_PAGE_CACHING;
- resp[1] = MODE_PAGE_CACHING_LEN_FIELD;
- resp[2] = vwc << 2;
- return 0;
-}
-
-static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *resp,
- int len)
-{
- if (len < MODE_PAGE_POW_CND_LEN)
- return -EINVAL;
-
- resp[0] = MODE_PAGE_POWER_CONDITION;
- resp[1] = MODE_PAGE_POW_CND_LEN_FIELD;
- /* All other bytes are zero */
-
- return 0;
-}
-
-static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *resp,
- int len)
-{
- if (len < MODE_PAGE_INF_EXC_LEN)
- return -EINVAL;
-
- resp[0] = MODE_PAGE_INFO_EXCEP;
- resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD;
- resp[2] = 0x88;
- /* All other bytes are zero */
-
- return 0;
-}
-
-static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *resp, int len)
-{
- int res;
- u16 mode_pages_offset_1 = 0;
- u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4;
-
- mode_pages_offset_2 = mode_pages_offset_1 + MODE_PAGE_CACHING_LEN;
- mode_pages_offset_3 = mode_pages_offset_2 + MODE_PAGE_CONTROL_LEN;
- mode_pages_offset_4 = mode_pages_offset_3 + MODE_PAGE_POW_CND_LEN;
-
- res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1],
- MODE_PAGE_CACHING_LEN);
- if (res)
- return res;
- res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2],
- MODE_PAGE_CONTROL_LEN);
- if (res)
- return res;
- res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3],
- MODE_PAGE_POW_CND_LEN);
- if (res)
- return res;
- return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
- MODE_PAGE_INF_EXC_LEN);
-}
-
-static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa)
-{
- if (dbd == MODE_SENSE_BLK_DESC_ENABLED) {
- /* SPC-4: len = 8 x Num_of_descriptors if llbaa = 0, 16x if 1 */
- return 8 * (llbaa + 1) * MODE_SENSE_BLK_DESC_COUNT;
- } else {
- return 0;
- }
-}
-
-static int nvme_trans_mode_page_create(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *cmd,
- u16 alloc_len, u8 cdb10,
- int (*mode_page_fill_func)
- (struct nvme_ns *,
- struct sg_io_hdr *hdr, u8 *, int),
- u16 mode_pages_tot_len)
-{
- int res;
- int xfer_len;
- u8 *response;
- u8 dbd, llbaa;
- u16 resp_size;
- int mph_size;
- u16 mode_pages_offset_1;
- u16 blk_desc_len, blk_desc_offset, mode_data_length;
-
- dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT;
- llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT;
- mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;
-
- blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa);
-
- resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
- /* Refer spc4r34 Table 440 for calculation of Mode data Length field */
- mode_data_length = 3 + (3 * cdb10) + blk_desc_len + mode_pages_tot_len;
-
- blk_desc_offset = mph_size;
- mode_pages_offset_1 = blk_desc_offset + blk_desc_len;
-
- response = kzalloc(resp_size, GFP_KERNEL);
- if (response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
- llbaa, mode_data_length, blk_desc_len);
- if (res)
- goto out_free;
- if (blk_desc_len > 0) {
- res = nvme_trans_fill_blk_desc(ns, hdr,
- &response[blk_desc_offset],
- blk_desc_len, llbaa);
- if (res)
- goto out_free;
- }
- res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1],
- mode_pages_tot_len);
- if (res)
- goto out_free;
-
- xfer_len = min(alloc_len, resp_size);
- res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- out_free:
- kfree(response);
- out_mem:
- return res;
-}
-
-/* Read Capacity Helper Functions */
-
-static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns,
- u8 cdb16)
-{
- u8 flbas;
- u32 lba_length;
- u64 rlba;
- u8 prot_en;
- u8 p_type_lut[4] = {0, 0, 1, 2};
- __be64 tmp_rlba;
- __be32 tmp_rlba_32;
- __be32 tmp_len;
-
- flbas = (id_ns->flbas) & 0x0F;
- lba_length = (1 << (id_ns->lbaf[flbas].ds));
- rlba = le64_to_cpup(&id_ns->nsze) - 1;
- (id_ns->dps) ? (prot_en = 0x01) : (prot_en = 0);
-
- if (!cdb16) {
- if (rlba > 0xFFFFFFFF)
- rlba = 0xFFFFFFFF;
- tmp_rlba_32 = cpu_to_be32(rlba);
- tmp_len = cpu_to_be32(lba_length);
- memcpy(response, &tmp_rlba_32, sizeof(u32));
- memcpy(&response[4], &tmp_len, sizeof(u32));
- } else {
- tmp_rlba = cpu_to_be64(rlba);
- tmp_len = cpu_to_be32(lba_length);
- memcpy(response, &tmp_rlba, sizeof(u64));
- memcpy(&response[8], &tmp_len, sizeof(u32));
- response[12] = (p_type_lut[id_ns->dps & 0x3] << 1) | prot_en;
- /* P_I_Exponent = 0x0 | LBPPBE = 0x0 */
- /* LBPME = 0 | LBPRZ = 0 | LALBA = 0x00 */
- /* Bytes 16-31 - Reserved */
- }
-}
-
-/* Start Stop Unit Helper Functions */
-
-static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 buffer_id)
-{
- struct nvme_command c;
- int nvme_sc;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_activate_fw;
- c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
-
- nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
- return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 opcode, u32 tot_len, u32 offset,
- u8 buffer_id)
-{
- int nvme_sc;
- struct nvme_command c;
-
- if (hdr->iovec_count > 0) {
- /* Assuming SGL is not allowed for this command */
- return nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_download_fw;
- c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
- c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
-
- nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c,
- hdr->dxferp, tot_len, NULL, 0);
- return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-/* Mode Select Helper Functions */
-
-static inline void nvme_trans_modesel_get_bd_len(u8 *parm_list, u8 cdb10,
- u16 *bd_len, u8 *llbaa)
-{
- if (cdb10) {
- /* 10 Byte CDB */
- *bd_len = (parm_list[MODE_SELECT_10_BD_OFFSET] << 8) +
- parm_list[MODE_SELECT_10_BD_OFFSET + 1];
- *llbaa = parm_list[MODE_SELECT_10_LLBAA_OFFSET] &
- MODE_SELECT_10_LLBAA_MASK;
- } else {
- /* 6 Byte CDB */
- *bd_len = parm_list[MODE_SELECT_6_BD_OFFSET];
- }
-}
-
-static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
- u16 idx, u16 bd_len, u8 llbaa)
-{
- /* Store block descriptor info if a FORMAT UNIT comes later */
- /* TODO Saving 1st BD info; what to do if multiple BD received? */
- if (llbaa == 0) {
- /* Standard Block Descriptor - spc4r34 7.5.5.1 */
- ns->mode_select_num_blocks =
- (parm_list[idx + 1] << 16) +
- (parm_list[idx + 2] << 8) +
- (parm_list[idx + 3]);
-
- ns->mode_select_block_len =
- (parm_list[idx + 5] << 16) +
- (parm_list[idx + 6] << 8) +
- (parm_list[idx + 7]);
- } else {
- /* Long LBA Block Descriptor - sbc3r27 6.4.2.3 */
- ns->mode_select_num_blocks =
- (((u64)parm_list[idx + 0]) << 56) +
- (((u64)parm_list[idx + 1]) << 48) +
- (((u64)parm_list[idx + 2]) << 40) +
- (((u64)parm_list[idx + 3]) << 32) +
- (((u64)parm_list[idx + 4]) << 24) +
- (((u64)parm_list[idx + 5]) << 16) +
- (((u64)parm_list[idx + 6]) << 8) +
- ((u64)parm_list[idx + 7]);
-
- ns->mode_select_block_len =
- (parm_list[idx + 12] << 24) +
- (parm_list[idx + 13] << 16) +
- (parm_list[idx + 14] << 8) +
- (parm_list[idx + 15]);
- }
-}
-
-static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *mode_page, u8 page_code)
-{
- int res = 0;
- int nvme_sc;
- unsigned dword11;
-
- switch (page_code) {
- case MODE_PAGE_CACHING:
- dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
- nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
- dword11, NULL, 0, NULL);
- res = nvme_trans_status_code(hdr, nvme_sc);
- break;
- case MODE_PAGE_CONTROL:
- break;
- case MODE_PAGE_POWER_CONDITION:
- /* Verify the OS is not trying to set timers */
- if ((mode_page[2] & 0x01) != 0 || (mode_page[3] & 0x0F) != 0) {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_PARAMETER,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- return res;
-}
-
-static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd, u16 parm_list_len, u8 pf,
- u8 sp, u8 cdb10)
-{
- int res;
- u8 *parm_list;
- u16 bd_len;
- u8 llbaa = 0;
- u16 index, saved_index;
- u8 page_code;
- u16 mp_size;
-
- /* Get parm list from data-in/out buffer */
- parm_list = kmalloc(parm_list_len, GFP_KERNEL);
- if (parm_list == NULL) {
- res = -ENOMEM;
- goto out;
- }
-
- res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len);
- if (res)
- goto out_mem;
-
- nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa);
- index = (cdb10) ? (MODE_SELECT_10_MPH_SIZE) : (MODE_SELECT_6_MPH_SIZE);
-
- if (bd_len != 0) {
- /* Block Descriptors present, parse */
- nvme_trans_modesel_save_bd(ns, parm_list, index, bd_len, llbaa);
- index += bd_len;
- }
- saved_index = index;
-
- /* Multiple mode pages may be present; iterate through all */
- /* In 1st Iteration, don't do NVME Command, only check for CDB errors */
- do {
- page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
- mp_size = parm_list[index + 1] + 2;
- if ((page_code != MODE_PAGE_CACHING) &&
- (page_code != MODE_PAGE_CONTROL) &&
- (page_code != MODE_PAGE_POWER_CONDITION)) {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_mem;
- }
- index += mp_size;
- } while (index < parm_list_len);
-
- /* In 2nd Iteration, do the NVME Commands */
- index = saved_index;
- do {
- page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
- mp_size = parm_list[index + 1] + 2;
- res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index],
- page_code);
- if (res)
- break;
- index += mp_size;
- } while (index < parm_list_len);
-
- out_mem:
- kfree(parm_list);
- out:
- return res;
-}
-
-/* Format Unit Helper Functions */
-
-static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
- struct sg_io_hdr *hdr)
-{
- int res = 0;
- int nvme_sc;
- u8 flbas;
-
- /*
- * SCSI Expects a MODE SELECT would have been issued prior to
- * a FORMAT UNIT, and the block size and number would be used
- * from the block descriptor in it. If a MODE SELECT had not
- * been issued, FORMAT shall use the current values for both.
- */
-
- if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
- struct nvme_id_ns *id_ns;
-
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- if (ns->mode_select_num_blocks == 0)
- ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap);
- if (ns->mode_select_block_len == 0) {
- flbas = (id_ns->flbas) & 0x0F;
- ns->mode_select_block_len =
- (1 << (id_ns->lbaf[flbas].ds));
- }
-
- kfree(id_ns);
- }
-
- return 0;
-}
-
-static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
- u8 format_prot_info, u8 *nvme_pf_code)
-{
- int res;
- u8 *parm_list;
- u8 pf_usage, pf_code;
-
- parm_list = kmalloc(len, GFP_KERNEL);
- if (parm_list == NULL) {
- res = -ENOMEM;
- goto out;
- }
- res = nvme_trans_copy_from_user(hdr, parm_list, len);
- if (res)
- goto out_mem;
-
- if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] &
- FORMAT_UNIT_IMMED_MASK) != 0) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_mem;
- }
-
- if (len == FORMAT_UNIT_LONG_PARM_LIST_LEN &&
- (parm_list[FORMAT_UNIT_PROT_INT_OFFSET] & 0x0F) != 0) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_mem;
- }
- pf_usage = parm_list[FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET] &
- FORMAT_UNIT_PROT_FIELD_USAGE_MASK;
- pf_code = (pf_usage << 2) | format_prot_info;
- switch (pf_code) {
- case 0:
- *nvme_pf_code = 0;
- break;
- case 2:
- *nvme_pf_code = 1;
- break;
- case 3:
- *nvme_pf_code = 2;
- break;
- case 7:
- *nvme_pf_code = 3;
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- out_mem:
- kfree(parm_list);
- out:
- return res;
-}
-
-static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 prot_info)
-{
- int res;
- int nvme_sc;
- struct nvme_id_ns *id_ns;
- u8 i;
- u8 nlbaf;
- u8 selected_lbaf = 0xFF;
- u32 cdw10 = 0;
- struct nvme_command c;
-
- /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- nlbaf = id_ns->nlbaf;
-
- for (i = 0; i < nlbaf; i++) {
- if (ns->mode_select_block_len == (1 << (id_ns->lbaf[i].ds))) {
- selected_lbaf = i;
- break;
- }
- }
- if (selected_lbaf > 0x0F) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
- if (ns->mode_select_num_blocks != le64_to_cpu(id_ns->ncap)) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
-
- cdw10 |= prot_info << 5;
- cdw10 |= selected_lbaf & 0x0F;
- memset(&c, 0, sizeof(c));
- c.format.opcode = nvme_admin_format_nvm;
- c.format.nsid = cpu_to_le32(ns->ns_id);
- c.format.cdw10 = cpu_to_le32(cdw10);
-
- nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
- res = nvme_trans_status_code(hdr, nvme_sc);
-
- kfree(id_ns);
- return res;
-}
-
-static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
- struct nvme_trans_io_cdb *cdb_info,
- u32 max_blocks)
-{
- /* If using iovecs, send one nvme command per vector */
- if (hdr->iovec_count > 0)
- return hdr->iovec_count;
- else if (cdb_info->xfer_len > max_blocks)
- return ((cdb_info->xfer_len - 1) / max_blocks) + 1;
- else
- return 1;
-}
-
-static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
- struct nvme_trans_io_cdb *cdb_info)
-{
- u16 control = 0;
-
- /* When Protection information support is added, implement here */
-
- if (cdb_info->fua > 0)
- control |= NVME_RW_FUA;
-
- return control;
-}
-
-static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- struct nvme_trans_io_cdb *cdb_info, u8 is_write)
-{
- int nvme_sc = NVME_SC_SUCCESS;
- u32 num_cmds;
- u64 unit_len;
- u64 unit_num_blocks; /* Number of blocks to xfer in each nvme cmd */
- u32 retcode;
- u32 i = 0;
- u64 nvme_offset = 0;
- void __user *next_mapping_addr;
- struct nvme_command c;
- u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read);
- u16 control;
- u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9);
-
- num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks);
-
- /*
- * This loop handles two cases.
- * First, when an SGL is used in the form of an iovec list:
- * - Use iov_base as the next mapping address for the nvme command_id
- * - Use iov_len as the data transfer length for the command.
- * Second, when we have a single buffer
- * - If larger than max_blocks, split into chunks, offset
- * each nvme command accordingly.
- */
- for (i = 0; i < num_cmds; i++) {
- memset(&c, 0, sizeof(c));
- if (hdr->iovec_count > 0) {
- struct sg_iovec sgl;
-
- retcode = copy_from_user(&sgl, hdr->dxferp +
- i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec));
- if (retcode)
- return -EFAULT;
- unit_len = sgl.iov_len;
- unit_num_blocks = unit_len >> ns->lba_shift;
- next_mapping_addr = sgl.iov_base;
- } else {
- unit_num_blocks = min((u64)max_blocks,
- (cdb_info->xfer_len - nvme_offset));
- unit_len = unit_num_blocks << ns->lba_shift;
- next_mapping_addr = hdr->dxferp +
- ((1 << ns->lba_shift) * nvme_offset);
- }
-
- c.rw.opcode = opcode;
- c.rw.nsid = cpu_to_le32(ns->ns_id);
- c.rw.slba = cpu_to_le64(cdb_info->lba + nvme_offset);
- c.rw.length = cpu_to_le16(unit_num_blocks - 1);
- control = nvme_trans_io_get_control(ns, cdb_info);
- c.rw.control = cpu_to_le16(control);
-
- if (get_capacity(ns->disk) - unit_num_blocks <
- cdb_info->lba + nvme_offset) {
- nvme_sc = NVME_SC_LBA_RANGE;
- break;
- }
- nvme_sc = nvme_submit_user_cmd(ns->queue, &c,
- next_mapping_addr, unit_len, NULL, 0);
- if (nvme_sc)
- break;
-
- nvme_offset += unit_num_blocks;
- }
-
- return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-
-/* SCSI Command Translation Functions */
-
-static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
- u8 *cmd)
-{
- int res = 0;
- struct nvme_trans_io_cdb cdb_info = { 0, };
- u8 opcode = cmd[0];
- u64 xfer_bytes;
- u64 sum_iov_len = 0;
- struct sg_iovec sgl;
- int i;
- size_t not_copied;
-
- /*
- * The FUA and WPROTECT fields are not supported in 6-byte CDBs,
- * but always in the same place for all others.
- */
- switch (opcode) {
- case WRITE_6:
- case READ_6:
- break;
- default:
- cdb_info.fua = cmd[1] & 0x8;
- cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
- if (cdb_info.prot_info && !ns->pi_type) {
- return nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
- }
-
- switch (opcode) {
- case WRITE_6:
- case READ_6:
- cdb_info.lba = get_unaligned_be24(&cmd[1]);
- cdb_info.xfer_len = cmd[4];
- if (cdb_info.xfer_len == 0)
- cdb_info.xfer_len = 256;
- break;
- case WRITE_10:
- case READ_10:
- cdb_info.lba = get_unaligned_be32(&cmd[2]);
- cdb_info.xfer_len = get_unaligned_be16(&cmd[7]);
- break;
- case WRITE_12:
- case READ_12:
- cdb_info.lba = get_unaligned_be32(&cmd[2]);
- cdb_info.xfer_len = get_unaligned_be32(&cmd[6]);
- break;
- case WRITE_16:
- case READ_16:
- cdb_info.lba = get_unaligned_be64(&cmd[2]);
- cdb_info.xfer_len = get_unaligned_be32(&cmd[10]);
- break;
- default:
- /* Will never really reach here */
- res = -EIO;
- goto out;
- }
-
- /* Calculate total length of transfer (in bytes) */
- if (hdr->iovec_count > 0) {
- for (i = 0; i < hdr->iovec_count; i++) {
- not_copied = copy_from_user(&sgl, hdr->dxferp +
- i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec));
- if (not_copied)
- return -EFAULT;
- sum_iov_len += sgl.iov_len;
- /* IO vector sizes should be multiples of block size */
- if (sgl.iov_len % (1 << ns->lba_shift) != 0) {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_PARAMETER,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- }
- } else {
- sum_iov_len = hdr->dxfer_len;
- }
-
- /* As Per sg ioctl howto, if the lengths differ, use the lower one */
- xfer_bytes = min(((u64)hdr->dxfer_len), sum_iov_len);
-
- /* If block count and actual data buffer size dont match, error out */
- if (xfer_bytes != (cdb_info.xfer_len << ns->lba_shift)) {
- res = -EINVAL;
- goto out;
- }
-
- /* Check for 0 length transfer - it is not illegal */
- if (cdb_info.xfer_len == 0)
- goto out;
-
- /* Send NVMe IO Command(s) */
- res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
- if (res)
- goto out;
-
- out:
- return res;
-}
-
-static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res = 0;
- u8 evpd;
- u8 page_code;
- int alloc_len;
- u8 *inq_response;
-
- evpd = cmd[1] & 0x01;
- page_code = cmd[2];
- alloc_len = get_unaligned_be16(&cmd[3]);
-
- inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
- GFP_KERNEL);
- if (inq_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- if (evpd == 0) {
- if (page_code == INQ_STANDARD_INQUIRY_PAGE) {
- res = nvme_trans_standard_inquiry_page(ns, hdr,
- inq_response, alloc_len);
- } else {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
- } else {
- switch (page_code) {
- case VPD_SUPPORTED_PAGES:
- res = nvme_trans_supported_vpd_pages(ns, hdr,
- inq_response, alloc_len);
- break;
- case VPD_SERIAL_NUMBER:
- res = nvme_trans_unit_serial_page(ns, hdr, inq_response,
- alloc_len);
- break;
- case VPD_DEVICE_IDENTIFIERS:
- res = nvme_trans_device_id_page(ns, hdr, inq_response,
- alloc_len);
- break;
- case VPD_EXTENDED_INQUIRY:
- res = nvme_trans_ext_inq_page(ns, hdr, alloc_len);
- break;
- case VPD_BLOCK_LIMITS:
- res = nvme_trans_bdev_limits_page(ns, hdr, inq_response,
- alloc_len);
- break;
- case VPD_BLOCK_DEV_CHARACTERISTICS:
- res = nvme_trans_bdev_char_page(ns, hdr, alloc_len);
- break;
- default:
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
- }
- kfree(inq_response);
- out_mem:
- return res;
-}
-
-static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res;
- u16 alloc_len;
- u8 pc;
- u8 page_code;
-
- if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
-
- page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK;
- pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
- if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- alloc_len = get_unaligned_be16(&cmd[7]);
- switch (page_code) {
- case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
- res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
- break;
- case LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE:
- res = nvme_trans_log_info_exceptions(ns, hdr, alloc_len);
- break;
- case LOG_PAGE_TEMPERATURE_PAGE:
- res = nvme_trans_log_temperature(ns, hdr, alloc_len);
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- out:
- return res;
-}
-
-static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- u8 cdb10 = 0;
- u16 parm_list_len;
- u8 page_format;
- u8 save_pages;
-
- page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK;
- save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK;
-
- if (cmd[0] == MODE_SELECT) {
- parm_list_len = cmd[4];
- } else {
- parm_list_len = cmd[7];
- cdb10 = 1;
- }
-
- if (parm_list_len != 0) {
- /*
- * According to SPC-4 r24, a paramter list length field of 0
- * shall not be considered an error
- */
- return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
- page_format, save_pages, cdb10);
- }
-
- return 0;
-}
-
-static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res = 0;
- u16 alloc_len;
- u8 cdb10 = 0;
-
- if (cmd[0] == MODE_SENSE) {
- alloc_len = cmd[4];
- } else {
- alloc_len = get_unaligned_be16(&cmd[7]);
- cdb10 = 1;
- }
-
- if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) !=
- MODE_SENSE_PC_CURRENT_VALUES) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
-
- switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) {
- case MODE_PAGE_CACHING:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_caching_page,
- MODE_PAGE_CACHING_LEN);
- break;
- case MODE_PAGE_CONTROL:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_control_page,
- MODE_PAGE_CONTROL_LEN);
- break;
- case MODE_PAGE_POWER_CONDITION:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_pow_cnd_page,
- MODE_PAGE_POW_CND_LEN);
- break;
- case MODE_PAGE_INFO_EXCEP:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_inf_exc_page,
- MODE_PAGE_INF_EXC_LEN);
- break;
- case MODE_PAGE_RETURN_ALL:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_all_pages,
- MODE_PAGE_ALL_LEN);
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- out:
- return res;
-}
-
-static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd, u8 cdb16)
-{
- int res;
- int nvme_sc;
- u32 alloc_len;
- u32 resp_size;
- u32 xfer_len;
- struct nvme_id_ns *id_ns;
- u8 *response;
-
- if (cdb16) {
- alloc_len = get_unaligned_be32(&cmd[10]);
- resp_size = READ_CAP_16_RESP_SIZE;
- } else {
- alloc_len = READ_CAP_10_RESP_SIZE;
- resp_size = READ_CAP_10_RESP_SIZE;
- }
-
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- response = kzalloc(resp_size, GFP_KERNEL);
- if (response == NULL) {
- res = -ENOMEM;
- goto out_free_id;
- }
- nvme_trans_fill_read_cap(response, id_ns, cdb16);
-
- xfer_len = min(alloc_len, resp_size);
- res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- kfree(response);
- out_free_id:
- kfree(id_ns);
- return res;
-}
-
-static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res;
- int nvme_sc;
- u32 alloc_len, xfer_len, resp_size;
- u8 *response;
- struct nvme_id_ctrl *id_ctrl;
- u32 ll_length, lun_id;
- u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
- __be32 tmp_len;
-
- switch (cmd[2]) {
- default:
- return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- case ALL_LUNS_RETURNED:
- case ALL_WELL_KNOWN_LUNS_RETURNED:
- case RESTRICTED_LUNS_RETURNED:
- nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
- resp_size = ll_length + LUN_DATA_HEADER_SIZE;
-
- alloc_len = get_unaligned_be32(&cmd[6]);
- if (alloc_len < resp_size) {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_free_id;
- }
-
- response = kzalloc(resp_size, GFP_KERNEL);
- if (response == NULL) {
- res = -ENOMEM;
- goto out_free_id;
- }
-
- /* The first LUN ID will always be 0 per the SAM spec */
- for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) {
- /*
- * Set the LUN Id and then increment to the next LUN
- * location in the parameter data.
- */
- __be64 tmp_id = cpu_to_be64(lun_id);
- memcpy(&response[lun_id_offset], &tmp_id, sizeof(u64));
- lun_id_offset += LUN_ENTRY_SIZE;
- }
- tmp_len = cpu_to_be32(ll_length);
- memcpy(response, &tmp_len, sizeof(u32));
- }
-
- xfer_len = min(alloc_len, resp_size);
- res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- kfree(response);
- out_free_id:
- kfree(id_ctrl);
- return res;
-}
-
-static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res;
- u8 alloc_len, xfer_len, resp_size;
- u8 desc_format;
- u8 *response;
-
- desc_format = cmd[1] & 0x01;
- alloc_len = cmd[4];
-
- resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
- (FIXED_FMT_SENSE_DATA_SIZE));
- response = kzalloc(resp_size, GFP_KERNEL);
- if (response == NULL) {
- res = -ENOMEM;
- goto out;
- }
-
- if (desc_format) {
- /* Descriptor Format Sense Data */
- response[0] = DESC_FORMAT_SENSE_DATA;
- response[1] = NO_SENSE;
- /* TODO How is LOW POWER CONDITION ON handled? (byte 2) */
- response[2] = SCSI_ASC_NO_SENSE;
- response[3] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- /* SDAT_OVFL = 0 | Additional Sense Length = 0 */
- } else {
- /* Fixed Format Sense Data */
- response[0] = FIXED_SENSE_DATA;
- /* Byte 1 = Obsolete */
- response[2] = NO_SENSE; /* FM, EOM, ILI, SDAT_OVFL = 0 */
- /* Bytes 3-6 - Information - set to zero */
- response[7] = FIXED_SENSE_DATA_ADD_LENGTH;
- /* Bytes 8-11 - Cmd Specific Information - set to zero */
- response[12] = SCSI_ASC_NO_SENSE;
- response[13] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- /* Byte 14 = Field Replaceable Unit Code = 0 */
- /* Bytes 15-17 - SKSV=0; Sense Key Specific = 0 */
- }
-
- xfer_len = min(alloc_len, resp_size);
- res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- kfree(response);
- out:
- return res;
-}
-
-static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
- struct sg_io_hdr *hdr)
-{
- int nvme_sc;
- struct nvme_command c;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_cmd_flush;
- c.common.nsid = cpu_to_le32(ns->ns_id);
-
- nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
- return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res;
- u8 parm_hdr_len = 0;
- u8 nvme_pf_code = 0;
- u8 format_prot_info, long_list, format_data;
-
- format_prot_info = (cmd[1] & 0xc0) >> 6;
- long_list = cmd[1] & 0x20;
- format_data = cmd[1] & 0x10;
-
- if (format_data != 0) {
- if (format_prot_info != 0) {
- if (long_list == 0)
- parm_hdr_len = FORMAT_UNIT_SHORT_PARM_LIST_LEN;
- else
- parm_hdr_len = FORMAT_UNIT_LONG_PARM_LIST_LEN;
- }
- } else if (format_data == 0 && format_prot_info != 0) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
-
- /* Get parm header from data-in/out buffer */
- /*
- * According to the translation spec, the only fields in the parameter
- * list we are concerned with are in the header. So allocate only that.
- */
- if (parm_hdr_len > 0) {
- res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len,
- format_prot_info, &nvme_pf_code);
- if (res)
- goto out;
- }
-
- /* Attempt to activate any previously downloaded firmware image */
- res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0);
-
- /* Determine Block size and count and send format command */
- res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
- if (res)
- goto out;
-
- res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code);
-
- out:
- return res;
-}
-
-static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
- struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- if (nvme_ctrl_ready(ns->ctrl))
- return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- NOT_READY, SCSI_ASC_LUN_NOT_READY,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- else
- return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
-}
-
-static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res = 0;
- u32 buffer_offset, parm_list_length;
- u8 buffer_id, mode;
-
- parm_list_length = get_unaligned_be24(&cmd[6]);
- if (parm_list_length % BYTES_TO_DWORDS != 0) {
- /* NVMe expects Firmware file to be a whole number of DWORDS */
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- buffer_id = cmd[2];
- if (buffer_id > NVME_MAX_FIRMWARE_SLOT) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- mode = cmd[1] & 0x1f;
- buffer_offset = get_unaligned_be24(&cmd[3]);
-
- switch (mode) {
- case DOWNLOAD_SAVE_ACTIVATE:
- res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
- parm_list_length, buffer_offset,
- buffer_id);
- if (res)
- goto out;
- res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
- break;
- case DOWNLOAD_SAVE_DEFER_ACTIVATE:
- res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
- parm_list_length, buffer_offset,
- buffer_id);
- break;
- case ACTIVATE_DEFERRED_MICROCODE:
- res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- out:
- return res;
-}
-
-struct scsi_unmap_blk_desc {
- __be64 slba;
- __be32 nlb;
- u32 resv;
-};
-
-struct scsi_unmap_parm_list {
- __be16 unmap_data_len;
- __be16 unmap_blk_desc_data_len;
- u32 resv;
- struct scsi_unmap_blk_desc desc[0];
-};
-
-static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- struct scsi_unmap_parm_list *plist;
- struct nvme_dsm_range *range;
- struct nvme_command c;
- int i, nvme_sc, res;
- u16 ndesc, list_len;
-
- list_len = get_unaligned_be16(&cmd[7]);
- if (!list_len)
- return -EINVAL;
-
- plist = kmalloc(list_len, GFP_KERNEL);
- if (!plist)
- return -ENOMEM;
-
- res = nvme_trans_copy_from_user(hdr, plist, list_len);
- if (res)
- goto out;
-
- ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4;
- if (!ndesc || ndesc > 256) {
- res = -EINVAL;
- goto out;
- }
-
- range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
- if (!range) {
- res = -ENOMEM;
- goto out;
- }
-
- for (i = 0; i < ndesc; i++) {
- range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb));
- range[i].slba = cpu_to_le64(be64_to_cpu(plist->desc[i].slba));
- range[i].cattr = 0;
- }
-
- memset(&c, 0, sizeof(c));
- c.dsm.opcode = nvme_cmd_dsm;
- c.dsm.nsid = cpu_to_le32(ns->ns_id);
- c.dsm.nr = cpu_to_le32(ndesc - 1);
- c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
-
- nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range,
- ndesc * sizeof(*range));
- res = nvme_trans_status_code(hdr, nvme_sc);
-
- kfree(range);
- out:
- kfree(plist);
- return res;
-}
-
-static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
-{
- u8 cmd[16];
- int retcode;
- unsigned int opcode;
-
- if (hdr->cmdp == NULL)
- return -EMSGSIZE;
- if (hdr->cmd_len > sizeof(cmd))
- return -EINVAL;
- if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len))
- return -EFAULT;
-
- /*
- * Prime the hdr with good status for scsi commands that don't require
- * an nvme command for translation.
- */
- retcode = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
- if (retcode)
- return retcode;
-
- opcode = cmd[0];
-
- switch (opcode) {
- case READ_6:
- case READ_10:
- case READ_12:
- case READ_16:
- retcode = nvme_trans_io(ns, hdr, 0, cmd);
- break;
- case WRITE_6:
- case WRITE_10:
- case WRITE_12:
- case WRITE_16:
- retcode = nvme_trans_io(ns, hdr, 1, cmd);
- break;
- case INQUIRY:
- retcode = nvme_trans_inquiry(ns, hdr, cmd);
- break;
- case LOG_SENSE:
- retcode = nvme_trans_log_sense(ns, hdr, cmd);
- break;
- case MODE_SELECT:
- case MODE_SELECT_10:
- retcode = nvme_trans_mode_select(ns, hdr, cmd);
- break;
- case MODE_SENSE:
- case MODE_SENSE_10:
- retcode = nvme_trans_mode_sense(ns, hdr, cmd);
- break;
- case READ_CAPACITY:
- retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0);
- break;
- case SERVICE_ACTION_IN_16:
- switch (cmd[1]) {
- case SAI_READ_CAPACITY_16:
- retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1);
- break;
- default:
- goto out;
- }
- break;
- case REPORT_LUNS:
- retcode = nvme_trans_report_luns(ns, hdr, cmd);
- break;
- case REQUEST_SENSE:
- retcode = nvme_trans_request_sense(ns, hdr, cmd);
- break;
- case SYNCHRONIZE_CACHE:
- retcode = nvme_trans_synchronize_cache(ns, hdr);
- break;
- case FORMAT_UNIT:
- retcode = nvme_trans_format_unit(ns, hdr, cmd);
- break;
- case TEST_UNIT_READY:
- retcode = nvme_trans_test_unit_ready(ns, hdr, cmd);
- break;
- case WRITE_BUFFER:
- retcode = nvme_trans_write_buffer(ns, hdr, cmd);
- break;
- case UNMAP:
- retcode = nvme_trans_unmap(ns, hdr, cmd);
- break;
- default:
- out:
- retcode = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
- return retcode;
-}
-
-int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
-{
- struct sg_io_hdr hdr;
- int retcode;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
- if (copy_from_user(&hdr, u_hdr, sizeof(hdr)))
- return -EFAULT;
- if (hdr.interface_id != 'S')
- return -EINVAL;
-
- /*
- * A positive return code means a NVMe status, which has been
- * translated to sense data.
- */
- retcode = nvme_scsi_translate(ns, &hdr);
- if (retcode < 0)
- return retcode;
- if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
- return -EFAULT;
- return 0;
-}
-
-int nvme_sg_get_version_num(int __user *ip)
-{
- return put_user(sg_version_num, ip);
-}
static void nvmet_execute_identify_nslist(struct nvmet_req *req)
{
- static const int buf_size = 4096;
+ static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_ns *ns;
u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
nvmet_req_complete(req, status);
}
+static u16 nvmet_copy_ns_identifier(struct nvmet_req *req, u8 type, u8 len,
+ void *id, off_t *off)
+{
+ struct nvme_ns_id_desc desc = {
+ .nidt = type,
+ .nidl = len,
+ };
+ u16 status;
+
+ status = nvmet_copy_to_sgl(req, *off, &desc, sizeof(desc));
+ if (status)
+ return status;
+ *off += sizeof(desc);
+
+ status = nvmet_copy_to_sgl(req, *off, id, len);
+ if (status)
+ return status;
+ *off += len;
+
+ return 0;
+}
+
+static void nvmet_execute_identify_desclist(struct nvmet_req *req)
+{
+ struct nvmet_ns *ns;
+ u16 status = 0;
+ off_t off = 0;
+
+ ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
+ if (!ns) {
+ status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ goto out;
+ }
+
+ if (memchr_inv(&ns->uuid, 0, sizeof(ns->uuid))) {
+ status = nvmet_copy_ns_identifier(req, NVME_NIDT_UUID,
+ NVME_NIDT_UUID_LEN,
+ &ns->uuid, &off);
+ if (status)
+ goto out_put_ns;
+ }
+ if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) {
+ status = nvmet_copy_ns_identifier(req, NVME_NIDT_NGUID,
+ NVME_NIDT_NGUID_LEN,
+ &ns->nguid, &off);
+ if (status)
+ goto out_put_ns;
+ }
+
+ if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
+ off) != NVME_IDENTIFY_DATA_SIZE - off)
+ status = NVME_SC_INTERNAL | NVME_SC_DNR;
+out_put_ns:
+ nvmet_put_namespace(ns);
+out:
+ nvmet_req_complete(req, status);
+}
+
/*
* A "mimimum viable" abort implementation: the command is mandatory in the
* spec, but we are not required to do any useful work. We couldn't really
}
break;
case nvme_admin_identify:
- req->data_len = 4096;
+ req->data_len = NVME_IDENTIFY_DATA_SIZE;
switch (cmd->identify.cns) {
case NVME_ID_CNS_NS:
req->execute = nvmet_execute_identify_ns;
case NVME_ID_CNS_NS_ACTIVE_LIST:
req->execute = nvmet_execute_identify_nslist;
return 0;
+ case NVME_ID_CNS_NS_DESC_LIST:
+ req->execute = nvmet_execute_identify_desclist;
+ return 0;
}
break;
case nvme_admin_abort_cmd:
CONFIGFS_ATTR(nvmet_ns_, device_path);
+static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page)
+{
+ return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid);
+}
+
+static ssize_t nvmet_ns_device_uuid_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+ struct nvmet_subsys *subsys = ns->subsys;
+ int ret = 0;
+
+
+ mutex_lock(&subsys->lock);
+ if (ns->enabled) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+
+ if (uuid_parse(page, &ns->uuid))
+ ret = -EINVAL;
+
+out_unlock:
+ mutex_unlock(&subsys->lock);
+ return ret ? ret : count;
+}
+
static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page)
{
return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid);
}
+CONFIGFS_ATTR(nvmet_ns_, device_uuid);
+
static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
const char *page, size_t count)
{
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
+ &nvmet_ns_attr_device_uuid,
&nvmet_ns_attr_enable,
NULL,
};
CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host);
+static ssize_t nvmet_subsys_version_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+
+ if (NVME_TERTIARY(subsys->ver))
+ return snprintf(page, PAGE_SIZE, "%d.%d.%d\n",
+ (int)NVME_MAJOR(subsys->ver),
+ (int)NVME_MINOR(subsys->ver),
+ (int)NVME_TERTIARY(subsys->ver));
+ else
+ return snprintf(page, PAGE_SIZE, "%d.%d\n",
+ (int)NVME_MAJOR(subsys->ver),
+ (int)NVME_MINOR(subsys->ver));
+}
+
+static ssize_t nvmet_subsys_version_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ int major, minor, tertiary = 0;
+ int ret;
+
+
+ ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
+ if (ret != 2 && ret != 3)
+ return -EINVAL;
+
+ down_write(&nvmet_config_sem);
+ subsys->ver = NVME_VS(major, minor, tertiary);
+ up_write(&nvmet_config_sem);
+
+ return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, version);
+
static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_allow_any_host,
+ &nvmet_subsys_attr_version,
NULL,
};
ns->nsid = nsid;
ns->subsys = subsys;
+ uuid_gen(&ns->uuid);
return ns;
}
if (!subsys)
return NULL;
- subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */
+ subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
switch (type) {
case NVME_NQN_NVME:
e->portid = port->disc_addr.portid;
/* we support only dynamic controllers */
e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
- e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH);
+ e->asqsz = cpu_to_le16(NVME_AQ_DEPTH);
e->subtype = type;
memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
case nvme_admin_identify:
- req->data_len = 4096;
+ req->data_len = NVME_IDENTIFY_DATA_SIZE;
switch (cmd->identify.cns) {
case NVME_ID_CNS_CTRL:
req->execute =
/* clear any response payload */
memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
+ fod->data_sg = NULL;
+ fod->data_sg_cnt = 0;
+
ret = nvmet_req_init(&fod->req,
&fod->queue->nvme_cq,
&fod->queue->nvme_sq,
&nvmet_fc_tgt_fcp_ops);
- if (!ret) { /* bad SQE content or invalid ctrl state */
- nvmet_fc_abort_op(tgtport, fod);
+ if (!ret) {
+ /* bad SQE content or invalid ctrl state */
+ /* nvmet layer has already called op done to send rsp. */
return;
}
/* keep a running counter of tail position */
atomic_inc(&fod->queue->sqtail);
- fod->data_sg = NULL;
- fod->data_sg_cnt = 0;
if (fod->total_length) {
ret = nvmet_fc_alloc_tgt_pgs(fod);
if (ret) {
struct nvmefc_tgt_fcp_req *tgt_fcpreq)
{
struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
- int active;
/*
* mark aborted only in case there were 2 threads in transport
* after the abort request
*/
spin_lock(&tfcp_req->reqlock);
- active = tfcp_req->active;
tfcp_req->aborted = true;
spin_unlock(&tfcp_req->reqlock);
struct nvmet_req *req = bio->bi_private;
nvmet_req_complete(req,
- bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+ bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
if (bio != &req->inline_bio)
bio_put(bio);
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
if (status) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else {
submit_bio(bio);
#include "../host/nvme.h"
#include "../host/fabrics.h"
-#define NVME_LOOP_AQ_DEPTH 256
-
#define NVME_LOOP_MAX_SEGMENTS 256
/*
*/
#define NVME_LOOP_NR_AEN_COMMANDS 1
#define NVME_LOOP_AQ_BLKMQ_DEPTH \
- (NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
+ (NVME_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
struct nvme_loop_iod {
struct nvme_request nvme_req;
};
struct nvme_loop_ctrl {
- spinlock_t lock;
struct nvme_loop_queue *queues;
u32 queue_count;
struct nvmet_ctrl *target_ctrl;
struct work_struct delete_work;
- struct work_struct reset_work;
};
static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
/* queue error recovery */
- schedule_work(&iod->queue->ctrl->reset_work);
+ nvme_reset_ctrl(&iod->queue->ctrl->ctrl);
/* fail with DNR on admin cmd timeout */
nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
return BLK_EH_HANDLED;
}
-static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
struct nvme_loop_queue *queue = hctx->driver_data;
struct request *req = bd->rq;
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
- int ret;
+ blk_status_t ret;
ret = nvme_setup_cmd(ns, req, &iod->cmd);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
return ret;
iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
nvme_cleanup_cmd(req);
blk_mq_start_request(req);
nvme_loop_queue_response(&iod->req);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
if (blk_rq_bytes(req)) {
iod->sg_table.sgl = iod->first_sgl;
- ret = sg_alloc_table_chained(&iod->sg_table,
+ if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
- iod->sg_table.sgl);
- if (ret)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ iod->sg_table.sgl))
+ return BLK_STS_RESOURCE;
iod->req.sg = iod->sg_table.sgl;
iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
blk_mq_start_request(req);
schedule_work(&iod->work);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
struct request *req, unsigned int hctx_idx,
unsigned int numa_node)
{
- return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req),
- hctx_idx + 1);
-}
+ struct nvme_loop_ctrl *ctrl = set->driver_data;
-static int nvme_loop_init_admin_request(struct blk_mq_tag_set *set,
- struct request *req, unsigned int hctx_idx,
- unsigned int numa_node)
-{
- return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 0);
+ return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req),
+ (set == &ctrl->tag_set) ? hctx_idx + 1 : 0);
}
static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
.queue_rq = nvme_loop_queue_rq,
.complete = nvme_loop_complete_rq,
- .init_request = nvme_loop_init_admin_request,
+ .init_request = nvme_loop_init_request,
.init_hctx = nvme_loop_init_admin_hctx,
.timeout = nvme_loop_timeout,
};
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return -EBUSY;
- if (!schedule_work(&ctrl->delete_work))
+ if (!queue_work(nvme_wq, &ctrl->delete_work))
return -EBUSY;
return 0;
static void nvme_loop_reset_ctrl_work(struct work_struct *work)
{
- struct nvme_loop_ctrl *ctrl = container_of(work,
- struct nvme_loop_ctrl, reset_work);
+ struct nvme_loop_ctrl *ctrl =
+ container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
bool changed;
int ret;
nvme_put_ctrl(&ctrl->ctrl);
}
-static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
-{
- struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
-
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
- return -EBUSY;
-
- if (!schedule_work(&ctrl->reset_work))
- return -EBUSY;
-
- flush_work(&ctrl->reset_work);
-
- return 0;
-}
-
static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
.name = "loop",
.module = THIS_MODULE,
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .reset_ctrl = nvme_loop_reset_ctrl,
.free_ctrl = nvme_loop_free_ctrl,
.submit_async_event = nvme_loop_submit_async_event,
.delete_ctrl = nvme_loop_del_ctrl,
- .get_subsysnqn = nvmf_get_subsysnqn,
};
static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
INIT_LIST_HEAD(&ctrl->list);
INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work);
- INIT_WORK(&ctrl->reset_work, nvme_loop_reset_ctrl_work);
+ INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work);
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
0 /* no quirks, we're perfect! */);
if (ret)
goto out_put_ctrl;
- spin_lock_init(&ctrl->lock);
-
ret = -ENOMEM;
ctrl->ctrl.sqsize = opts->queue_size - 1;
__nvme_loop_del_ctrl(ctrl);
mutex_unlock(&nvme_loop_ctrl_mutex);
- flush_scheduled_work();
+ flush_workqueue(nvme_wq);
}
module_init(nvme_loop_init_module);
#include <linux/percpu-refcount.h>
#include <linux/list.h>
#include <linux/mutex.h>
+#include <linux/uuid.h>
#include <linux/nvme.h>
#include <linux/configfs.h>
#include <linux/rcupdate.h>
u32 blksize_shift;
loff_t size;
u8 nguid[16];
+ uuid_t uuid;
bool enabled;
struct nvmet_subsys *subsys;
queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
queue->send_queue_size = le16_to_cpu(req->hrqsize);
- if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH)
+ if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH)
return NVME_RDMA_CM_INVALID_HSQSIZE;
/* XXX: Should we enforce some kind of max for IO queues? */
/**
* nvme_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id: rdma_cm id, used for nvmet port
* @queue: nvmet rdma queue (cm id qp_context)
- * @addr: nvmet address (cm_id context)
*
* DEVICE_REMOVAL event notifies us that the RDMA device is about
- * to unplug so we should take care of destroying our RDMA resources.
- * This event will be generated for each allocated cm_id.
+ * to unplug. Note that this event can be generated on a normal
+ * queue cm_id and/or a device bound listener cm_id (where in this
+ * case queue will be null).
*
- * Note that this event can be generated on a normal queue cm_id
- * and/or a device bound listener cm_id (where in this case
- * queue will be null).
- *
- * we claim ownership on destroying the cm_id. For queues we move
- * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
+ * We registered an ib_client to handle device removal for queues,
+ * so we only need to handle the listening port cm_ids. In this case
* we nullify the priv to prevent double cm_id destruction and destroying
* the cm_id implicitely by returning a non-zero rc to the callout.
*/
static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
struct nvmet_rdma_queue *queue)
{
- unsigned long flags;
-
- if (!queue) {
- struct nvmet_port *port = cm_id->context;
+ struct nvmet_port *port;
+ if (queue) {
/*
- * This is a listener cm_id. Make sure that
- * future remove_port won't invoke a double
- * cm_id destroy. use atomic xchg to make sure
- * we don't compete with remove_port.
- */
- if (xchg(&port->priv, NULL) != cm_id)
- return 0;
- } else {
- /*
- * This is a queue cm_id. Make sure that
- * release queue will not destroy the cm_id
- * and schedule all ctrl queues removal (only
- * if the queue is not disconnecting already).
+ * This is a queue cm_id. we have registered
+ * an ib_client to handle queues removal
+ * so don't interfear and just return.
*/
- spin_lock_irqsave(&queue->state_lock, flags);
- if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
- queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
- spin_unlock_irqrestore(&queue->state_lock, flags);
- nvmet_rdma_queue_disconnect(queue);
- flush_scheduled_work();
+ return 0;
}
+ port = cm_id->context;
+
+ /*
+ * This is a listener cm_id. Make sure that
+ * future remove_port won't invoke a double
+ * cm_id destroy. use atomic xchg to make sure
+ * we don't compete with remove_port.
+ */
+ if (xchg(&port->priv, NULL) != cm_id)
+ return 0;
+
/*
* We need to return 1 so that the core will destroy
* it's own ID. What a great API design..
.delete_ctrl = nvmet_rdma_delete_ctrl,
};
+static void nvmet_rdma_add_one(struct ib_device *ib_device)
+{
+}
+
+static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+{
+ struct nvmet_rdma_queue *queue;
+
+ /* Device is being removed, delete all queues using this device */
+ mutex_lock(&nvmet_rdma_queue_mutex);
+ list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) {
+ if (queue->dev->device != ib_device)
+ continue;
+
+ pr_info("Removing queue %d\n", queue->idx);
+ __nvmet_rdma_queue_disconnect(queue);
+ }
+ mutex_unlock(&nvmet_rdma_queue_mutex);
+
+ flush_scheduled_work();
+}
+
+static struct ib_client nvmet_rdma_ib_client = {
+ .name = "nvmet_rdma",
+ .add = nvmet_rdma_add_one,
+ .remove = nvmet_rdma_remove_one
+};
+
static int __init nvmet_rdma_init(void)
{
- return nvmet_register_transport(&nvmet_rdma_ops);
+ int ret;
+
+ ret = ib_register_client(&nvmet_rdma_ib_client);
+ if (ret)
+ return ret;
+
+ ret = nvmet_register_transport(&nvmet_rdma_ops);
+ if (ret)
+ goto err_ib_client;
+
+ return 0;
+
+err_ib_client:
+ ib_unregister_client(&nvmet_rdma_ib_client);
+ return ret;
}
static void __exit nvmet_rdma_exit(void)
mutex_unlock(&nvmet_rdma_queue_mutex);
flush_scheduled_work();
+ ib_unregister_client(&nvmet_rdma_ib_client);
ida_destroy(&nvmet_rdma_queue_ida);
}
{
if (pci_dev_is_disconnected(dev)) {
*val = ~0;
- return -ENODEV;
+ return PCIBIOS_DEVICE_NOT_FOUND;
}
return pci_bus_read_config_byte(dev->bus, dev->devfn, where, val);
}
{
if (pci_dev_is_disconnected(dev)) {
*val = ~0;
- return -ENODEV;
+ return PCIBIOS_DEVICE_NOT_FOUND;
}
return pci_bus_read_config_word(dev->bus, dev->devfn, where, val);
}
{
if (pci_dev_is_disconnected(dev)) {
*val = ~0;
- return -ENODEV;
+ return PCIBIOS_DEVICE_NOT_FOUND;
}
return pci_bus_read_config_dword(dev->bus, dev->devfn, where, val);
}
int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val)
{
if (pci_dev_is_disconnected(dev))
- return -ENODEV;
+ return PCIBIOS_DEVICE_NOT_FOUND;
return pci_bus_write_config_byte(dev->bus, dev->devfn, where, val);
}
EXPORT_SYMBOL(pci_write_config_byte);
int pci_write_config_word(const struct pci_dev *dev, int where, u16 val)
{
if (pci_dev_is_disconnected(dev))
- return -ENODEV;
+ return PCIBIOS_DEVICE_NOT_FOUND;
return pci_bus_write_config_word(dev->bus, dev->devfn, where, val);
}
EXPORT_SYMBOL(pci_write_config_word);
u32 val)
{
if (pci_dev_is_disconnected(dev))
- return -ENODEV;
+ return PCIBIOS_DEVICE_NOT_FOUND;
return pci_bus_write_config_dword(dev->bus, dev->devfn, where, val);
}
EXPORT_SYMBOL(pci_write_config_dword);
config PCI_EPF_TEST
tristate "PCI Endpoint Test driver"
depends on PCI_ENDPOINT
+ select CRC32
help
Enable this configuration option to enable the test driver
for PCI Endpoint.
#include "pci.h"
/*
- * The UUID is defined in the PCI Firmware Specification available here:
+ * The GUID is defined in the PCI Firmware Specification available here:
* https://www.pcisig.com/members/downloads/pcifw_r3_1_13Dec10.pdf
*/
-const u8 pci_acpi_dsm_uuid[] = {
- 0xd0, 0x37, 0xc9, 0xe5, 0x53, 0x35, 0x7a, 0x4d,
- 0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d
-};
+const guid_t pci_acpi_dsm_guid =
+ GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a,
+ 0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d);
#if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
static int acpi_get_rc_addr(struct acpi_device *adev, struct resource *res)
if (!pci_is_root_bus(bus))
return;
- obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), pci_acpi_dsm_uuid, 3,
+ obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), &pci_acpi_dsm_guid, 3,
RESET_DELAY_DSM, NULL);
if (!obj)
return;
if (bridge->ignore_reset_delay)
pdev->d3cold_delay = 0;
- obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 3,
+ obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 3,
FUNCTION_DELAY_DSM, NULL);
if (!obj)
return;
if (!handle)
return -1;
- obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 0x2,
+ obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 0x2,
DEVICE_LABEL_DSM, NULL);
if (!obj)
return -1;
if (!handle)
return false;
- return !!acpi_check_dsm(handle, pci_acpi_dsm_uuid, 0x2,
+ return !!acpi_check_dsm(handle, &pci_acpi_dsm_guid, 0x2,
1 << DEVICE_LABEL_DSM);
}
.flags = IRQCHIP_SKIP_SET_WAKE,
};
-static void amd_gpio_irq_handler(struct irq_desc *desc)
+#define PIN_IRQ_PENDING (BIT(INTERRUPT_STS_OFF) | BIT(WAKE_STS_OFF))
+
+static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id)
{
- u32 i;
- u32 off;
- u32 reg;
- u32 pin_reg;
- u64 reg64;
- int handled = 0;
- unsigned int irq;
+ struct amd_gpio *gpio_dev = dev_id;
+ struct gpio_chip *gc = &gpio_dev->gc;
+ irqreturn_t ret = IRQ_NONE;
+ unsigned int i, irqnr;
unsigned long flags;
- struct irq_chip *chip = irq_desc_get_chip(desc);
- struct gpio_chip *gc = irq_desc_get_handler_data(desc);
- struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
+ u32 *regs, regval;
+ u64 status, mask;
- chained_irq_enter(chip, desc);
- /*enable GPIO interrupt again*/
+ /* Read the wake status */
raw_spin_lock_irqsave(&gpio_dev->lock, flags);
- reg = readl(gpio_dev->base + WAKE_INT_STATUS_REG1);
- reg64 = reg;
- reg64 = reg64 << 32;
-
- reg = readl(gpio_dev->base + WAKE_INT_STATUS_REG0);
- reg64 |= reg;
+ status = readl(gpio_dev->base + WAKE_INT_STATUS_REG1);
+ status <<= 32;
+ status |= readl(gpio_dev->base + WAKE_INT_STATUS_REG0);
raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
- /*
- * first 46 bits indicates interrupt status.
- * one bit represents four interrupt sources.
- */
- for (off = 0; off < 46 ; off++) {
- if (reg64 & BIT(off)) {
- for (i = 0; i < 4; i++) {
- pin_reg = readl(gpio_dev->base +
- (off * 4 + i) * 4);
- if ((pin_reg & BIT(INTERRUPT_STS_OFF)) ||
- (pin_reg & BIT(WAKE_STS_OFF))) {
- irq = irq_find_mapping(gc->irqdomain,
- off * 4 + i);
- generic_handle_irq(irq);
- writel(pin_reg,
- gpio_dev->base
- + (off * 4 + i) * 4);
- handled++;
- }
- }
+ /* Bit 0-45 contain the relevant status bits */
+ status &= (1ULL << 46) - 1;
+ regs = gpio_dev->base;
+ for (mask = 1, irqnr = 0; status; mask <<= 1, regs += 4, irqnr += 4) {
+ if (!(status & mask))
+ continue;
+ status &= ~mask;
+
+ /* Each status bit covers four pins */
+ for (i = 0; i < 4; i++) {
+ regval = readl(regs + i);
+ if (!(regval & PIN_IRQ_PENDING))
+ continue;
+ irq = irq_find_mapping(gc->irqdomain, irqnr + i);
+ generic_handle_irq(irq);
+ /* Clear interrupt */
+ writel(regval, regs + i);
+ ret = IRQ_HANDLED;
}
}
- if (handled == 0)
- handle_bad_irq(desc);
-
+ /* Signal EOI to the GPIO unit */
raw_spin_lock_irqsave(&gpio_dev->lock, flags);
- reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG);
- reg |= EOI_MASK;
- writel(reg, gpio_dev->base + WAKE_INT_MASTER_REG);
+ regval = readl(gpio_dev->base + WAKE_INT_MASTER_REG);
+ regval |= EOI_MASK;
+ writel(regval, gpio_dev->base + WAKE_INT_MASTER_REG);
raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
- chained_irq_exit(chip, desc);
+ return ret;
}
static int amd_get_groups_count(struct pinctrl_dev *pctldev)
goto out2;
}
- gpiochip_set_chained_irqchip(&gpio_dev->gc,
- &amd_gpio_irqchip,
- irq_base,
- amd_gpio_irq_handler);
+ ret = devm_request_irq(&pdev->dev, irq_base, amd_gpio_irq_handler, 0,
+ KBUILD_MODNAME, gpio_dev);
+ if (ret)
+ goto out2;
+
platform_set_drvdata(pdev, gpio_dev);
dev_dbg(&pdev->dev, "amd gpio driver loaded\n");
* @gpio_chip: gpiolib chip
* @grange: gpio range
* @slock: spinlock for the gpio bank
- * @irq_lock: bus lock for irq chip
- * @new_irqs: newly configured irqs which must be muxed as GPIOs in
- * irq_bus_sync_unlock()
*/
struct rockchip_pin_bank {
void __iomem *reg_base;
struct pinctrl_gpio_range grange;
raw_spinlock_t slock;
u32 toggle_edge_mode;
- struct mutex irq_lock;
- u32 new_irqs;
};
#define PIN_BANK(id, pins, label) \
int ret;
/* make sure the pin is configured as gpio input */
- ret = rockchip_verify_mux(bank, d->hwirq, RK_FUNC_GPIO);
+ ret = rockchip_set_mux(bank, d->hwirq, RK_FUNC_GPIO);
if (ret < 0)
return ret;
- bank->new_irqs |= mask;
-
+ clk_enable(bank->clk);
raw_spin_lock_irqsave(&bank->slock, flags);
data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
default:
irq_gc_unlock(gc);
raw_spin_unlock_irqrestore(&bank->slock, flags);
+ clk_disable(bank->clk);
return -EINVAL;
}
irq_gc_unlock(gc);
raw_spin_unlock_irqrestore(&bank->slock, flags);
+ clk_disable(bank->clk);
return 0;
}
clk_disable(bank->clk);
}
-static void rockchip_irq_bus_lock(struct irq_data *d)
-{
- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
- struct rockchip_pin_bank *bank = gc->private;
-
- clk_enable(bank->clk);
- mutex_lock(&bank->irq_lock);
-}
-
-static void rockchip_irq_bus_sync_unlock(struct irq_data *d)
-{
- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
- struct rockchip_pin_bank *bank = gc->private;
-
- while (bank->new_irqs) {
- unsigned int irq = __ffs(bank->new_irqs);
- int ret;
-
- ret = rockchip_set_mux(bank, irq, RK_FUNC_GPIO);
- WARN_ON(ret < 0);
-
- bank->new_irqs &= ~BIT(irq);
- }
-
- mutex_unlock(&bank->irq_lock);
- clk_disable(bank->clk);
-}
-
static int rockchip_interrupts_register(struct platform_device *pdev,
struct rockchip_pinctrl *info)
{
gc->chip_types[0].chip.irq_suspend = rockchip_irq_suspend;
gc->chip_types[0].chip.irq_resume = rockchip_irq_resume;
gc->chip_types[0].chip.irq_set_type = rockchip_irq_set_type;
- gc->chip_types[0].chip.irq_bus_lock = rockchip_irq_bus_lock;
- gc->chip_types[0].chip.irq_bus_sync_unlock =
- rockchip_irq_bus_sync_unlock;
gc->wake_enabled = IRQ_MSK(bank->nr_pins);
irq_set_chained_handler_and_data(bank->irq,
int bank_pins = 0;
raw_spin_lock_init(&bank->slock);
- mutex_init(&bank->irq_lock);
bank->drvdata = d;
bank->pin_base = ctrl->nr_pins;
ctrl->nr_pins += bank->nr_pins;
break;
case PIN_CONFIG_OUTPUT:
__stm32_gpio_set(bank, offset, arg);
- ret = stm32_pmx_gpio_set_direction(pctldev, NULL, pin, false);
+ ret = stm32_pmx_gpio_set_direction(pctldev, range, pin, false);
break;
default:
ret = -EINVAL;
} \
}
-#ifdef CONFIG_PM_SLEEP
static u8 suspend_prep_ok;
static u32 suspend_shlw_ctr_temp, suspend_deep_ctr_temp;
static u64 suspend_shlw_res_temp, suspend_deep_res_temp;
-#endif
struct telemetry_susp_stats {
u32 shlw_swake_ctr;
.release = single_release,
};
-#ifdef CONFIG_PM_SLEEP
static int pm_suspend_prep_cb(void)
{
struct telemetry_evtlog evtlog[TELEM_MAX_OS_ALLOCATED_EVENTS];
static struct notifier_block pm_notifier = {
.notifier_call = pm_notification,
};
-#endif /* CONFIG_PM_SLEEP */
static int __init telemetry_debugfs_init(void)
{
if (err < 0)
return -EINVAL;
-
-#ifdef CONFIG_PM_SLEEP
register_pm_notifier(&pm_notifier);
-#endif /* CONFIG_PM_SLEEP */
debugfs_conf->telemetry_dbg_dir = debugfs_create_dir("telemetry", NULL);
- if (!debugfs_conf->telemetry_dbg_dir)
- return -ENOMEM;
+ if (!debugfs_conf->telemetry_dbg_dir) {
+ err = -ENOMEM;
+ goto out_pm;
+ }
f = debugfs_create_file("pss_info", S_IFREG | S_IRUGO,
debugfs_conf->telemetry_dbg_dir, NULL,
out:
debugfs_remove_recursive(debugfs_conf->telemetry_dbg_dir);
debugfs_conf->telemetry_dbg_dir = NULL;
+out_pm:
+ unregister_pm_notifier(&pm_notifier);
return err;
}
{
debugfs_remove_recursive(debugfs_conf->telemetry_dbg_dir);
debugfs_conf->telemetry_dbg_dir = NULL;
+ unregister_pm_notifier(&pm_notifier);
}
late_initcall(telemetry_debugfs_init);
*/
if (basedev->state < DASD_STATE_READY) {
while ((req = blk_fetch_request(block->request_queue)))
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return;
}
"Rejecting write request %p",
req);
blk_start_request(req);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
continue;
}
if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) &&
"Rejecting failfast request %p",
req);
blk_start_request(req);
- __blk_end_request_all(req, -ETIMEDOUT);
+ __blk_end_request_all(req, BLK_STS_TIMEOUT);
continue;
}
cqr = basedev->discipline->build_cp(basedev, block, req);
"on request %p",
PTR_ERR(cqr), req);
blk_start_request(req);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
continue;
}
/*
{
struct request *req;
int status;
- int error = 0;
+ blk_status_t error = BLK_STS_OK;
req = (struct request *) cqr->callback_data;
dasd_profile_end(cqr->block, cqr, req);
+
status = cqr->block->base->discipline->free_cp(cqr, req);
if (status < 0)
- error = status;
+ error = errno_to_blk_status(status);
else if (status == 0) {
- if (cqr->intrc == -EPERM)
- error = -EBADE;
- else if (cqr->intrc == -ENOLINK ||
- cqr->intrc == -ETIMEDOUT)
- error = cqr->intrc;
- else
- error = -EIO;
+ switch (cqr->intrc) {
+ case -EPERM:
+ error = BLK_STS_NEXUS;
+ break;
+ case -ENOLINK:
+ error = BLK_STS_TRANSPORT;
+ break;
+ case -ETIMEDOUT:
+ error = BLK_STS_TIMEOUT;
+ break;
+ default:
+ error = BLK_STS_IOERR;
+ break;
+ }
}
__blk_end_request_all(req, error);
}
spin_lock_irq(&block->request_queue_lock);
while ((req = blk_fetch_request(block->request_queue)))
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
spin_unlock_irq(&block->request_queue_lock);
}
unsigned long source_addr;
unsigned long bytes_done;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
bytes_done = 0;
dev_info = bio->bi_bdev->bd_disk->private_data;
aob->request.data = (u64) aobrq;
scmrq->bdev = bdev;
scmrq->retries = 4;
- scmrq->error = 0;
+ scmrq->error = BLK_STS_OK;
/* We don't use all msbs - place aidaws at the end of the aob page. */
scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
scm_request_cluster_init(scmrq);
{
struct aob *aob = scmrq->aob;
- if (scmrq->error == -ETIMEDOUT)
+ if (scmrq->error == BLK_STS_TIMEOUT)
SCM_LOG(1, "Request timeout");
else {
SCM_LOG(1, "Request error");
scmrq->error);
}
-void scm_blk_irq(struct scm_device *scmdev, void *data, int error)
+void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
{
struct scm_request *scmrq = data;
struct scm_blk_dev *bdev = scmrq->bdev;
struct scm_blk_dev *bdev = scmrq->bdev;
unsigned long flags;
- if (scmrq->error != -EIO)
+ if (scmrq->error != BLK_STS_IOERR)
goto restart;
/* For -EIO the response block is valid. */
struct aob *aob;
struct list_head list;
u8 retries;
- int error;
+ blk_status_t error;
#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
struct {
enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state;
int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *);
void scm_blk_dev_cleanup(struct scm_blk_dev *);
void scm_blk_set_available(struct scm_blk_dev *);
-void scm_blk_irq(struct scm_device *, void *, int);
+void scm_blk_irq(struct scm_device *, void *, blk_status_t);
void scm_request_finish(struct scm_request *);
void scm_request_requeue(struct scm_request *);
unsigned long page_addr;
unsigned long bytes;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if ((bio->bi_iter.bi_sector & 7) != 0 ||
(bio->bi_iter.bi_size & 4095) != 0)
struct eadm_private *private = get_eadm_private(sch);
struct eadm_scsw *scsw = &sch->schib.scsw.eadm;
struct irb *irb = this_cpu_ptr(&cio_irb);
- int error = 0;
+ blk_status_t error = BLK_STS_OK;
EADM_LOG(6, "irq");
EADM_LOG_HEX(6, irb, sizeof(*irb));
if ((scsw->stctl & (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND))
&& scsw->eswf == 1 && irb->esw.eadm.erw.r)
- error = -EIO;
+ error = BLK_STS_IOERR;
if (scsw->fctl & SCSW_FCTL_CLEAR_FUNC)
- error = -ETIMEDOUT;
+ error = BLK_STS_TIMEOUT;
eadm_subchannel_set_timeout(sch, 0);
}
EXPORT_SYMBOL_GPL(scm_driver_unregister);
-void scm_irq_handler(struct aob *aob, int error)
+void scm_irq_handler(struct aob *aob, blk_status_t error)
{
struct aob_rq_header *aobrq = (void *) aob->request.data;
struct scm_device *scmdev = aobrq->scmdev;
{
return sprintf(buf, "I/O subchannel (Non-QDIO)\n");
}
-MDEV_TYPE_ATTR_RO(name);
+static MDEV_TYPE_ATTR_RO(name);
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
char *buf)
{
return sprintf(buf, "%s\n", VFIO_DEVICE_API_CCW_STRING);
}
-MDEV_TYPE_ATTR_RO(device_api);
+static MDEV_TYPE_ATTR_RO(device_api);
static ssize_t available_instances_show(struct kobject *kobj,
struct device *dev, char *buf)
return sprintf(buf, "%d\n", atomic_read(&private->avail));
}
-MDEV_TYPE_ATTR_RO(available_instances);
+static MDEV_TYPE_ATTR_RO(available_instances);
static struct attribute *mdev_types_attrs[] = {
&mdev_type_attr_name.attr,
.attrs = mdev_types_attrs,
};
-struct attribute_group *mdev_type_groups[] = {
+static struct attribute_group *mdev_type_groups[] = {
&mdev_type_group,
NULL,
};
&events, &private->nb);
}
-void vfio_ccw_mdev_release(struct mdev_device *mdev)
+static void vfio_ccw_mdev_release(struct mdev_device *mdev)
{
struct vfio_ccw_private *private =
dev_get_drvdata(mdev_parent_dev(mdev));
}
}
-int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
+static int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
{
if (info->index != VFIO_CCW_IO_IRQ_INDEX)
return -EINVAL;
struct ap_driver *ap_drv = to_ap_drv(dev->driver);
int rc;
+ /* Add queue/card to list of active queues/cards */
+ spin_lock_bh(&ap_list_lock);
+ if (is_card_dev(dev))
+ list_add(&to_ap_card(dev)->list, &ap_card_list);
+ else
+ list_add(&to_ap_queue(dev)->list,
+ &to_ap_queue(dev)->card->queues);
+ spin_unlock_bh(&ap_list_lock);
+
ap_dev->drv = ap_drv;
rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV;
- if (rc)
+
+ if (rc) {
+ spin_lock_bh(&ap_list_lock);
+ if (is_card_dev(dev))
+ list_del_init(&to_ap_card(dev)->list);
+ else
+ list_del_init(&to_ap_queue(dev)->list);
+ spin_unlock_bh(&ap_list_lock);
ap_dev->drv = NULL;
+ }
+
return rc;
}
struct ap_device *ap_dev = to_ap_dev(dev);
struct ap_driver *ap_drv = ap_dev->drv;
+ if (ap_drv->remove)
+ ap_drv->remove(ap_dev);
+
+ /* Remove queue/card from list of active queues/cards */
spin_lock_bh(&ap_list_lock);
if (is_card_dev(dev))
list_del_init(&to_ap_card(dev)->list);
else
list_del_init(&to_ap_queue(dev)->list);
spin_unlock_bh(&ap_list_lock);
- if (ap_drv->remove)
- ap_drv->remove(ap_dev);
+
return 0;
}
}
/* get it and thus adjust reference counter */
get_device(&ac->ap_dev.device);
- /* Add card device to card list */
- spin_lock_bh(&ap_list_lock);
- list_add(&ac->list, &ap_card_list);
- spin_unlock_bh(&ap_list_lock);
}
/* now create the new queue device */
aq = ap_queue_create(qid, type);
aq->ap_dev.device.parent = &ac->ap_dev.device;
dev_set_name(&aq->ap_dev.device,
"%02x.%04x", id, dom);
- /* Add queue device to card queue list */
- spin_lock_bh(&ap_list_lock);
- list_add(&aq->list, &ac->queues);
- spin_unlock_bh(&ap_list_lock);
/* Start with a device reset */
spin_lock_bh(&aq->lock);
ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
/* Register device */
rc = device_register(&aq->ap_dev.device);
if (rc) {
- spin_lock_bh(&ap_list_lock);
- list_del_init(&aq->list);
- spin_unlock_bh(&ap_list_lock);
put_device(&aq->ap_dev.device);
continue;
}
static void ap_card_device_release(struct device *dev)
{
- kfree(to_ap_card(dev));
+ struct ap_card *ac = to_ap_card(dev);
+
+ if (!list_empty(&ac->list)) {
+ spin_lock_bh(&ap_list_lock);
+ list_del_init(&ac->list);
+ spin_unlock_bh(&ap_list_lock);
+ }
+ kfree(ac);
}
struct ap_card *ap_card_create(int id, int queue_depth, int device_type,
static void ap_queue_device_release(struct device *dev)
{
- kfree(to_ap_queue(dev));
+ struct ap_queue *aq = to_ap_queue(dev);
+
+ if (!list_empty(&aq->list)) {
+ spin_lock_bh(&ap_list_lock);
+ list_del_init(&aq->list);
+ spin_unlock_bh(&ap_list_lock);
+ }
+ kfree(aq);
}
struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
privptr->conn = NULL; privptr->fsm = NULL;
/* privptr gets freed by free_netdev() */
}
- free_netdev(dev);
}
/**
dev->mtu = NETIUCV_MTU_DEFAULT;
dev->min_mtu = 576;
dev->max_mtu = NETIUCV_MTU_MAX;
- dev->destructor = netiucv_free_netdevice;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = netiucv_free_netdevice;
dev->hard_header_len = NETIUCV_HDRLEN;
dev->addr_len = 0;
dev->type = ARPHRD_SLIP;
struct jsfd_part *jdp = req->rq_disk->private_data;
unsigned long offset = blk_rq_pos(req) << 9;
size_t len = blk_rq_cur_bytes(req);
- int err = -EIO;
+ blk_status_t err = BLK_STS_IOERR;
if ((offset + len) > jdp->dsize)
goto end;
}
jsfd_read(bio_data(req->bio), jdp->dbase + offset, len);
- err = 0;
+ err = BLK_STS_OK;
end:
if (!__blk_end_request_cur(req, err))
req = jsfd_next_request();
put_disk(disk);
goto out;
}
+ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
jsfd_disk[i] = disk;
}
* code paths.
*/
if (unlikely(rq->bio))
- blk_end_request(rq, -ENOMEM, blk_rq_bytes(rq));
+ blk_end_request(rq, BLK_STS_IOERR, blk_rq_bytes(rq));
else
blk_put_request(rq);
}
EXPORT_SYMBOL(osd_end_request);
static void _set_error_resid(struct osd_request *or, struct request *req,
- int error)
+ blk_status_t error)
{
or->async_error = error;
- or->req_errors = scsi_req(req)->result ? : error;
+ or->req_errors = scsi_req(req)->result;
or->sense_len = scsi_req(req)->sense_len;
if (or->sense_len)
memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
int osd_execute_request(struct osd_request *or)
{
- int error;
-
blk_execute_rq(or->request->q, NULL, or->request, 0);
- error = scsi_req(or->request)->result ? -EIO : 0;
- _set_error_resid(or, or->request, error);
- return error;
+ if (scsi_req(or->request)->result) {
+ _set_error_resid(or, or->request, BLK_STS_IOERR);
+ return -EIO;
+ }
+
+ _set_error_resid(or, or->request, BLK_STS_OK);
+ return 0;
}
EXPORT_SYMBOL(osd_execute_request);
-static void osd_request_async_done(struct request *req, int error)
+static void osd_request_async_done(struct request *req, blk_status_t error)
{
struct osd_request *or = req->end_io_data;
flags);
if (IS_ERR(req))
return req;
- scsi_req_init(req);
for_each_bio(bio) {
- struct bio *bounce_bio = bio;
-
- blk_queue_bounce(req->q, &bounce_bio);
- ret = blk_rq_append_bio(req, bounce_bio);
+ ret = blk_rq_append_bio(req, bio);
if (ret)
return ERR_PTR(ret);
}
ret = PTR_ERR(req);
goto out;
}
- scsi_req_init(req);
or->in.req = or->request->next_rq = req;
}
} else if (has_in)
/* scsi sense is Empty, the request was never issued to target
* linux return code might tell us what happened.
*/
- if (or->async_error == -ENOMEM)
+ if (or->async_error == BLK_STS_RESOURCE)
osi->osd_err_pri = OSD_ERR_PRI_RESOURCE;
else
osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE;
/* Wakeup from interrupt */
-static void osst_end_async(struct request *req, int update)
+static void osst_end_async(struct request *req, blk_status_t status)
{
struct scsi_request *rq = scsi_req(req);
struct osst_request *SRpnt = req->end_io_data;
return DRIVER_ERROR << 24;
rq = scsi_req(req);
- scsi_req_init(req);
req->rq_flags |= RQF_QUIET;
SRpnt->bio = NULL;
QEDI_ERR(&qedi->dbg_ctx,
"Delayed or untracked cleanup response, itt=0x%x, tid=0x%x, cid=0x%x, task=%p\n",
protoitt, cqe->itid, qedi_conn->iscsi_conn_id, task);
- WARN_ON(1);
}
}
void qedi_clear_task_idx(struct qedi_ctx *qedi, int idx)
{
- if (!test_and_clear_bit(idx, qedi->task_idx_map)) {
+ if (!test_and_clear_bit(idx, qedi->task_idx_map))
QEDI_ERR(&qedi->dbg_ctx,
"FW task context, already cleared, tid=0x%x\n", idx);
- WARN_ON(1);
- }
}
void qedi_update_itt_map(struct qedi_ctx *qedi, u32 tid, u32 proto_itt,
unsigned int channel;
unsigned int target;
u64 lun;
- uuid_be lu_name;
+ uuid_t lu_name;
struct sdebug_host_info *sdbg_host;
unsigned long uas_bm[1];
atomic_t num_in_q;
static int inquiry_vpd_83(unsigned char *arr, int port_group_id,
int target_dev_id, int dev_id_num,
const char *dev_id_str, int dev_id_str_len,
- const uuid_be *lu_name)
+ const uuid_t *lu_name)
{
int num, port_a;
char b[32];
}
static bool got_shared_uuid;
-static uuid_be shared_uuid;
+static uuid_t shared_uuid;
static struct sdebug_dev_info *sdebug_device_create(
struct sdebug_host_info *sdbg_host, gfp_t flags)
devip = kzalloc(sizeof(*devip), flags);
if (devip) {
if (sdebug_uuid_ctl == 1)
- uuid_be_gen(&devip->lu_name);
+ uuid_gen(&devip->lu_name);
else if (sdebug_uuid_ctl == 2) {
if (got_shared_uuid)
devip->lu_name = shared_uuid;
else {
- uuid_be_gen(&shared_uuid);
+ uuid_gen(&shared_uuid);
got_shared_uuid = true;
devip->lu_name = shared_uuid;
}
}
}
-static void eh_lock_door_done(struct request *req, int uptodate)
+static void eh_lock_door_done(struct request *req, blk_status_t status)
{
__blk_put_request(req->q, req);
}
if (IS_ERR(req))
return;
rq = scsi_req(req);
- scsi_req_init(req);
rq->cmd[0] = ALLOW_MEDIUM_REMOVAL;
rq->cmd[1] = 0;
if (IS_ERR(req))
return ret;
rq = scsi_req(req);
- scsi_req_init(req);
if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
buffer, bufflen, __GFP_RECLAIM))
cmd->request->next_rq->special = NULL;
}
-static bool scsi_end_request(struct request *req, int error,
+static bool scsi_end_request(struct request *req, blk_status_t error,
unsigned int bytes, unsigned int bidi_bytes)
{
struct scsi_cmnd *cmd = req->special;
* @cmd: SCSI command (unused)
* @result: scsi error code
*
- * Translate SCSI error code into standard UNIX errno.
- * Return values:
- * -ENOLINK temporary transport failure
- * -EREMOTEIO permanent target failure, do not retry
- * -EBADE permanent nexus failure, retry on other path
- * -ENOSPC No write space available
- * -ENODATA Medium error
- * -EIO unspecified I/O error
+ * Translate SCSI error code into block errors.
*/
-static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result)
+static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
+ int result)
{
- int error = 0;
-
- switch(host_byte(result)) {
+ switch (host_byte(result)) {
case DID_TRANSPORT_FAILFAST:
- error = -ENOLINK;
- break;
+ return BLK_STS_TRANSPORT;
case DID_TARGET_FAILURE:
set_host_byte(cmd, DID_OK);
- error = -EREMOTEIO;
- break;
+ return BLK_STS_TARGET;
case DID_NEXUS_FAILURE:
- set_host_byte(cmd, DID_OK);
- error = -EBADE;
- break;
+ return BLK_STS_NEXUS;
case DID_ALLOC_FAILURE:
set_host_byte(cmd, DID_OK);
- error = -ENOSPC;
- break;
+ return BLK_STS_NOSPC;
case DID_MEDIUM_ERROR:
set_host_byte(cmd, DID_OK);
- error = -ENODATA;
- break;
+ return BLK_STS_MEDIUM;
default:
- error = -EIO;
- break;
+ return BLK_STS_IOERR;
}
-
- return error;
}
/*
int result = cmd->result;
struct request_queue *q = cmd->device->request_queue;
struct request *req = cmd->request;
- int error = 0;
+ blk_status_t error = BLK_STS_OK;
struct scsi_sense_hdr sshdr;
bool sense_valid = false;
int sense_deferred = 0, level = 0;
* both sides at once.
*/
scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
- if (scsi_end_request(req, 0, blk_rq_bytes(req),
+ if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req),
blk_rq_bytes(req->next_rq)))
BUG();
return;
scsi_print_sense(cmd);
result = 0;
/* for passthrough error may be set */
- error = 0;
+ error = BLK_STS_OK;
}
/*
action = ACTION_REPREP;
} else if (sshdr.asc == 0x10) /* DIX */ {
action = ACTION_FAIL;
- error = -EILSEQ;
+ error = BLK_STS_PROTECTION;
/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
} else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
action = ACTION_FAIL;
- error = -EREMOTEIO;
+ error = BLK_STS_TARGET;
} else
action = ACTION_FAIL;
break;
case ABORTED_COMMAND:
action = ACTION_FAIL;
if (sshdr.asc == 0x10) /* DIF */
- error = -EILSEQ;
+ error = BLK_STS_PROTECTION;
break;
case NOT_READY:
/* If the device is in the process of becoming
}
EXPORT_SYMBOL(scsi_init_io);
+/**
+ * scsi_initialize_rq - initialize struct scsi_cmnd.req
+ *
+ * Called from inside blk_get_request().
+ */
+void scsi_initialize_rq(struct request *rq)
+{
+ struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+ scsi_req_init(&cmd->req);
+}
+EXPORT_SYMBOL(scsi_initialize_rq);
+
+/* Called after a request has been started. */
void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
{
void *buf = cmd->sense_buffer;
blk_delay_queue(q, SCSI_QUEUE_DELAY);
}
-static inline int prep_to_mq(int ret)
+static inline blk_status_t prep_to_mq(int ret)
{
switch (ret) {
case BLKPREP_OK:
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
case BLKPREP_DEFER:
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
default:
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
}
blk_mq_complete_request(cmd->request);
}
-static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *req = bd->rq;
struct scsi_device *sdev = q->queuedata;
struct Scsi_Host *shost = sdev->host;
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
- int ret;
+ blk_status_t ret;
int reason;
ret = prep_to_mq(scsi_prep_state_check(sdev, req));
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret != BLK_STS_OK)
goto out;
- ret = BLK_MQ_RQ_QUEUE_BUSY;
+ ret = BLK_STS_RESOURCE;
if (!get_device(&sdev->sdev_gendev))
goto out;
if (!(req->rq_flags & RQF_DONTPREP)) {
ret = prep_to_mq(scsi_mq_prep_fn(req));
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret != BLK_STS_OK)
goto out_dec_host_busy;
req->rq_flags |= RQF_DONTPREP;
} else {
reason = scsi_dispatch_cmd(cmd);
if (reason) {
scsi_set_blocked(cmd, reason);
- ret = BLK_MQ_RQ_QUEUE_BUSY;
+ ret = BLK_STS_RESOURCE;
goto out_dec_host_busy;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_dec_host_busy:
atomic_dec(&shost->host_busy);
put_device(&sdev->sdev_gendev);
out:
switch (ret) {
- case BLK_MQ_RQ_QUEUE_BUSY:
+ case BLK_STS_OK:
+ break;
+ case BLK_STS_RESOURCE:
if (atomic_read(&sdev->device_busy) == 0 &&
!scsi_device_blocked(sdev))
blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
break;
- case BLK_MQ_RQ_QUEUE_ERROR:
+ default:
/*
* Make sure to release all allocated ressources when
* we hit an error, as we will never see this command
if (req->rq_flags & RQF_DONTPREP)
scsi_mq_uninit_cmd(cmd);
break;
- default:
- break;
}
return ret;
}
{
struct device *dev = shost->dma_dev;
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
+
/*
* this limit is imposed by hardware restrictions
*/
q->request_fn = scsi_request_fn;
q->init_rq_fn = scsi_init_rq;
q->exit_rq_fn = scsi_exit_rq;
+ q->initialize_rq_fn = scsi_initialize_rq;
if (blk_init_allocated_queue(q) < 0) {
blk_cleanup_queue(q);
#endif
.init_request = scsi_init_request,
.exit_request = scsi_exit_request,
+ .initialize_rq_fn = scsi_initialize_rq,
.map_queues = scsi_map_queues,
};
if (wait)
blk_mq_quiesce_queue(q);
else
- blk_mq_stop_hw_queues(q);
+ blk_mq_quiesce_queue_nowait(q);
} else {
spin_lock_irqsave(q->queue_lock, flags);
blk_stop_queue(q);
return -EINVAL;
if (q->mq_ops) {
- blk_mq_start_stopped_hw_queues(q, false);
+ blk_mq_unquiesce_queue(q);
} else {
spin_lock_irqsave(q->queue_lock, flags);
blk_start_queue(q);
#include <linux/bsg.h>
#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_request.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
struct sas_rphy *rphy)
{
struct request *req;
- int ret;
+ blk_status_t ret;
int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
while ((req = blk_fetch_request(q)) != NULL) {
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return -ENOMEM;
+ q->initialize_rq_fn = scsi_initialize_rq;
q->cmd_size = sizeof(struct scsi_request);
if (rphy) {
if (error)
goto out_cleanup_queue;
+ /*
+ * by default assume old behaviour and bounce for any highmem page
+ */
+ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+
error = bsg_register_queue(q, dev, name, release);
if (error)
goto out_cleanup_queue;
q->queuedata = shost;
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
return 0;
out_cleanup_queue:
} Sg_device;
/* tasklet or soft irq callback */
-static void sg_rq_end_io(struct request *rq, int uptodate);
+static void sg_rq_end_io(struct request *rq, blk_status_t status);
static int sg_start_req(Sg_request *srp, unsigned char *cmd);
static int sg_finish_rem_req(Sg_request * srp);
static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
if (atomic_read(&sdp->detaching)) {
if (srp->bio) {
scsi_req_free_cmd(scsi_req(srp->rq));
- blk_end_request_all(srp->rq, -EIO);
+ blk_end_request_all(srp->rq, BLK_STS_IOERR);
srp->rq = NULL;
}
* level when a command is completed (or has failed).
*/
static void
-sg_rq_end_io(struct request *rq, int uptodate)
+sg_rq_end_io(struct request *rq, blk_status_t status)
{
struct sg_request *srp = rq->end_io_data;
struct scsi_request *req = scsi_req(rq);
}
req = scsi_req(rq);
- scsi_req_init(rq);
-
if (hp->cmd_len > BLK_MAX_CDB)
req->cmd = long_cmdp;
memcpy(req->cmd, cmd, hp->cmd_len);
atomic64_dec(&STp->stats->in_flight);
}
-static void st_scsi_execute_end(struct request *req, int uptodate)
+static void st_scsi_execute_end(struct request *req, blk_status_t status)
{
struct st_request *SRpnt = req->end_io_data;
struct scsi_request *rq = scsi_req(req);
if (IS_ERR(req))
return DRIVER_ERROR << 24;
rq = scsi_req(req);
- scsi_req_init(req);
req->rq_flags |= RQF_QUIET;
mdata->null_mapped = 1;
if (i >= ARRAY_SIZE(ad7152_filter_rate_table))
i = ARRAY_SIZE(ad7152_filter_rate_table) - 1;
- mutex_lock(&chip->state_lock);
ret = i2c_smbus_write_byte_data(chip->client,
AD7152_REG_CFG2, AD7152_CFG2_OSR(i));
- if (ret < 0) {
- mutex_unlock(&chip->state_lock);
+ if (ret < 0)
return ret;
- }
chip->filter_rate_setup = i;
- mutex_unlock(&chip->state_lock);
return ret;
}
static void mon_setup(struct net_device *dev)
{
dev->netdev_ops = &mon_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
ether_setup(dev);
dev->priv_flags |= IFF_NO_QUEUE;
dev->type = ARPHRD_IEEE80211;
mon_ndev->type = ARPHRD_IEEE80211_RADIOTAP;
strncpy(mon_ndev->name, name, IFNAMSIZ);
mon_ndev->name[IFNAMSIZ - 1] = 0;
- mon_ndev->destructor = rtw_ndev_destructor;
+ mon_ndev->needs_free_netdev = true;
+ mon_ndev->priv_destructor = rtw_ndev_destructor;
mon_ndev->netdev_ops = &rtw_cfg80211_monitor_if_ops;
if (ndev->ieee80211_ptr)
kfree((u8 *)ndev->ieee80211_ptr);
-
- free_netdev(ndev);
}
void rtw_dev_unload(struct adapter *padapter)
oldfs = get_fs(); set_fs(get_ds());
if (1!=readFile(fp, &buf, 1))
- ret = PTR_ERR(fp);
+ ret = -EINVAL;
set_fs(oldfs);
filp_close(fp, NULL);
*/
if (dump_payload)
goto after_immediate_data;
+ /*
+ * Check for underflow case where both EDTL and immediate data payload
+ * exceeds what is presented by CDB's TRANSFER LENGTH, and what has
+ * already been set in target_cmd_size_check() as se_cmd->data_length.
+ *
+ * For this special case, fail the command and dump the immediate data
+ * payload.
+ */
+ if (cmd->first_burst_len > cmd->se_cmd.data_length) {
+ cmd->sense_reason = TCM_INVALID_CDB_FIELD;
+ goto after_immediate_data;
+ }
immed_ret = iscsit_handle_immediate_data(cmd, hdr,
cmd->first_burst_len);
* always sleep waiting for RX/TX thread shutdown to complete
* within iscsit_close_connection().
*/
- if (!conn->conn_transport->rdma_shutdown)
+ if (!conn->conn_transport->rdma_shutdown) {
sleep = cmpxchg(&conn->tx_thread_active, true, false);
+ if (!sleep)
+ return;
+ }
atomic_set(&conn->conn_logout_remove, 0);
complete(&conn->conn_logout_comp);
{
int sleep = 1;
- if (!conn->conn_transport->rdma_shutdown)
+ if (!conn->conn_transport->rdma_shutdown) {
sleep = cmpxchg(&conn->tx_thread_active, true, false);
+ if (!sleep)
+ return;
+ }
atomic_set(&conn->conn_logout_remove, 0);
complete(&conn->conn_logout_comp);
return -EINVAL;
}
- ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0);
+ ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!ib_dev->ibd_bio_set) {
pr_err("IBLOCK: Unable to create bioset\n");
goto out;
struct se_cmd *cmd = bio->bi_private;
struct iblock_req *ibr = cmd->priv;
- if (bio->bi_error) {
- pr_err("bio error: %p, err: %d\n", bio, bio->bi_error);
+ if (bio->bi_status) {
+ pr_err("bio error: %p, err: %d\n", bio, bio->bi_status);
/*
* Bump the ib_bio_err_cnt and release bio.
*/
{
struct se_cmd *cmd = bio->bi_private;
- if (bio->bi_error)
- pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_error);
+ if (bio->bi_status)
+ pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_status);
if (cmd) {
- if (bio->bi_error)
+ if (bio->bi_status)
target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION);
else
target_complete_cmd(cmd, SAM_STAT_GOOD);
void release_se_kmem_caches(void);
u32 scsi_get_new_index(scsi_index_t);
void transport_subsystem_check_init(void);
-void transport_cmd_finish_abort(struct se_cmd *, int);
+int transport_cmd_finish_abort(struct se_cmd *, int);
unsigned char *transport_dump_cmd_direction(struct se_cmd *);
void transport_dump_dev_state(struct se_device *, char *, int *);
void transport_dump_dev_info(struct se_device *, struct se_lun *,
}
static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd);
-static void pscsi_req_done(struct request *, int);
+static void pscsi_req_done(struct request *, blk_status_t);
/* pscsi_attach_hba():
*
goto fail;
}
- scsi_req_init(req);
-
if (sgl) {
ret = pscsi_map_sg(cmd, sgl, sgl_nents, req);
if (ret)
return 0;
}
-static void pscsi_req_done(struct request *req, int uptodate)
+static void pscsi_req_done(struct request *req, blk_status_t status)
{
struct se_cmd *cmd = req->end_io_data;
struct pscsi_plugin_task *pt = cmd->priv;
kfree(tmr);
}
-static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
+static int core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
{
unsigned long flags;
bool remove = true, send_tas;
transport_send_task_abort(cmd);
}
- transport_cmd_finish_abort(cmd, remove);
+ return transport_cmd_finish_abort(cmd, remove);
}
static int target_check_cdb_and_preempt(struct list_head *list,
cancel_work_sync(&se_cmd->work);
transport_wait_for_tasks(se_cmd);
- transport_cmd_finish_abort(se_cmd, true);
- target_put_sess_cmd(se_cmd);
+ if (!transport_cmd_finish_abort(se_cmd, true))
+ target_put_sess_cmd(se_cmd);
printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for"
" ref_tag: %llu\n", ref_tag);
cancel_work_sync(&cmd->work);
transport_wait_for_tasks(cmd);
- transport_cmd_finish_abort(cmd, 1);
- target_put_sess_cmd(cmd);
+ if (!transport_cmd_finish_abort(cmd, 1))
+ target_put_sess_cmd(cmd);
}
}
cancel_work_sync(&cmd->work);
transport_wait_for_tasks(cmd);
- core_tmr_handle_tas_abort(cmd, tas);
- target_put_sess_cmd(cmd);
+ if (!core_tmr_handle_tas_abort(cmd, tas))
+ target_put_sess_cmd(cmd);
}
}
percpu_ref_put(&lun->lun_ref);
}
-void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
+int transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
{
bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF);
+ int ret = 0;
if (cmd->se_cmd_flags & SCF_SE_LUN_CMD)
transport_lun_remove_cmd(cmd);
cmd->se_tfo->aborted_task(cmd);
if (transport_cmd_check_stop_to_fabric(cmd))
- return;
+ return 1;
if (remove && ack_kref)
- transport_put_cmd(cmd);
+ ret = transport_put_cmd(cmd);
+
+ return ret;
}
static void target_complete_failure_work(struct work_struct *work)
INT3400_THERMAL_MAXIMUM_UUID,
};
-static u8 *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
+static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
"42A441D6-AE6A-462b-A84B-4A8CE79027D3",
"3A95C389-E4B8-4629-A526-C52C88626BAE",
"97C68AE7-15FA-499c-B8C9-5DA81D606E0A",
}
for (j = 0; j < INT3400_THERMAL_MAXIMUM_UUID; j++) {
- u8 uuid[16];
+ guid_t guid;
- acpi_str_to_uuid(int3400_thermal_uuids[j], uuid);
- if (!strncmp(uuid, objb->buffer.pointer, 16)) {
+ guid_parse(int3400_thermal_uuids[j], &guid);
+ if (guid_equal((guid_t *)objb->buffer.pointer, &guid)) {
priv->uuid_bitmap |= (1 << j);
break;
}
#define PCI_DEVICE_ID_INTEL_CNPLP 0x9dee
#define PCI_DEVICE_ID_INTEL_CNPH 0xa36e
-#define PCI_INTEL_BXT_DSM_UUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
+#define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
#define PCI_INTEL_BXT_FUNC_PMU_PWR 4
#define PCI_INTEL_BXT_STATE_D0 0
#define PCI_INTEL_BXT_STATE_D3 3
* struct dwc3_pci - Driver private structure
* @dwc3: child dwc3 platform_device
* @pci: our link to PCI bus
- * @uuid: _DSM UUID
+ * @guid: _DSM GUID
* @has_dsm_for_pm: true for devices which need to run _DSM on runtime PM
*/
struct dwc3_pci {
struct platform_device *dwc3;
struct pci_dev *pci;
- u8 uuid[16];
+ guid_t guid;
unsigned int has_dsm_for_pm:1;
};
if (pdev->device == PCI_DEVICE_ID_INTEL_BXT ||
pdev->device == PCI_DEVICE_ID_INTEL_BXT_M) {
- acpi_str_to_uuid(PCI_INTEL_BXT_DSM_UUID, dwc->uuid);
+ guid_parse(PCI_INTEL_BXT_DSM_GUID, &dwc->guid);
dwc->has_dsm_for_pm = true;
}
tmp.type = ACPI_TYPE_INTEGER;
tmp.integer.value = param;
- obj = acpi_evaluate_dsm(ACPI_HANDLE(&dwc->pci->dev), dwc->uuid,
+ obj = acpi_evaluate_dsm(ACPI_HANDLE(&dwc->pci->dev), &dwc->guid,
1, PCI_INTEL_BXT_FUNC_PMU_PWR, &argv4);
if (!obj) {
dev_err(&dwc->pci->dev, "failed to evaluate _DSM\n");
list_del(&f->list);
if (f->unbind)
f->unbind(c, f);
+
+ if (f->bind_deactivated)
+ usb_function_activate(f);
}
EXPORT_SYMBOL_GPL(usb_remove_function);
f = list_first_entry(&config->functions,
struct usb_function, list);
- list_del(&f->list);
- if (f->unbind) {
- DBG(cdev, "unbind function '%s'/%p\n", f->name, f);
- f->unbind(config, f);
- /* may free memory for "f" */
- }
+
+ usb_remove_function(config, f);
}
list_del(&config->list);
if (config->unbind) {
dev->tx_queue_len = 1;
dev->netdev_ops = &pn_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->header_ops = &phonet_header_ops;
}
/* closing ep0 === shutdown all */
- if (dev->gadget_registered)
+ if (dev->gadget_registered) {
usb_gadget_unregister_driver (&gadgetfs_driver);
+ dev->gadget_registered = false;
+ }
/* at this point "good" hardware has disconnected the
* device from USB; the host won't see it any more.
gadgetfs_suspend (struct usb_gadget *gadget)
{
struct dev_data *dev = get_gadget_data (gadget);
+ unsigned long flags;
INFO (dev, "suspended from state %d\n", dev->state);
- spin_lock (&dev->lock);
+ spin_lock_irqsave(&dev->lock, flags);
switch (dev->state) {
case STATE_DEV_SETUP: // VERY odd... host died??
case STATE_DEV_CONNECTED:
default:
break;
}
- spin_unlock (&dev->lock);
+ spin_unlock_irqrestore(&dev->lock, flags);
}
static struct usb_gadget_driver gadgetfs_driver = {
/* Report reset and disconnect events to the driver */
if (dum->driver && (disconnect || reset)) {
stop_activity(dum);
- spin_unlock(&dum->lock);
if (reset)
usb_gadget_udc_reset(&dum->gadget, dum->driver);
else
dum->driver->disconnect(&dum->gadget);
- spin_lock(&dum->lock);
}
} else if (dum_hcd->active != dum_hcd->old_active) {
- if (dum_hcd->old_active && dum->driver->suspend) {
- spin_unlock(&dum->lock);
+ if (dum_hcd->old_active && dum->driver->suspend)
dum->driver->suspend(&dum->gadget);
- spin_lock(&dum->lock);
- } else if (!dum_hcd->old_active && dum->driver->resume) {
- spin_unlock(&dum->lock);
+ else if (!dum_hcd->old_active && dum->driver->resume)
dum->driver->resume(&dum->gadget);
- spin_lock(&dum->lock);
- }
}
dum_hcd->old_status = dum_hcd->port_status;
struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g);
struct dummy *dum = dum_hcd->dum;
+ spin_lock_irq(&dum->lock);
dum->driver = NULL;
+ spin_unlock_irq(&dum->lock);
return 0;
}
nuke(&dev->ep[i]);
/* report disconnect; the driver is already quiesced */
- if (driver) {
- spin_unlock(&dev->lock);
+ if (driver)
driver->disconnect(&dev->gadget);
- spin_lock(&dev->lock);
- }
usb_reinit(dev);
}
BIT(PCI_RETRY_ABORT_INTERRUPT))
static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
-__releases(dev->lock)
-__acquires(dev->lock)
{
struct net2280_ep *ep;
u32 tmp, num, mask, scratch;
if (disconnect || reset) {
stop_activity(dev, dev->driver);
ep0_start(dev);
- spin_unlock(&dev->lock);
if (reset)
usb_gadget_udc_reset
(&dev->gadget, dev->driver);
else
(dev->driver->disconnect)
(&dev->gadget);
- spin_lock(&dev->lock);
return;
}
}
{
u32 temp, port_offset, port_count;
int i;
- u8 major_revision;
+ u8 major_revision, minor_revision;
struct xhci_hub *rhub;
temp = readl(addr);
major_revision = XHCI_EXT_PORT_MAJOR(temp);
+ minor_revision = XHCI_EXT_PORT_MINOR(temp);
if (major_revision == 0x03) {
rhub = &xhci->usb3_rhub;
return;
}
rhub->maj_rev = XHCI_EXT_PORT_MAJOR(temp);
- rhub->min_rev = XHCI_EXT_PORT_MINOR(temp);
+
+ if (rhub->min_rev < minor_revision)
+ rhub->min_rev = minor_revision;
/* Port offset and count in the third dword, see section 7.2 */
temp = readl(addr + 2);
if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
pdev->device == 0x1042)
xhci->quirks |= XHCI_BROKEN_STREAMS;
+ if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
+ pdev->device == 0x1142)
+ xhci->quirks |= XHCI_TRUST_TX_LENGTH;
if (pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241)
xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_7;
#ifdef CONFIG_ACPI
static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev)
{
- static const u8 intel_dsm_uuid[] = {
- 0xb7, 0x0c, 0x34, 0xac, 0x01, 0xe9, 0xbf, 0x45,
- 0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23,
- };
+ static const guid_t intel_dsm_guid =
+ GUID_INIT(0xac340cb7, 0xe901, 0x45bf,
+ 0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23);
union acpi_object *obj;
- obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid, 3, 1,
+ obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), &intel_dsm_guid, 3, 1,
NULL);
ACPI_FREE(obj);
}
static int ucsi_acpi_cmd(struct ucsi *ucsi, struct ucsi_control *ctrl)
{
- uuid_le uuid = UUID_LE(0x6f8398c2, 0x7ca4, 0x11e4,
- 0xad, 0x36, 0x63, 0x10, 0x42, 0xb5, 0x00, 0x8f);
+ guid_t guid = GUID_INIT(0x6f8398c2, 0x7ca4, 0x11e4,
+ 0xad, 0x36, 0x63, 0x10, 0x42, 0xb5, 0x00, 0x8f);
union acpi_object *obj;
ucsi->data->ctrl.raw_cmd = ctrl->raw_cmd;
- obj = acpi_evaluate_dsm(ACPI_HANDLE(ucsi->dev), uuid.b, 1, 1, NULL);
+ obj = acpi_evaluate_dsm(ACPI_HANDLE(ucsi->dev), &guid, 1, 1, NULL);
if (!obj) {
dev_err(ucsi->dev, "%s: failed to evaluate _DSM\n", __func__);
return -EIO;
WCOVE_ROLE_DEVICE,
};
-static uuid_le uuid = UUID_LE(0x482383f0, 0x2876, 0x4e49,
- 0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37);
+static guid_t guid = GUID_INIT(0x482383f0, 0x2876, 0x4e49,
+ 0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37);
static int wcove_typec_func(struct wcove_typec *wcove,
enum wcove_typec_func func, int param)
tmp.type = ACPI_TYPE_INTEGER;
tmp.integer.value = param;
- obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), uuid.b, 1, func,
+ obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), &guid, 1, func,
&argv4);
if (!obj) {
dev_err(wcove->dev, "%s: failed to evaluate _DSM\n", __func__);
if (ret)
return ret;
- if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), uuid.b, 0, 0x1f)) {
+ if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), &guid, 0, 0x1f)) {
dev_err(&pdev->dev, "Missing _DSM functions\n");
return -ENODEV;
}
for (i = 0; i < (128 - edid[2]) / DETAILED_TIMING_DESCRIPTION_SIZE;
i++, block += DETAILED_TIMING_DESCRIPTION_SIZE)
- if (PIXEL_CLOCK)
+ if (PIXEL_CLOCK != 0)
edt[num++] = block - edid;
/* Yikes, EDID data is totally useless */
dev_dbg(dev->gdev, "%s %s - serial #%s\n",
usbdev->manufacturer, usbdev->product, usbdev->serial);
dev_dbg(dev->gdev, "vid_%04x&pid_%04x&rev_%04x driver's ufx_data struct at %p\n",
- usbdev->descriptor.idVendor, usbdev->descriptor.idProduct,
- usbdev->descriptor.bcdDevice, dev);
+ le16_to_cpu(usbdev->descriptor.idVendor),
+ le16_to_cpu(usbdev->descriptor.idProduct),
+ le16_to_cpu(usbdev->descriptor.bcdDevice), dev);
dev_dbg(dev->gdev, "console enable=%d\n", console);
dev_dbg(dev->gdev, "fb_defio enable=%d\n", fb_defio);
char *bufptr;
struct urb *urb;
- pr_info("/dev/fb%d FB_BLANK mode %d --> %d\n",
- info->node, dev->blank_mode, blank_mode);
+ pr_debug("/dev/fb%d FB_BLANK mode %d --> %d\n",
+ info->node, dev->blank_mode, blank_mode);
if ((dev->blank_mode == FB_BLANK_POWERDOWN) &&
(blank_mode != FB_BLANK_POWERDOWN)) {
pr_info("%s %s - serial #%s\n",
usbdev->manufacturer, usbdev->product, usbdev->serial);
pr_info("vid_%04x&pid_%04x&rev_%04x driver's dlfb_data struct at %p\n",
- usbdev->descriptor.idVendor, usbdev->descriptor.idProduct,
- usbdev->descriptor.bcdDevice, dev);
+ le16_to_cpu(usbdev->descriptor.idVendor),
+ le16_to_cpu(usbdev->descriptor.idProduct),
+ le16_to_cpu(usbdev->descriptor.bcdDevice), dev);
pr_info("console enable=%d\n", console);
pr_info("fb_defio enable=%d\n", fb_defio);
pr_info("shadow enable=%d\n", shadow);
}
static void viafb_remove_proc(struct viafb_shared *shared)
{
- struct proc_dir_entry *viafb_entry = shared->proc_entry,
- *iga1_entry = shared->iga1_proc_entry,
- *iga2_entry = shared->iga2_proc_entry;
+ struct proc_dir_entry *viafb_entry = shared->proc_entry;
if (!viafb_entry)
return;
- remove_proc_entry("output_devices", iga2_entry);
+ remove_proc_entry("output_devices", shared->iga2_proc_entry);
remove_proc_entry("iga2", viafb_entry);
- remove_proc_entry("output_devices", iga1_entry);
+ remove_proc_entry("output_devices", shared->iga1_proc_entry);
remove_proc_entry("iga1", viafb_entry);
remove_proc_entry("supported_output_devices", viafb_entry);
}
#endif
+static int virtballoon_validate(struct virtio_device *vdev)
+{
+ __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
+ return 0;
+}
+
static unsigned int features[] = {
VIRTIO_BALLOON_F_MUST_TELL_HOST,
VIRTIO_BALLOON_F_STATS_VQ,
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,
+ .validate = virtballoon_validate,
.probe = virtballoon_probe,
.remove = virtballoon_remove,
.config_changed = virtballoon_changed,
return xen_tmem_new_pool(uuid_private, 0, pagesize);
}
-static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
+static int tmem_cleancache_init_shared_fs(uuid_t *uuid, size_t pagesize)
{
struct tmem_pool_uuid shared_uuid;
- shared_uuid.uuid_lo = *(u64 *)uuid;
- shared_uuid.uuid_hi = *(u64 *)(&uuid[8]);
+ shared_uuid.uuid_lo = *(u64 *)&uuid->b[0];
+ shared_uuid.uuid_hi = *(u64 *)&uuid->b[8];
return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
}
{
struct sockaddr_rxrpc srx;
struct afs_server *server;
- struct uuid_v1 *r;
+ struct afs_uuid *r;
unsigned loop;
__be32 *b;
int ret;
}
_debug("unmarshall UUID");
- call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL);
+ call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
if (!call->request)
return -ENOMEM;
static void SRXAFSCB_ProbeUuid(struct work_struct *work)
{
struct afs_call *call = container_of(work, struct afs_call, work);
- struct uuid_v1 *r = call->request;
+ struct afs_uuid *r = call->request;
struct {
__be32 match;
*/
static int afs_deliver_cb_probe_uuid(struct afs_call *call)
{
- struct uuid_v1 *r;
+ struct afs_uuid *r;
unsigned loop;
__be32 *b;
int ret;
}
_debug("unmarshall UUID");
- call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL);
+ call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
if (!call->request)
return -ENOMEM;
b = call->buffer;
r = call->request;
- r->time_low = b[0];
- r->time_mid = htons(ntohl(b[1]));
- r->time_hi_and_version = htons(ntohl(b[2]));
+ r->time_low = ntohl(b[0]);
+ r->time_mid = ntohl(b[1]);
+ r->time_hi_and_version = ntohl(b[2]);
r->clock_seq_hi_and_reserved = ntohl(b[3]);
r->clock_seq_low = ntohl(b[4]);
unsigned mtu; /* MTU of interface */
};
+struct afs_uuid {
+ __be32 time_low; /* low part of timestamp */
+ __be16 time_mid; /* mid part of timestamp */
+ __be16 time_hi_and_version; /* high part of timestamp and version */
+ __u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */
+ __u8 clock_seq_low; /* clock seq low */
+ __u8 node[6]; /* spatially unique node ID (MAC addr) */
+};
+
/*****************************************************************************/
/*
* cache.c
* main.c
*/
extern struct workqueue_struct *afs_wq;
-extern struct uuid_v1 afs_uuid;
+extern struct afs_uuid afs_uuid;
/*
* misc.c
module_param(rootcell, charp, 0);
MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
-struct uuid_v1 afs_uuid;
+struct afs_uuid afs_uuid;
struct workqueue_struct *afs_wq;
/*
ssize_t ret;
/* enforce forwards compatibility on users */
- if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
+ if (unlikely(iocb->aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
req->common.ki_pos = iocb->aio_offset;
req->common.ki_complete = aio_complete;
req->common.ki_flags = iocb_flags(req->common.ki_filp);
+ req->common.ki_hint = file_write_hint(file);
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
req->common.ki_flags |= IOCB_EVENTFD;
}
+ ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
+ if (unlikely(ret)) {
+ pr_debug("EINVAL: aio_rw_flags\n");
+ goto out_put_req;
+ }
+
+ if ((req->common.ki_flags & IOCB_NOWAIT) &&
+ !(req->common.ki_flags & IOCB_DIRECT)) {
+ ret = -EOPNOTSUPP;
+ goto out_put_req;
+ }
+
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
if (unlikely(ret)) {
pr_debug("EFAULT: aio_key\n");
int status;
token = (autofs_wqt_t) param->fail.token;
- status = param->fail.status ? param->fail.status : -ENOENT;
+ status = param->fail.status < 0 ? param->fail.status : -ENOENT;
return autofs4_wait_release(sbi, token, status);
}
bio_init(&bio, vecs, nr_pages);
bio.bi_bdev = bdev;
bio.bi_iter.bi_sector = pos >> 9;
+ bio.bi_write_hint = iocb->ki_hint;
bio.bi_private = current;
bio.bi_end_io = blkdev_bio_end_io_simple;
if (vecs != inline_vecs)
kfree(vecs);
- if (unlikely(bio.bi_error))
- return bio.bi_error;
+ if (unlikely(bio.bi_status))
+ ret = blk_status_to_errno(bio.bi_status);
+
+ bio_uninit(&bio);
+
return ret;
}
bool should_dirty = dio->should_dirty;
if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
- if (bio->bi_error && !dio->bio.bi_error)
- dio->bio.bi_error = bio->bi_error;
+ if (bio->bi_status && !dio->bio.bi_status)
+ dio->bio.bi_status = bio->bi_status;
} else {
if (!dio->is_sync) {
struct kiocb *iocb = dio->iocb;
- ssize_t ret = dio->bio.bi_error;
+ ssize_t ret;
- if (likely(!ret)) {
+ if (likely(!dio->bio.bi_status)) {
ret = dio->size;
iocb->ki_pos += ret;
+ } else {
+ ret = blk_status_to_errno(dio->bio.bi_status);
}
dio->iocb->ki_complete(iocb, ret, 0);
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
blk_qc_t qc = BLK_QC_T_NONE;
- int ret;
+ int ret = 0;
if ((pos | iov_iter_alignment(iter)) &
(bdev_logical_block_size(bdev) - 1))
for (;;) {
bio->bi_bdev = bdev;
bio->bi_iter.bi_sector = pos >> 9;
+ bio->bi_write_hint = iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = blkdev_bio_end_io;
ret = bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret)) {
- bio->bi_error = ret;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
break;
}
}
__set_current_state(TASK_RUNNING);
- ret = dio->bio.bi_error;
+ if (!ret)
+ ret = blk_status_to_errno(dio->bio.bi_status);
if (likely(!ret))
ret = dio->size;
static __init int blkdev_init(void)
{
- blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio));
+ blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
if (!blkdev_dio_pool)
return -ENOMEM;
return 0;
* The original bio may be split to several sub-bios, this is
* done during endio of sub-bios
*/
- int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int);
+ blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
+ blk_status_t);
};
/*
/* mutex is not held! This is not save if IO is not yet completed
* on umount */
iodone_w_error = 0;
- if (bp->bi_error)
+ if (bp->bi_status)
iodone_w_error = 1;
BUG_ON(NULL == block);
if ((dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
- bp->bi_error,
+ bp->bi_status,
btrfsic_get_block_type(dev_state->state, block),
block->logical_bytenr, dev_state->name,
block->dev_bytenr, block->mirror_num);
unsigned long index;
int ret;
- if (bio->bi_error)
+ if (bio->bi_status)
cb->errors = 1;
/* if there are more bios still pending for this compressed
struct page *page;
unsigned long index;
- if (bio->bi_error)
+ if (bio->bi_status)
cb->errors = 1;
/* if there are more bios still pending for this compressed
cb->start,
cb->start + cb->len - 1,
NULL,
- bio->bi_error ? 0 : 1);
+ bio->bi_status ? 0 : 1);
cb->compressed_pages[0]->mapping = NULL;
end_compressed_writeback(inode, cb);
* This also checksums the file bytes and gets things ready for
* the end io hooks.
*/
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
struct page *page;
u64 first_byte = disk_start;
struct block_device *bdev;
- int ret;
+ blk_status_t ret;
int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
WARN_ON(start & ((u64)PAGE_SIZE - 1));
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
if (!cb)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
refcount_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
if (!bio) {
kfree(cb);
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
}
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_private = cb;
/* create and submit bios for the compressed pages */
bytes_left = compressed_len;
for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
+ int submit = 0;
+
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
if (bio->bi_iter.bi_size)
- ret = io_tree->ops->merge_bio_hook(page, 0,
+ submit = io_tree->ops->merge_bio_hook(page, 0,
PAGE_SIZE,
bio, 0);
- else
- ret = 0;
page->mapping = NULL;
- if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) <
+ if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
bio_get(bio);
ret = btrfs_map_bio(fs_info, bio, 0, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
ret = btrfs_map_bio(fs_info, bio, 0, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
* After the compressed pages are read, we copy the bytes into the
* bio we were passed and then call the bio end_io calls
*/
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 em_len;
u64 em_start;
struct extent_map *em;
- int ret = -ENOMEM;
+ blk_status_t ret = BLK_STS_RESOURCE;
int faili = 0;
u32 *sums;
PAGE_SIZE);
read_unlock(&em_tree->lock);
if (!em)
- return -EIO;
+ return BLK_STS_IOERR;
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
__GFP_HIGHMEM);
if (!cb->compressed_pages[pg_index]) {
faili = pg_index - 1;
- ret = -ENOMEM;
+ ret = BLK_STS_RESOURCE;
goto fail2;
}
}
refcount_set(&cb->pending_bios, 1);
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+ int submit = 0;
+
page = cb->compressed_pages[pg_index];
page->mapping = inode->i_mapping;
page->index = em_start >> PAGE_SHIFT;
if (comp_bio->bi_iter.bi_size)
- ret = tree->ops->merge_bio_hook(page, 0,
+ submit = tree->ops->merge_bio_hook(page, 0,
PAGE_SIZE,
comp_bio, 0);
- else
- ret = 0;
page->mapping = NULL;
- if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
+ if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
bio_get(comp_bio);
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
if (ret) {
- comp_bio->bi_error = ret;
+ comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
if (ret) {
- comp_bio->bi_error = ret;
+ comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
unsigned long total_out, u64 disk_start,
struct bio *bio);
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
unsigned long nr_pages);
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
enum btrfs_compression_type {
struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
u64 logical_offset);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
bio_end_io_t *end_io;
void *private;
struct btrfs_fs_info *info;
- int error;
+ blk_status_t status;
enum btrfs_wq_endio_type metadata;
struct list_head list;
struct btrfs_work work;
*/
u64 bio_offset;
struct btrfs_work work;
- int error;
+ blk_status_t status;
};
/*
btrfs_work_func_t func;
fs_info = end_io_wq->info;
- end_io_wq->error = bio->bi_error;
+ end_io_wq->status = bio->bi_status;
if (bio_op(bio) == REQ_OP_WRITE) {
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
btrfs_queue_work(wq, &end_io_wq->work);
}
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata)
{
struct btrfs_end_io_wq *end_io_wq;
end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
if (!end_io_wq)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
end_io_wq->private = bio->bi_private;
end_io_wq->end_io = bio->bi_end_io;
end_io_wq->info = info;
- end_io_wq->error = 0;
+ end_io_wq->status = 0;
end_io_wq->bio = bio;
end_io_wq->metadata = metadata;
static void run_one_async_start(struct btrfs_work *work)
{
struct async_submit_bio *async;
- int ret;
+ blk_status_t ret;
async = container_of(work, struct async_submit_bio, work);
ret = async->submit_bio_start(async->inode, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
if (ret)
- async->error = ret;
+ async->status = ret;
}
static void run_one_async_done(struct btrfs_work *work)
wake_up(&fs_info->async_submit_wait);
/* If an error occurred we just want to clean up the bio and move on */
- if (async->error) {
- async->bio->bi_error = async->error;
+ if (async->status) {
+ async->bio->bi_status = async->status;
bio_endio(async->bio);
return;
}
kfree(async);
}
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags,
- u64 bio_offset,
- extent_submit_bio_hook_t *submit_bio_start,
- extent_submit_bio_hook_t *submit_bio_done)
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+ struct inode *inode, struct bio *bio, int mirror_num,
+ unsigned long bio_flags, u64 bio_offset,
+ extent_submit_bio_hook_t *submit_bio_start,
+ extent_submit_bio_hook_t *submit_bio_done)
{
struct async_submit_bio *async;
async = kmalloc(sizeof(*async), GFP_NOFS);
if (!async)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
async->inode = inode;
async->bio = bio;
async->bio_flags = bio_flags;
async->bio_offset = bio_offset;
- async->error = 0;
+ async->status = 0;
atomic_inc(&fs_info->nr_async_submits);
return 0;
}
-static int btree_csum_one_bio(struct bio *bio)
+static blk_status_t btree_csum_one_bio(struct bio *bio)
{
struct bio_vec *bvec;
struct btrfs_root *root;
break;
}
- return ret;
+ return errno_to_blk_status(ret);
}
-static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btree_submit_bio_start(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
/*
* when we're called for a write, we're already in the async
return btree_csum_one_bio(bio);
}
-static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btree_submit_bio_done(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
- int ret;
+ blk_status_t ret;
/*
* when we're called for a write, we're already in the async
*/
ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
return ret;
return 1;
}
-static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int async = check_async_write(bio_flags);
- int ret;
+ blk_status_t ret;
if (bio_op(bio) != REQ_OP_WRITE) {
/*
return 0;
out_w_error:
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
return ret;
}
end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
bio = end_io_wq->bio;
- bio->bi_error = end_io_wq->error;
+ bio->bi_status = end_io_wq->status;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
* any device where the flush fails with eopnotsupp are flagged as not-barrier
* capable
*/
-static int write_dev_flush(struct btrfs_device *device, int wait)
+static blk_status_t write_dev_flush(struct btrfs_device *device, int wait)
{
struct request_queue *q = bdev_get_queue(device->bdev);
struct bio *bio;
- int ret = 0;
+ blk_status_t ret = 0;
if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
return 0;
wait_for_completion(&device->flush_wait);
- if (bio->bi_error) {
- ret = bio->bi_error;
+ if (bio->bi_status) {
+ ret = bio->bi_status;
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
device->flush_bio = NULL;
bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
if (!bio)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
bio->bi_end_io = btrfs_end_empty_barrier;
bio->bi_bdev = device->bdev;
struct btrfs_device *dev;
int errors_send = 0;
int errors_wait = 0;
- int ret;
+ blk_status_t ret;
/* send down all the barriers */
head = &info->fs_devices->devices;
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, u8 *result);
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata);
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags, u64 bio_offset,
- extent_submit_bio_hook_t *submit_bio_start,
- extent_submit_bio_hook_t *submit_bio_done);
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+ struct inode *inode, struct bio *bio, int mirror_num,
+ unsigned long bio_flags, u64 bio_offset,
+ extent_submit_bio_hook_t *submit_bio_start,
+ extent_submit_bio_hook_t *submit_bio_done);
unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
int btrfs_write_tree_block(struct extent_buffer *buf);
int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
goto free_state_cache;
btrfs_bioset = bioset_create(BIO_POOL_SIZE,
- offsetof(struct btrfs_io_bio, bio));
+ offsetof(struct btrfs_io_bio, bio),
+ BIOSET_NEED_BVECS);
if (!btrfs_bioset)
goto free_buffer_cache;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct bio *bio;
int read_mode = 0;
+ blk_status_t status;
int ret;
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
read_mode, failrec->this_mirror, failrec->in_validation);
- ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
+ status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
failrec->bio_flags, 0);
- if (ret) {
+ if (status) {
free_io_failure(BTRFS_I(inode), failrec);
bio_put(bio);
+ ret = blk_status_to_errno(status);
}
return ret;
*/
static void end_bio_extent_writepage(struct bio *bio)
{
+ int error = blk_status_to_errno(bio->bi_status);
struct bio_vec *bvec;
u64 start;
u64 end;
start = page_offset(page);
end = start + bvec->bv_offset + bvec->bv_len - 1;
- end_extent_writepage(page, bio->bi_error, start, end);
+ end_extent_writepage(page, error, start, end);
end_page_writeback(page);
}
static void end_bio_extent_readpage(struct bio *bio)
{
struct bio_vec *bvec;
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct extent_io_tree *tree;
u64 offset = 0;
btrfs_debug(fs_info,
"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
- (u64)bio->bi_iter.bi_sector, bio->bi_error,
+ (u64)bio->bi_iter.bi_sector, bio->bi_status,
io_bio->mirror_num);
tree = &BTRFS_I(inode)->io_tree;
ret = bio_readpage_error(bio, offset, page,
start, end, mirror);
if (ret == 0) {
- uptodate = !bio->bi_error;
+ uptodate = !bio->bi_status;
offset += len;
continue;
}
endio_readpage_release_extent(tree, extent_start, extent_len,
uptodate);
if (io_bio->end_io)
- io_bio->end_io(io_bio, bio->bi_error);
+ io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
bio_put(bio);
}
static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
unsigned long bio_flags)
{
- int ret = 0;
+ blk_status_t ret = 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct page *page = bvec->bv_page;
struct extent_io_tree *tree = bio->bi_private;
btrfsic_submit_bio(bio);
bio_put(bio);
- return ret;
+ return blk_status_to_errno(ret);
}
static int merge_bio(struct extent_io_tree *tree, struct page *page,
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
+ bio->bi_write_hint = page->mapping->host->i_write_hint;
bio_set_op_attrs(bio, op, op_flags);
if (wbc) {
wbc_init_bio(wbc, bio);
BUG_ON(!eb);
done = atomic_dec_and_test(&eb->io_pages);
- if (bio->bi_error ||
+ if (bio->bi_status ||
test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
ClearPageUptodate(page);
set_btree_ioerr(page);
struct btrfs_io_bio;
struct io_failure_record;
-typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset);
+typedef blk_status_t (extent_submit_bio_hook_t)(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset);
struct extent_io_ops {
/*
* The following callbacks must be allways defined, the function
kfree(bio->csum_allocated);
}
-static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
+static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
u64 logical_offset, u32 *dst, int dio)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
path = btrfs_alloc_path();
if (!path)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
if (!dst) {
csum_size, GFP_NOFS);
if (!btrfs_bio->csum_allocated) {
btrfs_free_path(path);
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
}
btrfs_bio->csum = btrfs_bio->csum_allocated;
btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
return 0;
}
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
{
return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
}
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
{
return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1);
}
return ret;
}
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
GFP_NOFS);
if (!sums)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
ssize_t num_written = 0;
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
ssize_t err;
- loff_t pos;
- size_t count;
+ loff_t pos = iocb->ki_pos;
+ size_t count = iov_iter_count(from);
loff_t oldsize;
int clean_page = 0;
- inode_lock(inode);
+ if ((iocb->ki_flags & IOCB_NOWAIT) &&
+ (iocb->ki_flags & IOCB_DIRECT)) {
+ /* Don't sleep on inode rwsem */
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ /*
+ * We will allocate space in case nodatacow is not set,
+ * so bail
+ */
+ if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) ||
+ check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+ inode_unlock(inode);
+ return -EAGAIN;
+ }
+ } else
+ inode_lock(inode);
+
err = generic_write_checks(iocb, from);
if (err <= 0) {
inode_unlock(inode);
*/
update_time_for_write(inode);
- pos = iocb->ki_pos;
- count = iov_iter_count(from);
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (start_pos > oldsize) {
return offset;
}
+static int btrfs_file_open(struct inode *inode, struct file *filp)
+{
+ filp->f_mode |= FMODE_AIO_NOWAIT;
+ return generic_file_open(inode, filp);
+}
+
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
.read_iter = generic_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
.mmap = btrfs_file_mmap,
- .open = generic_file_open,
+ .open = btrfs_file_open,
.release = btrfs_release_file,
.fsync = btrfs_sync_file,
.fallocate = btrfs_fallocate,
{
SHASH_DESC_ON_STACK(shash, tfm);
u32 *ctx = (u32 *)shash_desc_ctx(shash);
+ u32 retval;
int err;
shash->tfm = tfm;
err = crypto_shash_update(shash, address, length);
BUG_ON(err);
- return *ctx;
+ retval = *ctx;
+ barrier_data(ctx);
+ return retval;
}
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK);
- ret = btrfs_submit_compressed_write(inode,
+ if (btrfs_submit_compressed_write(inode,
async_extent->start,
async_extent->ram_size,
ins.objectid,
ins.offset, async_extent->pages,
- async_extent->nr_pages);
- if (ret) {
+ async_extent->nr_pages)) {
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct page *p = async_extent->pages[0];
const u64 start = async_extent->start;
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_start(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
- int ret = 0;
+ blk_status_t ret = 0;
ret = btrfs_csum_one_bio(inode, bio, 0, 0);
BUG_ON(ret); /* -ENOMEM */
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_done(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int ret;
+ blk_status_t ret;
ret = btrfs_map_bio(fs_info, bio, mirror_num, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
return ret;
* extent_io.c submission hook. This does the right thing for csum calculation
* on write, or reading the csums from the tree before a read
*/
-static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
- int ret = 0;
+ blk_status_t ret = 0;
int skip_sum;
int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
out:
- if (ret < 0) {
- bio->bi_error = ret;
+ if (ret) {
+ bio->bi_status = ret;
bio_endio(bio);
}
return ret;
struct bio_vec *bvec;
int i;
- if (bio->bi_error)
+ if (bio->bi_status)
goto end;
ASSERT(bio->bi_vcnt == 1);
int ret;
int i;
- if (bio->bi_error)
+ if (bio->bi_status)
goto end;
uptodate = 1;
bio_put(bio);
}
-static int __btrfs_subio_endio_read(struct inode *inode,
- struct btrfs_io_bio *io_bio, int err)
+static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
+ struct btrfs_io_bio *io_bio, blk_status_t err)
{
struct btrfs_fs_info *fs_info;
struct bio_vec *bvec;
io_bio->mirror_num,
btrfs_retry_endio, &done);
if (ret) {
- err = ret;
+ err = errno_to_blk_status(ret);
goto next;
}
return err;
}
-static int btrfs_subio_endio_read(struct inode *inode,
- struct btrfs_io_bio *io_bio, int err)
+static blk_status_t btrfs_subio_endio_read(struct inode *inode,
+ struct btrfs_io_bio *io_bio, blk_status_t err)
{
bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
struct inode *inode = dip->inode;
struct bio *dio_bio;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
- int err = bio->bi_error;
+ blk_status_t err = bio->bi_status;
if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
err = btrfs_subio_endio_read(inode, io_bio, err);
kfree(dip);
- dio_bio->bi_error = bio->bi_error;
- dio_end_io(dio_bio, bio->bi_error);
+ dio_bio->bi_status = bio->bi_status;
+ dio_end_io(dio_bio);
if (io_bio->end_io)
- io_bio->end_io(io_bio, err);
+ io_bio->end_io(io_bio, blk_status_to_errno(err));
bio_put(bio);
}
struct bio *dio_bio = dip->dio_bio;
__endio_write_update_ordered(dip->inode, dip->logical_offset,
- dip->bytes, !bio->bi_error);
+ dip->bytes, !bio->bi_status);
kfree(dip);
- dio_bio->bi_error = bio->bi_error;
- dio_end_io(dio_bio, bio->bi_error);
+ dio_bio->bi_status = bio->bi_status;
+ dio_end_io(dio_bio);
bio_put(bio);
}
-static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
+static blk_status_t __btrfs_submit_bio_start_direct_io(struct inode *inode,
struct bio *bio, int mirror_num,
unsigned long bio_flags, u64 offset)
{
- int ret;
+ blk_status_t ret;
ret = btrfs_csum_one_bio(inode, bio, offset, 1);
BUG_ON(ret); /* -ENOMEM */
return 0;
static void btrfs_end_dio_bio(struct bio *bio)
{
struct btrfs_dio_private *dip = bio->bi_private;
- int err = bio->bi_error;
+ blk_status_t err = bio->bi_status;
if (err)
btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
if (dip->errors) {
bio_io_error(dip->orig_bio);
} else {
- dip->dio_bio->bi_error = 0;
+ dip->dio_bio->bi_status = 0;
bio_endio(dip->orig_bio);
}
out:
return bio;
}
-static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode,
+static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
struct btrfs_dio_private *dip,
struct bio *bio,
u64 file_offset)
{
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
- int ret;
+ blk_status_t ret;
/*
* We load all the csum data we need when we submit
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_dio_private *dip = bio->bi_private;
bool write = bio_op(bio) == REQ_OP_WRITE;
- int ret;
+ blk_status_t ret;
if (async_submit)
async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
* callbacks - they require an allocated dip and a clone of dio_bio.
*/
if (io_bio && dip) {
- io_bio->bi_error = -EIO;
+ io_bio->bi_status = BLK_STS_IOERR;
bio_endio(io_bio);
/*
* The end io callbacks free our dip, do the final put on io_bio
unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
file_offset + dio_bio->bi_iter.bi_size - 1);
- dio_bio->bi_error = -EIO;
+ dio_bio->bi_status = BLK_STS_IOERR;
/*
* Releases and cleans up our dio_bio, no need to bio_put()
* nor bio_endio()/bio_io_error() against dio_bio.
*/
- dio_end_io(dio_bio, ret);
+ dio_end_io(dio_bio);
}
if (io_bio)
bio_put(io_bio);
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
}
ret = btrfs_delalloc_reserve_space(inode, offset, count);
if (ret)
* this frees the rbio and runs through all the bios in the
* bio_list and calls end_io on them
*/
-static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
+static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
{
struct bio *cur = bio_list_get(&rbio->bio_list);
struct bio *next;
while (cur) {
next = cur->bi_next;
cur->bi_next = NULL;
- cur->bi_error = err;
+ cur->bi_status = err;
bio_endio(cur);
cur = next;
}
static void raid_write_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- int err = bio->bi_error;
+ blk_status_t err = bio->bi_status;
int max_errors;
if (err)
max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
0 : rbio->bbio->max_errors;
if (atomic_read(&rbio->error) > max_errors)
- err = -EIO;
+ err = BLK_STS_IOERR;
rbio_orig_end_io(rbio, err);
}
* devices or if they are not contiguous
*/
if (last_end == disk_start && stripe->dev->bdev &&
- !last->bi_error &&
+ !last->bi_status &&
last->bi_bdev == stripe->dev->bdev) {
ret = bio_add_page(last, page, PAGE_SIZE, 0);
if (ret == PAGE_SIZE)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- if (bio->bi_error)
+ if (bio->bi_status)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
* we only read stripe pages off the disk, set them
* up to date if there were no errors
*/
- if (bio->bi_error)
+ if (bio->bi_status)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- if (bio->bi_error)
+ if (bio->bi_status)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
struct scrub_ctx *sctx;
struct btrfs_device *dev;
struct bio *bio;
- int err;
+ blk_status_t status;
u64 logical;
u64 physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
struct scrub_bio_ret {
struct completion event;
- int error;
+ blk_status_t status;
};
static void scrub_bio_wait_endio(struct bio *bio)
{
struct scrub_bio_ret *ret = bio->bi_private;
- ret->error = bio->bi_error;
+ ret->status = bio->bi_status;
complete(&ret->event);
}
int ret;
init_completion(&done.event);
- done.error = 0;
+ done.status = 0;
bio->bi_iter.bi_sector = page->logical >> 9;
bio->bi_private = &done;
bio->bi_end_io = scrub_bio_wait_endio;
return ret;
wait_for_completion(&done.event);
- if (done.error)
+ if (done.status)
return -EIO;
return 0;
bio->bi_bdev = sbio->dev->bdev;
bio->bi_iter.bi_sector = sbio->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- sbio->err = 0;
+ sbio->status = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical_for_dev_replace ||
sbio->logical + sbio->page_count * PAGE_SIZE !=
struct scrub_bio *sbio = bio->bi_private;
struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
- sbio->err = bio->bi_error;
+ sbio->status = bio->bi_status;
sbio->bio = bio;
btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
int i;
WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
- if (sbio->err) {
+ if (sbio->status) {
struct btrfs_dev_replace *dev_replace =
&sbio->sctx->fs_info->dev_replace;
bio->bi_bdev = sbio->dev->bdev;
bio->bi_iter.bi_sector = sbio->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
- sbio->err = 0;
+ sbio->status = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical ||
sbio->logical + sbio->page_count * PAGE_SIZE !=
struct scrub_block *sblock = bio->bi_private;
struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
- if (bio->bi_error)
+ if (bio->bi_status)
sblock->no_io_error_seen = 0;
bio_put(bio);
struct scrub_bio *sbio = bio->bi_private;
struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
- sbio->err = bio->bi_error;
+ sbio->status = bio->bi_status;
sbio->bio = bio;
btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
int i;
BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
- if (sbio->err) {
+ if (sbio->status) {
for (i = 0; i < sbio->page_count; i++) {
struct scrub_page *spage = sbio->pagev[i];
struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
- if (bio->bi_error)
+ if (bio->bi_status)
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
sparity->nsectors);
struct btrfs_bio *bbio = bio->bi_private;
int is_orig_bio = 0;
- if (bio->bi_error) {
+ if (bio->bi_status) {
atomic_inc(&bbio->error);
- if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) {
+ if (bio->bi_status == BLK_STS_IOERR ||
+ bio->bi_status == BLK_STS_TARGET) {
unsigned int stripe_index =
btrfs_io_bio(bio)->stripe_index;
struct btrfs_device *dev;
* beyond the tolerance of the btrfs bio
*/
if (atomic_read(&bbio->error) > bbio->max_errors) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
} else {
/*
* this bio is actually up to date, we didn't
* go over the max number of errors
*/
- bio->bi_error = 0;
+ bio->bi_status = 0;
}
btrfs_end_bbio(bbio, bio);
btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
bio->bi_iter.bi_sector = logical >> 9;
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
btrfs_end_bbio(bbio, bio);
}
}
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
- struct writeback_control *wbc);
+ enum rw_hint hint, struct writeback_control *wbc);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
- submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+ submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+ inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
- submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+ submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+ inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
if (unlikely(bio_flagged(bio, BIO_QUIET)))
set_bit(BH_Quiet, &bh->b_state);
- bh->b_end_io(bh, !bio->bi_error);
+ bh->b_end_io(bh, !bio->bi_status);
bio_put(bio);
}
}
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
- struct writeback_control *wbc)
+ enum rw_hint write_hint, struct writeback_control *wbc)
{
struct bio *bio;
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
+ bio->bi_write_hint = write_hint;
bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
BUG_ON(bio->bi_iter.bi_size != bh->b_size);
int submit_bh(int op, int op_flags, struct buffer_head *bh)
{
- return submit_bh_wbc(op, op_flags, bh, NULL);
+ return submit_bh_wbc(op, op_flags, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);
}
if (new_mode != old_mode) {
+ newattrs.ia_ctime = current_time(inode);
newattrs.ia_mode = new_mode;
newattrs.ia_valid = ATTR_MODE;
ret = __ceph_setattr(inode, &newattrs);
ceph_mdsc_put_request(req);
if (!inode)
return ERR_PTR(-ESTALE);
+ if (inode->i_nlink == 0) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
}
return d_obtain_alias(inode);
attr->ia_size > inode->i_size) {
i_size_write(inode, attr->ia_size);
inode->i_blocks = calc_inode_blocks(attr->ia_size);
- inode->i_ctime = attr->ia_ctime;
ci->i_reported_size = attr->ia_size;
dirtied |= CEPH_CAP_FILE_EXCL;
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
only ? "ctime only" : "ignored");
- inode->i_ctime = attr->ia_ctime;
if (only) {
/*
* if kernel wants to dirty ctime but nothing else,
if (dirtied) {
inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
&prealloc_cf);
- inode->i_ctime = current_time(inode);
+ inode->i_ctime = attr->ia_ctime;
}
release &= issued;
req->r_inode_drop = release;
req->r_args.setattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1;
+ req->r_stamp = attr->ia_ctime;
err = ceph_mdsc_do_request(mdsc, NULL, req);
}
dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
{
struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
- struct timespec ts;
if (!req)
return ERR_PTR(-ENOMEM);
init_completion(&req->r_safe_completion);
INIT_LIST_HEAD(&req->r_unsafe_item);
- ktime_get_real_ts(&ts);
- req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
+ req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran);
req->r_op = op;
req->r_direct_mode = mode;
if (!is_sync_kiocb(iocb))
ctx->iocb = iocb;
- if (to->type & ITER_IOVEC)
+ if (to->type == ITER_IOVEC)
ctx->should_dirty = true;
rc = setup_aio_ctx_iter(ctx, to, READ);
if (!pages) {
pages = vmalloc(max_pages * sizeof(struct page *));
- if (!bv) {
+ if (!pages) {
kvfree(bv);
return -ENOMEM;
}
struct cifs_fid *fid, __u16 search_flags,
struct cifs_search_info *srch_inf)
{
- return CIFSFindFirst(xid, tcon, path, cifs_sb,
- &fid->netfid, search_flags, srch_inf, true);
+ int rc;
+
+ rc = CIFSFindFirst(xid, tcon, path, cifs_sb,
+ &fid->netfid, search_flags, srch_inf, true);
+ if (rc)
+ cifs_dbg(FYI, "find first failed=%d\n", rc);
+ return rc;
}
static int
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
kfree(utf16_path);
if (rc) {
- cifs_dbg(VFS, "open dir failed\n");
+ cifs_dbg(FYI, "open dir failed rc=%d\n", rc);
return rc;
}
rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
fid->volatile_fid, 0, srch_inf);
if (rc) {
- cifs_dbg(VFS, "query directory failed\n");
+ cifs_dbg(FYI, "query directory failed rc=%d\n", rc);
SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
}
return rc;
sg = init_sg(rqst, sign);
if (!sg) {
- cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc);
+ cifs_dbg(VFS, "%s: Failed to init sg", __func__);
+ rc = -ENOMEM;
goto free_req;
}
iv = kzalloc(iv_len, GFP_KERNEL);
if (!iv) {
cifs_dbg(VFS, "%s: Failed to alloc IV", __func__);
+ rc = -ENOMEM;
goto free_sg;
}
iv[0] = 3;
pcreatetime = (__u64 *)value;
*pcreatetime = CIFS_I(inode)->createtime;
return sizeof(__u64);
-
- return rc;
}
}
EXPORT_SYMBOL(config_item_get);
+struct config_item *config_item_get_unless_zero(struct config_item *item)
+{
+ if (item && kref_get_unless_zero(&item->ci_kref))
+ return item;
+ return NULL;
+}
+EXPORT_SYMBOL(config_item_get_unless_zero);
+
static void config_item_cleanup(struct config_item *item)
{
struct config_item_type *t = item->ci_type;
ret = -ENOMEM;
sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
if (sl) {
- sl->sl_target = config_item_get(item);
spin_lock(&configfs_dirent_lock);
if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
spin_unlock(&configfs_dirent_lock);
- config_item_put(item);
kfree(sl);
return -ENOENT;
}
+ sl->sl_target = config_item_get(item);
list_add(&sl->sl_list, &target_sd->s_links);
spin_unlock(&configfs_dirent_lock);
ret = configfs_create_link(sl, parent_item->ci_dentry,
goto errout;
}
err = submit_bio_wait(bio);
- if ((err == 0) && bio->bi_error)
+ if (err == 0 && bio->bi_status)
err = -EIO;
bio_put(bio);
if (err)
if (ret < 0)
goto out;
}
+ start_index = indices[pvec.nr - 1] + 1;
}
out:
put_dax(dax_dev);
{
struct detach_data *data = _data;
- if (!data->mountpoint && !data->select.found)
+ if (!data->mountpoint && list_empty(&data->select.dispose))
__d_drop(data->select.start);
}
d_walk(dentry, &data, detach_and_collect, check_and_drop);
- if (data.select.found)
+ if (!list_empty(&data.select.dispose))
shrink_dentry_list(&data.select.dispose);
+ else if (!data.mountpoint)
+ return;
if (data.mountpoint) {
detach_mounts(data.mountpoint);
dput(data.mountpoint);
}
-
- if (!data.mountpoint && !data.select.found)
- break;
-
cond_resched();
}
}
dio_complete(dio, 0, true);
}
-static int dio_bio_complete(struct dio *dio, struct bio *bio);
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
/*
* Asynchronous IO callback.
/**
* dio_end_io - handle the end io action for the given bio
* @bio: The direct io bio thats being completed
- * @error: Error if there was one
*
* This is meant to be called by any filesystem that uses their own dio_submit_t
* so that the DIO specific endio actions are dealt with after the filesystem
* has done it's completion work.
*/
-void dio_end_io(struct bio *bio, int error)
+void dio_end_io(struct bio *bio)
{
struct dio *dio = bio->bi_private;
else
bio->bi_end_io = dio_bio_end_io;
+ bio->bi_write_hint = dio->iocb->ki_hint;
+
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
}
/*
* Process one completed BIO. No locks are held.
*/
-static int dio_bio_complete(struct dio *dio, struct bio *bio)
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
{
struct bio_vec *bvec;
unsigned i;
- int err;
+ blk_status_t err = bio->bi_status;
- if (bio->bi_error)
- dio->io_error = -EIO;
+ if (err) {
+ if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
+ dio->io_error = -EAGAIN;
+ else
+ dio->io_error = -EIO;
+ }
if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
- err = bio->bi_error;
bio_check_pages_dirty(bio); /* transfers ownership */
} else {
bio_for_each_segment_all(bvec, bio, i) {
set_page_dirty_lock(page);
put_page(page);
}
- err = bio->bi_error;
bio_put(bio);
}
return err;
bio = dio->bio_list;
dio->bio_list = bio->bi_private;
spin_unlock_irqrestore(&dio->bio_lock, flags);
- ret2 = dio_bio_complete(dio, bio);
+ ret2 = blk_status_to_errno(dio_bio_complete(dio, bio));
if (ret == 0)
ret = ret2;
}
if (iov_iter_rw(iter) == WRITE) {
dio->op = REQ_OP_WRITE;
dio->op_flags = REQ_SYNC | REQ_IDLE;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ dio->op_flags |= REQ_NOWAIT;
} else {
dio->op = REQ_OP_READ;
}
if (write) {
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+ unsigned long ptr_size;
struct rlimit *rlim;
+ /*
+ * Since the stack will hold pointers to the strings, we
+ * must account for them as well.
+ *
+ * The size calculation is the entire vma while each arg page is
+ * built, so each time we get here it's calculating how far it
+ * is currently (rather than each call being just the newly
+ * added size from the arg page). As a result, we need to
+ * always add the entire size of the pointers, so that on the
+ * last call to get_arg_page() we'll actually have the entire
+ * correct size.
+ */
+ ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+ if (ptr_size > ULONG_MAX - size)
+ goto fail;
+ size += ptr_size;
+
acct_arg_size(bprm, size / PAGE_SIZE);
/*
* to work from.
*/
rlim = current->signal->rlim;
- if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
- put_page(page);
- return NULL;
- }
+ if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4)
+ goto fail;
}
return page;
+
+fail:
+ put_page(page);
+ return NULL;
}
static void put_arg_page(struct page *page)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
- inode_lock_shared(inode);
+ if (!inode_trylock_shared(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock_shared(inode);
+ }
/*
* Recheck under inode lock - at this point we are sure it cannot
* change anymore
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
- inode_lock(inode);
+ if (!inode_trylock(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock(inode);
+ }
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
return ext4_dax_write_iter(iocb, from);
#endif
- inode_lock(inode);
+ if (!inode_trylock(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock(inode);
+ }
+
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
iocb->private = &overwrite;
/* Check whether we do a DIO overwrite or not */
- if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
- ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
- overwrite = 1;
+ if (o_direct && !unaligned_aio) {
+ if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+ if (ext4_should_dioread_nolock(inode))
+ overwrite = 1;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
ret = __generic_file_write_iter(iocb, from);
inode_unlock(inode);
if (ret < 0)
return ret;
}
+
+ /* Set the flags to support nowait AIO */
+ filp->f_mode |= FMODE_AIO_NOWAIT;
+
return dquot_file_open(inode, filp);
}
}
#endif
- if (bio->bi_error) {
+ if (bio->bi_status) {
SetPageError(page);
mapping_set_error(page->mapping, -EIO);
}
continue;
}
clear_buffer_async_write(bh);
- if (bio->bi_error)
+ if (bio->bi_status)
buffer_io_error(bh);
} while ((bh = bh->b_this_page) != head);
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
bdevname(bio->bi_bdev, b),
(long long) bio->bi_iter.bi_sector,
(unsigned) bio_sectors(bio),
- bio->bi_error)) {
+ bio->bi_status)) {
ext4_finish_bio(bio);
bio_put(bio);
return;
}
bio->bi_end_io = NULL;
- if (bio->bi_error) {
+ if (bio->bi_status) {
struct inode *inode = io_end->inode;
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
"(offset %llu size %ld starting block %llu)",
- bio->bi_error, inode->i_ino,
+ bio->bi_status, inode->i_ino,
(unsigned long long) io_end->offset,
(long) io_end->size,
(unsigned long long)
bi_sector >> (inode->i_blkbits - 9));
- mapping_set_error(inode->i_mapping, bio->bi_error);
+ mapping_set_error(inode->i_mapping,
+ blk_status_to_errno(bio->bi_status));
}
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
if (bio) {
int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
REQ_SYNC : 0;
+ io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
submit_bio(io->io_bio);
}
ret = io_submit_init_bio(io, bh);
if (ret)
return ret;
+ io->io_bio->bi_write_hint = inode->i_write_hint;
}
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
int i;
if (ext4_bio_encrypted(bio)) {
- if (bio->bi_error) {
+ if (bio->bi_status) {
fscrypt_release_ctx(bio->bi_private);
} else {
fscrypt_decrypt_bio_pages(bio->bi_private, bio);
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
- if (!bio->bi_error) {
+ if (!bio->bi_status) {
SetPageUptodate(page);
} else {
ClearPageUptodate(page);
sb->s_qcop = &ext4_qctl_operations;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
- memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+ memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
f2fs_show_injection_info(FAULT_IO);
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
}
#endif
if (f2fs_bio_encrypted(bio)) {
- if (bio->bi_error) {
+ if (bio->bi_status) {
fscrypt_release_ctx(bio->bi_private);
} else {
fscrypt_decrypt_bio_pages(bio->bi_private, bio);
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
- if (!bio->bi_error) {
+ if (!bio->bi_status) {
if (!PageUptodate(page))
SetPageUptodate(page);
} else {
unlock_page(page);
mempool_free(page, sbi->write_io_dummy);
- if (unlikely(bio->bi_error))
+ if (unlikely(bio->bi_status))
f2fs_stop_checkpoint(sbi, true);
continue;
}
fscrypt_pullback_bio_page(&page, true);
- if (unlikely(bio->bi_error)) {
+ if (unlikely(bio->bi_status)) {
mapping_set_error(page->mapping, -EIO);
f2fs_stop_checkpoint(sbi, true);
}
{
SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver);
u32 *ctx = (u32 *)shash_desc_ctx(shash);
+ u32 retval;
int err;
shash->tfm = sbi->s_chksum_driver;
err = crypto_shash_update(shash, address, length);
BUG_ON(err);
- return *ctx;
+ retval = *ctx;
+ barrier_data(ctx);
+ return retval;
}
static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc,
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
- dc->error = bio->bi_error;
+ dc->error = blk_status_to_errno(bio->bi_status);
dc->state = D_DONE;
complete(&dc->wait);
bio_put(bio);
sb->s_time_gran = 1;
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
- memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+ memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
/* init f2fs-specific super block info */
sbi->valid_super_block = valid_super_block;
}
#endif
+static bool rw_hint_valid(enum rw_hint hint)
+{
+ switch (hint) {
+ case RWF_WRITE_LIFE_NOT_SET:
+ case RWH_WRITE_LIFE_NONE:
+ case RWH_WRITE_LIFE_SHORT:
+ case RWH_WRITE_LIFE_MEDIUM:
+ case RWH_WRITE_LIFE_LONG:
+ case RWH_WRITE_LIFE_EXTREME:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static long fcntl_rw_hint(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct inode *inode = file_inode(file);
+ u64 *argp = (u64 __user *)arg;
+ enum rw_hint hint;
+ u64 h;
+
+ switch (cmd) {
+ case F_GET_FILE_RW_HINT:
+ h = file_write_hint(file);
+ if (copy_to_user(argp, &h, sizeof(*argp)))
+ return -EFAULT;
+ return 0;
+ case F_SET_FILE_RW_HINT:
+ if (copy_from_user(&h, argp, sizeof(h)))
+ return -EFAULT;
+ hint = (enum rw_hint) h;
+ if (!rw_hint_valid(hint))
+ return -EINVAL;
+
+ spin_lock(&file->f_lock);
+ file->f_write_hint = hint;
+ spin_unlock(&file->f_lock);
+ return 0;
+ case F_GET_RW_HINT:
+ h = inode->i_write_hint;
+ if (copy_to_user(argp, &h, sizeof(*argp)))
+ return -EFAULT;
+ return 0;
+ case F_SET_RW_HINT:
+ if (copy_from_user(&h, argp, sizeof(h)))
+ return -EFAULT;
+ hint = (enum rw_hint) h;
+ if (!rw_hint_valid(hint))
+ return -EINVAL;
+
+ inode_lock(inode);
+ inode->i_write_hint = hint;
+ inode_unlock(inode);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp)
{
case F_GET_SEALS:
err = shmem_fcntl(filp, cmd, arg);
break;
+ case F_GET_RW_HINT:
+ case F_SET_RW_HINT:
+ case F_GET_FILE_RW_HINT:
+ case F_SET_FILE_RW_HINT:
+ err = fcntl_rw_hint(filp, cmd, arg);
+ break;
default:
break;
}
atomic_t sd_log_in_flight;
struct bio *sd_log_bio;
wait_queue_head_t sd_log_flush_wait;
- int sd_log_error;
atomic_t sd_reserving_log;
wait_queue_head_t sd_reserving_log_wait;
*/
static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
- int error)
+ blk_status_t error)
{
struct buffer_head *bh, *next;
struct page *page = bvec->bv_page;
struct page *page;
int i;
- if (bio->bi_error) {
- sdp->sd_log_error = bio->bi_error;
- fs_err(sdp, "Error %d writing to log\n", bio->bi_error);
- }
+ if (bio->bi_status)
+ fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
bio_for_each_segment_all(bvec, bio, i) {
page = bvec->bv_page;
if (page_has_buffers(page))
- gfs2_end_log_write_bh(sdp, bvec, bio->bi_error);
+ gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
else
mempool_free(page, gfs2_page_pool);
}
do {
struct buffer_head *next = bh->b_this_page;
len -= bh->b_size;
- bh->b_end_io(bh, !bio->bi_error);
+ bh->b_end_io(bh, !bio->bi_status);
bh = next;
} while (bh && len);
}
{
struct page *page = bio->bi_private;
- if (!bio->bi_error)
+ if (!bio->bi_status)
SetPageUptodate(page);
else
- pr_warn("error %d reading superblock\n", bio->bi_error);
+ pr_warn("error %d reading superblock\n", bio->bi_status);
unlock_page(page);
}
memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
- memcpy(s->s_uuid, str->sb_uuid, 16);
+ memcpy(&s->s_uuid, str->sb_uuid, 16);
}
/**
return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
}
-static int gfs2_uuid_valid(const u8 *uuid)
-{
- int i;
-
- for (i = 0; i < 16; i++) {
- if (uuid[i])
- return 1;
- }
- return 0;
-}
-
static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
{
struct super_block *s = sdp->sd_vfs;
- const u8 *uuid = s->s_uuid;
+
buf[0] = '\0';
- if (!gfs2_uuid_valid(uuid))
+ if (uuid_is_null(&s->s_uuid))
return 0;
- return snprintf(buf, PAGE_SIZE, "%pUB\n", uuid);
+ return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid);
}
static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
struct super_block *s = sdp->sd_vfs;
- const u8 *uuid = s->s_uuid;
add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags))
add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid);
- if (gfs2_uuid_valid(uuid))
- add_uevent_var(env, "UUID=%pUB", uuid);
+ if (!uuid_is_null(&s->s_uuid))
+ add_uevent_var(env, "UUID=%pUB", &s->s_uuid);
return 0;
}
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
i_gid_write(inode, 0);
atomic_set(&inode->i_writecount, 0);
inode->i_size = 0;
+ inode->i_write_hint = WRITE_LIFE_NOT_SET;
inode->i_blocks = 0;
inode->i_bytes = 0;
inode->i_generation = 0;
struct iomap_dio *dio = bio->bi_private;
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
- if (bio->bi_error)
- iomap_dio_set_error(dio, bio->bi_error);
+ if (bio->bi_status)
+ iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
if (atomic_dec_and_test(&dio->ref)) {
if (is_sync_kiocb(dio->iocb)) {
bio->bi_bdev = iomap->bdev;
bio->bi_iter.bi_sector =
iomap->blkno + ((pos - iomap->offset) >> 9);
+ bio->bi_write_hint = dio->iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
flags |= IOMAP_WRITE;
}
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (filemap_range_has_page(mapping, start, end)) {
+ ret = -EAGAIN;
+ goto out_free_dio;
+ }
+ flags |= IOMAP_NOWAIT;
+ }
+
ret = filemap_write_and_wait_range(mapping, start, end);
if (ret)
goto out_free_dio;
bp->l_flag |= lbmDONE;
- if (bio->bi_error) {
+ if (bio->bi_status) {
bp->l_flag |= lbmERROR;
jfs_err("lbmIODone: I/O error in JFS log");
{
struct page *page = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
printk(KERN_ERR "metapage_read_end_io: I/O error\n");
SetPageError(page);
}
BUG_ON(!PagePrivate(page));
- if (bio->bi_error) {
+ if (bio->bi_status) {
printk(KERN_ERR "metapage_write_end_io: I/O error\n");
SetPageError(page);
}
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
- page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
+ page_endio(page, op_is_write(bio_op(bio)),
+ blk_status_to_errno(bio->bi_status));
}
bio_put(bio);
goto confused;
wbc_init_bio(wbc, bio);
+ bio->bi_write_hint = inode->i_write_hint;
}
/*
return err;
}
+ put_mnt_ns(old_mnt_ns);
+
/* Update the pwd and root */
set_fs_pwd(fs, &root);
set_fs_root(fs, &root);
{
struct parallel_io *par = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
struct nfs_pgio_header *header = par->data;
if (!header->pnfs_error)
struct parallel_io *par = bio->bi_private;
struct nfs_pgio_header *header = par->data;
- if (bio->bi_error) {
+ if (bio->bi_status) {
if (!header->pnfs_error)
header->pnfs_error = -EIO;
pnfs_set_lo_fail(header->lseg);
* A single slot, so highest used slotid is either 0 or -1
*/
nfs4_free_slot(tbl, slot);
- nfs4_slot_tbl_drain_complete(tbl);
spin_unlock(&tbl->slot_tbl_lock);
}
}
EXPORT_SYMBOL_GPL(nfs_link);
-static void
-nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data)
-{
- struct dentry *old_dentry = data->old_dentry;
- struct dentry *new_dentry = data->new_dentry;
- struct inode *old_inode = d_inode(old_dentry);
- struct inode *new_inode = d_inode(new_dentry);
-
- nfs_mark_for_revalidate(old_inode);
-
- switch (task->tk_status) {
- case 0:
- if (new_inode != NULL)
- nfs_drop_nlink(new_inode);
- d_move(old_dentry, new_dentry);
- nfs_set_verifier(new_dentry,
- nfs_save_change_attribute(data->new_dir));
- break;
- case -ENOENT:
- nfs_dentry_handle_enoent(old_dentry);
- }
-}
-
/*
* RENAME
* FIXME: Some nfsds, like the Linux user space nfsd, may generate a
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
- struct dentry *dentry = NULL;
+ struct dentry *dentry = NULL, *rehash = NULL;
struct rpc_task *task;
int error = -EBUSY;
* To prevent any new references to the target during the
* rename, we unhash the dentry in advance.
*/
- if (!d_unhashed(new_dentry))
+ if (!d_unhashed(new_dentry)) {
d_drop(new_dentry);
+ rehash = new_dentry;
+ }
if (d_count(new_dentry) > 2) {
int err;
goto out;
new_dentry = dentry;
+ rehash = NULL;
new_inode = NULL;
}
}
if (new_inode != NULL)
NFS_PROTO(new_inode)->return_delegation(new_inode);
- task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
- nfs_complete_rename);
+ task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
if (IS_ERR(task)) {
error = PTR_ERR(task);
goto out;
if (error == 0)
error = task->tk_status;
rpc_put_task(task);
+ nfs_mark_for_revalidate(old_inode);
out:
+ if (rehash)
+ d_rehash(rehash);
trace_nfs_rename_exit(old_dir, old_dentry,
new_dir, new_dentry, error);
+ if (!error) {
+ if (new_inode != NULL)
+ nfs_drop_nlink(new_inode);
+ /*
+ * The d_move() should be here instead of in an async RPC completion
+ * handler because we need the proper locks to move the dentry. If
+ * we're interrupted by a signal, the async RPC completion handler
+ * should mark the directories for revalidation.
+ */
+ d_move(old_dentry, new_dentry);
+ nfs_set_verifier(new_dentry,
+ nfs_save_change_attribute(new_dir));
+ } else if (error == -ENOENT)
+ nfs_dentry_handle_enoent(old_dentry);
+
/* new dentry created? */
if (dentry)
dput(dentry);
/* Except MODE, it seems harmless of setting twice. */
if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
- attrset[1] & FATTR4_WORD1_MODE)
+ (attrset[1] & FATTR4_WORD1_MODE ||
+ attrset[2] & FATTR4_WORD2_MODE_UMASK))
sattr->ia_valid &= ~ATTR_MODE;
if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
size_t max_pages = max_response_pages(server);
dprintk("--> %s\n", __func__);
+ nfs4_sequence_free_slot(&lgp->res.seq_res);
nfs4_free_pages(lgp->args.layout.pages, max_pages);
pnfs_put_layout_hdr(NFS_I(inode)->layout);
put_nfs_open_context(lgp->args.ctx);
/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
if (status == 0 && lgp->res.layoutp->len)
lseg = pnfs_layout_process(lgp);
- nfs4_sequence_free_slot(&lgp->res.seq_res);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
if (status)
put_rpccred(cred);
switch (status) {
case 0:
+ case -EINTR:
+ case -ERESTARTSYS:
break;
case -ETIMEDOUT:
if (clnt->cl_softrtry)
u8 *buf, *d, type, assoc;
int error;
+ if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q)))
+ return -EINVAL;
+
buf = kzalloc(bufflen, GFP_KERNEL);
if (!buf)
return -ENOMEM;
goto out_free_buf;
}
req = scsi_req(rq);
- scsi_req_init(rq);
error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
if (error)
#endif
static inline int
-uuid_parse(char **mesg, char *buf, unsigned char **puuid)
+nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid)
{
int len;
if (strcmp(buf, "fsloc") == 0)
err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
else if (strcmp(buf, "uuid") == 0)
- err = uuid_parse(&mesg, buf, &exp.ex_uuid);
+ err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid);
else if (strcmp(buf, "secinfo") == 0)
err = secinfo_parse(&mesg, buf, &exp);
else
{
struct nilfs_segment_buffer *segbuf = bio->bi_private;
- if (bio->bi_error)
+ if (bio->bi_status)
atomic_inc(&segbuf->sb_err);
bio_put(bio);
{
struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
- if (bio->bi_error) {
- mlog(ML_ERROR, "IO Error %d\n", bio->bi_error);
- wc->wc_error = bio->bi_error;
+ if (bio->bi_status) {
+ mlog(ML_ERROR, "IO Error %d\n", bio->bi_status);
+ wc->wc_error = blk_status_to_errno(bio->bi_status);
}
o2hb_bio_wait_dec(wc, 1);
struct ocfs2_lock_res *lockres;
lockres = &OCFS2_I(inode)->ip_inode_lockres;
+ /* had_lock means that the currect process already takes the cluster
+ * lock previously. If had_lock is 1, we have nothing to do here, and
+ * it will get unlocked where we got the lock.
+ */
if (!had_lock) {
ocfs2_remove_holder(lockres, oh);
ocfs2_inode_unlock(inode, ex);
cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
- memcpy(sb->s_uuid, di->id2.i_super.s_uuid,
+ memcpy(&sb->s_uuid, di->id2.i_super.s_uuid,
sizeof(di->id2.i_super.s_uuid));
osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
void *buffer,
size_t buffer_size)
{
- int ret;
+ int ret, had_lock;
struct buffer_head *di_bh = NULL;
+ struct ocfs2_lock_holder oh;
- ret = ocfs2_inode_lock(inode, &di_bh, 0);
- if (ret < 0) {
- mlog_errno(ret);
- return ret;
+ had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
+ if (had_lock < 0) {
+ mlog_errno(had_lock);
+ return had_lock;
}
down_read(&OCFS2_I(inode)->ip_xattr_sem);
ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
name, buffer, buffer_size);
up_read(&OCFS2_I(inode)->ip_xattr_sem);
- ocfs2_inode_unlock(inode, 0);
+ ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
brelse(di_bh);
{
struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di;
- int ret, credits, ref_meta = 0, ref_credits = 0;
+ int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct inode *tl_inode = osb->osb_tl_inode;
struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
struct ocfs2_refcount_tree *ref_tree = NULL;
+ struct ocfs2_lock_holder oh;
struct ocfs2_xattr_info xi = {
.xi_name_index = name_index,
return -ENOMEM;
}
- ret = ocfs2_inode_lock(inode, &di_bh, 1);
- if (ret < 0) {
+ had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
+ if (had_lock < 0) {
+ ret = had_lock;
mlog_errno(ret);
goto cleanup_nolock;
}
if (ret)
mlog_errno(ret);
}
- ocfs2_inode_unlock(inode, 1);
+ ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
cleanup_nolock:
brelse(di_bh);
brelse(xbs.xattr_bh);
likely(f->f_op->write || f->f_op->write_iter))
f->f_mode |= FMODE_CAN_WRITE;
+ f->f_write_hint = WRITE_LIFE_NOT_SET;
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
return err;
}
-static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_be *uuid)
+static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
{
struct ovl_fh *fh;
int fh_type, fh_len, dwords;
struct dentry *upper)
{
struct super_block *sb = lower->d_sb;
- uuid_be *uuid = (uuid_be *) &sb->s_uuid;
const struct ovl_fh *fh = NULL;
int err;
* up and a pure upper inode.
*/
if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
- uuid_be_cmp(*uuid, NULL_UUID_BE)) {
- fh = ovl_encode_fh(lower, uuid);
+ !uuid_is_null(&sb->s_uuid)) {
+ fh = ovl_encode_fh(lower, &sb->s_uuid);
if (IS_ERR(fh))
return PTR_ERR(fh);
}
.link = link
};
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
- err = PTR_ERR(upper);
- if (IS_ERR(upper))
- goto out;
-
err = security_inode_copy_up(dentry, &new_creds);
if (err < 0)
- goto out1;
+ goto out;
if (new_creds)
old_creds = override_creds(new_creds);
}
if (err)
- goto out2;
+ goto out;
if (S_ISREG(stat->mode)) {
struct path upperpath;
/*
* Store identifier of lower inode in upper inode xattr to
* allow lookup of the copy up origin inode.
+ *
+ * Don't set origin when we are breaking the association with a lower
+ * hard link.
*/
- err = ovl_set_origin(dentry, lowerpath->dentry, temp);
- if (err)
+ if (S_ISDIR(stat->mode) || stat->nlink == 1) {
+ err = ovl_set_origin(dentry, lowerpath->dentry, temp);
+ if (err)
+ goto out_cleanup;
+ }
+
+ upper = lookup_one_len(dentry->d_name.name, upperdir,
+ dentry->d_name.len);
+ if (IS_ERR(upper)) {
+ err = PTR_ERR(upper);
+ upper = NULL;
goto out_cleanup;
+ }
if (tmpfile)
err = ovl_do_link(temp, udir, upper, true);
/* Restore timestamps on parent (best effort) */
ovl_set_timestamps(upperdir, pstat);
-out2:
+out:
dput(temp);
-out1:
dput(upper);
-out:
return err;
out_cleanup:
if (!tmpfile)
ovl_cleanup(wdir, temp);
- goto out2;
+ goto out;
}
/*
* Make sure that the stored uuid matches the uuid of the lower
* layer where file handle will be decoded.
*/
- if (uuid_be_cmp(fh->uuid, *(uuid_be *) &mnt->mnt_sb->s_uuid))
+ if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
goto out;
origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
u8 len; /* size of this header + size of fid */
u8 flags; /* OVL_FH_FLAG_* */
u8 type; /* fid_type of fid */
- uuid_be uuid; /* uuid of filesystem */
+ uuid_t uuid; /* uuid of filesystem */
u8 fid[0]; /* file identifier */
} __packed;
/* We don't show the stack guard page in /proc/maps */
start = vma->vm_start;
- if (stack_guard_page_start(vma, start))
- start += PAGE_SIZE;
end = vma->vm_end;
- if (stack_guard_page_end(vma, end))
- end -= PAGE_SIZE;
seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
struct kiocb kiocb;
ssize_t ret;
- if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
- return -EOPNOTSUPP;
-
init_sync_kiocb(&kiocb, filp);
- if (flags & RWF_HIPRI)
- kiocb.ki_flags |= IOCB_HIPRI;
- if (flags & RWF_DSYNC)
- kiocb.ki_flags |= IOCB_DSYNC;
- if (flags & RWF_SYNC)
- kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+ ret = kiocb_set_rw_flags(&kiocb, flags);
+ if (ret)
+ return ret;
kiocb.ki_pos = *ppos;
if (type == READ)
if (!(file->f_mode & FMODE_CAN_WRITE))
goto out;
- ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0);
+ ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);
out:
if (ret > 0)
/*
* There is not enough space for user on the device
*/
- if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) {
- mutex_unlock(&UFS_SB(sb)->s_lock);
- UFSD("EXIT (FAILED)\n");
- return 0;
+ if (unlikely(ufs_freefrags(uspi) <= uspi->s_root_blocks)) {
+ if (!capable(CAP_SYS_RESOURCE)) {
+ mutex_unlock(&UFS_SB(sb)->s_lock);
+ UFSD("EXIT (FAILED)\n");
+ return 0;
+ }
}
if (goal >= uspi->s_size)
if (result) {
ufs_clear_frags(inode, result + oldcount,
newcount - oldcount, locked_page != NULL);
+ *err = 0;
write_seqlock(&UFS_I(inode)->meta_lock);
ufs_cpu_to_data_ptr(sb, p, result);
- write_sequnlock(&UFS_I(inode)->meta_lock);
- *err = 0;
UFS_I(inode)->i_lastfrag =
max(UFS_I(inode)->i_lastfrag, fragment + count);
+ write_sequnlock(&UFS_I(inode)->meta_lock);
}
mutex_unlock(&UFS_SB(sb)->s_lock);
UFSD("EXIT, result %llu\n", (unsigned long long)result);
result = ufs_add_fragments(inode, tmp, oldcount, newcount);
if (result) {
*err = 0;
+ read_seqlock_excl(&UFS_I(inode)->meta_lock);
UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
fragment + count);
+ read_sequnlock_excl(&UFS_I(inode)->meta_lock);
ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
locked_page != NULL);
mutex_unlock(&UFS_SB(sb)->s_lock);
/*
* allocate new block and move data
*/
- switch (fs32_to_cpu(sb, usb1->fs_optim)) {
- case UFS_OPTSPACE:
+ if (fs32_to_cpu(sb, usb1->fs_optim) == UFS_OPTSPACE) {
request = newcount;
- if (uspi->s_minfree < 5 || uspi->cs_total.cs_nffree
- > uspi->s_dsize * uspi->s_minfree / (2 * 100))
- break;
- usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
- break;
- default:
- usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
-
- case UFS_OPTTIME:
+ if (uspi->cs_total.cs_nffree < uspi->s_space_to_time)
+ usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
+ } else {
request = uspi->s_fpb;
- if (uspi->cs_total.cs_nffree < uspi->s_dsize *
- (uspi->s_minfree - 2) / 100)
- break;
- usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
- break;
+ if (uspi->cs_total.cs_nffree > uspi->s_time_to_space)
+ usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE);
}
result = ufs_alloc_fragments (inode, cgno, goal, request, err);
if (result) {
ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
locked_page != NULL);
+ mutex_unlock(&UFS_SB(sb)->s_lock);
ufs_change_blocknr(inode, fragment - oldcount, oldcount,
uspi->s_sbbase + tmp,
uspi->s_sbbase + result, locked_page);
+ *err = 0;
write_seqlock(&UFS_I(inode)->meta_lock);
ufs_cpu_to_data_ptr(sb, p, result);
- write_sequnlock(&UFS_I(inode)->meta_lock);
- *err = 0;
UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
fragment + count);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ write_sequnlock(&UFS_I(inode)->meta_lock);
if (newcount < request)
ufs_free_fragments (inode, result + newcount, request - newcount);
ufs_free_fragments (inode, tmp, oldcount);
u64 phys64 = 0;
unsigned frag = fragment & uspi->s_fpbmask;
- if (!create) {
- phys64 = ufs_frag_map(inode, offsets, depth);
- if (phys64)
- map_bh(bh_result, sb, phys64 + frag);
- return 0;
- }
+ phys64 = ufs_frag_map(inode, offsets, depth);
+ if (!create)
+ goto done;
+ if (phys64) {
+ if (fragment >= UFS_NDIR_FRAGMENT)
+ goto done;
+ read_seqlock_excl(&UFS_I(inode)->meta_lock);
+ if (fragment < UFS_I(inode)->i_lastfrag) {
+ read_sequnlock_excl(&UFS_I(inode)->meta_lock);
+ goto done;
+ }
+ read_sequnlock_excl(&UFS_I(inode)->meta_lock);
+ }
/* This code entered only while writing ....? */
mutex_lock(&UFS_I(inode)->truncate_mutex);
}
mutex_unlock(&UFS_I(inode)->truncate_mutex);
return err;
+
+done:
+ if (phys64)
+ map_bh(bh_result, sb, phys64 + frag);
+ return 0;
}
static int ufs_writepage(struct page *page, struct writeback_control *wbc)
*/
inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode);
set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink));
- if (inode->i_nlink == 0) {
- ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
- return -1;
- }
+ if (inode->i_nlink == 0)
+ return -ESTALE;
/*
* Linux now has 32-bit uid and gid, so we can support EFT.
i_gid_write(inode, ufs_get_inode_gid(sb, ufs_inode));
inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size);
- inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
- inode->i_ctime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
- inode->i_mtime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
+ inode->i_atime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
+ inode->i_ctime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
+ inode->i_mtime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
inode->i_mtime.tv_nsec = 0;
inode->i_atime.tv_nsec = 0;
inode->i_ctime.tv_nsec = 0;
*/
inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode);
set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink));
- if (inode->i_nlink == 0) {
- ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
- return -1;
- }
+ if (inode->i_nlink == 0)
+ return -ESTALE;
/*
* Linux now has 32-bit uid and gid, so we can support EFT.
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct buffer_head * bh;
struct inode *inode;
- int err;
+ int err = -EIO;
UFSD("ENTER, ino %lu\n", ino);
err = ufs1_read_inode(inode,
ufs_inode + ufs_inotofsbo(inode->i_ino));
}
-
+ brelse(bh);
if (err)
goto bad_inode;
+
inode->i_version++;
ufsi->i_lastfrag =
(inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
ufs_set_inode_ops(inode);
- brelse(bh);
-
UFSD("EXIT\n");
unlock_new_inode(inode);
return inode;
bad_inode:
iget_failed(inode);
- return ERR_PTR(-EIO);
+ return ERR_PTR(err);
}
static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
ctx->to = from + count;
}
-#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift)
#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
static void ufs_trunc_direct(struct inode *inode)
struct super_block *sb = inode->i_sb;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
unsigned offsets[4];
- int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets);
+ int depth;
int depth2;
unsigned i;
struct ufs_buffer_head *ubh[3];
void *p;
u64 block;
- if (!depth)
- return;
+ if (inode->i_size) {
+ sector_t last = (inode->i_size - 1) >> uspi->s_bshift;
+ depth = ufs_block_to_path(inode, last, offsets);
+ if (!depth)
+ return;
+ } else {
+ depth = 1;
+ }
- /* find the last non-zero in offsets[] */
for (depth2 = depth - 1; depth2; depth2--)
- if (offsets[depth2])
+ if (offsets[depth2] != uspi->s_apb - 1)
break;
mutex_lock(&ufsi->truncate_mutex);
offsets[0] = UFS_IND_BLOCK;
} else {
/* get the blocks that should be partially emptied */
- p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]);
+ p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]++);
for (i = 0; i < depth2; i++) {
- offsets[i]++; /* next branch is fully freed */
block = ufs_data_ptr_to_cpu(sb, p);
if (!block)
break;
write_sequnlock(&ufsi->meta_lock);
break;
}
- p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]);
+ p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]++);
}
while (i--)
free_branch_tail(inode, offsets[i + 1], ubh[i], depth - i - 1);
free_full_branch(inode, block, i - UFS_IND_BLOCK + 1);
}
}
+ read_seqlock_excl(&ufsi->meta_lock);
ufsi->i_lastfrag = DIRECT_FRAGMENT;
+ read_sequnlock_excl(&ufsi->meta_lock);
mark_inode_dirty(inode);
mutex_unlock(&ufsi->truncate_mutex);
}
usb3 = ubh_get_usb_third(uspi);
if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
- (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
+ (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) ||
mtype == UFS_MOUNT_UFSTYPE_UFS2) {
/*we have statistic in different place, then usual*/
uspi->cs_total.cs_ndir = fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir);
usb2 = ubh_get_usb_second(uspi);
usb3 = ubh_get_usb_third(uspi);
- if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
- (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
- mtype == UFS_MOUNT_UFSTYPE_UFS2) {
+ if (mtype == UFS_MOUNT_UFSTYPE_UFS2) {
/*we have statistic in different place, then usual*/
usb2->fs_un.fs_u2.cs_ndir =
cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
usb3->fs_un1.fs_u2.cs_nffree =
cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
- } else {
- usb1->fs_cstotal.cs_ndir =
- cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
- usb1->fs_cstotal.cs_nbfree =
- cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
- usb1->fs_cstotal.cs_nifree =
- cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
- usb1->fs_cstotal.cs_nffree =
- cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
+ goto out;
+ }
+
+ if (mtype == UFS_MOUNT_UFSTYPE_44BSD &&
+ (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) {
+ /* store stats in both old and new places */
+ usb2->fs_un.fs_u2.cs_ndir =
+ cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
+ usb2->fs_un.fs_u2.cs_nbfree =
+ cpu_to_fs64(sb, uspi->cs_total.cs_nbfree);
+ usb3->fs_un1.fs_u2.cs_nifree =
+ cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
+ usb3->fs_un1.fs_u2.cs_nffree =
+ cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
}
+ usb1->fs_cstotal.cs_ndir = cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
+ usb1->fs_cstotal.cs_nbfree = cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
+ usb1->fs_cstotal.cs_nifree = cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
+ usb1->fs_cstotal.cs_nffree = cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
+out:
ubh_mark_buffer_dirty(USPI_UBH(uspi));
ufs_print_super_stuff(sb, usb1, usb2, usb3);
UFSD("EXIT\n");
flags |= UFS_ST_SUN;
}
+ if ((flags & UFS_ST_MASK) == UFS_ST_44BSD &&
+ uspi->s_postblformat == UFS_42POSTBLFMT) {
+ if (!silent)
+ pr_err("this is not a 44bsd filesystem");
+ goto failed;
+ }
+
/*
* Check ufs magic number
*/
uspi->s_cgmask = fs32_to_cpu(sb, usb1->fs_cgmask);
if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
- uspi->s_u2_size = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
- uspi->s_u2_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
+ uspi->s_size = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
+ uspi->s_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
} else {
uspi->s_size = fs32_to_cpu(sb, usb1->fs_size);
uspi->s_dsize = fs32_to_cpu(sb, usb1->fs_dsize);
uspi->s_postbloff = fs32_to_cpu(sb, usb3->fs_postbloff);
uspi->s_rotbloff = fs32_to_cpu(sb, usb3->fs_rotbloff);
+ uspi->s_root_blocks = mul_u64_u32_div(uspi->s_dsize,
+ uspi->s_minfree, 100);
+ if (uspi->s_minfree <= 5) {
+ uspi->s_time_to_space = ~0ULL;
+ uspi->s_space_to_time = 0;
+ usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE);
+ } else {
+ uspi->s_time_to_space = (uspi->s_root_blocks / 2) + 1;
+ uspi->s_space_to_time = mul_u64_u32_div(uspi->s_dsize,
+ uspi->s_minfree - 2, 100) - 1;
+ }
+
/*
* Compute another frequently used values
*/
mutex_lock(&UFS_SB(sb)->s_lock);
usb3 = ubh_get_usb_third(uspi);
- if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
+ if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
buf->f_type = UFS2_MAGIC;
- buf->f_blocks = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
- } else {
+ else
buf->f_type = UFS_MAGIC;
- buf->f_blocks = uspi->s_dsize;
- }
- buf->f_bfree = ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
- uspi->cs_total.cs_nffree;
+
+ buf->f_blocks = uspi->s_dsize;
+ buf->f_bfree = ufs_freefrags(uspi);
buf->f_ffree = uspi->cs_total.cs_nifree;
buf->f_bsize = sb->s_blocksize;
- buf->f_bavail = (buf->f_bfree > (((long)buf->f_blocks / 100) * uspi->s_minfree))
- ? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0;
+ buf->f_bavail = (buf->f_bfree > uspi->s_root_blocks)
+ ? (buf->f_bfree - uspi->s_root_blocks) : 0;
buf->f_files = uspi->s_ncg * uspi->s_ipg;
buf->f_namelen = UFS_MAXNAMLEN;
buf->f_fsid.val[0] = (u32)id;
__u32 s_dblkno; /* offset of first data after cg */
__u32 s_cgoffset; /* cylinder group offset in cylinder */
__u32 s_cgmask; /* used to calc mod fs_ntrak */
- __u32 s_size; /* number of blocks (fragments) in fs */
- __u32 s_dsize; /* number of data blocks in fs */
- __u64 s_u2_size; /* ufs2: number of blocks (fragments) in fs */
- __u64 s_u2_dsize; /*ufs2: number of data blocks in fs */
+ __u64 s_size; /* number of blocks (fragments) in fs */
+ __u64 s_dsize; /* number of data blocks in fs */
__u32 s_ncg; /* number of cylinder groups */
__u32 s_bsize; /* size of basic blocks */
__u32 s_fsize; /* size of fragments */
__u32 s_maxsymlinklen;/* upper limit on fast symlinks' size */
__s32 fs_magic; /* filesystem magic */
unsigned int s_dirblksize;
+ __u64 s_root_blocks;
+ __u64 s_time_to_space;
+ __u64 s_space_to_time;
};
/*
struct page *ufs_get_locked_page(struct address_space *mapping,
pgoff_t index)
{
- struct page *page;
-
- page = find_lock_page(mapping, index);
+ struct inode *inode = mapping->host;
+ struct page *page = find_lock_page(mapping, index);
if (!page) {
page = read_mapping_page(mapping, index, NULL);
printk(KERN_ERR "ufs_change_blocknr: "
"read_mapping_page error: ino %lu, index: %lu\n",
mapping->host->i_ino, index);
- goto out;
+ return page;
}
lock_page(page);
/* Truncate got there first */
unlock_page(page);
put_page(page);
- page = NULL;
- goto out;
+ return NULL;
}
if (!PageUptodate(page) || PageError(page)) {
printk(KERN_ERR "ufs_change_blocknr: "
"can not read page: ino %lu, index: %lu\n",
- mapping->host->i_ino, index);
+ inode->i_ino, index);
- page = ERR_PTR(-EIO);
+ return ERR_PTR(-EIO);
}
}
-out:
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, 1 << inode->i_blkbits, 0);
return page;
}
#define ubh_blkmap(ubh,begin,bit) \
((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb)))
-/*
- * Determine the number of available frags given a
- * percentage to hold in reserve.
- */
static inline u64
-ufs_freespace(struct ufs_sb_private_info *uspi, int percentreserved)
+ufs_freefrags(struct ufs_sb_private_info *uspi)
{
return ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
- uspi->cs_total.cs_nffree -
- (uspi->s_dsize * (percentreserved) / 100);
+ uspi->cs_total.cs_nffree;
}
/*
bool must_wait, return_to_userland;
long blocking_state;
- BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
-
ret = VM_FAULT_SIGBUS;
+
+ /*
+ * We don't do userfault handling for the final child pid update.
+ *
+ * We also don't do userfault handling during
+ * coredumping. hugetlbfs has the special
+ * follow_hugetlb_page() to skip missing pages in the
+ * FOLL_DUMP case, anon memory also checks for FOLL_DUMP with
+ * the no_page_table() helper in follow_page_mask(), but the
+ * shmem_vm_ops->fault method is invoked even during
+ * coredumping without mmap_sem and it ends up here.
+ */
+ if (current->flags & (PF_EXITING|PF_DUMPCORE))
+ goto out;
+
+ /*
+ * Coredumping runs without mmap_sem so we can only check that
+ * the mmap_sem is held, if PF_DUMPCORE was not set.
+ */
+ WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
+
ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
if (!ctx)
goto out;
goto out;
/*
- * We don't do userfault handling for the final child pid update.
- */
- if (current->flags & PF_EXITING)
- goto out;
-
- /*
* Check that we can return VM_FAULT_RETRY.
*
* NOTE: it should become possible to return VM_FAULT_RETRY
xfs_sysfs.o \
xfs_trans.o \
xfs_xattr.o \
- kmem.o \
- uuid.o
+ kmem.o
# low-level transaction/log code
xfs-y += xfs_log.o \
+++ /dev/null
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include <xfs.h>
-
-/* IRIX interpretation of an uuid_t */
-typedef struct {
- __be32 uu_timelow;
- __be16 uu_timemid;
- __be16 uu_timehi;
- __be16 uu_clockseq;
- __be16 uu_node[3];
-} xfs_uu_t;
-
-/*
- * uuid_getnodeuniq - obtain the node unique fields of a UUID.
- *
- * This is not in any way a standard or condoned UUID function;
- * it just something that's needed for user-level file handles.
- */
-void
-uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
-{
- xfs_uu_t *uup = (xfs_uu_t *)uuid;
-
- fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
- be16_to_cpu(uup->uu_timemid);
- fsid[1] = be32_to_cpu(uup->uu_timelow);
-}
-
-int
-uuid_is_nil(uuid_t *uuid)
-{
- int i;
- char *cp = (char *)uuid;
-
- if (uuid == NULL)
- return 0;
- /* implied check of version number here... */
- for (i = 0; i < sizeof *uuid; i++)
- if (*cp++) return 0; /* not nil */
- return 1; /* is nil */
-}
-
-int
-uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
-{
- return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
-}
+++ /dev/null
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_UUID_H__
-#define __XFS_SUPPORT_UUID_H__
-
-typedef struct {
- unsigned char __u_bits[16];
-} uuid_t;
-
-extern int uuid_is_nil(uuid_t *uuid);
-extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
-extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-
-static inline void
-uuid_copy(uuid_t *dst, uuid_t *src)
-{
- memcpy(dst, src, sizeof(uuid_t));
-}
-
-#endif /* __XFS_SUPPORT_UUID_H__ */
struct xfs_inode *ip = XFS_I(ioend->io_inode);
xfs_off_t offset = ioend->io_offset;
size_t size = ioend->io_size;
- int error = ioend->io_bio->bi_error;
+ int error;
/*
* Just clean up the in-memory strutures if the fs has been shut down.
/*
* Clean up any COW blocks on an I/O error.
*/
+ error = blk_status_to_errno(ioend->io_bio->bi_status);
if (unlikely(error)) {
switch (ioend->io_type) {
case XFS_IO_COW:
else if (ioend->io_append_trans)
queue_work(mp->m_data_workqueue, &ioend->io_work);
else
- xfs_destroy_ioend(ioend, bio->bi_error);
+ xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
}
STATIC int
* time.
*/
if (status) {
- ioend->io_bio->bi_error = status;
+ ioend->io_bio->bi_status = errno_to_blk_status(status);
bio_endio(ioend->io_bio);
return status;
}
+ ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
return 0;
}
bio_chain(ioend->io_bio, new);
bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+ ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
ioend->io_bio = new;
}
* The swap code (ab-)uses ->bmap to get a block mapping and then
* bypasseѕ the file system for actual I/O. We really can't allow
* that on reflinks inodes, so we have to skip out here. And yes,
- * 0 is the magic code for a bmap error..
+ * 0 is the magic code for a bmap error.
+ *
+ * Since we don't pass back blockdev info, we can't return bmap
+ * information for rt files either.
*/
- if (xfs_is_reflink_inode(ip))
+ if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
return 0;
filemap_write_and_wait(mapping);
__xfs_buf_ioacct_dec(
struct xfs_buf *bp)
{
- ASSERT(spin_is_locked(&bp->b_lock));
+ lockdep_assert_held(&bp->b_lock);
if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
* don't overwrite existing errors - otherwise we can lose errors on
* buffers that require multiple bios to complete.
*/
- if (bio->bi_error)
- cmpxchg(&bp->b_io_error, 0, bio->bi_error);
+ if (bio->bi_status) {
+ int error = blk_status_to_errno(bio->bi_status);
+
+ cmpxchg(&bp->b_io_error, 0, error);
+ }
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
if (!count)
return 0; /* skip atime */
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ }
ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
iolock = XFS_IOLOCK_SHARED;
}
- xfs_ilock(ip, iolock);
+ if (!xfs_ilock_nowait(ip, iolock)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, iolock);
+ }
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
* otherwise demote the lock if we had to take the exclusive lock
* for other reasons in xfs_file_aio_write_checks.
*/
- if (unaligned_io)
- inode_dio_wait(inode);
- else if (iolock == XFS_IOLOCK_EXCL) {
+ if (unaligned_io) {
+ /* If we are going to wait for other DIO to finish, bail */
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (atomic_read(&inode->i_dio_count))
+ return -EAGAIN;
+ } else {
+ inode_dio_wait(inode);
+ }
+ } else if (iolock == XFS_IOLOCK_EXCL) {
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
}
size_t count;
loff_t pos;
- xfs_ilock(ip, iolock);
+ if (!xfs_ilock_nowait(ip, iolock)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, iolock);
+ }
+
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
return -EFBIG;
if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
return -EIO;
+ file->f_mode |= FMODE_AIO_NOWAIT;
return 0;
}
XFS_STATS_INC(mp, vn_active);
ASSERT(atomic_read(&ip->i_pincount) == 0);
- ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0);
{
struct xfs_mount *mp = pag->pag_mount;
- ASSERT(spin_is_locked(&pag->pag_ici_lock));
+ lockdep_assert_held(&pag->pag_ici_lock);
if (pag->pag_ici_reclaimable++)
return;
{
struct xfs_mount *mp = pag->pag_mount;
- ASSERT(spin_is_locked(&pag->pag_ici_lock));
+ lockdep_assert_held(&pag->pag_ici_lock);
if (--pag->pag_ici_reclaimable)
return;
in_f->ilf_dsize = in_f32->ilf_dsize;
in_f->ilf_ino = in_f32->ilf_ino;
/* copy biggest field of ilf_u */
- memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
- in_f32->ilf_u.ilfu_uuid.__u_bits,
- sizeof(uuid_t));
+ uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid);
in_f->ilf_blkno = in_f32->ilf_blkno;
in_f->ilf_len = in_f32->ilf_len;
in_f->ilf_boffset = in_f32->ilf_boffset;
in_f->ilf_dsize = in_f64->ilf_dsize;
in_f->ilf_ino = in_f64->ilf_ino;
/* copy biggest field of ilf_u */
- memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
- in_f64->ilf_u.ilfu_uuid.__u_bits,
- sizeof(uuid_t));
+ uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid);
in_f->ilf_blkno = in_f64->ilf_blkno;
in_f->ilf_len = in_f64->ilf_len;
in_f->ilf_boffset = in_f64->ilf_boffset;
lockmode = xfs_ilock_data_map_shared(ip);
}
+ if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+
ASSERT(offset <= mp->m_super->s_maxbytes);
if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
length = mp->m_super->s_maxbytes - offset;
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
if (flags & IOMAP_DIRECT) {
+ /*
+ * A reflinked inode will result in CoW alloc.
+ * FIXME: It could still overwrite on unshared extents
+ * and not need allocation.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
/* may drop and re-acquire the ilock */
error = xfs_reflink_allocate_cow(ip, &imap, &shared,
&lockmode);
if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
/*
+ * If nowait is set bail since we are going to make
+ * allocations.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+ /*
* We cap the maximum length we map here to MAX_WRITEBACK_PAGES
* pages to keep the chunks of work done where somewhat symmetric
* with the work writeback does. This is a completely arbitrary
#define __XFS_LINUX__
#include <linux/types.h>
+#include <linux/uuid.h>
/*
* Kernel specific type declarations for XFS
#include "kmem.h"
#include "mrlock.h"
-#include "uuid.h"
#include <linux/semaphore.h>
#include <linux/mm.h>
{
ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
- if (uuid_is_nil(&head->h_fs_uuid)) {
+ if (uuid_is_null(&head->h_fs_uuid)) {
/*
* IRIX doesn't write the h_fs_uuid or h_fmt fields. If
- * h_fs_uuid is nil, we assume this log was last mounted
+ * h_fs_uuid is null, we assume this log was last mounted
* by IRIX and continue.
*/
- xfs_warn(mp, "nil uuid in log - IRIX style log");
+ xfs_warn(mp, "null uuid in log - IRIX style log");
} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
xfs_warn(mp, "log has mismatched uuid - can't recover");
xlog_header_check_dump(mp, head);
int hole, i;
/* Publish UUID in struct super_block */
- BUILD_BUG_ON(sizeof(mp->m_super->s_uuid) != sizeof(uuid_t));
- memcpy(&mp->m_super->s_uuid, uuid, sizeof(uuid_t));
+ uuid_copy(&mp->m_super->s_uuid, uuid);
if (mp->m_flags & XFS_MOUNT_NOUUID)
return 0;
- if (uuid_is_nil(uuid)) {
- xfs_warn(mp, "Filesystem has nil UUID - can't mount");
+ if (uuid_is_null(uuid)) {
+ xfs_warn(mp, "Filesystem has null UUID - can't mount");
return -EINVAL;
}
mutex_lock(&xfs_uuid_table_mutex);
for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
- if (uuid_is_nil(&xfs_uuid_table[i])) {
+ if (uuid_is_null(&xfs_uuid_table[i])) {
hole = i;
continue;
}
mutex_lock(&xfs_uuid_table_mutex);
for (i = 0; i < xfs_uuid_table_size; i++) {
- if (uuid_is_nil(&xfs_uuid_table[i]))
+ if (uuid_is_null(&xfs_uuid_table[i]))
continue;
if (!uuid_equal(uuid, &xfs_uuid_table[i]))
continue;
* Copies the low order bits of the timestamp and the randomly
* set "sequence" number out of a UUID.
*/
- uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
+ mp->m_fixedfsid[0] =
+ (get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
+ get_unaligned_be16(&sbp->sb_uuid.b[4]);
+ mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);
mp->m_dmevmask = 0; /* not persistent; set after each mount */
xfs_init_zones(void)
{
xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
- offsetof(struct xfs_ioend, io_inline_bio));
+ offsetof(struct xfs_ioend, io_inline_bio),
+ BIOSET_NEED_BVECS);
if (!xfs_ioend_bioset)
goto out;
bool acpi_bay_match(acpi_handle handle);
bool acpi_dock_match(acpi_handle handle);
-bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs);
-union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
+bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs);
+union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
u64 rev, u64 func, union acpi_object *argv4);
static inline union acpi_object *
-acpi_evaluate_dsm_typed(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
- union acpi_object *argv4, acpi_object_type type)
+acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev,
+ u64 func, union acpi_object *argv4,
+ acpi_object_type type)
{
union acpi_object *obj;
- obj = acpi_evaluate_dsm(handle, uuid, rev, func, argv4);
+ obj = acpi_evaluate_dsm(handle, guid, rev, func, argv4);
if (obj && obj->type != type) {
ACPI_FREE(obj);
obj = NULL;
u32 of_compatible_ok:1;
u32 coherent_dma:1;
u32 cca_seen:1;
- u32 reserved:20;
+ u32 spi_i2c_slave:1;
+ u32 reserved:19;
};
/* File System */
u16 validation_count;
};
+/*
+ * Maximum value of the validation_count field in struct acpi_table_desc.
+ * When reached, validation_count cannot be changed any more and the table will
+ * be permanently regarded as validated.
+ *
+ * This is to prevent situations in which unbalanced table get/put operations
+ * may cause premature table unmapping in the OS to happen.
+ *
+ * The maximum validation count can be defined to any value, but should be
+ * greater than the maximum number of OS early stage mapping slots to avoid
+ * leaking early stage table mappings to the late stage.
+ */
+#define ACPI_MAX_TABLE_VALIDATIONS ACPI_UINT16_MAX
+
/* Masks for Flags field above */
#define ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL (0) /* Virtual address, external maintained */
#ifndef _DT_BINDINGS_CLK_SUN50I_A64_H_
#define _DT_BINDINGS_CLK_SUN50I_A64_H_
+#define CLK_PLL_PERIPH0 11
+
#define CLK_BUS_MIPI_DSI 28
#define CLK_BUS_CE 29
#define CLK_BUS_DMA 30
#ifndef _DT_BINDINGS_CLK_SUN8I_H3_H_
#define _DT_BINDINGS_CLK_SUN8I_H3_H_
+#define CLK_PLL_PERIPH0 9
+
#define CLK_CPUX 14
#define CLK_BUS_CE 20
#include <linux/resource_ext.h>
#include <linux/device.h>
#include <linux/property.h>
+#include <linux/uuid.h>
#ifndef _LINUX
#define _LINUX
struct acpi_buffer ret; /* free by caller if success */
};
-acpi_status acpi_str_to_uuid(char *str, u8 *uuid);
acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
/* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */
}
static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle,
- const u8 *uuid,
+ const guid_t *guid,
int rev, int func,
union acpi_object *argv4)
{
/*
* will die
*/
-#define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
#define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
/*
return bio_split(bio, sectors, gfp, bs);
}
-extern struct bio_set *bioset_create(unsigned int, unsigned int);
-extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
+extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags);
+enum {
+ BIOSET_NEED_BVECS = BIT(0),
+ BIOSET_NEED_RESCUER = BIT(1),
+};
extern void bioset_free(struct bio_set *);
extern mempool_t *biovec_create_pool(int pool_entries);
return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
}
-static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
-{
- return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
-}
-
static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
static inline void bio_io_error(struct bio *bio)
{
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+}
+
+static inline void bio_wouldblock_error(struct bio *bio)
+{
+ bio->bi_status = BLK_STS_AGAIN;
bio_endio(bio);
}
extern void bio_init(struct bio *bio, struct bio_vec *table,
unsigned short max_vecs);
+extern void bio_uninit(struct bio *);
extern void bio_reset(struct bio *);
void bio_chain(struct bio *, struct bio *);
struct blk_mq_tags *tags;
struct blk_mq_tags *sched_tags;
- struct srcu_struct queue_rq_srcu;
-
unsigned long queued;
unsigned long run;
#define BLK_MQ_MAX_DISPATCH_ORDER 7
struct dentry *debugfs_dir;
struct dentry *sched_debugfs_dir;
#endif
+
+ /* Must be the last member - see also blk_mq_hw_ctx_size(). */
+ struct srcu_struct queue_rq_srcu[0];
};
struct blk_mq_tag_set {
bool last;
};
-typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
+typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
+ const struct blk_mq_queue_data *);
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
init_request_fn *init_request;
exit_request_fn *exit_request;
reinit_request_fn *reinit_request;
+ /* Called from inside blk_get_request() */
+ void (*initialize_rq_fn)(struct request *rq);
map_queues_fn *map_queues;
};
enum {
- BLK_MQ_RQ_QUEUE_OK = 0, /* queued fine */
- BLK_MQ_RQ_QUEUE_BUSY = 1, /* requeue IO for later */
- BLK_MQ_RQ_QUEUE_ERROR = 2, /* end IO with error */
-
BLK_MQ_F_SHOULD_MERGE = 1 << 0,
BLK_MQ_F_TAG_SHARED = 1 << 1,
BLK_MQ_F_SG_MERGE = 1 << 2,
BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */
};
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
unsigned int flags);
-struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op,
- unsigned int flags, unsigned int hctx_idx);
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+ unsigned int op, unsigned int flags, unsigned int hctx_idx);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
enum {
int blk_mq_request_started(struct request *rq);
void blk_mq_start_request(struct request *rq);
-void blk_mq_end_request(struct request *rq, int error);
-void __blk_mq_end_request(struct request *rq, int error);
+void blk_mq_end_request(struct request *rq, blk_status_t error);
+void __blk_mq_end_request(struct request *rq, blk_status_t error);
void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
void blk_mq_start_hw_queues(struct request_queue *q);
void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
+void blk_mq_quiesce_queue(struct request_queue *q);
+void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
int blk_mq_map_queues(struct blk_mq_tag_set *set);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
+void blk_mq_quiesce_queue_nowait(struct request_queue *q);
+
/*
* Driver command data is immediately after the request. So subtract request
* size to get back to the original request, add request size to get the PDU.
struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
+/*
+ * Block error status values. See block/blk-core:blk_errors for the details.
+ */
+typedef u8 __bitwise blk_status_t;
+#define BLK_STS_OK 0
+#define BLK_STS_NOTSUPP ((__force blk_status_t)1)
+#define BLK_STS_TIMEOUT ((__force blk_status_t)2)
+#define BLK_STS_NOSPC ((__force blk_status_t)3)
+#define BLK_STS_TRANSPORT ((__force blk_status_t)4)
+#define BLK_STS_TARGET ((__force blk_status_t)5)
+#define BLK_STS_NEXUS ((__force blk_status_t)6)
+#define BLK_STS_MEDIUM ((__force blk_status_t)7)
+#define BLK_STS_PROTECTION ((__force blk_status_t)8)
+#define BLK_STS_RESOURCE ((__force blk_status_t)9)
+#define BLK_STS_IOERR ((__force blk_status_t)10)
+
+/* hack for device mapper, don't use elsewhere: */
+#define BLK_STS_DM_REQUEUE ((__force blk_status_t)11)
+
+#define BLK_STS_AGAIN ((__force blk_status_t)12)
+
struct blk_issue_stat {
u64 stat;
};
struct bio {
struct bio *bi_next; /* request queue link */
struct block_device *bi_bdev;
- int bi_error;
+ blk_status_t bi_status;
unsigned int bi_opf; /* bottom bits req flags,
* top bits REQ_OP. Use
* accessors.
*/
unsigned short bi_flags; /* status, etc and bvec pool number */
unsigned short bi_ioprio;
+ unsigned short bi_write_hint;
struct bvec_iter bi_iter;
/* command specific flags for REQ_OP_WRITE_ZEROES: */
__REQ_NOUNMAP, /* do not free blocks when zeroing */
+ __REQ_NOWAIT, /* Don't wait if request will block */
__REQ_NR_BITS, /* stops here */
};
#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
+#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
*/
#define BLKCG_MAX_POLS 3
-typedef void (rq_end_io_fn)(struct request *, int);
+typedef void (rq_end_io_fn)(struct request *, blk_status_t);
#define BLK_RL_SYNCFULL (1U << 0)
#define BLK_RL_ASYNCFULL (1U << 1)
unsigned int extra_len; /* length of alignment and padding */
+ unsigned short write_hint;
+
unsigned long deadline;
struct list_head timeout_list;
int nr_rqs[2]; /* # allocated [a]sync rqs */
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
+ atomic_t shared_hctx_restart;
+
struct blk_queue_stats *stats;
struct rq_wb *rq_wb;
rq_timed_out_fn *rq_timed_out_fn;
dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn;
+ /* Called just after a request is allocated */
init_rq_fn *init_rq_fn;
+ /* Called just before a request is freed */
exit_rq_fn *exit_rq_fn;
+ /* Called from inside blk_get_request() */
+ void (*initialize_rq_fn)(struct request *rq);
const struct blk_mq_ops *mq_ops;
size_t cmd_size;
void *rq_alloc_data;
+
+ struct work_struct release_work;
+
+#define BLK_MAX_WRITE_HINTS 5
+ u64 write_hints[BLK_MAX_WRITE_HINTS];
};
#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
#define QUEUE_FLAG_STATS 27 /* track rq completion times */
#define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */
#define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */
+#define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */
+#define QUEUE_FLAG_QUIESCED 31 /* queue has been quiesced */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
(1 << QUEUE_FLAG_POLL))
+/*
+ * @q->queue_lock is set while a queue is being initialized. Since we know
+ * that no other threads access the queue object before @q->queue_lock has
+ * been set, it is safe to manipulate queue flags without holding the
+ * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
+ * blk_init_allocated_queue().
+ */
static inline void queue_lockdep_assert_held(struct request_queue *q)
{
if (q->queue_lock)
#define blk_queue_secure_erase(q) \
(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
+#define blk_queue_scsi_passthrough(q) \
+ test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
#define blk_noretry_request(rq) \
((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
REQ_FAILFAST_DRIVER))
+#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
static inline bool blk_account_rq(struct request *rq)
{
static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
{
- if (bio_data(a) == bio_data(b))
+ if (bio_page(a) == bio_page(b) &&
+ bio_offset(a) == bio_offset(b))
return true;
return false;
#define BLK_DEFAULT_SG_TIMEOUT (60 * HZ)
#define BLK_MIN_SG_TIMEOUT (7 * HZ)
-#ifdef CONFIG_BOUNCE
-extern int init_emergency_isa_pool(void);
-extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
-#else
-static inline int init_emergency_isa_pool(void)
-{
- return 0;
-}
-static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
-{
-}
-#endif /* CONFIG_MMU */
-
struct rq_map_data {
struct page **pages;
int page_order;
extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
extern void blk_put_request(struct request *);
extern void __blk_put_request(struct request_queue *, struct request *);
-extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
+extern struct request *blk_get_request(struct request_queue *, unsigned int op,
+ gfp_t gfp_mask);
extern void blk_requeue_request(struct request_queue *, struct request *);
extern int blk_lld_busy(struct request_queue *q);
extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
int (*bio_ctr)(struct bio *, struct bio *, void *),
void *data);
extern void blk_rq_unprep_clone(struct request *rq);
-extern int blk_insert_cloned_request(struct request_queue *q,
+extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
struct request *rq);
extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
extern void blk_delay_queue(struct request_queue *, unsigned long);
-extern void blk_queue_split(struct request_queue *, struct bio **,
- struct bio_set *);
+extern void blk_queue_split(struct request_queue *, struct bio **);
extern void blk_recount_segments(struct request_queue *, struct bio *);
extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
extern void __blk_run_queue_uncond(struct request_queue *q);
extern void blk_run_queue(struct request_queue *);
extern void blk_run_queue_async(struct request_queue *q);
-extern void blk_mq_quiesce_queue(struct request_queue *q);
extern int blk_rq_map_user(struct request_queue *, struct request *,
struct rq_map_data *, void __user *, unsigned long,
gfp_t);
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
struct request *, int, rq_end_io_fn *);
+int blk_status_to_errno(blk_status_t status);
+blk_status_t errno_to_blk_status(int errno);
+
bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
* blk_end_request() for parts of the original function.
* This prevents code duplication in drivers.
*/
-extern bool blk_update_request(struct request *rq, int error,
+extern bool blk_update_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes);
-extern void blk_finish_request(struct request *rq, int error);
-extern bool blk_end_request(struct request *rq, int error,
+extern void blk_finish_request(struct request *rq, blk_status_t error);
+extern bool blk_end_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes);
-extern void blk_end_request_all(struct request *rq, int error);
-extern bool __blk_end_request(struct request *rq, int error,
+extern void blk_end_request_all(struct request *rq, blk_status_t error);
+extern bool __blk_end_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes);
-extern void __blk_end_request_all(struct request *rq, int error);
-extern bool __blk_end_request_cur(struct request *rq, int error);
+extern void __blk_end_request_all(struct request *rq, blk_status_t error);
+extern bool __blk_end_request_cur(struct request *rq, blk_status_t error);
extern void blk_complete_request(struct request *);
extern void __blk_complete_request(struct request *);
#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
-static inline unsigned long queue_bounce_pfn(struct request_queue *q)
-{
- return q->limits.bounce_pfn;
-}
-
static inline unsigned long queue_segment_boundary(struct request_queue *q)
{
return q->limits.seg_boundary_mask;
const char *disk_name;
};
-typedef int (integrity_processing_fn) (struct blk_integrity_iter *);
+typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
struct blk_integrity_profile {
integrity_processing_fn *generate_fn;
struct cleancache_ops {
int (*init_fs)(size_t);
- int (*init_shared_fs)(char *uuid, size_t);
+ int (*init_shared_fs)(uuid_t *uuid, size_t);
int (*get_page)(int, struct cleancache_filekey,
pgoff_t, struct page *);
void (*put_page)(int, struct cleancache_filekey,
const char *name,
struct config_item_type *type);
-extern struct config_item * config_item_get(struct config_item *);
+extern struct config_item *config_item_get(struct config_item *);
+extern struct config_item *config_item_get_unless_zero(struct config_item *);
extern void config_item_put(struct config_item *);
struct config_item_type {
* 2 : The target wants to push back the io
*/
typedef int (*dm_endio_fn) (struct dm_target *ti,
- struct bio *bio, int error);
+ struct bio *bio, blk_status_t *error);
typedef int (*dm_request_endio_fn) (struct dm_target *ti,
- struct request *clone, int error,
+ struct request *clone, blk_status_t error,
union map_info *map_context);
typedef void (*dm_presuspend_fn) (struct dm_target *ti);
static inline int dmi_name_in_serial(const char *s) { return 0; }
#define dmi_available 0
static inline int dmi_walk(void (*decode)(const struct dmi_header *, void *),
- void *private_data) { return -1; }
+ void *private_data) { return -ENXIO; }
static inline bool dmi_match(enum dmi_field f, const char *str)
{ return false; }
static inline void dmi_memdev_name(u16 handle, const char **bank,
int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
void (*requests_merged)(struct request_queue *, struct request *, struct request *);
- struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
- void (*put_request)(struct request *);
+ void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
+ void (*prepare_request)(struct request *, struct bio *bio);
+ void (*finish_request)(struct request *);
void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
bool (*has_work)(struct blk_mq_hw_ctx *);
void (*requeue_request)(struct request *);
struct request *(*former_request)(struct request_queue *, struct request *);
struct request *(*next_request)(struct request_queue *, struct request *);
- int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *);
- void (*put_rq_priv)(struct request_queue *, struct request *);
void (*init_icq)(struct io_cq *);
void (*exit_icq)(struct io_cq *);
};
#include <linux/rwsem.h>
#include <linux/capability.h>
#include <linux/semaphore.h>
+#include <linux/fcntl.h>
#include <linux/fiemap.h>
#include <linux/rculist_bl.h>
#include <linux/atomic.h>
#include <linux/percpu-rwsem.h>
#include <linux/workqueue.h>
#include <linux/delayed_call.h>
+#include <linux/uuid.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
+/* File is capable of returning -EAGAIN if AIO will block */
+#define FMODE_AIO_NOWAIT ((__force fmode_t)0x8000000)
+
/*
* Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
* that indicates that they should check the contents of the iovec are
struct address_space;
struct writeback_control;
+/*
+ * Write life time hint values.
+ */
+enum rw_hint {
+ WRITE_LIFE_NOT_SET = 0,
+ WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
+ WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
+ WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
+ WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
+ WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
+};
+
#define IOCB_EVENTFD (1 << 0)
#define IOCB_APPEND (1 << 1)
#define IOCB_DIRECT (1 << 2)
#define IOCB_DSYNC (1 << 4)
#define IOCB_SYNC (1 << 5)
#define IOCB_WRITE (1 << 6)
+#define IOCB_NOWAIT (1 << 7)
struct kiocb {
struct file *ki_filp;
void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
void *private;
int ki_flags;
+ enum rw_hint ki_hint;
};
static inline bool is_sync_kiocb(struct kiocb *kiocb)
return kiocb->ki_complete == NULL;
}
-static inline int iocb_flags(struct file *file);
-
-static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
-{
- *kiocb = (struct kiocb) {
- .ki_filp = filp,
- .ki_flags = iocb_flags(filp),
- };
-}
-
/*
* "descriptor" for what we're up to with a read.
* This allows us to use the same read code yet
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
unsigned int i_blkbits;
+ enum rw_hint i_write_hint;
blkcnt_t i_blocks;
#ifdef __NEED_I_SIZE_ORDERED
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
+ enum rw_hint f_write_hint;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
#define OFFT_OFFSET_MAX INT_LIMIT(off_t)
#endif
-#include <linux/fcntl.h>
-
extern void send_sigio(struct fown_struct *fown, int fd, int band);
/*
struct sb_writers s_writers;
- char s_id[32]; /* Informational name */
- u8 s_uuid[16]; /* UUID */
+ char s_id[32]; /* Informational name */
+ uuid_t s_uuid; /* UUID */
void *s_fs_info; /* Filesystem private info */
unsigned int s_max_links;
return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
}
+static inline enum rw_hint file_write_hint(struct file *file)
+{
+ if (file->f_write_hint != WRITE_LIFE_NOT_SET)
+ return file->f_write_hint;
+
+ return file_inode(file)->i_write_hint;
+}
+
+static inline int iocb_flags(struct file *file);
+
+static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+{
+ *kiocb = (struct kiocb) {
+ .ki_filp = filp,
+ .ki_flags = iocb_flags(filp),
+ .ki_hint = file_write_hint(filp),
+ };
+}
+
/*
* Inode state bits. Protected by inode->i_lock
*
extern void filemap_fdatawait_keep_errors(struct address_space *);
extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
loff_t lend);
+extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
+ loff_t lend);
extern int filemap_write_and_wait(struct address_space *mapping);
extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
DIO_SKIP_DIO_COUNT = 0x08,
};
-void dio_end_io(struct bio *bio, int error);
+void dio_end_io(struct bio *bio);
ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, struct iov_iter *iter,
return res;
}
+static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
+{
+ if (unlikely(flags & ~RWF_SUPPORTED))
+ return -EOPNOTSUPP;
+
+ if (flags & RWF_NOWAIT) {
+ if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT))
+ return -EOPNOTSUPP;
+ ki->ki_flags |= IOCB_NOWAIT;
+ }
+ if (flags & RWF_HIPRI)
+ ki->ki_flags |= IOCB_HIPRI;
+ if (flags & RWF_DSYNC)
+ ki->ki_flags |= IOCB_DSYNC;
+ if (flags & RWF_SYNC)
+ ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+ return 0;
+}
+
static inline ino_t parent_ino(struct dentry *dentry)
{
ino_t res;
return NULL;
}
-static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to)
-{
- uuid_be_to_bin(uuid_str, (uuid_be *)to);
- return 0;
-}
-
static inline int disk_max_parts(struct gendisk *disk)
{
if (disk->flags & GENHD_FL_EXT_DEVT)
dev_t devt = MKDEV(0, 0);
return devt;
}
-
-static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to)
-{
- return -EINVAL;
-}
#endif /* CONFIG_BLOCK */
#endif /* _LINUX_GENHD_H */
/**
* hash_for_each_possible_rcu - iterate over all possible objects hashing to the
* same bucket in an rcu enabled hashtable
- * in a rcu enabled hashtable
* @name: hashtable to iterate
* @obj: the type * to use as a loop cursor for each entry
* @member: the name of the hlist_node within the struct
void (*init_dev)(ide_drive_t *);
void (*set_pio_mode)(struct hwif_s *, ide_drive_t *);
void (*set_dma_mode)(struct hwif_s *, ide_drive_t *);
- int (*reset_poll)(ide_drive_t *);
+ blk_status_t (*reset_poll)(ide_drive_t *);
void (*pre_reset)(ide_drive_t *);
void (*resetproc)(ide_drive_t *);
void (*maskproc)(ide_drive_t *, int);
extern int ide_vlb_clk;
extern int ide_pci_clk;
-int ide_end_rq(ide_drive_t *, struct request *, int, unsigned int);
+int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int);
void ide_kill_rq(ide_drive_t *, struct request *);
void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
const struct ide_devset *setting, int arg);
void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8);
-int ide_complete_rq(ide_drive_t *, int, unsigned int);
+int ide_complete_rq(ide_drive_t *, blk_status_t, unsigned int);
void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd);
void ide_tf_dump(const char *, struct ide_cmd *);
#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */
#define IOMAP_FAULT (1 << 3) /* mapping for page fault */
#define IOMAP_DIRECT (1 << 4) /* direct I/O */
+#define IOMAP_NOWAIT (1 << 5) /* Don't wait for writeback */
struct iomap_ops {
/*
int get_cmdline(struct task_struct *task, char *buffer, int buflen);
-/* Is the vma a continuation of the stack vma above it? */
-static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
-{
- return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
-}
-
static inline bool vma_is_anonymous(struct vm_area_struct *vma)
{
return !vma->vm_ops;
static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
#endif
-static inline int stack_guard_page_start(struct vm_area_struct *vma,
- unsigned long addr)
-{
- return (vma->vm_flags & VM_GROWSDOWN) &&
- (vma->vm_start == addr) &&
- !vma_growsdown(vma->vm_prev, addr);
-}
-
-/* Is the vma a continuation of the stack vma below it? */
-static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
-{
- return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
-}
-
-static inline int stack_guard_page_end(struct vm_area_struct *vma,
- unsigned long addr)
-{
- return (vma->vm_flags & VM_GROWSUP) &&
- (vma->vm_end == addr) &&
- !vma_growsup(vma->vm_next, addr);
-}
-
int vma_is_stack_for_current(struct vm_area_struct *vma);
extern unsigned long move_page_tables(struct vm_area_struct *vma,
pgoff_t offset,
unsigned long size);
+extern unsigned long stack_guard_gap;
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
return vma;
}
+static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
+{
+ unsigned long vm_start = vma->vm_start;
+
+ if (vma->vm_flags & VM_GROWSDOWN) {
+ vm_start -= stack_guard_gap;
+ if (vm_start > vma->vm_start)
+ vm_start = 0;
+ }
+ return vm_start;
+}
+
+static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
+{
+ unsigned long vm_end = vma->vm_end;
+
+ if (vma->vm_flags & VM_GROWSUP) {
+ vm_end += stack_guard_gap;
+ if (vm_end < vma->vm_end)
+ vm_end = -PAGE_SIZE;
+ }
+ return vm_end;
+}
+
static inline unsigned long vma_pages(struct vm_area_struct *vma)
{
return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
hwparam_ioport, /* Module parameter configures an I/O port */
hwparam_iomem, /* Module parameter configures an I/O mem address */
hwparam_ioport_or_iomem, /* Module parameter could be either, depending on other option */
- hwparam_irq, /* Module parameter configures an I/O port */
+ hwparam_irq, /* Module parameter configures an IRQ */
hwparam_dma, /* Module parameter configures a DMA channel */
hwparam_dma_addr, /* Module parameter configures a DMA buffer address */
hwparam_other, /* Module parameter configures some other value */
*
* int (*ndo_change_mtu)(struct net_device *dev, int new_mtu);
* Called when a user wants to change the Maximum Transfer Unit
- * of a device. If not defined, any request to change MTU will
- * will return an error.
+ * of a device.
*
* void (*ndo_tx_timeout)(struct net_device *dev);
* Callback used when the transmitter has not made any progress
* @rtnl_link_state: This enum represents the phases of creating
* a new link
*
- * @destructor: Called from unregister,
- * can be used to call free_netdev
+ * @needs_free_netdev: Should unregister perform free_netdev?
+ * @priv_destructor: Called from unregister
* @npinfo: XXX: need comments on this one
* @nd_net: Network namespace this network device is inside
*
RTNL_LINK_INITIALIZING,
} rtnl_link_state:16;
- void (*destructor)(struct net_device *dev);
+ bool needs_free_netdev;
+ void (*priv_destructor)(struct net_device *dev);
#ifdef CONFIG_NETPOLL
struct netpoll_info __rcu *npinfo;
return dev->name;
}
+static inline bool netdev_unregistering(const struct net_device *dev)
+{
+ return dev->reg_state == NETREG_UNREGISTERING;
+}
+
static inline const char *netdev_reg_state(const struct net_device *dev)
{
switch (dev->reg_state) {
};
-#define FCNVME_ASSOC_HOSTID_LEN 16
#define FCNVME_ASSOC_HOSTNQN_LEN 256
#define FCNVME_ASSOC_SUBNQN_LEN 256
__be16 cntlid;
__be16 sqsize;
__be32 rsvd52;
- u8 hostid[FCNVME_ASSOC_HOSTID_LEN];
+ uuid_t hostid;
u8 hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
u8 subnqn[FCNVME_ASSOC_SUBNQN_LEN];
u8 rsvd632[384];
#define _LINUX_NVME_H
#include <linux/types.h>
+#include <linux/uuid.h>
/* NQN names in commands fields specified one size */
#define NVMF_NQN_FIELD_LEN 256
NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */
};
-#define NVMF_AQ_DEPTH 32
+#define NVME_AQ_DEPTH 32
enum {
NVME_REG_CAP = 0x0000, /* Controller Capabilities */
NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */
NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */
NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */
+ NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */
};
#define NVME_CAP_MQES(cap) ((cap) & 0xffff)
__u8 tnvmcap[16];
__u8 unvmcap[16];
__le32 rpmbs;
- __u8 rsvd316[4];
+ __le16 edstt;
+ __u8 dsto;
+ __u8 fwug;
__le16 kas;
- __u8 rsvd322[190];
+ __le16 hctma;
+ __le16 mntmt;
+ __le16 mxtmt;
+ __le32 sanicap;
+ __u8 rsvd332[180];
__u8 sqes;
__u8 cqes;
__le16 maxcmd;
NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
NVME_CTRL_VWC_PRESENT = 1 << 0,
NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
+ NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7,
};
__le16 nabsn;
__le16 nabo;
__le16 nabspf;
- __u16 rsvd46;
+ __le16 noiob;
__u8 nvmcap[16];
__u8 rsvd64[40];
__u8 nguid[16];
NVME_ID_CNS_NS = 0x00,
NVME_ID_CNS_CTRL = 0x01,
NVME_ID_CNS_NS_ACTIVE_LIST = 0x02,
+ NVME_ID_CNS_NS_DESC_LIST = 0x03,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
NVME_ID_CNS_CTRL_NS_LIST = 0x12,
};
enum {
+ NVME_DIR_IDENTIFY = 0x00,
+ NVME_DIR_STREAMS = 0x01,
+ NVME_DIR_SND_ID_OP_ENABLE = 0x01,
+ NVME_DIR_SND_ST_OP_REL_ID = 0x01,
+ NVME_DIR_SND_ST_OP_REL_RSC = 0x02,
+ NVME_DIR_RCV_ID_OP_PARAM = 0x01,
+ NVME_DIR_RCV_ST_OP_PARAM = 0x01,
+ NVME_DIR_RCV_ST_OP_STATUS = 0x02,
+ NVME_DIR_RCV_ST_OP_RESOURCE = 0x03,
+ NVME_DIR_ENDIR = 0x01,
+};
+
+enum {
NVME_NS_FEAT_THIN = 1 << 0,
NVME_NS_FLBAS_LBA_MASK = 0xf,
NVME_NS_FLBAS_META_EXT = 0x10,
NVME_NS_DPS_PI_TYPE3 = 3,
};
+struct nvme_ns_id_desc {
+ __u8 nidt;
+ __u8 nidl;
+ __le16 reserved;
+};
+
+#define NVME_NIDT_EUI64_LEN 8
+#define NVME_NIDT_NGUID_LEN 16
+#define NVME_NIDT_UUID_LEN 16
+
+enum {
+ NVME_NIDT_EUI64 = 0x01,
+ NVME_NIDT_NGUID = 0x02,
+ NVME_NIDT_UUID = 0x03,
+};
+
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
NVME_RW_PRINFO_PRCHK_APP = 1 << 11,
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRACT = 1 << 13,
+ NVME_RW_DTYPE_STREAMS = 1 << 4,
};
struct nvme_dsm_cmd {
__le64 entries[32];
};
+enum {
+ NVME_HOST_MEM_ENABLE = (1 << 0),
+ NVME_HOST_MEM_RETURN = (1 << 1),
+};
+
/* Admin commands */
enum nvme_admin_opcode {
nvme_admin_download_fw = 0x11,
nvme_admin_ns_attach = 0x15,
nvme_admin_keep_alive = 0x18,
+ nvme_admin_directive_send = 0x19,
+ nvme_admin_directive_recv = 0x1a,
nvme_admin_dbbuf = 0x7C,
nvme_admin_format_nvm = 0x80,
nvme_admin_security_send = 0x81,
__u32 rsvd11[5];
};
+#define NVME_IDENTIFY_DATA_SIZE 4096
+
struct nvme_features {
__u8 opcode;
__u8 flags;
union nvme_data_ptr dptr;
__le32 fid;
__le32 dword11;
- __u32 rsvd12[4];
+ __le32 dword12;
+ __le32 dword13;
+ __le32 dword14;
+ __le32 dword15;
+};
+
+struct nvme_host_mem_buf_desc {
+ __le64 addr;
+ __le32 size;
+ __u32 rsvd;
};
struct nvme_create_cq {
__u32 rsvd14[2];
};
+struct nvme_directive_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __le32 numd;
+ __u8 doper;
+ __u8 dtype;
+ __le16 dspec;
+ __u8 endir;
+ __u8 tdtype;
+ __u16 rsvd15;
+
+ __u32 rsvd16[3];
+};
+
/*
* Fabrics subcommands.
*/
};
struct nvmf_connect_data {
- __u8 hostid[16];
+ uuid_t hostid;
__le16 cntlid;
char resv4[238];
char subsysnqn[NVMF_NQN_FIELD_LEN];
__u32 rsvd12[6];
};
+struct streams_directive_params {
+ __u16 msl;
+ __u16 nssa;
+ __u16 nsso;
+ __u8 rsvd[10];
+ __u32 sws;
+ __u16 sgs;
+ __u16 nsa;
+ __u16 nso;
+ __u8 rsvd2[6];
+};
+
struct nvme_command {
union {
struct nvme_common_command common;
struct nvmf_property_set_command prop_set;
struct nvmf_property_get_command prop_get;
struct nvme_dbbuf dbbuf;
+ struct nvme_directive_cmd directive;
};
};
#define NVME_VS(major, minor, tertiary) \
(((major) << 16) | ((minor) << 8) | (tertiary))
+#define NVME_MAJOR(ver) ((ver) >> 16)
+#define NVME_MINOR(ver) (((ver) >> 8) & 0xff)
+#define NVME_TERTIARY(ver) ((ver) & 0xff)
+
#endif /* _LINUX_NVME_H */
static inline void acpiphp_check_host_bridge(struct acpi_device *adev) { }
#endif
-extern const u8 pci_acpi_dsm_uuid[];
+extern const guid_t pci_acpi_dsm_guid;
#define DEVICE_LABEL_DSM 0x07
#define RESET_DELAY_DSM 0x08
#define FUNCTION_DELAY_DSM 0x09
const void *buf, size_t buflen, off_t skip);
size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
void *buf, size_t buflen, off_t skip);
+size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
+ size_t buflen, off_t skip);
/*
* Maximum number of entries that will be allocated in one piece, if
int red_left_pad; /* Left redzone padding size */
#ifdef CONFIG_SYSFS
struct kobject kobj; /* For sysfs */
+ struct work_struct kobj_remove_work;
#endif
#ifdef CONFIG_MEMCG
struct memcg_cache_params memcg_params;
*/
struct tk_read_base {
struct clocksource *clock;
- u64 (*read)(struct clocksource *cs);
u64 mask;
u64 cycle_last;
u32 mult;
* interval.
* @xtime_remainder: Shifted nano seconds left over when rounding
* @cycle_interval
- * @raw_interval: Raw nano seconds accumulated per NTP interval.
+ * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
* @ntp_error: Difference between accumulated time and NTP time in ntp
* shifted nano seconds.
* @ntp_error_shift: Shift conversion between clock shifted nano seconds and
u64 cycle_interval;
u64 xtime_interval;
s64 xtime_remainder;
- u32 raw_interval;
+ u64 raw_interval;
/* The ntp_tick_length() value currently being used.
* This cached copy ensures we consistently apply the tick
* length for an entire tick, as ntp_tick_length may change
#include <uapi/linux/uuid.h>
-/*
- * V1 (time-based) UUID definition [RFC 4122].
- * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns
- * increments since midnight 15th October 1582
- * - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID
- * time
- * - the clock sequence is a 14-bit counter to avoid duplicate times
- */
-struct uuid_v1 {
- __be32 time_low; /* low part of timestamp */
- __be16 time_mid; /* mid part of timestamp */
- __be16 time_hi_and_version; /* high part of timestamp and version */
-#define UUID_TO_UNIX_TIME 0x01b21dd213814000ULL
-#define UUID_TIMEHI_MASK 0x0fff
-#define UUID_VERSION_TIME 0x1000 /* time-based UUID */
-#define UUID_VERSION_NAME 0x3000 /* name-based UUID */
-#define UUID_VERSION_RANDOM 0x4000 /* (pseudo-)random generated UUID */
- u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */
-#define UUID_CLOCKHI_MASK 0x3f
-#define UUID_VARIANT_STD 0x80
- u8 clock_seq_low; /* clock seq low */
- u8 node[6]; /* spatially unique node ID (MAC addr) */
-};
+typedef struct {
+ __u8 b[16];
+} uuid_t;
+
+#define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+((uuid_t) \
+{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
+ ((b) >> 8) & 0xff, (b) & 0xff, \
+ ((c) >> 8) & 0xff, (c) & 0xff, \
+ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
/*
* The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")
*/
#define UUID_STRING_LEN 36
-static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2)
+extern const guid_t guid_null;
+extern const uuid_t uuid_null;
+
+static inline bool guid_equal(const guid_t *u1, const guid_t *u2)
+{
+ return memcmp(u1, u2, sizeof(guid_t)) == 0;
+}
+
+static inline void guid_copy(guid_t *dst, const guid_t *src)
+{
+ memcpy(dst, src, sizeof(guid_t));
+}
+
+static inline bool guid_is_null(const guid_t *guid)
+{
+ return guid_equal(guid, &guid_null);
+}
+
+static inline bool uuid_equal(const uuid_t *u1, const uuid_t *u2)
+{
+ return memcmp(u1, u2, sizeof(uuid_t)) == 0;
+}
+
+static inline void uuid_copy(uuid_t *dst, const uuid_t *src)
{
- return memcmp(&u1, &u2, sizeof(uuid_le));
+ memcpy(dst, src, sizeof(uuid_t));
}
-static inline int uuid_be_cmp(const uuid_be u1, const uuid_be u2)
+static inline bool uuid_is_null(const uuid_t *uuid)
{
- return memcmp(&u1, &u2, sizeof(uuid_be));
+ return uuid_equal(uuid, &uuid_null);
}
void generate_random_uuid(unsigned char uuid[16]);
-extern void uuid_le_gen(uuid_le *u);
-extern void uuid_be_gen(uuid_be *u);
+extern void guid_gen(guid_t *u);
+extern void uuid_gen(uuid_t *u);
bool __must_check uuid_is_valid(const char *uuid);
-extern const u8 uuid_le_index[16];
-extern const u8 uuid_be_index[16];
+extern const u8 guid_index[16];
+extern const u8 uuid_index[16];
+
+int guid_parse(const char *uuid, guid_t *u);
+int uuid_parse(const char *uuid, uuid_t *u);
+
+/* backwards compatibility, don't use in new code */
+typedef uuid_t uuid_be;
+#define UUID_BE(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+ UUID_INIT(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7)
+#define NULL_UUID_BE \
+ UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00)
-int uuid_le_to_bin(const char *uuid, uuid_le *u);
-int uuid_be_to_bin(const char *uuid, uuid_be *u);
+#define uuid_le_gen(u) guid_gen(u)
+#define uuid_be_gen(u) uuid_gen(u)
+#define uuid_le_to_bin(guid, u) guid_parse(guid, u)
+#define uuid_be_to_bin(uuid, u) uuid_parse(uuid, u)
+
+static inline int uuid_le_cmp(const guid_t u1, const guid_t u2)
+{
+ return memcmp(&u1, &u2, sizeof(guid_t));
+}
+
+static inline int uuid_be_cmp(const uuid_t u1, const uuid_t u2)
+{
+ return memcmp(&u1, &u2, sizeof(uuid_t));
+}
#endif
{
}
+static inline void cec_notifier_register(struct cec_notifier *n,
+ struct cec_adapter *adap,
+ void (*callback)(struct cec_adapter *adap, u16 pa))
+{
+}
+
+static inline void cec_notifier_unregister(struct cec_notifier *n)
+{
+}
+
#endif
#endif
#define cec_phys_addr_exp(pa) \
((pa) >> 12), ((pa) >> 8) & 0xf, ((pa) >> 4) & 0xf, (pa) & 0xf
-#if IS_ENABLED(CONFIG_CEC_CORE)
+#if IS_REACHABLE(CONFIG_CEC_CORE)
struct cec_adapter *cec_allocate_adapter(const struct cec_adap_ops *ops,
void *priv, const char *name, u32 caps, u8 available_las);
int cec_register_adapter(struct cec_adapter *adap, struct device *parent);
struct net;
#ifdef CONFIG_WEXT_CORE
-int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
void __user *arg);
int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
unsigned long arg);
struct iw_statistics *get_wireless_stats(struct net_device *dev);
int call_commit_handler(struct net_device *dev);
#else
-static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
void __user *arg)
{
return -EINVAL;
}
#endif
-#ifdef CONFIG_XFRM_OFFLOAD
void __net_init xfrm_dev_init(void);
+
+#ifdef CONFIG_XFRM_OFFLOAD
int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
struct xfrm_user_offload *xuo);
}
}
#else
-static inline void __net_init xfrm_dev_init(void)
-{
-}
-
static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
{
return 0;
osd_req_done_fn *async_done;
void *async_private;
- int async_error;
+ blk_status_t async_error;
int req_errors;
};
extern void scsi_kunmap_atomic_sg(void *virt);
extern int scsi_init_io(struct scsi_cmnd *cmd);
+extern void scsi_initialize_rq(struct request *rq);
extern int scsi_dma_map(struct scsi_cmnd *cmd);
extern void scsi_dma_unmap(struct scsi_cmnd *cmd);
kfree(req->cmd);
}
-void scsi_req_init(struct request *);
+void scsi_req_init(struct scsi_request *req);
#endif /* _SCSI_SCSI_REQUEST_H */
#define N_TXTADDR(x) (N_MAGIC(x) == QMAGIC ? PAGE_SIZE : 0)
#endif
-/* Address of data segment in memory after it is loaded.
- Note that it is up to you to define SEGMENT_SIZE
- on machines not listed here. */
-#if defined(vax) || defined(hp300) || defined(pyr)
-#define SEGMENT_SIZE page_size
-#endif
-#ifdef sony
-#define SEGMENT_SIZE 0x2000
-#endif /* Sony. */
-#ifdef is68k
-#define SEGMENT_SIZE 0x20000
-#endif
-#if defined(m68k) && defined(PORTAR)
-#define PAGE_SIZE 0x400
-#define SEGMENT_SIZE PAGE_SIZE
-#endif
-
-#ifdef linux
+/* Address of data segment in memory after it is loaded. */
#ifndef __KERNEL__
#include <unistd.h>
#endif
#endif
#endif
#endif
-#endif
#define _N_SEGMENT_ROUND(x) ALIGN(x, SEGMENT_SIZE)
unsigned int r_extern:1;
/* Four bits that aren't used, but when writing an object file
it is desirable to clear them. */
-#ifdef NS32K
- unsigned r_bsr:1;
- unsigned r_disp:1;
- unsigned r_pad:2;
-#else
unsigned int r_pad:4;
-#endif
};
#endif /* no N_RELOCATION_INFO_DECLARED. */
struct iocb {
/* these are internal to the kernel/libc. */
__u64 aio_data; /* data to be returned in event's data */
- __u32 PADDED(aio_key, aio_reserved1);
+ __u32 PADDED(aio_key, aio_rw_flags);
/* the kernel sets aio_key to the req # */
/* common fields */
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4
-#define DM_VERSION_MINOR 35
+#define DM_VERSION_MINOR 36
#define DM_VERSION_PATCHLEVEL 0
-#define DM_VERSION_EXTRA "-ioctl (2016-06-23)"
+#define DM_VERSION_EXTRA "-ioctl (2017-06-09)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */
* it was forced up into this mode or autonegotiated.
*/
-/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. */
-/* Update drivers/net/phy/phy.c:phy_speed_to_str() when adding new values */
+/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal.
+ * Update drivers/net/phy/phy.c:phy_speed_to_str() and
+ * drivers/net/bonding/bond_3ad.c:__get_link_speed() when adding new values.
+ */
#define SPEED_10 10
#define SPEED_100 100
#define SPEED_1000 1000
/* (1U << 31) is reserved for signed error codes */
/*
+ * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
+ * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
+ * the specific file.
+ */
+#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11)
+#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
+#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13)
+#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
+
+/*
+ * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
+ * used to clear any hints previously set.
+ */
+#define RWF_WRITE_LIFE_NOT_SET 0
+#define RWH_WRITE_LIFE_NONE 1
+#define RWH_WRITE_LIFE_SHORT 2
+#define RWH_WRITE_LIFE_MEDIUM 3
+#define RWH_WRITE_LIFE_LONG 4
+#define RWH_WRITE_LIFE_EXTREME 5
+
+/*
* Types of directory notifications that may be requested.
*/
#define DN_ACCESS 0x00000001 /* File accessed */
#define RWF_HIPRI 0x00000001 /* high priority request, poll if possible */
#define RWF_DSYNC 0x00000002 /* per-IO O_DSYNC */
#define RWF_SYNC 0x00000004 /* per-IO O_SYNC */
+#define RWF_NOWAIT 0x00000008 /* per-IO, return -EAGAIN if operation would block */
+
+#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC |\
+ RWF_NOWAIT)
#endif /* _UAPI_LINUX_FS_H */
LO_FLAGS_AUTOCLEAR = 4,
LO_FLAGS_PARTSCAN = 8,
LO_FLAGS_DIRECT_IO = 16,
+ LO_FLAGS_BLOCKSIZE = 32,
};
#include <asm/posix_types.h> /* for __kernel_old_dev_t */
__u64 lo_init[2];
};
+#define LO_INFO_BLOCKSIZE(l) (l)->lo_init[0]
+
/*
* Loop filter types
*/
#define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */
#define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */
#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */
+#define NBD_FLAG_SEND_FUA (1 << 3) /* send FUA (forced unit access) */
/* there is a gap here to match userspace */
#define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */
#define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */
+/* values for cmd flags in the upper 16 bits of request type */
+#define NBD_CMD_FLAG_FUA (1 << 16) /* FUA (forced unit access) op */
+
/* These are client behavior specific flags. */
#define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on
disconnect. */
#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
enum ovs_tunnel_key_attr {
+ /* OVS_TUNNEL_KEY_ATTR_NONE, standard nl API requires this attribute! */
OVS_TUNNEL_KEY_ATTR_ID, /* be64 Tunnel ID */
OVS_TUNNEL_KEY_ATTR_IPV4_SRC, /* be32 src IP address. */
OVS_TUNNEL_KEY_ATTR_IPV4_DST, /* be32 dst IP address. */
typedef struct {
__u8 b[16];
-} uuid_le;
+} guid_t;
-typedef struct {
- __u8 b[16];
-} uuid_be;
-
-#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
-((uuid_le) \
+#define GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+((guid_t) \
{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
(b) & 0xff, ((b) >> 8) & 0xff, \
(c) & 0xff, ((c) >> 8) & 0xff, \
(d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
-#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
-((uuid_be) \
-{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
- ((b) >> 8) & 0xff, (b) & 0xff, \
- ((c) >> 8) & 0xff, (c) & 0xff, \
- (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
-
+/* backwards compatibility, don't use in new code */
+typedef guid_t uuid_le;
+#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+ GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)
#define NULL_UUID_LE \
UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \
- 0x00, 0x00, 0x00, 0x00)
-
-#define NULL_UUID_BE \
- UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \
- 0x00, 0x00, 0x00, 0x00)
-
+ 0x00, 0x00, 0x00, 0x00)
#endif /* _UAPI_LINUX_UUID_H_ */
if (err)
return err;
+ if (is_pointer_value(env, insn->src_reg)) {
+ verbose("R%d leaks addr into mem\n", insn->src_reg);
+ return -EACCES;
+ }
+
/* check whether atomic_add can read the memory */
err = check_mem_access(env, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ, -1);
int ret = -ENOMEM, max_order = 0;
if (!has_aux(event))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) {
/*
ret = __irq_set_trigger(desc,
new->flags & IRQF_TRIGGER_MASK);
- if (ret)
+ if (ret) {
+ irq_release_resources(desc);
goto out_mask;
+ }
}
desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
#include <linux/syscore_ops.h>
#include <linux/compiler.h>
#include <linux/hugetlb.h>
+#include <linux/frame.h>
#include <asm/page.h>
#include <asm/sections.h>
* only when panic_cpu holds the current CPU number; this is the only CPU
* which processes crash_kexec routines.
*/
-void __crash_kexec(struct pt_regs *regs)
+void __noclone __crash_kexec(struct pt_regs *regs)
{
/* Take the kexec_mutex here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
mutex_unlock(&kexec_mutex);
}
}
+STACK_FRAME_NON_STANDARD(__crash_kexec);
void crash_kexec(struct pt_regs *regs)
{
ops = container_of(fops, struct klp_ops, fops);
- rcu_read_lock();
+ /*
+ * A variant of synchronize_sched() is used to allow patching functions
+ * where RCU is not watching, see klp_synchronize_transition().
+ */
+ preempt_disable_notrace();
func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
stack_node);
klp_arch_set_pc(regs, (unsigned long)func->new_func);
unlock:
- rcu_read_unlock();
+ preempt_enable_notrace();
}
/*
static DECLARE_DELAYED_WORK(klp_transition_work, klp_transition_work_fn);
/*
+ * This function is just a stub to implement a hard force
+ * of synchronize_sched(). This requires synchronizing
+ * tasks even in userspace and idle.
+ */
+static void klp_sync(struct work_struct *work)
+{
+}
+
+/*
+ * We allow to patch also functions where RCU is not watching,
+ * e.g. before user_exit(). We can not rely on the RCU infrastructure
+ * to do the synchronization. Instead hard force the sched synchronization.
+ *
+ * This approach allows to use RCU functions for manipulating func_stack
+ * safely.
+ */
+static void klp_synchronize_transition(void)
+{
+ schedule_on_each_cpu(klp_sync);
+}
+
+/*
* The transition to the target patch state is complete. Clean up the data
* structures.
*/
* func->transition gets cleared, the handler may choose a
* removed function.
*/
- synchronize_rcu();
+ klp_synchronize_transition();
}
if (klp_transition_patch->immediate)
/* Prevent klp_ftrace_handler() from seeing KLP_UNDEFINED state */
if (klp_target_state == KLP_PATCHED)
- synchronize_rcu();
+ klp_synchronize_transition();
read_lock(&tasklist_lock);
for_each_process_thread(g, task) {
*/
void klp_update_patch_state(struct task_struct *task)
{
- rcu_read_lock();
+ /*
+ * A variant of synchronize_sched() is used to allow patching functions
+ * where RCU is not watching, see klp_synchronize_transition().
+ */
+ preempt_disable_notrace();
/*
* This test_and_clear_tsk_thread_flag() call also serves as a read
if (test_and_clear_tsk_thread_flag(task, TIF_PATCH_PENDING))
task->patch_state = READ_ONCE(klp_target_state);
- rcu_read_unlock();
+ preempt_enable_notrace();
}
/*
clear_tsk_thread_flag(idle_task(cpu), TIF_PATCH_PENDING);
/* Let any remaining calls to klp_update_patch_state() complete */
- synchronize_rcu();
+ klp_synchronize_transition();
klp_start_transition();
}
struct hib_bio_batch {
atomic_t count;
wait_queue_head_t wait;
- int error;
+ blk_status_t error;
};
static void hib_init_batch(struct hib_bio_batch *hb)
{
atomic_set(&hb->count, 0);
init_waitqueue_head(&hb->wait);
- hb->error = 0;
+ hb->error = BLK_STS_OK;
}
static void hib_end_io(struct bio *bio)
struct hib_bio_batch *hb = bio->bi_private;
struct page *page = bio->bi_io_vec[0].bv_page;
- if (bio->bi_error) {
+ if (bio->bi_status) {
printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
imajor(bio->bi_bdev->bd_inode),
iminor(bio->bi_bdev->bd_inode),
flush_icache_range((unsigned long)page_address(page),
(unsigned long)page_address(page) + PAGE_SIZE);
- if (bio->bi_error && !hb->error)
- hb->error = bio->bi_error;
+ if (bio->bi_status && !hb->error)
+ hb->error = bio->bi_status;
if (atomic_dec_and_test(&hb->count))
wake_up(&hb->wait);
return error;
}
-static int hib_wait_io(struct hib_bio_batch *hb)
+static blk_status_t hib_wait_io(struct hib_bio_batch *hb)
{
wait_event(hb->wait, atomic_read(&hb->count) == 0);
- return hb->error;
+ return blk_status_to_errno(hb->error);
}
/*
BUG_ON(cpu_online(smp_processor_id()));
if (mm != &init_mm) {
- switch_mm_irqs_off(mm, &init_mm, current);
+ switch_mm(mm, &init_mm, current);
finish_arch_post_lock_switch();
}
mmdrop(mm);
if (sg_policy->next_freq == next_freq)
return;
- if (sg_policy->next_freq > next_freq)
- next_freq = (sg_policy->next_freq + next_freq) >> 1;
-
sg_policy->next_freq = next_freq;
sg_policy->last_freq_update_time = time;
trace_sched_stat_runtime_enabled()) {
printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
"stat_blocked and stat_runtime require the "
- "kernel parameter schedstats=enabled or "
+ "kernel parameter schedstats=enable or "
"kernel.sched_schedstats=1\n");
}
#endif
return !tsk->ptrace;
}
-static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static void collect_signal(int sig, struct sigpending *list, siginfo_t *info,
+ bool *resched_timer)
{
struct sigqueue *q, *first = NULL;
still_pending:
list_del_init(&first->list);
copy_siginfo(info, &first->info);
+
+ *resched_timer =
+ (first->flags & SIGQUEUE_PREALLOC) &&
+ (info->si_code == SI_TIMER) &&
+ (info->si_sys_private);
+
__sigqueue_free(first);
} else {
/*
}
static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
- siginfo_t *info)
+ siginfo_t *info, bool *resched_timer)
{
int sig = next_signal(pending, mask);
if (sig)
- collect_signal(sig, pending, info);
+ collect_signal(sig, pending, info, resched_timer);
return sig;
}
*/
int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
{
+ bool resched_timer = false;
int signr;
/* We only dequeue private signals from ourselves, we don't let
* signalfd steal them
*/
- signr = __dequeue_signal(&tsk->pending, mask, info);
+ signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
if (!signr) {
signr = __dequeue_signal(&tsk->signal->shared_pending,
- mask, info);
+ mask, info, &resched_timer);
#ifdef CONFIG_POSIX_TIMERS
/*
* itimer signal ?
current->jobctl |= JOBCTL_STOP_DEQUEUED;
}
#ifdef CONFIG_POSIX_TIMERS
- if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
+ if (resched_timer) {
/*
* Release the siglock to ensure proper locking order
* of timer locks outside of siglocks. Note, we leave
/* Only supports reads */
if (oldval && oldlen) {
char buf[UUID_STRING_LEN + 1];
- uuid_be uuid;
+ uuid_t uuid;
result = kernel_read(file, 0, buf, sizeof(buf) - 1);
if (result < 0)
buf[result] = '\0';
result = -EIO;
- if (uuid_be_to_bin(buf, &uuid))
+ if (uuid_parse(buf, &uuid))
goto out;
if (oldlen > 16)
{
struct alarm_base *base = &alarm_bases[alarm->type];
- start = ktime_add(start, base->gettime());
+ start = ktime_add_safe(start, base->gettime());
alarm_start(alarm, start);
}
EXPORT_SYMBOL_GPL(alarm_start_relative);
overrun++;
}
- alarm->node.expires = ktime_add(alarm->node.expires, interval);
+ alarm->node.expires = ktime_add_safe(alarm->node.expires, interval);
return overrun;
}
EXPORT_SYMBOL_GPL(alarm_forward);
/* start the timer */
timr->it.alarm.interval = timespec64_to_ktime(new_setting->it_interval);
+
+ /*
+ * Rate limit to the tick as a hot fix to prevent DOS. Will be
+ * mopped up later.
+ */
+ if (timr->it.alarm.interval < TICK_NSEC)
+ timr->it.alarm.interval = TICK_NSEC;
+
exp = timespec64_to_ktime(new_setting->it_value);
/* Convert (if necessary) to absolute time */
if (flags != TIMER_ABSTIME) {
ktime_t now;
now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
- exp = ktime_add(now, exp);
+ exp = ktime_add_safe(now, exp);
}
alarm_start(&timr->it.alarm.alarmtimer, exp);
static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
#ifdef CONFIG_TICK_ONESHOT
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
#else
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
#endif
/**
* tick_broadcast_setup_oneshot - setup the broadcast device
*/
-void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
int cpu = smp_processor_id();
/* Functions related to oneshot broadcasting */
#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
-extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
extern void tick_broadcast_switch_to_oneshot(void);
extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
extern int tick_broadcast_oneshot_active(void);
bool tick_broadcast_oneshot_available(void);
extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
#else /* !(BROADCAST && ONESHOT): */
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
static inline void tick_broadcast_switch_to_oneshot(void) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
static inline int tick_broadcast_oneshot_active(void) { return 0; }
tk->offs_boot = ktime_add(tk->offs_boot, delta);
}
+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is necessary to use in the read paths because, while the
+ * seqlock ensures we don't return a bad value while structures are updated,
+ * it doesn't protect from potential crashes. There is the possibility that
+ * the tkr's clocksource may change between the read reference, and the
+ * clock reference passed to the read function. This can cause crashes if
+ * the wrong clocksource is passed to the wrong read function.
+ * This isn't necessary to use when holding the timekeeper_lock or doing
+ * a read of the fast-timekeeper tkrs (which is protected by its own locking
+ * and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+ struct clocksource *clock = READ_ONCE(tkr->clock);
+
+ return clock->read(clock);
+}
+
#ifdef CONFIG_DEBUG_TIMEKEEPING
#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
*/
do {
seq = read_seqcount_begin(&tk_core.seq);
- now = tkr->read(tkr->clock);
+ now = tk_clock_read(tkr);
last = tkr->cycle_last;
mask = tkr->mask;
max = tkr->clock->max_cycles;
u64 cycle_now, delta;
/* read clocksource */
- cycle_now = tkr->read(tkr->clock);
+ cycle_now = tk_clock_read(tkr);
/* calculate the delta since the last update_wall_time */
delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
++tk->cs_was_changed_seq;
old_clock = tk->tkr_mono.clock;
tk->tkr_mono.clock = clock;
- tk->tkr_mono.read = clock->read;
tk->tkr_mono.mask = clock->mask;
- tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+ tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
tk->tkr_raw.clock = clock;
- tk->tkr_raw.read = clock->read;
tk->tkr_raw.mask = clock->mask;
tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
/* Go back from cycles -> shifted ns */
tk->xtime_interval = interval * clock->mult;
tk->xtime_remainder = ntpinterval - tk->xtime_interval;
- tk->raw_interval = (interval * clock->mult) >> clock->shift;
+ tk->raw_interval = interval * clock->mult;
/* if changing clocks, convert xtime_nsec shift units */
if (old_clock) {
now += timekeeping_delta_to_ns(tkr,
clocksource_delta(
- tkr->read(tkr->clock),
+ tk_clock_read(tkr),
tkr->cycle_last,
tkr->mask));
} while (read_seqcount_retry(&tkf->seq, seq));
return cycles_at_suspend;
}
+static struct clocksource dummy_clock = {
+ .read = dummy_clock_read,
+};
+
/**
* halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
* @tk: Timekeeper to snapshot.
struct tk_read_base *tkr = &tk->tkr_mono;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
- cycles_at_suspend = tkr->read(tkr->clock);
- tkr_dummy.read = dummy_clock_read;
+ cycles_at_suspend = tk_clock_read(tkr);
+ tkr_dummy.clock = &dummy_clock;
update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
tkr = &tk->tkr_raw;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
- tkr_dummy.read = dummy_clock_read;
+ tkr_dummy.clock = &dummy_clock;
update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
}
*/
static void timekeeping_forward_now(struct timekeeper *tk)
{
- struct clocksource *clock = tk->tkr_mono.clock;
u64 cycle_now, delta;
u64 nsec;
- cycle_now = tk->tkr_mono.read(clock);
+ cycle_now = tk_clock_read(&tk->tkr_mono);
delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
do {
seq = read_seqcount_begin(&tk_core.seq);
-
- now = tk->tkr_mono.read(tk->tkr_mono.clock);
+ now = tk_clock_read(&tk->tkr_mono);
systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
base_real = ktime_add(tk->tkr_mono.base,
* Check whether the system counter value provided by the
* device driver is on the current timekeeping interval.
*/
- now = tk->tkr_mono.read(tk->tkr_mono.clock);
+ now = tk_clock_read(&tk->tkr_mono);
interval_start = tk->tkr_mono.cycle_last;
if (!cycle_between(interval_start, cycles, now)) {
clock_was_set_seq = tk->clock_was_set_seq;
* The less preferred source will only be tried if there is no better
* usable source. The rtc part is handled separately in rtc core code.
*/
- cycle_now = tk->tkr_mono.read(clock);
+ cycle_now = tk_clock_read(&tk->tkr_mono);
if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
cycle_now > tk->tkr_mono.cycle_last) {
u64 nsec, cyc_delta;
u32 shift, unsigned int *clock_set)
{
u64 interval = tk->cycle_interval << shift;
- u64 raw_nsecs;
+ u64 snsec_per_sec;
/* If the offset is smaller than a shifted interval, do nothing */
if (offset < interval)
*clock_set |= accumulate_nsecs_to_secs(tk);
/* Accumulate raw time */
- raw_nsecs = (u64)tk->raw_interval << shift;
- raw_nsecs += tk->raw_time.tv_nsec;
- if (raw_nsecs >= NSEC_PER_SEC) {
- u64 raw_secs = raw_nsecs;
- raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
- tk->raw_time.tv_sec += raw_secs;
+ tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
+ tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+ snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+ while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+ tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+ tk->raw_time.tv_sec++;
}
- tk->raw_time.tv_nsec = raw_nsecs;
+ tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
+ tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
/* Accumulate error between NTP and clock interval */
tk->ntp_error += tk->ntp_tick << shift;
#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
offset = real_tk->cycle_interval;
#else
- offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+ offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
#endif
__blk_add_trace(bt, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
- BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu),
+ BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
&rpdu);
}
}
r.sector_from = cpu_to_be64(from);
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
- bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_error,
+ bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
sizeof(r), &r);
}
command = strsep(&next, ":");
- if (WARN_ON_ONCE(!tr))
- return -EINVAL;
-
mutex_lock(&ftrace_cmd_mutex);
list_for_each_entry(p, &ftrace_commands, list) {
if (strcmp(p->name, command) == 0) {
char *number;
int ret;
+ if (!tr)
+ return -ENODEV;
+
/* hash funcs only work with set_ftrace_filter */
if (!enable)
return -EINVAL;
{
struct ftrace_probe_ops *ops;
+ if (!tr)
+ return -ENODEV;
+
/* we register both traceon and traceoff to this callback */
if (strcmp(cmd, "traceon") == 0)
ops = param ? &traceon_count_probe_ops : &traceon_probe_ops;
{
struct ftrace_probe_ops *ops;
+ if (!tr)
+ return -ENODEV;
+
ops = param ? &stacktrace_count_probe_ops : &stacktrace_probe_ops;
return ftrace_trace_probe_callback(tr, ops, hash, glob, cmd,
{
struct ftrace_probe_ops *ops;
+ if (!tr)
+ return -ENODEV;
+
ops = &dump_probe_ops;
/* Only dump once. */
{
struct ftrace_probe_ops *ops;
+ if (!tr)
+ return -ENODEV;
+
ops = &cpudump_probe_ops;
/* Only dump once. */
pr_info("Probe point is not specified.\n");
return -EINVAL;
}
- if (isdigit(argv[1][0])) {
- /* an address specified */
- ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
- if (ret) {
- pr_info("Failed to parse address.\n");
- return ret;
- }
- } else {
+
+ /* try to parse an address. if that fails, try to read the
+ * input as a symbol. */
+ if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
/* a symbol specified */
symbol = argv[1];
/* TODO: support .init module functions */
ret = traceprobe_split_symbol_offset(symbol, &offset);
if (ret) {
- pr_info("Failed to parse symbol.\n");
+ pr_info("Failed to parse either an address or a symbol.\n");
return ret;
}
if (offset && is_return &&
static int
stack_trace_filter_open(struct inode *inode, struct file *file)
{
- return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
+ struct ftrace_ops *ops = inode->i_private;
+
+ return ftrace_regex_open(ops, FTRACE_ITER_FILTER,
inode, file);
}
NULL, &stack_trace_fops);
trace_create_file("stack_trace_filter", 0444, d_tracer,
- NULL, &stack_trace_filter_fops);
+ &trace_ops, &stack_trace_filter_fops);
if (stack_trace_filter_buf[0])
ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
* the values[M, M+1, ..., N] into the ints array in get_options.
*/
-static int get_range(char **str, int *pint)
+static int get_range(char **str, int *pint, int n)
{
int x, inc_counter, upper_range;
(*str)++;
upper_range = simple_strtol((*str), NULL, 0);
inc_counter = upper_range - *pint;
- for (x = *pint; x < upper_range; x++)
+ for (x = *pint; n && x < upper_range; x++, n--)
*pint++ = x;
return inc_counter;
}
break;
if (res == 3) {
int range_nums;
- range_nums = get_range((char **)&str, ints + i);
+ range_nums = get_range((char **)&str, ints + i, nints - i);
if (range_nums < 0)
break;
/*
u32 crc32c(u32 crc, const void *address, unsigned int length)
{
SHASH_DESC_ON_STACK(shash, tfm);
- u32 *ctx = (u32 *)shash_desc_ctx(shash);
+ u32 ret, *ctx = (u32 *)shash_desc_ctx(shash);
int err;
shash->tfm = tfm;
err = crypto_shash_update(shash, address, length);
BUG_ON(err);
- return *ctx;
+ ret = *ctx;
+ barrier_data(ctx);
+ return ret;
}
EXPORT_SYMBOL(crc32c);
return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
}
EXPORT_SYMBOL(sg_pcopy_to_buffer);
+
+/**
+ * sg_zero_buffer - Zero-out a part of a SG list
+ * @sgl: The SG list
+ * @nents: Number of SG entries
+ * @buflen: The number of bytes to zero out
+ * @skip: Number of bytes to skip before zeroing
+ *
+ * Returns the number of bytes zeroed.
+ **/
+size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
+ size_t buflen, off_t skip)
+{
+ unsigned int offset = 0;
+ struct sg_mapping_iter miter;
+ unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;
+
+ sg_miter_start(&miter, sgl, nents, sg_flags);
+
+ if (!sg_miter_skip(&miter, skip))
+ return false;
+
+ while (offset < buflen && sg_miter_next(&miter)) {
+ unsigned int len;
+
+ len = min(miter.length, buflen - offset);
+ memset(miter.addr, 0, len);
+
+ offset += len;
+ }
+
+ sg_miter_stop(&miter);
+ return offset;
+}
+EXPORT_SYMBOL(sg_zero_buffer);
struct test_uuid_data {
const char *uuid;
- uuid_le le;
- uuid_be be;
+ guid_t le;
+ uuid_t be;
};
static const struct test_uuid_data test_uuid_test_data[] = {
{
.uuid = "c33f4995-3701-450e-9fbf-206a2e98e576",
- .le = UUID_LE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
- .be = UUID_BE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
+ .le = GUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
+ .be = UUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
},
{
.uuid = "64b4371c-77c1-48f9-8221-29f054fc023b",
- .le = UUID_LE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
- .be = UUID_BE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
+ .le = GUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
+ .be = UUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
},
{
.uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84",
- .le = UUID_LE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
- .be = UUID_BE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
+ .le = GUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
+ .be = UUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
},
};
static void __init test_uuid_test(const struct test_uuid_data *data)
{
- uuid_le le;
- uuid_be be;
+ guid_t le;
+ uuid_t be;
char buf[48];
/* LE */
total_tests++;
- if (uuid_le_to_bin(data->uuid, &le))
+ if (guid_parse(data->uuid, &le))
test_uuid_failed("conversion", false, false, data->uuid, NULL);
total_tests++;
- if (uuid_le_cmp(data->le, le)) {
+ if (!guid_equal(&data->le, &le)) {
sprintf(buf, "%pUl", &le);
test_uuid_failed("cmp", false, false, data->uuid, buf);
}
/* BE */
total_tests++;
- if (uuid_be_to_bin(data->uuid, &be))
+ if (uuid_parse(data->uuid, &be))
test_uuid_failed("conversion", false, true, data->uuid, NULL);
total_tests++;
- if (uuid_be_cmp(data->be, be)) {
+ if (uuid_equal(&data->be, &be)) {
sprintf(buf, "%pUb", &be);
test_uuid_failed("cmp", false, true, data->uuid, buf);
}
static void __init test_uuid_wrong(const char *data)
{
- uuid_le le;
- uuid_be be;
+ guid_t le;
+ uuid_t be;
/* LE */
total_tests++;
- if (!uuid_le_to_bin(data, &le))
+ if (!guid_parse(data, &le))
test_uuid_failed("negative", true, false, data, NULL);
/* BE */
total_tests++;
- if (!uuid_be_to_bin(data, &be))
+ if (!uuid_parse(data, &be))
test_uuid_failed("negative", true, true, data, NULL);
}
#include <linux/uuid.h>
#include <linux/random.h>
-const u8 uuid_le_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
-EXPORT_SYMBOL(uuid_le_index);
-const u8 uuid_be_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-EXPORT_SYMBOL(uuid_be_index);
+const guid_t guid_null;
+EXPORT_SYMBOL(guid_null);
+const uuid_t uuid_null;
+EXPORT_SYMBOL(uuid_null);
+
+const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
+const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
/***************************************************************
* Random UUID interface
b[8] = (b[8] & 0x3F) | 0x80;
}
-void uuid_le_gen(uuid_le *lu)
+void guid_gen(guid_t *lu)
{
__uuid_gen_common(lu->b);
/* version 4 : random generation */
lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
}
-EXPORT_SYMBOL_GPL(uuid_le_gen);
+EXPORT_SYMBOL_GPL(guid_gen);
-void uuid_be_gen(uuid_be *bu)
+void uuid_gen(uuid_t *bu)
{
__uuid_gen_common(bu->b);
/* version 4 : random generation */
bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
}
-EXPORT_SYMBOL_GPL(uuid_be_gen);
+EXPORT_SYMBOL_GPL(uuid_gen);
/**
* uuid_is_valid - checks if UUID string valid
}
EXPORT_SYMBOL(uuid_is_valid);
-static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
+static int __uuid_parse(const char *uuid, __u8 b[16], const u8 ei[16])
{
static const u8 si[16] = {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34};
unsigned int i;
return 0;
}
-int uuid_le_to_bin(const char *uuid, uuid_le *u)
+int guid_parse(const char *uuid, guid_t *u)
{
- return __uuid_to_bin(uuid, u->b, uuid_le_index);
+ return __uuid_parse(uuid, u->b, guid_index);
}
-EXPORT_SYMBOL(uuid_le_to_bin);
+EXPORT_SYMBOL(guid_parse);
-int uuid_be_to_bin(const char *uuid, uuid_be *u)
+int uuid_parse(const char *uuid, uuid_t *u)
{
- return __uuid_to_bin(uuid, u->b, uuid_be_index);
+ return __uuid_parse(uuid, u->b, uuid_index);
}
-EXPORT_SYMBOL(uuid_be_to_bin);
+EXPORT_SYMBOL(uuid_parse);
char uuid[UUID_STRING_LEN + 1];
char *p = uuid;
int i;
- const u8 *index = uuid_be_index;
+ const u8 *index = uuid_index;
bool uc = false;
switch (*(++fmt)) {
case 'L':
uc = true; /* fall-through */
case 'l':
- index = uuid_le_index;
+ index = guid_index;
break;
case 'B':
uc = true;
int pool_id = CLEANCACHE_NO_BACKEND_SHARED;
if (cleancache_ops) {
- pool_id = cleancache_ops->init_shared_fs(sb->s_uuid, PAGE_SIZE);
+ pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE);
if (pool_id < 0)
pool_id = CLEANCACHE_NO_POOL;
}
}
EXPORT_SYMBOL(filemap_flush);
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping: address space within which to check
+ * @start_byte: offset in bytes where the range starts
+ * @end_byte: offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+bool filemap_range_has_page(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte)
+{
+ pgoff_t index = start_byte >> PAGE_SHIFT;
+ pgoff_t end = end_byte >> PAGE_SHIFT;
+ struct pagevec pvec;
+ bool ret;
+
+ if (end_byte < start_byte)
+ return false;
+
+ if (mapping->nrpages == 0)
+ return false;
+
+ pagevec_init(&pvec, 0);
+ if (!pagevec_lookup(&pvec, mapping, index, 1))
+ return false;
+ ret = (pvec.pages[0]->index <= end);
+ pagevec_release(&pvec);
+ return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
static int __filemap_fdatawait_range(struct address_space *mapping,
loff_t start_byte, loff_t end_byte)
{
loff_t size;
size = i_size_read(inode);
- retval = filemap_write_and_wait_range(mapping, iocb->ki_pos,
- iocb->ki_pos + count - 1);
- if (retval < 0)
- goto out;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (filemap_range_has_page(mapping, iocb->ki_pos,
+ iocb->ki_pos + count - 1))
+ return -EAGAIN;
+ } else {
+ retval = filemap_write_and_wait_range(mapping,
+ iocb->ki_pos,
+ iocb->ki_pos + count - 1);
+ if (retval < 0)
+ goto out;
+ }
file_accessed(file);
pos = iocb->ki_pos;
+ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+ return -EINVAL;
+
if (limit != RLIM_INFINITY) {
if (iocb->ki_pos >= limit) {
send_sig(SIGXFSZ, current, 0);
write_len = iov_iter_count(from);
end = (pos + write_len - 1) >> PAGE_SHIFT;
- written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
- if (written)
- goto out;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ /* If there are pages to writeback, return */
+ if (filemap_range_has_page(inode->i_mapping, pos,
+ pos + iov_iter_count(from)))
+ return -EAGAIN;
+ } else {
+ written = filemap_write_and_wait_range(mapping, pos,
+ pos + write_len - 1);
+ if (written)
+ goto out;
+ }
/*
* After a write we want buffered reads to be sure to go to disk to get
/* mlock all present pages, but do not fault in new pages */
if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
return -ENOENT;
- /* For mm_populate(), just skip the stack guard page. */
- if ((*flags & FOLL_POPULATE) &&
- (stack_guard_page_start(vma, address) ||
- stack_guard_page_end(vma, address + PAGE_SIZE)))
- return -ENOENT;
if (*flags & FOLL_WRITE)
fault_flags |= FAULT_FLAG_WRITE;
if (*flags & FOLL_REMOTE)
*/
if (unlikely(pmd_trans_migrating(*vmf->pmd))) {
page = pmd_page(*vmf->pmd);
+ if (!get_page_unless_zero(page))
+ goto out_unlock;
spin_unlock(vmf->ptl);
wait_on_page_locked(page);
+ put_page(page);
goto out;
}
/* Migration could have started since the pmd_trans_migrating check */
if (!page_locked) {
+ page_nid = -1;
+ if (!get_page_unless_zero(page))
+ goto out_unlock;
spin_unlock(vmf->ptl);
wait_on_page_locked(page);
- page_nid = -1;
+ put_page(page);
goto out;
}
spin_unlock(ptl);
free_page_and_swap_cache(src_page);
}
- cond_resched();
}
}
* page_remove_rmap() in try_to_unmap_one(). So to determine page status
* correctly, we save a copy of the page flags at this time.
*/
- page_flags = p->flags;
+ if (PageHuge(p))
+ page_flags = hpage->flags;
+ else
+ page_flags = p->flags;
/*
* unpoison always clear PG_hwpoison inside page lock
}
/*
- * This is like a special single-page "expand_{down|up}wards()",
- * except we must first make sure that 'address{-|+}PAGE_SIZE'
- * doesn't hit another vma.
- */
-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
-{
- address &= PAGE_MASK;
- if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
- struct vm_area_struct *prev = vma->vm_prev;
-
- /*
- * Is there a mapping abutting this one below?
- *
- * That's only ok if it's the same stack mapping
- * that has gotten split..
- */
- if (prev && prev->vm_end == address)
- return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
-
- return expand_downwards(vma, address - PAGE_SIZE);
- }
- if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
- struct vm_area_struct *next = vma->vm_next;
-
- /* As VM_GROWSDOWN but s/below/above/ */
- if (next && next->vm_start == address + PAGE_SIZE)
- return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
-
- return expand_upwards(vma, address + PAGE_SIZE);
- }
- return 0;
-}
-
-/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
if (vma->vm_flags & VM_SHARED)
return VM_FAULT_SIGBUS;
- /* Check if we need to add a guard page to the stack */
- if (check_stack_guard_page(vma, vmf->address) < 0)
- return VM_FAULT_SIGSEGV;
-
/*
* Use pte_alloc() instead of pte_alloc_map(). We can't run
* pte_offset_map() on pmds where a huge pmd might be created
unsigned long retval;
unsigned long newbrk, oldbrk;
struct mm_struct *mm = current->mm;
+ struct vm_area_struct *next;
unsigned long min_brk;
bool populate;
LIST_HEAD(uf);
}
/* Check against existing mmap mappings. */
- if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+ next = find_vma(mm, oldbrk);
+ if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
goto out;
/* Ok, looks good - let it rip. */
static long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
- unsigned long max, subtree_gap;
- max = vma->vm_start;
- if (vma->vm_prev)
- max -= vma->vm_prev->vm_end;
+ unsigned long max, prev_end, subtree_gap;
+
+ /*
+ * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
+ * allow two stack_guard_gaps between them here, and when choosing
+ * an unmapped area; whereas when expanding we only require one.
+ * That's a little inconsistent, but keeps the code here simpler.
+ */
+ max = vm_start_gap(vma);
+ if (vma->vm_prev) {
+ prev_end = vm_end_gap(vma->vm_prev);
+ if (max > prev_end)
+ max -= prev_end;
+ else
+ max = 0;
+ }
if (vma->vm_rb.rb_left) {
subtree_gap = rb_entry(vma->vm_rb.rb_left,
struct vm_area_struct, vm_rb)->rb_subtree_gap;
anon_vma_unlock_read(anon_vma);
}
- highest_address = vma->vm_end;
+ highest_address = vm_end_gap(vma);
vma = vma->vm_next;
i++;
}
if (vma->vm_next)
vma_gap_update(vma->vm_next);
else
- mm->highest_vm_end = vma->vm_end;
+ mm->highest_vm_end = vm_end_gap(vma);
/*
* vma->vm_prev wasn't known when we followed the rbtree to find the
vma_gap_update(vma);
if (end_changed) {
if (!next)
- mm->highest_vm_end = end;
+ mm->highest_vm_end = vm_end_gap(vma);
else if (!adjust_next)
vma_gap_update(next);
}
* mm->highest_vm_end doesn't need any update
* in remove_next == 1 case.
*/
- VM_WARN_ON(mm->highest_vm_end != end);
+ VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
}
}
if (insert && file)
while (true) {
/* Visit left subtree if it looks promising */
- gap_end = vma->vm_start;
+ gap_end = vm_start_gap(vma);
if (gap_end >= low_limit && vma->vm_rb.rb_left) {
struct vm_area_struct *left =
rb_entry(vma->vm_rb.rb_left,
}
}
- gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+ gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
check_current:
/* Check if current node has a suitable gap */
if (gap_start > high_limit)
return -ENOMEM;
- if (gap_end >= low_limit && gap_end - gap_start >= length)
+ if (gap_end >= low_limit &&
+ gap_end > gap_start && gap_end - gap_start >= length)
goto found;
/* Visit right subtree if it looks promising */
vma = rb_entry(rb_parent(prev),
struct vm_area_struct, vm_rb);
if (prev == vma->vm_rb.rb_left) {
- gap_start = vma->vm_prev->vm_end;
- gap_end = vma->vm_start;
+ gap_start = vm_end_gap(vma->vm_prev);
+ gap_end = vm_start_gap(vma);
goto check_current;
}
}
while (true) {
/* Visit right subtree if it looks promising */
- gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+ gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
if (gap_start <= high_limit && vma->vm_rb.rb_right) {
struct vm_area_struct *right =
rb_entry(vma->vm_rb.rb_right,
check_current:
/* Check if current node has a suitable gap */
- gap_end = vma->vm_start;
+ gap_end = vm_start_gap(vma);
if (gap_end < low_limit)
return -ENOMEM;
- if (gap_start <= high_limit && gap_end - gap_start >= length)
+ if (gap_start <= high_limit &&
+ gap_end > gap_start && gap_end - gap_start >= length)
goto found;
/* Visit left subtree if it looks promising */
struct vm_area_struct, vm_rb);
if (prev == vma->vm_rb.rb_right) {
gap_start = vma->vm_prev ?
- vma->vm_prev->vm_end : 0;
+ vm_end_gap(vma->vm_prev) : 0;
goto check_current;
}
}
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev;
struct vm_unmapped_area_info info;
if (len > TASK_SIZE - mmap_min_addr)
if (addr) {
addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
+ vma = find_vma_prev(mm, addr, &prev);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)) &&
+ (!prev || addr >= vm_end_gap(prev)))
return addr;
}
const unsigned long len, const unsigned long pgoff,
const unsigned long flags)
{
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
/* requesting a specific address */
if (addr) {
addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
+ vma = find_vma_prev(mm, addr, &prev);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)) &&
+ (!prev || addr >= vm_end_gap(prev)))
return addr;
}
* update accounting. This is shared with both the
* grow-up and grow-down cases.
*/
-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma,
+ unsigned long size, unsigned long grow)
{
struct mm_struct *mm = vma->vm_mm;
struct rlimit *rlim = current->signal->rlim;
- unsigned long new_start, actual_size;
+ unsigned long new_start;
/* address space limit tests */
if (!may_expand_vm(mm, vma->vm_flags, grow))
return -ENOMEM;
/* Stack limit test */
- actual_size = size;
- if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
- actual_size -= PAGE_SIZE;
- if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+ if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
return -ENOMEM;
/* mlock limit tests */
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
+ struct vm_area_struct *next;
+ unsigned long gap_addr;
int error = 0;
if (!(vma->vm_flags & VM_GROWSUP))
return -EFAULT;
- /* Guard against wrapping around to address 0. */
- if (address < PAGE_ALIGN(address+4))
- address = PAGE_ALIGN(address+4);
- else
+ /* Guard against exceeding limits of the address space. */
+ address &= PAGE_MASK;
+ if (address >= TASK_SIZE)
return -ENOMEM;
+ address += PAGE_SIZE;
+
+ /* Enforce stack_guard_gap */
+ gap_addr = address + stack_guard_gap;
+
+ /* Guard against overflow */
+ if (gap_addr < address || gap_addr > TASK_SIZE)
+ gap_addr = TASK_SIZE;
+
+ next = vma->vm_next;
+ if (next && next->vm_start < gap_addr) {
+ if (!(next->vm_flags & VM_GROWSUP))
+ return -ENOMEM;
+ /* Check that both stack segments have the same anon_vma? */
+ }
/* We must make sure the anon_vma is allocated. */
if (unlikely(anon_vma_prepare(vma)))
if (vma->vm_next)
vma_gap_update(vma->vm_next);
else
- mm->highest_vm_end = address;
+ mm->highest_vm_end = vm_end_gap(vma);
spin_unlock(&mm->page_table_lock);
perf_event_mmap(vma);
unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
+ struct vm_area_struct *prev;
+ unsigned long gap_addr;
int error;
address &= PAGE_MASK;
if (error)
return error;
+ /* Enforce stack_guard_gap */
+ gap_addr = address - stack_guard_gap;
+ if (gap_addr > address)
+ return -ENOMEM;
+ prev = vma->vm_prev;
+ if (prev && prev->vm_end > gap_addr) {
+ if (!(prev->vm_flags & VM_GROWSDOWN))
+ return -ENOMEM;
+ /* Check that both stack segments have the same anon_vma? */
+ }
+
/* We must make sure the anon_vma is allocated. */
if (unlikely(anon_vma_prepare(vma)))
return -ENOMEM;
return error;
}
-/*
- * Note how expand_stack() refuses to expand the stack all the way to
- * abut the next virtual mapping, *unless* that mapping itself is also
- * a stack mapping. We want to leave room for a guard page, after all
- * (the guard page itself is not added here, that is done by the
- * actual page faulting logic)
- *
- * This matches the behavior of the guard page logic (see mm/memory.c:
- * check_stack_guard_page()), which only allows the guard page to be
- * removed under these circumstances.
- */
+/* enforced gap between the expanding stack and other mappings. */
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+static int __init cmdline_parse_stack_guard_gap(char *p)
+{
+ unsigned long val;
+ char *endptr;
+
+ val = simple_strtoul(p, &endptr, 10);
+ if (!*endptr)
+ stack_guard_gap = val << PAGE_SHIFT;
+
+ return 0;
+}
+__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
#ifdef CONFIG_STACK_GROWSUP
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
- struct vm_area_struct *next;
-
- address &= PAGE_MASK;
- next = vma->vm_next;
- if (next && next->vm_start == address + PAGE_SIZE) {
- if (!(next->vm_flags & VM_GROWSUP))
- return -ENOMEM;
- }
return expand_upwards(vma, address);
}
#else
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
- struct vm_area_struct *prev;
-
- address &= PAGE_MASK;
- prev = vma->vm_prev;
- if (prev && prev->vm_end == address) {
- if (!(prev->vm_flags & VM_GROWSDOWN))
- return -ENOMEM;
- }
return expand_downwards(vma, address);
}
vma->vm_prev = prev;
vma_gap_update(vma);
} else
- mm->highest_vm_end = prev ? prev->vm_end : 0;
+ mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
tail_vma->vm_next = NULL;
/* Kill the cache */
{
struct page *page = bio->bi_io_vec[0].bv_page;
- if (bio->bi_error) {
+ if (bio->bi_status) {
SetPageError(page);
/*
* We failed to write the page out to swap-space.
{
struct page *page = bio->bi_io_vec[0].bv_page;
- if (bio->bi_error) {
+ if (bio->bi_status) {
SetPageError(page);
ClearPageUptodate(page);
pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
#include <uapi/linux/memfd.h>
#include <linux/userfaultfd_k.h>
#include <linux/rmap.h>
+#include <linux/uuid.h>
#include <linux/uaccess.h>
#include <asm/pgtable.h>
#ifdef CONFIG_TMPFS_POSIX_ACL
sb->s_flags |= MS_POSIXACL;
#endif
+ uuid_gen(&sb->s_uuid);
inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
if (!inode)
return name;
}
+static void sysfs_slab_remove_workfn(struct work_struct *work)
+{
+ struct kmem_cache *s =
+ container_of(work, struct kmem_cache, kobj_remove_work);
+
+ if (!s->kobj.state_in_sysfs)
+ /*
+ * For a memcg cache, this may be called during
+ * deactivation and again on shutdown. Remove only once.
+ * A cache is never shut down before deactivation is
+ * complete, so no need to worry about synchronization.
+ */
+ return;
+
+#ifdef CONFIG_MEMCG
+ kset_unregister(s->memcg_kset);
+#endif
+ kobject_uevent(&s->kobj, KOBJ_REMOVE);
+ kobject_del(&s->kobj);
+ kobject_put(&s->kobj);
+}
+
static int sysfs_slab_add(struct kmem_cache *s)
{
int err;
struct kset *kset = cache_kset(s);
int unmergeable = slab_unmergeable(s);
+ INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
+
if (!kset) {
kobject_init(&s->kobj, &slab_ktype);
return 0;
*/
return;
- if (!s->kobj.state_in_sysfs)
- /*
- * For a memcg cache, this may be called during
- * deactivation and again on shutdown. Remove only once.
- * A cache is never shut down before deactivation is
- * complete, so no need to worry about synchronization.
- */
- return;
-
-#ifdef CONFIG_MEMCG
- kset_unregister(s->memcg_kset);
-#endif
- kobject_uevent(&s->kobj, KOBJ_REMOVE);
- kobject_del(&s->kobj);
+ kobject_get(&s->kobj);
+ schedule_work(&s->kobj_remove_work);
}
void sysfs_slab_release(struct kmem_cache *s)
if (!page)
goto not_enough_page;
ctrl->map[idx] = page;
+
+ if (!(idx % SWAP_CLUSTER_MAX))
+ cond_resched();
}
return 0;
not_enough_page:
if (p4d_none(*p4d))
return NULL;
pud = pud_offset(p4d, addr);
- if (pud_none(*pud))
+
+ /*
+ * Don't dereference bad PUD or PMD (below) entries. This will also
+ * identify huge mappings, which we may encounter on architectures
+ * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
+ * identified as vmalloc addresses by is_vmalloc_addr(), but are
+ * not [unambiguously] associated with a struct page, so there is
+ * no correct value to return for them.
+ */
+ WARN_ON_ONCE(pud_bad(*pud));
+ if (pud_none(*pud) || pud_bad(*pud))
return NULL;
pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
+ WARN_ON_ONCE(pmd_bad(*pmd));
+ if (pmd_none(*pmd) || pmd_bad(*pmd))
return NULL;
ptep = pte_offset_map(pmd, addr);
unsigned long pressure = 0;
/*
- * reclaimed can be greater than scanned in cases
- * like THP, where the scanned is 1 and reclaimed
- * could be 512
+ * reclaimed can be greater than scanned for things such as reclaimed
+ * slab pages. shrink_node() just adds reclaimed pages without a
+ * related increment to scanned pages.
*/
if (reclaimed >= scanned)
goto out;
return 0;
out_free_newdev:
- free_netdev(new_dev);
+ if (new_dev->reg_state == NETREG_UNINITIALIZED)
+ free_netdev(new_dev);
return err;
}
free_percpu(vlan->vlan_pcpu_stats);
vlan->vlan_pcpu_stats = NULL;
- free_netdev(dev);
}
void vlan_setup(struct net_device *dev)
netif_keep_dst(dev);
dev->netdev_ops = &vlan_netdev_ops;
- dev->destructor = vlan_dev_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = vlan_dev_free;
dev->ethtool_ops = &vlan_ethtool_ops;
dev->min_mtu = 0;
skb_new->protocol = eth_type_trans(skb_new, soft_iface);
- soft_iface->stats.rx_packets++;
- soft_iface->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size;
+ batadv_inc_counter(bat_priv, BATADV_CNT_RX);
+ batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
+ skb->len + ETH_HLEN + hdr_size);
netif_rx(skb_new);
batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n",
orig_addr_gw);
- return NET_RX_DROP;
+ goto free_skb;
}
}
* netdev and its private data (bat_priv)
*/
rcu_barrier();
-
- free_netdev(dev);
}
/**
ether_setup(dev);
dev->netdev_ops = &batadv_netdev_ops;
- dev->destructor = batadv_softif_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = batadv_softif_free;
dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL;
dev->priv_flags |= IFF_NO_QUEUE;
dev->netdev_ops = &netdev_ops;
dev->header_ops = &header_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
}
static struct device_type bt_type = {
ether_setup(dev);
dev->netdev_ops = &br_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->ethtool_ops = &br_ethtool_ops;
SET_NETDEV_DEVTYPE(dev, &br_type);
dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
lock_sock(sk);
+ err = -EINVAL;
+ if (addr_len < offsetofend(struct sockaddr, sa_family))
+ goto out;
+
err = -EAFNOSUPPORT;
if (uaddr->sa_family != AF_CAIF)
goto out;
{
struct sk_buff *skb;
- if (likely(in_interrupt()))
- skb = alloc_skb(len + pfx, GFP_ATOMIC);
- else
- skb = alloc_skb(len + pfx, GFP_KERNEL);
-
+ skb = alloc_skb(len + pfx, GFP_ATOMIC);
if (unlikely(skb == NULL))
return NULL;
{
struct chnl_net *priv = netdev_priv(dev);
caif_free_client(&priv->chnl);
- free_netdev(dev);
}
static void ipcaif_net_setup(struct net_device *dev)
{
struct chnl_net *priv;
dev->netdev_ops = &netdev_ops;
- dev->destructor = chnl_net_destructor;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = chnl_net_destructor;
dev->flags |= IFF_NOARP;
dev->flags |= IFF_POINTOPOINT;
dev->mtu = GPRS_PDP_MTU;
static int can_pernet_init(struct net *net)
{
- net->can.can_rcvlists_lock =
- __SPIN_LOCK_UNLOCKED(net->can.can_rcvlists_lock);
+ spin_lock_init(&net->can.can_rcvlists_lock);
net->can.can_rx_alldev_list =
kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
if (!new_ifalias)
return -ENOMEM;
dev->ifalias = new_ifalias;
+ memcpy(dev->ifalias, alias, len);
+ dev->ifalias[len] = 0;
- strlcpy(dev->ifalias, alias, len+1);
return len;
}
}
EXPORT_SYMBOL(gro_find_complete_by_type);
+static void napi_skb_free_stolen_head(struct sk_buff *skb)
+{
+ skb_dst_drop(skb);
+ secpath_reset(skb);
+ kmem_cache_free(skbuff_head_cache, skb);
+}
+
static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
switch (ret) {
break;
case GRO_MERGED_FREE:
- if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
- skb_dst_drop(skb);
- secpath_reset(skb);
- kmem_cache_free(skbuff_head_cache, skb);
- } else {
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ napi_skb_free_stolen_head(skb);
+ else
__kfree_skb(skb);
- }
break;
case GRO_HELD:
break;
case GRO_DROP:
- case GRO_MERGED_FREE:
napi_reuse_skb(napi, skb);
break;
+ case GRO_MERGED_FREE:
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ napi_skb_free_stolen_head(skb);
+ else
+ napi_reuse_skb(napi, skb);
+ break;
+
case GRO_MERGED:
case GRO_CONSUMED:
break;
}
EXPORT_SYMBOL(__skb_gro_checksum_complete);
+static void net_rps_send_ipi(struct softnet_data *remsd)
+{
+#ifdef CONFIG_RPS
+ while (remsd) {
+ struct softnet_data *next = remsd->rps_ipi_next;
+
+ if (cpu_online(remsd->cpu))
+ smp_call_function_single_async(remsd->cpu, &remsd->csd);
+ remsd = next;
+ }
+#endif
+}
+
/*
* net_rps_action_and_irq_enable sends any pending IPI's for rps.
* Note: called with local irq disabled, but exits with local irq enabled.
local_irq_enable();
/* Send pending IPI's to kick RPS processing on remote cpus. */
- while (remsd) {
- struct softnet_data *next = remsd->rps_ipi_next;
-
- if (cpu_online(remsd->cpu))
- smp_call_function_single_async(remsd->cpu,
- &remsd->csd);
- remsd = next;
- }
+ net_rps_send_ipi(remsd);
} else
#endif
local_irq_enable();
if (rc == BUSY_POLL_BUDGET)
__napi_schedule(napi);
local_bh_enable();
- if (local_softirq_pending())
- do_softirq();
}
void napi_busy_loop(unsigned int napi_id,
err_uninit:
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
+ if (dev->priv_destructor)
+ dev->priv_destructor(dev);
goto out;
}
EXPORT_SYMBOL(register_netdevice);
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
WARN_ON(dev->dn_ptr);
- if (dev->destructor)
- dev->destructor(dev);
+ if (dev->priv_destructor)
+ dev->priv_destructor(dev);
+ if (dev->needs_free_netdev)
+ free_netdev(dev);
/* Report a network device has been unregistered */
rtnl_lock();
} else {
netdev_stats_to_stats64(storage, &dev->stats);
}
- storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
- storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
- storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
+ storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
+ storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
+ storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
struct sk_buff **list_skb;
struct sk_buff *skb;
unsigned int cpu;
- struct softnet_data *sd, *oldsd;
+ struct softnet_data *sd, *oldsd, *remsd = NULL;
local_irq_disable();
cpu = smp_processor_id();
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
+#ifdef CONFIG_RPS
+ remsd = oldsd->rps_ipi_list;
+ oldsd->rps_ipi_list = NULL;
+#endif
+ /* send out pending IPI's on offline CPU */
+ net_rps_send_ipi(remsd);
+
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
netif_rx_ni(skb);
if (cmd == SIOCGIFNAME)
return dev_ifname(net, (struct ifreq __user *)arg);
+ /*
+ * Take care of Wireless Extensions. Unfortunately struct iwreq
+ * isn't a proper subset of struct ifreq (it's 8 byte shorter)
+ * so we need to treat it specially, otherwise applications may
+ * fault if the struct they're passing happens to land at the
+ * end of a mapped page.
+ */
+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+ struct iwreq iwr;
+
+ if (copy_from_user(&iwr, arg, sizeof(iwr)))
+ return -EFAULT;
+
+ return wext_handle_ioctl(net, &iwr, cmd, arg);
+ }
+
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
ret = -EFAULT;
return ret;
}
- /* Take care of Wireless Extensions */
- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
- return wext_handle_ioctl(net, &ifr, cmd, arg);
return -ENOTTY;
}
}
spin_lock_bh(&dst_garbage.lock);
dst = dst_garbage.list;
dst_garbage.list = NULL;
+ /* The code in dst_ifdown places a hold on the loopback device.
+ * If the gc entry processing is set to expire after a lengthy
+ * interval, this hold can cause netdev_wait_allrefs() to hang
+ * out and wait for a long time -- until the the loopback
+ * interface is released. If we're really unlucky, it'll emit
+ * pr_emerg messages to console too. Reset the interval here,
+ * so dst cleanups occur in a more timely fashion.
+ */
+ if (dst_garbage.timer_inc > DST_GC_INC) {
+ dst_garbage.timer_inc = DST_GC_INC;
+ dst_garbage.timer_expires = DST_GC_MIN;
+ mod_delayed_work(system_wq, &dst_gc_work,
+ dst_garbage.timer_expires);
+ }
spin_unlock_bh(&dst_garbage.lock);
if (last)
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
- struct fib_rule *rule, *tmp;
+ struct fib_rule *rule, *r;
struct nlattr *tb[FRA_MAX+1];
struct fib_kuid_range range;
int err = -EINVAL;
/*
* Check if this rule is a target to any of them. If so,
+ * adjust to the next one with the same preference or
* disable them. As this operation is eventually very
- * expensive, it is only performed if goto rules have
- * actually been added.
+ * expensive, it is only performed if goto rules, except
+ * current if it is goto rule, have actually been added.
*/
if (ops->nr_goto_rules > 0) {
- list_for_each_entry(tmp, &ops->rules_list, list) {
- if (rtnl_dereference(tmp->ctarget) == rule) {
- RCU_INIT_POINTER(tmp->ctarget, NULL);
+ struct fib_rule *n;
+
+ n = list_next_entry(rule, list);
+ if (&n->list == &ops->rules_list || n->pref != rule->pref)
+ n = NULL;
+ list_for_each_entry(r, &ops->rules_list, list) {
+ if (rtnl_dereference(r->ctarget) != rule)
+ continue;
+ rcu_assign_pointer(r->ctarget, n);
+ if (!n)
ops->unresolved_rules++;
- }
}
}
+ nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(4) /* IFLA_CARRIER_CHANGES */
+ nla_total_size(4) /* IFLA_LINK_NETNSID */
+ + nla_total_size(4) /* IFLA_GROUP */
+ nla_total_size(ext_filter_mask
& RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
struct ifla_vf_mac vf_mac;
struct ifla_vf_info ivi;
+ memset(&ivi, 0, sizeof(ivi));
+
/* Not all SR-IOV capable drivers support the
* spoofcheck and "RSS query enable" query. Preset to
* -1 so the user space tool can detect that the driver
ivi.spoofchk = -1;
ivi.rss_query_en = -1;
ivi.trusted = -1;
- memset(ivi.mac, 0, sizeof(ivi.mac));
/* The default value for VF link state is "auto"
* IFLA_VF_LINK_STATE_AUTO which equals zero
*/
[IFLA_LINK_NETNSID] = { .type = NLA_S32 },
[IFLA_PROTO_DOWN] = { .type = NLA_U8 },
[IFLA_XDP] = { .type = NLA_NESTED },
+ [IFLA_GROUP] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
}
-static inline void dnrt_drop(struct dn_route *rt)
-{
- dst_release(&rt->dst);
- call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
-}
-
static void dn_dst_check_expire(unsigned long dummy)
{
int i;
}
*rtp = rt->dst.dn_next;
rt->dst.dn_next = NULL;
- dnrt_drop(rt);
+ dnrt_free(rt);
break;
}
spin_unlock_bh(&dn_rt_hash_table[i].lock);
dst_use(&rth->dst, now);
spin_unlock_bh(&dn_rt_hash_table[hash].lock);
- dnrt_drop(rt);
+ dst_free(&rt->dst);
*rp = rth;
return 0;
}
for(; rt; rt = next) {
next = rcu_dereference_raw(rt->dst.dn_next);
RCU_INIT_POINTER(rt->dst.dn_next, NULL);
- dst_free((struct dst_entry *)rt);
+ dnrt_free(rt);
}
nothing_to_declare:
if (dev_out->flags & IFF_LOOPBACK)
flags |= RTCF_LOCAL;
- rt = dst_alloc(&dn_dst_ops, dev_out, 1, DST_OBSOLETE_NONE, DST_HOST);
+ rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, DST_HOST);
if (rt == NULL)
goto e_nobufs;
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
- if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+ if (skb->len < sizeof(*nlh) ||
+ nlh->nlmsg_len < sizeof(*nlh) ||
+ skb->len < nlh->nlmsg_len)
return;
if (!netlink_capable(skb, CAP_NET_ADMIN))
del_timer_sync(&hsr->announce_timer);
synchronize_rcu();
- free_netdev(hsr_dev);
}
static const struct net_device_ops hsr_device_ops = {
SET_NETDEV_DEVTYPE(dev, &hsr_type);
dev->priv_flags |= IFF_NO_QUEUE;
- dev->destructor = hsr_dev_destroy;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = hsr_dev_destroy;
dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
unsigned long irqflags;
frame->is_supervision = is_supervision_frame(port->hsr, skb);
- frame->node_src = hsr_get_node(&port->hsr->node_db, skb,
- frame->is_supervision);
+ frame->node_src = hsr_get_node(port, skb, frame->is_supervision);
if (frame->node_src == NULL)
return -1; /* Unknown node and !is_supervision, or no mem */
/* Get the hsr_node from which 'skb' was sent.
*/
-struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
bool is_sup)
{
+ struct list_head *node_db = &port->hsr->node_db;
struct hsr_node *node;
struct ethhdr *ethhdr;
u16 seq_out;
*/
seq_out = hsr_get_skb_sequence_nr(skb) - 1;
} else {
- WARN_ONCE(1, "%s: Non-HSR frame\n", __func__);
+ /* this is called also for frames from master port and
+ * so warn only for non master ports
+ */
+ if (port->type != HSR_PT_MASTER)
+ WARN_ONCE(1, "%s: Non-HSR frame\n", __func__);
seq_out = HSR_SEQNR_START;
}
struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
u16 seq_out);
-struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
bool is_sup);
void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
struct hsr_port *port);
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
- ldev->destructor = free_netdev;
+ ldev->needs_free_netdev = true;
ldev->features |= NETIF_F_NETNS_LOCAL;
}
/* Needed by both icmp_global_allow and icmp_xmit_lock */
local_bh_disable();
- /* Check global sysctl_icmp_msgs_per_sec ratelimit */
- if (!icmpv4_global_allow(net, type, code))
+ /* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
+ * incoming dev is loopback. If outgoing dev change to not be
+ * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
+ */
+ if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
+ !icmpv4_global_allow(net, type, code))
goto out_bh_enable;
sk = icmp_xmit_lock(net);
pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
if (!pmc)
return;
+ spin_lock_init(&pmc->lock);
spin_lock_bh(&im->lock);
pmc->interface = im->interface;
in_dev_hold(in_dev);
static void ip_mc_clear_src(struct ip_mc_list *pmc)
{
- struct ip_sf_list *psf, *nextpsf;
+ struct ip_sf_list *psf, *nextpsf, *tomb, *sources;
- for (psf = pmc->tomb; psf; psf = nextpsf) {
+ spin_lock_bh(&pmc->lock);
+ tomb = pmc->tomb;
+ pmc->tomb = NULL;
+ sources = pmc->sources;
+ pmc->sources = NULL;
+ pmc->sfmode = MCAST_EXCLUDE;
+ pmc->sfcount[MCAST_INCLUDE] = 0;
+ pmc->sfcount[MCAST_EXCLUDE] = 1;
+ spin_unlock_bh(&pmc->lock);
+
+ for (psf = tomb; psf; psf = nextpsf) {
nextpsf = psf->sf_next;
kfree(psf);
}
- pmc->tomb = NULL;
- for (psf = pmc->sources; psf; psf = nextpsf) {
+ for (psf = sources; psf; psf = nextpsf) {
nextpsf = psf->sf_next;
kfree(psf);
}
- pmc->sources = NULL;
- pmc->sfmode = MCAST_EXCLUDE;
- pmc->sfcount[MCAST_INCLUDE] = 0;
- pmc->sfcount[MCAST_EXCLUDE] = 1;
}
/* Join a multicast group
csummode = CHECKSUM_PARTIAL;
cork->length += length;
- if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
+ if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
+ (skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
(sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
return 0;
drop:
+ if (tun_dst)
+ dst_release((struct dst_entry *)tun_dst);
kfree_skb(skb);
return 0;
}
gro_cells_destroy(&tunnel->gro_cells);
dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
- free_netdev(dev);
}
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
struct iphdr *iph = &tunnel->parms.iph;
int err;
- dev->destructor = ip_tunnel_dev_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = ip_tunnel_dev_free;
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
static void ipmr_free_table(struct mr_table *mrt);
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *cache,
- int local);
+ struct net_device *dev, struct sk_buff *skb,
+ struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
dev->flags = IFF_NOARP;
dev->netdev_ops = ®_vif_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->features |= NETIF_F_NETNS_LOCAL;
}
rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else {
- ip_mr_forward(net, mrt, skb, c, 0);
+ ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
}
}
}
/* Queue a packet for resolution. It gets locked cache entry! */
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
- struct sk_buff *skb)
+ struct sk_buff *skb, struct net_device *dev)
{
const struct iphdr *iph = ip_hdr(skb);
struct mfc_cache *c;
kfree_skb(skb);
err = -ENOBUFS;
} else {
+ if (dev) {
+ skb->dev = dev;
+ skb->skb_iif = dev->ifindex;
+ }
skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
err = 0;
}
/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *cache,
- int local)
+ struct net_device *dev, struct sk_buff *skb,
+ struct mfc_cache *cache, int local)
{
- int true_vifi = ipmr_find_vif(mrt, skb->dev);
+ int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
}
/* Wrong interface: drop packet and (maybe) send PIM assert. */
- if (mrt->vif_table[vif].dev != skb->dev) {
- struct net_device *mdev;
-
- mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
- if (mdev == skb->dev)
- goto forward;
-
+ if (mrt->vif_table[vif].dev != dev) {
if (rt_is_output_route(skb_rtable(skb))) {
/* It is our own packet, looped back.
* Very complicated situation...
read_lock(&mrt_lock);
vif = ipmr_find_vif(mrt, dev);
if (vif >= 0) {
- int err2 = ipmr_cache_unresolved(mrt, vif, skb);
+ int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
read_unlock(&mrt_lock);
return err2;
}
read_lock(&mrt_lock);
- ip_mr_forward(net, mrt, skb, cache, local);
+ ip_mr_forward(net, mrt, dev, skb, cache, local);
read_unlock(&mrt_lock);
if (local)
iph->saddr = saddr;
iph->daddr = daddr;
iph->version = 0;
- err = ipmr_cache_unresolved(mrt, vif, skb2);
+ err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
tcp_init_send_head(sk);
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
+ dst_release(sk->sk_rx_dst);
+ sk->sk_rx_dst = NULL;
tcp_saved_syn_free(tp);
/* Clean up fastopen related fields */
static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
unsigned long delay)
{
- if (!delayed_work_pending(&ifp->dad_work))
- in6_ifa_hold(ifp);
- mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
+ in6_ifa_hold(ifp);
+ if (mod_delayed_work(addrconf_wq, &ifp->dad_work, delay))
+ in6_ifa_put(ifp);
}
static int snmp6_alloc_dev(struct inet6_dev *idev)
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netdev_notifier_changeupper_info *info;
struct inet6_dev *idev = __in6_dev_get(dev);
+ struct net *net = dev_net(dev);
int run_pending = 0;
int err;
case NETDEV_CHANGEMTU:
/* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */
if (dev->mtu < IPV6_MIN_MTU) {
- addrconf_ifdown(dev, 1);
+ addrconf_ifdown(dev, dev != net->loopback_dev);
break;
}
* IPV6_MIN_MTU stop IPv6 on this interface.
*/
if (dev->mtu < IPV6_MIN_MTU)
- addrconf_ifdown(dev, 1);
+ addrconf_ifdown(dev, dev != net->loopback_dev);
}
break;
*/
err = ip6_datagram_dst_update(sk, true);
- if (err)
+ if (err) {
+ /* Reset daddr and dport so that udp_v6_early_demux()
+ * fails to find this socket
+ */
+ memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
+ inet->inet_dport = 0;
goto out;
+ }
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
#include <net/ipv6.h>
#include <linux/icmpv6.h>
+static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
+{
+ int off = sizeof(struct ipv6hdr);
+ struct ipv6_opt_hdr *exthdr;
+
+ if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+ return offsetof(struct ipv6hdr, nexthdr);
+
+ while (off < nhlen) {
+ exthdr = (void *)ipv6_hdr + off;
+ if (exthdr->nexthdr == NEXTHDR_ESP)
+ return off;
+
+ off += ipv6_optlen(exthdr);
+ }
+
+ return 0;
+}
+
static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
struct xfrm_state *x;
__be32 seq;
__be32 spi;
+ int nhoff;
int err;
skb_pull(skb, offset);
xo->flags |= XFRM_GRO;
+ nhoff = esp6_nexthdr_esp_offset(ipv6_hdr(skb), offset);
+ if (!nhoff)
+ goto out;
+
+ IP6CB(skb)->nhoff = nhoff;
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
- struct rt6_info *rt;
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
.flags = FIB_LOOKUP_NOREF,
fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
- rt = arg.result;
+ if (arg.result)
+ return arg.result;
- if (!rt) {
- dst_hold(&net->ipv6.ip6_null_entry->dst);
- return &net->ipv6.ip6_null_entry->dst;
- }
-
- if (rt->rt6i_flags & RTF_REJECT &&
- rt->dst.error == -EAGAIN) {
- ip6_rt_put(rt);
- rt = net->ipv6.ip6_null_entry;
- dst_hold(&rt->dst);
- }
-
- return &rt->dst;
+ dst_hold(&net->ipv6.ip6_null_entry->dst);
+ return &net->ipv6.ip6_null_entry->dst;
}
static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
flp6->saddr = saddr;
}
err = rt->dst.error;
- goto out;
+ if (err != -EAGAIN)
+ goto out;
}
again:
ip6_rt_put(rt);
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit */
- if (!icmpv6_global_allow(type))
+ if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
goto out_bh_enable;
mip6_addr_swap(skb);
{
u32 *v = (u32 *)loc.v32;
+ __ila_hash_secret_init();
return jhash_2words(v[0], v[1], hashrnd);
}
struct rt6_info *rt;
rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
- if (rt->rt6i_flags & RTF_REJECT &&
- rt->dst.error == -EAGAIN) {
+ if (rt->dst.error == -EAGAIN) {
ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
dst_cache_destroy(&t->dst_cache);
free_percpu(dev->tstats);
- free_netdev(dev);
}
static void ip6gre_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ip6gre_netdev_ops;
- dev->destructor = ip6gre_dev_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = ip6gre_dev_free;
dev->type = ARPHRD_IP6GRE;
return 0;
err_reg_dev:
- ip6gre_dev_free(ign->fb_tunnel_dev);
+ free_netdev(ign->fb_tunnel_dev);
err_alloc_dev:
return err;
}
ether_setup(dev);
dev->netdev_ops = &ip6gre_tap_netdev_ops;
- dev->destructor = ip6gre_dev_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = ip6gre_dev_free;
dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
*/
cork->length += length;
- if ((((length + fragheaderlen) > mtu) ||
+ if ((((length + (skb ? skb->len : headersize)) > mtu) ||
(skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
gro_cells_destroy(&t->gro_cells);
dst_cache_destroy(&t->dst_cache);
free_percpu(dev->tstats);
- free_netdev(dev);
}
static int ip6_tnl_create2(struct net_device *dev)
return t;
failed_free:
- ip6_dev_free(dev);
+ free_netdev(dev);
failed:
return ERR_PTR(err);
}
return 0;
drop:
+ if (tun_dst)
+ dst_release((struct dst_entry *)tun_dst);
kfree_skb(skb);
return 0;
}
fl6.flowi6_proto = IPPROTO_IPIP;
fl6.daddr = key->u.ipv6.dst;
fl6.flowlabel = key->label;
- dsfield = ip6_tclass(key->label);
+ dsfield = key->tos;
} else {
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
fl6.flowi6_proto = IPPROTO_IPV6;
fl6.daddr = key->u.ipv6.dst;
fl6.flowlabel = key->label;
- dsfield = ip6_tclass(key->label);
+ dsfield = key->tos;
} else {
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
static void ip6_tnl_dev_setup(struct net_device *dev)
{
dev->netdev_ops = &ip6_tnl_netdev_ops;
- dev->destructor = ip6_dev_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = ip6_dev_free;
dev->type = ARPHRD_TUNNEL6;
dev->flags |= IFF_NOARP;
return 0;
err_register:
- ip6_dev_free(ip6n->fb_tnl_dev);
+ free_netdev(ip6n->fb_tnl_dev);
err_alloc_dev:
return err;
}
static void vti6_dev_free(struct net_device *dev)
{
free_percpu(dev->tstats);
- free_netdev(dev);
}
static int vti6_tnl_create2(struct net_device *dev)
return t;
failed_free:
- vti6_dev_free(dev);
+ free_netdev(dev);
failed:
return NULL;
}
static void vti6_dev_setup(struct net_device *dev)
{
dev->netdev_ops = &vti6_netdev_ops;
- dev->destructor = vti6_dev_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = vti6_dev_free;
dev->type = ARPHRD_TUNNEL6;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
return 0;
err_register:
- vti6_dev_free(ip6n->fb_tnl_dev);
+ free_netdev(ip6n->fb_tnl_dev);
err_alloc_dev:
return err;
}
dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
dev->flags = IFF_NOARP;
dev->netdev_ops = ®_vif_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->features |= NETIF_F_NETNS_LOCAL;
}
u64 buff64[SNMP_MIB_MAX];
int i;
- memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+ memset(buff64, 0, sizeof(u64) * SNMP_MIB_MAX);
snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff);
for (i = 0; itemlist[i].name; i++)
if ((rt->dst.dev == dev || !dev) &&
rt != adn->net->ipv6.ip6_null_entry &&
(rt->rt6i_nsiblings == 0 ||
+ (dev && netdev_unregistering(dev)) ||
!rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
return -1;
net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
- } else if (event == NETDEV_UNREGISTER) {
+ } else if (event == NETDEV_UNREGISTER &&
+ dev->reg_state != NETREG_UNREGISTERED) {
+ /* NETDEV_UNREGISTER could be fired for multiple times by
+ * netdev_wait_allrefs(). Make sure we only call this once.
+ */
in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
return nt;
failed_free:
- ipip6_dev_free(dev);
+ free_netdev(dev);
failed:
return NULL;
}
* we try harder to allocate.
*/
kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
- kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
+ kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
NULL;
rcu_read_lock();
dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
- free_netdev(dev);
}
#define SIT_FEATURES (NETIF_F_SG | \
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
dev->netdev_ops = &ipip6_netdev_ops;
- dev->destructor = ipip6_dev_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = ipip6_dev_free;
dev->type = ARPHRD_SIT;
dev->hard_header_len = LL_MAX_HEADER + t_hlen;
struct sock *sk;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- if (INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+ if (sk->sk_state == TCP_ESTABLISHED &&
+ INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
return sk;
/* Only check first socket in chain */
break;
return 1;
#endif
- ipv6_hdr(skb)->payload_len = htons(skb->len);
__skb_push(skb, skb->data - skb_network_header(skb));
+ ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
if (xo && (xo->flags & XFRM_GRO)) {
skb_mac_header_rebuild(skb);
ether_setup(dev);
dev->netdev_ops = &irlan_eth_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->min_mtu = 0;
dev->max_mtu = ETH_MAX_MTU;
goto out;
}
+ err = -ENOBUFS;
key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
if (sa->sadb_sa_auth) {
int keysize = 0;
if (key)
keysize = (key->sadb_key_bits + 7) / 8;
x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL);
- if (!x->aalg)
+ if (!x->aalg) {
+ err = -ENOMEM;
goto out;
+ }
strcpy(x->aalg->alg_name, a->name);
x->aalg->alg_key_len = 0;
if (key) {
goto out;
}
x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL);
- if (!x->calg)
+ if (!x->calg) {
+ err = -ENOMEM;
goto out;
+ }
strcpy(x->calg->alg_name, a->name);
x->props.calgo = sa->sadb_sa_encrypt;
} else {
if (key)
keysize = (key->sadb_key_bits + 7) / 8;
x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL);
- if (!x->ealg)
+ if (!x->ealg) {
+ err = -ENOMEM;
goto out;
+ }
strcpy(x->ealg->alg_name, a->name);
x->ealg->alg_key_len = 0;
if (key) {
struct xfrm_encap_tmpl *natt;
x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
- if (!x->encap)
+ if (!x->encap) {
+ err = -ENOMEM;
goto out;
+ }
natt = x->encap;
n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
int err, err2;
err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
+ if (!err)
+ xfrm_garbage_collect(net);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - old silent behavior */
{
struct l2tp_eth *priv = netdev_priv(dev);
- stats->tx_bytes = atomic_long_read(&priv->tx_bytes);
- stats->tx_packets = atomic_long_read(&priv->tx_packets);
- stats->tx_dropped = atomic_long_read(&priv->tx_dropped);
- stats->rx_bytes = atomic_long_read(&priv->rx_bytes);
- stats->rx_packets = atomic_long_read(&priv->rx_packets);
- stats->rx_errors = atomic_long_read(&priv->rx_errors);
+ stats->tx_bytes = (unsigned long) atomic_long_read(&priv->tx_bytes);
+ stats->tx_packets = (unsigned long) atomic_long_read(&priv->tx_packets);
+ stats->tx_dropped = (unsigned long) atomic_long_read(&priv->tx_dropped);
+ stats->rx_bytes = (unsigned long) atomic_long_read(&priv->rx_bytes);
+ stats->rx_packets = (unsigned long) atomic_long_read(&priv->rx_packets);
+ stats->rx_errors = (unsigned long) atomic_long_read(&priv->rx_errors);
+
}
static const struct net_device_ops l2tp_eth_netdev_ops = {
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->features |= NETIF_F_LLTX;
dev->netdev_ops = &l2tp_eth_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
}
static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
default:
return -EINVAL;
}
+ sdata->u.ap.req_smps = sdata->smps_mode;
+
sdata->needed_rx_chains = sdata->local->rx_chains;
sdata->vif.bss_conf.beacon_int = params->beacon_interval;
return true;
/* can't handle non-legacy preamble yet */
if (status->flag & RX_FLAG_MACTIME_PLCP_START &&
- status->encoding != RX_ENC_LEGACY)
+ status->encoding == RX_ENC_LEGACY)
return true;
return false;
}
static void ieee80211_if_free(struct net_device *dev)
{
free_percpu(dev->tstats);
- free_netdev(dev);
}
static void ieee80211_if_setup(struct net_device *dev)
ether_setup(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->netdev_ops = &ieee80211_dataif_ops;
- dev->destructor = ieee80211_if_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = ieee80211_if_free;
}
static void ieee80211_if_setup_no_queue(struct net_device *dev)
ret = dev_alloc_name(ndev, ndev->name);
if (ret < 0) {
ieee80211_if_free(ndev);
+ free_netdev(ndev);
return ret;
}
ret = register_netdevice(ndev);
if (ret) {
- ieee80211_if_free(ndev);
+ free_netdev(ndev);
return ret;
}
}
struct ieee80211_supported_band *sband;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_channel *chan;
- u32 rate_flags, rates = 0;
+ u32 rates = 0;
sdata_assert_lock(sdata);
return;
}
chan = chanctx_conf->def.chan;
- rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
rcu_read_unlock();
sband = local->hw.wiphy->bands[chan->band];
shift = ieee80211_vif_get_shift(&sdata->vif);
*/
rates_len = 0;
for (i = 0; i < sband->n_bitrates; i++) {
- if ((rate_flags & sband->bitrates[i].flags)
- != rate_flags)
- continue;
rates |= BIT(i);
rates_len++;
}
u32 *rates, u32 *basic_rates,
bool *have_higher_than_11mbit,
int *min_rate, int *min_rate_index,
- int shift, u32 rate_flags)
+ int shift)
{
int i, j;
int brate;
br = &sband->bitrates[j];
- if ((rate_flags & br->flags) != rate_flags)
- continue;
brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5);
if (brate == rate) {
return -ENOMEM;
}
- if (new_sta || override) {
- err = ieee80211_prep_channel(sdata, cbss);
- if (err) {
- if (new_sta)
- sta_info_free(local, new_sta);
- return -EINVAL;
- }
- }
-
+ /*
+ * Set up the information for the new channel before setting the
+ * new channel. We can't - completely race-free - change the basic
+ * rates bitmap and the channel (sband) that it refers to, but if
+ * we set it up before we at least avoid calling into the driver's
+ * bss_info_changed() method with invalid information (since we do
+ * call that from changing the channel - only for IDLE and perhaps
+ * some others, but ...).
+ *
+ * So to avoid that, just set up all the new information before the
+ * channel, but tell the driver to apply it only afterwards, since
+ * it might need the new channel for that.
+ */
if (new_sta) {
u32 rates = 0, basic_rates = 0;
bool have_higher_than_11mbit;
int min_rate = INT_MAX, min_rate_index = -1;
- struct ieee80211_chanctx_conf *chanctx_conf;
const struct cfg80211_bss_ies *ies;
int shift = ieee80211_vif_get_shift(&sdata->vif);
- u32 rate_flags;
-
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (WARN_ON(!chanctx_conf)) {
- rcu_read_unlock();
- sta_info_free(local, new_sta);
- return -EINVAL;
- }
- rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
- rcu_read_unlock();
ieee80211_get_rates(sband, bss->supp_rates,
bss->supp_rates_len,
&rates, &basic_rates,
&have_higher_than_11mbit,
&min_rate, &min_rate_index,
- shift, rate_flags);
+ shift);
/*
* This used to be a workaround for basic rates missing
sdata->vif.bss_conf.sync_dtim_count = 0;
}
rcu_read_unlock();
+ }
- /* tell driver about BSSID, basic rates and timing */
+ if (new_sta || override) {
+ err = ieee80211_prep_channel(sdata, cbss);
+ if (err) {
+ if (new_sta)
+ sta_info_free(local, new_sta);
+ return -EINVAL;
+ }
+ }
+
+ if (new_sta) {
+ /*
+ * tell driver about BSSID, basic rates and timing
+ * this was set up above, before setting the channel
+ */
ieee80211_bss_info_change_notify(sdata,
BSS_CHANGED_BSSID | BSS_CHANGED_BASIC_RATES |
BSS_CHANGED_BEACON_INT);
*/
if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) &&
!ieee80211_has_morefrags(hdr->frame_control) &&
+ !ieee80211_is_back_req(hdr->frame_control) &&
!(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
(rx->sdata->vif.type == NL80211_IFTYPE_AP ||
rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
- /* PM bit is only checked in frames where it isn't reserved,
+ /*
+ * PM bit is only checked in frames where it isn't reserved,
* in AP mode it's reserved in non-bufferable management frames
* (cf. IEEE 802.11-2012 8.2.4.1.7 Power Management field)
+ * BAR frames should be ignored as specified in
+ * IEEE 802.11-2012 10.2.1.2.
*/
(!ieee80211_is_mgmt(hdr->frame_control) ||
ieee80211_is_bufferable_mmpdu(hdr->frame_control))) {
#include <asm/unaligned.h>
#include <net/mac80211.h>
#include <crypto/aes.h>
+#include <crypto/algapi.h>
#include "ieee80211_i.h"
#include "michael.h"
data_len = skb->len - hdrlen - MICHAEL_MIC_LEN;
key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY];
michael_mic(key, hdr, data, data_len, mic);
- if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0)
+ if (crypto_memneq(mic, data + data_len, MICHAEL_MIC_LEN))
goto mic_fail;
/* remove Michael MIC from payload */
bip_aad(skb, aad);
ieee80211_aes_cmac(key->u.aes_cmac.tfm, aad,
skb->data + 24, skb->len - 24, mic);
- if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+ if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
key->u.aes_cmac.icverrors++;
return RX_DROP_UNUSABLE;
}
bip_aad(skb, aad);
ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad,
skb->data + 24, skb->len - 24, mic);
- if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+ if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
key->u.aes_cmac.icverrors++;
return RX_DROP_UNUSABLE;
}
if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce,
skb->data + 24, skb->len - 24,
mic) < 0 ||
- memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+ crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
key->u.aes_gmac.icverrors++;
return RX_DROP_UNUSABLE;
}
struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
mac802154_llsec_destroy(&sdata->sec);
-
- free_netdev(dev);
}
static void ieee802154_if_setup(struct net_device *dev)
sdata->dev->dev_addr);
sdata->dev->header_ops = &mac802154_header_ops;
- sdata->dev->destructor = mac802154_wpan_free;
+ sdata->dev->needs_free_netdev = true;
+ sdata->dev->priv_destructor = mac802154_wpan_free;
sdata->dev->netdev_ops = &mac802154_wpan_ops;
sdata->dev->ml_priv = &mac802154_mlme_wpan;
wpan_dev->promiscuous_mode = false;
break;
case NL802154_IFTYPE_MONITOR:
- sdata->dev->destructor = free_netdev;
+ sdata->dev->needs_free_netdev = true;
sdata->dev->netdev_ops = &mac802154_monitor_ops;
wpan_dev->promiscuous_mode = true;
break;
struct vport *vport = ovs_internal_dev_get_vport(dev);
ovs_vport_free(vport);
- free_netdev(dev);
}
static void
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
IFF_PHONY_HEADROOM | IFF_NO_QUEUE;
- netdev->destructor = internal_dev_destructor;
+ netdev->needs_free_netdev = true;
+ netdev->priv_destructor = internal_dev_destructor;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
dev->tx_queue_len = 10;
dev->netdev_ops = &gprs_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
}
/*
unsigned int *_toklen)
{
const __be32 *xdr = *_xdr;
- unsigned int toklen = *_toklen, n_parts, loop, tmp;
+ unsigned int toklen = *_toklen, n_parts, loop, tmp, paddedlen;
/* there must be at least one name, and at least #names+1 length
* words */
toklen -= 4;
if (tmp <= 0 || tmp > AFSTOKEN_STRING_MAX)
return -EINVAL;
- if (tmp > toklen)
+ paddedlen = (tmp + 3) & ~3;
+ if (paddedlen > toklen)
return -EINVAL;
princ->name_parts[loop] = kmalloc(tmp + 1, GFP_KERNEL);
if (!princ->name_parts[loop])
return -ENOMEM;
memcpy(princ->name_parts[loop], xdr, tmp);
princ->name_parts[loop][tmp] = 0;
- tmp = (tmp + 3) & ~3;
- toklen -= tmp;
- xdr += tmp >> 2;
+ toklen -= paddedlen;
+ xdr += paddedlen >> 2;
}
if (toklen < 4)
toklen -= 4;
if (tmp <= 0 || tmp > AFSTOKEN_K5_REALM_MAX)
return -EINVAL;
- if (tmp > toklen)
+ paddedlen = (tmp + 3) & ~3;
+ if (paddedlen > toklen)
return -EINVAL;
princ->realm = kmalloc(tmp + 1, GFP_KERNEL);
if (!princ->realm)
return -ENOMEM;
memcpy(princ->realm, xdr, tmp);
princ->realm[tmp] = 0;
- tmp = (tmp + 3) & ~3;
- toklen -= tmp;
- xdr += tmp >> 2;
+ toklen -= paddedlen;
+ xdr += paddedlen >> 2;
_debug("%s/...@%s", princ->name_parts[0], princ->realm);
unsigned int *_toklen)
{
const __be32 *xdr = *_xdr;
- unsigned int toklen = *_toklen, len;
+ unsigned int toklen = *_toklen, len, paddedlen;
/* there must be at least one tag and one length word */
if (toklen <= 8)
toklen -= 8;
if (len > max_data_size)
return -EINVAL;
+ paddedlen = (len + 3) & ~3;
+ if (paddedlen > toklen)
+ return -EINVAL;
td->data_len = len;
if (len > 0) {
td->data = kmemdup(xdr, len, GFP_KERNEL);
if (!td->data)
return -ENOMEM;
- len = (len + 3) & ~3;
- toklen -= len;
- xdr += len >> 2;
+ toklen -= paddedlen;
+ xdr += paddedlen >> 2;
}
_debug("tag %x len %x", td->tag, td->data_len);
const __be32 **_xdr, unsigned int *_toklen)
{
const __be32 *xdr = *_xdr;
- unsigned int toklen = *_toklen, len;
+ unsigned int toklen = *_toklen, len, paddedlen;
/* there must be at least one length word */
if (toklen <= 4)
toklen -= 4;
if (len > AFSTOKEN_K5_TIX_MAX)
return -EINVAL;
+ paddedlen = (len + 3) & ~3;
+ if (paddedlen > toklen)
+ return -EINVAL;
*_tktlen = len;
_debug("ticket len %u", len);
*_ticket = kmemdup(xdr, len, GFP_KERNEL);
if (!*_ticket)
return -ENOMEM;
- len = (len + 3) & ~3;
- toklen -= len;
- xdr += len >> 2;
+ toklen -= paddedlen;
+ xdr += paddedlen >> 2;
}
*_xdr = xdr;
{
const __be32 *xdr = prep->data, *token;
const char *cp;
- unsigned int len, tmp, loop, ntoken, toklen, sec_ix;
+ unsigned int len, paddedlen, loop, ntoken, toklen, sec_ix;
size_t datalen = prep->datalen;
int ret;
if (len < 1 || len > AFSTOKEN_CELL_MAX)
goto not_xdr;
datalen -= 4;
- tmp = (len + 3) & ~3;
- if (tmp > datalen)
+ paddedlen = (len + 3) & ~3;
+ if (paddedlen > datalen)
goto not_xdr;
cp = (const char *) xdr;
for (loop = 0; loop < len; loop++)
if (!isprint(cp[loop]))
goto not_xdr;
- if (len < tmp)
- for (; loop < tmp; loop++)
- if (cp[loop])
- goto not_xdr;
+ for (; loop < paddedlen; loop++)
+ if (cp[loop])
+ goto not_xdr;
_debug("cellname: [%u/%u] '%*.*s'",
- len, tmp, len, len, (const char *) xdr);
- datalen -= tmp;
- xdr += tmp >> 2;
+ len, paddedlen, len, len, (const char *) xdr);
+ datalen -= paddedlen;
+ xdr += paddedlen >> 2;
/* get the token count */
if (datalen < 12)
sec_ix = ntohl(*xdr);
datalen -= 4;
_debug("token: [%x/%zx] %x", toklen, datalen, sec_ix);
- if (toklen < 20 || toklen > datalen)
+ paddedlen = (toklen + 3) & ~3;
+ if (toklen < 20 || toklen > datalen || paddedlen > datalen)
goto not_xdr;
- datalen -= (toklen + 3) & ~3;
- xdr += (toklen + 3) >> 2;
+ datalen -= paddedlen;
+ xdr += paddedlen >> 2;
} while (--loop > 0);
k++;
}
- if (n)
+ if (n) {
+ err = -EINVAL;
goto err_out;
+ }
return keys_ex;
}
}
- spin_lock_bh(&police->tcf_lock);
if (est) {
err = gen_replace_estimator(&police->tcf_bstats, NULL,
&police->tcf_rate_est,
&police->tcf_lock,
NULL, est);
if (err)
- goto failure_unlock;
+ goto failure;
} else if (tb[TCA_POLICE_AVRATE] &&
(ret == ACT_P_CREATED ||
!gen_estimator_active(&police->tcf_rate_est))) {
err = -EINVAL;
- goto failure_unlock;
+ goto failure;
}
+ spin_lock_bh(&police->tcf_lock);
/* No failure allowed after this point */
police->tcfp_mtu = parm->mtu;
if (police->tcfp_mtu == 0) {
return ret;
-failure_unlock:
- spin_unlock_bh(&police->tcf_lock);
failure:
qdisc_put_rtab(P_tab);
qdisc_put_rtab(R_tab);
return sch;
}
/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
- ops->destroy(sch);
+ if (ops->destroy)
+ ops->destroy(sch);
err_out3:
dev_put(dev);
kfree((char *) sch - sch->padded);
if (sctp_sk(sk)->bind_hash)
sctp_put_port(sk);
+ sctp_sk(sk)->ep = NULL;
sock_put(sk);
}
static int sctp_sock_dump(struct sock *sk, void *p)
{
- struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_comm_param *commp = p;
struct sk_buff *skb = commp->skb;
struct netlink_callback *cb = commp->cb;
int err = 0;
lock_sock(sk);
- list_for_each_entry(assoc, &ep->asocs, asocs) {
+ if (!sctp_sk(sk)->ep)
+ goto release;
+ list_for_each_entry(assoc, &sctp_sk(sk)->ep->asocs, asocs) {
if (cb->args[4] < cb->args[1])
goto next;
for (head = sctp_ep_hashtable; hash < sctp_ep_hashsize;
hash++, head++) {
- read_lock(&head->lock);
+ read_lock_bh(&head->lock);
sctp_for_each_hentry(epb, &head->chain) {
err = cb(sctp_ep(epb), p);
if (err)
break;
}
- read_unlock(&head->lock);
+ read_unlock_bh(&head->lock);
}
return err;
if (err)
return err;
- sctp_transport_get_idx(net, &hti, pos);
- obj = sctp_transport_get_next(net, &hti);
- for (; obj && !IS_ERR(obj); obj = sctp_transport_get_next(net, &hti)) {
+ obj = sctp_transport_get_idx(net, &hti, pos + 1);
+ for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) {
struct sctp_transport *transport = obj;
if (!sctp_transport_hold(transport))
}
if (skb_cloned(_skb) &&
- pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL))
+ pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
goto exit;
/* Now reverse the concerned fields */
struct path path = { };
err = -EINVAL;
- if (sunaddr->sun_family != AF_UNIX)
+ if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
+ sunaddr->sun_family != AF_UNIX)
goto out;
if (addr_len == sizeof(short)) {
unsigned int hash;
int err;
+ err = -EINVAL;
+ if (alen < offsetofend(struct sockaddr, sa_family))
+ goto out;
+
if (addr->sa_family != AF_UNSPEC) {
err = unix_mkname(sunaddr, alen, &hash);
if (err < 0)
* Main IOCTl dispatcher.
* Check the type of IOCTL and call the appropriate wrapper...
*/
-static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
+static int wireless_process_ioctl(struct net *net, struct iwreq *iwr,
unsigned int cmd,
struct iw_request_info *info,
wext_ioctl_func standard,
wext_ioctl_func private)
{
- struct iwreq *iwr = (struct iwreq *) ifr;
struct net_device *dev;
iw_handler handler;
* The copy_to/from_user() of ifr is also dealt with in there */
/* Make sure the device exist */
- if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL)
+ if ((dev = __dev_get_by_name(net, iwr->ifr_name)) == NULL)
return -ENODEV;
/* A bunch of special cases, then the generic case...
else if (private)
return private(dev, iwr, cmd, info, handler);
}
- /* Old driver API : call driver ioctl handler */
- if (dev->netdev_ops->ndo_do_ioctl)
- return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
return -EOPNOTSUPP;
}
}
/* entry point from dev ioctl */
-static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr,
+static int wext_ioctl_dispatch(struct net *net, struct iwreq *iwr,
unsigned int cmd, struct iw_request_info *info,
wext_ioctl_func standard,
wext_ioctl_func private)
if (ret)
return ret;
- dev_load(net, ifr->ifr_name);
+ dev_load(net, iwr->ifr_name);
rtnl_lock();
- ret = wireless_process_ioctl(net, ifr, cmd, info, standard, private);
+ ret = wireless_process_ioctl(net, iwr, cmd, info, standard, private);
rtnl_unlock();
return ret;
}
-int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
void __user *arg)
{
struct iw_request_info info = { .cmd = cmd, .flags = 0 };
int ret;
- ret = wext_ioctl_dispatch(net, ifr, cmd, &info,
+ ret = wext_ioctl_dispatch(net, iwr, cmd, &info,
ioctl_standard_call,
ioctl_private_call);
if (ret >= 0 &&
IW_IS_GET(cmd) &&
- copy_to_user(arg, ifr, sizeof(struct iwreq)))
+ copy_to_user(arg, iwr, sizeof(struct iwreq)))
return -EFAULT;
return ret;
info.cmd = cmd;
info.flags = IW_REQUEST_FLAG_COMPAT;
- ret = wext_ioctl_dispatch(net, (struct ifreq *) &iwr, cmd, &info,
+ ret = wext_ioctl_dispatch(net, &iwr, cmd, &info,
compat_standard_call,
compat_private_call);
obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
xfrm_input.o xfrm_output.o \
- xfrm_sysctl.o xfrm_replay.o
-obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o
+ xfrm_sysctl.o xfrm_replay.o xfrm_device.o
obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
#include <net/xfrm.h>
#include <linux/notifier.h>
+#ifdef CONFIG_XFRM_OFFLOAD
int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
{
int err;
return true;
}
EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
+#endif
int xfrm_dev_register(struct net_device *dev)
{
err = -ESRCH;
out:
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-
- if (cnt)
- xfrm_garbage_collect(net);
-
return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);
return 0;
return err;
}
+ xfrm_garbage_collect(net);
c.data.type = type;
c.event = nlh->nlmsg_type;
include scripts/Kbuild.include
srcdir := $(srctree)/$(obj)
-subdirs := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.))
+
+# When make is run under a fakechroot environment, the function
+# $(wildcard $(srcdir)/*/.) doesn't only return directories, but also regular
+# files. So, we are using a combination of sort/dir/wildcard which works
+# with fakechroot.
+subdirs := $(patsubst $(srcdir)/%/,%,\
+ $(filter-out $(srcdir)/,\
+ $(sort $(dir $(wildcard $(srcdir)/*/)))))
+
# caller may set destination dir (when installing to asm/)
_dst := $(if $(dst),$(dst),$(obj))
int yylex(void);
int yyparse(void);
-void error_with_pos(const char *, ...);
+void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2)));
/*----------------------------------------------------------------------*/
#define xmalloc(size) ({ void *__ptr = malloc(size); \
# Check that we have the required ncurses stuff installed for lxdialog (menuconfig)
PHONY += $(obj)/dochecklxdialog
-$(addprefix $(obj)/,$(lxdialog)): $(obj)/dochecklxdialog
+$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/dochecklxdialog
$(obj)/dochecklxdialog:
$(Q)$(CONFIG_SHELL) $(check-lxdialog) -check $(HOSTCC) $(HOST_EXTRACFLAGS) $(HOSTLOADLIBES_mconf)
static int items_num;
static int global_exit;
/* the currently selected button */
-const char *current_instructions = menu_instructions;
+static const char *current_instructions = menu_instructions;
static char *dialog_input_result;
static int dialog_input_result_len;
};
static const int function_keys_num = 9;
-struct function_keys function_keys[] = {
+static struct function_keys function_keys[] = {
{
.key_str = "F1",
.func = "Help",
index = (index + items_num) % items_num;
while (true) {
char *str = k_menu_items[index].str;
- if (strcasestr(str, match_str) != 0)
+ if (strcasestr(str, match_str) != NULL)
return index;
if (flag == FIND_NEXT_MATCH_UP ||
flag == MATCH_TINKER_PATTERN_UP)
static void conf(struct menu *menu)
{
- struct menu *submenu = 0;
+ struct menu *submenu = NULL;
const char *prompt = menu_get_prompt(menu);
struct symbol *sym;
int res;
static void conf_choice(struct menu *menu)
{
const char *prompt = _(menu_get_prompt(menu));
- struct menu *child = 0;
+ struct menu *child = NULL;
struct symbol *active;
int selected_index = 0;
int last_top_row = 0;
}
}
-void setup_windows(void)
+static void setup_windows(void)
{
int lines, columns;
mkattrn(FUNCTION_TEXT, A_REVERSE);
}
-void set_colors()
+void set_colors(void)
{
start_color();
use_default_colors();
int lines = 0;
if (!text)
- return 0;
+ return NULL;
for (i = 0; text[i] != '\0' && lines < line_no; i++)
if (text[i] == '\n')
case "$i" in
*.[cS])
j=${i/\.[cS]/\.o}
+ j="${j#$tree}"
if [ -e $j ]; then
echo $i
fi
hmac_misc.mode = inode->i_mode;
crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
if (evm_hmac_attrs & EVM_ATTR_FSUUID)
- crypto_shash_update(desc, inode->i_sb->s_uuid,
+ crypto_shash_update(desc, &inode->i_sb->s_uuid.b[0],
sizeof(inode->i_sb->s_uuid));
crypto_shash_final(desc, digest);
}
enum ima_hooks func;
int mask;
unsigned long fsmagic;
- u8 fsuuid[16];
+ uuid_t fsuuid;
kuid_t uid;
kuid_t fowner;
bool (*uid_op)(kuid_t, kuid_t); /* Handlers for operators */
&& rule->fsmagic != inode->i_sb->s_magic)
return false;
if ((rule->flags & IMA_FSUUID) &&
- memcmp(rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid)))
+ !uuid_equal(&rule->fsuuid, &inode->i_sb->s_uuid))
return false;
if ((rule->flags & IMA_UID) && !rule->uid_op(cred->uid, rule->uid))
return false;
case Opt_fsuuid:
ima_log_string(ab, "fsuuid", args[0].from);
- if (memchr_inv(entry->fsuuid, 0x00,
- sizeof(entry->fsuuid))) {
+ if (uuid_is_null(&entry->fsuuid)) {
result = -EINVAL;
break;
}
- result = blk_part_pack_uuid(args[0].from,
- entry->fsuuid);
+ result = uuid_parse(args[0].from, &entry->fsuuid);
if (!result)
entry->flags |= IMA_FSUUID;
break;
}
if (entry->flags & IMA_FSUUID) {
- seq_printf(m, "fsuuid=%pU", entry->fsuuid);
+ seq_printf(m, "fsuuid=%pU", &entry->fsuuid);
seq_puts(m, " ");
}
opts->mnt_opts_flags = kcalloc(NUM_SEL_MNT_OPTS, sizeof(int),
GFP_KERNEL);
- if (!opts->mnt_opts_flags) {
- kfree(opts->mnt_opts);
+ if (!opts->mnt_opts_flags)
goto out_err;
- }
if (fscontext) {
opts->mnt_opts[num_mnt_opts] = fscontext;
return 0;
out_err:
+ security_free_mnt_opts(opts);
kfree(context);
kfree(defcontext);
kfree(fscontext);
struct snd_pcm_substream *substream;
const struct snd_pcm_chmap_elem *map;
- if (snd_BUG_ON(!info->chmap))
+ if (!info->chmap)
return -EINVAL;
substream = snd_pcm_chmap_substream(info, idx);
if (!substream)
unsigned int __user *dst;
int c, count = 0;
- if (snd_BUG_ON(!info->chmap))
+ if (!info->chmap)
return -EINVAL;
if (size < 8)
return -ENOMEM;
cycle = increment_cycle_count(cycle, 1);
if (s->handle_packet(s, 0, cycle, i) < 0) {
s->packet_index = -1;
- amdtp_stream_pcm_abort(s);
+ if (in_interrupt())
+ amdtp_stream_pcm_abort(s);
+ WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
return;
}
}
/* Queueing error or detecting invalid payload. */
if (i < packets) {
s->packet_index = -1;
- amdtp_stream_pcm_abort(s);
+ if (in_interrupt())
+ amdtp_stream_pcm_abort(s);
+ WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
return;
}
/* For a PCM substream processing. */
struct snd_pcm_substream *pcm;
struct tasklet_struct period_tasklet;
- unsigned int pcm_buffer_pointer;
+ snd_pcm_uframes_t pcm_buffer_pointer;
unsigned int pcm_period_pointer;
/* To wait for first packet. */
#define list_for_each_codec(c, bus) \
list_for_each_entry(c, &(bus)->core.codec_list, core.list)
+#define list_for_each_codec_safe(c, n, bus) \
+ list_for_each_entry_safe(c, n, &(bus)->core.codec_list, core.list)
/* snd_hda_codec_read/write optional flags */
#define HDA_RW_NO_RESPONSE_FALLBACK (1 << 0)
/* configure each codec instance */
int azx_codec_configure(struct azx *chip)
{
- struct hda_codec *codec;
- list_for_each_codec(codec, &chip->bus) {
+ struct hda_codec *codec, *next;
+
+ /* use _safe version here since snd_hda_codec_configure() deregisters
+ * the device upon error and deletes itself from the bus list.
+ */
+ list_for_each_codec_safe(codec, next, &chip->bus) {
snd_hda_codec_configure(codec);
}
return 0;
spec->input_paths[i][nums]);
spec->input_paths[i][nums] =
spec->input_paths[i][n];
+ spec->input_paths[i][n] = 0;
}
}
nums++;
#define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71)
#define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0)
#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+#define IS_BXT_T(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x1a98)
#define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198)
-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \
- IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci) || \
- IS_GLK(pci)
+#define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348)
+#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci) || \
+ IS_BXT_T(pci) || IS_KBL(pci) || IS_KBL_LP(pci) || \
+ IS_KBL_H(pci) || IS_GLK(pci) || IS_CFL(pci))
static char *driver_short_names[] = {
[AZX_DRIVER_ICH] = "HDA Intel",
/* Kabylake-H */
{ PCI_DEVICE(0x8086, 0xa2f0),
.driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+ /* Coffelake */
+ { PCI_DEVICE(0x8086, 0xa348),
+ .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE},
/* Broxton-P(Apollolake) */
{ PCI_DEVICE(0x8086, 0x5a98),
.driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
#include "skl.h"
/* Unique identification for getting NHLT blobs */
-static u8 OSC_UUID[16] = {0x6E, 0x88, 0x9F, 0xA6, 0xEB, 0x6C, 0x94, 0x45,
- 0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53};
+static guid_t osc_guid =
+ GUID_INIT(0xA69F886E, 0x6CEB, 0x4594,
+ 0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53);
struct nhlt_acpi_table *skl_nhlt_init(struct device *dev)
{
return NULL;
}
- obj = acpi_evaluate_dsm(handle, OSC_UUID, 1, 1, NULL);
+ obj = acpi_evaluate_dsm(handle, &osc_guid, 1, 1, NULL);
if (obj && obj->type == ACPI_TYPE_BUFFER) {
nhlt_ptr = (struct nhlt_resource_desc *)obj->buffer.pointer;
nhlt_table = (struct nhlt_acpi_table *)
objtool-y += arch/$(SRCARCH)/
objtool-y += builtin-check.o
+objtool-y += check.o
objtool-y += elf.o
objtool-y += special.o
objtool-y += objtool.o
c) Higher live patching compatibility rate
- (NOTE: This is not yet implemented)
-
- Currently with CONFIG_LIVEPATCH there's a basic live patching
- framework which is safe for roughly 85-90% of "security" fixes. But
- patches can't have complex features like function dependency or
- prototype changes, or data structure changes.
-
- There's a strong need to support patches which have the more complex
- features so that the patch compatibility rate for security fixes can
- eventually approach something resembling 100%. To achieve that, a
- "consistency model" is needed, which allows tasks to be safely
- transitioned from an unpatched state to a patched state.
-
- One of the key requirements of the currently proposed livepatch
- consistency model [*] is that it needs to walk the stack of each
- sleeping task to determine if it can be transitioned to the patched
- state. If objtool can ensure that stack traces are reliable, this
- consistency model can be used and the live patching compatibility
- rate can be improved significantly.
-
- [*] https://lkml.kernel.org/r/cover.1423499826.git.jpoimboe@redhat.com
+ Livepatch has an optional "consistency model", which is needed for
+ more complex patches. In order for the consistency model to work,
+ stack traces need to be reliable (or an unreliable condition needs to
+ be detectable). Objtool makes that possible.
+ For more details, see the livepatch documentation in the Linux kernel
+ source tree at Documentation/livepatch/livepatch.txt.
Rules
-----
return normally.
-Errors in .S files
-------------------
+Objtool warnings
+----------------
-If you're getting an error in a compiled .S file which you don't
-understand, first make sure that the affected code follows the above
-rules.
+For asm files, if you're getting an error which doesn't make sense,
+first make sure that the affected code follows the above rules.
+
+For C files, the common culprits are inline asm statements and calls to
+"noreturn" functions. See below for more details.
+
+Another possible cause for errors in C code is if the Makefile removes
+-fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
Here are some examples of common warnings reported by objtool, what
they mean, and suggestions for how to fix them.
-1. asm_file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
+1. file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
The func() function made a function call without first saving and/or
- updating the frame pointer.
-
- If func() is indeed a callable function, add proper frame pointer
- logic using the FRAME_BEGIN and FRAME_END macros. Otherwise, remove
- its ELF function annotation by changing ENDPROC to END.
+ updating the frame pointer, and CONFIG_FRAME_POINTER is enabled.
- If you're getting this error in a .c file, see the "Errors in .c
- files" section.
+ If the error is for an asm file, and func() is indeed a callable
+ function, add proper frame pointer logic using the FRAME_BEGIN and
+ FRAME_END macros. Otherwise, if it's not a callable function, remove
+ its ELF function annotation by changing ENDPROC to END, and instead
+ use the manual CFI hint macros in asm/undwarf.h.
+ If it's a GCC-compiled .c file, the error may be because the function
+ uses an inline asm() statement which has a "call" instruction. An
+ asm() statement with a call instruction must declare the use of the
+ stack pointer in its output operand. For example, on x86_64:
-2. asm_file.o: warning: objtool: .text+0x53: return instruction outside of a callable function
-
- A return instruction was detected, but objtool couldn't find a way
- for a callable function to reach the instruction.
+ register void *__sp asm("rsp");
+ asm volatile("call func" : "+r" (__sp));
- If the return instruction is inside (or reachable from) a callable
- function, the function needs to be annotated with the ENTRY/ENDPROC
- macros.
+ Otherwise the stack frame may not get created before the call.
- If you _really_ need a return instruction outside of a function, and
- are 100% sure that it won't affect stack traces, you can tell
- objtool to ignore it. See the "Adding exceptions" section below.
+2. file.o: warning: objtool: .text+0x53: unreachable instruction
-3. asm_file.o: warning: objtool: func()+0x9: function has unreachable instruction
+ Objtool couldn't find a code path to reach the instruction.
- The instruction lives inside of a callable function, but there's no
- possible control flow path from the beginning of the function to the
- instruction.
+ If the error is for an asm file, and the instruction is inside (or
+ reachable from) a callable function, the function should be annotated
+ with the ENTRY/ENDPROC macros (ENDPROC is the important one).
+ Otherwise, the code should probably be annotated with the CFI hint
+ macros in asm/undwarf.h so objtool and the unwinder can know the
+ stack state associated with the code.
- If the instruction is actually needed, and it's actually in a
- callable function, ensure that its function is properly annotated
- with ENTRY/ENDPROC.
+ If you're 100% sure the code won't affect stack traces, or if you're
+ a just a bad person, you can tell objtool to ignore it. See the
+ "Adding exceptions" section below.
If it's not actually in a callable function (e.g. kernel entry code),
change ENDPROC to END.
-4. asm_file.o: warning: objtool: func(): can't find starting instruction
+4. file.o: warning: objtool: func(): can't find starting instruction
or
- asm_file.o: warning: objtool: func()+0x11dd: can't decode instruction
+ file.o: warning: objtool: func()+0x11dd: can't decode instruction
- Did you put data in a text section? If so, that can confuse
+ Does the file have data in a text section? If so, that can confuse
objtool's instruction decoder. Move the data to a more appropriate
section like .data or .rodata.
-5. asm_file.o: warning: objtool: func()+0x6: kernel entry/exit from callable instruction
-
- This is a kernel entry/exit instruction like sysenter or sysret.
- Such instructions aren't allowed in a callable function, and are most
- likely part of the kernel entry code.
+5. file.o: warning: objtool: func()+0x6: unsupported instruction in callable function
- If the instruction isn't actually in a callable function, change
- ENDPROC to END.
+ This is a kernel entry/exit instruction like sysenter or iret. Such
+ instructions aren't allowed in a callable function, and are most
+ likely part of the kernel entry code. They should usually not have
+ the callable function annotation (ENDPROC) and should always be
+ annotated with the CFI hint macros in asm/undwarf.h.
-6. asm_file.o: warning: objtool: func()+0x26: sibling call from callable instruction with changed frame pointer
+6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
- This is a dynamic jump or a jump to an undefined symbol. Stacktool
+ This is a dynamic jump or a jump to an undefined symbol. Objtool
assumed it's a sibling call and detected that the frame pointer
wasn't first restored to its original state.
destination code to the local file.
If the instruction is not actually in a callable function (e.g.
- kernel entry code), change ENDPROC to END.
+ kernel entry code), change ENDPROC to END and annotate manually with
+ the CFI hint macros in asm/undwarf.h.
-7. asm_file: warning: objtool: func()+0x5c: frame pointer state mismatch
+7. file: warning: objtool: func()+0x5c: stack state mismatch
The instruction's frame pointer state is inconsistent, depending on
which execution path was taken to reach the instruction.
- Make sure the function pushes and sets up the frame pointer (for
- x86_64, this means rbp) at the beginning of the function and pops it
- at the end of the function. Also make sure that no other code in the
- function touches the frame pointer.
+ Make sure that, when CONFIG_FRAME_POINTER is enabled, the function
+ pushes and sets up the frame pointer (for x86_64, this means rbp) at
+ the beginning of the function and pops it at the end of the function.
+ Also make sure that no other code in the function touches the frame
+ pointer.
+ Another possibility is that the code has some asm or inline asm which
+ does some unusual things to the stack or the frame pointer. In such
+ cases it's probably appropriate to use the CFI hint macros in
+ asm/undwarf.h.
-Errors in .c files
-------------------
-1. c_file.o: warning: objtool: funcA() falls through to next function funcB()
+8. file.o: warning: objtool: funcA() falls through to next function funcB()
This means that funcA() doesn't end with a return instruction or an
unconditional jump, and that objtool has determined that the function
might be corrupt due to a gcc bug. For more details, see:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
-2. If you're getting any other objtool error in a compiled .c file, it
- may be because the file uses an asm() statement which has a "call"
- instruction. An asm() statement with a call instruction must declare
- the use of the stack pointer in its output operand. For example, on
- x86_64:
-
- register void *__sp asm("rsp");
- asm volatile("call func" : "+r" (__sp));
-
- Otherwise the stack frame may not get created before the call.
-
-3. Another possible cause for errors in C code is if the Makefile removes
- -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
-
-Also see the above section for .S file errors for more information what
-the individual error messages mean.
If the error doesn't seem to make sense, it could be a bug in objtool.
Feel free to ask the objtool maintainer for help.
all: $(OBJTOOL)
INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi
-CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
+CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -fomit-frame-pointer -O2 -g $(INCLUDES)
LDFLAGS += -lelf $(LIBSUBCMD)
# Allow old libelf to be used:
#define _ARCH_H
#include <stdbool.h>
+#include <linux/list.h>
#include "elf.h"
+#include "cfi.h"
-#define INSN_FP_SAVE 1
-#define INSN_FP_SETUP 2
-#define INSN_FP_RESTORE 3
-#define INSN_JUMP_CONDITIONAL 4
-#define INSN_JUMP_UNCONDITIONAL 5
-#define INSN_JUMP_DYNAMIC 6
-#define INSN_CALL 7
-#define INSN_CALL_DYNAMIC 8
-#define INSN_RETURN 9
-#define INSN_CONTEXT_SWITCH 10
-#define INSN_NOP 11
-#define INSN_OTHER 12
+#define INSN_JUMP_CONDITIONAL 1
+#define INSN_JUMP_UNCONDITIONAL 2
+#define INSN_JUMP_DYNAMIC 3
+#define INSN_CALL 4
+#define INSN_CALL_DYNAMIC 5
+#define INSN_RETURN 6
+#define INSN_CONTEXT_SWITCH 7
+#define INSN_STACK 8
+#define INSN_NOP 9
+#define INSN_OTHER 10
#define INSN_LAST INSN_OTHER
+enum op_dest_type {
+ OP_DEST_REG,
+ OP_DEST_REG_INDIRECT,
+ OP_DEST_MEM,
+ OP_DEST_PUSH,
+ OP_DEST_LEAVE,
+};
+
+struct op_dest {
+ enum op_dest_type type;
+ unsigned char reg;
+ int offset;
+};
+
+enum op_src_type {
+ OP_SRC_REG,
+ OP_SRC_REG_INDIRECT,
+ OP_SRC_CONST,
+ OP_SRC_POP,
+ OP_SRC_ADD,
+ OP_SRC_AND,
+};
+
+struct op_src {
+ enum op_src_type type;
+ unsigned char reg;
+ int offset;
+};
+
+struct stack_op {
+ struct op_dest dest;
+ struct op_src src;
+};
+
+void arch_initial_func_cfi_state(struct cfi_state *state);
+
int arch_decode_instruction(struct elf *elf, struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, unsigned char *type,
- unsigned long *displacement);
+ unsigned long *immediate, struct stack_op *op);
+
+bool arch_callee_saved_reg(unsigned char reg);
#endif /* _ARCH_H */
#include "../../arch.h"
#include "../../warn.h"
+static unsigned char op_to_cfi_reg[][2] = {
+ {CFI_AX, CFI_R8},
+ {CFI_CX, CFI_R9},
+ {CFI_DX, CFI_R10},
+ {CFI_BX, CFI_R11},
+ {CFI_SP, CFI_R12},
+ {CFI_BP, CFI_R13},
+ {CFI_SI, CFI_R14},
+ {CFI_DI, CFI_R15},
+};
+
static int is_x86_64(struct elf *elf)
{
switch (elf->ehdr.e_machine) {
}
}
+bool arch_callee_saved_reg(unsigned char reg)
+{
+ switch (reg) {
+ case CFI_BP:
+ case CFI_BX:
+ case CFI_R12:
+ case CFI_R13:
+ case CFI_R14:
+ case CFI_R15:
+ return true;
+
+ case CFI_AX:
+ case CFI_CX:
+ case CFI_DX:
+ case CFI_SI:
+ case CFI_DI:
+ case CFI_SP:
+ case CFI_R8:
+ case CFI_R9:
+ case CFI_R10:
+ case CFI_R11:
+ case CFI_RA:
+ default:
+ return false;
+ }
+}
+
int arch_decode_instruction(struct elf *elf, struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, unsigned char *type,
- unsigned long *immediate)
+ unsigned long *immediate, struct stack_op *op)
{
struct insn insn;
- int x86_64;
- unsigned char op1, op2, ext;
+ int x86_64, sign;
+ unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
+ modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
+ sib = 0;
x86_64 = is_x86_64(elf);
if (x86_64 == -1)
return -1;
- insn_init(&insn, (void *)(sec->data + offset), maxlen, x86_64);
+ insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64);
insn_get_length(&insn);
- insn_get_opcode(&insn);
- insn_get_modrm(&insn);
- insn_get_immediate(&insn);
if (!insn_complete(&insn)) {
WARN_FUNC("can't decode instruction", sec, offset);
op1 = insn.opcode.bytes[0];
op2 = insn.opcode.bytes[1];
+ if (insn.rex_prefix.nbytes) {
+ rex = insn.rex_prefix.bytes[0];
+ rex_w = X86_REX_W(rex) >> 3;
+ rex_r = X86_REX_R(rex) >> 2;
+ rex_b = X86_REX_B(rex);
+ }
+
+ if (insn.modrm.nbytes) {
+ modrm = insn.modrm.bytes[0];
+ modrm_mod = X86_MODRM_MOD(modrm);
+ modrm_reg = X86_MODRM_REG(modrm);
+ modrm_rm = X86_MODRM_RM(modrm);
+ }
+
+ if (insn.sib.nbytes)
+ sib = insn.sib.bytes[0];
+
switch (op1) {
- case 0x55:
- if (!insn.rex_prefix.nbytes)
- /* push rbp */
- *type = INSN_FP_SAVE;
+
+ case 0x1:
+ case 0x29:
+ if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+
+ /* add/sub reg, %rsp */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_SRC_REG;
+ op->dest.reg = CFI_SP;
+ }
+ break;
+
+ case 0x50 ... 0x57:
+
+ /* push reg */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ op->dest.type = OP_DEST_PUSH;
+
break;
- case 0x5d:
- if (!insn.rex_prefix.nbytes)
- /* pop rbp */
- *type = INSN_FP_RESTORE;
+ case 0x58 ... 0x5f:
+
+ /* pop reg */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+
+ break;
+
+ case 0x68:
+ case 0x6a:
+ /* push immediate */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
break;
case 0x70 ... 0x7f:
*type = INSN_JUMP_CONDITIONAL;
break;
+ case 0x81:
+ case 0x83:
+ if (rex != 0x48)
+ break;
+
+ if (modrm == 0xe4) {
+ /* and imm, %rsp */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_AND;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.immediate.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ break;
+ }
+
+ if (modrm == 0xc4)
+ sign = 1;
+ else if (modrm == 0xec)
+ sign = -1;
+ else
+ break;
+
+ /* add/sub imm, %rsp */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.immediate.value * sign;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ break;
+
case 0x89:
- if (insn.rex_prefix.nbytes == 1 &&
- insn.rex_prefix.bytes[0] == 0x48 &&
- insn.modrm.nbytes && insn.modrm.bytes[0] == 0xe5)
- /* mov rsp, rbp */
- *type = INSN_FP_SETUP;
+ if (rex == 0x48 && modrm == 0xe5) {
+
+ /* mov %rsp, %rbp */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG;
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_BP;
+ break;
+ }
+ /* fallthrough */
+ case 0x88:
+ if (!rex_b &&
+ (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
+
+ /* mov reg, disp(%rbp) */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG_INDIRECT;
+ op->dest.reg = CFI_BP;
+ op->dest.offset = insn.displacement.value;
+
+ } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
+
+ /* mov reg, disp(%rsp) */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG_INDIRECT;
+ op->dest.reg = CFI_SP;
+ op->dest.offset = insn.displacement.value;
+ }
+
+ break;
+
+ case 0x8b:
+ if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
+
+ /* mov disp(%rbp), reg */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG_INDIRECT;
+ op->src.reg = CFI_BP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+
+ } else if (rex_w && !rex_b && sib == 0x24 &&
+ modrm_mod != 3 && modrm_rm == 4) {
+
+ /* mov disp(%rsp), reg */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG_INDIRECT;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ }
+
break;
case 0x8d:
- if (insn.rex_prefix.nbytes &&
- insn.rex_prefix.bytes[0] == 0x48 &&
- insn.modrm.nbytes && insn.modrm.bytes[0] == 0x2c &&
- insn.sib.nbytes && insn.sib.bytes[0] == 0x24)
- /* lea %(rsp), %rbp */
- *type = INSN_FP_SETUP;
+ if (rex == 0x48 && modrm == 0x65) {
+
+ /* lea -disp(%rbp), %rsp */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_BP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ break;
+ }
+
+ if (rex == 0x4c && modrm == 0x54 && sib == 0x24 &&
+ insn.displacement.value == 8) {
+
+ /*
+ * lea 0x8(%rsp), %r10
+ *
+ * Here r10 is the "drap" pointer, used as a stack
+ * pointer helper when the stack gets realigned.
+ */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = 8;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_R10;
+ break;
+ }
+
+ if (rex == 0x4c && modrm == 0x6c && sib == 0x24 &&
+ insn.displacement.value == 16) {
+
+ /*
+ * lea 0x10(%rsp), %r13
+ *
+ * Here r13 is the "drap" pointer, used as a stack
+ * pointer helper when the stack gets realigned.
+ */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = 16;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_R13;
+ break;
+ }
+
+ if (rex == 0x49 && modrm == 0x62 &&
+ insn.displacement.value == -8) {
+
+ /*
+ * lea -0x8(%r10), %rsp
+ *
+ * Restoring rsp back to its original value after a
+ * stack realignment.
+ */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_R10;
+ op->src.offset = -8;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ break;
+ }
+
+ if (rex == 0x49 && modrm == 0x65 &&
+ insn.displacement.value == -16) {
+
+ /*
+ * lea -0x10(%r13), %rsp
+ *
+ * Restoring rsp back to its original value after a
+ * stack realignment.
+ */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_R13;
+ op->src.offset = -16;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ break;
+ }
+
+ break;
+
+ case 0x8f:
+ /* pop to mem */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_MEM;
break;
case 0x90:
*type = INSN_NOP;
break;
+ case 0x9c:
+ /* pushf */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ break;
+
+ case 0x9d:
+ /* popf */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_MEM;
+ break;
+
case 0x0f:
+
if (op2 >= 0x80 && op2 <= 0x8f)
*type = INSN_JUMP_CONDITIONAL;
else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
op2 == 0x35)
+
/* sysenter, sysret */
*type = INSN_CONTEXT_SWITCH;
+
else if (op2 == 0x0d || op2 == 0x1f)
+
/* nopl/nopw */
*type = INSN_NOP;
- else if (op2 == 0x01 && insn.modrm.nbytes &&
- (insn.modrm.bytes[0] == 0xc2 ||
- insn.modrm.bytes[0] == 0xd8))
- /* vmlaunch, vmrun */
- *type = INSN_CONTEXT_SWITCH;
+
+ else if (op2 == 0xa0 || op2 == 0xa8) {
+
+ /* push fs/gs */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+
+ } else if (op2 == 0xa1 || op2 == 0xa9) {
+
+ /* pop fs/gs */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_MEM;
+ }
break;
- case 0xc9: /* leave */
- *type = INSN_FP_RESTORE;
+ case 0xc9:
+ /*
+ * leave
+ *
+ * equivalent to:
+ * mov bp, sp
+ * pop bp
+ */
+ *type = INSN_STACK;
+ op->dest.type = OP_DEST_LEAVE;
+
break;
- case 0xe3: /* jecxz/jrcxz */
+ case 0xe3:
+ /* jecxz/jrcxz */
*type = INSN_JUMP_CONDITIONAL;
break;
break;
case 0xff:
- ext = X86_MODRM_REG(insn.modrm.bytes[0]);
- if (ext == 2 || ext == 3)
+ if (modrm_reg == 2 || modrm_reg == 3)
+
*type = INSN_CALL_DYNAMIC;
- else if (ext == 4)
+
+ else if (modrm_reg == 4)
+
*type = INSN_JUMP_DYNAMIC;
- else if (ext == 5) /*jmpf */
+
+ else if (modrm_reg == 5)
+
+ /* jmpf */
*type = INSN_CONTEXT_SWITCH;
+ else if (modrm_reg == 6) {
+
+ /* push from mem */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
+
break;
default:
return 0;
}
+
+void arch_initial_func_cfi_state(struct cfi_state *state)
+{
+ int i;
+
+ for (i = 0; i < CFI_NUM_REGS; i++) {
+ state->regs[i].base = CFI_UNDEFINED;
+ state->regs[i].offset = 0;
+ }
+
+ /* initial CFA (call frame address) */
+ state->cfa.base = CFI_SP;
+ state->cfa.offset = 8;
+
+ /* initial RA (return address) */
+ state->regs[16].base = CFI_CFA;
+ state->regs[16].offset = -8;
+}
/*
- * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* For more information, see tools/objtool/Documentation/stack-validation.txt.
*/
-#include <string.h>
-#include <stdlib.h>
#include <subcmd/parse-options.h>
-
#include "builtin.h"
-#include "elf.h"
-#include "special.h"
-#include "arch.h"
-#include "warn.h"
-
-#include <linux/hashtable.h>
-#include <linux/kernel.h>
-
-#define STATE_FP_SAVED 0x1
-#define STATE_FP_SETUP 0x2
-#define STATE_FENTRY 0x4
-
-struct instruction {
- struct list_head list;
- struct hlist_node hash;
- struct section *sec;
- unsigned long offset;
- unsigned int len, state;
- unsigned char type;
- unsigned long immediate;
- bool alt_group, visited, dead_end;
- struct symbol *call_dest;
- struct instruction *jump_dest;
- struct list_head alts;
- struct symbol *func;
-};
-
-struct alternative {
- struct list_head list;
- struct instruction *insn;
-};
-
-struct objtool_file {
- struct elf *elf;
- struct list_head insn_list;
- DECLARE_HASHTABLE(insn_hash, 16);
- struct section *rodata, *whitelist;
- bool ignore_unreachables, c_file;
-};
-
-const char *objname;
-static bool nofp;
-
-static struct instruction *find_insn(struct objtool_file *file,
- struct section *sec, unsigned long offset)
-{
- struct instruction *insn;
-
- hash_for_each_possible(file->insn_hash, insn, hash, offset)
- if (insn->sec == sec && insn->offset == offset)
- return insn;
-
- return NULL;
-}
-
-static struct instruction *next_insn_same_sec(struct objtool_file *file,
- struct instruction *insn)
-{
- struct instruction *next = list_next_entry(insn, list);
-
- if (&next->list == &file->insn_list || next->sec != insn->sec)
- return NULL;
-
- return next;
-}
-
-static bool gcov_enabled(struct objtool_file *file)
-{
- struct section *sec;
- struct symbol *sym;
-
- list_for_each_entry(sec, &file->elf->sections, list)
- list_for_each_entry(sym, &sec->symbol_list, list)
- if (!strncmp(sym->name, "__gcov_.", 8))
- return true;
-
- return false;
-}
-
-#define for_each_insn(file, insn) \
- list_for_each_entry(insn, &file->insn_list, list)
-
-#define func_for_each_insn(file, func, insn) \
- for (insn = find_insn(file, func->sec, func->offset); \
- insn && &insn->list != &file->insn_list && \
- insn->sec == func->sec && \
- insn->offset < func->offset + func->len; \
- insn = list_next_entry(insn, list))
-
-#define func_for_each_insn_continue_reverse(file, func, insn) \
- for (insn = list_prev_entry(insn, list); \
- &insn->list != &file->insn_list && \
- insn->sec == func->sec && insn->offset >= func->offset; \
- insn = list_prev_entry(insn, list))
-
-#define sec_for_each_insn_from(file, insn) \
- for (; insn; insn = next_insn_same_sec(file, insn))
-
-
-/*
- * Check if the function has been manually whitelisted with the
- * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
- * due to its use of a context switching instruction.
- */
-static bool ignore_func(struct objtool_file *file, struct symbol *func)
-{
- struct rela *rela;
- struct instruction *insn;
-
- /* check for STACK_FRAME_NON_STANDARD */
- if (file->whitelist && file->whitelist->rela)
- list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) {
- if (rela->sym->type == STT_SECTION &&
- rela->sym->sec == func->sec &&
- rela->addend == func->offset)
- return true;
- if (rela->sym->type == STT_FUNC && rela->sym == func)
- return true;
- }
-
- /* check if it has a context switching instruction */
- func_for_each_insn(file, func, insn)
- if (insn->type == INSN_CONTEXT_SWITCH)
- return true;
-
- return false;
-}
-
-/*
- * This checks to see if the given function is a "noreturn" function.
- *
- * For global functions which are outside the scope of this object file, we
- * have to keep a manual list of them.
- *
- * For local functions, we have to detect them manually by simply looking for
- * the lack of a return instruction.
- *
- * Returns:
- * -1: error
- * 0: no dead end
- * 1: dead end
- */
-static int __dead_end_function(struct objtool_file *file, struct symbol *func,
- int recursion)
-{
- int i;
- struct instruction *insn;
- bool empty = true;
-
- /*
- * Unfortunately these have to be hard coded because the noreturn
- * attribute isn't provided in ELF data.
- */
- static const char * const global_noreturns[] = {
- "__stack_chk_fail",
- "panic",
- "do_exit",
- "do_task_dead",
- "__module_put_and_exit",
- "complete_and_exit",
- "kvm_spurious_fault",
- "__reiserfs_panic",
- "lbug_with_loc"
- };
-
- if (func->bind == STB_WEAK)
- return 0;
-
- if (func->bind == STB_GLOBAL)
- for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
- if (!strcmp(func->name, global_noreturns[i]))
- return 1;
-
- if (!func->sec)
- return 0;
-
- func_for_each_insn(file, func, insn) {
- empty = false;
-
- if (insn->type == INSN_RETURN)
- return 0;
- }
-
- if (empty)
- return 0;
-
- /*
- * A function can have a sibling call instead of a return. In that
- * case, the function's dead-end status depends on whether the target
- * of the sibling call returns.
- */
- func_for_each_insn(file, func, insn) {
- if (insn->sec != func->sec ||
- insn->offset >= func->offset + func->len)
- break;
-
- if (insn->type == INSN_JUMP_UNCONDITIONAL) {
- struct instruction *dest = insn->jump_dest;
- struct symbol *dest_func;
-
- if (!dest)
- /* sibling call to another file */
- return 0;
-
- if (dest->sec != func->sec ||
- dest->offset < func->offset ||
- dest->offset >= func->offset + func->len) {
- /* local sibling call */
- dest_func = find_symbol_by_offset(dest->sec,
- dest->offset);
- if (!dest_func)
- continue;
-
- if (recursion == 5) {
- WARN_FUNC("infinite recursion (objtool bug!)",
- dest->sec, dest->offset);
- return -1;
- }
-
- return __dead_end_function(file, dest_func,
- recursion + 1);
- }
- }
-
- if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
- /* sibling call */
- return 0;
- }
-
- return 1;
-}
-
-static int dead_end_function(struct objtool_file *file, struct symbol *func)
-{
- return __dead_end_function(file, func, 0);
-}
-
-/*
- * Call the arch-specific instruction decoder for all the instructions and add
- * them to the global instruction list.
- */
-static int decode_instructions(struct objtool_file *file)
-{
- struct section *sec;
- struct symbol *func;
- unsigned long offset;
- struct instruction *insn;
- int ret;
-
- list_for_each_entry(sec, &file->elf->sections, list) {
-
- if (!(sec->sh.sh_flags & SHF_EXECINSTR))
- continue;
-
- for (offset = 0; offset < sec->len; offset += insn->len) {
- insn = malloc(sizeof(*insn));
- memset(insn, 0, sizeof(*insn));
-
- INIT_LIST_HEAD(&insn->alts);
- insn->sec = sec;
- insn->offset = offset;
-
- ret = arch_decode_instruction(file->elf, sec, offset,
- sec->len - offset,
- &insn->len, &insn->type,
- &insn->immediate);
- if (ret)
- return ret;
-
- if (!insn->type || insn->type > INSN_LAST) {
- WARN_FUNC("invalid instruction type %d",
- insn->sec, insn->offset, insn->type);
- return -1;
- }
-
- hash_add(file->insn_hash, &insn->hash, insn->offset);
- list_add_tail(&insn->list, &file->insn_list);
- }
-
- list_for_each_entry(func, &sec->symbol_list, list) {
- if (func->type != STT_FUNC)
- continue;
-
- if (!find_insn(file, sec, func->offset)) {
- WARN("%s(): can't find starting instruction",
- func->name);
- return -1;
- }
-
- func_for_each_insn(file, func, insn)
- if (!insn->func)
- insn->func = func;
- }
- }
-
- return 0;
-}
-
-/*
- * Find all uses of the unreachable() macro, which are code path dead ends.
- */
-static int add_dead_ends(struct objtool_file *file)
-{
- struct section *sec;
- struct rela *rela;
- struct instruction *insn;
- bool found;
-
- sec = find_section_by_name(file->elf, ".rela.discard.unreachable");
- if (!sec)
- return 0;
-
- list_for_each_entry(rela, &sec->rela_list, list) {
- if (rela->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", sec->name);
- return -1;
- }
- insn = find_insn(file, rela->sym->sec, rela->addend);
- if (insn)
- insn = list_prev_entry(insn, list);
- else if (rela->addend == rela->sym->sec->len) {
- found = false;
- list_for_each_entry_reverse(insn, &file->insn_list, list) {
- if (insn->sec == rela->sym->sec) {
- found = true;
- break;
- }
- }
-
- if (!found) {
- WARN("can't find unreachable insn at %s+0x%x",
- rela->sym->sec->name, rela->addend);
- return -1;
- }
- } else {
- WARN("can't find unreachable insn at %s+0x%x",
- rela->sym->sec->name, rela->addend);
- return -1;
- }
-
- insn->dead_end = true;
- }
-
- return 0;
-}
-
-/*
- * Warnings shouldn't be reported for ignored functions.
- */
-static void add_ignores(struct objtool_file *file)
-{
- struct instruction *insn;
- struct section *sec;
- struct symbol *func;
-
- list_for_each_entry(sec, &file->elf->sections, list) {
- list_for_each_entry(func, &sec->symbol_list, list) {
- if (func->type != STT_FUNC)
- continue;
-
- if (!ignore_func(file, func))
- continue;
-
- func_for_each_insn(file, func, insn)
- insn->visited = true;
- }
- }
-}
-
-/*
- * Find the destination instructions for all jumps.
- */
-static int add_jump_destinations(struct objtool_file *file)
-{
- struct instruction *insn;
- struct rela *rela;
- struct section *dest_sec;
- unsigned long dest_off;
-
- for_each_insn(file, insn) {
- if (insn->type != INSN_JUMP_CONDITIONAL &&
- insn->type != INSN_JUMP_UNCONDITIONAL)
- continue;
-
- /* skip ignores */
- if (insn->visited)
- continue;
-
- rela = find_rela_by_dest_range(insn->sec, insn->offset,
- insn->len);
- if (!rela) {
- dest_sec = insn->sec;
- dest_off = insn->offset + insn->len + insn->immediate;
- } else if (rela->sym->type == STT_SECTION) {
- dest_sec = rela->sym->sec;
- dest_off = rela->addend + 4;
- } else if (rela->sym->sec->idx) {
- dest_sec = rela->sym->sec;
- dest_off = rela->sym->sym.st_value + rela->addend + 4;
- } else {
- /* sibling call */
- insn->jump_dest = 0;
- continue;
- }
-
- insn->jump_dest = find_insn(file, dest_sec, dest_off);
- if (!insn->jump_dest) {
-
- /*
- * This is a special case where an alt instruction
- * jumps past the end of the section. These are
- * handled later in handle_group_alt().
- */
- if (!strcmp(insn->sec->name, ".altinstr_replacement"))
- continue;
-
- WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
- insn->sec, insn->offset, dest_sec->name,
- dest_off);
- return -1;
- }
- }
-
- return 0;
-}
-
-/*
- * Find the destination instructions for all calls.
- */
-static int add_call_destinations(struct objtool_file *file)
-{
- struct instruction *insn;
- unsigned long dest_off;
- struct rela *rela;
-
- for_each_insn(file, insn) {
- if (insn->type != INSN_CALL)
- continue;
-
- rela = find_rela_by_dest_range(insn->sec, insn->offset,
- insn->len);
- if (!rela) {
- dest_off = insn->offset + insn->len + insn->immediate;
- insn->call_dest = find_symbol_by_offset(insn->sec,
- dest_off);
- if (!insn->call_dest) {
- WARN_FUNC("can't find call dest symbol at offset 0x%lx",
- insn->sec, insn->offset, dest_off);
- return -1;
- }
- } else if (rela->sym->type == STT_SECTION) {
- insn->call_dest = find_symbol_by_offset(rela->sym->sec,
- rela->addend+4);
- if (!insn->call_dest ||
- insn->call_dest->type != STT_FUNC) {
- WARN_FUNC("can't find call dest symbol at %s+0x%x",
- insn->sec, insn->offset,
- rela->sym->sec->name,
- rela->addend + 4);
- return -1;
- }
- } else
- insn->call_dest = rela->sym;
- }
-
- return 0;
-}
-
-/*
- * The .alternatives section requires some extra special care, over and above
- * what other special sections require:
- *
- * 1. Because alternatives are patched in-place, we need to insert a fake jump
- * instruction at the end so that validate_branch() skips all the original
- * replaced instructions when validating the new instruction path.
- *
- * 2. An added wrinkle is that the new instruction length might be zero. In
- * that case the old instructions are replaced with noops. We simulate that
- * by creating a fake jump as the only new instruction.
- *
- * 3. In some cases, the alternative section includes an instruction which
- * conditionally jumps to the _end_ of the entry. We have to modify these
- * jumps' destinations to point back to .text rather than the end of the
- * entry in .altinstr_replacement.
- *
- * 4. It has been requested that we don't validate the !POPCNT feature path
- * which is a "very very small percentage of machines".
- */
-static int handle_group_alt(struct objtool_file *file,
- struct special_alt *special_alt,
- struct instruction *orig_insn,
- struct instruction **new_insn)
-{
- struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
- unsigned long dest_off;
-
- last_orig_insn = NULL;
- insn = orig_insn;
- sec_for_each_insn_from(file, insn) {
- if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
- break;
-
- if (special_alt->skip_orig)
- insn->type = INSN_NOP;
-
- insn->alt_group = true;
- last_orig_insn = insn;
- }
-
- if (!next_insn_same_sec(file, last_orig_insn)) {
- WARN("%s: don't know how to handle alternatives at end of section",
- special_alt->orig_sec->name);
- return -1;
- }
-
- fake_jump = malloc(sizeof(*fake_jump));
- if (!fake_jump) {
- WARN("malloc failed");
- return -1;
- }
- memset(fake_jump, 0, sizeof(*fake_jump));
- INIT_LIST_HEAD(&fake_jump->alts);
- fake_jump->sec = special_alt->new_sec;
- fake_jump->offset = -1;
- fake_jump->type = INSN_JUMP_UNCONDITIONAL;
- fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
-
- if (!special_alt->new_len) {
- *new_insn = fake_jump;
- return 0;
- }
-
- last_new_insn = NULL;
- insn = *new_insn;
- sec_for_each_insn_from(file, insn) {
- if (insn->offset >= special_alt->new_off + special_alt->new_len)
- break;
-
- last_new_insn = insn;
-
- if (insn->type != INSN_JUMP_CONDITIONAL &&
- insn->type != INSN_JUMP_UNCONDITIONAL)
- continue;
-
- if (!insn->immediate)
- continue;
-
- dest_off = insn->offset + insn->len + insn->immediate;
- if (dest_off == special_alt->new_off + special_alt->new_len)
- insn->jump_dest = fake_jump;
-
- if (!insn->jump_dest) {
- WARN_FUNC("can't find alternative jump destination",
- insn->sec, insn->offset);
- return -1;
- }
- }
-
- if (!last_new_insn) {
- WARN_FUNC("can't find last new alternative instruction",
- special_alt->new_sec, special_alt->new_off);
- return -1;
- }
-
- list_add(&fake_jump->list, &last_new_insn->list);
-
- return 0;
-}
-
-/*
- * A jump table entry can either convert a nop to a jump or a jump to a nop.
- * If the original instruction is a jump, make the alt entry an effective nop
- * by just skipping the original instruction.
- */
-static int handle_jump_alt(struct objtool_file *file,
- struct special_alt *special_alt,
- struct instruction *orig_insn,
- struct instruction **new_insn)
-{
- if (orig_insn->type == INSN_NOP)
- return 0;
-
- if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
- WARN_FUNC("unsupported instruction at jump label",
- orig_insn->sec, orig_insn->offset);
- return -1;
- }
-
- *new_insn = list_next_entry(orig_insn, list);
- return 0;
-}
-
-/*
- * Read all the special sections which have alternate instructions which can be
- * patched in or redirected to at runtime. Each instruction having alternate
- * instruction(s) has them added to its insn->alts list, which will be
- * traversed in validate_branch().
- */
-static int add_special_section_alts(struct objtool_file *file)
-{
- struct list_head special_alts;
- struct instruction *orig_insn, *new_insn;
- struct special_alt *special_alt, *tmp;
- struct alternative *alt;
- int ret;
-
- ret = special_get_alts(file->elf, &special_alts);
- if (ret)
- return ret;
-
- list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
- alt = malloc(sizeof(*alt));
- if (!alt) {
- WARN("malloc failed");
- ret = -1;
- goto out;
- }
-
- orig_insn = find_insn(file, special_alt->orig_sec,
- special_alt->orig_off);
- if (!orig_insn) {
- WARN_FUNC("special: can't find orig instruction",
- special_alt->orig_sec, special_alt->orig_off);
- ret = -1;
- goto out;
- }
+#include "check.h"
- new_insn = NULL;
- if (!special_alt->group || special_alt->new_len) {
- new_insn = find_insn(file, special_alt->new_sec,
- special_alt->new_off);
- if (!new_insn) {
- WARN_FUNC("special: can't find new instruction",
- special_alt->new_sec,
- special_alt->new_off);
- ret = -1;
- goto out;
- }
- }
+bool nofp;
- if (special_alt->group) {
- ret = handle_group_alt(file, special_alt, orig_insn,
- &new_insn);
- if (ret)
- goto out;
- } else if (special_alt->jump_or_nop) {
- ret = handle_jump_alt(file, special_alt, orig_insn,
- &new_insn);
- if (ret)
- goto out;
- }
-
- alt->insn = new_insn;
- list_add_tail(&alt->list, &orig_insn->alts);
-
- list_del(&special_alt->list);
- free(special_alt);
- }
-
-out:
- return ret;
-}
-
-static int add_switch_table(struct objtool_file *file, struct symbol *func,
- struct instruction *insn, struct rela *table,
- struct rela *next_table)
-{
- struct rela *rela = table;
- struct instruction *alt_insn;
- struct alternative *alt;
-
- list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
- if (rela == next_table)
- break;
-
- if (rela->sym->sec != insn->sec ||
- rela->addend <= func->offset ||
- rela->addend >= func->offset + func->len)
- break;
-
- alt_insn = find_insn(file, insn->sec, rela->addend);
- if (!alt_insn) {
- WARN("%s: can't find instruction at %s+0x%x",
- file->rodata->rela->name, insn->sec->name,
- rela->addend);
- return -1;
- }
-
- alt = malloc(sizeof(*alt));
- if (!alt) {
- WARN("malloc failed");
- return -1;
- }
-
- alt->insn = alt_insn;
- list_add_tail(&alt->list, &insn->alts);
- }
-
- return 0;
-}
-
-/*
- * find_switch_table() - Given a dynamic jump, find the switch jump table in
- * .rodata associated with it.
- *
- * There are 3 basic patterns:
- *
- * 1. jmpq *[rodata addr](,%reg,8)
- *
- * This is the most common case by far. It jumps to an address in a simple
- * jump table which is stored in .rodata.
- *
- * 2. jmpq *[rodata addr](%rip)
- *
- * This is caused by a rare GCC quirk, currently only seen in three driver
- * functions in the kernel, only with certain obscure non-distro configs.
- *
- * As part of an optimization, GCC makes a copy of an existing switch jump
- * table, modifies it, and then hard-codes the jump (albeit with an indirect
- * jump) to use a single entry in the table. The rest of the jump table and
- * some of its jump targets remain as dead code.
- *
- * In such a case we can just crudely ignore all unreachable instruction
- * warnings for the entire object file. Ideally we would just ignore them
- * for the function, but that would require redesigning the code quite a
- * bit. And honestly that's just not worth doing: unreachable instruction
- * warnings are of questionable value anyway, and this is such a rare issue.
- *
- * 3. mov [rodata addr],%reg1
- * ... some instructions ...
- * jmpq *(%reg1,%reg2,8)
- *
- * This is a fairly uncommon pattern which is new for GCC 6. As of this
- * writing, there are 11 occurrences of it in the allmodconfig kernel.
- *
- * TODO: Once we have DWARF CFI and smarter instruction decoding logic,
- * ensure the same register is used in the mov and jump instructions.
- */
-static struct rela *find_switch_table(struct objtool_file *file,
- struct symbol *func,
- struct instruction *insn)
-{
- struct rela *text_rela, *rodata_rela;
- struct instruction *orig_insn = insn;
-
- text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
- if (text_rela && text_rela->sym == file->rodata->sym) {
- /* case 1 */
- rodata_rela = find_rela_by_dest(file->rodata,
- text_rela->addend);
- if (rodata_rela)
- return rodata_rela;
-
- /* case 2 */
- rodata_rela = find_rela_by_dest(file->rodata,
- text_rela->addend + 4);
- if (!rodata_rela)
- return NULL;
- file->ignore_unreachables = true;
- return rodata_rela;
- }
-
- /* case 3 */
- func_for_each_insn_continue_reverse(file, func, insn) {
- if (insn->type == INSN_JUMP_DYNAMIC)
- break;
-
- /* allow small jumps within the range */
- if (insn->type == INSN_JUMP_UNCONDITIONAL &&
- insn->jump_dest &&
- (insn->jump_dest->offset <= insn->offset ||
- insn->jump_dest->offset > orig_insn->offset))
- break;
-
- /* look for a relocation which references .rodata */
- text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
- insn->len);
- if (!text_rela || text_rela->sym != file->rodata->sym)
- continue;
-
- /*
- * Make sure the .rodata address isn't associated with a
- * symbol. gcc jump tables are anonymous data.
- */
- if (find_symbol_containing(file->rodata, text_rela->addend))
- continue;
-
- return find_rela_by_dest(file->rodata, text_rela->addend);
- }
-
- return NULL;
-}
-
-static int add_func_switch_tables(struct objtool_file *file,
- struct symbol *func)
-{
- struct instruction *insn, *prev_jump = NULL;
- struct rela *rela, *prev_rela = NULL;
- int ret;
-
- func_for_each_insn(file, func, insn) {
- if (insn->type != INSN_JUMP_DYNAMIC)
- continue;
-
- rela = find_switch_table(file, func, insn);
- if (!rela)
- continue;
-
- /*
- * We found a switch table, but we don't know yet how big it
- * is. Don't add it until we reach the end of the function or
- * the beginning of another switch table in the same function.
- */
- if (prev_jump) {
- ret = add_switch_table(file, func, prev_jump, prev_rela,
- rela);
- if (ret)
- return ret;
- }
-
- prev_jump = insn;
- prev_rela = rela;
- }
-
- if (prev_jump) {
- ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-/*
- * For some switch statements, gcc generates a jump table in the .rodata
- * section which contains a list of addresses within the function to jump to.
- * This finds these jump tables and adds them to the insn->alts lists.
- */
-static int add_switch_table_alts(struct objtool_file *file)
-{
- struct section *sec;
- struct symbol *func;
- int ret;
-
- if (!file->rodata || !file->rodata->rela)
- return 0;
-
- list_for_each_entry(sec, &file->elf->sections, list) {
- list_for_each_entry(func, &sec->symbol_list, list) {
- if (func->type != STT_FUNC)
- continue;
-
- ret = add_func_switch_tables(file, func);
- if (ret)
- return ret;
- }
- }
-
- return 0;
-}
-
-static int decode_sections(struct objtool_file *file)
-{
- int ret;
-
- ret = decode_instructions(file);
- if (ret)
- return ret;
-
- ret = add_dead_ends(file);
- if (ret)
- return ret;
-
- add_ignores(file);
-
- ret = add_jump_destinations(file);
- if (ret)
- return ret;
-
- ret = add_call_destinations(file);
- if (ret)
- return ret;
-
- ret = add_special_section_alts(file);
- if (ret)
- return ret;
-
- ret = add_switch_table_alts(file);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static bool is_fentry_call(struct instruction *insn)
-{
- if (insn->type == INSN_CALL &&
- insn->call_dest->type == STT_NOTYPE &&
- !strcmp(insn->call_dest->name, "__fentry__"))
- return true;
-
- return false;
-}
-
-static bool has_modified_stack_frame(struct instruction *insn)
-{
- return (insn->state & STATE_FP_SAVED) ||
- (insn->state & STATE_FP_SETUP);
-}
-
-static bool has_valid_stack_frame(struct instruction *insn)
-{
- return (insn->state & STATE_FP_SAVED) &&
- (insn->state & STATE_FP_SETUP);
-}
-
-static unsigned int frame_state(unsigned long state)
-{
- return (state & (STATE_FP_SAVED | STATE_FP_SETUP));
-}
-
-/*
- * Follow the branch starting at the given instruction, and recursively follow
- * any other branches (jumps). Meanwhile, track the frame pointer state at
- * each instruction and validate all the rules described in
- * tools/objtool/Documentation/stack-validation.txt.
- */
-static int validate_branch(struct objtool_file *file,
- struct instruction *first, unsigned char first_state)
-{
- struct alternative *alt;
- struct instruction *insn;
- struct section *sec;
- struct symbol *func = NULL;
- unsigned char state;
- int ret;
-
- insn = first;
- sec = insn->sec;
- state = first_state;
-
- if (insn->alt_group && list_empty(&insn->alts)) {
- WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
- sec, insn->offset);
- return 1;
- }
-
- while (1) {
- if (file->c_file && insn->func) {
- if (func && func != insn->func) {
- WARN("%s() falls through to next function %s()",
- func->name, insn->func->name);
- return 1;
- }
-
- func = insn->func;
- }
-
- if (insn->visited) {
- if (frame_state(insn->state) != frame_state(state)) {
- WARN_FUNC("frame pointer state mismatch",
- sec, insn->offset);
- return 1;
- }
-
- return 0;
- }
-
- insn->visited = true;
- insn->state = state;
-
- list_for_each_entry(alt, &insn->alts, list) {
- ret = validate_branch(file, alt->insn, state);
- if (ret)
- return 1;
- }
-
- switch (insn->type) {
-
- case INSN_FP_SAVE:
- if (!nofp) {
- if (state & STATE_FP_SAVED) {
- WARN_FUNC("duplicate frame pointer save",
- sec, insn->offset);
- return 1;
- }
- state |= STATE_FP_SAVED;
- }
- break;
-
- case INSN_FP_SETUP:
- if (!nofp) {
- if (state & STATE_FP_SETUP) {
- WARN_FUNC("duplicate frame pointer setup",
- sec, insn->offset);
- return 1;
- }
- state |= STATE_FP_SETUP;
- }
- break;
-
- case INSN_FP_RESTORE:
- if (!nofp) {
- if (has_valid_stack_frame(insn))
- state &= ~STATE_FP_SETUP;
-
- state &= ~STATE_FP_SAVED;
- }
- break;
-
- case INSN_RETURN:
- if (!nofp && has_modified_stack_frame(insn)) {
- WARN_FUNC("return without frame pointer restore",
- sec, insn->offset);
- return 1;
- }
- return 0;
-
- case INSN_CALL:
- if (is_fentry_call(insn)) {
- state |= STATE_FENTRY;
- break;
- }
-
- ret = dead_end_function(file, insn->call_dest);
- if (ret == 1)
- return 0;
- if (ret == -1)
- return 1;
-
- /* fallthrough */
- case INSN_CALL_DYNAMIC:
- if (!nofp && !has_valid_stack_frame(insn)) {
- WARN_FUNC("call without frame pointer save/setup",
- sec, insn->offset);
- return 1;
- }
- break;
-
- case INSN_JUMP_CONDITIONAL:
- case INSN_JUMP_UNCONDITIONAL:
- if (insn->jump_dest) {
- ret = validate_branch(file, insn->jump_dest,
- state);
- if (ret)
- return 1;
- } else if (has_modified_stack_frame(insn)) {
- WARN_FUNC("sibling call from callable instruction with changed frame pointer",
- sec, insn->offset);
- return 1;
- } /* else it's a sibling call */
-
- if (insn->type == INSN_JUMP_UNCONDITIONAL)
- return 0;
-
- break;
-
- case INSN_JUMP_DYNAMIC:
- if (list_empty(&insn->alts) &&
- has_modified_stack_frame(insn)) {
- WARN_FUNC("sibling call from callable instruction with changed frame pointer",
- sec, insn->offset);
- return 1;
- }
-
- return 0;
-
- default:
- break;
- }
-
- if (insn->dead_end)
- return 0;
-
- insn = next_insn_same_sec(file, insn);
- if (!insn) {
- WARN("%s: unexpected end of section", sec->name);
- return 1;
- }
- }
-
- return 0;
-}
-
-static bool is_kasan_insn(struct instruction *insn)
-{
- return (insn->type == INSN_CALL &&
- !strcmp(insn->call_dest->name, "__asan_handle_no_return"));
-}
-
-static bool is_ubsan_insn(struct instruction *insn)
-{
- return (insn->type == INSN_CALL &&
- !strcmp(insn->call_dest->name,
- "__ubsan_handle_builtin_unreachable"));
-}
-
-static bool ignore_unreachable_insn(struct symbol *func,
- struct instruction *insn)
-{
- int i;
-
- if (insn->type == INSN_NOP)
- return true;
-
- /*
- * Check if this (or a subsequent) instruction is related to
- * CONFIG_UBSAN or CONFIG_KASAN.
- *
- * End the search at 5 instructions to avoid going into the weeds.
- */
- for (i = 0; i < 5; i++) {
-
- if (is_kasan_insn(insn) || is_ubsan_insn(insn))
- return true;
-
- if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
- insn = insn->jump_dest;
- continue;
- }
-
- if (insn->offset + insn->len >= func->offset + func->len)
- break;
- insn = list_next_entry(insn, list);
- }
-
- return false;
-}
-
-static int validate_functions(struct objtool_file *file)
-{
- struct section *sec;
- struct symbol *func;
- struct instruction *insn;
- int ret, warnings = 0;
-
- list_for_each_entry(sec, &file->elf->sections, list) {
- list_for_each_entry(func, &sec->symbol_list, list) {
- if (func->type != STT_FUNC)
- continue;
-
- insn = find_insn(file, sec, func->offset);
- if (!insn)
- continue;
-
- ret = validate_branch(file, insn, 0);
- warnings += ret;
- }
- }
-
- list_for_each_entry(sec, &file->elf->sections, list) {
- list_for_each_entry(func, &sec->symbol_list, list) {
- if (func->type != STT_FUNC)
- continue;
-
- func_for_each_insn(file, func, insn) {
- if (insn->visited)
- continue;
-
- insn->visited = true;
-
- if (file->ignore_unreachables || warnings ||
- ignore_unreachable_insn(func, insn))
- continue;
-
- /*
- * gcov produces a lot of unreachable
- * instructions. If we get an unreachable
- * warning and the file has gcov enabled, just
- * ignore it, and all other such warnings for
- * the file.
- */
- if (!file->ignore_unreachables &&
- gcov_enabled(file)) {
- file->ignore_unreachables = true;
- continue;
- }
-
- WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset);
- warnings++;
- }
- }
- }
-
- return warnings;
-}
-
-static int validate_uncallable_instructions(struct objtool_file *file)
-{
- struct instruction *insn;
- int warnings = 0;
-
- for_each_insn(file, insn) {
- if (!insn->visited && insn->type == INSN_RETURN) {
- WARN_FUNC("return instruction outside of a callable function",
- insn->sec, insn->offset);
- warnings++;
- }
- }
-
- return warnings;
-}
-
-static void cleanup(struct objtool_file *file)
-{
- struct instruction *insn, *tmpinsn;
- struct alternative *alt, *tmpalt;
-
- list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
- list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
- list_del(&alt->list);
- free(alt);
- }
- list_del(&insn->list);
- hash_del(&insn->hash);
- free(insn);
- }
- elf_close(file->elf);
-}
-
-const char * const check_usage[] = {
+static const char * const check_usage[] = {
"objtool check [<options>] file.o",
NULL,
};
+const struct option check_options[] = {
+ OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"),
+ OPT_END(),
+};
+
int cmd_check(int argc, const char **argv)
{
- struct objtool_file file;
- int ret, warnings = 0;
-
- const struct option options[] = {
- OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"),
- OPT_END(),
- };
+ const char *objname;
- argc = parse_options(argc, argv, options, check_usage, 0);
+ argc = parse_options(argc, argv, check_options, check_usage, 0);
if (argc != 1)
- usage_with_options(check_usage, options);
+ usage_with_options(check_usage, check_options);
objname = argv[0];
- file.elf = elf_open(objname);
- if (!file.elf) {
- fprintf(stderr, "error reading elf file %s\n", objname);
- return 1;
- }
-
- INIT_LIST_HEAD(&file.insn_list);
- hash_init(file.insn_hash);
- file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
- file.rodata = find_section_by_name(file.elf, ".rodata");
- file.ignore_unreachables = false;
- file.c_file = find_section_by_name(file.elf, ".comment");
-
- ret = decode_sections(&file);
- if (ret < 0)
- goto out;
- warnings += ret;
-
- ret = validate_functions(&file);
- if (ret < 0)
- goto out;
- warnings += ret;
-
- ret = validate_uncallable_instructions(&file);
- if (ret < 0)
- goto out;
- warnings += ret;
-
-out:
- cleanup(&file);
-
- /* ignore warnings for now until we get all the code cleaned up */
- if (ret || warnings)
- return 0;
- return 0;
+ return check(objname, nofp);
}
--- /dev/null
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _OBJTOOL_CFI_H
+#define _OBJTOOL_CFI_H
+
+#define CFI_UNDEFINED -1
+#define CFI_CFA -2
+#define CFI_SP_INDIRECT -3
+#define CFI_BP_INDIRECT -4
+
+#define CFI_AX 0
+#define CFI_DX 1
+#define CFI_CX 2
+#define CFI_BX 3
+#define CFI_SI 4
+#define CFI_DI 5
+#define CFI_BP 6
+#define CFI_SP 7
+#define CFI_R8 8
+#define CFI_R9 9
+#define CFI_R10 10
+#define CFI_R11 11
+#define CFI_R12 12
+#define CFI_R13 13
+#define CFI_R14 14
+#define CFI_R15 15
+#define CFI_RA 16
+#define CFI_NUM_REGS 17
+
+struct cfi_reg {
+ int base;
+ int offset;
+};
+
+struct cfi_state {
+ struct cfi_reg cfa;
+ struct cfi_reg regs[CFI_NUM_REGS];
+};
+
+#endif /* _OBJTOOL_CFI_H */
--- /dev/null
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+
+#include "check.h"
+#include "elf.h"
+#include "special.h"
+#include "arch.h"
+#include "warn.h"
+
+#include <linux/hashtable.h>
+#include <linux/kernel.h>
+
+struct alternative {
+ struct list_head list;
+ struct instruction *insn;
+};
+
+const char *objname;
+static bool nofp;
+struct cfi_state initial_func_cfi;
+
+static struct instruction *find_insn(struct objtool_file *file,
+ struct section *sec, unsigned long offset)
+{
+ struct instruction *insn;
+
+ hash_for_each_possible(file->insn_hash, insn, hash, offset)
+ if (insn->sec == sec && insn->offset == offset)
+ return insn;
+
+ return NULL;
+}
+
+static struct instruction *next_insn_same_sec(struct objtool_file *file,
+ struct instruction *insn)
+{
+ struct instruction *next = list_next_entry(insn, list);
+
+ if (!next || &next->list == &file->insn_list || next->sec != insn->sec)
+ return NULL;
+
+ return next;
+}
+
+static bool gcov_enabled(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *sym;
+
+ for_each_sec(file, sec)
+ list_for_each_entry(sym, &sec->symbol_list, list)
+ if (!strncmp(sym->name, "__gcov_.", 8))
+ return true;
+
+ return false;
+}
+
+#define func_for_each_insn(file, func, insn) \
+ for (insn = find_insn(file, func->sec, func->offset); \
+ insn && &insn->list != &file->insn_list && \
+ insn->sec == func->sec && \
+ insn->offset < func->offset + func->len; \
+ insn = list_next_entry(insn, list))
+
+#define func_for_each_insn_continue_reverse(file, func, insn) \
+ for (insn = list_prev_entry(insn, list); \
+ &insn->list != &file->insn_list && \
+ insn->sec == func->sec && insn->offset >= func->offset; \
+ insn = list_prev_entry(insn, list))
+
+#define sec_for_each_insn_from(file, insn) \
+ for (; insn; insn = next_insn_same_sec(file, insn))
+
+#define sec_for_each_insn_continue(file, insn) \
+ for (insn = next_insn_same_sec(file, insn); insn; \
+ insn = next_insn_same_sec(file, insn))
+
+/*
+ * Check if the function has been manually whitelisted with the
+ * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
+ * due to its use of a context switching instruction.
+ */
+static bool ignore_func(struct objtool_file *file, struct symbol *func)
+{
+ struct rela *rela;
+ struct instruction *insn;
+
+ /* check for STACK_FRAME_NON_STANDARD */
+ if (file->whitelist && file->whitelist->rela)
+ list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) {
+ if (rela->sym->type == STT_SECTION &&
+ rela->sym->sec == func->sec &&
+ rela->addend == func->offset)
+ return true;
+ if (rela->sym->type == STT_FUNC && rela->sym == func)
+ return true;
+ }
+
+ /* check if it has a context switching instruction */
+ func_for_each_insn(file, func, insn)
+ if (insn->type == INSN_CONTEXT_SWITCH)
+ return true;
+
+ return false;
+}
+
+/*
+ * This checks to see if the given function is a "noreturn" function.
+ *
+ * For global functions which are outside the scope of this object file, we
+ * have to keep a manual list of them.
+ *
+ * For local functions, we have to detect them manually by simply looking for
+ * the lack of a return instruction.
+ *
+ * Returns:
+ * -1: error
+ * 0: no dead end
+ * 1: dead end
+ */
+static int __dead_end_function(struct objtool_file *file, struct symbol *func,
+ int recursion)
+{
+ int i;
+ struct instruction *insn;
+ bool empty = true;
+
+ /*
+ * Unfortunately these have to be hard coded because the noreturn
+ * attribute isn't provided in ELF data.
+ */
+ static const char * const global_noreturns[] = {
+ "__stack_chk_fail",
+ "panic",
+ "do_exit",
+ "do_task_dead",
+ "__module_put_and_exit",
+ "complete_and_exit",
+ "kvm_spurious_fault",
+ "__reiserfs_panic",
+ "lbug_with_loc",
+ "fortify_panic",
+ };
+
+ if (func->bind == STB_WEAK)
+ return 0;
+
+ if (func->bind == STB_GLOBAL)
+ for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
+ if (!strcmp(func->name, global_noreturns[i]))
+ return 1;
+
+ if (!func->sec)
+ return 0;
+
+ func_for_each_insn(file, func, insn) {
+ empty = false;
+
+ if (insn->type == INSN_RETURN)
+ return 0;
+ }
+
+ if (empty)
+ return 0;
+
+ /*
+ * A function can have a sibling call instead of a return. In that
+ * case, the function's dead-end status depends on whether the target
+ * of the sibling call returns.
+ */
+ func_for_each_insn(file, func, insn) {
+ if (insn->sec != func->sec ||
+ insn->offset >= func->offset + func->len)
+ break;
+
+ if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+ struct instruction *dest = insn->jump_dest;
+ struct symbol *dest_func;
+
+ if (!dest)
+ /* sibling call to another file */
+ return 0;
+
+ if (dest->sec != func->sec ||
+ dest->offset < func->offset ||
+ dest->offset >= func->offset + func->len) {
+ /* local sibling call */
+ dest_func = find_symbol_by_offset(dest->sec,
+ dest->offset);
+ if (!dest_func)
+ continue;
+
+ if (recursion == 5) {
+ WARN_FUNC("infinite recursion (objtool bug!)",
+ dest->sec, dest->offset);
+ return -1;
+ }
+
+ return __dead_end_function(file, dest_func,
+ recursion + 1);
+ }
+ }
+
+ if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
+ /* sibling call */
+ return 0;
+ }
+
+ return 1;
+}
+
+static int dead_end_function(struct objtool_file *file, struct symbol *func)
+{
+ return __dead_end_function(file, func, 0);
+}
+
+static void clear_insn_state(struct insn_state *state)
+{
+ int i;
+
+ memset(state, 0, sizeof(*state));
+ state->cfa.base = CFI_UNDEFINED;
+ for (i = 0; i < CFI_NUM_REGS; i++)
+ state->regs[i].base = CFI_UNDEFINED;
+ state->drap_reg = CFI_UNDEFINED;
+}
+
+/*
+ * Call the arch-specific instruction decoder for all the instructions and add
+ * them to the global instruction list.
+ */
+static int decode_instructions(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+ unsigned long offset;
+ struct instruction *insn;
+ int ret;
+
+ for_each_sec(file, sec) {
+
+ if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ continue;
+
+ for (offset = 0; offset < sec->len; offset += insn->len) {
+ insn = malloc(sizeof(*insn));
+ if (!insn) {
+ WARN("malloc failed");
+ return -1;
+ }
+ memset(insn, 0, sizeof(*insn));
+ INIT_LIST_HEAD(&insn->alts);
+ clear_insn_state(&insn->state);
+
+ insn->sec = sec;
+ insn->offset = offset;
+
+ ret = arch_decode_instruction(file->elf, sec, offset,
+ sec->len - offset,
+ &insn->len, &insn->type,
+ &insn->immediate,
+ &insn->stack_op);
+ if (ret)
+ return ret;
+
+ if (!insn->type || insn->type > INSN_LAST) {
+ WARN_FUNC("invalid instruction type %d",
+ insn->sec, insn->offset, insn->type);
+ return -1;
+ }
+
+ hash_add(file->insn_hash, &insn->hash, insn->offset);
+ list_add_tail(&insn->list, &file->insn_list);
+ }
+
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
+
+ if (!find_insn(file, sec, func->offset)) {
+ WARN("%s(): can't find starting instruction",
+ func->name);
+ return -1;
+ }
+
+ func_for_each_insn(file, func, insn)
+ if (!insn->func)
+ insn->func = func;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Find all uses of the unreachable() macro, which are code path dead ends.
+ */
+static int add_dead_ends(struct objtool_file *file)
+{
+ struct section *sec;
+ struct rela *rela;
+ struct instruction *insn;
+ bool found;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.unreachable");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (insn)
+ insn = list_prev_entry(insn, list);
+ else if (rela->addend == rela->sym->sec->len) {
+ found = false;
+ list_for_each_entry_reverse(insn, &file->insn_list, list) {
+ if (insn->sec == rela->sym->sec) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ WARN("can't find unreachable insn at %s+0x%x",
+ rela->sym->sec->name, rela->addend);
+ return -1;
+ }
+ } else {
+ WARN("can't find unreachable insn at %s+0x%x",
+ rela->sym->sec->name, rela->addend);
+ return -1;
+ }
+
+ insn->dead_end = true;
+ }
+
+ return 0;
+}
+
+/*
+ * Warnings shouldn't be reported for ignored functions.
+ */
+static void add_ignores(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct section *sec;
+ struct symbol *func;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
+
+ if (!ignore_func(file, func))
+ continue;
+
+ func_for_each_insn(file, func, insn)
+ insn->ignore = true;
+ }
+ }
+}
+
+/*
+ * Find the destination instructions for all jumps.
+ */
+static int add_jump_destinations(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct rela *rela;
+ struct section *dest_sec;
+ unsigned long dest_off;
+
+ for_each_insn(file, insn) {
+ if (insn->type != INSN_JUMP_CONDITIONAL &&
+ insn->type != INSN_JUMP_UNCONDITIONAL)
+ continue;
+
+ if (insn->ignore)
+ continue;
+
+ rela = find_rela_by_dest_range(insn->sec, insn->offset,
+ insn->len);
+ if (!rela) {
+ dest_sec = insn->sec;
+ dest_off = insn->offset + insn->len + insn->immediate;
+ } else if (rela->sym->type == STT_SECTION) {
+ dest_sec = rela->sym->sec;
+ dest_off = rela->addend + 4;
+ } else if (rela->sym->sec->idx) {
+ dest_sec = rela->sym->sec;
+ dest_off = rela->sym->sym.st_value + rela->addend + 4;
+ } else {
+ /* sibling call */
+ insn->jump_dest = 0;
+ continue;
+ }
+
+ insn->jump_dest = find_insn(file, dest_sec, dest_off);
+ if (!insn->jump_dest) {
+
+ /*
+ * This is a special case where an alt instruction
+ * jumps past the end of the section. These are
+ * handled later in handle_group_alt().
+ */
+ if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+ continue;
+
+ WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
+ insn->sec, insn->offset, dest_sec->name,
+ dest_off);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Find the destination instructions for all calls.
+ */
+static int add_call_destinations(struct objtool_file *file)
+{
+ struct instruction *insn;
+ unsigned long dest_off;
+ struct rela *rela;
+
+ for_each_insn(file, insn) {
+ if (insn->type != INSN_CALL)
+ continue;
+
+ rela = find_rela_by_dest_range(insn->sec, insn->offset,
+ insn->len);
+ if (!rela) {
+ dest_off = insn->offset + insn->len + insn->immediate;
+ insn->call_dest = find_symbol_by_offset(insn->sec,
+ dest_off);
+ if (!insn->call_dest) {
+ WARN_FUNC("can't find call dest symbol at offset 0x%lx",
+ insn->sec, insn->offset, dest_off);
+ return -1;
+ }
+ } else if (rela->sym->type == STT_SECTION) {
+ insn->call_dest = find_symbol_by_offset(rela->sym->sec,
+ rela->addend+4);
+ if (!insn->call_dest ||
+ insn->call_dest->type != STT_FUNC) {
+ WARN_FUNC("can't find call dest symbol at %s+0x%x",
+ insn->sec, insn->offset,
+ rela->sym->sec->name,
+ rela->addend + 4);
+ return -1;
+ }
+ } else
+ insn->call_dest = rela->sym;
+ }
+
+ return 0;
+}
+
+/*
+ * The .alternatives section requires some extra special care, over and above
+ * what other special sections require:
+ *
+ * 1. Because alternatives are patched in-place, we need to insert a fake jump
+ * instruction at the end so that validate_branch() skips all the original
+ * replaced instructions when validating the new instruction path.
+ *
+ * 2. An added wrinkle is that the new instruction length might be zero. In
+ * that case the old instructions are replaced with noops. We simulate that
+ * by creating a fake jump as the only new instruction.
+ *
+ * 3. In some cases, the alternative section includes an instruction which
+ * conditionally jumps to the _end_ of the entry. We have to modify these
+ * jumps' destinations to point back to .text rather than the end of the
+ * entry in .altinstr_replacement.
+ *
+ * 4. It has been requested that we don't validate the !POPCNT feature path
+ * which is a "very very small percentage of machines".
+ */
+static int handle_group_alt(struct objtool_file *file,
+ struct special_alt *special_alt,
+ struct instruction *orig_insn,
+ struct instruction **new_insn)
+{
+ struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
+ unsigned long dest_off;
+
+ last_orig_insn = NULL;
+ insn = orig_insn;
+ sec_for_each_insn_from(file, insn) {
+ if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+ break;
+
+ if (special_alt->skip_orig)
+ insn->type = INSN_NOP;
+
+ insn->alt_group = true;
+ last_orig_insn = insn;
+ }
+
+ if (!next_insn_same_sec(file, last_orig_insn)) {
+ WARN("%s: don't know how to handle alternatives at end of section",
+ special_alt->orig_sec->name);
+ return -1;
+ }
+
+ fake_jump = malloc(sizeof(*fake_jump));
+ if (!fake_jump) {
+ WARN("malloc failed");
+ return -1;
+ }
+ memset(fake_jump, 0, sizeof(*fake_jump));
+ INIT_LIST_HEAD(&fake_jump->alts);
+ clear_insn_state(&fake_jump->state);
+
+ fake_jump->sec = special_alt->new_sec;
+ fake_jump->offset = -1;
+ fake_jump->type = INSN_JUMP_UNCONDITIONAL;
+ fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
+ fake_jump->ignore = true;
+
+ if (!special_alt->new_len) {
+ *new_insn = fake_jump;
+ return 0;
+ }
+
+ last_new_insn = NULL;
+ insn = *new_insn;
+ sec_for_each_insn_from(file, insn) {
+ if (insn->offset >= special_alt->new_off + special_alt->new_len)
+ break;
+
+ last_new_insn = insn;
+
+ if (insn->type != INSN_JUMP_CONDITIONAL &&
+ insn->type != INSN_JUMP_UNCONDITIONAL)
+ continue;
+
+ if (!insn->immediate)
+ continue;
+
+ dest_off = insn->offset + insn->len + insn->immediate;
+ if (dest_off == special_alt->new_off + special_alt->new_len)
+ insn->jump_dest = fake_jump;
+
+ if (!insn->jump_dest) {
+ WARN_FUNC("can't find alternative jump destination",
+ insn->sec, insn->offset);
+ return -1;
+ }
+ }
+
+ if (!last_new_insn) {
+ WARN_FUNC("can't find last new alternative instruction",
+ special_alt->new_sec, special_alt->new_off);
+ return -1;
+ }
+
+ list_add(&fake_jump->list, &last_new_insn->list);
+
+ return 0;
+}
+
+/*
+ * A jump table entry can either convert a nop to a jump or a jump to a nop.
+ * If the original instruction is a jump, make the alt entry an effective nop
+ * by just skipping the original instruction.
+ */
+static int handle_jump_alt(struct objtool_file *file,
+ struct special_alt *special_alt,
+ struct instruction *orig_insn,
+ struct instruction **new_insn)
+{
+ if (orig_insn->type == INSN_NOP)
+ return 0;
+
+ if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
+ WARN_FUNC("unsupported instruction at jump label",
+ orig_insn->sec, orig_insn->offset);
+ return -1;
+ }
+
+ *new_insn = list_next_entry(orig_insn, list);
+ return 0;
+}
+
+/*
+ * Read all the special sections which have alternate instructions which can be
+ * patched in or redirected to at runtime. Each instruction having alternate
+ * instruction(s) has them added to its insn->alts list, which will be
+ * traversed in validate_branch().
+ */
+static int add_special_section_alts(struct objtool_file *file)
+{
+ struct list_head special_alts;
+ struct instruction *orig_insn, *new_insn;
+ struct special_alt *special_alt, *tmp;
+ struct alternative *alt;
+ int ret;
+
+ ret = special_get_alts(file->elf, &special_alts);
+ if (ret)
+ return ret;
+
+ list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
+ alt = malloc(sizeof(*alt));
+ if (!alt) {
+ WARN("malloc failed");
+ ret = -1;
+ goto out;
+ }
+
+ orig_insn = find_insn(file, special_alt->orig_sec,
+ special_alt->orig_off);
+ if (!orig_insn) {
+ WARN_FUNC("special: can't find orig instruction",
+ special_alt->orig_sec, special_alt->orig_off);
+ ret = -1;
+ goto out;
+ }
+
+ new_insn = NULL;
+ if (!special_alt->group || special_alt->new_len) {
+ new_insn = find_insn(file, special_alt->new_sec,
+ special_alt->new_off);
+ if (!new_insn) {
+ WARN_FUNC("special: can't find new instruction",
+ special_alt->new_sec,
+ special_alt->new_off);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (special_alt->group) {
+ ret = handle_group_alt(file, special_alt, orig_insn,
+ &new_insn);
+ if (ret)
+ goto out;
+ } else if (special_alt->jump_or_nop) {
+ ret = handle_jump_alt(file, special_alt, orig_insn,
+ &new_insn);
+ if (ret)
+ goto out;
+ }
+
+ alt->insn = new_insn;
+ list_add_tail(&alt->list, &orig_insn->alts);
+
+ list_del(&special_alt->list);
+ free(special_alt);
+ }
+
+out:
+ return ret;
+}
+
+static int add_switch_table(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct rela *table,
+ struct rela *next_table)
+{
+ struct rela *rela = table;
+ struct instruction *alt_insn;
+ struct alternative *alt;
+
+ list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
+ if (rela == next_table)
+ break;
+
+ if (rela->sym->sec != insn->sec ||
+ rela->addend <= func->offset ||
+ rela->addend >= func->offset + func->len)
+ break;
+
+ alt_insn = find_insn(file, insn->sec, rela->addend);
+ if (!alt_insn) {
+ WARN("%s: can't find instruction at %s+0x%x",
+ file->rodata->rela->name, insn->sec->name,
+ rela->addend);
+ return -1;
+ }
+
+ alt = malloc(sizeof(*alt));
+ if (!alt) {
+ WARN("malloc failed");
+ return -1;
+ }
+
+ alt->insn = alt_insn;
+ list_add_tail(&alt->list, &insn->alts);
+ }
+
+ return 0;
+}
+
+/*
+ * find_switch_table() - Given a dynamic jump, find the switch jump table in
+ * .rodata associated with it.
+ *
+ * There are 3 basic patterns:
+ *
+ * 1. jmpq *[rodata addr](,%reg,8)
+ *
+ * This is the most common case by far. It jumps to an address in a simple
+ * jump table which is stored in .rodata.
+ *
+ * 2. jmpq *[rodata addr](%rip)
+ *
+ * This is caused by a rare GCC quirk, currently only seen in three driver
+ * functions in the kernel, only with certain obscure non-distro configs.
+ *
+ * As part of an optimization, GCC makes a copy of an existing switch jump
+ * table, modifies it, and then hard-codes the jump (albeit with an indirect
+ * jump) to use a single entry in the table. The rest of the jump table and
+ * some of its jump targets remain as dead code.
+ *
+ * In such a case we can just crudely ignore all unreachable instruction
+ * warnings for the entire object file. Ideally we would just ignore them
+ * for the function, but that would require redesigning the code quite a
+ * bit. And honestly that's just not worth doing: unreachable instruction
+ * warnings are of questionable value anyway, and this is such a rare issue.
+ *
+ * 3. mov [rodata addr],%reg1
+ * ... some instructions ...
+ * jmpq *(%reg1,%reg2,8)
+ *
+ * This is a fairly uncommon pattern which is new for GCC 6. As of this
+ * writing, there are 11 occurrences of it in the allmodconfig kernel.
+ *
+ * TODO: Once we have DWARF CFI and smarter instruction decoding logic,
+ * ensure the same register is used in the mov and jump instructions.
+ */
+static struct rela *find_switch_table(struct objtool_file *file,
+ struct symbol *func,
+ struct instruction *insn)
+{
+ struct rela *text_rela, *rodata_rela;
+ struct instruction *orig_insn = insn;
+
+ text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
+ if (text_rela && text_rela->sym == file->rodata->sym) {
+ /* case 1 */
+ rodata_rela = find_rela_by_dest(file->rodata,
+ text_rela->addend);
+ if (rodata_rela)
+ return rodata_rela;
+
+ /* case 2 */
+ rodata_rela = find_rela_by_dest(file->rodata,
+ text_rela->addend + 4);
+ if (!rodata_rela)
+ return NULL;
+ file->ignore_unreachables = true;
+ return rodata_rela;
+ }
+
+ /* case 3 */
+ func_for_each_insn_continue_reverse(file, func, insn) {
+ if (insn->type == INSN_JUMP_DYNAMIC)
+ break;
+
+ /* allow small jumps within the range */
+ if (insn->type == INSN_JUMP_UNCONDITIONAL &&
+ insn->jump_dest &&
+ (insn->jump_dest->offset <= insn->offset ||
+ insn->jump_dest->offset > orig_insn->offset))
+ break;
+
+ /* look for a relocation which references .rodata */
+ text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
+ insn->len);
+ if (!text_rela || text_rela->sym != file->rodata->sym)
+ continue;
+
+ /*
+ * Make sure the .rodata address isn't associated with a
+ * symbol. gcc jump tables are anonymous data.
+ */
+ if (find_symbol_containing(file->rodata, text_rela->addend))
+ continue;
+
+ return find_rela_by_dest(file->rodata, text_rela->addend);
+ }
+
+ return NULL;
+}
+
+static int add_func_switch_tables(struct objtool_file *file,
+ struct symbol *func)
+{
+ struct instruction *insn, *prev_jump = NULL;
+ struct rela *rela, *prev_rela = NULL;
+ int ret;
+
+ func_for_each_insn(file, func, insn) {
+ if (insn->type != INSN_JUMP_DYNAMIC)
+ continue;
+
+ rela = find_switch_table(file, func, insn);
+ if (!rela)
+ continue;
+
+ /*
+ * We found a switch table, but we don't know yet how big it
+ * is. Don't add it until we reach the end of the function or
+ * the beginning of another switch table in the same function.
+ */
+ if (prev_jump) {
+ ret = add_switch_table(file, func, prev_jump, prev_rela,
+ rela);
+ if (ret)
+ return ret;
+ }
+
+ prev_jump = insn;
+ prev_rela = rela;
+ }
+
+ if (prev_jump) {
+ ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * For some switch statements, gcc generates a jump table in the .rodata
+ * section which contains a list of addresses within the function to jump to.
+ * This finds these jump tables and adds them to the insn->alts lists.
+ */
+static int add_switch_table_alts(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+ int ret;
+
+ if (!file->rodata || !file->rodata->rela)
+ return 0;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
+
+ ret = add_func_switch_tables(file, func);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int decode_sections(struct objtool_file *file)
+{
+ int ret;
+
+ ret = decode_instructions(file);
+ if (ret)
+ return ret;
+
+ ret = add_dead_ends(file);
+ if (ret)
+ return ret;
+
+ add_ignores(file);
+
+ ret = add_jump_destinations(file);
+ if (ret)
+ return ret;
+
+ ret = add_call_destinations(file);
+ if (ret)
+ return ret;
+
+ ret = add_special_section_alts(file);
+ if (ret)
+ return ret;
+
+ ret = add_switch_table_alts(file);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static bool is_fentry_call(struct instruction *insn)
+{
+ if (insn->type == INSN_CALL &&
+ insn->call_dest->type == STT_NOTYPE &&
+ !strcmp(insn->call_dest->name, "__fentry__"))
+ return true;
+
+ return false;
+}
+
+static bool has_modified_stack_frame(struct insn_state *state)
+{
+ int i;
+
+ if (state->cfa.base != initial_func_cfi.cfa.base ||
+ state->cfa.offset != initial_func_cfi.cfa.offset ||
+ state->stack_size != initial_func_cfi.cfa.offset ||
+ state->drap)
+ return true;
+
+ for (i = 0; i < CFI_NUM_REGS; i++)
+ if (state->regs[i].base != initial_func_cfi.regs[i].base ||
+ state->regs[i].offset != initial_func_cfi.regs[i].offset)
+ return true;
+
+ return false;
+}
+
+static bool has_valid_stack_frame(struct insn_state *state)
+{
+ if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA &&
+ state->regs[CFI_BP].offset == -16)
+ return true;
+
+ if (state->drap && state->regs[CFI_BP].base == CFI_BP)
+ return true;
+
+ return false;
+}
+
+static void save_reg(struct insn_state *state, unsigned char reg, int base,
+ int offset)
+{
+ if ((arch_callee_saved_reg(reg) ||
+ (state->drap && reg == state->drap_reg)) &&
+ state->regs[reg].base == CFI_UNDEFINED) {
+ state->regs[reg].base = base;
+ state->regs[reg].offset = offset;
+ }
+}
+
+static void restore_reg(struct insn_state *state, unsigned char reg)
+{
+ state->regs[reg].base = CFI_UNDEFINED;
+ state->regs[reg].offset = 0;
+}
+
+/*
+ * A note about DRAP stack alignment:
+ *
+ * GCC has the concept of a DRAP register, which is used to help keep track of
+ * the stack pointer when aligning the stack. r10 or r13 is used as the DRAP
+ * register. The typical DRAP pattern is:
+ *
+ * 4c 8d 54 24 08 lea 0x8(%rsp),%r10
+ * 48 83 e4 c0 and $0xffffffffffffffc0,%rsp
+ * 41 ff 72 f8 pushq -0x8(%r10)
+ * 55 push %rbp
+ * 48 89 e5 mov %rsp,%rbp
+ * (more pushes)
+ * 41 52 push %r10
+ * ...
+ * 41 5a pop %r10
+ * (more pops)
+ * 5d pop %rbp
+ * 49 8d 62 f8 lea -0x8(%r10),%rsp
+ * c3 retq
+ *
+ * There are some variations in the epilogues, like:
+ *
+ * 5b pop %rbx
+ * 41 5a pop %r10
+ * 41 5c pop %r12
+ * 41 5d pop %r13
+ * 41 5e pop %r14
+ * c9 leaveq
+ * 49 8d 62 f8 lea -0x8(%r10),%rsp
+ * c3 retq
+ *
+ * and:
+ *
+ * 4c 8b 55 e8 mov -0x18(%rbp),%r10
+ * 48 8b 5d e0 mov -0x20(%rbp),%rbx
+ * 4c 8b 65 f0 mov -0x10(%rbp),%r12
+ * 4c 8b 6d f8 mov -0x8(%rbp),%r13
+ * c9 leaveq
+ * 49 8d 62 f8 lea -0x8(%r10),%rsp
+ * c3 retq
+ *
+ * Sometimes r13 is used as the DRAP register, in which case it's saved and
+ * restored beforehand:
+ *
+ * 41 55 push %r13
+ * 4c 8d 6c 24 10 lea 0x10(%rsp),%r13
+ * 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
+ * ...
+ * 49 8d 65 f0 lea -0x10(%r13),%rsp
+ * 41 5d pop %r13
+ * c3 retq
+ */
+static int update_insn_state(struct instruction *insn, struct insn_state *state)
+{
+ struct stack_op *op = &insn->stack_op;
+ struct cfi_reg *cfa = &state->cfa;
+ struct cfi_reg *regs = state->regs;
+
+ /* stack operations don't make sense with an undefined CFA */
+ if (cfa->base == CFI_UNDEFINED) {
+ if (insn->func) {
+ WARN_FUNC("undefined stack state", insn->sec, insn->offset);
+ return -1;
+ }
+ return 0;
+ }
+
+ switch (op->dest.type) {
+
+ case OP_DEST_REG:
+ switch (op->src.type) {
+
+ case OP_SRC_REG:
+ if (cfa->base == op->src.reg && cfa->base == CFI_SP &&
+ op->dest.reg == CFI_BP && regs[CFI_BP].base == CFI_CFA &&
+ regs[CFI_BP].offset == -cfa->offset) {
+
+ /* mov %rsp, %rbp */
+ cfa->base = op->dest.reg;
+ state->bp_scratch = false;
+ } else if (state->drap) {
+
+ /* drap: mov %rsp, %rbp */
+ regs[CFI_BP].base = CFI_BP;
+ regs[CFI_BP].offset = -state->stack_size;
+ state->bp_scratch = false;
+ } else if (!nofp) {
+
+ WARN_FUNC("unknown stack-related register move",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ break;
+
+ case OP_SRC_ADD:
+ if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) {
+
+ /* add imm, %rsp */
+ state->stack_size -= op->src.offset;
+ if (cfa->base == CFI_SP)
+ cfa->offset -= op->src.offset;
+ break;
+ }
+
+ if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) {
+
+ /* lea disp(%rbp), %rsp */
+ state->stack_size = -(op->src.offset + regs[CFI_BP].offset);
+ break;
+ }
+
+ if (op->dest.reg != CFI_BP && op->src.reg == CFI_SP &&
+ cfa->base == CFI_SP) {
+
+ /* drap: lea disp(%rsp), %drap */
+ state->drap_reg = op->dest.reg;
+ break;
+ }
+
+ if (state->drap && op->dest.reg == CFI_SP &&
+ op->src.reg == state->drap_reg) {
+
+ /* drap: lea disp(%drap), %rsp */
+ cfa->base = CFI_SP;
+ cfa->offset = state->stack_size = -op->src.offset;
+ state->drap_reg = CFI_UNDEFINED;
+ state->drap = false;
+ break;
+ }
+
+ if (op->dest.reg == state->cfa.base) {
+ WARN_FUNC("unsupported stack register modification",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ break;
+
+ case OP_SRC_AND:
+ if (op->dest.reg != CFI_SP ||
+ (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
+ (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
+ WARN_FUNC("unsupported stack pointer realignment",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ if (state->drap_reg != CFI_UNDEFINED) {
+ /* drap: and imm, %rsp */
+ cfa->base = state->drap_reg;
+ cfa->offset = state->stack_size = 0;
+ state->drap = true;
+
+ }
+
+ /*
+ * Older versions of GCC (4.8ish) realign the stack
+ * without DRAP, with a frame pointer.
+ */
+
+ break;
+
+ case OP_SRC_POP:
+ if (!state->drap && op->dest.type == OP_DEST_REG &&
+ op->dest.reg == cfa->base) {
+
+ /* pop %rbp */
+ cfa->base = CFI_SP;
+ }
+
+ if (regs[op->dest.reg].offset == -state->stack_size) {
+
+ if (state->drap && cfa->base == CFI_BP_INDIRECT &&
+ op->dest.type == OP_DEST_REG &&
+ op->dest.reg == state->drap_reg) {
+
+ /* drap: pop %drap */
+ cfa->base = state->drap_reg;
+ cfa->offset = 0;
+ }
+
+ restore_reg(state, op->dest.reg);
+ }
+
+ state->stack_size -= 8;
+ if (cfa->base == CFI_SP)
+ cfa->offset -= 8;
+
+ break;
+
+ case OP_SRC_REG_INDIRECT:
+ if (state->drap && op->src.reg == CFI_BP &&
+ op->src.offset == regs[op->dest.reg].offset) {
+
+ /* drap: mov disp(%rbp), %reg */
+ if (op->dest.reg == state->drap_reg) {
+ cfa->base = state->drap_reg;
+ cfa->offset = 0;
+ }
+
+ restore_reg(state, op->dest.reg);
+
+ } else if (op->src.reg == cfa->base &&
+ op->src.offset == regs[op->dest.reg].offset + cfa->offset) {
+
+ /* mov disp(%rbp), %reg */
+ /* mov disp(%rsp), %reg */
+ restore_reg(state, op->dest.reg);
+ }
+
+ break;
+
+ default:
+ WARN_FUNC("unknown stack-related instruction",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ break;
+
+ case OP_DEST_PUSH:
+ state->stack_size += 8;
+ if (cfa->base == CFI_SP)
+ cfa->offset += 8;
+
+ if (op->src.type != OP_SRC_REG)
+ break;
+
+ if (state->drap) {
+ if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+
+ /* drap: push %drap */
+ cfa->base = CFI_BP_INDIRECT;
+ cfa->offset = -state->stack_size;
+
+ /* save drap so we know when to undefine it */
+ save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+
+ } else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) {
+
+ /* drap: push %rbp */
+ state->stack_size = 0;
+
+ } else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+
+ /* drap: push %reg */
+ save_reg(state, op->src.reg, CFI_BP, -state->stack_size);
+ }
+
+ } else {
+
+ /* push %reg */
+ save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+ }
+
+ /* detect when asm code uses rbp as a scratch register */
+ if (!nofp && insn->func && op->src.reg == CFI_BP &&
+ cfa->base != CFI_BP)
+ state->bp_scratch = true;
+ break;
+
+ case OP_DEST_REG_INDIRECT:
+
+ if (state->drap) {
+ if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+
+ /* drap: mov %drap, disp(%rbp) */
+ cfa->base = CFI_BP_INDIRECT;
+ cfa->offset = op->dest.offset;
+
+ /* save drap so we know when to undefine it */
+ save_reg(state, op->src.reg, CFI_CFA, op->dest.offset);
+ }
+
+ else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+
+ /* drap: mov reg, disp(%rbp) */
+ save_reg(state, op->src.reg, CFI_BP, op->dest.offset);
+ }
+
+ } else if (op->dest.reg == cfa->base) {
+
+ /* mov reg, disp(%rbp) */
+ /* mov reg, disp(%rsp) */
+ save_reg(state, op->src.reg, CFI_CFA,
+ op->dest.offset - state->cfa.offset);
+ }
+
+ break;
+
+ case OP_DEST_LEAVE:
+ if ((!state->drap && cfa->base != CFI_BP) ||
+ (state->drap && cfa->base != state->drap_reg)) {
+ WARN_FUNC("leave instruction with modified stack frame",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ /* leave (mov %rbp, %rsp; pop %rbp) */
+
+ state->stack_size = -state->regs[CFI_BP].offset - 8;
+ restore_reg(state, CFI_BP);
+
+ if (!state->drap) {
+ cfa->base = CFI_SP;
+ cfa->offset -= 8;
+ }
+
+ break;
+
+ case OP_DEST_MEM:
+ if (op->src.type != OP_SRC_POP) {
+ WARN_FUNC("unknown stack-related memory operation",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ /* pop mem */
+ state->stack_size -= 8;
+ if (cfa->base == CFI_SP)
+ cfa->offset -= 8;
+
+ break;
+
+ default:
+ WARN_FUNC("unknown stack-related instruction",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ return 0;
+}
+
+static bool insn_state_match(struct instruction *insn, struct insn_state *state)
+{
+ struct insn_state *state1 = &insn->state, *state2 = state;
+ int i;
+
+ if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) {
+ WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
+ insn->sec, insn->offset,
+ state1->cfa.base, state1->cfa.offset,
+ state2->cfa.base, state2->cfa.offset);
+
+ } else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) {
+ for (i = 0; i < CFI_NUM_REGS; i++) {
+ if (!memcmp(&state1->regs[i], &state2->regs[i],
+ sizeof(struct cfi_reg)))
+ continue;
+
+ WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d",
+ insn->sec, insn->offset,
+ i, state1->regs[i].base, state1->regs[i].offset,
+ i, state2->regs[i].base, state2->regs[i].offset);
+ break;
+ }
+
+ } else if (state1->drap != state2->drap ||
+ (state1->drap && state1->drap_reg != state2->drap_reg)) {
+ WARN_FUNC("stack state mismatch: drap1=%d(%d) drap2=%d(%d)",
+ insn->sec, insn->offset,
+ state1->drap, state1->drap_reg,
+ state2->drap, state2->drap_reg);
+
+ } else
+ return true;
+
+ return false;
+}
+
+/*
+ * Follow the branch starting at the given instruction, and recursively follow
+ * any other branches (jumps). Meanwhile, track the frame pointer state at
+ * each instruction and validate all the rules described in
+ * tools/objtool/Documentation/stack-validation.txt.
+ */
+static int validate_branch(struct objtool_file *file, struct instruction *first,
+ struct insn_state state)
+{
+ struct alternative *alt;
+ struct instruction *insn;
+ struct section *sec;
+ struct symbol *func = NULL;
+ int ret;
+
+ insn = first;
+ sec = insn->sec;
+
+ if (insn->alt_group && list_empty(&insn->alts)) {
+ WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
+ sec, insn->offset);
+ return -1;
+ }
+
+ while (1) {
+ if (file->c_file && insn->func) {
+ if (func && func != insn->func) {
+ WARN("%s() falls through to next function %s()",
+ func->name, insn->func->name);
+ return 1;
+ }
+ }
+
+ func = insn->func;
+
+ if (insn->visited) {
+ if (!!insn_state_match(insn, &state))
+ return 1;
+
+ return 0;
+ }
+
+ insn->state = state;
+
+ insn->visited = true;
+
+ list_for_each_entry(alt, &insn->alts, list) {
+ ret = validate_branch(file, alt->insn, state);
+ if (ret)
+ return 1;
+ }
+
+ switch (insn->type) {
+
+ case INSN_RETURN:
+ if (func && has_modified_stack_frame(&state)) {
+ WARN_FUNC("return with modified stack frame",
+ sec, insn->offset);
+ return 1;
+ }
+
+ if (state.bp_scratch) {
+ WARN("%s uses BP as a scratch register",
+ insn->func->name);
+ return 1;
+ }
+
+ return 0;
+
+ case INSN_CALL:
+ if (is_fentry_call(insn))
+ break;
+
+ ret = dead_end_function(file, insn->call_dest);
+ if (ret == 1)
+ return 0;
+ if (ret == -1)
+ return 1;
+
+ /* fallthrough */
+ case INSN_CALL_DYNAMIC:
+ if (!nofp && func && !has_valid_stack_frame(&state)) {
+ WARN_FUNC("call without frame pointer save/setup",
+ sec, insn->offset);
+ return 1;
+ }
+ break;
+
+ case INSN_JUMP_CONDITIONAL:
+ case INSN_JUMP_UNCONDITIONAL:
+ if (insn->jump_dest) {
+ ret = validate_branch(file, insn->jump_dest,
+ state);
+ if (ret)
+ return 1;
+ } else if (func && has_modified_stack_frame(&state)) {
+ WARN_FUNC("sibling call from callable instruction with modified stack frame",
+ sec, insn->offset);
+ return 1;
+ } /* else it's a sibling call */
+
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
+ return 0;
+
+ break;
+
+ case INSN_JUMP_DYNAMIC:
+ if (func && list_empty(&insn->alts) &&
+ has_modified_stack_frame(&state)) {
+ WARN_FUNC("sibling call from callable instruction with modified stack frame",
+ sec, insn->offset);
+ return 1;
+ }
+
+ return 0;
+
+ case INSN_STACK:
+ if (update_insn_state(insn, &state))
+ return -1;
+
+ break;
+
+ default:
+ break;
+ }
+
+ if (insn->dead_end)
+ return 0;
+
+ insn = next_insn_same_sec(file, insn);
+ if (!insn) {
+ WARN("%s: unexpected end of section", sec->name);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static bool is_kasan_insn(struct instruction *insn)
+{
+ return (insn->type == INSN_CALL &&
+ !strcmp(insn->call_dest->name, "__asan_handle_no_return"));
+}
+
+static bool is_ubsan_insn(struct instruction *insn)
+{
+ return (insn->type == INSN_CALL &&
+ !strcmp(insn->call_dest->name,
+ "__ubsan_handle_builtin_unreachable"));
+}
+
+static bool ignore_unreachable_insn(struct instruction *insn)
+{
+ int i;
+
+ if (insn->ignore || insn->type == INSN_NOP)
+ return true;
+
+ /*
+ * Ignore any unused exceptions. This can happen when a whitelisted
+ * function has an exception table entry.
+ */
+ if (!strcmp(insn->sec->name, ".fixup"))
+ return true;
+
+ /*
+ * Check if this (or a subsequent) instruction is related to
+ * CONFIG_UBSAN or CONFIG_KASAN.
+ *
+ * End the search at 5 instructions to avoid going into the weeds.
+ */
+ if (!insn->func)
+ return false;
+ for (i = 0; i < 5; i++) {
+
+ if (is_kasan_insn(insn) || is_ubsan_insn(insn))
+ return true;
+
+ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
+ insn = insn->jump_dest;
+ continue;
+ }
+
+ if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
+ break;
+ insn = list_next_entry(insn, list);
+ }
+
+ return false;
+}
+
+static int validate_functions(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+ struct instruction *insn;
+ struct insn_state state;
+ int ret, warnings = 0;
+
+ clear_insn_state(&state);
+
+ state.cfa = initial_func_cfi.cfa;
+ memcpy(&state.regs, &initial_func_cfi.regs,
+ CFI_NUM_REGS * sizeof(struct cfi_reg));
+ state.stack_size = initial_func_cfi.cfa.offset;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
+
+ insn = find_insn(file, sec, func->offset);
+ if (!insn || insn->ignore)
+ continue;
+
+ ret = validate_branch(file, insn, state);
+ warnings += ret;
+ }
+ }
+
+ return warnings;
+}
+
+static int validate_reachable_instructions(struct objtool_file *file)
+{
+ struct instruction *insn;
+
+ if (file->ignore_unreachables)
+ return 0;
+
+ for_each_insn(file, insn) {
+ if (insn->visited || ignore_unreachable_insn(insn))
+ continue;
+
+ /*
+ * gcov produces a lot of unreachable instructions. If we get
+ * an unreachable warning and the file has gcov enabled, just
+ * ignore it, and all other such warnings for the file. Do
+ * this here because this is an expensive function.
+ */
+ if (gcov_enabled(file))
+ return 0;
+
+ WARN_FUNC("unreachable instruction", insn->sec, insn->offset);
+ return 1;
+ }
+
+ return 0;
+}
+
+static void cleanup(struct objtool_file *file)
+{
+ struct instruction *insn, *tmpinsn;
+ struct alternative *alt, *tmpalt;
+
+ list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
+ list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
+ list_del(&alt->list);
+ free(alt);
+ }
+ list_del(&insn->list);
+ hash_del(&insn->hash);
+ free(insn);
+ }
+ elf_close(file->elf);
+}
+
+int check(const char *_objname, bool _nofp)
+{
+ struct objtool_file file;
+ int ret, warnings = 0;
+
+ objname = _objname;
+ nofp = _nofp;
+
+ file.elf = elf_open(objname);
+ if (!file.elf)
+ return 1;
+
+ INIT_LIST_HEAD(&file.insn_list);
+ hash_init(file.insn_hash);
+ file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
+ file.rodata = find_section_by_name(file.elf, ".rodata");
+ file.ignore_unreachables = false;
+ file.c_file = find_section_by_name(file.elf, ".comment");
+
+ arch_initial_func_cfi_state(&initial_func_cfi);
+
+ ret = decode_sections(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+
+ if (list_empty(&file.insn_list))
+ goto out;
+
+ ret = validate_functions(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+
+ if (!warnings) {
+ ret = validate_reachable_instructions(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
+
+out:
+ cleanup(&file);
+
+ /* ignore warnings for now until we get all the code cleaned up */
+ if (ret || warnings)
+ return 0;
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _CHECK_H
+#define _CHECK_H
+
+#include <stdbool.h>
+#include "elf.h"
+#include "cfi.h"
+#include "arch.h"
+#include <linux/hashtable.h>
+
+struct insn_state {
+ struct cfi_reg cfa;
+ struct cfi_reg regs[CFI_NUM_REGS];
+ int stack_size;
+ bool bp_scratch;
+ bool drap;
+ int drap_reg;
+};
+
+struct instruction {
+ struct list_head list;
+ struct hlist_node hash;
+ struct section *sec;
+ unsigned long offset;
+ unsigned int len;
+ unsigned char type;
+ unsigned long immediate;
+ bool alt_group, visited, dead_end, ignore;
+ struct symbol *call_dest;
+ struct instruction *jump_dest;
+ struct list_head alts;
+ struct symbol *func;
+ struct stack_op stack_op;
+ struct insn_state state;
+};
+
+struct objtool_file {
+ struct elf *elf;
+ struct list_head insn_list;
+ DECLARE_HASHTABLE(insn_hash, 16);
+ struct section *rodata, *whitelist;
+ bool ignore_unreachables, c_file;
+};
+
+int check(const char *objname, bool nofp);
+
+#define for_each_insn(file, insn) \
+ list_for_each_entry(insn, &file->insn_list, list)
+
+#endif /* _CHECK_H */
#define ELF_C_READ_MMAP ELF_C_READ
#endif
+#define WARN_ELF(format, ...) \
+ WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
+
struct section *find_section_by_name(struct elf *elf, const char *name)
{
struct section *sec;
int i;
if (elf_getshdrnum(elf->elf, §ions_nr)) {
- perror("elf_getshdrnum");
+ WARN_ELF("elf_getshdrnum");
return -1;
}
if (elf_getshdrstrndx(elf->elf, &shstrndx)) {
- perror("elf_getshdrstrndx");
+ WARN_ELF("elf_getshdrstrndx");
return -1;
}
s = elf_getscn(elf->elf, i);
if (!s) {
- perror("elf_getscn");
+ WARN_ELF("elf_getscn");
return -1;
}
sec->idx = elf_ndxscn(s);
if (!gelf_getshdr(s, &sec->sh)) {
- perror("gelf_getshdr");
+ WARN_ELF("gelf_getshdr");
return -1;
}
sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name);
if (!sec->name) {
- perror("elf_strptr");
+ WARN_ELF("elf_strptr");
return -1;
}
- sec->elf_data = elf_getdata(s, NULL);
- if (!sec->elf_data) {
- perror("elf_getdata");
+ sec->data = elf_getdata(s, NULL);
+ if (!sec->data) {
+ WARN_ELF("elf_getdata");
return -1;
}
- if (sec->elf_data->d_off != 0 ||
- sec->elf_data->d_size != sec->sh.sh_size) {
+ if (sec->data->d_off != 0 ||
+ sec->data->d_size != sec->sh.sh_size) {
WARN("unexpected data attributes for %s", sec->name);
return -1;
}
- sec->data = (unsigned long)sec->elf_data->d_buf;
- sec->len = sec->elf_data->d_size;
+ sec->len = sec->data->d_size;
}
/* sanity check, one more call to elf_nextscn() should return NULL */
sym->idx = i;
- if (!gelf_getsym(symtab->elf_data, i, &sym->sym)) {
- perror("gelf_getsym");
+ if (!gelf_getsym(symtab->data, i, &sym->sym)) {
+ WARN_ELF("gelf_getsym");
goto err;
}
sym->name = elf_strptr(elf->elf, symtab->sh.sh_link,
sym->sym.st_name);
if (!sym->name) {
- perror("elf_strptr");
+ WARN_ELF("elf_strptr");
goto err;
}
}
memset(rela, 0, sizeof(*rela));
- if (!gelf_getrela(sec->elf_data, i, &rela->rela)) {
- perror("gelf_getrela");
+ if (!gelf_getrela(sec->data, i, &rela->rela)) {
+ WARN_ELF("gelf_getrela");
return -1;
}
INIT_LIST_HEAD(&elf->sections);
- elf->name = strdup(name);
- if (!elf->name) {
- perror("strdup");
- goto err;
- }
-
elf->fd = open(name, O_RDONLY);
if (elf->fd == -1) {
perror("open");
elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL);
if (!elf->elf) {
- perror("elf_begin");
+ WARN_ELF("elf_begin");
goto err;
}
if (!gelf_getehdr(elf->elf, &elf->ehdr)) {
- perror("gelf_getehdr");
+ WARN_ELF("gelf_getehdr");
goto err;
}
struct symbol *sym, *tmpsym;
struct rela *rela, *tmprela;
+ if (elf->elf)
+ elf_end(elf->elf);
+
+ if (elf->fd > 0)
+ close(elf->fd);
+
list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) {
list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) {
list_del(&sym->list);
list_del(&sec->list);
free(sec);
}
- if (elf->name)
- free(elf->name);
- if (elf->fd > 0)
- close(elf->fd);
- if (elf->elf)
- elf_end(elf->elf);
+
free(elf);
}
DECLARE_HASHTABLE(rela_hash, 16);
struct section *base, *rela;
struct symbol *sym;
- Elf_Data *elf_data;
+ Elf_Data *data;
char *name;
int idx;
- unsigned long data;
unsigned int len;
};
struct symbol *find_containing_func(struct section *sec, unsigned long offset);
void elf_close(struct elf *elf);
-
+#define for_each_sec(file, sec) \
+ list_for_each_entry(sec, &file->elf->sections, list)
#endif /* _OBJTOOL_ELF_H */
alt->jump_or_nop = entry->jump_or_nop;
if (alt->group) {
- alt->orig_len = *(unsigned char *)(sec->data + offset +
+ alt->orig_len = *(unsigned char *)(sec->data->d_buf + offset +
entry->orig_len);
- alt->new_len = *(unsigned char *)(sec->data + offset +
+ alt->new_len = *(unsigned char *)(sec->data->d_buf + offset +
entry->new_len);
}
if (entry->feature) {
unsigned short feature;
- feature = *(unsigned short *)(sec->data + offset +
+ feature = *(unsigned short *)(sec->data->d_buf + offset +
entry->feature);
/*
#ifndef _WARN_H
#define _WARN_H
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "elf.h"
+
extern const char *objname;
static inline char *offstr(struct section *sec, unsigned long offset)
free(_str); \
})
+#define WARN_ELF(format, ...) \
+ WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
+
#endif /* _WARN_H */
include $(srctree)/tools/scripts/Makefile.arch
-$(call detected_var,ARCH)
+$(call detected_var,SRCARCH)
NO_PERF_REGS := 1
# Additional ARCH settings for ppc
-ifeq ($(ARCH),powerpc)
+ifeq ($(SRCARCH),powerpc)
NO_PERF_REGS := 0
LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
endif
# Additional ARCH settings for x86
-ifeq ($(ARCH),x86)
+ifeq ($(SRCARCH),x86)
$(call detected,CONFIG_X86)
ifeq (${IS_64_BIT}, 1)
CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
NO_PERF_REGS := 0
endif
-ifeq ($(ARCH),arm)
+ifeq ($(SRCARCH),arm)
NO_PERF_REGS := 0
LIBUNWIND_LIBS = -lunwind -lunwind-arm
endif
-ifeq ($(ARCH),arm64)
+ifeq ($(SRCARCH),arm64)
NO_PERF_REGS := 0
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
endif
# Disable it on all other architectures in case libdw unwind
# support is detected in system. Add supported architectures
# to the check.
-ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm))
NO_LIBDW_DWARF_UNWIND := 1
endif
FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
-FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
+FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
# include ARCH specific config
--include $(src-perf)/arch/$(ARCH)/Makefile
+-include $(src-perf)/arch/$(SRCARCH)/Makefile
ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
endif
INC_FLAGS += -I$(src-perf)/util/include
-INC_FLAGS += -I$(src-perf)/arch/$(ARCH)/include
+INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
INC_FLAGS += -I$(srctree)/tools/include/uapi
INC_FLAGS += -I$(srctree)/tools/include/
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/
# $(obj-perf) for generated common-cmds.h
# $(obj-perf)/util for generated bison/flex headers
ifndef NO_DWARF
ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
- msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
+ msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
NO_DWARF := 1
else
CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
CFLAGS += -DHAVE_BPF_PROLOGUE
$(call detected,CONFIG_BPF_PROLOGUE)
else
- msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset());
+ msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset());
endif
else
msg := $(warning DWARF support is off, BPF prologue is disabled);
endif
endif
-ifeq ($(ARCH),powerpc)
+ifeq ($(SRCARCH),powerpc)
ifndef NO_DWARF
CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
endif
endif
ifndef NO_LOCAL_LIBUNWIND
- ifeq ($(ARCH),$(filter $(ARCH),arm arm64))
+ ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
$(call feature_check,libunwind-debug-frame)
ifneq ($(feature-libunwind-debug-frame), 1)
msg := $(warning No debug_frame support found in libunwind);
NO_PERF_READ_VDSO32 := 1
endif
endif
- ifneq ($(ARCH), x86)
+ ifneq ($(SRCARCH), x86)
NO_PERF_READ_VDSOX32 := 1
endif
ifndef NO_PERF_READ_VDSOX32
endif
ifndef NO_AUXTRACE
- ifeq ($(ARCH),x86)
+ ifeq ($(SRCARCH),x86)
ifeq ($(feature-get_cpuid), 0)
msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
NO_AUXTRACE := 1
ETC_PERFCONFIG = etc/perfconfig
endif
ifndef lib
-ifeq ($(ARCH)$(IS_64_BIT), x861)
+ifeq ($(SRCARCH)$(IS_64_BIT), x861)
lib = lib64
else
lib = lib
ifeq ($(config),0)
include $(srctree)/tools/scripts/Makefile.arch
--include arch/$(ARCH)/Makefile
+-include arch/$(SRCARCH)/Makefile
endif
# The FEATURE_DUMP_EXPORT holds location of the actual
libperf-y += common.o
-libperf-y += $(ARCH)/
+libperf-y += $(SRCARCH)/
jevents-y += json.o jsmn.o jevents.o
pmu-events-y += pmu-events.o
-JDIR = pmu-events/arch/$(ARCH)
+JDIR = pmu-events/arch/$(SRCARCH)
JSON = $(shell [ -d $(JDIR) ] && \
find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
#
# directory and create tables in pmu-events.c.
#
$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS)
- $(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
+ $(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
$(Q)echo ';' >> $@
-ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc))
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
endif
evsel = perf_evlist__first(evlist);
evsel->attr.task = 1;
- evsel->attr.sample_freq = 0;
+ evsel->attr.sample_freq = 1;
evsel->attr.inherit = 0;
evsel->attr.watermark = 0;
evsel->attr.wakeup_events = 1;
struct perf_evsel *evsel;
event_attr_init(&attr);
+ /*
+ * Unnamed union member, not supported as struct member named
+ * initializer in older compilers such as gcc 4.4.7
+ *
+ * Just for probing the precise_ip:
+ */
+ attr.sample_period = 1;
perf_event_attr__set_max_precise_ip(&attr);
+ /*
+ * Now let the usual logic to set up the perf_event_attr defaults
+ * to kick in when we return and before perf_evsel__open() is called.
+ */
+ attr.sample_period = 0;
evsel = perf_evsel__new(&attr);
if (evsel == NULL)
/*
* default get_cpuid(): nothing gets recorded
- * actual implementation must be in arch/$(ARCH)/util/header.c
+ * actual implementation must be in arch/$(SRCARCH)/util/header.c
*/
int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
{
*/
map_groups__fixup_end(&machine->kmaps);
- if (machine__get_running_kernel_start(machine, &name, &addr)) {
- } else if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
- machine__destroy_kernel_maps(machine);
- return -1;
+ if (!machine__get_running_kernel_start(machine, &name, &addr)) {
+ if (name &&
+ maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
+ machine__destroy_kernel_maps(machine);
+ return -1;
+ }
}
return 0;
struct map *map, unsigned long offs)
{
struct symbol *sym;
- u64 addr = tp->address + tp->offset - offs;
+ u64 addr = tp->address - offs;
sym = map__find_symbol(map, addr);
if (!sym)
Dwarf_Addr pc;
bool isactivation;
+ if (!dwfl_frame_pc(state, &pc, NULL)) {
+ pr_err("%s", dwfl_errmsg(-1));
+ return DWARF_CB_ABORT;
+ }
+
+ // report the module before we query for isactivation
+ report_module(pc, ui);
+
if (!dwfl_frame_pc(state, &pc, &isactivation)) {
pr_err("%s", dwfl_errmsg(-1));
return DWARF_CB_ABORT;
}
EXPORT_SYMBOL(__wrap_acpi_evaluate_object);
-union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
+union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
u64 rev, u64 func, union acpi_object *argv4)
{
union acpi_object *obj = ERR_PTR(-ENXIO);
rcu_read_lock();
ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
if (ops)
- obj = ops->evaluate_dsm(handle, uuid, rev, func, argv4);
+ obj = ops->evaluate_dsm(handle, guid, rev, func, argv4);
rcu_read_unlock();
if (IS_ERR(obj))
- return acpi_evaluate_dsm(handle, uuid, rev, func, argv4);
+ return acpi_evaluate_dsm(handle, guid, rev, func, argv4);
return obj;
}
EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm);
union acpi_object *result;
static union acpi_object *nfit_test_evaluate_dsm(acpi_handle handle,
- const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4)
+ const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4)
{
if (handle != &nfit_ctl_handle)
return ERR_PTR(-ENXIO);
#ifndef __NFIT_TEST_H__
#define __NFIT_TEST_H__
#include <linux/list.h>
+#include <linux/uuid.h>
#include <linux/ioport.h>
#include <linux/spinlock_types.h>
typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle,
- const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4);
+ const guid_t *guid, u64 rev, u64 func,
+ union acpi_object *argv4);
void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
unsigned long size);
void __wrap_iounmap(volatile void __iomem *addr);
#ifndef __BPF_ENDIAN__
#define __BPF_ENDIAN__
-#include <asm/byteorder.h>
+#include <linux/swab.h>
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-# define __bpf_ntohs(x) __builtin_bswap16(x)
-# define __bpf_htons(x) __builtin_bswap16(x)
-#elif __BYTE_ORDER == __BIG_ENDIAN
-# define __bpf_ntohs(x) (x)
-# define __bpf_htons(x) (x)
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, or the user specifies (bpfel/bpfeb),
+ * respectively. The used __BYTE_ORDER__ is defined by
+ * the compiler, we cannot rely on __BYTE_ORDER from
+ * libc headers, since it doesn't reflect the actual
+ * requested byte order.
+ *
+ * Note, LLVM's BPF target has different __builtin_bswapX()
+ * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
+ * in bpfel and bpfeb case, which means below, that we map
+ * to cpu_to_be16(). We could use it unconditionally in BPF
+ * case, but better not rely on it, so that this header here
+ * can be used from application and BPF program side, which
+ * use different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x) __builtin_bswap16(x)
+# define __bpf_htons(x) __builtin_bswap16(x)
+# define __bpf_constant_ntohs(x) ___constant_swab16(x)
+# define __bpf_constant_htons(x) ___constant_swab16(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x) (x)
+# define __bpf_htons(x) (x)
+# define __bpf_constant_ntohs(x) (x)
+# define __bpf_constant_htons(x) (x)
#else
-# error "Fix your __BYTE_ORDER?!"
+# error "Fix your compiler's __BYTE_ORDER__?!"
#endif
#define bpf_htons(x) \
(__builtin_constant_p(x) ? \
- __constant_htons(x) : __bpf_htons(x))
+ __bpf_constant_htons(x) : __bpf_htons(x))
#define bpf_ntohs(x) \
(__builtin_constant_p(x) ? \
- __constant_ntohs(x) : __bpf_ntohs(x))
+ __bpf_constant_ntohs(x) : __bpf_ntohs(x))
-#endif
+#endif /* __BPF_ENDIAN__ */
.errstr = "invalid bpf_context access",
},
{
+ "leak pointer into ctx 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 2 },
+ .errstr_unpriv = "R2 leaks addr into mem",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "leak pointer into ctx 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 leaks addr into mem",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "leak pointer into ctx 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .errstr_unpriv = "R2 leaks addr into ctx",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "leak pointer into map val",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr_unpriv = "R6 leaks addr into mem",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
"helper access to map: full range",
.insns = {
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
echo "Running remote perf test $WITH DMA"
write_file "" $REMOTE_PERF/run
echo -n " "
- read_file $LOCAL_PERF/run
+ read_file $REMOTE_PERF/run
echo " Passed"
_modprobe -r ntb_perf