+/*
+ * fsys_getcpu: light-weight (fsyscall) implementation of getcpu().
+ *
+ * The third parameter is not used in this implementation.  The CPU number is
+ * read from current_thread_info()->cpu; with CONFIG_NUMA the node is looked
+ * up in cpu_to_node_map, otherwise the node is reported as 0.
+ *
+ * In:	r16 = base from which thread_info is addressed (at +IA64_TASK_SIZE)
+ *	r32 = user pointer that receives the CPU number, may be NULL
+ *	r33 = user pointer that receives the node number, may be NULL
+ * Out:	r8  = 0 on success
+ *
+ * Exits:
+ *  - NaT in r32 or r33:		branch to .fail_einval
+ *  - faulting probe or store:		.fail_efault via the EX() fixup entry
+ *  - any TIF_ALLWORK_MASK flag set:	branch to fsys_fallback_syscall
+ *
+ * A NULL pointer means that the corresponding value is not wanted: the probe
+ * and the store for that argument are predicated off instead of faulting.
+ * Both values are stored as 32-bit quantities so that the whole unsigned int
+ * in user memory is defined.
+ */
+ENTRY(fsys_getcpu)
+	.prologue
+	.altrp b6
+	.body
+	;;
+	add r2=TI_FLAGS+IA64_TASK_SIZE,r16
+	tnat.nz p6,p0 = r32			// guard against NaT argument
+	add r3=TI_CPU+IA64_TASK_SIZE,r16
+	;;
+	ld4 r3=[r3]				// M r3 = thread_info->cpu
+	ld4 r2=[r2]				// M r2 = thread_info->flags
+(p6)	br.cond.spnt.few .fail_einval		// B
+	;;
+	tnat.nz p7,p0 = r33			// I guard against NaT argument
+(p7)	br.cond.spnt.few .fail_einval		// B
+	;;
+	// The NaT checks are done; p6/p7 are reused below as non-NULL predicates.
+	cmp.ne p6,p0=r32,r0			// p6 = (cpu pointer != NULL)
+	cmp.ne p7,p0=r33,r0			// p7 = (node pointer != NULL)
+	;;
+#ifdef CONFIG_NUMA
+	movl r17=cpu_to_node_map
+	;;
+EX(.fail_efault, (p6) probe.w.fault r32, 3)	// M This takes 5 cycles
+EX(.fail_efault, (p7) probe.w.fault r33, 3)	// M This takes 5 cycles
+	shladd r18=r3,1,r17			// r18 = cpu_to_node_map + cpu*2
+	;;
+	ld2 r20=[r18]				// r20 = cpu_to_node_map[cpu], zero-extended
+	and r2 = TIF_ALLWORK_MASK,r2
+	;;
+	cmp.ne p8,p0=0,r2			// pending work: let the heavy-weight path run
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+EX(.fail_efault, (p6) st4 [r32] = r3)		// *cpu = thread_info->cpu
+EX(.fail_efault, (p7) st4 [r33] = r20)		// *node = node of this cpu (full 32 bits)
+	mov r8=0
+	;;
+#else
+EX(.fail_efault, (p6) probe.w.fault r32, 3)	// M This takes 5 cycles
+EX(.fail_efault, (p7) probe.w.fault r33, 3)	// M This takes 5 cycles
+	and r2 = TIF_ALLWORK_MASK,r2
+	;;
+	cmp.ne p8,p0=0,r2			// pending work: let the heavy-weight path run
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+EX(.fail_efault, (p6) st4 [r32] = r3)		// *cpu = thread_info->cpu
+EX(.fail_efault, (p7) st4 [r33] = r0)		// *node = 0 (full 32 bits)
+	mov r8=0
+	;;
+#endif
+	FSYS_RETURN
+END(fsys_getcpu)
+