crypto: sha256-mb - fix ctx pointer and digest copy
authorXiaodong Liu <xiaodong.liu@intel.com>
Fri, 12 Aug 2016 10:24:42 +0000 (06:24 -0400)
committerHerbert Xu <herbert@gondor.apana.org.au>
Tue, 16 Aug 2016 09:09:42 +0000 (17:09 +0800)
1. fix ctx pointer
Use req_ctx which is the ctx for the next job that have
been completed in the lanes instead of the first
completed job rctx, whose completion could have been
called and released.
2. fix digest copy
Use XMM register to copy another 16 bytes sha256 digest
instead of a regular register.

Signed-off-by: Xiaodong Liu <xiaodong.liu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/x86/crypto/sha256-mb/sha256_mb.c
arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S

index 89fa85e..6f97fb3 100644 (file)
@@ -485,10 +485,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
 
                        req = cast_mcryptd_ctx_to_req(req_ctx);
                        if (irqs_disabled())
-                               rctx->complete(&req->base, ret);
+                               req_ctx->complete(&req->base, ret);
                        else {
                                local_bh_disable();
-                               rctx->complete(&req->base, ret);
+                               req_ctx->complete(&req->base, ret);
                                local_bh_enable();
                        }
                }
index b691da9..a78a069 100644 (file)
@@ -265,13 +265,14 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2)
        vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
        vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
        vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-       movl    _args_digest+4*32(state, idx, 4), tmp2_w
+       vmovd   _args_digest(state , idx, 4) , %xmm0
        vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
        vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
        vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
 
-       vmovdqu %xmm0, _result_digest(job_rax)
-       movl    tmp2_w, _result_digest+1*16(job_rax)
+        vmovdqu %xmm0, _result_digest(job_rax)
+        offset =  (_result_digest + 1*16)
+        vmovdqu %xmm1, offset(job_rax)
 
        pop     %rbx