Add raid unit
New raid unit adds source for optimized xor and P+Q functions. Signed-off-by: Greg Tucker <greg.b.tucker@intel.com>
This commit is contained in:
parent
fce681adb4
commit
d6c5e9620d
@ -25,6 +25,7 @@ perf_tests32=
|
||||
# Include units
|
||||
|
||||
include erasure_code/Makefile.am
|
||||
include raid/Makefile.am
|
||||
|
||||
# LIB version info not necessarily the same as package version
|
||||
LIBISAL_CURRENT=2
|
||||
|
@ -27,9 +27,10 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
########################################################################
|
||||
|
||||
objs = bin\ec_base.obj bin\ec_highlevel_func.obj bin\ec_multibinary.obj bin\gf_2vect_dot_prod_avx.obj bin\gf_2vect_dot_prod_avx2.obj bin\gf_2vect_dot_prod_avx512.obj bin\gf_2vect_dot_prod_sse.obj bin\gf_2vect_mad_avx.obj bin\gf_2vect_mad_avx2.obj bin\gf_2vect_mad_avx512.obj bin\gf_2vect_mad_sse.obj bin\gf_3vect_dot_prod_avx.obj bin\gf_3vect_dot_prod_avx2.obj bin\gf_3vect_dot_prod_avx512.obj bin\gf_3vect_dot_prod_sse.obj bin\gf_3vect_mad_avx.obj bin\gf_3vect_mad_avx2.obj bin\gf_3vect_mad_avx512.obj bin\gf_3vect_mad_sse.obj bin\gf_4vect_dot_prod_avx.obj bin\gf_4vect_dot_prod_avx2.obj bin\gf_4vect_dot_prod_avx512.obj bin\gf_4vect_dot_prod_sse.obj bin\gf_4vect_mad_avx.obj bin\gf_4vect_mad_avx2.obj bin\gf_4vect_mad_avx512.obj bin\gf_4vect_mad_sse.obj bin\gf_5vect_dot_prod_avx.obj bin\gf_5vect_dot_prod_avx2.obj bin\gf_5vect_dot_prod_sse.obj bin\gf_5vect_mad_avx.obj bin\gf_5vect_mad_avx2.obj bin\gf_5vect_mad_sse.obj bin\gf_6vect_dot_prod_avx.obj bin\gf_6vect_dot_prod_avx2.obj bin\gf_6vect_dot_prod_sse.obj bin\gf_6vect_mad_avx.obj bin\gf_6vect_mad_avx2.obj bin\gf_6vect_mad_sse.obj bin\gf_vect_dot_prod_avx.obj bin\gf_vect_dot_prod_avx2.obj bin\gf_vect_dot_prod_avx512.obj bin\gf_vect_dot_prod_sse.obj bin\gf_vect_mad_avx.obj bin\gf_vect_mad_avx2.obj bin\gf_vect_mad_avx512.obj bin\gf_vect_mad_sse.obj bin\gf_vect_mul_avx.obj bin\gf_vect_mul_sse.obj
|
||||
|
||||
INCLUDES = -I./ -Ierasure_code/ -Iinclude/
|
||||
objs = bin\ec_base.obj bin\ec_highlevel_func.obj bin\ec_multibinary.obj bin\gf_2vect_dot_prod_avx.obj bin\gf_2vect_dot_prod_avx2.obj bin\gf_2vect_dot_prod_avx512.obj bin\gf_2vect_dot_prod_sse.obj bin\gf_2vect_mad_avx.obj bin\gf_2vect_mad_avx2.obj bin\gf_2vect_mad_avx512.obj bin\gf_2vect_mad_sse.obj bin\gf_3vect_dot_prod_avx.obj bin\gf_3vect_dot_prod_avx2.obj bin\gf_3vect_dot_prod_avx512.obj bin\gf_3vect_dot_prod_sse.obj bin\gf_3vect_mad_avx.obj bin\gf_3vect_mad_avx2.obj bin\gf_3vect_mad_avx512.obj bin\gf_3vect_mad_sse.obj bin\gf_4vect_dot_prod_avx.obj bin\gf_4vect_dot_prod_avx2.obj bin\gf_4vect_dot_prod_avx512.obj bin\gf_4vect_dot_prod_sse.obj bin\gf_4vect_mad_avx.obj bin\gf_4vect_mad_avx2.obj bin\gf_4vect_mad_avx512.obj bin\gf_4vect_mad_sse.obj bin\gf_5vect_dot_prod_avx.obj bin\gf_5vect_dot_prod_avx2.obj bin\gf_5vect_dot_prod_sse.obj bin\gf_5vect_mad_avx.obj bin\gf_5vect_mad_avx2.obj bin\gf_5vect_mad_sse.obj bin\gf_6vect_dot_prod_avx.obj bin\gf_6vect_dot_prod_avx2.obj bin\gf_6vect_dot_prod_sse.obj bin\gf_6vect_mad_avx.obj bin\gf_6vect_mad_avx2.obj bin\gf_6vect_mad_sse.obj bin\gf_vect_dot_prod_avx.obj bin\gf_vect_dot_prod_avx2.obj bin\gf_vect_dot_prod_avx512.obj bin\gf_vect_dot_prod_sse.obj bin\gf_vect_mad_avx.obj bin\gf_vect_mad_avx2.obj bin\gf_vect_mad_avx512.obj bin\gf_vect_mad_sse.obj bin\gf_vect_mul_avx.obj bin\gf_vect_mul_sse.obj bin\pq_check_sse.obj bin\pq_gen_avx.obj bin\pq_gen_avx2.obj bin\pq_gen_sse.obj bin\raid_base.obj bin\raid_multibinary.obj bin\xor_check_sse.obj bin\xor_gen_avx.obj bin\xor_gen_sse.obj
|
||||
|
||||
INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Iinclude/
|
||||
LINKFLAGS = /nologo
|
||||
CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D)
|
||||
AFLAGS = -f win64 $(INCLUDES) $(D)
|
||||
@ -53,13 +54,24 @@ isa-l.dll: $(objs)
|
||||
{erasure_code}.asm.obj:
|
||||
$(AS) $(AFLAGS) -o $@ $?
|
||||
|
||||
{raid}.c.obj:
|
||||
$(CC) $(CFLAGS) /c -Fo$@ $?
|
||||
{raid}.asm.obj:
|
||||
$(AS) $(AFLAGS) -o $@ $?
|
||||
|
||||
|
||||
# Examples
|
||||
ex = xor_example.exe
|
||||
ex: lib $(ex)
|
||||
|
||||
$(ex): $(@B).obj
|
||||
|
||||
.obj.exe:
|
||||
link /out:$@ $(LINKFLAGS) isa-l.lib $?
|
||||
|
||||
# Check tests
|
||||
checks = erasure_code_test.exe erasure_code_update_test.exe gf_inverse_test.exe gf_vect_mul_test.exe
|
||||
checks = erasure_code_test.exe erasure_code_update_test.exe gf_inverse_test.exe gf_vect_mul_test.exe \
|
||||
pq_check_test.exe pq_gen_test.exe xor_check_test.exe xor_gen_test.exe
|
||||
|
||||
checks: lib $(checks)
|
||||
$(checks): $(@B).obj
|
||||
@ -73,7 +85,7 @@ tests: lib $(tests)
|
||||
$(tests): $(@B).obj
|
||||
|
||||
# Performance tests
|
||||
perfs = erasure_code_base_perf.exe erasure_code_perf.exe erasure_code_sse_perf.exe erasure_code_update_perf.exe gf_2vect_dot_prod_sse_perf.exe gf_3vect_dot_prod_sse_perf.exe gf_4vect_dot_prod_sse_perf.exe gf_5vect_dot_prod_sse_perf.exe gf_6vect_dot_prod_sse_perf.exe gf_vect_dot_prod_1tbl.exe gf_vect_dot_prod_avx_perf.exe gf_vect_dot_prod_perf.exe gf_vect_dot_prod_sse_perf.exe gf_vect_mad_perf.exe gf_vect_mul_avx_perf.exe gf_vect_mul_perf.exe gf_vect_mul_sse_perf.exe
|
||||
perfs = erasure_code_base_perf.exe erasure_code_perf.exe erasure_code_sse_perf.exe erasure_code_update_perf.exe gf_2vect_dot_prod_sse_perf.exe gf_3vect_dot_prod_sse_perf.exe gf_4vect_dot_prod_sse_perf.exe gf_5vect_dot_prod_sse_perf.exe gf_6vect_dot_prod_sse_perf.exe gf_vect_dot_prod_1tbl.exe gf_vect_dot_prod_avx_perf.exe gf_vect_dot_prod_perf.exe gf_vect_dot_prod_sse_perf.exe gf_vect_mad_perf.exe gf_vect_mul_avx_perf.exe gf_vect_mul_perf.exe gf_vect_mul_sse_perf.exe pq_gen_perf.exe xor_gen_perf.exe
|
||||
|
||||
perfs: lib $(perfs)
|
||||
$(perfs): $(@B).obj
|
||||
|
@ -27,7 +27,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
########################################################################
|
||||
|
||||
units = erasure_code
|
||||
units = erasure_code raid
|
||||
|
||||
default: lib
|
||||
|
||||
|
302
include/raid.h
Normal file
302
include/raid.h
Normal file
@ -0,0 +1,302 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
#ifndef _RAID_H_
|
||||
#define _RAID_H_
|
||||
|
||||
/**
|
||||
* @file raid.h
|
||||
* @brief Interface to RAID functions - XOR and P+Q calculation.
|
||||
*
|
||||
* This file defines the interface to optimized XOR calculation (RAID5) or P+Q
|
||||
* dual parity (RAID6). Operations are carried out on an array of pointers to
|
||||
* sources and output arrays.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Multi-binary functions */
|
||||
|
||||
/**
|
||||
* @brief Generate XOR parity vector from N sources, runs appropriate version.
|
||||
*
|
||||
* This function determines what instruction sets are enabled and
|
||||
* selects the appropriate version at runtime.
|
||||
*
|
||||
* @param vects Number of source+dest vectors in array.
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param array Array of pointers to source and dest. For XOR the dest is
|
||||
* the last pointer. ie array[vects-1]. Src and dest
|
||||
* pointers must be aligned to 32B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int xor_gen(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks that array has XOR parity sum of 0 across all vectors, runs appropriate version.
|
||||
*
|
||||
* This function determines what instruction sets are enabled and
|
||||
* selects the appropriate version at runtime.
|
||||
*
|
||||
* @param vects Number of vectors in array.
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param array Array of pointers to vectors. Src and dest pointers
|
||||
* must be aligned to 16B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int xor_check(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Generate P+Q parity vectors from N sources, runs appropriate version.
|
||||
*
|
||||
* This function determines what instruction sets are enabled and
|
||||
* selects the appropriate version at runtime.
|
||||
*
|
||||
* @param vects Number of source+dest vectors in array.
|
||||
* @param len Length of each vector in bytes. Must be 32B aligned.
|
||||
* @param array Array of pointers to source and dest. For P+Q the dest
|
||||
* is the last two pointers. ie array[vects-2],
|
||||
* array[vects-1]. P and Q parity vectors are
|
||||
* written to these last two pointers. Src and dest
|
||||
* pointers must be aligned to 32B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int pq_gen(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks that array of N sources, P and Q are consistent across all vectors, runs appropriate version.
|
||||
*
|
||||
* This function determines what instruction sets are enabled and
|
||||
* selects the appropriate version at runtime.
|
||||
*
|
||||
* @param vects Number of vectors in array including P&Q.
|
||||
* @param len Length of each vector in bytes. Must be 16B aligned.
|
||||
* @param array Array of pointers to source and P, Q. P and Q parity
|
||||
* are assumed to be the last two pointers in the array.
|
||||
* All pointers must be aligned to 16B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int pq_check(int vects, int len, void **array);
|
||||
|
||||
|
||||
/* Arch specific versions */
|
||||
|
||||
/**
|
||||
* @brief Generate XOR parity vector from N sources.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param vects Number of source+dest vectors in array.
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param array Array of pointers to source and dest. For XOR the dest is
|
||||
* the last pointer. ie array[vects-1]. Src and dest pointers
|
||||
* must be aligned to 16B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int xor_gen_sse(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Generate XOR parity vector from N sources.
|
||||
* @requires AVX
|
||||
*
|
||||
* @param vects Number of source+dest vectors in array.
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param array Array of pointers to source and dest. For XOR the dest is
|
||||
* the last pointer. ie array[vects-1]. Src and dest pointers
|
||||
* must be aligned to 32B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int xor_gen_avx(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks that array has XOR parity sum of 0 across all vectors.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param vects Number of vectors in array.
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param array Array of pointers to vectors. Src and dest pointers
|
||||
* must be aligned to 16B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int xor_check_sse(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Generate P+Q parity vectors from N sources.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param vects Number of source+dest vectors in array.
|
||||
* @param len Length of each vector in bytes. Must be 16B aligned.
|
||||
* @param array Array of pointers to source and dest. For P+Q the dest
|
||||
* is the last two pointers. ie array[vects-2],
|
||||
* array[vects-1]. P and Q parity vectors are
|
||||
* written to these last two pointers. Src and dest
|
||||
* pointers must be aligned to 16B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int pq_gen_sse(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Generate P+Q parity vectors from N sources.
|
||||
* @requires AVX
|
||||
*
|
||||
* @param vects Number of source+dest vectors in array.
|
||||
* @param len Length of each vector in bytes. Must be 16B aligned.
|
||||
* @param array Array of pointers to source and dest. For P+Q the dest
|
||||
* is the last two pointers. ie array[vects-2],
|
||||
* array[vects-1]. P and Q parity vectors are
|
||||
* written to these last two pointers. Src and dest
|
||||
* pointers must be aligned to 16B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int pq_gen_avx(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Generate P+Q parity vectors from N sources.
|
||||
* @requires AVX2
|
||||
*
|
||||
* @param vects Number of source+dest vectors in array.
|
||||
* @param len Length of each vector in bytes. Must be 32B aligned.
|
||||
* @param array Array of pointers to source and dest. For P+Q the dest
|
||||
* is the last two pointers. ie array[vects-2],
|
||||
* array[vects-1]. P and Q parity vectors are
|
||||
* written to these last two pointers. Src and dest
|
||||
* pointers must be aligned to 32B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int pq_gen_avx2(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks that array of N sources, P and Q are consistent across all vectors.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param vects Number of vectors in array including P&Q.
|
||||
* @param len Length of each vector in bytes. Must be 16B aligned.
|
||||
* @param array Array of pointers to source and P, Q. P and Q parity
|
||||
* are assumed to be the last two pointers in the array.
|
||||
* All pointers must be aligned to 16B.
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int pq_check_sse(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Generate P+Q parity vectors from N sources, runs baseline version.
|
||||
* @param vects Number of source+dest vectors in array.
|
||||
* @param len Length of each vector in bytes. Must be 16B aligned.
|
||||
* @param array Array of pointers to source and dest. For P+Q the dest
|
||||
* is the last two pointers. ie array[vects-2],
|
||||
* array[vects-1]. P and Q parity vectors are
|
||||
* written to these last two pointers. Src and dest pointers
|
||||
* must be aligned to 16B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int pq_gen_base(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Generate XOR parity vector from N sources, runs baseline version.
|
||||
* @param vects Number of source+dest vectors in array.
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param array Array of pointers to source and dest. For XOR the dest is
|
||||
* the last pointer. ie array[vects-1]. Src and dest pointers
|
||||
* must be aligned to 32B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int xor_gen_base(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks that array has XOR parity sum of 0 across all vectors, runs baseline version.
|
||||
*
|
||||
* @param vects Number of vectors in array.
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param array Array of pointers to vectors. Src and dest pointers
|
||||
* must be aligned to 16B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int xor_check_base(int vects, int len, void **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks that array of N sources, P and Q are consistent across all vectors, runs baseline version.
|
||||
*
|
||||
* @param vects Number of vectors in array including P&Q.
|
||||
* @param len Length of each vector in bytes. Must be 16B aligned.
|
||||
* @param array Array of pointers to source and P, Q. P and Q parity
|
||||
* are assumed to be the last two pointers in the array.
|
||||
* All pointers must be aligned to 16B.
|
||||
*
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int pq_check_base(int vects, int len, void **array);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_RAID_H_
|
15
isa-l.def
15
isa-l.def
@ -54,3 +54,18 @@ gf_vect_mul @50
|
||||
ec_encode_data_update @51
|
||||
gf_vect_dot_prod @52
|
||||
gf_vect_mad @53
|
||||
xor_gen @54
|
||||
xor_check @55
|
||||
pq_gen @56
|
||||
pq_check @57
|
||||
xor_gen_sse @58
|
||||
xor_gen_avx @59
|
||||
xor_check_sse @60
|
||||
pq_gen_sse @61
|
||||
pq_gen_avx @62
|
||||
pq_gen_avx2 @63
|
||||
pq_check_sse @64
|
||||
pq_gen_base @65
|
||||
xor_gen_base @66
|
||||
xor_check_base @67
|
||||
pq_check_base @68
|
||||
|
45
raid/Makefile.am
Normal file
45
raid/Makefile.am
Normal file
@ -0,0 +1,45 @@
|
||||
########################################################################
|
||||
# Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
########################################################################
|
||||
|
||||
lsrc += raid/xor_gen_sse.asm raid/pq_gen_sse.asm raid/xor_check_sse.asm \
|
||||
raid/pq_check_sse.asm raid/pq_gen_avx.asm \
|
||||
raid/xor_gen_avx.asm raid/pq_gen_avx2.asm \
|
||||
raid/raid_base.c raid/raid_multibinary.asm
|
||||
|
||||
extern_hdrs += include/raid.h
|
||||
|
||||
other_src += include/test.h include/types.h
|
||||
|
||||
check_tests += raid/xor_gen_test raid/pq_gen_test raid/xor_check_test raid/pq_check_test
|
||||
|
||||
perf_tests += raid/xor_gen_perf raid/pq_gen_perf
|
||||
|
||||
examples += raid/xor_example
|
||||
|
||||
lsrc32 += xor_gen_sse.asm pq_gen_sse_i32.asm xor_check_sse.asm pq_check_sse_i32.asm raid_base.c
|
277
raid/pq_check_sse.asm
Normal file
277
raid/pq_check_sse.asm
Normal file
@ -0,0 +1,277 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; Optimized P+Q parity check of N source vectors using SSE4.1 (ptest)
|
||||
;;; int pq_check_sse(int vects, int len, void **array)
|
||||
|
||||
;;; Checks P+Q parity computed from N (vects-2) sources in array of pointers
|
||||
;;; (**array). Last two pointers are the stored P and Q parity vectors respectively.
|
||||
;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
;; elf64 build: map the generic argN/tmp/return names onto registers.
;; arg0..arg2 carry vects, len and array for pq_check_sse; arg3 is reused
;; as the scratch pointer `ptr` further down.  FUNC_SAVE and FUNC_RESTORE
;; are defined empty on this path, so the function has no prologue or
;; epilogue work of its own here.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp3 arg4
|
||||
%define return rax
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
;; win64 build: map the generic argN/tmp/return names onto registers and
;; provide the prologue/epilogue that preserves the XMM registers this
;; file uses as xtmp2, xs2, xp3, xq3, xtmp3, xs3 (xmm6-xmm11) and xpoly
;; (xmm15), which the Windows x64 ABI treats as callee-saved.
%ifidn __OUTPUT_FORMAT__, win64
 %define arg0 rcx
 %define arg1 rdx
 %define arg2 r8
 %define arg3 r9
 %define tmp r11
 %define tmp3 r10
 %define return rax
 ;; Seven 16-byte save slots (0..6) plus 8 bytes of padding.
 %define stack_size 7*16 + 8 ; must be an odd multiple of 8
 %define func(x) proc_frame x

 ;; FUNC_SAVE: allocate the frame and spill each preserved XMM register
 ;; into its own slot.  Slot numbers must stay in step with FUNC_RESTORE.
 %macro FUNC_SAVE 0
        alloc_stack stack_size
        save_xmm128 xmm6, 0*16
        save_xmm128 xmm7, 1*16
        save_xmm128 xmm8, 2*16
        save_xmm128 xmm9, 3*16
        save_xmm128 xmm10, 4*16
        save_xmm128 xmm11, 5*16
        save_xmm128 xmm15, 6*16
        end_prolog
 %endmacro

 ;; FUNC_RESTORE: reload every register from the slot FUNC_SAVE wrote it
 ;; to, then release the frame.
 %macro FUNC_RESTORE 0
        movdqa xmm6, [rsp + 0*16]
        movdqa xmm7, [rsp + 1*16]
        movdqa xmm8, [rsp + 2*16]
        movdqa xmm9, [rsp + 3*16]
        movdqa xmm10, [rsp + 4*16]
        movdqa xmm11, [rsp + 5*16]
        ;; xmm15 lives in slot 6 (see FUNC_SAVE).  The previous 9*16 offset
        ;; read past the stack_size-byte frame and returned a corrupted
        ;; xmm15 to the caller.
        movdqa xmm15, [rsp + 6*16]
        add rsp, stack_size
 %endmacro
%endif
|
||||
|
||||
;; Working-register aliases for pq_check_sse.
;;  vec - arg0 (vects); after `sub vec, 3` it is used as the index of the
;;        last source pointer in the array
;;  len - arg1, vector length in bytes
;;  ptr - arg3, not a parameter of this 3-argument function; scratch pointer
;;  pos - shares rax with `return`; byte offset while looping, overwritten
;;        with the 0/1 result at exit
%define vec arg0
|
||||
%define len arg1
|
||||
%define ptr arg3
|
||||
%define pos return
|
||||
|
||||
;; Lane 1 (bytes pos .. pos+15): P accumulator, Q accumulator, temp, source
%define xp1 xmm0
|
||||
%define xq1 xmm1
|
||||
%define xtmp1 xmm2
|
||||
%define xs1 xmm3
|
||||
|
||||
;; Lane 2 (bytes pos+16 .. pos+31), used by the 48-byte loop only
%define xp2 xmm4
|
||||
%define xq2 xmm5
|
||||
%define xtmp2 xmm6
|
||||
%define xs2 xmm7
|
||||
|
||||
;; Lane 3 (bytes pos+32 .. pos+47), used by the 48-byte loop only
%define xp3 xmm8
|
||||
%define xq3 xmm9
|
||||
%define xtmp3 xmm10
|
||||
%define xs3 xmm11
|
||||
|
||||
;; Holds the 16-byte `poly` constant loaded once at function entry
%define xpoly xmm15
|
||||
|
||||
;;; Load/store instruction selection.  Loads (XLDR) are movdqa in both
;;; configurations; only the store (XSTR) differs: movntdq (non-temporal)
;;; by default, movdqa when NO_NT_LDST is defined.  pq_check_sse below
;;; only uses XLDR.
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
align 16
|
||||
global pq_check_sse:function
|
||||
;; int pq_check_sse(int vects, int len, void **array)
;;
;; Recomputes P (XOR of all sources) and Q from the vects-2 source vectors
;; and compares them with the stored P and Q held in the last two array
;; entries.  Result in `return` (rax): 0 when every byte matches or len is
;; 0; 1 on any mismatch, when vects is less than 4, or when len is not a
;; multiple of 16.
;;
;; Q is accumulated from the last source down to source 0: for each source
;; q = (q ^ s), then every byte is shifted left by one and XORed with the
;; `poly` byte if its top bit was set; source 0 is XORed in last without
;; the shift step.
;;
;; Data is processed 48 bytes per iteration while at least 48 bytes
;; remain, then 16 bytes per iteration for the tail.
func(pq_check_sse)
|
||||
FUNC_SAVE
|
||||
sub vec, 3 ;Keep as offset to last source
|
||||
jng return_fail ;Must have at least 2 sources
|
||||
cmp len, 0
|
||||
je return_pass ;Zero length: nothing to check
|
||||
test len, (16-1) ;Check alignment of length
|
||||
jnz return_fail
|
||||
mov pos, 0
|
||||
movdqa xpoly, [poly]
|
||||
cmp len, 48
|
||||
jl loop16 ;Less than one 48B chunk: use the 16B loop only
|
||||
|
||||
len_aligned_32bytes:
|
||||
sub len, 48 ;Bias len so that pos <= len means a full 48B chunk remains
|
||||
|
||||
loop48:
|
||||
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
|
||||
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector (tmp is overwritten by `mov tmp, vec` below before any use)
|
||||
XLDR xp1, [ptr+pos] ;Initialize xp1 with P1 src
|
||||
XLDR xp2, [ptr+pos+16] ;Initialize xp2 with P2 src + 16B ahead
|
||||
XLDR xp3, [ptr+pos+32] ;Initialize xp3 with P3 src + 32B ahead
|
||||
pxor xq1, xq1 ;q1 = 0
|
||||
pxor xq2, xq2 ;q2 = 0
|
||||
pxor xq3, xq3 ;q3 = 0
|
||||
|
||||
mov ptr, [arg2+vec*8] ;Fetch last source pointer
|
||||
mov tmp, vec ;Set tmp to point back to last vector
|
||||
XLDR xs1, [ptr+pos] ;Preload last vector (source)
|
||||
XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
|
||||
XLDR xs3, [ptr+pos+32] ;Preload last vector (source)
|
||||
|
||||
next_vect:
|
||||
sub tmp, 1 ;Inner loop for each source vector; sets the flags tested by jg below
|
||||
mov ptr, [arg2+tmp*8] ; get pointer to next vect
|
||||
pxor xp1, xs1 ; p1 ^= s1
|
||||
pxor xp2, xs2 ; p2 ^= s2
|
||||
pxor xp3, xs3 ; p3 ^= s3
|
||||
pxor xq1, xs1 ; q1 ^= s1
|
||||
pxor xq2, xs2 ; q2 ^= s2
|
||||
pxor xq3, xs3 ; q3 ^= s3
|
||||
pxor xtmp1, xtmp1 ; xtmp1 = 0 - for compare to 0
|
||||
pxor xtmp2, xtmp2 ; xtmp2 = 0
|
||||
pxor xtmp3, xtmp3 ; xtmp3 = 0
|
||||
pcmpgtb xtmp1, xq1 ; xtmp1 = mask 0xff or 0x00 if bit7 set
|
||||
pcmpgtb xtmp2, xq2 ; xtmp2 = mask 0xff or 0x00 if bit7 set
|
||||
pcmpgtb xtmp3, xq3 ; xtmp3 = mask 0xff or 0x00 if bit7 set
|
||||
pand xtmp1, xpoly ; xtmp1 = poly or 0x00
|
||||
pand xtmp2, xpoly ; xtmp2 = poly or 0x00
|
||||
pand xtmp3, xpoly ; xtmp3 = poly or 0x00
|
||||
XLDR xs1, [ptr+pos] ; Get next vector (source data1)
|
||||
XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
|
||||
XLDR xs3, [ptr+pos+32] ; Get next vector (source data3)
|
||||
paddb xq1, xq1 ; q1 = q1<<1
|
||||
paddb xq2, xq2 ; q2 = q2<<1
|
||||
paddb xq3, xq3 ; q3 = q3<<1
|
||||
pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
|
||||
pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
|
||||
pxor xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
|
||||
jg next_vect ; Loop for each vect except 0 (flags from `sub tmp, 1`; no instruction in between writes them)
|
||||
|
||||
pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
|
||||
pxor xq1, xs1 ;q1 ^= 1 * s1[0]
|
||||
pxor xp2, xs2 ;p2 ^= s2[0]
|
||||
pxor xq2, xs2 ;q2 ^= 1 * s2[0]
|
||||
pxor xp3, xs3 ;p3 ^= s3[0]
|
||||
pxor xq3, xs3 ;q3 ^= 1 * s3[0]
|
||||
|
||||
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
|
||||
XLDR xtmp1, [tmp+pos] ;load stored Q1 into xtmp1
|
||||
XLDR xtmp2, [tmp+pos+16] ;load stored Q2 (+16B) into xtmp2
|
||||
XLDR xtmp3, [tmp+pos+32] ;load stored Q3 (+32B) into xtmp3
|
||||
|
||||
pxor xq1, xtmp1 ;xq1 = q1 calculated ^ q1 saved
|
||||
pxor xq2, xtmp2 ;xq2 = q2 calculated ^ q2 saved
|
||||
pxor xq3, xtmp3 ;xq3 = q3 calculated ^ q3 saved
|
||||
|
||||
por xp1, xq1 ;Confirm that all P&Q parity are 0
|
||||
por xp1, xp2
|
||||
por xp1, xq2
|
||||
por xp1, xp3
|
||||
por xp1, xq3
|
||||
ptest xp1, xp1 ;ZF is clear if any bit of the combined differences is set
|
||||
jnz return_fail
|
||||
add pos, 48
|
||||
cmp pos, len ;len is still biased by -48 here
|
||||
jle loop48
|
||||
|
||||
|
||||
;; ------------------------------
|
||||
;; Do last 16 or 32 Bytes remaining
|
||||
add len, 48 ;Undo the -48 bias applied before loop48
|
||||
cmp pos, len
|
||||
je return_pass
|
||||
|
||||
loop16:
|
||||
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
|
||||
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector (tmp is overwritten by `mov tmp, vec` below before any use)
|
||||
XLDR xp1, [ptr+pos] ;Initialize xp1 with P1 src
|
||||
pxor xq1, xq1 ;q = 0
|
||||
mov ptr, [arg2+vec*8] ;Fetch last source pointer
|
||||
mov tmp, vec ;Set tmp to point back to last vector
|
||||
XLDR xs1, [ptr+pos] ;Preload last vector (source)
|
||||
|
||||
next_vect16:
|
||||
sub tmp, 1 ;Inner loop for each source vector; sets the flags tested by jg below
|
||||
mov ptr, [arg2+tmp*8] ; get pointer to next vect
|
||||
pxor xq1, xs1 ; q ^= s
|
||||
pxor xtmp1, xtmp1 ; xtmp = 0
|
||||
pcmpgtb xtmp1, xq1 ; xtmp = mask 0xff or 0x00 if bit7 set
|
||||
pand xtmp1, xpoly ; xtmp = poly or 0x00
|
||||
pxor xp1, xs1 ; p ^= s
|
||||
paddb xq1, xq1 ; q = q<<1
|
||||
pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
|
||||
XLDR xs1, [ptr+pos] ; Get next vector (source data)
|
||||
jg next_vect16 ; Loop for each vect except 0 (flags from `sub tmp, 1`)
|
||||
|
||||
pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
|
||||
pxor xq1, xs1 ;q ^= 1 * s[0]
|
||||
|
||||
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
|
||||
XLDR xtmp1, [tmp+pos] ;load stored Q1 into xtmp1
|
||||
pxor xq1, xtmp1 ;xq1 = q1 calculated ^ q1 saved
|
||||
|
||||
por xp1, xq1 ;Confirm that all P&Q parity are = 0
|
||||
ptest xp1, xp1
|
||||
jnz return_fail
|
||||
add pos, 16
|
||||
cmp pos, len
|
||||
jl loop16
|
||||
|
||||
|
||||
;; Both exits run FUNC_RESTORE; FUNC_SAVE has always executed by the time
;; either label is reached.
return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
;; NOTE(review): endproc_frame pairs with the win64 `proc_frame` in func(x);
;; it is emitted unconditionally, so the elf64 build presumably gets a no-op
;; definition from reg_sizes.asm - confirm.
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
;; 16 bytes of 0x1d, loaded into xpoly with movdqa (hence the 16-byte
;; alignment).  ANDed with the per-byte bit-7 mask and XORed into Q after
;; each left shift.
;; NOTE(review): 0x1d is presumably the low byte of the GF(2^8) polynomial
;; 0x11d used by the baseline implementation - confirm against raid_base.c.
poly:
|
||||
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
|
||||
|
||||
;;; func core, ver, snum
|
||||
slversion pq_check_sse, 00, 06, 0033
|
282
raid/pq_check_sse_i32.asm
Normal file
282
raid/pq_check_sse_i32.asm
Normal file
@ -0,0 +1,282 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; Optimized pq check of N source vectors using SSE (ptest requires SSE4.1)
|
||||
;;; int pq_check_sse(int vects, int len, void **array)
|
||||
|
||||
;;; Checks P+Q parity vectors against N (vects-2) sources in array of pointers
|
||||
;;; (**array). Last two pointers are the P and Q parity vectors respectively; returns 0 if both match, 1 otherwise.
|
||||
;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define tmp r11
|
||||
%define stack_size 3*16 + 8 ; must be an odd multiple of 8
%define func(x) proc_frame x

;; Win64 prologue. xmm6-xmm15 are callee-saved in the Microsoft x64 ABI.
;; This routine uses xmm6 (xtmp2) and xmm7 (xs2), and in 64-bit builds it
;; also keeps the polynomial in xmm15 (xpoly), so all three must be
;; preserved. The odd multiple of 8 keeps rsp 16-byte aligned for the
;; aligned saves/restores below.
%macro FUNC_SAVE 0
	alloc_stack	stack_size
	save_xmm128	xmm6, 0*16
	save_xmm128	xmm7, 1*16
	save_xmm128	xmm15, 2*16
	end_prolog
%endmacro

;; Win64 epilogue: reload every register saved by FUNC_SAVE and release
;; the stack area.
%macro FUNC_RESTORE 0
	movdqa	xmm6, [rsp + 0*16]
	movdqa	xmm7, [rsp + 1*16]
	movdqa	xmm15, [rsp + 2*16]
	add	rsp, stack_size
%endmacro
|
||||
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf32
|
||||
%define arg0 edx
|
||||
%define arg1 ecx
|
||||
%define return eax
|
||||
%define PS 4
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp+8+PS*x]
|
||||
%define arg2 edi ; must sav/restore
|
||||
%define arg3 esi
|
||||
%define tmp ebx
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg0, arg(0)
|
||||
mov arg1, arg(1)
|
||||
mov arg2, arg(2)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
mov esp, ebp ;if has frame pointer?
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define vec arg0
|
||||
%define len arg1
|
||||
%define ptr arg3
|
||||
%define pos return
|
||||
|
||||
%define xp1 xmm0
|
||||
%define xq1 xmm1
|
||||
%define xtmp1 xmm2
|
||||
%define xs1 xmm3
|
||||
|
||||
%define xp2 xmm4
|
||||
%define xq2 xmm5
|
||||
%define xtmp2 xmm6
|
||||
%define xs2 xmm7
|
||||
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%define xpoly xmm15
|
||||
%elifidn PS,4 ; 32-bit code
|
||||
%define xpoly [poly]
|
||||
%endif
|
||||
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
|
||||
section .text
|
||||
|
||||
align 16
|
||||
global pq_check_sse:function
|
||||
func(pq_check_sse)
|
||||
FUNC_SAVE
|
||||
sub vec, 3 ;Keep as offset to last source
|
||||
jng return_fail ;Must have at least 2 sources
|
||||
cmp len, 0
|
||||
je return_pass
|
||||
test len, (16-1) ;Check alignment of length
|
||||
jnz return_fail
|
||||
mov pos, 0
|
||||
%ifidn PS,8
|
||||
movdqa xpoly, [poly] ;For 64-bit, load poly into high xmm reg
|
||||
%endif
|
||||
cmp len, 32
|
||||
jl loop16
|
||||
|
||||
len_aligned_32bytes:
|
||||
sub len, 32 ;Do end of vec first and run backward
|
||||
|
||||
;; Start of one 32-byte stripe at offset pos.
;; The P accumulators are seeded with the stored P parity, so XORing in
;; every source leaves them zero when the parity is correct. The Q
;; accumulators start at zero and are compared with the stored Q parity
;; after the source loop. Falls through into next_vect with the last
;; source preloaded in xs1/xs2 and tmp holding its index.
loop32:
	mov	ptr, [arg2+PS+vec*PS]	;Get address of P parity vector
	XLDR	xp1, [ptr+pos]		;Initialize xp1 with P1 src
	XLDR	xp2, [ptr+pos+16]	;Initialize xp2 with P2 src + 16B ahead
	pxor	xq1, xq1		;q1 = 0
	pxor	xq2, xq2		;q2 = 0

	mov	ptr, [arg2+vec*PS]	;Fetch last source pointer
	mov	tmp, vec		;Set tmp to point back to last vector
	XLDR	xs1, [ptr+pos]		;Preload last vector (source)
	XLDR	xs2, [ptr+pos+16]	;Preload last vector (source)
|
||||
|
||||
;; Inner loop of the 32-byte check: folds the source held in xs1/xs2 into
;; the P and Q accumulators, multiplies Q by 2 in GF(2^8), and preloads the
;; next lower source. The code after the loop folds in source 0, XORs the
;; computed Q with the stored Q parity and fails if any bit is left set.
;; NOTE: the jg at the bottom consumes the flags produced by "sub tmp, 1";
;; the mov, SSE arithmetic and XLDR instructions in between do not modify
;; EFLAGS, so no flag-writing instruction may be inserted between them.
next_vect:
|
||||
sub tmp, 1 ;Inner loop for each source vector
|
||||
mov ptr, [arg2+tmp*PS] ; get pointer to next vect
|
||||
pxor xp1, xs1 ; p1 ^= s1
|
||||
pxor xp2, xs2 ; p2 ^= s2
|
||||
pxor xq1, xs1 ; q1 ^= s1
|
||||
pxor xq2, xs2 ; q2 ^= s2
|
||||
pxor xtmp1, xtmp1 ; xtmp1 = 0 - for compare to 0
|
||||
pxor xtmp2, xtmp2 ; xtmp2 = 0
|
||||
pcmpgtb xtmp1, xq1 ; xtmp1 = 0xff where q1 byte is negative (bit7 set), else 0x00
|
||||
pcmpgtb xtmp2, xq2 ; xtmp2 = 0xff where q2 byte is negative (bit7 set), else 0x00
|
||||
pand xtmp1, xpoly ; xtmp1 = poly or 0x00
|
||||
pand xtmp2, xpoly ; xtmp2 = poly or 0x00
|
||||
XLDR xs1, [ptr+pos] ; Get next vector (source data1)
|
||||
XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
|
||||
paddb xq1, xq1 ; q1 = q1<<1
|
||||
paddb xq2, xq2 ; q2 = q2<<1
|
||||
pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
|
||||
pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
|
||||
jg next_vect ; Loop for each vect except 0 (flags from sub tmp, 1)
|
||||
|
||||
pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
|
||||
pxor xq1, xs1 ;q1 ^= 1 * s1[0]
|
||||
pxor xp2, xs2 ;p2 ^= s2[0]
|
||||
pxor xq2, xs2 ;q2 ^= 1 * s2[0]
|
||||
|
||||
mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
|
||||
XLDR xtmp1, [tmp+pos] ;load stored Q1 parity into xtmp1
|
||||
XLDR xtmp2, [tmp+pos+16] ;load stored Q2 parity (16B ahead) into xtmp2
|
||||
|
||||
pxor xq1, xtmp1 ;xq1 = q1 calculated ^ q1 saved
|
||||
pxor xq2, xtmp2 ;xq2 = q2 calculated ^ q2 saved
|
||||
|
||||
por xp1, xq1 ;Confirm that all P&Q parity are 0
|
||||
por xp1, xp2
|
||||
por xp1, xq2
|
||||
ptest xp1, xp1 ;ZF = 1 only if every accumulated bit is 0
|
||||
jnz return_fail
|
||||
add pos, 32
|
||||
cmp pos, len
|
||||
jle loop32
|
||||
|
||||
|
||||
;; ------------------------------
|
||||
;; Do last 16 Bytes remaining
|
||||
add len, 32
|
||||
cmp pos, len
|
||||
je return_pass
|
||||
|
||||
;; Start of one 16-byte stripe at offset pos (used for lengths below 32
;; bytes and for the 16-byte remainder after loop32).
;; xp1 is seeded with the stored P parity and xq1 starts at zero; falls
;; through into next_vect16 with the last source preloaded in xs1 and tmp
;; holding its index.
loop16:
	mov	ptr, [arg2+PS+vec*PS]	;Get address of P parity vector
	XLDR	xp1, [ptr+pos]		;Initialize xp1 with P1 src
	pxor	xq1, xq1		;q = 0
	mov	ptr, [arg2+vec*PS]	;Fetch last source pointer
	mov	tmp, vec		;Set tmp to point back to last vector
	XLDR	xs1, [ptr+pos]		;Preload last vector (source)
|
||||
|
||||
;; Inner loop of the 16-byte check: folds the source held in xs1 into the
;; P and Q accumulators, multiplies Q by 2 in GF(2^8), and preloads the
;; next lower source. The code after the loop folds in source 0, XORs the
;; computed Q with the stored Q parity and fails if any bit is left set.
;; NOTE: the jg at the bottom consumes the flags produced by "sub tmp, 1";
;; the mov, SSE arithmetic and XLDR instructions in between do not modify
;; EFLAGS, so no flag-writing instruction may be inserted between them.
next_vect16:
|
||||
sub tmp, 1 ;Inner loop for each source vector
|
||||
mov ptr, [arg2+tmp*PS] ; get pointer to next vect
|
||||
pxor xq1, xs1 ; q ^= s
|
||||
pxor xtmp1, xtmp1 ; xtmp = 0
|
||||
pcmpgtb xtmp1, xq1 ; xtmp = 0xff where q byte is negative (bit7 set), else 0x00
|
||||
pand xtmp1, xpoly ; xtmp = poly or 0x00
|
||||
pxor xp1, xs1 ; p ^= s
|
||||
paddb xq1, xq1 ; q = q<<1
|
||||
pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
|
||||
XLDR xs1, [ptr+pos] ; Get next vector (source data)
|
||||
jg next_vect16 ; Loop for each vect except 0 (flags from sub tmp, 1)
|
||||
|
||||
pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
|
||||
pxor xq1, xs1 ;q ^= 1 * s[0]
|
||||
|
||||
mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
|
||||
XLDR xtmp1, [tmp+pos] ;load stored Q parity into xtmp1
|
||||
pxor xq1, xtmp1 ;xq1 = q1 calculated ^ q1 saved
|
||||
|
||||
por xp1, xq1 ;Confirm that all P&Q parity are = 0
|
||||
ptest xp1, xp1 ;ZF = 1 only if every accumulated bit is 0
|
||||
jnz return_fail
|
||||
add pos, 16
|
||||
cmp pos, len
|
||||
jl loop16
|
||||
|
||||
|
||||
return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
|
||||
return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
poly:
|
||||
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
|
||||
|
||||
;;; func core, ver, snum
|
||||
slversion pq_check_sse, 00, 06, 0033
|
304
raid/pq_check_test.c
Normal file
304
raid/pq_check_test.c
Normal file
@ -0,0 +1,304 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include<stdio.h>
|
||||
#include<stdint.h>
|
||||
#include<string.h>
|
||||
#include<stdlib.h>
|
||||
#include "raid.h"
|
||||
#include "types.h"
|
||||
|
||||
#define TEST_SOURCES 16
|
||||
#define TEST_LEN 1024
|
||||
#define TEST_MEM ((TEST_SOURCES + 2)*(TEST_LEN))
|
||||
#ifndef TEST_SEED
|
||||
# define TEST_SEED 0x1234
|
||||
#endif
|
||||
|
||||
/*
 * Byte-at-a-time reference P+Q parity generator used to build known-good
 * parity for the checks in this test.
 *
 * array holds vects buffer pointers: the first vects-2 are sources, the
 * second to last receives P and the last receives Q. For each of the len
 * byte positions, P is the XOR of all source bytes; Q is accumulated from
 * the last source down to source 0, doubling the running value in GF(2^8)
 * (reduction constant 0x1d) before each further source byte is XORed in.
 * Always returns 0.
 */
int ref_multi_pq(int vects, int len, void **array)
{
	unsigned char **vec = (unsigned char **)array;
	int pos;

	for (pos = 0; pos < len; pos++) {
		int v = vects - 3;	// index of the last source buffer
		unsigned char data = vec[v][pos];
		unsigned char par_p = data;
		unsigned char par_q = data;

		while (--v >= 0) {
			// 0x1d folds the bit shifted out of the top back into the byte
			unsigned char reduce = (par_q & 0x80) ? 0x1d : 0;

			data = vec[v][pos];
			par_p ^= data;
			par_q = (unsigned char)(par_q << 1) ^ reduce ^ data;	// q = 2*q + s in GF(2^8)
		}

		vec[vects - 2][pos] = par_p;	// second to last pointer is p
		vec[vects - 1][pos] = par_q;	// last pointer is q
	}

	return 0;
}
|
||||
|
||||
// Generates pseudo-random data
|
||||
|
||||
// Fills the first buffer_size bytes of buf with successive rand() values;
// does nothing when buffer_size is zero or negative.
void rand_buffer(unsigned char *buf, long buffer_size)
{
	unsigned char *out = buf;
	long remaining;

	for (remaining = buffer_size; remaining > 0; remaining--)
		*out++ = rand();	// store keeps only the low byte of rand()
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, k, ret, fail = 0;
|
||||
void *buffs[TEST_SOURCES + 2];
|
||||
char c;
|
||||
char *tmp_buf[TEST_SOURCES + 2];
|
||||
int serr, lerr;
|
||||
|
||||
printf("Test pq_check_test %d sources X %d bytes\n", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
srand(TEST_SEED);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES + 2; i++) {
|
||||
void *buf;
|
||||
if (posix_memalign(&buf, 16, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return 1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES + 2; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
ref_multi_pq(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
if (ret != 0) {
|
||||
fail++;
|
||||
printf("\nfail zero test %d\n", ret);
|
||||
}
|
||||
|
||||
((char *)(buffs[0]))[TEST_LEN - 2] = 0x7; // corrupt buffer
|
||||
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
if (ret == 0) {
|
||||
fail++;
|
||||
printf("\nfail corrupt buffer test %d\n", ret);
|
||||
}
|
||||
((char *)(buffs[0]))[TEST_LEN - 2] = 0; // un-corrupt buffer
|
||||
|
||||
// Test corrupted buffer any location on all sources
|
||||
for (j = 0; j < TEST_SOURCES + 2; j++) {
|
||||
for (i = TEST_LEN - 1; i >= 0; i--) {
|
||||
((char *)buffs[j])[i] = 0x5; // corrupt buffer
|
||||
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
if (ret == 0) {
|
||||
fail++;
|
||||
printf("\nfail corrupt zero buffer test j=%d, i=%d\n", j, i);
|
||||
return 1;
|
||||
}
|
||||
((char *)buffs[j])[i] = 0; // un-corrupt buffer
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test rand1
|
||||
for (i = 0; i < TEST_SOURCES + 2; i++)
|
||||
rand_buffer(buffs[i], TEST_LEN);
|
||||
|
||||
ref_multi_pq(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
if (ret != 0) {
|
||||
fail++;
|
||||
printf("fail first rand test %d\n", ret);
|
||||
}
|
||||
|
||||
c = ((char *)(buffs[0]))[TEST_LEN - 2];
|
||||
((char *)(buffs[0]))[TEST_LEN - 2] = c ^ 0x1;
|
||||
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
if (ret == 0) {
|
||||
fail++;
|
||||
printf("\nFail corrupt buffer test, passed when should have failed\n");
|
||||
}
|
||||
((char *)(buffs[0]))[TEST_LEN - 2] = c; // un-corrupt buffer
|
||||
|
||||
// Test corrupted buffer any location on all sources w/ random data
|
||||
for (j = 0; j < TEST_SOURCES + 2; j++) {
|
||||
for (i = TEST_LEN - 1; i >= 0; i--) {
|
||||
// Check it still passes
|
||||
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
if (ret != 0) { // should pass
|
||||
fail++;
|
||||
printf
|
||||
("\nFail rand test with un-corrupted buffer j=%d, i=%d\n",
|
||||
j, i);
|
||||
return 1;
|
||||
}
|
||||
c = ((char *)buffs[j])[i];
|
||||
((char *)buffs[j])[i] = c ^ 1; // corrupt buffer
|
||||
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
if (ret == 0) { // Check it now fails
|
||||
fail++;
|
||||
printf("\nfail corrupt buffer test j=%d, i=%d\n", j, i);
|
||||
return 1;
|
||||
}
|
||||
((char *)buffs[j])[i] = c; // un-corrupt buffer
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test various number of sources, full length
|
||||
for (j = 4; j <= TEST_SOURCES + 2; j++) {
|
||||
// New random data
|
||||
for (i = 0; i < j; i++)
|
||||
rand_buffer(buffs[i], TEST_LEN);
|
||||
|
||||
// Generate p,q parity for this number of sources
|
||||
ref_multi_pq(j, TEST_LEN, buffs);
|
||||
|
||||
// Set errors up in each source and len position
|
||||
for (i = 0; i < j; i++) {
|
||||
for (k = 0; k < TEST_LEN; k++) {
|
||||
// See if it still passes
|
||||
ret = pq_check(j, TEST_LEN, buffs);
|
||||
if (ret != 0) { // Should pass
|
||||
printf("\nfail rand fixed len test %d sources\n", j);
|
||||
fail++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
c = ((char *)buffs[i])[k];
|
||||
((char *)buffs[i])[k] = c ^ 1; // corrupt buffer
|
||||
|
||||
ret = pq_check(j, TEST_LEN, buffs);
|
||||
if (ret == 0) { // Should fail
|
||||
printf
|
||||
("\nfail rand fixed len test corrupted buffer %d sources\n",
|
||||
j);
|
||||
fail++;
|
||||
return 1;
|
||||
}
|
||||
((char *)buffs[i])[k] = c; // un-corrupt buffer
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
fflush(0);
|
||||
|
||||
// Test various number of sources and len
|
||||
k = 16;
|
||||
while (k <= TEST_LEN) {
|
||||
char *tmp;
|
||||
for (j = 4; j <= TEST_SOURCES + 2; j++) {
|
||||
for (i = 0; i < j; i++)
|
||||
rand_buffer(buffs[i], k);
|
||||
|
||||
// Generate p,q parity for this number of sources
|
||||
ref_multi_pq(j, k, buffs);
|
||||
|
||||
// Inject errors at various source and len positions
|
||||
for (lerr = 0; lerr < k; lerr++) {
|
||||
for (serr = 0; serr < j; serr++) {
|
||||
// See if it still passes
|
||||
ret = pq_check(j, k, buffs);
|
||||
if (ret != 0) { // Should pass
|
||||
printf
|
||||
("\nfail rand var src, len test %d sources, len=%d\n",
|
||||
j, k);
|
||||
fail++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
tmp = (char *)buffs[serr];
|
||||
c = tmp[lerr];
|
||||
((char *)buffs[serr])[lerr] = c ^ 1; // corrupt buffer
|
||||
|
||||
ret = pq_check(j, k, buffs);
|
||||
if (ret == 0) { // Should fail
|
||||
printf
|
||||
("\nfail rand var src, len test corrupted buffer "
|
||||
"%d sources, len=%d, ret=%d\n", j, k,
|
||||
ret);
|
||||
fail++;
|
||||
return 1;
|
||||
}
|
||||
((char *)buffs[serr])[lerr] = c; // un-corrupt buffer
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
fflush(0);
|
||||
}
|
||||
k += 16;
|
||||
}
|
||||
|
||||
// Test at the end of buffer
|
||||
for (i = 0; i < TEST_LEN; i += 16) {
|
||||
for (j = 0; j < TEST_SOURCES + 2; j++) {
|
||||
rand_buffer(buffs[j], TEST_LEN - i);
|
||||
tmp_buf[j] = (char *)buffs[j] + i;
|
||||
}
|
||||
|
||||
pq_gen_base(TEST_SOURCES + 2, TEST_LEN - i, (void *)tmp_buf);
|
||||
|
||||
// Test good data
|
||||
ret = pq_check(TEST_SOURCES + 2, TEST_LEN - i, (void *)tmp_buf);
|
||||
if (ret != 0) {
|
||||
printf("fail end test - offset: %d, len: %d\n", i, TEST_LEN - i);
|
||||
fail++;
|
||||
return 1;
|
||||
}
|
||||
// Test bad data
|
||||
for (serr = 0; serr < TEST_SOURCES + 2; serr++) {
|
||||
for (lerr = 0; lerr < (TEST_LEN - i); lerr++) {
|
||||
c = tmp_buf[serr][lerr];
|
||||
tmp_buf[serr][lerr] = c ^ 1;
|
||||
|
||||
ret =
|
||||
pq_check(TEST_SOURCES + 2, TEST_LEN - i, (void *)tmp_buf);
|
||||
if (ret == 0) {
|
||||
printf("fail end test corrupted buffer - "
|
||||
"offset: %d, len: %d, ret: %d\n", i,
|
||||
TEST_LEN - i, ret);
|
||||
fail++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
tmp_buf[serr][lerr] = c;
|
||||
}
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
fflush(0);
|
||||
}
|
||||
|
||||
if (fail == 0)
|
||||
printf("Pass\n");
|
||||
|
||||
return fail;
|
||||
|
||||
}
|
254
raid/pq_gen_avx.asm
Normal file
254
raid/pq_gen_avx.asm
Normal file
@ -0,0 +1,254 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; Optimized pq of N source vectors using AVX
|
||||
;;; int pq_gen_avx(int vects, int len, void **array)
|
||||
|
||||
;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
|
||||
;;; (**array). Last two pointers are the P and Q destinations respectively.
|
||||
;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp3 arg4
|
||||
%define return rax
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define tmp r11
|
||||
%define tmp3 r10
|
||||
%define return rax
|
||||
%define stack_size 8*16 + 8 ; must be an odd multiple of 8
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm14, 6*16
|
||||
save_xmm128 xmm15, 7*16
|
||||
end_prolog
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
movdqa xmm8, [rsp + 2*16]
|
||||
movdqa xmm9, [rsp + 3*16]
|
||||
movdqa xmm10, [rsp + 4*16]
|
||||
movdqa xmm11, [rsp + 5*16]
|
||||
movdqa xmm14, [rsp + 6*16]
|
||||
movdqa xmm15, [rsp + 7*16]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define vec arg0
|
||||
%define len arg1
|
||||
%define ptr arg3
|
||||
%define pos rax
|
||||
|
||||
%define xp1 xmm0
|
||||
%define xq1 xmm1
|
||||
%define xtmp1 xmm2
|
||||
%define xs1 xmm3
|
||||
|
||||
%define xp2 xmm4
|
||||
%define xq2 xmm5
|
||||
%define xtmp2 xmm6
|
||||
%define xs2 xmm7
|
||||
|
||||
%define xp3 xmm8
|
||||
%define xq3 xmm9
|
||||
%define xtmp3 xmm10
|
||||
%define xs3 xmm11
|
||||
|
||||
%define xzero xmm14
|
||||
%define xpoly xmm15
|
||||
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
align 16
|
||||
global pq_gen_avx:function
|
||||
func(pq_gen_avx)
|
||||
FUNC_SAVE
|
||||
sub vec, 3 ;Keep as offset to last source
|
||||
jng return_fail ;Must have at least 2 sources
|
||||
cmp len, 0
|
||||
je return_pass
|
||||
test len, (16-1) ;Check alignment of length
|
||||
jnz return_fail
|
||||
mov pos, 0
|
||||
vmovdqa xpoly, [poly]
|
||||
vpxor xzero, xzero, xzero
|
||||
cmp len, 48
|
||||
jl loop16
|
||||
|
||||
len_aligned_32bytes:
|
||||
sub len, 48 ;Len points to last block
|
||||
|
||||
loop48:
|
||||
mov ptr, [arg2+vec*8] ;Fetch last source pointer
|
||||
mov tmp, vec ;Set tmp to point back to last vector
|
||||
XLDR xs1, [ptr+pos] ;Preload last vector (source)
|
||||
XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
|
||||
XLDR xs3, [ptr+pos+32] ;Preload last vector (source)
|
||||
vpxor xp1, xp1, xp1 ;p1 = 0
|
||||
vpxor xp2, xp2, xp2 ;p2 = 0
|
||||
vpxor xp3, xp3, xp3 ;p3 = 0
|
||||
vpxor xq1, xq1, xq1 ;q1 = 0
|
||||
vpxor xq2, xq2, xq2 ;q2 = 0
|
||||
vpxor xq3, xq3, xq3 ;q3 = 0
|
||||
|
||||
;; Inner loop of the 48-byte generator: folds the source held in
;; xs1/xs2/xs3 into the three P and three Q accumulators, multiplies each
;; Q by 2 in GF(2^8), and preloads the next lower source.
;; NOTE: the jg at the bottom consumes the flags produced by "sub tmp, 1";
;; the mov, AVX arithmetic and XLDR instructions in between do not modify
;; EFLAGS, so no flag-writing instruction may be inserted between them.
next_vect:
|
||||
sub tmp, 1 ;Inner loop for each source vector
|
||||
mov ptr, [arg2+tmp*8] ; get pointer to next vect
|
||||
vpxor xq1, xq1, xs1 ; q1 ^= s1
|
||||
vpxor xq2, xq2, xs2 ; q2 ^= s2
|
||||
vpxor xq3, xq3, xs3 ; q3 ^= s3
|
||||
vpxor xp1, xp1, xs1 ; p1 ^= s1
|
||||
vpxor xp2, xp2, xs2 ; p2 ^= s2
|
||||
vpxor xp3, xp3, xs3 ; p3 ^= s3
|
||||
vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly where q1 byte has bit7 set, else 0x00
|
||||
vpblendvb xtmp2, xzero, xpoly, xq2 ; xtmp2 = poly where q2 byte has bit7 set, else 0x00
|
||||
vpblendvb xtmp3, xzero, xpoly, xq3 ; xtmp3 = poly where q3 byte has bit7 set, else 0x00
|
||||
XLDR xs1, [ptr+pos] ; Get next vector (source data1)
|
||||
XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
|
||||
XLDR xs3, [ptr+pos+32] ; Get next vector (source data3)
|
||||
vpaddb xq1, xq1, xq1 ; q1 = q1<<1
|
||||
vpaddb xq2, xq2, xq2 ; q2 = q2<<1
|
||||
vpaddb xq3, xq3, xq3 ; q3 = q3<<1
|
||||
vpxor xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
|
||||
vpxor xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
|
||||
vpxor xq3, xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
|
||||
jg next_vect ; Loop for each vect except 0 (flags from sub tmp, 1)
|
||||
|
||||
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
|
||||
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
|
||||
vpxor xp1, xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
|
||||
vpxor xq1, xq1, xs1 ;q1 ^= 1 * s1[0]
|
||||
vpxor xp2, xp2, xs2 ;p2 ^= s2[0]
|
||||
vpxor xq2, xq2, xs2 ;q2 ^= 1 * s2[0]
|
||||
vpxor xp3, xp3, xs3 ;p3 ^= s3[0]
|
||||
vpxor xq3, xq3, xs3 ;q3 ^= 1 * s3[0]
|
||||
XSTR [ptr+pos], xp1 ;Write parity P1 vector
|
||||
XSTR [ptr+pos+16], xp2 ;Write parity P2 vector
|
||||
XSTR [ptr+pos+32], xp3 ;Write parity P3 vector
|
||||
XSTR [tmp+pos], xq1 ;Write parity Q1 vector
|
||||
XSTR [tmp+pos+16], xq2 ;Write parity Q2 vector
|
||||
XSTR [tmp+pos+32], xq3 ;Write parity Q3 vector
|
||||
add pos, 48
|
||||
cmp pos, len
|
||||
jle loop48
|
||||
|
||||
;; ------------------------------
|
||||
;; Do last 16 or 32 Bytes remaining
|
||||
add len, 48
|
||||
cmp pos, len
|
||||
je return_pass
|
||||
|
||||
loop16:
|
||||
mov ptr, [arg2+vec*8] ;Fetch last source pointer
|
||||
mov tmp, vec ;Set tmp to point back to last vector
|
||||
XLDR xs1, [ptr+pos] ;Preload last vector (source)
|
||||
vpxor xp1, xp1, xp1 ;p = 0
|
||||
vpxor xq1, xq1, xq1 ;q = 0
|
||||
|
||||
next_vect16:
|
||||
sub tmp, 1 ;Inner loop for each source vector
|
||||
mov ptr, [arg2+tmp*8] ; get pointer to next vect
|
||||
vpxor xq1, xq1, xs1 ; q1 ^= s1
|
||||
vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
|
||||
vpxor xp1, xp1, xs1 ; p ^= s
|
||||
vpaddb xq1, xq1, xq1 ; q = q<<1
|
||||
vpxor xq1, xq1, xtmp1 ; q = q<<1 ^ poly_masked
|
||||
XLDR xs1, [ptr+pos] ; Get next vector (source data)
|
||||
jg next_vect16 ; Loop for each vect except 0
|
||||
|
||||
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
|
||||
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
|
||||
vpxor xp1, xp1, xs1 ;p ^= s[0] - last source is already loaded
|
||||
vpxor xq1, xq1, xs1 ;q ^= 1 * s[0]
|
||||
XSTR [ptr+pos], xp1 ;Write parity P vector
|
||||
XSTR [tmp+pos], xq1 ;Write parity Q vector
|
||||
add pos, 16
|
||||
cmp pos, len
|
||||
jl loop16
|
||||
|
||||
|
||||
return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
poly:
|
||||
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
|
||||
|
||||
;;; func core, ver, snum
|
||||
slversion pq_gen_avx, 02, 0a, 0039
|
256
raid/pq_gen_avx2.asm
Normal file
256
raid/pq_gen_avx2.asm
Normal file
@ -0,0 +1,256 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; Optimized pq of N source vectors using AVX2
|
||||
;;; int pq_gen_avx2(int vects, int len, void **array)
|
||||
|
||||
;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
|
||||
;;; (**array). Last two pointers are the P and Q destinations respectively.
|
||||
;;; Vectors must be aligned to 32 bytes. Length must be 32 byte aligned.
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp3 arg4
|
||||
%define return rax
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define tmp r11
|
||||
%define tmp3 r10
|
||||
%define return rax
|
||||
%define stack_size 8*32 + 8 ; must be an odd multiple of 8
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
;; Until a sav_ymm256 is defined
|
||||
vmovdqu [rsp + 0*32], ymm6
|
||||
vmovdqu [rsp + 1*32], ymm7
|
||||
vmovdqu [rsp + 2*32], ymm8
|
||||
vmovdqu [rsp + 3*32], ymm9
|
||||
vmovdqu [rsp + 4*32], ymm10
|
||||
vmovdqu [rsp + 5*32], ymm11
|
||||
vmovdqu [rsp + 6*32], ymm14
|
||||
vmovdqu [rsp + 7*32], ymm15
|
||||
end_prolog
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqu ymm6, [rsp + 0*32]
|
||||
vmovdqu ymm7, [rsp + 1*32]
|
||||
vmovdqu ymm8, [rsp + 2*32]
|
||||
vmovdqu ymm9, [rsp + 3*32]
|
||||
vmovdqu ymm10, [rsp + 4*32]
|
||||
vmovdqu ymm11, [rsp + 5*32]
|
||||
vmovdqu ymm14, [rsp + 6*32]
|
||||
vmovdqu ymm15, [rsp + 7*32]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define vec arg0
|
||||
%define len arg1
|
||||
%define ptr arg3
|
||||
%define pos rax
|
||||
|
||||
%define xp1 ymm0
|
||||
%define xq1 ymm1
|
||||
%define xtmp1 ymm2
|
||||
%define xs1 ymm3
|
||||
|
||||
%define xp2 ymm4
|
||||
%define xq2 ymm5
|
||||
%define xtmp2 ymm6
|
||||
%define xs2 ymm7
|
||||
|
||||
%define xp3 ymm8
|
||||
%define xq3 ymm9
|
||||
%define xtmp3 ymm10
|
||||
%define xs3 ymm11
|
||||
|
||||
%define xzero ymm14
|
||||
%define xpoly ymm15
|
||||
|
||||
;;; Use Non-temporal load/store
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
align 16
|
||||
global pq_gen_avx2:function
|
||||
func(pq_gen_avx2)
|
||||
FUNC_SAVE
|
||||
sub vec, 3 ;Keep as offset to last source
|
||||
jng return_fail ;Must have at least 2 sources
|
||||
cmp len, 0
|
||||
je return_pass
|
||||
test len, (32-1) ;Check alignment of length
|
||||
jnz return_fail
|
||||
mov pos, 0
|
||||
vmovdqa xpoly, [poly]
|
||||
vpxor xzero, xzero, xzero
|
||||
cmp len, 96
|
||||
jl loop32
|
||||
|
||||
len_aligned_32bytes:
|
||||
sub len, 3*32 ;Len points to last block
|
||||
|
||||
loop96:
|
||||
mov ptr, [arg2+vec*8] ;Fetch last source pointer
|
||||
mov tmp, vec ;Set tmp to point back to last vector
|
||||
XLDR xs1, [ptr+pos] ;Preload last vector (source)
|
||||
XLDR xs2, [ptr+pos+32] ;Preload last vector (source)
|
||||
XLDR xs3, [ptr+pos+64] ;Preload last vector (source)
|
||||
vpxor xp1, xp1, xp1 ;p1 = 0
|
||||
vpxor xp2, xp2, xp2 ;p2 = 0
|
||||
vpxor xp3, xp3, xp3 ;p3 = 0
|
||||
vpxor xq1, xq1, xq1 ;q1 = 0
|
||||
vpxor xq2, xq2, xq2 ;q2 = 0
|
||||
vpxor xq3, xq3, xq3 ;q3 = 0
|
||||
|
||||
;; Inner loop of the 96-byte path: folds one source vector (three 32-byte
;; lanes at pos, pos+32, pos+64) into the running P and Q accumulators, then
;; loads the next lower-indexed source. tmp counts down from the last source
;; index; the loop exits once tmp reaches 0 with source 0 loaded but not yet
;; folded in (the code after the loop does that).
;; Per byte, Q is doubled as (q << 1) ^ (0x1d if bit 7 of q was set):
;; vpblendvb picks xpoly where the byte's top bit is set, else xzero.
;; NOTE: the jg at the bottom consumes the flags produced by "sub tmp, 1";
;; the mov and SIMD instructions in between leave the flags untouched, so do
;; not insert flag-modifying instructions inside this loop.
next_vect:
|
||||
sub tmp, 1 ;Inner loop for each source vector (sets flags for jg below)
|
||||
mov ptr, [arg2+tmp*8] ; get pointer to next vect
|
||||
vpxor xq1, xq1, xs1 ; q1 ^= s1
|
||||
vpxor xq2, xq2, xs2 ; q2 ^= s2
|
||||
vpxor xq3, xq3, xs3 ; q3 ^= s3
|
||||
vpxor xp1, xp1, xs1 ; p1 ^= s1
|
||||
vpxor xp2, xp2, xs2 ; p2 ^= s2
|
||||
vpxor xp3, xp3, xs3 ; p3 ^= s3
|
||||
vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
|
||||
vpblendvb xtmp2, xzero, xpoly, xq2 ; xtmp2 = poly or 0x00
|
||||
vpblendvb xtmp3, xzero, xpoly, xq3 ; xtmp3 = poly or 0x00
|
||||
XLDR xs1, [ptr+pos] ; Get next vector (source data1)
|
||||
XLDR xs2, [ptr+pos+32] ; Get next vector (source data2)
|
||||
XLDR xs3, [ptr+pos+64] ; Get next vector (source data3)
|
||||
vpaddb xq1, xq1, xq1 ; q1 = q1<<1
|
||||
vpaddb xq2, xq2, xq2 ; q2 = q2<<1
|
||||
vpaddb xq3, xq3, xq3 ; q3 = q3<<1
|
||||
vpxor xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
|
||||
vpxor xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
|
||||
vpxor xq3, xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
|
||||
jg next_vect ; Loop for each vect except 0 (flags from sub tmp, 1)
|
||||
|
||||
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
|
||||
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
|
||||
vpxor xp1, xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
|
||||
vpxor xq1, xq1, xs1 ;q1 ^= 1 * s1[0]
|
||||
vpxor xp2, xp2, xs2 ;p2 ^= s2[0]
|
||||
vpxor xq2, xq2, xs2 ;q2 ^= 1 * s2[0]
|
||||
vpxor xp3, xp3, xs3 ;p3 ^= s3[0]
|
||||
vpxor xq3, xq3, xs3 ;q3 ^= 1 * s3[0]
|
||||
XSTR [ptr+pos], xp1 ;Write parity P1 vector
|
||||
XSTR [ptr+pos+32], xp2 ;Write parity P2 vector
|
||||
XSTR [ptr+pos+64], xp3 ;Write parity P3 vector
|
||||
XSTR [tmp+pos], xq1 ;Write parity Q1 vector
|
||||
XSTR [tmp+pos+32], xq2 ;Write parity Q2 vector
|
||||
XSTR [tmp+pos+64], xq3 ;Write parity Q3 vector
|
||||
add pos, 3*32
|
||||
cmp pos, len
|
||||
jle loop96
|
||||
|
||||
;; ------------------------------
|
||||
;; Do last 32 or 64 Bytes remaining
|
||||
add len, 3*32
|
||||
cmp pos, len
|
||||
je return_pass
|
||||
|
||||
loop32:
|
||||
mov ptr, [arg2+vec*8] ;Fetch last source pointer
|
||||
mov tmp, vec ;Set tmp to point back to last vector
|
||||
XLDR xs1, [ptr+pos] ;Preload last vector (source)
|
||||
vpxor xp1, xp1, xp1 ;p = 0
|
||||
vpxor xq1, xq1, xq1 ;q = 0
|
||||
|
||||
next_vect32:
|
||||
sub tmp, 1 ;Inner loop for each source vector
|
||||
mov ptr, [arg2+tmp*8] ; get pointer to next vect
|
||||
vpxor xq1, xq1, xs1 ; q1 ^= s1
|
||||
vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
|
||||
vpxor xp1, xp1, xs1 ; p ^= s
|
||||
vpaddb xq1, xq1, xq1 ; q = q<<1
|
||||
vpxor xq1, xq1, xtmp1 ; q = q<<1 ^ poly_masked
|
||||
XLDR xs1, [ptr+pos] ; Get next vector (source data)
|
||||
jg next_vect32 ; Loop for each vect except 0
|
||||
|
||||
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
|
||||
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
|
||||
vpxor xp1, xp1, xs1 ;p ^= s[0] - last source is already loaded
|
||||
vpxor xq1, xq1, xs1 ;q ^= 1 * s[0]
|
||||
XSTR [ptr+pos], xp1 ;Write parity P vector
|
||||
XSTR [tmp+pos], xq1 ;Write parity Q vector
|
||||
add pos, 32
|
||||
cmp pos, len
|
||||
jl loop32
|
||||
|
||||
|
||||
return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 32
|
||||
poly:
|
||||
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
|
||||
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
|
||||
|
||||
;;; func core, ver, snum
|
||||
slversion pq_gen_avx2, 04, 03, 0041
|
97
raid/pq_gen_perf.c
Normal file
97
raid/pq_gen_perf.c
Normal file
@ -0,0 +1,97 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include<stdio.h>
|
||||
#include<stdint.h>
|
||||
#include<string.h>
|
||||
#include<stdlib.h>
|
||||
#include<sys/time.h>
|
||||
#include "raid.h"
|
||||
#include "test.h"
|
||||
|
||||
//#define CACHED_TEST
|
||||
#ifdef CACHED_TEST
|
||||
// Cached test, loop many times over small dataset
|
||||
# define TEST_SOURCES 10
|
||||
# define TEST_LEN 8*1024
|
||||
# define TEST_LOOPS 40000
|
||||
# define TEST_TYPE_STR "_warm"
|
||||
#else
|
||||
# ifndef TEST_CUSTOM
|
||||
// Uncached test. Pull from large mem base.
|
||||
# define TEST_SOURCES 10
|
||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||
# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
|
||||
# define TEST_LOOPS 1000
|
||||
# define TEST_TYPE_STR "_cold"
|
||||
# else
|
||||
# define TEST_TYPE_STR "_cus"
|
||||
# ifndef TEST_LOOPS
|
||||
# define TEST_LOOPS 1000
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define TEST_MEM ((TEST_SOURCES + 2)*(TEST_LEN))
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
void *buffs[TEST_SOURCES + 2];
|
||||
struct perf start, stop;
|
||||
|
||||
printf("Test pq_gen_perf %d sources X %d bytes\n", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES + 2; i++) {
|
||||
int ret;
|
||||
void *buf;
|
||||
ret = posix_memalign(&buf, 32, TEST_LEN);
|
||||
if (ret) {
|
||||
printf("alloc error: Fail");
|
||||
return 1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Setup data
|
||||
for (i = 0; i < TEST_SOURCES + 2; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
// Warm up
|
||||
pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++)
|
||||
pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
perf_stop(&stop);
|
||||
printf("pq_gen" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_MEM * i);
|
||||
|
||||
return 0;
|
||||
}
|
258
raid/pq_gen_sse.asm
Normal file
258
raid/pq_gen_sse.asm
Normal file
@ -0,0 +1,258 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; Optimized pq of N source vectors using SSE3
|
||||
;;; int pq_gen_sse(int vects, int len, void **array)
|
||||
|
||||
;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
|
||||
;;; (**array). Last two pointers are the P and Q destinations respectively.
|
||||
;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp3 arg4
|
||||
%define return rax
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define tmp r11
|
||||
%define tmp3 r10
|
||||
%define return rax
|
||||
%define stack_size 7*16 + 8 ; must be an odd multiple of 8
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm15, 6*16
|
||||
end_prolog
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
movdqa xmm8, [rsp + 2*16]
|
||||
movdqa xmm9, [rsp + 3*16]
|
||||
movdqa xmm10, [rsp + 4*16]
|
||||
movdqa xmm11, [rsp + 5*16]
|
||||
movdqa xmm15, [rsp + 6*16]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define vec arg0
|
||||
%define len arg1
|
||||
%define ptr arg3
|
||||
%define pos rax
|
||||
|
||||
%define xp1 xmm0
|
||||
%define xq1 xmm1
|
||||
%define xtmp1 xmm2
|
||||
%define xs1 xmm3
|
||||
|
||||
%define xp2 xmm4
|
||||
%define xq2 xmm5
|
||||
%define xtmp2 xmm6
|
||||
%define xs2 xmm7
|
||||
|
||||
%define xp3 xmm8
|
||||
%define xq3 xmm9
|
||||
%define xtmp3 xmm10
|
||||
%define xs3 xmm11
|
||||
|
||||
%define xpoly xmm15
|
||||
|
||||
;;; Use Non-temporal load/store
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
align 16
|
||||
global pq_gen_sse:function
|
||||
func(pq_gen_sse)
|
||||
FUNC_SAVE
|
||||
sub vec, 3 ;Keep as offset to last source
|
||||
jng return_fail ;Must have at least 2 sources
|
||||
cmp len, 0
|
||||
je return_pass
|
||||
test len, (16-1) ;Check alignment of length
|
||||
jnz return_fail
|
||||
mov pos, 0
|
||||
movdqa xpoly, [poly]
|
||||
cmp len, 48
|
||||
jl loop16
|
||||
|
||||
len_aligned_32bytes:
|
||||
sub len, 48 ;Len points to last block
|
||||
|
||||
loop48:
|
||||
mov ptr, [arg2+vec*8] ;Fetch last source pointer
|
||||
mov tmp, vec ;Set tmp to point back to last vector
|
||||
XLDR xs1, [ptr+pos] ;Preload last vector (source)
|
||||
XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
|
||||
XLDR xs3, [ptr+pos+32] ;Preload last vector (source)
|
||||
pxor xp1, xp1 ;p1 = 0
|
||||
pxor xp2, xp2 ;p2 = 0
|
||||
pxor xp3, xp3 ;p3 = 0
|
||||
pxor xq1, xq1 ;q1 = 0
|
||||
pxor xq2, xq2 ;q2 = 0
|
||||
pxor xq3, xq3 ;q3 = 0
|
||||
|
||||
;; Inner loop of the 48-byte path: folds one source vector (three 16-byte
;; lanes at pos, pos+16, pos+32) into the running P and Q accumulators, then
;; loads the next lower-indexed source. tmp counts down from the last source
;; index; the loop exits once tmp reaches 0 with source 0 loaded but not yet
;; folded in (the code after the loop does that).
;; Per byte, Q is doubled as (q << 1) ^ (0x1d if bit 7 of q was set):
;; pcmpgtb against zero yields 0xff for bytes whose top bit is set (signed
;; negative), and pand with xpoly turns that mask into the 0x1d correction.
;; NOTE: the jg at the bottom consumes the flags produced by "sub tmp, 1";
;; the mov and SIMD instructions in between leave the flags untouched, so do
;; not insert flag-modifying instructions inside this loop.
next_vect:
|
||||
sub tmp, 1 ;Inner loop for each source vector (sets flags for jg below)
|
||||
mov ptr, [arg2+tmp*8] ; get pointer to next vect
|
||||
pxor xq1, xs1 ; q1 ^= s1
|
||||
pxor xq2, xs2 ; q2 ^= s2
|
||||
pxor xq3, xs3 ; q3 ^= s3
|
||||
pxor xp1, xs1 ; p1 ^= s1
|
||||
pxor xp2, xs2 ; p2 ^= s2
|
||||
pxor xp3, xs3 ; p3 ^= s3
|
||||
pxor xtmp1, xtmp1 ; xtmp1 = 0 - for compare to 0
|
||||
pxor xtmp2, xtmp2 ; xtmp2 = 0
|
||||
pxor xtmp3, xtmp3 ; xtmp3 = 0
|
||||
pcmpgtb xtmp1, xq1 ; xtmp1 = mask 0xff or 0x00 if bit7 set
|
||||
pcmpgtb xtmp2, xq2 ; xtmp2 = mask 0xff or 0x00 if bit7 set
|
||||
pcmpgtb xtmp3, xq3 ; xtmp3 = mask 0xff or 0x00 if bit7 set
|
||||
pand xtmp1, xpoly ; xtmp1 = poly or 0x00
|
||||
pand xtmp2, xpoly ; xtmp2 = poly or 0x00
|
||||
pand xtmp3, xpoly ; xtmp3 = poly or 0x00
|
||||
XLDR xs1, [ptr+pos] ; Get next vector (source data1)
|
||||
XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
|
||||
XLDR xs3, [ptr+pos+32] ; Get next vector (source data3)
|
||||
paddb xq1, xq1 ; q1 = q1<<1
|
||||
paddb xq2, xq2 ; q2 = q2<<1
|
||||
paddb xq3, xq3 ; q3 = q3<<1
|
||||
pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
|
||||
pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
|
||||
pxor xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
|
||||
jg next_vect ; Loop for each vect except 0 (flags from sub tmp, 1)
|
||||
|
||||
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
|
||||
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
|
||||
pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
|
||||
pxor xq1, xs1 ;q1 ^= 1 * s1[0]
|
||||
pxor xp2, xs2 ;p2 ^= s2[0]
|
||||
pxor xq2, xs2 ;q2 ^= 1 * s2[0]
|
||||
pxor xp3, xs3 ;p3 ^= s3[0]
|
||||
pxor xq3, xs3 ;q3 ^= 1 * s3[0]
|
||||
XSTR [ptr+pos], xp1 ;Write parity P1 vector
|
||||
XSTR [ptr+pos+16], xp2 ;Write parity P2 vector
|
||||
XSTR [ptr+pos+32], xp3 ;Write parity P3 vector
|
||||
XSTR [tmp+pos], xq1 ;Write parity Q1 vector
|
||||
XSTR [tmp+pos+16], xq2 ;Write parity Q2 vector
|
||||
XSTR [tmp+pos+32], xq3 ;Write parity Q3 vector
|
||||
add pos, 48
|
||||
cmp pos, len
|
||||
jle loop48
|
||||
|
||||
;; ------------------------------
|
||||
;; Do last 16 or 32 Bytes remaining
|
||||
add len, 48
|
||||
cmp pos, len
|
||||
je return_pass
|
||||
|
||||
loop16:
|
||||
mov ptr, [arg2+vec*8] ;Fetch last source pointer
|
||||
mov tmp, vec ;Set tmp to point back to last vector
|
||||
XLDR xs1, [ptr+pos] ;Preload last vector (source)
|
||||
pxor xp1, xp1 ;p = 0
|
||||
pxor xq1, xq1 ;q = 0
|
||||
|
||||
next_vect16:
|
||||
sub tmp, 1 ;Inner loop for each source vector
|
||||
mov ptr, [arg2+tmp*8] ; get pointer to next vect
|
||||
pxor xq1, xs1 ; q1 ^= s1
|
||||
pxor xtmp1, xtmp1 ; xtmp = 0
|
||||
pcmpgtb xtmp1, xq1 ; xtmp = mask 0xff or 0x00 if bit7 set
|
||||
pand xtmp1, xpoly ; xtmp = poly or 0x00
|
||||
pxor xp1, xs1 ; p ^= s
|
||||
paddb xq1, xq1 ; q = q<<1
|
||||
pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
|
||||
XLDR xs1, [ptr+pos] ; Get next vector (source data)
|
||||
jg next_vect16 ; Loop for each vect except 0
|
||||
|
||||
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
|
||||
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
|
||||
pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
|
||||
pxor xq1, xs1 ;q ^= 1 * s[0]
|
||||
XSTR [ptr+pos], xp1 ;Write parity P vector
|
||||
XSTR [tmp+pos], xq1 ;Write parity Q vector
|
||||
add pos, 16
|
||||
cmp pos, len
|
||||
jl loop16
|
||||
|
||||
|
||||
return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
poly:
|
||||
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
|
||||
|
||||
;;; func core, ver, snum
|
||||
slversion pq_gen_sse, 00, 09, 0032
|
264
raid/pq_gen_sse_i32.asm
Normal file
264
raid/pq_gen_sse_i32.asm
Normal file
@ -0,0 +1,264 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; Optimized pq of N source vectors using SSE3
|
||||
;;; int pq_gen_sse(int vects, int len, void **array)
|
||||
|
||||
;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
|
||||
;;; (**array). Last two pointers are the P and Q destinations respectively.
|
||||
;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define tmp r10
|
||||
%define stack_size 2*16 + 8 ; must be an odd multiple of 8
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
end_prolog
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf32
|
||||
%define arg0 edx
|
||||
%define arg1 ecx
|
||||
%define return eax
|
||||
%define PS 4
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp+8+PS*x]
|
||||
%define arg2 edi ; must sav/restore
|
||||
%define arg3 esi
|
||||
%define tmp ebx
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg0, arg(0)
|
||||
mov arg1, arg(1)
|
||||
mov arg2, arg(2)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
mov esp, ebp ;if has frame pointer?
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define vec arg0
|
||||
%define len arg1
|
||||
%define ptr arg3
|
||||
%define pos return
|
||||
|
||||
%define xp1 xmm0
|
||||
%define xq1 xmm1
|
||||
%define xtmp1 xmm2
|
||||
%define xs1 xmm3
|
||||
|
||||
%define xp2 xmm4
|
||||
%define xq2 xmm5
|
||||
%define xtmp2 xmm6
|
||||
%define xs2 xmm7
|
||||
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%define xpoly xmm15
|
||||
%elifidn PS,4 ; 32-bit code
|
||||
%define xpoly [poly]
|
||||
%endif
|
||||
|
||||
;;; Use Non-temporal load/store
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
|
||||
section .text
|
||||
|
||||
align 16
|
||||
global pq_gen_sse:function
|
||||
func(pq_gen_sse)
|
||||
FUNC_SAVE
|
||||
sub vec, 3 ;Keep as offset to last source
|
||||
jng return_fail ;Must have at least 2 sources
|
||||
cmp len, 0
|
||||
je return_pass
|
||||
test len, (16-1) ;Check alignment of length
|
||||
jnz return_fail
|
||||
mov pos, 0
|
||||
%ifidn PS,8
|
||||
movdqa xpoly, [poly] ;For 64-bit, load poly into high xmm reg
|
||||
%endif
|
||||
cmp len, 32
|
||||
jl loop16
|
||||
|
||||
;; Entered only when len >= 32. pos starts at 0 and advances by 32 per pass,
;; so the buffers are walked forward; len is biased down by one block here so
;; that "pos <= len" means a full 32-byte block still remains at pos.
len_aligned_32bytes:
|
||||
sub len, 32 ;Len points to last full 32-byte block
|
||||
|
||||
loop32:
|
||||
mov ptr, [arg2+vec*PS] ;Fetch last source pointer
|
||||
mov tmp, vec ;Set tmp to point back to last vector
|
||||
XLDR xs1, [ptr+pos] ;Preload last vector (source)
|
||||
XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
|
||||
pxor xp1, xp1 ;p1 = 0
|
||||
pxor xq1, xq1 ;q1 = 0
|
||||
pxor xp2, xp2 ;p2 = 0
|
||||
pxor xq2, xq2 ;q2 = 0
|
||||
|
||||
next_vect:
|
||||
sub tmp, 1 ;Inner loop for each source vector
|
||||
mov ptr, [arg2+tmp*PS] ; get pointer to next vect
|
||||
pxor xq1, xs1 ; q1 ^= s1
|
||||
pxor xq2, xs2 ; q2 ^= s2
|
||||
pxor xp1, xs1 ; p1 ^= s1
|
||||
pxor xp2, xs2 ; p2 ^= s2
|
||||
pxor xtmp1, xtmp1 ; xtmp1 = 0 - for compare to 0
|
||||
pxor xtmp2, xtmp2 ; xtmp2 = 0
|
||||
pcmpgtb xtmp1, xq1 ; xtmp1 = mask 0xff or 0x00 if bit7 set
|
||||
pcmpgtb xtmp2, xq2 ; xtmp2 = mask 0xff or 0x00 if bit7 set
|
||||
pand xtmp1, xpoly ; xtmp1 = poly or 0x00
|
||||
pand xtmp2, xpoly ; xtmp2 = poly or 0x00
|
||||
XLDR xs1, [ptr+pos] ; Get next vector (source data1)
|
||||
XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
|
||||
paddb xq1, xq1 ; q1 = q1<<1
|
||||
paddb xq2, xq2 ; q2 = q2<<1
|
||||
pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
|
||||
pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
|
||||
jg next_vect ; Loop for each vect except 0
|
||||
|
||||
mov ptr, [arg2+PS+vec*PS] ;Get address of P parity vector
|
||||
mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
|
||||
pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
|
||||
pxor xq1, xs1 ;q1 ^= 1 * s1[0]
|
||||
pxor xp2, xs2 ;p2 ^= s2[0]
|
||||
pxor xq2, xs2 ;q2 ^= 1 * s2[0]
|
||||
XSTR [ptr+pos], xp1 ;Write parity P1 vector
|
||||
XSTR [ptr+pos+16], xp2 ;Write parity P2 vector
|
||||
XSTR [tmp+pos], xq1 ;Write parity Q1 vector
|
||||
XSTR [tmp+pos+16], xq2 ;Write parity Q2 vector
|
||||
add pos, 32
|
||||
cmp pos, len
|
||||
jle loop32
|
||||
|
||||
;; ------------------------------
|
||||
;; Do last 16 Bytes remaining
|
||||
add len, 32
|
||||
cmp pos, len
|
||||
je return_pass
|
||||
|
||||
loop16:
|
||||
mov ptr, [arg2+vec*PS] ;Fetch last source pointer
|
||||
mov tmp, vec ;Set tmp to point back to last vector
|
||||
XLDR xs1, [ptr+pos] ;Preload last vector (source)
|
||||
pxor xp1, xp1 ;p = 0
|
||||
pxor xq1, xq1 ;q = 0
|
||||
|
||||
next_vect16:
|
||||
sub tmp, 1 ;Inner loop for each source vector
|
||||
mov ptr, [arg2+tmp*PS] ; get pointer to next vect
|
||||
pxor xq1, xs1 ; q1 ^= s1
|
||||
pxor xtmp1, xtmp1 ; xtmp = 0
|
||||
pcmpgtb xtmp1, xq1 ; xtmp = mask 0xff or 0x00 if bit7 set
|
||||
pand xtmp1, xpoly ; xtmp = poly or 0x00
|
||||
pxor xp1, xs1 ; p ^= s
|
||||
paddb xq1, xq1 ; q = q<<1
|
||||
pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
|
||||
XLDR xs1, [ptr+pos] ; Get next vector (source data)
|
||||
jg next_vect16 ; Loop for each vect except 0
|
||||
|
||||
mov ptr, [arg2+PS+vec*PS] ;Get address of P parity vector
|
||||
mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
|
||||
pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
|
||||
pxor xq1, xs1 ;q ^= 1 * s[0]
|
||||
XSTR [ptr+pos], xp1 ;Write parity P vector
|
||||
XSTR [tmp+pos], xq1 ;Write parity Q vector
|
||||
add pos, 16
|
||||
cmp pos, len
|
||||
jl loop16
|
||||
|
||||
|
||||
return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
|
||||
return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
poly:
|
||||
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
|
||||
|
||||
;;; func core, ver, snum
|
||||
slversion pq_gen_sse, 00, 08, 0032
|
194
raid/pq_gen_test.c
Normal file
194
raid/pq_gen_test.c
Normal file
@ -0,0 +1,194 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include<stdio.h>
|
||||
#include<stdint.h>
|
||||
#include<string.h>
|
||||
#include<stdlib.h>
|
||||
#include<limits.h>
|
||||
#include "raid.h"
|
||||
#include "types.h"
|
||||
|
||||
#define TEST_SOURCES 16
|
||||
#define TEST_LEN 1024
|
||||
#define TEST_MEM ((TEST_SOURCES + 2)*(TEST_LEN))
|
||||
#ifndef TEST_SEED
|
||||
# define TEST_SEED 0x1234
|
||||
#endif
|
||||
|
||||
// Fill the first buffer_size bytes of buf with values taken from rand(),
// one rand() call per byte (only the low byte of each result is stored).
// A buffer_size of zero or less leaves buf untouched.
void rand_buffer(unsigned char *buf, long buffer_size)
{
	unsigned char *cursor = buf;
	long remaining = buffer_size;

	while (remaining > 0) {
		*cursor++ = rand();
		remaining--;
	}
}
|
||||
|
||||
// Print the first len bytes of buf to stdout as hex, 16 values per row,
// followed by one extra newline. Always returns 0.
int dump(unsigned char *buf, int len)
{
	int printed;

	for (printed = 1; printed <= len; printed++) {
		printf(" %2x", buf[printed - 1]);
		// Break the row after every 16th value
		if (printed % 16 == 0)
			printf("\n");
	}

	printf("\n");
	return 0;
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, k, ret, fail = 0;
|
||||
void *buffs[TEST_SOURCES + 2]; // Pointers to src and dest
|
||||
char *tmp_buf[TEST_SOURCES + 2];
|
||||
|
||||
printf("Test pq_gen_test ");
|
||||
|
||||
srand(TEST_SEED);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES + 2; i++) {
|
||||
void *buf;
|
||||
ret = posix_memalign(&buf, 32, TEST_LEN);
|
||||
if (ret) {
|
||||
printf("alloc error: Fail");
|
||||
return 1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES + 2; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
|
||||
for (i = 0; i < TEST_LEN; i++) {
|
||||
if (((char *)buffs[TEST_SOURCES])[i] != 0)
|
||||
fail++;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_LEN; i++) {
|
||||
if (((char *)buffs[TEST_SOURCES + 1])[i] != 0)
|
||||
fail++;
|
||||
}
|
||||
|
||||
if (fail > 0) {
|
||||
printf("fail zero test %d\n", fail);
|
||||
return 1;
|
||||
} else
|
||||
putchar('.');
|
||||
|
||||
// Test rand1
|
||||
for (i = 0; i < TEST_SOURCES + 2; i++)
|
||||
rand_buffer(buffs[i], TEST_LEN);
|
||||
|
||||
ret = pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
fail |= pq_check_base(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
|
||||
if (fail > 0) {
|
||||
int t;
|
||||
printf(" Fail rand test1 fail=%d, ret=%d\n", fail, ret);
|
||||
for (t = 0; t < TEST_SOURCES + 2; t++)
|
||||
dump(buffs[t], 15);
|
||||
|
||||
printf(" reference function p,q\n");
|
||||
pq_gen_base(TEST_SOURCES + 2, TEST_LEN, buffs);
|
||||
for (t = TEST_SOURCES; t < TEST_SOURCES + 2; t++)
|
||||
dump(buffs[t], 15);
|
||||
|
||||
return 1;
|
||||
} else
|
||||
putchar('.');
|
||||
|
||||
// Test various number of sources
|
||||
for (j = 4; j <= TEST_SOURCES + 2; j++) {
|
||||
for (i = 0; i < j; i++)
|
||||
rand_buffer(buffs[i], TEST_LEN);
|
||||
|
||||
pq_gen(j, TEST_LEN, buffs);
|
||||
fail |= pq_check_base(j, TEST_LEN, buffs);
|
||||
|
||||
if (fail > 0) {
|
||||
printf("fail rand test %d sources\n", j);
|
||||
return 1;
|
||||
} else
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
fflush(0);
|
||||
|
||||
// Test various number of sources and len
|
||||
k = 0;
|
||||
while (k <= TEST_LEN) {
|
||||
for (j = 4; j <= TEST_SOURCES + 2; j++) {
|
||||
for (i = 0; i < j; i++)
|
||||
rand_buffer(buffs[i], k);
|
||||
|
||||
ret = pq_gen(j, k, buffs);
|
||||
fail |= pq_check_base(j, k, buffs);
|
||||
|
||||
if (fail > 0) {
|
||||
printf("fail rand test %d sources, len=%d, fail="
|
||||
"%d, ret=%d\n", j, k, fail, ret);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
k += 32;
|
||||
}
|
||||
|
||||
// Test at the end of buffer
|
||||
k = 0;
|
||||
while (k <= TEST_LEN) {
|
||||
for (j = 0; j < (TEST_SOURCES + 2); j++) {
|
||||
rand_buffer(buffs[j], TEST_LEN - k);
|
||||
tmp_buf[j] = (char *)buffs[j] + k;
|
||||
}
|
||||
|
||||
ret = pq_gen(TEST_SOURCES + 2, TEST_LEN - k, (void *)tmp_buf);
|
||||
fail |= pq_check_base(TEST_SOURCES + 2, TEST_LEN - k, (void *)tmp_buf);
|
||||
|
||||
if (fail > 0) {
|
||||
printf("fail end test - offset: %d, len: %d, fail: %d, "
|
||||
"ret: %d\n", k, TEST_LEN - k, fail, ret);
|
||||
return 1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
fflush(0);
|
||||
k += 32;
|
||||
}
|
||||
|
||||
if (!fail)
|
||||
printf(" done: Pass\n");
|
||||
|
||||
return fail;
|
||||
}
|
147
raid/raid_base.c
Normal file
147
raid/raid_base.c
Normal file
@ -0,0 +1,147 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Per-byte masks replicated across one machine word, used to multiply every
// byte of a word by 2 in GF(2^8) at once (reduction constant 0x1d).
#if __WORDSIZE == 64 || _WIN64 || __x86_64__
# define notbit0 0xfefefefefefefefeULL
# define bit7    0x8080808080808080ULL
# define gf8poly 0x1d1d1d1d1d1d1d1dULL
#else
# define notbit0 0xfefefefeUL
# define bit7    0x80808080UL
# define gf8poly 0x1d1d1d1dUL
#endif

/*
 * Reference P+Q parity generator.
 *
 * array holds vects pointers: array[0 .. vects-3] are the sources,
 * array[vects-2] receives P and array[vects-1] receives Q, each len bytes.
 *
 *   P = xor of all sources
 *   Q = evaluated Horner-style from the highest-index source down to source 0:
 *       q = source ^ (2 * q), where "2 *" is a GF(2^8) multiply that xors in
 *       0x1d whenever bit 7 is shifted out.
 *
 * The bulk of the buffer is processed one unsigned long at a time; any
 * trailing len % sizeof(long) bytes are processed byte-wise so that P and Q
 * are valid over the full len bytes (pq_check_base verifies every byte).
 *
 * NOTE(review): the word-wise path reads the buffers through unsigned long
 * pointers, so it assumes they are suitably aligned - confirm with callers.
 *
 * Returns 0 on success (including len <= 0, where nothing is written) and
 * 1 when vects < 3, i.e. there is no room for a source plus P and Q.
 */
int pq_gen_base(int vects, int len, void **array)
{
	int i, j;
	unsigned long p, q, s;
	unsigned long **src = (unsigned long **)array;
	unsigned char **bsrc = (unsigned char **)array;
	int blocks;

	if (len <= 0)
		return 0;	// nothing to generate
	if (vects < 3)
		return 1;	// src[vects - 3] would index before the array

	blocks = len / (int)sizeof(long);

	for (i = 0; i < blocks; i++) {
		q = p = src[vects - 3][i];	// start from the last source

		for (j = vects - 4; j >= 0; j--) {
			p ^= s = src[j][i];
			q = s ^ (((q << 1) & notbit0) ^	// shift each byte
				 ((((q & bit7) << 1) - ((q & bit7) >> 7))	// 0xff in bytes that overflowed
				  & gf8poly));	// apply poly
		}

		src[vects - 2][i] = p;	// second to last pointer is p
		src[vects - 1][i] = q;	// last pointer is q
	}

	// Remaining bytes that do not fill a whole word
	for (i = blocks * (int)sizeof(long); i < len; i++) {
		unsigned char bp, bq, bs;

		bq = bp = bsrc[vects - 3][i];

		for (j = vects - 4; j >= 0; j--) {
			bs = bsrc[j][i];
			bp ^= bs;
			bq = bs ^ ((bq << 1) ^ ((bq & 0x80) ? 0x1d : 0));
		}

		bsrc[vects - 2][i] = bp;
		bsrc[vects - 1][i] = bq;
	}
	return 0;
}
|
||||
|
||||
/*
 * Reference P+Q parity checker.
 *
 * array[0 .. vects-3] are the sources, array[vects-2] is P and
 * array[vects-1] is Q. For each byte position the expected P (xor of the
 * sources) and Q (q = source ^ 2*q in GF(2^8), reduction 0x1d, walking from
 * the last source to source 0) are recomputed and compared with the stored
 * values.
 *
 * Returns 0 when all len positions match. At the first mismatching position
 * pos it returns (pos | 1) for a P mismatch, otherwise (pos | 2) for a Q
 * mismatch.
 */
int pq_check_base(int vects, int len, void **array)
{
	unsigned char **vec = (unsigned char **)array;
	int pos;

	for (pos = 0; pos < len; pos++) {
		unsigned char par = vec[vects - 3][pos];
		unsigned char syn = par;
		int v = vects - 4;

		while (v >= 0) {
			unsigned char data = vec[v][pos];
			// 0x1d is folded in when the doubling overflows bit 7
			unsigned char reduce = (syn & 0x80) ? 0x1d : 0;

			par ^= data;
			syn = data ^ ((syn << 1) ^ reduce);
			v--;
		}

		if (vec[vects - 2][pos] != par)	// stored P disagrees
			return pos | 1;
		if (vec[vects - 1][pos] != syn)	// stored Q disagrees
			return pos | 2;
	}
	return 0;
}
|
||||
|
||||
/*
 * Reference xor parity generator.
 *
 * array[0 .. vects-2] are the sources and array[vects-1] is the destination.
 * Each destination byte is the xor of the bytes at the same offset in every
 * source. Always returns 0.
 */
int xor_gen_base(int vects, int len, void **array)
{
	unsigned char **vec = (unsigned char **)array;
	int pos = 0;

	while (pos < len) {
		unsigned char acc = vec[0][pos];
		int v = 1;

		while (v < vects - 1) {
			acc ^= vec[v][pos];
			v++;
		}

		vec[vects - 1][pos] = acc;	// last pointer is dest
		pos++;
	}

	return 0;
}
|
||||
|
||||
/*
 * Reference xor parity checker.
 *
 * XORs the byte at each offset across all vects vectors (sources and parity
 * alike); a consistent set xors to zero everywhere.
 *
 * Returns 0 when every position xors to zero, otherwise len.
 */
int xor_check_base(int vects, int len, void **array)
{
	unsigned char **vec = (unsigned char **)array;
	int pos, v;

	for (pos = 0; pos < len; pos++) {
		unsigned char acc = 0;

		for (v = 0; v < vects; v++)
			acc ^= vec[v][pos];

		// Reaching this point implies len > 0, so a failure reports len
		if (acc != 0)
			return len;
	}
	return 0;
}
|
||||
|
||||
// Version record emitted once per exported base function.
// NOTE(review): the meaning of snum/ver/core is not defined in this file;
// the field names match the "core, ver, snum" slversion arguments used by
// the assembly sources of this unit - confirm against the project docs.
struct slver {
|
||||
unsigned short snum;
|
||||
unsigned char ver;
|
||||
unsigned char core;
|
||||
};
|
||||
|
||||
// Each function gets two symbols: an uninitialized one whose name ends in
// the 8 hex digits core(2) ver(2) snum(4), and an initialized record
// holding the same values in struct order { snum, ver, core }.
struct slver pq_gen_base_slver_0001012a;
|
||||
struct slver pq_gen_base_slver = { 0x012a, 0x01, 0x00 };
|
||||
|
||||
struct slver xor_gen_base_slver_0001012b;
|
||||
struct slver xor_gen_base_slver = { 0x012b, 0x01, 0x00 };
|
||||
|
||||
struct slver pq_check_base_slver_0001012c;
|
||||
struct slver pq_check_base_slver = { 0x012c, 0x01, 0x00 };
|
||||
|
||||
struct slver xor_check_base_slver_0001012d;
|
||||
struct slver xor_check_base_slver = { 0x012d, 0x01, 0x00 };
|
140
raid/raid_multibinary.asm
Normal file
140
raid/raid_multibinary.asm
Normal file
@ -0,0 +1,140 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define WRT_OPT wrt ..plt
|
||||
%else
|
||||
%define WRT_OPT
|
||||
%endif
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
%include "multibinary.asm"
|
||||
|
||||
default rel
|
||||
[bits 64]
|
||||
|
||||
extern pq_gen_base
|
||||
extern pq_gen_sse
|
||||
extern pq_gen_avx
|
||||
extern pq_gen_avx2
|
||||
|
||||
extern xor_gen_base
|
||||
extern xor_gen_sse
|
||||
extern xor_gen_avx
|
||||
|
||||
extern pq_check_base
|
||||
extern pq_check_sse
|
||||
|
||||
extern xor_check_base
|
||||
extern xor_check_sse
|
||||
|
||||
mbin_interface xor_gen
|
||||
mbin_interface pq_gen
|
||||
|
||||
mbin_dispatch_init5 xor_gen, xor_gen_base, xor_gen_sse, xor_gen_avx, xor_gen_avx
|
||||
mbin_dispatch_init5 pq_gen, pq_gen_base, pq_gen_sse, pq_gen_avx, pq_gen_avx2
|
||||
|
||||
|
||||
section .data
|
||||
|
||||
xor_check_dispatched:
|
||||
dq xor_check_mbinit
|
||||
pq_check_dispatched:
|
||||
dq pq_check_mbinit
|
||||
|
||||
section .text
|
||||
|
||||
;;;;
|
||||
; pq_check multibinary function
|
||||
;;;;
|
||||
;; Lazy dispatcher for pq_check.
;; pq_check jumps through the pointer stored at pq_check_dispatched. That
;; pointer initially holds pq_check_mbinit, so the first call runs
;; pq_check_dispatch_init, which overwrites the pointer with the selected
;; implementation; execution then falls through into pq_check and the call
;; is forwarded with the caller's registers intact.
global pq_check:function
|
||||
pq_check_mbinit:
|
||||
call pq_check_dispatch_init
|
||||
pq_check:
|
||||
jmp qword [pq_check_dispatched]
|
||||
|
||||
;; Selects pq_check_sse when the FLAG_CPUID1_ECX_SSE4_1 bit is set in ECX of
;; CPUID leaf 1, otherwise pq_check_base. Every register touched here
;; (cpuid writes eax/ebx/ecx/edx) is saved and restored because the
;; caller's arguments must reach the real function unchanged.
pq_check_dispatch_init:
|
||||
push rax
|
||||
push rbx
|
||||
push rcx
|
||||
push rdx
|
||||
push rsi
|
||||
lea rsi, [pq_check_base WRT_OPT] ; Default
|
||||
|
||||
mov eax, 1
|
||||
cpuid
|
||||
test ecx, FLAG_CPUID1_ECX_SSE4_1
|
||||
;; lea does not modify flags, so cmovne still sees the result of test
lea rbx, [pq_check_sse WRT_OPT]
|
||||
cmovne rsi, rbx
|
||||
|
||||
;; Publish the choice; later calls skip the init path entirely
mov [pq_check_dispatched], rsi
|
||||
pop rsi
|
||||
pop rdx
|
||||
pop rcx
|
||||
pop rbx
|
||||
pop rax
|
||||
ret
|
||||
|
||||
|
||||
;;;;
|
||||
; xor_check multibinary function
|
||||
;;;;
|
||||
;; Lazy dispatcher for xor_check.
;; xor_check jumps through the pointer stored at xor_check_dispatched. That
;; pointer initially holds xor_check_mbinit, so the first call runs
;; xor_check_dispatch_init, which overwrites the pointer with the selected
;; implementation; execution then falls through into xor_check and the call
;; is forwarded with the caller's registers intact.
global xor_check:function
|
||||
xor_check_mbinit:
|
||||
call xor_check_dispatch_init
|
||||
xor_check:
|
||||
jmp qword [xor_check_dispatched]
|
||||
|
||||
;; Selects xor_check_sse when the FLAG_CPUID1_ECX_SSE4_1 bit is set in ECX
;; of CPUID leaf 1, otherwise xor_check_base. Every register touched here
;; (cpuid writes eax/ebx/ecx/edx) is saved and restored because the
;; caller's arguments must reach the real function unchanged.
xor_check_dispatch_init:
|
||||
push rax
|
||||
push rbx
|
||||
push rcx
|
||||
push rdx
|
||||
push rsi
|
||||
lea rsi, [xor_check_base WRT_OPT] ; Default
|
||||
|
||||
mov eax, 1
|
||||
cpuid
|
||||
test ecx, FLAG_CPUID1_ECX_SSE4_1
|
||||
;; lea does not modify flags, so cmovne still sees the result of test
lea rbx, [xor_check_sse WRT_OPT]
|
||||
cmovne rsi, rbx
|
||||
|
||||
;; Publish the choice; later calls skip the init path entirely
mov [xor_check_dispatched], rsi
|
||||
pop rsi
|
||||
pop rdx
|
||||
pop rcx
|
||||
pop rbx
|
||||
pop rax
|
||||
ret
|
||||
|
||||
;;; func core, ver, snum
|
||||
slversion xor_gen, 00, 03, 0126
|
||||
slversion xor_check, 00, 03, 0127
|
||||
slversion pq_gen, 00, 03, 0128
|
||||
slversion pq_check, 00, 03, 0129
|
285
raid/xor_check_sse.asm
Normal file
285
raid/xor_check_sse.asm
Normal file
@ -0,0 +1,285 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; Optimized xor parity check of N vectors using SSE
|
||||
;;; int xor_check_sse(int vects, int len, void **array)
|
||||
|
||||
;;; Checks that the xor of all N (vects) vectors in the array of pointers
|
||||
;;; (**array) is zero. Returns 0 if the parity is consistent, 1 otherwise.
|
||||
;;; Vectors must be aligned to 16 bytes. Length can be any value.
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 rax
|
||||
%define tmp2.b al
|
||||
%define tmp3 arg4
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define return rax
|
||||
%define tmp2 rax
|
||||
%define tmp2.b al
|
||||
%define PS 8
|
||||
%define tmp r11
|
||||
%define tmp3 r10
|
||||
%define stack_size 2*16 + 8 ; must be an odd multiple of 8
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
end_prolog
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf32
|
||||
%define arg0 arg(0)
|
||||
%define arg1 ecx
|
||||
%define tmp2 eax
|
||||
%define tmp2.b al
|
||||
%define tmp3 edx
|
||||
%define return eax
|
||||
%define PS 4
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp+8+PS*x]
|
||||
%define arg2 edi ; must sav/restore
|
||||
%define arg3 esi
|
||||
%define tmp ebx
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg1, arg(1)
|
||||
mov arg2, arg(2)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
mov esp, ebp ;if has frame pointer
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
|
||||
%define vec arg0
|
||||
%define len arg1
|
||||
%define ptr arg3
|
||||
%define pos tmp3
|
||||
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
|
||||
section .text
|
||||
|
||||
;;; xor_check_sse(vects, len, array)
;;; XORs the data at every offset across all `vects` vectors in `array` and
;;; returns 0 when the result is zero everywhere, 1 otherwise. Also returns
;;; 1 when vects < 2, and 0 when len == 0.
;;; Strategy: lengths that are a multiple of 128 are checked 128 bytes at a
;;; time from the start; any other length first has its tail checked
;;; backwards from the end (bytes, then PS-sized words) until the remaining
;;; length is a multiple of 128.
align 16
|
||||
global xor_check_sse:function
|
||||
func(xor_check_sse)
|
||||
FUNC_SAVE
|
||||
%ifidn PS,8 ;64-bit code
|
||||
sub vec, 1 ; Keep as offset to last source
|
||||
%else ;32-bit code
|
||||
mov tmp, arg(0) ; Update vec length arg to last source
|
||||
sub tmp, 1
|
||||
;; mov leaves the flags of the sub above intact for the jng below
mov arg(0), tmp
|
||||
%endif
|
||||
|
||||
jng return_fail ;Must have at least 2 sources
|
||||
cmp len, 0
|
||||
je return_pass
|
||||
test len, (128-1) ;Check alignment of length
|
||||
jnz len_not_aligned
|
||||
|
||||
|
||||
;; Here len is a non-zero multiple of 128. It is lowered by 128 so that
;; `pos <= len` holds exactly while another full 128-byte chunk remains.
len_aligned_128bytes:
|
||||
sub len, 128
|
||||
mov pos, 0
|
||||
mov tmp, vec ;Preset to last vector
|
||||
|
||||
;; One iteration per 128-byte chunk: seed xmm0-7 from the last vector,
;; then xor in every other vector down to index 0.
loop128:
|
||||
mov tmp2, [arg2+tmp*PS] ;Fetch last pointer in array
|
||||
sub tmp, 1 ;Next vect
|
||||
XLDR xmm0, [tmp2+pos] ;Start with end of array in last vector
|
||||
XLDR xmm1, [tmp2+pos+16] ;Keep xor parity in xmm0-7
|
||||
XLDR xmm2, [tmp2+pos+(2*16)]
|
||||
XLDR xmm3, [tmp2+pos+(3*16)]
|
||||
XLDR xmm4, [tmp2+pos+(4*16)]
|
||||
XLDR xmm5, [tmp2+pos+(5*16)]
|
||||
XLDR xmm6, [tmp2+pos+(6*16)]
|
||||
XLDR xmm7, [tmp2+pos+(7*16)]
|
||||
|
||||
next_vect:
|
||||
mov ptr, [arg2+tmp*PS]
|
||||
;; The flags of this sub drive the jge at the bottom of the loop; the
;; xorpd instructions in between do not modify flags.
sub tmp, 1
|
||||
xorpd xmm0, [ptr+pos] ;Get next vector (source)
|
||||
xorpd xmm1, [ptr+pos+16]
|
||||
xorpd xmm2, [ptr+pos+(2*16)]
|
||||
xorpd xmm3, [ptr+pos+(3*16)]
|
||||
xorpd xmm4, [ptr+pos+(4*16)]
|
||||
xorpd xmm5, [ptr+pos+(5*16)]
|
||||
xorpd xmm6, [ptr+pos+(6*16)]
|
||||
xorpd xmm7, [ptr+pos+(7*16)]
|
||||
;;; prefetch [ptr+pos+(8*16)]
|
||||
jge next_vect ;Loop for each vect
|
||||
|
||||
;; End of vects, check that all parity regs = 0
|
||||
mov tmp, vec ;Back to last vector
|
||||
;; OR-reduce the eight accumulators into xmm0; ptest then sets ZF only
;; if every bit is zero.
por xmm0, xmm1
|
||||
por xmm0, xmm2
|
||||
por xmm0, xmm3
|
||||
por xmm0, xmm4
|
||||
por xmm0, xmm5
|
||||
por xmm0, xmm6
|
||||
por xmm0, xmm7
|
||||
ptest xmm0, xmm0
|
||||
jnz return_fail
|
||||
|
||||
add pos, 128
|
||||
cmp pos, len
|
||||
jle loop128
|
||||
|
||||
return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
|
||||
|
||||
;;; Do one byte at a time for no alignment case
|
||||
|
||||
;; Despite the label name this path only checks parity: it xors the byte at
;; offset len-1 across all vectors, then shortens len by one, until len is
;; a multiple of 8.
xor_gen_byte:
|
||||
mov tmp, vec ;Preset to last vector
|
||||
|
||||
loop_1byte:
|
||||
mov ptr, [arg2+tmp*PS] ;Fetch last pointer in array
|
||||
mov tmp2.b, [ptr+len-1] ;Get array n
|
||||
sub tmp, 1
|
||||
nextvect_1byte:
|
||||
mov ptr, [arg2+tmp*PS]
|
||||
xor tmp2.b, [ptr+len-1]
|
||||
sub tmp, 1
|
||||
jge nextvect_1byte
|
||||
|
||||
mov tmp, vec ;Back to last vector
|
||||
cmp tmp2.b, 0
|
||||
jne return_fail
|
||||
sub len, 1
|
||||
test len, (8-1)
|
||||
jnz loop_1byte
|
||||
|
||||
cmp len, 0
|
||||
je return_pass
|
||||
test len, (128-1) ;If not 0 and 128bit aligned
|
||||
jz len_aligned_128bytes ; then do aligned case. len = y * 128
|
||||
|
||||
;; else we are 8-byte aligned so fall through to recheck
|
||||
|
||||
|
||||
;; Unaligned length cases
|
||||
len_not_aligned:
|
||||
test len, (PS-1)
|
||||
jne xor_gen_byte
|
||||
;; tmp3 = len mod 128: the number of tail bytes to check in PS-sized words
mov tmp3, len
|
||||
and tmp3, (128-1) ;Do the unaligned bytes 4-8 at a time
|
||||
mov tmp, vec ;Preset to last vector
|
||||
|
||||
;; Run backwards 8 bytes (4B for 32bit) at a time for (tmp3) bytes
|
||||
loopN_bytes:
|
||||
mov ptr, [arg2+tmp*PS] ;Fetch last pointer in array
|
||||
mov tmp2, [ptr+len-PS] ;Get array n
|
||||
sub tmp, 1
|
||||
nextvect_Nbytes:
|
||||
mov ptr, [arg2+tmp*PS] ;Get pointer to next vector
|
||||
xor tmp2, [ptr+len-PS]
|
||||
sub tmp, 1
|
||||
jge nextvect_Nbytes ;Loop for each source
|
||||
|
||||
mov tmp, vec ;Back to last vector
|
||||
cmp tmp2, 0
|
||||
jne return_fail
|
||||
sub len, PS
|
||||
sub tmp3, PS
|
||||
jg loopN_bytes
|
||||
|
||||
cmp len, 128 ;Now len is aligned to 128B
|
||||
jge len_aligned_128bytes ;We can do the rest aligned
|
||||
|
||||
cmp len, 0
|
||||
je return_pass
|
||||
|
||||
;; Falling through from above would need 0 < len < 128, which cannot occur
;; for a non-negative len that is a multiple of 128; in practice this label
;; is reached only by the explicit jumps.
return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
;;; func core, ver, snum
|
||||
slversion xor_check_sse, 00, 03, 0031
|
||||
|
280
raid/xor_check_test.c
Normal file
280
raid/xor_check_test.c
Normal file
@ -0,0 +1,280 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include<stdio.h>
|
||||
#include<stdint.h>
|
||||
#include<string.h>
|
||||
#include<stdlib.h>
|
||||
#include "raid.h"
|
||||
#include "types.h"
|
||||
|
||||
#define TEST_SOURCES 16
|
||||
#define TEST_LEN 1024
|
||||
#define TEST_MEM ((TEST_SOURCES + 1)*(TEST_LEN))
|
||||
#ifndef TEST_SEED
|
||||
# define TEST_SEED 0x1234
|
||||
#endif
|
||||
|
||||
// Overwrite buffer_size bytes starting at buf with pseudo-random data.
// Each byte comes from its own rand() call, truncated to unsigned char;
// nothing is written when buffer_size is zero or negative.
void rand_buffer(unsigned char *buf, long buffer_size)
{
	long idx = 0;

	while (idx < buffer_size) {
		buf[idx] = rand();
		idx++;
	}
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, k, ret, fail = 0;
|
||||
void *buffs[TEST_SOURCES + 1];
|
||||
char c;
|
||||
int serr, lerr;
|
||||
char *tmp_buf[TEST_SOURCES + 1];
|
||||
|
||||
printf("Test xor_check_test %d sources X %d bytes\n", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
srand(TEST_SEED);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES + 1; i++) {
|
||||
void *buf;
|
||||
if (posix_memalign(&buf, 16, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return 1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES + 1; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
xor_gen_base(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
if (ret != 0) {
|
||||
fail++;
|
||||
printf("\nfail zero test %d\n", ret);
|
||||
}
|
||||
|
||||
((char *)(buffs[0]))[TEST_LEN - 2] = 0x7; // corrupt buffer
|
||||
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
if (ret == 0) {
|
||||
fail++;
|
||||
printf("\nfail corrupt buffer test %d\n", ret);
|
||||
}
|
||||
((char *)(buffs[0]))[TEST_LEN - 2] = 0; // un-corrupt buffer
|
||||
|
||||
// Test corrupted buffer any location on all sources
|
||||
for (j = 0; j < TEST_SOURCES + 1; j++) {
|
||||
for (i = TEST_LEN - 1; i >= 0; i--) {
|
||||
((char *)buffs[j])[i] = 0x5; // corrupt buffer
|
||||
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
if (ret == 0) {
|
||||
fail++;
|
||||
printf("\nfail corrupt buffer test j=%d, i=%d\n", j, i);
|
||||
return 1;
|
||||
}
|
||||
((char *)buffs[j])[i] = 0; // un-corrupt buffer
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test rand1
|
||||
for (i = 0; i < TEST_SOURCES + 1; i++)
|
||||
rand_buffer(buffs[i], TEST_LEN);
|
||||
|
||||
xor_gen_base(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
if (ret != 0) {
|
||||
fail++;
|
||||
printf("fail first rand test %d\n", ret);
|
||||
}
|
||||
|
||||
c = ((char *)(buffs[0]))[TEST_LEN - 2];
|
||||
((char *)(buffs[0]))[TEST_LEN - 2] = c ^ 0x1;
|
||||
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
if (ret == 0) {
|
||||
fail++;
|
||||
printf("\nFail corrupt buffer test, passed when should have failed\n");
|
||||
}
|
||||
((char *)(buffs[0]))[TEST_LEN - 2] = c; // un-corrupt buffer
|
||||
|
||||
// Test corrupted buffer any location on all sources w/ random data
|
||||
for (j = 0; j < TEST_SOURCES + 1; j++) {
|
||||
for (i = TEST_LEN - 1; i >= 0; i--) {
|
||||
// Check it still passes
|
||||
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
if (ret != 0) { // should pass
|
||||
fail++;
|
||||
printf
|
||||
("\nFail rand test with un-corrupted buffer j=%d, i=%d\n",
|
||||
j, i);
|
||||
return 1;
|
||||
}
|
||||
c = ((char *)buffs[j])[i];
|
||||
((char *)buffs[j])[i] = c ^ 1; // corrupt buffer
|
||||
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
if (ret == 0) { // Check it now fails
|
||||
fail++;
|
||||
printf("\nfail corrupt buffer test j=%d, i=%d\n", j, i);
|
||||
return 1;
|
||||
}
|
||||
((char *)buffs[j])[i] = c; // un-corrupt buffer
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test various number of sources, full length
|
||||
for (j = 3; j <= TEST_SOURCES + 1; j++) {
|
||||
// New random data
|
||||
for (i = 0; i < j; i++)
|
||||
rand_buffer(buffs[i], TEST_LEN);
|
||||
|
||||
// Generate xor parity for this number of sources
|
||||
xor_gen_base(j, TEST_LEN, buffs);
|
||||
|
||||
// Set errors up in each source and len position
|
||||
for (i = 0; i < j; i++) {
|
||||
for (k = 0; k < TEST_LEN; k++) {
|
||||
// See if it still passes
|
||||
ret = xor_check(j, TEST_LEN, buffs);
|
||||
if (ret != 0) { // Should pass
|
||||
printf("\nfail rand test %d sources\n", j);
|
||||
fail++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
c = ((char *)buffs[i])[k];
|
||||
((char *)buffs[i])[k] = c ^ 1; // corrupt buffer
|
||||
|
||||
ret = xor_check(j, TEST_LEN, buffs);
|
||||
if (ret == 0) { // Should fail
|
||||
printf
|
||||
("\nfail rand test corrupted buffer %d sources\n",
|
||||
j);
|
||||
fail++;
|
||||
return 1;
|
||||
}
|
||||
((char *)buffs[i])[k] = c; // un-corrupt buffer
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
fflush(0);
|
||||
|
||||
// Test various number of sources and len
|
||||
k = 1;
|
||||
while (k <= TEST_LEN) {
|
||||
for (j = 3; j <= TEST_SOURCES + 1; j++) {
|
||||
for (i = 0; i < j; i++)
|
||||
rand_buffer(buffs[i], k);
|
||||
|
||||
// Generate xor parity for this number of sources
|
||||
xor_gen_base(j, k, buffs);
|
||||
|
||||
// Inject errors at various source and len positions
|
||||
for (lerr = 0; lerr < k; lerr += 10) {
|
||||
for (serr = 0; serr < j; serr++) {
|
||||
|
||||
// See if it still passes
|
||||
ret = xor_check(j, k, buffs);
|
||||
if (ret != 0) { // Should pass
|
||||
printf("\nfail rand test %d sources\n", j);
|
||||
fail++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
c = ((char *)buffs[serr])[lerr];
|
||||
((char *)buffs[serr])[lerr] = c ^ 1; // corrupt buffer
|
||||
|
||||
ret = xor_check(j, k, buffs);
|
||||
if (ret == 0) { // Should fail
|
||||
printf("\nfail rand test corrupted buffer "
|
||||
"%d sources, len=%d, ret=%d\n", j, k,
|
||||
ret);
|
||||
fail++;
|
||||
return 1;
|
||||
}
|
||||
((char *)buffs[serr])[lerr] = c; // un-corrupt buffer
|
||||
}
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
fflush(0);
|
||||
k += 1;
|
||||
}
|
||||
|
||||
// Test at the end of buffer
|
||||
for (i = 0; i < TEST_LEN; i += 32) {
|
||||
for (j = 0; j < TEST_SOURCES + 1; j++) {
|
||||
rand_buffer(buffs[j], TEST_LEN - i);
|
||||
tmp_buf[j] = (char *)buffs[j] + i;
|
||||
}
|
||||
|
||||
xor_gen_base(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
|
||||
|
||||
// Test good data
|
||||
ret = xor_check(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
|
||||
if (ret != 0) {
|
||||
printf("fail end test - offset: %d, len: %d\n", i, TEST_LEN - i);
|
||||
fail++;
|
||||
return 1;
|
||||
}
|
||||
// Test bad data
|
||||
for (serr = 0; serr < TEST_SOURCES + 1; serr++) {
|
||||
for (lerr = 0; lerr < (TEST_LEN - i); lerr++) {
|
||||
c = tmp_buf[serr][lerr];
|
||||
tmp_buf[serr][lerr] = c ^ 1;
|
||||
|
||||
ret =
|
||||
xor_check(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
|
||||
if (ret == 0) {
|
||||
printf("fail end test corrupted buffer - "
|
||||
"offset: %d, len: %d, ret: %d\n", i,
|
||||
TEST_LEN - i, ret);
|
||||
fail++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
tmp_buf[serr][lerr] = c;
|
||||
}
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
fflush(0);
|
||||
}
|
||||
|
||||
if (fail == 0)
|
||||
printf("Pass\n");
|
||||
|
||||
return fail;
|
||||
|
||||
}
|
70
raid/xor_example.c
Normal file
70
raid/xor_example.c
Normal file
@ -0,0 +1,70 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2013 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "raid.h"
|
||||
#include "types.h"
|
||||
|
||||
#define TEST_SOURCES 16
|
||||
#define TEST_LEN 16*1024
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, should_pass, should_fail;
|
||||
void *buffs[TEST_SOURCES + 1];
|
||||
|
||||
printf("XOR example\n");
|
||||
for (i = 0; i < TEST_SOURCES + 1; i++) {
|
||||
void *buf;
|
||||
if (posix_memalign(&buf, 16, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return 1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
printf("Make random data\n");
|
||||
for (i = 0; i < TEST_SOURCES + 1; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
((char *)buffs[i])[j] = rand();
|
||||
|
||||
printf("Generate xor parity\n");
|
||||
xor_gen_sse(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
|
||||
printf("Check parity: ");
|
||||
should_pass = xor_check_sse(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
printf("%s\n", should_pass == 0 ? "Pass" : "Fail");
|
||||
|
||||
printf("Find corruption: ");
|
||||
((char *)buffs[TEST_SOURCES / 2])[TEST_LEN / 2] ^= 1; // flip one bit
|
||||
should_fail = xor_check_sse(TEST_SOURCES + 1, TEST_LEN, buffs); //recheck
|
||||
printf("%s\n", should_fail != 0 ? "Pass" : "Fail");
|
||||
|
||||
return 0;
|
||||
}
|
228
raid/xor_gen_avx.asm
Normal file
228
raid/xor_gen_avx.asm
Normal file
@ -0,0 +1,228 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; Optimized xor of N source vectors using AVX
|
||||
;;; int xor_gen_avx(int vects, int len, void **array)
|
||||
|
||||
;;; Generates xor parity vector from N (vects-1) sources in array of pointers
|
||||
;;; (**array). Last pointer is the dest.
|
||||
;;; Vectors must be aligned to 32 bytes. Length can be any value.
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp3 arg4
|
||||
%define func(x) x:
|
||||
%define return rax
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define tmp r11
|
||||
%define tmp3 r10
|
||||
%define func(x) proc_frame x
|
||||
%define return rax
|
||||
%define stack_size 2*32 + 8 ;must be an odd multiple of 8
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
vmovdqu [rsp + 0*32], ymm6
|
||||
vmovdqu [rsp + 1*32], ymm7
|
||||
end_prolog
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqu ymm6, [rsp + 0*32]
|
||||
vmovdqu ymm7, [rsp + 1*32]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%endif ;output formats
|
||||
|
||||
|
||||
%define vec arg0
|
||||
%define len arg1
|
||||
%define ptr arg3
|
||||
%define tmp2 rax
|
||||
%define tmp2.b al
|
||||
%define pos tmp3
|
||||
%define PS 8
|
||||
|
||||
;;; Use non-temporal stores unless NO_NT_LDST is defined (loads are regular aligned loads either way)
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
[bits 64]
|
||||
|
||||
section .text
|
||||
|
||||
align 16
|
||||
global xor_gen_avx:function
|
||||
;;; xor_gen_avx: vec (arg0) = number of vectors including the destination,
;;; len (arg1) = length in bytes, arg2 = array of vector pointers.
;;; Returns 0 through return_pass and 1 through return_fail.
;;; Note: pos and tmp3 are the same register (pos is %defined as tmp3), so the
;;; tail loops may use tmp3 freely; pos is re-initialised in len_aligned_128bytes.
func(xor_gen_avx)
|
||||
|
||||
FUNC_SAVE
|
||||
sub vec, 2 ;vec = vects-2, the index of the last source pointer
|
||||
jng return_fail ;Fail when vects <= 2 (need at least 2 sources plus the dest)
|
||||
cmp len, 0
|
||||
je return_pass ;Zero length: nothing to do
|
||||
test len, (128-1) ;Check whether length is a multiple of 128 bytes
|
||||
jnz len_not_aligned
|
||||
|
||||
|
||||
;; Main path: len is a non-zero multiple of 128 bytes here
len_aligned_128bytes:
|
||||
sub len, 128 ;len = offset of the last 128-byte chunk (loop bound)
|
||||
mov pos, 0
|
||||
|
||||
loop128:
|
||||
mov tmp, vec ;Back to last source index
|
||||
mov tmp2, [arg2+vec*PS] ;Fetch pointer to the last source
|
||||
sub tmp, 1 ;Next vect
|
||||
XLDR ymm0, [tmp2+pos] ;Seed the parity with 128 bytes of the last source
|
||||
XLDR ymm1, [tmp2+pos+32] ;Keep xor parity in ymm0-3
|
||||
XLDR ymm2, [tmp2+pos+(2*32)]
|
||||
XLDR ymm3, [tmp2+pos+(3*32)]
|
||||
|
||||
next_vect:
|
||||
mov ptr, [arg2+tmp*PS]
|
||||
sub tmp, 1 ;Sets the flags tested by jge below; the AVX loads/xors leave flags untouched
|
||||
XLDR ymm4, [ptr+pos] ;Get next vector (source)
|
||||
XLDR ymm5, [ptr+pos+32]
|
||||
XLDR ymm6, [ptr+pos+(2*32)]
|
||||
XLDR ymm7, [ptr+pos+(3*32)]
|
||||
vxorpd ymm0, ymm0, ymm4 ;Add to xor parity
|
||||
vxorpd ymm1, ymm1, ymm5
|
||||
vxorpd ymm2, ymm2, ymm6
|
||||
vxorpd ymm3, ymm3, ymm7
|
||||
jge next_vect ;Loop for each source (until index 0 has been consumed)
|
||||
|
||||
mov ptr, [arg2+PS+vec*PS] ;Address of parity vector (array index vec+1)
|
||||
XSTR [ptr+pos], ymm0 ;Write parity xor vector
|
||||
XSTR [ptr+pos+(1*32)], ymm1
|
||||
XSTR [ptr+pos+(2*32)], ymm2
|
||||
XSTR [ptr+pos+(3*32)], ymm3
|
||||
add pos, 128
|
||||
cmp pos, len
|
||||
jle loop128 ;Continue while pos <= offset of last chunk
|
||||
|
||||
return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
|
||||
;;; Do one byte at a time for no alignment case
;;; Works backwards from the end of the vectors (offset len-1) until len
;;; becomes a multiple of PS (8) bytes.
|
||||
loop_1byte:
|
||||
mov tmp, vec ;Back to last source index
|
||||
mov ptr, [arg2+vec*PS] ;Fetch pointer to the last source
|
||||
mov tmp2.b, [ptr+len-1] ;Seed parity byte from the last source
|
||||
sub tmp, 1
|
||||
nextvect_1byte:
|
||||
mov ptr, [arg2+tmp*PS]
|
||||
xor tmp2.b, [ptr+len-1]
|
||||
sub tmp, 1
|
||||
jge nextvect_1byte ;Loop for each remaining source
|
||||
|
||||
mov tmp, vec
|
||||
add tmp, 1 ;Index of the parity (dest) vector
|
||||
mov ptr, [arg2+tmp*PS]
|
||||
mov [ptr+len-1], tmp2.b ;Write parity
|
||||
sub len, 1
|
||||
test len, (PS-1)
|
||||
jnz loop_1byte ;Repeat until len is a multiple of PS bytes
|
||||
|
||||
cmp len, 0
|
||||
je return_pass
|
||||
test len, (128-1) ;If not 0 and a multiple of 128 bytes
|
||||
jz len_aligned_128bytes ; then do aligned case. len = y * 128
|
||||
|
||||
;; else we are 8-byte aligned so fall through to recheck
|
||||
|
||||
|
||||
;; Unaligned length cases
|
||||
len_not_aligned:
|
||||
test len, (PS-1)
|
||||
jne loop_1byte ;Not a multiple of PS bytes: peel single bytes first
|
||||
mov tmp3, len
|
||||
and tmp3, (128-1) ;tmp3 = len mod 128; do these unaligned bytes 8 at a time
|
||||
|
||||
;; Run backwards 8 bytes at a time for (tmp3) bytes
|
||||
loop8_bytes:
|
||||
mov tmp, vec ;Back to last source index
|
||||
mov ptr, [arg2+vec*PS] ;Fetch pointer to the last source
|
||||
mov tmp2, [ptr+len-PS] ;Seed parity qword from the last source
|
||||
sub tmp, 1
|
||||
nextvect_8bytes:
|
||||
mov ptr, [arg2+tmp*PS] ;Get pointer to next vector
|
||||
xor tmp2, [ptr+len-PS]
|
||||
sub tmp, 1
|
||||
jge nextvect_8bytes ;Loop for each source
|
||||
|
||||
mov tmp, vec
|
||||
add tmp, 1 ;Index of the parity (dest) vector
|
||||
mov ptr, [arg2+tmp*PS]
|
||||
mov [ptr+len-PS], tmp2 ;Write parity
|
||||
sub len, PS
|
||||
sub tmp3, PS
|
||||
jg loop8_bytes ;Until the (len mod 128) tail has been consumed
|
||||
|
||||
cmp len, 128 ;Now len is aligned to 128B
|
||||
jge len_aligned_128bytes ;We can do the rest aligned
|
||||
|
||||
cmp len, 0
|
||||
je return_pass
|
||||
|
||||
;; Reached by the vects <= 2 check at entry, or by falling through from above
return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
;;; func core, ver, snum
|
||||
slversion xor_gen_avx, 02, 05, 0037
|
||||
|
98
raid/xor_gen_perf.c
Normal file
98
raid/xor_gen_perf.c
Normal file
@ -0,0 +1,98 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include<stdio.h>
|
||||
#include<stdint.h>
|
||||
#include<string.h>
|
||||
#include<stdlib.h>
|
||||
#include<sys/time.h>
|
||||
#include "raid.h"
|
||||
#include "test.h"
|
||||
|
||||
//#define CACHED_TEST
|
||||
#ifdef CACHED_TEST
|
||||
// Loop many times over same
|
||||
# define TEST_SOURCES 10
|
||||
# define TEST_LEN 8*1024
|
||||
# define TEST_LOOPS 400000
|
||||
# define TEST_TYPE_STR "_warm"
|
||||
#else
|
||||
// Uncached test. Pull from large mem base.
|
||||
# define TEST_SOURCES 10
|
||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||
# define TEST_LEN GT_L3_CACHE / TEST_SOURCES
|
||||
# define TEST_LOOPS 1000
|
||||
# define TEST_TYPE_STR "_cold"
|
||||
#endif
|
||||
|
||||
#define TEST_MEM ((TEST_SOURCES + 1)*(TEST_LEN))
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, ret, fail = 0;
|
||||
void **buffs;
|
||||
void *buff;
|
||||
struct perf start, stop;
|
||||
|
||||
printf("Test xor_gen_perf\n");
|
||||
|
||||
ret = posix_memalign((void **)&buff, 8, sizeof(int *) * (TEST_SOURCES + 6));
|
||||
if (ret) {
|
||||
printf("alloc error: Fail");
|
||||
return 1;
|
||||
}
|
||||
buffs = buff;
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES + 1; i++) {
|
||||
void *buf;
|
||||
ret = posix_memalign(&buf, 32, TEST_LEN);
|
||||
if (ret) {
|
||||
printf("alloc error: Fail");
|
||||
return 1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Setup data
|
||||
for (i = 0; i < TEST_SOURCES + 1; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
// Warm up
|
||||
xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++)
|
||||
xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
perf_stop(&stop);
|
||||
printf("xor_gen" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_MEM * i);
|
||||
|
||||
return fail;
|
||||
}
|
284
raid/xor_gen_sse.asm
Normal file
284
raid/xor_gen_sse.asm
Normal file
@ -0,0 +1,284 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; Optimized xor of N source vectors using SSE
|
||||
;;; int xor_gen_sse(int vects, int len, void **array)
|
||||
|
||||
;;; Generates xor parity vector from N (vects-1) sources in array of pointers
|
||||
;;; (**array). Last pointer is the dest.
|
||||
;;; Vectors must be aligned to 16 bytes. Length can be any value.
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 rax
|
||||
%define tmp2.b al
|
||||
%define tmp3 arg4
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define return rax
|
||||
%define tmp2 rax
|
||||
%define tmp2.b al
|
||||
%define PS 8
|
||||
%define tmp r11
|
||||
%define tmp3 r10
|
||||
%define stack_size 2*16 + 8 ; must be an odd multiple of 8
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
end_prolog
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf32
|
||||
%define arg0 arg(0)
|
||||
%define arg1 ecx
|
||||
%define tmp2 eax
|
||||
%define tmp2.b al
|
||||
%define tmp3 edx
|
||||
%define return eax
|
||||
%define PS 4
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp+8+PS*x]
|
||||
%define arg2 edi ; must sav/restore
|
||||
%define arg3 esi
|
||||
%define tmp ebx
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg1, arg(1)
|
||||
mov arg2, arg(2)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
mov esp, ebp ;if has frame pointer
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
|
||||
%define vec arg0
|
||||
%define len arg1
|
||||
%define ptr arg3
|
||||
%define pos tmp3
|
||||
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
;;; Use non-temporal loads/stores unless NO_NT_LDST is defined
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
|
||||
section .text
|
||||
|
||||
align 16
|
||||
global xor_gen_sse:function
|
||||
;;; xor_gen_sse: vec (arg0) = number of vectors including the destination,
;;; len (arg1) = length in bytes, arg2 = array of vector pointers.
;;; Returns 0 through return_pass and 1 through return_fail.
;;; Note: pos and tmp3 are the same register (pos is %defined as tmp3), so the
;;; tail loops may use tmp3 freely; pos is re-initialised in len_aligned_128bytes.
func(xor_gen_sse)
|
||||
FUNC_SAVE
|
||||
%ifidn PS,8 ;64-bit code
|
||||
sub vec, 2 ; vec = vects-2, the index of the last source pointer
|
||||
%else ;32-bit code
|
||||
mov tmp, arg(0) ; vec lives on the stack: rewrite it in place as vects-2
|
||||
sub tmp, 2
|
||||
mov arg(0), tmp ; mov leaves the flags from sub intact for the jng below
|
||||
%endif
|
||||
|
||||
jng return_fail ;Fail when vects <= 2 (need at least 2 sources plus the dest)
|
||||
cmp len, 0
|
||||
je return_pass ;Zero length: nothing to do
|
||||
test len, (128-1) ;Check whether length is a multiple of 128 bytes
|
||||
jnz len_not_aligned
|
||||
|
||||
|
||||
;; Main path: len is a non-zero multiple of 128 bytes here
len_aligned_128bytes:
|
||||
sub len, 128 ;len = offset of the last 128-byte chunk (loop bound)
|
||||
mov pos, 0
|
||||
mov tmp, vec ;Preset to last source index
|
||||
|
||||
loop128:
|
||||
mov tmp2, [arg2+tmp*PS] ;Fetch pointer to the last source
|
||||
sub tmp, 1 ;Next vect
|
||||
XLDR xmm0, [tmp2+pos] ;Seed the parity with 128 bytes of the last source
|
||||
XLDR xmm1, [tmp2+pos+16] ;Keep xor parity in xmm0-7
|
||||
XLDR xmm2, [tmp2+pos+(2*16)]
|
||||
XLDR xmm3, [tmp2+pos+(3*16)]
|
||||
XLDR xmm4, [tmp2+pos+(4*16)]
|
||||
XLDR xmm5, [tmp2+pos+(5*16)]
|
||||
XLDR xmm6, [tmp2+pos+(6*16)]
|
||||
XLDR xmm7, [tmp2+pos+(7*16)]
|
||||
|
||||
next_vect:
|
||||
mov ptr, [arg2+tmp*PS]
|
||||
sub tmp, 1 ;Sets the flags tested by jge below; xorpd leaves flags untouched
|
||||
xorpd xmm0, [ptr+pos] ;Xor next vector (source) straight from memory
|
||||
xorpd xmm1, [ptr+pos+16]
|
||||
xorpd xmm2, [ptr+pos+(2*16)]
|
||||
xorpd xmm3, [ptr+pos+(3*16)]
|
||||
xorpd xmm4, [ptr+pos+(4*16)]
|
||||
xorpd xmm5, [ptr+pos+(5*16)]
|
||||
xorpd xmm6, [ptr+pos+(6*16)]
|
||||
xorpd xmm7, [ptr+pos+(7*16)]
|
||||
;;; prefetch [ptr+pos+(8*16)]
|
||||
jge next_vect ;Loop for each vect (until index 0 has been consumed)
|
||||
|
||||
|
||||
mov tmp, vec ;Back to last source index (also the preset for the next loop128 pass)
|
||||
mov ptr, [arg2+PS+tmp*PS] ;Address of parity vector (array index vec+1)
|
||||
XSTR [ptr+pos], xmm0 ;Write parity xor vector
|
||||
XSTR [ptr+pos+(1*16)], xmm1
|
||||
XSTR [ptr+pos+(2*16)], xmm2
|
||||
XSTR [ptr+pos+(3*16)], xmm3
|
||||
XSTR [ptr+pos+(4*16)], xmm4
|
||||
XSTR [ptr+pos+(5*16)], xmm5
|
||||
XSTR [ptr+pos+(6*16)], xmm6
|
||||
XSTR [ptr+pos+(7*16)], xmm7
|
||||
add pos, 128
|
||||
cmp pos, len
|
||||
jle loop128 ;Continue while pos <= offset of last chunk
|
||||
|
||||
return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
|
||||
|
||||
;;; Do one byte at a time for no alignment case
;;; Works backwards from the end of the vectors (offset len-1) until len
;;; becomes a multiple of 8 bytes.
|
||||
|
||||
xor_gen_byte:
|
||||
mov tmp, vec ;Preset to last source index
|
||||
|
||||
loop_1byte:
|
||||
mov ptr, [arg2+tmp*PS] ;Fetch pointer to the last source
|
||||
mov tmp2.b, [ptr+len-1] ;Seed parity byte from the last source
|
||||
sub tmp, 1
|
||||
nextvect_1byte:
|
||||
mov ptr, [arg2+tmp*PS]
|
||||
xor tmp2.b, [ptr+len-1]
|
||||
sub tmp, 1
|
||||
jge nextvect_1byte ;Loop for each remaining source
|
||||
|
||||
mov tmp, vec ;Back to last source index (preset for the next loop_1byte pass)
|
||||
mov ptr, [arg2+PS+tmp*PS] ;Get parity (dest) vector, array index vec+1
|
||||
mov [ptr+len-1], tmp2.b ;Write parity
|
||||
sub len, 1
|
||||
test len, (8-1)
|
||||
jnz loop_1byte ;Repeat until len is a multiple of 8 bytes
|
||||
|
||||
cmp len, 0
|
||||
je return_pass
|
||||
test len, (128-1) ;If not 0 and a multiple of 128 bytes
|
||||
jz len_aligned_128bytes ; then do aligned case. len = y * 128
|
||||
|
||||
;; else we are 8-byte aligned so fall through to recheck
|
||||
|
||||
|
||||
;; Unaligned length cases
|
||||
len_not_aligned:
|
||||
test len, (PS-1)
|
||||
jne xor_gen_byte ;Not a multiple of PS bytes: peel single bytes first
|
||||
mov tmp3, len
|
||||
and tmp3, (128-1) ;tmp3 = len mod 128; do these unaligned bytes PS (4 or 8) at a time
|
||||
mov tmp, vec ;Preset to last source index
|
||||
|
||||
;; Run backwards 8 bytes (4B for 32bit) at a time for (tmp3) bytes
|
||||
loopN_bytes:
|
||||
mov ptr, [arg2+tmp*PS] ;Fetch pointer to the last source
|
||||
mov tmp2, [ptr+len-PS] ;Seed parity word from the last source
|
||||
sub tmp, 1
|
||||
nextvect_Nbytes:
|
||||
mov ptr, [arg2+tmp*PS] ;Get pointer to next vector
|
||||
xor tmp2, [ptr+len-PS]
|
||||
sub tmp, 1
|
||||
jge nextvect_Nbytes ;Loop for each source
|
||||
|
||||
mov tmp, vec ;Back to last source index (preset for the next loopN_bytes pass)
|
||||
mov ptr, [arg2+PS+tmp*PS] ;Get parity (dest) vector, array index vec+1
|
||||
mov [ptr+len-PS], tmp2 ;Write parity
|
||||
sub len, PS
|
||||
sub tmp3, PS
|
||||
jg loopN_bytes ;Until the (len mod 128) tail has been consumed
|
||||
|
||||
cmp len, 128 ;Now len is aligned to 128B
|
||||
jge len_aligned_128bytes ;We can do the rest aligned
|
||||
|
||||
cmp len, 0
|
||||
je return_pass
|
||||
|
||||
;; Reached by the vects <= 2 check at entry, or by falling through from above
return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
;;; func core, ver, snum
|
||||
slversion xor_gen_sse, 00, 0c, 0030
|
||||
|
165
raid/xor_gen_test.c
Normal file
165
raid/xor_gen_test.c
Normal file
@ -0,0 +1,165 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include<stdio.h>
|
||||
#include<stdint.h>
|
||||
#include<string.h>
|
||||
#include<stdlib.h>
|
||||
#include "raid.h"
|
||||
#include "types.h"
|
||||
|
||||
#define TEST_SOURCES 16
|
||||
#define TEST_LEN 1024
|
||||
#define TEST_MEM ((TEST_SOURCES + 1)*(TEST_LEN))
|
||||
#ifndef TEST_SEED
|
||||
# define TEST_SEED 0x1234
|
||||
#endif
|
||||
|
||||
// Fill buf with buffer_size bytes taken from successive rand() calls.
// Each byte is the rand() result truncated to unsigned char; a
// non-positive buffer_size leaves buf untouched.
void rand_buffer(unsigned char *buf, long buffer_size)
{
	unsigned char *cursor = buf;
	long remaining;

	for (remaining = buffer_size; remaining > 0; remaining--)
		*cursor++ = rand();
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, k, ret, fail = 0;
|
||||
void *buffs[TEST_SOURCES + 1];
|
||||
char *tmp_buf[TEST_SOURCES + 1];
|
||||
|
||||
printf("Test xor_gen_test ");
|
||||
|
||||
srand(TEST_SEED);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES + 1; i++) {
|
||||
void *buf;
|
||||
ret = posix_memalign(&buf, 32, TEST_LEN);
|
||||
if (ret) {
|
||||
printf("alloc error: Fail");
|
||||
return 1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES + 1; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
|
||||
for (i = 0; i < TEST_LEN; i++) {
|
||||
if (((char *)buffs[TEST_SOURCES])[i] != 0)
|
||||
fail++;
|
||||
}
|
||||
|
||||
if (fail > 0) {
|
||||
printf("fail zero test");
|
||||
return 1;
|
||||
} else
|
||||
putchar('.');
|
||||
|
||||
// Test rand1
|
||||
for (i = 0; i < TEST_SOURCES + 1; i++)
|
||||
rand_buffer(buffs[i], TEST_LEN);
|
||||
|
||||
xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
|
||||
fail |= xor_check_base(TEST_SOURCES + 1, TEST_LEN, buffs);
|
||||
|
||||
if (fail > 0) {
|
||||
printf("fail rand test %d\n", fail);
|
||||
return 1;
|
||||
} else
|
||||
putchar('.');
|
||||
|
||||
// Test various number of sources
|
||||
for (j = 3; j <= TEST_SOURCES + 1; j++) {
|
||||
for (i = 0; i < j; i++)
|
||||
rand_buffer(buffs[i], TEST_LEN);
|
||||
|
||||
xor_gen(j, TEST_LEN, buffs);
|
||||
fail |= xor_check_base(j, TEST_LEN, buffs);
|
||||
|
||||
if (fail > 0) {
|
||||
printf("fail rand test %d sources\n", j);
|
||||
return 1;
|
||||
} else
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
fflush(0);
|
||||
|
||||
// Test various number of sources and len
|
||||
k = 0;
|
||||
while (k <= TEST_LEN) {
|
||||
for (j = 3; j <= TEST_SOURCES + 1; j++) {
|
||||
for (i = 0; i < j; i++)
|
||||
rand_buffer(buffs[i], k);
|
||||
|
||||
xor_gen(j, k, buffs);
|
||||
fail |= xor_check_base(j, k, buffs);
|
||||
|
||||
if (fail > 0) {
|
||||
printf("fail rand test %d sources, len=%d, ret=%d\n", j, k,
|
||||
fail);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
k += 1;
|
||||
}
|
||||
|
||||
// Test at the end of buffer
|
||||
for (i = 0; i < TEST_LEN; i += 32) {
|
||||
for (j = 0; j < TEST_SOURCES + 1; j++) {
|
||||
rand_buffer((unsigned char *)buffs[j] + i, TEST_LEN - i);
|
||||
tmp_buf[j] = (char *)buffs[j] + i;
|
||||
}
|
||||
|
||||
xor_gen(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
|
||||
fail |= xor_check_base(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
|
||||
|
||||
if (fail > 0) {
|
||||
printf("fail end test - offset: %d, len: %d\n", i, TEST_LEN - i);
|
||||
return 1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
fflush(0);
|
||||
}
|
||||
|
||||
if (!fail)
|
||||
printf(" done: Pass\n");
|
||||
|
||||
return fail;
|
||||
}
|
Loading…
Reference in New Issue
Block a user