From ae77cbc1e7b90473a2b0963bce0e1eb163873214 Mon Sep 17 00:00:00 2001 From: Ken Steele Date: Wed, 7 Aug 2013 12:39:56 -0400 Subject: RAID: add tilegx SIMD implementation of raid6 This change adds TILE-Gx SIMD instructions to the software raid (md), modeling the Altivec implementation. This is only for Syndrome generation; there is more that could be done to improve recovery, as in the recent Intel SSE3 recovery implementation. The code unrolls 8 times; this turns out to be the best on tilegx hardware among the set 1, 2, 4, 8 or 16. The code reads one cache-line of data from each disk, stores P and Q then goes to the next cache-line. The test code in sys/linux/lib/raid6/test reports 2008 MB/s data read rate for syndrome generation using 18 disks (16 data and 2 parity). It was 1512 MB/s before this SIMD optimizations. This is running on 1 core with all the data in cache. This is based on the paper The Mathematics of RAID-6. (http://kernel.org/pub/linux/kernel/people/hpa/raid6.pdf). Signed-off-by: Ken Steele Signed-off-by: Chris Metcalf Signed-off-by: NeilBrown --- lib/raid6/Makefile | 6 ++++ lib/raid6/algos.c | 3 ++ lib/raid6/test/Makefile | 7 ++++ lib/raid6/tilegx.uc | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+) create mode 100644 lib/raid6/tilegx.uc (limited to 'lib/raid6') diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 9f7c184725d7..e5e90219d69e 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -5,6 +5,7 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o +raid6_pq-$(CONFIG_TILEGX) += tilegx8.o hostprogs-y += mktables @@ -70,6 +71,11 @@ $(obj)/altivec8.c: UNROLL := 8 $(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) +targets += tilegx8.c +$(obj)/tilegx8.c: UNROLL := 8 +$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + quiet_cmd_mktable = TABLE $@ cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 6d7316fe9f30..b9f340180a3b 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -65,6 +65,9 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_altivec2, &raid6_altivec4, &raid6_altivec8, +#endif +#if defined(CONFIG_TILEGX) + &raid6_tilegx8, #endif &raid6_intx1, &raid6_intx2, diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 087332dbf8aa..78f89d807f4e 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -35,6 +35,9 @@ else OBJS += altivec1.o altivec2.o altivec4.o altivec8.o endif endif +ifeq ($(ARCH),tilegx) +OBJS += tilegx8.o +endif .c.o: $(CC) $(CFLAGS) -c -o $@ $< @@ -85,11 +88,15 @@ int16.c: int.uc ../unroll.awk int32.c: int.uc ../unroll.awk $(AWK) ../unroll.awk -vN=32 < int.uc > $@ +tilegx8.c: tilegx.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < tilegx.uc > $@ + tables.c: mktables ./mktables > tables.c clean: rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test + rm -f tilegx*.c spotless: clean rm -f *~ diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc new file mode 100644 index 000000000000..e7c29459cbcd --- /dev/null +++ b/lib/raid6/tilegx.uc @@ -0,0 +1,86 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * Copyright 2012 Tilera Corporation - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * tilegx$#.c + * + * $#-way unrolled TILE-Gx SIMD for RAID-6 math. + * + * This file is postprocessed using unroll.awk. + * + */ + +#include + +/* Create 8 byte copies of constant byte */ +# define NBYTES(x) (__insn_v1addi(0, x)) +# define NSIZE 8 + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ u64 SHLBYTE(u64 v) +{ + /* Vector One Byte Shift Left Immediate. */ + return __insn_v1shli(v, 1); +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline __attribute_const__ u64 MASK(u64 v) +{ + /* Vector One Byte Shift Right Signed Immediate. */ + return __insn_v1shrsi(v, 7); +} + + +void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u64 *p, *q; + int d, z, z0; + + u64 wd$$, wq$$, wp$$, w1$$, w2$$; + u64 x1d = NBYTES(0x1d); + u64 * z0ptr; + + z0 = disks - 3; /* Highest data disk */ + p = (u64 *)dptr[z0+1]; /* XOR parity */ + q = (u64 *)dptr[z0+2]; /* RS syndrome */ + + z0ptr = (u64 *)&dptr[z0][0]; + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + wq$$ = wp$$ = *z0ptr++; + for ( z = z0-1 ; z >= 0 ; z-- ) { + wd$$ = *(u64 *)&dptr[z][d+$$*NSIZE]; + wp$$ = wp$$ ^ wd$$; + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ = w2$$ & x1d; + w1$$ = w1$$ ^ w2$$; + wq$$ = w1$$ ^ wd$$; + } + *p++ = wp$$; + *q++ = wq$$; + } +} + +const struct raid6_calls raid6_tilegx$# = { + raid6_tilegx$#_gen_syndrome, + NULL, + "tilegx$#", + 0 +}; -- cgit v1.2.3-58-ga151