--- blosc/bitshuffle-altivec.c.orig	2024-08-12 12:46:55 UTC
+++ blosc/bitshuffle-altivec.c
@@ -40,15 +40,15 @@
 #include
 #include
 
-static void helper_print(__vector uint8_t v, char* txt){
+static void helper_print(__vector unsigned char v, char* txt){
   printf("%s %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",txt,
          v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7],
          v[8], v[9], v[10], v[11], v[12], v[13], v[14], v[15]);
 }
 #endif
 
-static inline __vector uint8_t gen_save_mask(size_t offset){
-  __vector uint8_t mask;
+static inline __vector unsigned char gen_save_mask(size_t offset){
+  __vector unsigned char mask;
   size_t k;
   for (k = 0; k < 16; k++)
     mask[k] = (k < offset) ? 0xFF : 0x00;
@@ -387,28 +387,28 @@ int64_t bshuf_trans_byte_bitrow_altivec(const void* in
  * the bytes. */
 int64_t bshuf_trans_byte_bitrow_altivec(const void* in, void* out, const size_t size,
                                         const size_t elem_size) {
-  static const __vector uint8_t epi8_low = (const __vector uint8_t) {
+  static const __vector unsigned char epi8_low = (const __vector unsigned char) {
     0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13,
     0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17};
-  static const __vector uint8_t epi8_hi = (const __vector uint8_t) {
+  static const __vector unsigned char epi8_hi = (const __vector unsigned char) {
     0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b,
     0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f};
-  static const __vector uint8_t epi16_low = (const __vector uint8_t) {
+  static const __vector unsigned char epi16_low = (const __vector unsigned char) {
     0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
     0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17};
-  static const __vector uint8_t epi16_hi = (const __vector uint8_t) {
+  static const __vector unsigned char epi16_hi = (const __vector unsigned char) {
     0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
     0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f};
-  static const __vector uint8_t epi32_low = (const __vector uint8_t) {
+  static const __vector unsigned char epi32_low = (const __vector unsigned char) {
     0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
     0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
-  static const __vector uint8_t epi32_hi = (const __vector uint8_t) {
+  static const __vector unsigned char epi32_hi = (const __vector unsigned char) {
     0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b,
     0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f};
-  static const __vector uint8_t epi64_low = (const __vector uint8_t) {
+  static const __vector unsigned char epi64_low = (const __vector unsigned char) {
     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17};
-  static const __vector uint8_t epi64_hi = (const __vector uint8_t) {
+  static const __vector unsigned char epi64_hi = (const __vector unsigned char) {
     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
 
@@ -416,7 +416,7 @@ int64_t bshuf_trans_byte_bitrow_altivec(const void* in
   uint8_t* out_b = (uint8_t*)out;
   size_t nrows = 8 * elem_size;
   size_t nbyte_row = size / 8;
-  __vector uint8_t xmm0[16], xmm1[16];
+  __vector unsigned char xmm0[16], xmm1[16];
 
   CHECK_MULT_EIGHT(size);
 
@@ -543,7 +543,7 @@ int64_t bshuf_shuffle_bit_eightelem_altivec(const void
   const uint8_t* in_b = (const uint8_t*)in;
   uint8_t* out_b = (uint8_t*)out;
   size_t nbyte = elem_size * size;
-  __vector uint8_t masks[8], data;
+  __vector unsigned char masks[8], data;
 
   CHECK_MULT_EIGHT(size);
 
@@ -560,9 +560,9 @@ int64_t bshuf_shuffle_bit_eightelem_altivec(const void
     for (size_t jj = 0; jj + 15 < 8 * elem_size; jj += 16) {
       data = vec_xl(ii + jj, in_b);
       for (size_t kk = 0; kk < 8; kk++) {
-        __vector uint16_t tmp;
+        __vector unsigned short tmp;
         uint16_t* oui16;
-        tmp = (__vector uint16_t) vec_bperm(data, masks[kk]);
+        tmp = (__vector unsigned short) vec_bperm(data, masks[kk]);
         oui16 = (uint16_t*)&out_b[ii + (jj>>3) + kk * elem_size];
         *oui16 = tmp[4];
       }
--- blosc/shuffle-altivec.c.orig	2024-08-12 12:43:11 UTC
+++ blosc/shuffle-altivec.c
@@ -27,7 +27,7 @@ shuffle2_altivec(uint8_t* const dest, const uint8_t* c
                  const int32_t vectorizable_elements, const int32_t total_elements){
   static const int32_t bytesoftype = 2;
   uint32_t i, j;
-  __vector uint8_t xmm0[2];
+  __vector unsigned char xmm0[2];
 
   for (j = 0; j < vectorizable_elements; j += 16){
     /* Fetch 16 elements (32 bytes) */
@@ -49,7 +49,7 @@ shuffle4_altivec(uint8_t* const dest, const uint8_t* c
                  const int32_t vectorizable_elements, const int32_t total_elements){
   static const int32_t bytesoftype = 4;
   int32_t i, j;
-  __vector uint8_t xmm0[4];
+  __vector unsigned char xmm0[4];
 
   for (j = 0; j < vectorizable_elements; j += 16)
   {
@@ -75,7 +75,7 @@ shuffle8_altivec(uint8_t* const dest, const uint8_t* c
                 const int32_t vectorizable_elements, const int32_t total_elements) {
   static const uint8_t bytesoftype = 8;
   int32_t i, j;
-  __vector uint8_t xmm0[8];
+  __vector unsigned char xmm0[8];
 
   for (j = 0; j < vectorizable_elements; j += 16)
   {
@@ -98,7 +98,7 @@ shuffle16_altivec(uint8_t* const dest, const uint8_t*
                  const int32_t vectorizable_elements, const int32_t total_elements) {
   static const int32_t bytesoftype = 16;
   int32_t i, j;
-  __vector uint8_t xmm0[16];
+  __vector unsigned char xmm0[16];
 
   for (j = 0; j < vectorizable_elements; j += 16)
   {
@@ -123,7 +123,7 @@ shuffle16_tiled_altivec(uint8_t* const dest, const uin
                         const int32_t bytesoftype) {
   int32_t j, k;
   const int32_t vecs_per_el_rem = bytesoftype & 0xF;
-  __vector uint8_t xmm[16];
+  __vector unsigned char xmm[16];
 
   for (j = 0; j < vectorizable_elements; j += 16) {
     /* Advance the offset into the type by the vector size (in bytes), unless this is
@@ -154,7 +154,7 @@ unshuffle2_altivec(uint8_t* const dest, const uint8_t*
                    const int32_t vectorizable_elements, const int32_t total_elements) {
   static const int32_t bytesoftype = 2;
   uint32_t i, j;
-  __vector uint8_t xmm0[2], xmm1[2];
+  __vector unsigned char xmm0[2], xmm1[2];
 
   for (j = 0; j < vectorizable_elements; j += 16) {
     /* Load 16 elements (32 bytes) into 2 vectors registers. */
@@ -178,7 +178,7 @@ unshuffle4_altivec(uint8_t* const dest, const uint8_t*
                    const int32_t vectorizable_elements, const int32_t total_elements) {
   static const int32_t bytesoftype = 4;
   uint32_t i, j;
-  __vector uint8_t xmm0[4], xmm1[4];
+  __vector unsigned char xmm0[4], xmm1[4];
 
   for (j = 0; j < vectorizable_elements; j += 16) {
     /* Load 16 elements (64 bytes) into 4 vectors registers. */
@@ -193,11 +193,11 @@ unshuffle4_altivec(uint8_t* const dest, const uint8_t*
     /* Shuffle 2-byte words */
     for (i = 0; i < 2; i++) {
       /* Compute the low 32 bytes */
-      xmm0[i] = (__vector uint8_t) vec_vmrghh((__vector uint16_t)xmm1[i * 2],
-                                              (__vector uint16_t) xmm1[i * 2 + 1]);
+      xmm0[i] = (__vector unsigned char) vec_vmrghh((__vector unsigned short)xmm1[i * 2],
+                                                    (__vector unsigned short) xmm1[i * 2 + 1]);
       /* Compute the hi 32 bytes */
-      xmm0[i+2] = (__vector uint8_t) vec_vmrglh((__vector uint16_t)xmm1[i * 2],
-                                                (__vector uint16_t)xmm1[i * 2 + 1]);
+      xmm0[i+2] = (__vector unsigned char) vec_vmrglh((__vector unsigned short)xmm1[i * 2],
+                                                      (__vector unsigned short)xmm1[i * 2 + 1]);
     }
     /* Store the result vectors in proper order */
     vec_xst(xmm0[0], bytesoftype * j, dest);
@@ -213,7 +213,7 @@ unshuffle8_altivec(uint8_t* const dest, const uint8_t*
                    const int32_t vectorizable_elements, const int32_t total_elements) {
   static const uint8_t bytesoftype = 8;
   uint32_t i, j;
-  __vector uint8_t xmm0[8], xmm1[8];
+  __vector unsigned char xmm0[8], xmm1[8];
 
   // Initialize permutations for writing
   for (j = 0; j < vectorizable_elements; j += 16) {
@@ -227,17 +227,17 @@ unshuffle8_altivec(uint8_t* const dest, const uint8_t*
     }
     /* Shuffle 2-byte words */
     for (i = 0; i < 4; i++) {
-      xmm0[i] = (__vector uint8_t)vec_vmrghh((__vector uint16_t)xmm1[i * 2],
-                                             (__vector uint16_t)xmm1[i * 2 + 1]);
-      xmm0[4 + i] = (__vector uint8_t)vec_vmrglh((__vector uint16_t)xmm1[i * 2],
-                                                 (__vector uint16_t)xmm1[i * 2 + 1]);
+      xmm0[i] = (__vector unsigned char)vec_vmrghh((__vector unsigned short)xmm1[i * 2],
+                                                   (__vector unsigned short)xmm1[i * 2 + 1]);
+      xmm0[4 + i] = (__vector unsigned char)vec_vmrglh((__vector unsigned short)xmm1[i * 2],
+                                                       (__vector unsigned short)xmm1[i * 2 + 1]);
     }
     /* Shuffle 4-byte dwords */
     for (i = 0; i < 4; i++) {
-      xmm1[i] = (__vector uint8_t)vec_vmrghw((__vector uint32_t)xmm0[i * 2],
-                                             (__vector uint32_t)xmm0[i * 2 + 1]);
-      xmm1[4 + i] = (__vector uint8_t)vec_vmrglw((__vector uint32_t)xmm0[i * 2],
-                                                 (__vector uint32_t)xmm0[i * 2 + 1]);
+      xmm1[i] = (__vector unsigned char)vec_vmrghw((__vector unsigned int)xmm0[i * 2],
+                                                   (__vector unsigned int)xmm0[i * 2 + 1]);
+      xmm1[4 + i] = (__vector unsigned char)vec_vmrglw((__vector unsigned int)xmm0[i * 2],
+                                                       (__vector unsigned int)xmm0[i * 2 + 1]);
     }
     /* Store the result vectors in proper order */
     vec_xst(xmm1[0], bytesoftype * j, dest);
@@ -258,7 +258,7 @@ unshuffle16_altivec(uint8_t* const dest, const uint8_t
                     const int32_t vectorizable_elements, const int32_t total_elements) {
   static const int32_t bytesoftype = 16;
   uint32_t i, j;
-  __vector uint8_t xmm0[16];
+  __vector unsigned char xmm0[16];
 
   for (j = 0; j < vectorizable_elements; j += 16) {
     /* Load 16 elements (64 bytes) into 4 vectors registers. */
@@ -282,7 +282,7 @@ unshuffle16_tiled_altivec(uint8_t* const dest, const u
                           const int32_t bytesoftype) {
   int32_t i, j, offset_into_type;
   const int32_t vecs_per_el_rem = bytesoftype & 0xF;
-  __vector uint8_t xmm[16];
+  __vector unsigned char xmm[16];
 
 
   /* Advance the offset into the type by the vector size (in bytes), unless this is
--- blosc/transpose-altivec.h.orig	2024-08-12 12:42:34 UTC
+++ blosc/transpose-altivec.h
@@ -15,18 +15,18 @@
 
 #include
 
-static const __vector uint8_t even = (const __vector uint8_t) {
+static const __vector unsigned char even = (const __vector unsigned char) {
   0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
   0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e};
 
-static const __vector uint8_t odd = (const __vector uint8_t) {
+static const __vector unsigned char odd = (const __vector unsigned char) {
   0x01, 0x03, 0x05, 0x07, 0x09, 0x0b, 0x0d, 0x0f,
   0x11, 0x13, 0x15, 0x17, 0x19, 0x1b, 0x1d, 0x1f};
 
 
 /* Transpose inplace 2 vectors of 16 bytes in src into dst. */
-static void transpose2x16(__vector uint8_t *xmm0) {
-  __vector uint8_t xmm1[2];
+static void transpose2x16(__vector unsigned char *xmm0) {
+  __vector unsigned char xmm1[2];
 
   xmm1[0] = vec_perm(xmm0[0], xmm0[1], even);
   xmm1[1] = vec_perm(xmm0[0], xmm0[1], odd);
@@ -38,8 +38,8 @@ static void transpose2x16(__vector uint8_t *xmm0) {
 
 /* Transpose inplace 4 vectors of 16 bytes in src into dst.
  * Total cost: 8 calls to vec_perm. */
-static void transpose4x16(__vector uint8_t *xmm0) {
-  __vector uint8_t xmm1[4];
+static void transpose4x16(__vector unsigned char *xmm0) {
+  __vector unsigned char xmm1[4];
 
   /* Transpose vectors 0-1*/
   xmm1[0] = vec_perm(xmm0[0], xmm0[1], even);
@@ -56,8 +56,8 @@ static void transpose4x16(__vector uint8_t *xmm0) {
 
 /* Transpose inplace 8 vectors of 16 bytes in src into dst.
  * Total cost: 24 calls to vec_perm. */
-static void transpose8x16(__vector uint8_t *xmm0) {
-  __vector uint8_t xmm1[8];
+static void transpose8x16(__vector unsigned char *xmm0) {
+  __vector unsigned char xmm1[8];
 
   /* Transpose vectors 0-1*/
   for (int i = 0; i < 8; i += 2){
@@ -85,8 +85,8 @@ static void transpose8x16(__vector uint8_t *xmm0) {
 /* Transpose inplace 16 vectors of 16 bytes in src into dst.
  * Total cost: 64 calls to vec_perm. */
-static void transpose16x16(__vector uint8_t * xmm0){
-  __vector uint8_t xmm1[16];
+static void transpose16x16(__vector unsigned char * xmm0){
+  __vector unsigned char xmm1[16];
 
   /* Transpose vectors 0-1*/
   for (int i = 0; i < 16; i += 2){
     xmm1[i] = vec_perm(xmm0[i], xmm0[i+1], even);