#include <stddef.h>

// WARNING: When building the scalar versions of these functions you need to
// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
// from recognising a loop idiom and planting calls to memcpy!

// Copies n bytes from src to dest one byte at a time, starting at the lowest
// address and working upwards. Returns dest. Copies nothing when n is 0.
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *destp = (unsigned char *)dest;
  const unsigned char *srcp = (const unsigned char *)src;
  for (size_t i = 0; i < n; ++i)
    destp[i] = srcp[i];

  return dest;
}

// If dest and src overlap then behaviour is undefined, hence we can add the
// restrict keywords here. This also matches the definition of the libc memcpy
// according to the man page.
//
// Copies n bytes from src to dest and returns dest.
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
                      size_t n) __arm_streaming_compatible {
  return __arm_sc_memcpy_fwd(dest, src, n);
}

// Sets the first n bytes at dest to the value of c converted to unsigned
// char (only the low-order byte of c is used). Returns dest.
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
  unsigned char *destp = (unsigned char *)dest;
  unsigned char c8 = (unsigned char)c;
  for (size_t i = 0; i < n; ++i)
    destp[i] = c8;

  return dest;
}

// Copies n bytes from src to dest one byte at a time, starting at the highest
// address and working downwards. Returns dest. Copies nothing when n is 0.
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *destp = (unsigned char *)dest;
  const unsigned char *srcp = (const unsigned char *)src;
  // TODO: Improve performance by copying larger chunks in reverse, or by
  // using SVE.
  while (n > 0) {
    // Decrement first so that n indexes the last byte not yet copied.
    --n;
    destp[n] = srcp[n];
  }
  return dest;
}

// Semantically a memmove is equivalent to the following:
//   1. Copy the entire contents of src to a temporary array that does not
//      overlap with src or dest.
//   2. Copy the contents of the temporary array into dest.
// Copies n bytes from src to dest, giving the correct result even when the two
// regions overlap, by choosing the copy direction from the relative positions
// of src and dest. Returns dest.
void *__arm_sc_memmove(void *dest, const void *src,
                       size_t n) __arm_streaming_compatible {
  const unsigned char *from = (const unsigned char *)src;
  unsigned char *to = (unsigned char *)dest;

  // When the regions are strictly separated either direction is safe, so the
  // plain forward copy is used.
  const int disjoint = (from > (to + n)) || (to > (from + n));

  // Overlap case 2:
  //   src:  Low  |   ->   |        High
  //   dest: Low     |   ->   |     High
  // While we're in the overlap region, writing to dest low-to-high would
  // corrupt bytes of src that have not been read yet. Copying in reverse,
  // starting at the highest address, avoids that.
  if (!disjoint && from <= to)
    return __arm_sc_memcpy_rev(dest, src, n);

  // Either there is no overlap, or this is overlap case 1:
  //   src:  Low     |   ->   |     High
  //   dest: Low  |   ->   |        High
  // Here src is always ahead of dest at a higher address. If we first read a
  // chunk of data from src we can safely write the same chunk to dest without
  // corrupting future reads of src.
  return __arm_sc_memcpy_fwd(dest, src, n);
}

// Scans the first n bytes at src for the value of c converted to unsigned
// char. Returns a pointer to the first matching byte, or NULL if none of the
// n bytes match (including when n is 0).
const void *__arm_sc_memchr(const void *src, int c,
                            size_t n) __arm_streaming_compatible {
  const unsigned char needle = (unsigned char)c;
  const unsigned char *cursor = (const unsigned char *)src;

  for (size_t remaining = n; remaining != 0; --remaining, ++cursor) {
    if (*cursor == needle)
      return cursor;
  }

  return NULL;
}