Then the optimizer seems to have made a mistake by being overly eager to use ldrd. Here’s the function in question. At the point of the crash the optimizer noticed that two consecutive uint32_t values were being read from the pointer ‘cur’, so ldrd would be an efficient way to read them; except that there’s no guarantee in the code that ‘cur’ is 8-byte aligned. The input is an arbitrary unaligned pointer.
uint32_t crc32_8(void* data, size_t len, uint32_t prev_value)
{
uint32_t *cur = (uint32_t*) data;
uint32_t crc = ~prev_value;
while (len >= 8) {
uint32_t _one_ = *cur++ ^ crc; // <---CRASHES
uint32_t two = *cur++;
crc =
crc_lookup[7][(one ) & 0xFF] ^
crc_lookup[6][(one>> 8) & 0xFF] ^
crc_lookup[5][(one>>16) & 0xFF] ^
crc_lookup[4][(one>>24) & 0xFF] ^
crc_lookup[3][(two ) & 0xFF] ^
crc_lookup[2][(two>> 8) & 0xFF] ^
crc_lookup[1][(two>>16) & 0xFF] ^
crc_lookup[0][(two>>24) & 0xFF];
len -= 8;
}
unsigned char *cur_byte = (unsigned char*) cur;
while (len--)
crc = (crc >> 8) ^ crc_lookup[0][(crc & 0xFF) ^ *cur_byte++];
return ~crc;
}