I wrote the following two functions for one of the CrabFire filters. They require no memory lookups so they will not pollute the data cache or hog the memory bus.
Code:
vector char vec_tolower(vector char str)
{
    /*
     * Lower-cases every ASCII upper-case letter in the vector; all other
     * bytes pass through unchanged.  Every constant is synthesised from
     * splat immediates, so nothing is loaded from memory.
     *
     * Constant recipes are from Holger Bettag's table of constants.
     */

    /* 0x20: distance between an upper-case letter and its lower-case twin
       (1 rotated left by 5). */
    vector char case_offset = vec_rl(vec_splat_u8(1), vec_splat_u8(5));

    /* Candidate result with the offset applied to every byte; only the
       lanes that hold upper-case letters are kept below. */
    vector char lowered = vec_add(str, case_offset);

    /* 0x40 ('@'), i.e. one below 'A': 4 rotated left by 4. */
    vector char below_A = vec_rl(vec_splat_u8(4), vec_splat_u8(4));

    /* 0x5B ('['), i.e. one above 'Z': 0x0B rotated left by 3 (the byte
       rotate uses only the low three bits of 0x0B) gives 0x58, then OR
       in 0x0B. */
    vector char above_Z = vec_vor(vec_rl(vec_splat_u8(0xb), vec_splat_u8(0xb)),
                                  vec_splat_u8(0xb));

    /* Both compares are strict, so the exclusive bounds select 'A'..'Z'. */
    vector bool char is_upper = vec_and(vec_cmpgt(str, below_A),
                                        vec_cmplt(str, above_Z));

    /* Take the lowered byte where the lane is upper-case, else the input. */
    return vec_sel(str, lowered, is_upper);
}
Code:
vector char vec_toupper(vector char str)
{
    /*
     * Upper-cases every ASCII lower-case letter ('a'..'z') in the vector;
     * all other bytes pass through unchanged.  Every constant is
     * synthesised from splat immediates, so nothing is loaded from memory.
     *
     * Constant recipes follow Holger Bettag's table of constants.
     */

    /* 0x60 ('`'), i.e. one below 'a': 3 rotated left by 5. */
    vector char below_a = vec_rl(vec_splat_u8(3), vec_splat_u8(5));

    /* 0x7B ('{'), i.e. one above 'z': rounded average of 0 and 0xF5
       (splat of -11) = (0 + 245 + 1) >> 1 = 123.
       The compare below is strict, so the bound must be one PAST 'z';
       the previous constant (splat of -13 -> 0x7A == 'z') left 'z' itself
       unconverted. */
    vector char above_z = vec_avg(vec_splat_u8(0), vec_splat_u8(-11));

    /* 0x20: distance between a lower-case letter and its upper-case twin
       (1 rotated left by 5). */
    vector char diff = vec_rl(vec_splat_u8(1), vec_splat_u8(5));

    /* Lanes strictly between the two exclusive bounds hold 'a'..'z'. */
    vector bool char gt = vec_cmpgt(str, below_a);
    vector bool char lt = vec_cmplt(str, above_z);
    vector bool char mask = vec_and(gt, lt);

    /* Candidate result with the offset removed from every byte; only the
       masked lanes are taken from it. */
    vector char upper = vec_sub(str, diff);
    return vec_sel(str, upper, mask);
}