Add BYTE_MUL_ARM and use it for the destination term of the SourceOver blend.

BYTE_MUL_ARM(x, a) treats x as four packed 8-bit channels and scales each of
them by a / 255:

  * a == 255 returns x unchanged and a == 0 returns 0, without doing any
    arithmetic. These are pure shortcuts: the general path below yields the
    same values for those two inputs.
  * Otherwise the channels are processed two at a time. Bytes 0 and 2
    (x & 0xff00ff) are multiplied by a, reduced with
    (t + (t >> 8) + 0x80) >> 8 per 16-bit lane (a rounded divide by 255),
    and masked back to 0x00ff00ff. Bytes 1 and 3 ((x >> 8) & 0xff00ff) get
    the same multiply and correction, but instead of shifting down the
    result is masked with 0xff00ff00, which leaves both bytes in their
    original positions. The two halves are then OR-ed together.

Call sites: the two later hunks (old lines 134 and 143) replace
BYTE_MUL(dest[i], qAlpha(~s)) with BYTE_MUL_ARM(dest[i], qAlpha(~s)).
The BYTE_MUL(src[i], const_alpha) call in the last hunk is left as it was.

NOTE(review): presumably the intent is to skip the multiply when qAlpha(~s)
is 0 or 255 -- confirm with the author or with measurements.
NOTE(review): presumably the default branch mirrors the generic BYTE_MUL,
which is not part of this patch -- confirm.
NOTE(review): whitespace in the context lines (indentation, and the blank
line before "++i;" in the second hunk) is unverified. Compare it with the
target file, or apply with whitespace-tolerant matching, before relying on
this patch.

--- a/src/gui/painting/qdrawhelper_arm_gnu.cpp
+++ b/src/gui/painting/qdrawhelper_arm_gnu.cpp
@@ -122,6 +122,27 @@
 // -- I could not verify this on a Cortex-A8 with the above UNROLL_LOOP_WITH_PLD
 # define PRELOAD_COND2(x,y) if (((uint)&x[i])%L2CacheLineLength == 0) preload(&x[i] + L2CacheLineLengthInInts); \
          if (((uint)&y[i])%L2CacheLineLength == 16) preload(&y[i] + L2CacheLineLengthInInts);
+
+Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_ARM(uint x, uint a) {
+    switch (a) {
+    case 255:
+        return x;
+    case 0:
+        return 0;
+    default: {
+        uint t = (x & 0xff00ff) * a;
+        t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8;
+        t &= 0xff00ff;
+
+        x = ((x >> 8) & 0xff00ff) * a;
+        x = (x + ((x >> 8) & 0xff00ff) + 0x800080);
+        x &= 0xff00ff00;
+        x |= t;
+        return x;
+    }
+    }
+}
+
 void comp_func_SourceOver_arm(uint *dest, const uint *src, int length, uint const_alpha)
 {
     PRELOAD_INIT2(dest, src)
@@ -134,7 +155,7 @@
                 dest[i] = src[i];
             } else {
                 uint s = src[i];
-                dest[i] = s + BYTE_MUL(dest[i], qAlpha(~s));
+                dest[i] = s + BYTE_MUL_ARM(dest[i], qAlpha(~s));
             }
 
             ++i;
@@ -143,7 +164,7 @@
         int i = 0;
         while (i < length) {
             uint s = BYTE_MUL(src[i], const_alpha);
-            dest[i] = s + BYTE_MUL(dest[i], qAlpha(~s));
+            dest[i] = s + BYTE_MUL_ARM(dest[i], qAlpha(~s));
             PRELOAD_COND2(dest, src)
             ++i;
         }