Commit a69e152dcc704eff6b7e409ad1ed43f732e6be9c

  • avatar
  • zecke
  • Mon Nov 09 07:28:12 GMT 2009
Create a specialized SourceOver routine in plain C

The following tweaks were done to boost performance:
     - Special-case source alpha values 0xff and 0x00 in the const_alpha=255 case
     - Unroll the above block eight times
     - Place and tweak the pld to give optimal performance on Cortex-A8

Read paint buffer with 217 frames
/qtgraphics-cycler.trace, iterations: 3, frames: 217, min(ms): 7981, median(ms): 7997, stddev: 0.842720 %, max(fps): 27.189575
src/gui/painting/qdrawhelper.cpp
(19 / 8)
  
79137913#endif
79147914}
79157915
7916#if defined(Q_CC_RVCT) && defined(QT_HAVE_ARMV6)
7916#if (defined(Q_CC_RVCT) && defined(QT_HAVE_ARMV6)) || defined (Q_CC_GNU)
79177917// Move these to qdrawhelper_arm.c when all
79187918// functions are implemented using arm assembly.
7919static CompositionFunctionSolid qt_functionForModeSolid_ARMv6[numCompositionFunctions] = {
7919static CompositionFunctionSolid qt_functionForModeSolid_ARM[numCompositionFunctions] = {
79207920 comp_func_solid_SourceOver,
79217921 comp_func_solid_DestinationOver,
79227922 comp_func_solid_Clear,
79527952 rasterop_solid_SourceAndNotDestination
79537953};
79547954
7955static CompositionFunction qt_functionForMode_ARMv6[numCompositionFunctions] = {
7955static CompositionFunction qt_functionForMode_ARM[numCompositionFunctions] = {
7956#if QT_HAVE_ARMV6
79567957 comp_func_SourceOver_armv6,
7958#else
7959 comp_func_SourceOver_arm,
7960#endif
79577961 comp_func_DestinationOver,
79587962 comp_func_Clear,
7963#if QT_HAVE_ARMV6
79597964 comp_func_Source_armv6,
7965#else
7966 comp_func_Source,
7967#endif
79607968 comp_func_Destination,
79617969 comp_func_SourceIn,
79627970 comp_func_DestinationIn,
79967996 rasterop_SourceAndNotDestination
79977997};
79987998
7999#if defined(Q_CC_RVCT)
// Span callback that applies a solid-colour composition function to each span.
//   count    - number of entries in `spans`
//   spans    - array of spans; each one supplies y, x, len and coverage
//   userData - opaque pointer that is reinterpreted as QSpanData
// The composition function is looked up once, indexed by the raster buffer's
// composition mode, and then called for every span.
79998000static void qt_blend_color_argb_armv6(int count, const QSpan *spans, void *userData)
80008001{
80018002 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
80028003
// NOTE(review): the next two lines appear to be the pre-commit (ARMv6 table)
// and post-commit (ARM table) versions of the same statement in this diff view.
8003 CompositionFunctionSolid func = qt_functionForModeSolid_ARMv6[data->rasterBuffer->compositionMode];
8004 CompositionFunctionSolid func = qt_functionForModeSolid_ARM[data->rasterBuffer->compositionMode];
80048005 while (count--) {
// First pixel of the span: start of scan line y, offset by x pixels.
80058006 uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
80068007 func(target, spans->len, data->solid.color, spans->coverage);
80078008 ++spans;
80088009 }
80098010}
8011#endif
80108012
80118013#endif // (Q_CC_RVCT && QT_HAVE_ARMV6) || Q_CC_GNU
80128014
81288128
81298129#endif // QT_NO_DEBUG
81308130
8131// ARM optimized drawing helpers. Currently this is used for GCC and RVCT.
81318132#if defined(QT_ARCH_ARM) || defined(QT_ARCH_ARMV6)
81328133
81338134# if defined (Q_CC_RVCT) || defined(Q_CC_GNU)
81348135 qt_memfill32 = qt_memfill32_arm;
8136
8137 functionForModeAsm = qt_functionForMode_ARM;
8138 functionForModeSolidAsm = qt_functionForModeSolid_ARM;
81358139# endif
81368140
8137# if defined(Q_CC_RVCT) && defined(QT_HAVE_ARMV6)
8138 functionForModeAsm = qt_functionForMode_ARMv6;
8139 functionForModeSolidAsm = qt_functionForModeSolid_ARMv6;
81408141
8141
8142# if defined(Q_CC_RVCT) && defined(QT_HAVE_ARMV6)
81428143 qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_armv6;
81438144
81448145 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_armv6;
src/gui/painting/qdrawhelper_arm_gnu.cpp
(65 / 0)
  
8787 );
8888}
8989
// Issues an ARM `pld` (preload data) instruction for the address `start`.
// The address is handed to the inline assembly in a register and the asm
// statement has no outputs. When QT_ARM_HAS_PLD is not defined the body is
// empty and the call does nothing.
90static __attribute__((always_inline)) void preload(const uint *start)
91{
92#if defined(QT_ARM_HAS_PLD)
93 asm __volatile__("pld [%[addr]]\n"
94 :
95 : [addr] "r"(start));
96#endif
97}
98
// Textually repeat `block` four / eight times.
99#define UNROLL_4_TIMES(block) block block block block
100#define UNROLL_8_TIMES(block) UNROLL_4_TIMES(block) UNROLL_4_TIMES(block)
101
// Expands to an eight-way unrolled loop over [0, length) followed by a tail loop.
// Contract at the expansion site:
//   - `src`, `dest` and `length` must already be in scope;
//   - the macro itself declares `i` and `end`, so neither may already be
//     declared in the enclosing scope;
//   - `block` must advance `i` by itself (the loops contain no increment).
// The first loop covers `length` rounded down to a multiple of 8 and issues a
// preload 32 elements ahead of both src[i] and dest[i] once per iteration
// (i.e. once per eight copies of `block`). One more pair of preloads is issued
// before the tail loop, which runs `block` once per remaining element.
102#define UNROLL_LOOP_WITH_PLD(block) \
103 int i = 0; \
104 int end = length & ~7; \
105 while (i < end) { \
106 preload(&src[i] + 32); \
107 preload(&dest[i]+ 32); \
108 UNROLL_8_TIMES(block) \
109 } \
110 preload(&src[i] + 32); \
111 preload(&dest[i]+ 32); \
112 while (i < length) { \
113 block \
114 }
115
116
// Cache line length in bytes used by PRELOAD_COND2, and the same length
// expressed as a number of uint elements.
117// TODO: on Cortex-A8 this is 64.
118static const uint L2CacheLineLength = 32;
119static const uint L2CacheLineLengthInInts = L2CacheLineLength/sizeof(uint);
// Issues one preload for each of the two given addresses.
120# define PRELOAD_INIT2(x,y) preload(x); preload(y);
121// Two consecutive preloads stall, so space them out a bit by triggering them at different remainders (0 and 16) of the address modulo the cache line length.
122// -- I could not verify this on a Cortex-A8 with the above UNROLL_LOOP_WITH_PLD
// Relies on an index variable `i` at the expansion site. Preloads one cache
// line ahead of x[i] when &x[i] sits at remainder 0, and one cache line ahead
// of y[i] when &y[i] sits at remainder 16.
// NOTE(review): expands to two bare `if` statements, so it is only safe where
// two full statements are allowed; the (uint) pointer cast presumably assumes
// 32-bit pointers - confirm.
123# define PRELOAD_COND2(x,y) if (((uint)&x[i])%L2CacheLineLength == 0) preload(&x[i] + L2CacheLineLengthInInts); \
124 if (((uint)&y[i])%L2CacheLineLength == 16) preload(&y[i] + L2CacheLineLengthInInts);
// SourceOver composition of `length` pixels from `src` onto `dest`, in place.
//   dest        - destination pixels, read and overwritten
//   src         - source pixels, read only
//   length      - number of pixels to process
//   const_alpha - extra alpha applied to every source pixel; 255 selects the
//                 fast path below
// const_alpha == 255: uses the unrolled, preloading loop. A source pixel whose
// top byte is 0x00 leaves dest untouched, one whose top byte is 0xff is copied
// verbatim, anything else is blended as s + BYTE_MUL(dest, qAlpha(~s)).
// Otherwise: every source pixel is first scaled with BYTE_MUL(src, const_alpha)
// and then blended with the same formula, with conditional preloads.
// NOTE(review): BYTE_MUL and qAlpha are defined elsewhere; presumably a
// per-channel multiply and the alpha-byte extractor - confirm.
125void comp_func_SourceOver_arm(uint *dest, const uint *src, int length, uint const_alpha)
126{
127 PRELOAD_INIT2(dest, src)
128 if (const_alpha == 255) {
129 UNROLL_LOOP_WITH_PLD(
130 {
131 if ((src[i] & 0xff000000) == 0x00000000) {
132 ; // nothing
133 } else if ((src[i] & 0xff000000) == 0xff000000) {
134 dest[i] = src[i];
135 } else {
136 uint s = src[i];
137 dest[i] = s + BYTE_MUL(dest[i], qAlpha(~s));
138 }
139
140 ++i;
141 });
142 } else {
// General path: one pixel per iteration, no alpha special-casing.
143 int i = 0;
144 while (i < length) {
145 uint s = BYTE_MUL(src[i], const_alpha);
146 dest[i] = s + BYTE_MUL(dest[i], qAlpha(~s));
147 PRELOAD_COND2(dest, src)
148 ++i;
149 }
150 }
151
152 return;
153}
154
90155#endif
src/gui/painting/qdrawhelper_arm_p.h
(1 / 0)
  
9999
100100#if defined(Q_CC_RVCT) || defined(Q_CC_GNU)
101101extern "C" void qt_memfill32_arm(quint32 *dest, quint32 value, int count);
102extern "C" void comp_func_SourceOver_arm(uint *dest, const uint *src, int length, uint const_alpha);
102103#endif
103104
104105#if defined(Q_CC_RVCT) && defined(QT_HAVE_ARMV6)

Comments

Add a new comment:

Login or create an account to post a comment

Add your comment