Pick the low-hanging fruit (nw)

Implemented most of MooglyGuy's new RGB intrinsics for VMX/Altivec
Still need to do blend, bilinear filter and merge alpha
This commit is contained in:
Vas Crabb 2015-06-22 05:04:10 +10:00 committed by therealmogminer@gmail.com
parent 55c2e3ef5d
commit 84aa21184b
5 changed files with 701 additions and 556 deletions

View file

@ -314,6 +314,7 @@ files {
MAME_DIR .. "src/emu/video/rgbgen.h",
MAME_DIR .. "src/emu/video/rgbsse.c",
MAME_DIR .. "src/emu/video/rgbsse.h",
MAME_DIR .. "src/emu/video/rgbvmx.c",
MAME_DIR .. "src/emu/video/rgbvmx.h",
MAME_DIR .. "src/emu/video/vector.c",
MAME_DIR .. "src/emu/video/vector.h",

View file

@ -10,6 +10,8 @@
***************************************************************************/
#if defined(__SSE2__) || defined(_MSC_VER)
#include "emu.h"
#include <emmintrin.h>
#include "rgbutil.h"
@ -90,3 +92,5 @@ UINT32 rgbaint_t::bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT
color01 = _mm_packus_epi16(color01, color01);
return _mm_cvtsi128_si32(color01);
}
#endif // defined(__SSE2__) || defined(_MSC_VER)

View file

@ -22,7 +22,7 @@ const struct _rgbsse_statics rgbsse_statics =
{
{ 0 },
{ 255, 255, 255, 255, 255, 255, 255, 255 },
{ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000},
{ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000 },
{ 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff },
{ 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff },
{ 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff },
@ -158,147 +158,3 @@ const struct _rgbsse_statics rgbsse_statics =
}
};
#endif // defined(__SSE2__)
/***************************************************************************
VMX/ALTIVEC TABLES
***************************************************************************/
#if defined(__ALTIVEC__)
#include <altivec.h>
const struct _rgbvmx_statics rgbvmx_statics =
{
{ 255, 255, 255, 255, 255, 255, 255, 255 },
{
{ 0, 256, 0, 256, 0, 256, 0, 256 }, { 1, 255, 1, 255, 1, 255, 1, 255 },
{ 2, 254, 2, 254, 2, 254, 2, 254 }, { 3, 253, 3, 253, 3, 253, 3, 253 },
{ 4, 252, 4, 252, 4, 252, 4, 252 }, { 5, 251, 5, 251, 5, 251, 5, 251 },
{ 6, 250, 6, 250, 6, 250, 6, 250 }, { 7, 249, 7, 249, 7, 249, 7, 249 },
{ 8, 248, 8, 248, 8, 248, 8, 248 }, { 9, 247, 9, 247, 9, 247, 9, 247 },
{ 10, 246, 10, 246, 10, 246, 10, 246 }, { 11, 245, 11, 245, 11, 245, 11, 245 },
{ 12, 244, 12, 244, 12, 244, 12, 244 }, { 13, 243, 13, 243, 13, 243, 13, 243 },
{ 14, 242, 14, 242, 14, 242, 14, 242 }, { 15, 241, 15, 241, 15, 241, 15, 241 },
{ 16, 240, 16, 240, 16, 240, 16, 240 }, { 17, 239, 17, 239, 17, 239, 17, 239 },
{ 18, 238, 18, 238, 18, 238, 18, 238 }, { 19, 237, 19, 237, 19, 237, 19, 237 },
{ 20, 236, 20, 236, 20, 236, 20, 236 }, { 21, 235, 21, 235, 21, 235, 21, 235 },
{ 22, 234, 22, 234, 22, 234, 22, 234 }, { 23, 233, 23, 233, 23, 233, 23, 233 },
{ 24, 232, 24, 232, 24, 232, 24, 232 }, { 25, 231, 25, 231, 25, 231, 25, 231 },
{ 26, 230, 26, 230, 26, 230, 26, 230 }, { 27, 229, 27, 229, 27, 229, 27, 229 },
{ 28, 228, 28, 228, 28, 228, 28, 228 }, { 29, 227, 29, 227, 29, 227, 29, 227 },
{ 30, 226, 30, 226, 30, 226, 30, 226 }, { 31, 225, 31, 225, 31, 225, 31, 225 },
{ 32, 224, 32, 224, 32, 224, 32, 224 }, { 33, 223, 33, 223, 33, 223, 33, 223 },
{ 34, 222, 34, 222, 34, 222, 34, 222 }, { 35, 221, 35, 221, 35, 221, 35, 221 },
{ 36, 220, 36, 220, 36, 220, 36, 220 }, { 37, 219, 37, 219, 37, 219, 37, 219 },
{ 38, 218, 38, 218, 38, 218, 38, 218 }, { 39, 217, 39, 217, 39, 217, 39, 217 },
{ 40, 216, 40, 216, 40, 216, 40, 216 }, { 41, 215, 41, 215, 41, 215, 41, 215 },
{ 42, 214, 42, 214, 42, 214, 42, 214 }, { 43, 213, 43, 213, 43, 213, 43, 213 },
{ 44, 212, 44, 212, 44, 212, 44, 212 }, { 45, 211, 45, 211, 45, 211, 45, 211 },
{ 46, 210, 46, 210, 46, 210, 46, 210 }, { 47, 209, 47, 209, 47, 209, 47, 209 },
{ 48, 208, 48, 208, 48, 208, 48, 208 }, { 49, 207, 49, 207, 49, 207, 49, 207 },
{ 50, 206, 50, 206, 50, 206, 50, 206 }, { 51, 205, 51, 205, 51, 205, 51, 205 },
{ 52, 204, 52, 204, 52, 204, 52, 204 }, { 53, 203, 53, 203, 53, 203, 53, 203 },
{ 54, 202, 54, 202, 54, 202, 54, 202 }, { 55, 201, 55, 201, 55, 201, 55, 201 },
{ 56, 200, 56, 200, 56, 200, 56, 200 }, { 57, 199, 57, 199, 57, 199, 57, 199 },
{ 58, 198, 58, 198, 58, 198, 58, 198 }, { 59, 197, 59, 197, 59, 197, 59, 197 },
{ 60, 196, 60, 196, 60, 196, 60, 196 }, { 61, 195, 61, 195, 61, 195, 61, 195 },
{ 62, 194, 62, 194, 62, 194, 62, 194 }, { 63, 193, 63, 193, 63, 193, 63, 193 },
{ 64, 192, 64, 192, 64, 192, 64, 192 }, { 65, 191, 65, 191, 65, 191, 65, 191 },
{ 66, 190, 66, 190, 66, 190, 66, 190 }, { 67, 189, 67, 189, 67, 189, 67, 189 },
{ 68, 188, 68, 188, 68, 188, 68, 188 }, { 69, 187, 69, 187, 69, 187, 69, 187 },
{ 70, 186, 70, 186, 70, 186, 70, 186 }, { 71, 185, 71, 185, 71, 185, 71, 185 },
{ 72, 184, 72, 184, 72, 184, 72, 184 }, { 73, 183, 73, 183, 73, 183, 73, 183 },
{ 74, 182, 74, 182, 74, 182, 74, 182 }, { 75, 181, 75, 181, 75, 181, 75, 181 },
{ 76, 180, 76, 180, 76, 180, 76, 180 }, { 77, 179, 77, 179, 77, 179, 77, 179 },
{ 78, 178, 78, 178, 78, 178, 78, 178 }, { 79, 177, 79, 177, 79, 177, 79, 177 },
{ 80, 176, 80, 176, 80, 176, 80, 176 }, { 81, 175, 81, 175, 81, 175, 81, 175 },
{ 82, 174, 82, 174, 82, 174, 82, 174 }, { 83, 173, 83, 173, 83, 173, 83, 173 },
{ 84, 172, 84, 172, 84, 172, 84, 172 }, { 85, 171, 85, 171, 85, 171, 85, 171 },
{ 86, 170, 86, 170, 86, 170, 86, 170 }, { 87, 169, 87, 169, 87, 169, 87, 169 },
{ 88, 168, 88, 168, 88, 168, 88, 168 }, { 89, 167, 89, 167, 89, 167, 89, 167 },
{ 90, 166, 90, 166, 90, 166, 90, 166 }, { 91, 165, 91, 165, 91, 165, 91, 165 },
{ 92, 164, 92, 164, 92, 164, 92, 164 }, { 93, 163, 93, 163, 93, 163, 93, 163 },
{ 94, 162, 94, 162, 94, 162, 94, 162 }, { 95, 161, 95, 161, 95, 161, 95, 161 },
{ 96, 160, 96, 160, 96, 160, 96, 160 }, { 97, 159, 97, 159, 97, 159, 97, 159 },
{ 98, 158, 98, 158, 98, 158, 98, 158 }, { 99, 157, 99, 157, 99, 157, 99, 157 },
{ 100, 156, 100, 156, 100, 156, 100, 156 }, { 101, 155, 101, 155, 101, 155, 101, 155 },
{ 102, 154, 102, 154, 102, 154, 102, 154 }, { 103, 153, 103, 153, 103, 153, 103, 153 },
{ 104, 152, 104, 152, 104, 152, 104, 152 }, { 105, 151, 105, 151, 105, 151, 105, 151 },
{ 106, 150, 106, 150, 106, 150, 106, 150 }, { 107, 149, 107, 149, 107, 149, 107, 149 },
{ 108, 148, 108, 148, 108, 148, 108, 148 }, { 109, 147, 109, 147, 109, 147, 109, 147 },
{ 110, 146, 110, 146, 110, 146, 110, 146 }, { 111, 145, 111, 145, 111, 145, 111, 145 },
{ 112, 144, 112, 144, 112, 144, 112, 144 }, { 113, 143, 113, 143, 113, 143, 113, 143 },
{ 114, 142, 114, 142, 114, 142, 114, 142 }, { 115, 141, 115, 141, 115, 141, 115, 141 },
{ 116, 140, 116, 140, 116, 140, 116, 140 }, { 117, 139, 117, 139, 117, 139, 117, 139 },
{ 118, 138, 118, 138, 118, 138, 118, 138 }, { 119, 137, 119, 137, 119, 137, 119, 137 },
{ 120, 136, 120, 136, 120, 136, 120, 136 }, { 121, 135, 121, 135, 121, 135, 121, 135 },
{ 122, 134, 122, 134, 122, 134, 122, 134 }, { 123, 133, 123, 133, 123, 133, 123, 133 },
{ 124, 132, 124, 132, 124, 132, 124, 132 }, { 125, 131, 125, 131, 125, 131, 125, 131 },
{ 126, 130, 126, 130, 126, 130, 126, 130 }, { 127, 129, 127, 129, 127, 129, 127, 129 },
{ 128, 128, 128, 128, 128, 128, 128, 128 }, { 129, 127, 129, 127, 129, 127, 129, 127 },
{ 130, 126, 130, 126, 130, 126, 130, 126 }, { 131, 125, 131, 125, 131, 125, 131, 125 },
{ 132, 124, 132, 124, 132, 124, 132, 124 }, { 133, 123, 133, 123, 133, 123, 133, 123 },
{ 134, 122, 134, 122, 134, 122, 134, 122 }, { 135, 121, 135, 121, 135, 121, 135, 121 },
{ 136, 120, 136, 120, 136, 120, 136, 120 }, { 137, 119, 137, 119, 137, 119, 137, 119 },
{ 138, 118, 138, 118, 138, 118, 138, 118 }, { 139, 117, 139, 117, 139, 117, 139, 117 },
{ 140, 116, 140, 116, 140, 116, 140, 116 }, { 141, 115, 141, 115, 141, 115, 141, 115 },
{ 142, 114, 142, 114, 142, 114, 142, 114 }, { 143, 113, 143, 113, 143, 113, 143, 113 },
{ 144, 112, 144, 112, 144, 112, 144, 112 }, { 145, 111, 145, 111, 145, 111, 145, 111 },
{ 146, 110, 146, 110, 146, 110, 146, 110 }, { 147, 109, 147, 109, 147, 109, 147, 109 },
{ 148, 108, 148, 108, 148, 108, 148, 108 }, { 149, 107, 149, 107, 149, 107, 149, 107 },
{ 150, 106, 150, 106, 150, 106, 150, 106 }, { 151, 105, 151, 105, 151, 105, 151, 105 },
{ 152, 104, 152, 104, 152, 104, 152, 104 }, { 153, 103, 153, 103, 153, 103, 153, 103 },
{ 154, 102, 154, 102, 154, 102, 154, 102 }, { 155, 101, 155, 101, 155, 101, 155, 101 },
{ 156, 100, 156, 100, 156, 100, 156, 100 }, { 157, 99, 157, 99, 157, 99, 157, 99 },
{ 158, 98, 158, 98, 158, 98, 158, 98 }, { 159, 97, 159, 97, 159, 97, 159, 97 },
{ 160, 96, 160, 96, 160, 96, 160, 96 }, { 161, 95, 161, 95, 161, 95, 161, 95 },
{ 162, 94, 162, 94, 162, 94, 162, 94 }, { 163, 93, 163, 93, 163, 93, 163, 93 },
{ 164, 92, 164, 92, 164, 92, 164, 92 }, { 165, 91, 165, 91, 165, 91, 165, 91 },
{ 166, 90, 166, 90, 166, 90, 166, 90 }, { 167, 89, 167, 89, 167, 89, 167, 89 },
{ 168, 88, 168, 88, 168, 88, 168, 88 }, { 169, 87, 169, 87, 169, 87, 169, 87 },
{ 170, 86, 170, 86, 170, 86, 170, 86 }, { 171, 85, 171, 85, 171, 85, 171, 85 },
{ 172, 84, 172, 84, 172, 84, 172, 84 }, { 173, 83, 173, 83, 173, 83, 173, 83 },
{ 174, 82, 174, 82, 174, 82, 174, 82 }, { 175, 81, 175, 81, 175, 81, 175, 81 },
{ 176, 80, 176, 80, 176, 80, 176, 80 }, { 177, 79, 177, 79, 177, 79, 177, 79 },
{ 178, 78, 178, 78, 178, 78, 178, 78 }, { 179, 77, 179, 77, 179, 77, 179, 77 },
{ 180, 76, 180, 76, 180, 76, 180, 76 }, { 181, 75, 181, 75, 181, 75, 181, 75 },
{ 182, 74, 182, 74, 182, 74, 182, 74 }, { 183, 73, 183, 73, 183, 73, 183, 73 },
{ 184, 72, 184, 72, 184, 72, 184, 72 }, { 185, 71, 185, 71, 185, 71, 185, 71 },
{ 186, 70, 186, 70, 186, 70, 186, 70 }, { 187, 69, 187, 69, 187, 69, 187, 69 },
{ 188, 68, 188, 68, 188, 68, 188, 68 }, { 189, 67, 189, 67, 189, 67, 189, 67 },
{ 190, 66, 190, 66, 190, 66, 190, 66 }, { 191, 65, 191, 65, 191, 65, 191, 65 },
{ 192, 64, 192, 64, 192, 64, 192, 64 }, { 193, 63, 193, 63, 193, 63, 193, 63 },
{ 194, 62, 194, 62, 194, 62, 194, 62 }, { 195, 61, 195, 61, 195, 61, 195, 61 },
{ 196, 60, 196, 60, 196, 60, 196, 60 }, { 197, 59, 197, 59, 197, 59, 197, 59 },
{ 198, 58, 198, 58, 198, 58, 198, 58 }, { 199, 57, 199, 57, 199, 57, 199, 57 },
{ 200, 56, 200, 56, 200, 56, 200, 56 }, { 201, 55, 201, 55, 201, 55, 201, 55 },
{ 202, 54, 202, 54, 202, 54, 202, 54 }, { 203, 53, 203, 53, 203, 53, 203, 53 },
{ 204, 52, 204, 52, 204, 52, 204, 52 }, { 205, 51, 205, 51, 205, 51, 205, 51 },
{ 206, 50, 206, 50, 206, 50, 206, 50 }, { 207, 49, 207, 49, 207, 49, 207, 49 },
{ 208, 48, 208, 48, 208, 48, 208, 48 }, { 209, 47, 209, 47, 209, 47, 209, 47 },
{ 210, 46, 210, 46, 210, 46, 210, 46 }, { 211, 45, 211, 45, 211, 45, 211, 45 },
{ 212, 44, 212, 44, 212, 44, 212, 44 }, { 213, 43, 213, 43, 213, 43, 213, 43 },
{ 214, 42, 214, 42, 214, 42, 214, 42 }, { 215, 41, 215, 41, 215, 41, 215, 41 },
{ 216, 40, 216, 40, 216, 40, 216, 40 }, { 217, 39, 217, 39, 217, 39, 217, 39 },
{ 218, 38, 218, 38, 218, 38, 218, 38 }, { 219, 37, 219, 37, 219, 37, 219, 37 },
{ 220, 36, 220, 36, 220, 36, 220, 36 }, { 221, 35, 221, 35, 221, 35, 221, 35 },
{ 222, 34, 222, 34, 222, 34, 222, 34 }, { 223, 33, 223, 33, 223, 33, 223, 33 },
{ 224, 32, 224, 32, 224, 32, 224, 32 }, { 225, 31, 225, 31, 225, 31, 225, 31 },
{ 226, 30, 226, 30, 226, 30, 226, 30 }, { 227, 29, 227, 29, 227, 29, 227, 29 },
{ 228, 28, 228, 28, 228, 28, 228, 28 }, { 229, 27, 229, 27, 229, 27, 229, 27 },
{ 230, 26, 230, 26, 230, 26, 230, 26 }, { 231, 25, 231, 25, 231, 25, 231, 25 },
{ 232, 24, 232, 24, 232, 24, 232, 24 }, { 233, 23, 233, 23, 233, 23, 233, 23 },
{ 234, 22, 234, 22, 234, 22, 234, 22 }, { 235, 21, 235, 21, 235, 21, 235, 21 },
{ 236, 20, 236, 20, 236, 20, 236, 20 }, { 237, 19, 237, 19, 237, 19, 237, 19 },
{ 238, 18, 238, 18, 238, 18, 238, 18 }, { 239, 17, 239, 17, 239, 17, 239, 17 },
{ 240, 16, 240, 16, 240, 16, 240, 16 }, { 241, 15, 241, 15, 241, 15, 241, 15 },
{ 242, 14, 242, 14, 242, 14, 242, 14 }, { 243, 13, 243, 13, 243, 13, 243, 13 },
{ 244, 12, 244, 12, 244, 12, 244, 12 }, { 245, 11, 245, 11, 245, 11, 245, 11 },
{ 246, 10, 246, 10, 246, 10, 246, 10 }, { 247, 9, 247, 9, 247, 9, 247, 9 },
{ 248, 8, 248, 8, 248, 8, 248, 8 }, { 249, 7, 249, 7, 249, 7, 249, 7 },
{ 250, 6, 250, 6, 250, 6, 250, 6 }, { 251, 5, 251, 5, 251, 5, 251, 5 },
{ 252, 4, 252, 4, 252, 4, 252, 4 }, { 253, 3, 253, 3, 253, 3, 253, 3 },
{ 254, 2, 254, 2, 254, 2, 254, 2 }, { 255, 1, 255, 1, 255, 1, 255, 1 }
}
};
#endif // defined(__ALTIVEC__)

238
src/emu/video/rgbvmx.c Normal file
View file

@ -0,0 +1,238 @@
// license:BSD-3-Clause
// copyright-holders:Vas Crabb, Ryan Holtz
/***************************************************************************
rgbvmx.c
VMX/Altivec optimised RGB utilities.
***************************************************************************/
#if defined(__ALTIVEC__)
#include "emu.h"
#include <emmintrin.h>
#include "rgbutil.h"
/***************************************************************************
TABLES
***************************************************************************/
// Constant with 255 in each of its eight 16-bit lanes.
const rgbaint_t::VECU16 rgbaint_t::maxbyte = { 255, 255, 255, 255, 255, 255, 255, 255 };
// Channel-clearing masks: each one is all-ones except for a single zeroed
// 32-bit element (element 0 = alpha, 1 = red, 2 = green, 3 = blue), so
// AND-ing with a mask wipes exactly that channel and keeps the other three.
const rgbaint_t::VECU32 rgbaint_t::alpha_mask = { 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff };
const rgbaint_t::VECU32 rgbaint_t::red_mask = { 0xffffffff, 0x00000000, 0xffffffff, 0xffffffff };
const rgbaint_t::VECU32 rgbaint_t::green_mask = { 0xffffffff, 0xffffffff, 0x00000000, 0xffffffff };
const rgbaint_t::VECU32 rgbaint_t::blue_mask = { 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000 };
const rgbaint_t::VECU16 rgbaint_t::scale_table[256] = {
{ 0, 256, 0, 256, 0, 256, 0, 256 }, { 1, 255, 1, 255, 1, 255, 1, 255 },
{ 2, 254, 2, 254, 2, 254, 2, 254 }, { 3, 253, 3, 253, 3, 253, 3, 253 },
{ 4, 252, 4, 252, 4, 252, 4, 252 }, { 5, 251, 5, 251, 5, 251, 5, 251 },
{ 6, 250, 6, 250, 6, 250, 6, 250 }, { 7, 249, 7, 249, 7, 249, 7, 249 },
{ 8, 248, 8, 248, 8, 248, 8, 248 }, { 9, 247, 9, 247, 9, 247, 9, 247 },
{ 10, 246, 10, 246, 10, 246, 10, 246 }, { 11, 245, 11, 245, 11, 245, 11, 245 },
{ 12, 244, 12, 244, 12, 244, 12, 244 }, { 13, 243, 13, 243, 13, 243, 13, 243 },
{ 14, 242, 14, 242, 14, 242, 14, 242 }, { 15, 241, 15, 241, 15, 241, 15, 241 },
{ 16, 240, 16, 240, 16, 240, 16, 240 }, { 17, 239, 17, 239, 17, 239, 17, 239 },
{ 18, 238, 18, 238, 18, 238, 18, 238 }, { 19, 237, 19, 237, 19, 237, 19, 237 },
{ 20, 236, 20, 236, 20, 236, 20, 236 }, { 21, 235, 21, 235, 21, 235, 21, 235 },
{ 22, 234, 22, 234, 22, 234, 22, 234 }, { 23, 233, 23, 233, 23, 233, 23, 233 },
{ 24, 232, 24, 232, 24, 232, 24, 232 }, { 25, 231, 25, 231, 25, 231, 25, 231 },
{ 26, 230, 26, 230, 26, 230, 26, 230 }, { 27, 229, 27, 229, 27, 229, 27, 229 },
{ 28, 228, 28, 228, 28, 228, 28, 228 }, { 29, 227, 29, 227, 29, 227, 29, 227 },
{ 30, 226, 30, 226, 30, 226, 30, 226 }, { 31, 225, 31, 225, 31, 225, 31, 225 },
{ 32, 224, 32, 224, 32, 224, 32, 224 }, { 33, 223, 33, 223, 33, 223, 33, 223 },
{ 34, 222, 34, 222, 34, 222, 34, 222 }, { 35, 221, 35, 221, 35, 221, 35, 221 },
{ 36, 220, 36, 220, 36, 220, 36, 220 }, { 37, 219, 37, 219, 37, 219, 37, 219 },
{ 38, 218, 38, 218, 38, 218, 38, 218 }, { 39, 217, 39, 217, 39, 217, 39, 217 },
{ 40, 216, 40, 216, 40, 216, 40, 216 }, { 41, 215, 41, 215, 41, 215, 41, 215 },
{ 42, 214, 42, 214, 42, 214, 42, 214 }, { 43, 213, 43, 213, 43, 213, 43, 213 },
{ 44, 212, 44, 212, 44, 212, 44, 212 }, { 45, 211, 45, 211, 45, 211, 45, 211 },
{ 46, 210, 46, 210, 46, 210, 46, 210 }, { 47, 209, 47, 209, 47, 209, 47, 209 },
{ 48, 208, 48, 208, 48, 208, 48, 208 }, { 49, 207, 49, 207, 49, 207, 49, 207 },
{ 50, 206, 50, 206, 50, 206, 50, 206 }, { 51, 205, 51, 205, 51, 205, 51, 205 },
{ 52, 204, 52, 204, 52, 204, 52, 204 }, { 53, 203, 53, 203, 53, 203, 53, 203 },
{ 54, 202, 54, 202, 54, 202, 54, 202 }, { 55, 201, 55, 201, 55, 201, 55, 201 },
{ 56, 200, 56, 200, 56, 200, 56, 200 }, { 57, 199, 57, 199, 57, 199, 57, 199 },
{ 58, 198, 58, 198, 58, 198, 58, 198 }, { 59, 197, 59, 197, 59, 197, 59, 197 },
{ 60, 196, 60, 196, 60, 196, 60, 196 }, { 61, 195, 61, 195, 61, 195, 61, 195 },
{ 62, 194, 62, 194, 62, 194, 62, 194 }, { 63, 193, 63, 193, 63, 193, 63, 193 },
{ 64, 192, 64, 192, 64, 192, 64, 192 }, { 65, 191, 65, 191, 65, 191, 65, 191 },
{ 66, 190, 66, 190, 66, 190, 66, 190 }, { 67, 189, 67, 189, 67, 189, 67, 189 },
{ 68, 188, 68, 188, 68, 188, 68, 188 }, { 69, 187, 69, 187, 69, 187, 69, 187 },
{ 70, 186, 70, 186, 70, 186, 70, 186 }, { 71, 185, 71, 185, 71, 185, 71, 185 },
{ 72, 184, 72, 184, 72, 184, 72, 184 }, { 73, 183, 73, 183, 73, 183, 73, 183 },
{ 74, 182, 74, 182, 74, 182, 74, 182 }, { 75, 181, 75, 181, 75, 181, 75, 181 },
{ 76, 180, 76, 180, 76, 180, 76, 180 }, { 77, 179, 77, 179, 77, 179, 77, 179 },
{ 78, 178, 78, 178, 78, 178, 78, 178 }, { 79, 177, 79, 177, 79, 177, 79, 177 },
{ 80, 176, 80, 176, 80, 176, 80, 176 }, { 81, 175, 81, 175, 81, 175, 81, 175 },
{ 82, 174, 82, 174, 82, 174, 82, 174 }, { 83, 173, 83, 173, 83, 173, 83, 173 },
{ 84, 172, 84, 172, 84, 172, 84, 172 }, { 85, 171, 85, 171, 85, 171, 85, 171 },
{ 86, 170, 86, 170, 86, 170, 86, 170 }, { 87, 169, 87, 169, 87, 169, 87, 169 },
{ 88, 168, 88, 168, 88, 168, 88, 168 }, { 89, 167, 89, 167, 89, 167, 89, 167 },
{ 90, 166, 90, 166, 90, 166, 90, 166 }, { 91, 165, 91, 165, 91, 165, 91, 165 },
{ 92, 164, 92, 164, 92, 164, 92, 164 }, { 93, 163, 93, 163, 93, 163, 93, 163 },
{ 94, 162, 94, 162, 94, 162, 94, 162 }, { 95, 161, 95, 161, 95, 161, 95, 161 },
{ 96, 160, 96, 160, 96, 160, 96, 160 }, { 97, 159, 97, 159, 97, 159, 97, 159 },
{ 98, 158, 98, 158, 98, 158, 98, 158 }, { 99, 157, 99, 157, 99, 157, 99, 157 },
{ 100, 156, 100, 156, 100, 156, 100, 156 }, { 101, 155, 101, 155, 101, 155, 101, 155 },
{ 102, 154, 102, 154, 102, 154, 102, 154 }, { 103, 153, 103, 153, 103, 153, 103, 153 },
{ 104, 152, 104, 152, 104, 152, 104, 152 }, { 105, 151, 105, 151, 105, 151, 105, 151 },
{ 106, 150, 106, 150, 106, 150, 106, 150 }, { 107, 149, 107, 149, 107, 149, 107, 149 },
{ 108, 148, 108, 148, 108, 148, 108, 148 }, { 109, 147, 109, 147, 109, 147, 109, 147 },
{ 110, 146, 110, 146, 110, 146, 110, 146 }, { 111, 145, 111, 145, 111, 145, 111, 145 },
{ 112, 144, 112, 144, 112, 144, 112, 144 }, { 113, 143, 113, 143, 113, 143, 113, 143 },
{ 114, 142, 114, 142, 114, 142, 114, 142 }, { 115, 141, 115, 141, 115, 141, 115, 141 },
{ 116, 140, 116, 140, 116, 140, 116, 140 }, { 117, 139, 117, 139, 117, 139, 117, 139 },
{ 118, 138, 118, 138, 118, 138, 118, 138 }, { 119, 137, 119, 137, 119, 137, 119, 137 },
{ 120, 136, 120, 136, 120, 136, 120, 136 }, { 121, 135, 121, 135, 121, 135, 121, 135 },
{ 122, 134, 122, 134, 122, 134, 122, 134 }, { 123, 133, 123, 133, 123, 133, 123, 133 },
{ 124, 132, 124, 132, 124, 132, 124, 132 }, { 125, 131, 125, 131, 125, 131, 125, 131 },
{ 126, 130, 126, 130, 126, 130, 126, 130 }, { 127, 129, 127, 129, 127, 129, 127, 129 },
{ 128, 128, 128, 128, 128, 128, 128, 128 }, { 129, 127, 129, 127, 129, 127, 129, 127 },
{ 130, 126, 130, 126, 130, 126, 130, 126 }, { 131, 125, 131, 125, 131, 125, 131, 125 },
{ 132, 124, 132, 124, 132, 124, 132, 124 }, { 133, 123, 133, 123, 133, 123, 133, 123 },
{ 134, 122, 134, 122, 134, 122, 134, 122 }, { 135, 121, 135, 121, 135, 121, 135, 121 },
{ 136, 120, 136, 120, 136, 120, 136, 120 }, { 137, 119, 137, 119, 137, 119, 137, 119 },
{ 138, 118, 138, 118, 138, 118, 138, 118 }, { 139, 117, 139, 117, 139, 117, 139, 117 },
{ 140, 116, 140, 116, 140, 116, 140, 116 }, { 141, 115, 141, 115, 141, 115, 141, 115 },
{ 142, 114, 142, 114, 142, 114, 142, 114 }, { 143, 113, 143, 113, 143, 113, 143, 113 },
{ 144, 112, 144, 112, 144, 112, 144, 112 }, { 145, 111, 145, 111, 145, 111, 145, 111 },
{ 146, 110, 146, 110, 146, 110, 146, 110 }, { 147, 109, 147, 109, 147, 109, 147, 109 },
{ 148, 108, 148, 108, 148, 108, 148, 108 }, { 149, 107, 149, 107, 149, 107, 149, 107 },
{ 150, 106, 150, 106, 150, 106, 150, 106 }, { 151, 105, 151, 105, 151, 105, 151, 105 },
{ 152, 104, 152, 104, 152, 104, 152, 104 }, { 153, 103, 153, 103, 153, 103, 153, 103 },
{ 154, 102, 154, 102, 154, 102, 154, 102 }, { 155, 101, 155, 101, 155, 101, 155, 101 },
{ 156, 100, 156, 100, 156, 100, 156, 100 }, { 157, 99, 157, 99, 157, 99, 157, 99 },
{ 158, 98, 158, 98, 158, 98, 158, 98 }, { 159, 97, 159, 97, 159, 97, 159, 97 },
{ 160, 96, 160, 96, 160, 96, 160, 96 }, { 161, 95, 161, 95, 161, 95, 161, 95 },
{ 162, 94, 162, 94, 162, 94, 162, 94 }, { 163, 93, 163, 93, 163, 93, 163, 93 },
{ 164, 92, 164, 92, 164, 92, 164, 92 }, { 165, 91, 165, 91, 165, 91, 165, 91 },
{ 166, 90, 166, 90, 166, 90, 166, 90 }, { 167, 89, 167, 89, 167, 89, 167, 89 },
{ 168, 88, 168, 88, 168, 88, 168, 88 }, { 169, 87, 169, 87, 169, 87, 169, 87 },
{ 170, 86, 170, 86, 170, 86, 170, 86 }, { 171, 85, 171, 85, 171, 85, 171, 85 },
{ 172, 84, 172, 84, 172, 84, 172, 84 }, { 173, 83, 173, 83, 173, 83, 173, 83 },
{ 174, 82, 174, 82, 174, 82, 174, 82 }, { 175, 81, 175, 81, 175, 81, 175, 81 },
{ 176, 80, 176, 80, 176, 80, 176, 80 }, { 177, 79, 177, 79, 177, 79, 177, 79 },
{ 178, 78, 178, 78, 178, 78, 178, 78 }, { 179, 77, 179, 77, 179, 77, 179, 77 },
{ 180, 76, 180, 76, 180, 76, 180, 76 }, { 181, 75, 181, 75, 181, 75, 181, 75 },
{ 182, 74, 182, 74, 182, 74, 182, 74 }, { 183, 73, 183, 73, 183, 73, 183, 73 },
{ 184, 72, 184, 72, 184, 72, 184, 72 }, { 185, 71, 185, 71, 185, 71, 185, 71 },
{ 186, 70, 186, 70, 186, 70, 186, 70 }, { 187, 69, 187, 69, 187, 69, 187, 69 },
{ 188, 68, 188, 68, 188, 68, 188, 68 }, { 189, 67, 189, 67, 189, 67, 189, 67 },
{ 190, 66, 190, 66, 190, 66, 190, 66 }, { 191, 65, 191, 65, 191, 65, 191, 65 },
{ 192, 64, 192, 64, 192, 64, 192, 64 }, { 193, 63, 193, 63, 193, 63, 193, 63 },
{ 194, 62, 194, 62, 194, 62, 194, 62 }, { 195, 61, 195, 61, 195, 61, 195, 61 },
{ 196, 60, 196, 60, 196, 60, 196, 60 }, { 197, 59, 197, 59, 197, 59, 197, 59 },
{ 198, 58, 198, 58, 198, 58, 198, 58 }, { 199, 57, 199, 57, 199, 57, 199, 57 },
{ 200, 56, 200, 56, 200, 56, 200, 56 }, { 201, 55, 201, 55, 201, 55, 201, 55 },
{ 202, 54, 202, 54, 202, 54, 202, 54 }, { 203, 53, 203, 53, 203, 53, 203, 53 },
{ 204, 52, 204, 52, 204, 52, 204, 52 }, { 205, 51, 205, 51, 205, 51, 205, 51 },
{ 206, 50, 206, 50, 206, 50, 206, 50 }, { 207, 49, 207, 49, 207, 49, 207, 49 },
{ 208, 48, 208, 48, 208, 48, 208, 48 }, { 209, 47, 209, 47, 209, 47, 209, 47 },
{ 210, 46, 210, 46, 210, 46, 210, 46 }, { 211, 45, 211, 45, 211, 45, 211, 45 },
{ 212, 44, 212, 44, 212, 44, 212, 44 }, { 213, 43, 213, 43, 213, 43, 213, 43 },
{ 214, 42, 214, 42, 214, 42, 214, 42 }, { 215, 41, 215, 41, 215, 41, 215, 41 },
{ 216, 40, 216, 40, 216, 40, 216, 40 }, { 217, 39, 217, 39, 217, 39, 217, 39 },
{ 218, 38, 218, 38, 218, 38, 218, 38 }, { 219, 37, 219, 37, 219, 37, 219, 37 },
{ 220, 36, 220, 36, 220, 36, 220, 36 }, { 221, 35, 221, 35, 221, 35, 221, 35 },
{ 222, 34, 222, 34, 222, 34, 222, 34 }, { 223, 33, 223, 33, 223, 33, 223, 33 },
{ 224, 32, 224, 32, 224, 32, 224, 32 }, { 225, 31, 225, 31, 225, 31, 225, 31 },
{ 226, 30, 226, 30, 226, 30, 226, 30 }, { 227, 29, 227, 29, 227, 29, 227, 29 },
{ 228, 28, 228, 28, 228, 28, 228, 28 }, { 229, 27, 229, 27, 229, 27, 229, 27 },
{ 230, 26, 230, 26, 230, 26, 230, 26 }, { 231, 25, 231, 25, 231, 25, 231, 25 },
{ 232, 24, 232, 24, 232, 24, 232, 24 }, { 233, 23, 233, 23, 233, 23, 233, 23 },
{ 234, 22, 234, 22, 234, 22, 234, 22 }, { 235, 21, 235, 21, 235, 21, 235, 21 },
{ 236, 20, 236, 20, 236, 20, 236, 20 }, { 237, 19, 237, 19, 237, 19, 237, 19 },
{ 238, 18, 238, 18, 238, 18, 238, 18 }, { 239, 17, 239, 17, 239, 17, 239, 17 },
{ 240, 16, 240, 16, 240, 16, 240, 16 }, { 241, 15, 241, 15, 241, 15, 241, 15 },
{ 242, 14, 242, 14, 242, 14, 242, 14 }, { 243, 13, 243, 13, 243, 13, 243, 13 },
{ 244, 12, 244, 12, 244, 12, 244, 12 }, { 245, 11, 245, 11, 245, 11, 245, 11 },
{ 246, 10, 246, 10, 246, 10, 246, 10 }, { 247, 9, 247, 9, 247, 9, 247, 9 },
{ 248, 8, 248, 8, 248, 8, 248, 8 }, { 249, 7, 249, 7, 249, 7, 249, 7 },
{ 250, 6, 250, 6, 250, 6, 250, 6 }, { 251, 5, 251, 5, 251, 5, 251, 5 },
{ 252, 4, 252, 4, 252, 4, 252, 4 }, { 253, 3, 253, 3, 253, 3, 253, 3 },
{ 254, 2, 254, 2, 254, 2, 254, 2 }, { 255, 1, 255, 1, 255, 1, 255, 1 }
};
// Declaration of an external constant of an empty struct type.
// NOTE(review): the struct has no members and nothing in this file reads
// rgbvmx_statics; the tables above are static members of rgbaint_t instead.
// This looks like a leftover that can probably be removed - confirm no other
// translation unit still refers to it.
extern const struct _rgbvmx_statics
{
} rgbvmx_statics;
/***************************************************************************
HIGHER LEVEL OPERATIONS
***************************************************************************/
// Blend this colour with `other`, using `factor` to pick a scale_table row.
//
// NOTE(review): this body does not look finished for VMX:
//  - _mm_unpacklo_epi16 is an SSE2 intrinsic, yet this file is only compiled
//    when __ALTIVEC__ is defined;
//  - the scale_table row is *added* to the value (vec_add) rather than being
//    used as a pair of multiply weights.
// Treat the result as unreliable until the routine is ported - TODO confirm.
void rgbaint_t::blend(const rgbaint_t& other, UINT8 factor)
{
// interleave 16-bit lanes of this colour and `other` (SSE2 intrinsic)
m_value = _mm_unpacklo_epi16(m_value, other.m_value);
// add the (factor, 256 - factor) row as eight unsigned 16-bit lanes
m_value = vec_add((VECU16)m_value, scale_table[factor]);
// shift every 32-bit element right by 8 bits
m_value = vec_sr(m_value, vec_splat_u32(8));
}
void rgbaint_t::scale_and_clamp(const rgbaint_t& scale)
{
mul(scale);
shr(8);
min(255);
}
void rgbaint_t::scale_imm_and_clamp(const INT32 scale)
{
mul_imm(scale);
shr(8);
min(255);
}
// Compute (this * scale + other * scale2), then shr(8) and min(255).
//
//   scale  - per-channel factors applied to this colour
//   other  - second colour; it is copied, so the caller's object is untouched
//   scale2 - per-channel factors applied to `other`
//
// The previous version called mul(scale) twice, which multiplied this colour
// by scale squared before the addition; it is now scaled exactly once.
// shr() and min() are declared outside this excerpt; presumably a per-channel
// right shift and an upper clamp - verify against the header.
void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2)
{
	// scale the second colour on a private copy
	rgbaint_t color2(other);
	color2.mul(scale2);

	// scale this colour once, then combine and normalise
	mul(scale);
	add(color2);
	shr(8);
	min(255);
}
void rgbaint_t::scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& other)
{
mul_imm(scale);
add(other);
shr(8);
min(255);
}
void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other)
{
mul(scale);
add(other);
shr(8);
min(255);
}
// Bilinearly interpolate four packed 32-bit pixels.
//
//   rgb00, rgb01 - first pair of pixels, weighted against each other by u
//   rgb10, rgb11 - second pair of pixels, weighted against each other by u
//   u, v         - 8-bit weights used to index the scale table
//
// Returns the filtered pixel as a packed 32-bit value.
//
// NOTE(review): every intrinsic here (_mm_*, __m128i) is SSE2 and the table
// comes from rgbsse_statics, yet this file is only compiled when __ALTIVEC__
// is defined.  This looks like an unported copy of the SSE implementation -
// TODO port to vec_* intrinsics before relying on it.
UINT32 rgbaint_t::bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
{
__m128i color00 = _mm_cvtsi32_si128(rgb00);
__m128i color01 = _mm_cvtsi32_si128(rgb01);
__m128i color10 = _mm_cvtsi32_si128(rgb10);
__m128i color11 = _mm_cvtsi32_si128(rgb11);
/* interleave color01 and color00 at the byte level */
color01 = _mm_unpacklo_epi8(color01, color00);
color11 = _mm_unpacklo_epi8(color11, color10);
/* widen each byte to a 16-bit lane by interleaving with zero */
color01 = _mm_unpacklo_epi8(color01, _mm_setzero_si128());
color11 = _mm_unpacklo_epi8(color11, _mm_setzero_si128());
/* multiply adjacent 16-bit lanes by the scale_table[u] row and sum each pair into a 32-bit lane */
/* (assumes rgbsse_statics.scale_table rows are { f, 256 - f, ... } pairs - TODO confirm) */
color01 = _mm_madd_epi16(color01, *(__m128i *)&rgbsse_statics.scale_table[u][0]);
color11 = _mm_madd_epi16(color11, *(__m128i *)&rgbsse_statics.scale_table[u][0]);
/* move half of the color01 sum into the upper 16 bits and half of the color11 sum into the lower 16 bits */
color01 = _mm_slli_epi32(color01, 15);
color11 = _mm_srli_epi32(color11, 1);
/* signed 16-bit max merges the two halves into one register */
color01 = _mm_max_epi16(color01, color11);
/* second weighting pass using the scale_table[v] row */
color01 = _mm_madd_epi16(color01, *(__m128i *)&rgbsse_statics.scale_table[v][0]);
color01 = _mm_srli_epi32(color01, 15);
/* narrow 32 -> 16 (signed saturation) -> 8 bits (unsigned saturation) and extract the low 32 bits */
color01 = _mm_packs_epi32(color01, color01);
color01 = _mm_packus_epi16(color01, color01);
return _mm_cvtsi128_si32(color01);
}
#endif // defined(__ALTIVEC__)

View file

@ -1,5 +1,5 @@
// license:BSD-3-Clause
// copyright-holders:Vas Crabb
// copyright-holders:Vas Crabb, Ryan Holtz
/***************************************************************************
rgbvmx.h
@ -11,425 +11,471 @@
#ifndef __RGBVMX__
#define __RGBVMX__
#if defined(__ALTIVEC__)
#include <altivec.h>
#endif
/***************************************************************************
TYPE DEFINITIONS
***************************************************************************/
/* intermediate RGB values are stored in a vector */
typedef vector signed short rgbint;
/* intermediate RGB values are stored in a vector */
typedef vector signed short rgbaint;
/***************************************************************************
BASIC CONVERSIONS
***************************************************************************/
/*-------------------------------------------------
rgb_comp_to_rgbint - converts a trio of RGB
components to an rgbint type
-------------------------------------------------*/
INLINE void rgb_comp_to_rgbint(rgbint *rgb, INT16 r, INT16 g, INT16 b)
class rgbaint_t
{
rgbint result = { 0, r, g, b, 0, 0, 0, 0 };
*rgb = result;
}
/*-------------------------------------------------
rgba_comp_to_rgbint - converts a quad of RGB
components to an rgbint type
-------------------------------------------------*/
INLINE void rgba_comp_to_rgbaint(rgbaint *rgb, INT16 a, INT16 r, INT16 g, INT16 b)
{
rgbaint result = { a, r, g, b, 0, 0, 0, 0 };
*rgb = result;
}
/*-------------------------------------------------
rgb_to_rgbint - converts a packed trio of RGB
components to an rgbint type
-------------------------------------------------*/
INLINE void rgb_to_rgbint(rgbint *rgb, rgb_t const &color)
{
vector signed char temp = (vector signed char)vec_perm((vector signed int)vec_lde(0, color.ptr()), vec_splat_s32(0), vec_lvsl(0, color.ptr()));
*rgb = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), temp);
}
public:
// Default constructor: empty body, performs no initialisation of its own.
inline rgbaint_t() { }
// Construct from one 32-bit value by forwarding to set(UINT32).
inline rgbaint_t(UINT32 rgba) { set(rgba); }
// Construct from four separate channel values, passed in a, r, g, b order.
inline rgbaint_t(UINT32 a, UINT32 r, UINT32 g, UINT32 b) { set(a, r, g, b); }
// Construct from an rgb_t by forwarding to set(rgb_t&); takes a non-const reference.
inline rgbaint_t(rgb_t& rgb) { set(rgb); }
inline void set(rgbaint_t& other) { m_value = other.m_value; }
// Unpack a 32-bit value into the vector: each of its four bytes, taken in
// memory order, becomes one zero-extended 32-bit element.
// (On a big-endian target the first byte is the most significant one;
// presumably that is alpha, giving a, r, g, b element order - TODO confirm.)
inline void set(UINT32 rgba)
{
const vector unsigned int zero = vec_splat_u32(0);
// load the word and rotate it to the front of the vector regardless of its address
const vector unsigned char temp = vec_perm(vec_lde(0, &rgba), zero, vec_lvsl(0, &rgba));
// interleave with zero twice: 8 -> 16 -> 32 bits per channel
m_value = vec_mergeh((vector unsigned short)zero, (vector unsigned short)vec_mergeh((vector unsigned char)zero, temp));
}
// Set all four channels from separate 32-bit values.
// Element order in the vector is a, r, g, b (elements 0..3).
inline void set(UINT32 a, UINT32 r, UINT32 g, UINT32 b)
{
vector unsigned int result = { a, r, g, b };
m_value = result;
}
// Unpack an rgb_t into the vector: the four bytes found at rgb.ptr(), taken
// in memory order, each become one zero-extended 32-bit element.
// (rgb_t::ptr() is defined elsewhere; presumably it returns the address of
// the packed 32-bit colour - TODO confirm.)
inline void set(rgb_t& rgb)
{
const vector unsigned int zero = vec_splat_u32(0);
// load the word and rotate it to the front of the vector regardless of its address
const vector unsigned char temp = vec_perm(vec_lde(0, rgb.ptr()), zero, vec_lvsl(0, rgb.ptr()));
// interleave with zero twice: 8 -> 16 -> 32 bits per channel
m_value = vec_mergeh((vector unsigned short)zero, (vector unsigned short)vec_mergeh((vector unsigned char)zero, temp));
}
// Pack the four channels back into one 32-bit value and return it as rgb_t.
// Uses vec_pack (modular pack), so only the low 8 bits of each channel are
// kept; out-of-range channels wrap rather than clamp.
inline rgb_t to_rgba()
{
// narrow 32 -> 16 -> 8 bits, then replicate the packed word into every
// element so vec_ste writes it whatever the alignment of `result`
const vector unsigned int temp = vec_splat((vector unsigned int)vec_pack(vec_pack(m_value, m_value), vec_splat_u16(0)), 0);
UINT32 result;
vec_ste(temp, 0, &result);
return result;
}
// Pack the four channels back into one 32-bit value and return it as rgb_t.
// Uses vec_packsu (saturating pack), so channels that do not fit in 8 bits
// are saturated instead of wrapping.
inline rgb_t to_rgba_clamp()
{
// narrow 32 -> 16 -> 8 bits with saturation, then replicate the packed word
// into every element so vec_ste writes it whatever the alignment of `result`
const vector unsigned int temp = vec_splat((vector unsigned int)vec_packsu(vec_packsu(m_value, m_value), vec_splat_u16(0)), 0);
UINT32 result;
vec_ste(temp, 0, &result);
return result;
}
// Element-wise add: this += color2 (vec_add, no saturation).
inline void add(const rgbaint_t& color2)
{
m_value = vec_add(m_value, color2.m_value);
}
// Add the same immediate value to all four channels.
inline void add_imm(const UINT32 imm)
{
const vector unsigned int temp = { imm, imm, imm, imm };
m_value = vec_add(m_value, temp);
}
// Add a separate immediate to each channel (a, r, g, b -> elements 0..3).
inline void add_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
{
const vector unsigned int temp = { a, r, g, b };
m_value = vec_add(m_value, temp);
}
// Element-wise subtract: this -= color2 (vec_sub, no saturation).
inline void sub(const rgbaint_t& color2)
{
m_value = vec_sub(m_value, color2.m_value);
}
// Subtract the same immediate value from all four channels.
inline void sub_imm(const UINT32 imm)
{
const vector unsigned int temp = { imm, imm, imm, imm };
m_value = vec_sub(m_value, temp);
}
// Subtract a separate immediate from each channel (a, r, g, b -> elements 0..3).
inline void sub_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
{
const vector unsigned int temp = { a, r, g, b };
m_value = vec_sub(m_value, temp);
}
inline void subr(rgbaint_t& color2)
{
m_value = vec_sub(color2.m_value, m_value);
}
// Reverse subtract with an immediate: each channel becomes (imm - channel).
inline void subr_imm(const UINT32 imm)
{
const vector unsigned int temp = { imm, imm, imm, imm };
m_value = vec_sub(temp, m_value);
}
// Reverse subtract with per-channel immediates: each channel becomes
// (immediate - channel), with a, r, g, b mapped to elements 0..3.
inline void subr_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
{
const vector unsigned int temp = { a, r, g, b };
m_value = vec_sub(temp, m_value);
}
// Replace the alpha channel (element 0) with `value`; other channels are kept.
// alpha_mask zeroes element 0 before the new value is OR-ed in.
inline void set_a(const UINT32 value)
{
const vector unsigned int temp = { value, 0, 0, 0 };
m_value = vec_or(vec_and(m_value, alpha_mask), temp);
}
// Replace the red channel (element 1) with `value`; other channels are kept.
// red_mask zeroes element 1 before the new value is OR-ed in.
inline void set_r(const UINT32 value)
{
const vector unsigned int temp = { 0, value, 0, 0 };
m_value = vec_or(vec_and(m_value, red_mask), temp);
}
// Replace the green channel (element 2) with `value`; other channels are kept.
// green_mask zeroes element 2 before the new value is OR-ed in.
inline void set_g(const UINT32 value)
{
const vector unsigned int temp = { 0, 0, value, 0 };
m_value = vec_or(vec_and(m_value, green_mask), temp);
}
// Replace the blue channel (element 3) with `value`; other channels are kept.
// blue_mask zeroes element 3 before the new value is OR-ed in.
inline void set_b(const UINT32 value)
{
const vector unsigned int temp = { 0, 0, 0, value };
m_value = vec_or(vec_and(m_value, blue_mask), temp);
}
// Return one byte of the alpha channel.
// Reads vector byte 3, which is the low-order byte of 32-bit element 0 in
// big-endian element numbering - assumes a big-endian target, TODO confirm.
inline UINT8 get_a()
{
UINT8 result;
// splat the byte so vec_ste stores it whatever the address of `result`
vec_ste(vec_splat((vector unsigned char)m_value, 3), 0, &result);
return result;
}
// Return one byte of the red channel.
// Reads vector byte 7, which is the low-order byte of 32-bit element 1 in
// big-endian element numbering - assumes a big-endian target, TODO confirm.
inline UINT8 get_r()
{
UINT8 result;
// splat the byte so vec_ste stores it whatever the address of `result`
vec_ste(vec_splat((vector unsigned char)m_value, 7), 0, &result);
return result;
}
// Return one byte of the green channel.
// Reads vector byte 11, which is the low-order byte of 32-bit element 2 in
// big-endian element numbering - assumes a big-endian target, TODO confirm.
inline UINT8 get_g()
{
UINT8 result;
// splat the byte so vec_ste stores it whatever the address of `result`
vec_ste(vec_splat((vector unsigned char)m_value, 11), 0, &result);
return result;
}
// Return one byte of the blue channel.
// Reads vector byte 15, which is the low-order byte of 32-bit element 3 in
// big-endian element numbering - assumes a big-endian target, TODO confirm.
inline UINT8 get_b()
{
UINT8 result;
// splat the byte so vec_ste stores it whatever the address of `result`
vec_ste(vec_splat((vector unsigned char)m_value, 15), 0, &result);
return result;
}
// Return the full 32-bit alpha channel (vector element 0).
inline UINT32 get_a32()
{
UINT32 result;
// splat the element so vec_ste stores it whatever the address of `result`
vec_ste(vec_splat(m_value, 0), 0, &result);
return result;
}
// Return the full 32-bit red channel (vector element 1).
inline UINT32 get_r32()
{
UINT32 result;
// splat the element so vec_ste stores it whatever the address of `result`
vec_ste(vec_splat(m_value, 1), 0, &result);
return result;
}
// Return the full 32-bit green channel (vector element 2).
inline UINT32 get_g32()
{
UINT32 result;
// splat the element so vec_ste stores it whatever the address of `result`
vec_ste(vec_splat(m_value, 2), 0, &result);
return result;
}
// Return the full 32-bit blue channel (vector element 3).
inline UINT32 get_b32()
{
UINT32 result;
// splat the element so vec_ste stores it whatever the address of `result`
vec_ste(vec_splat(m_value, 3), 0, &result);
return result;
}
// Element-wise 32-bit multiply: this *= color, keeping the low 32 bits of
// each product.  Built from 16x16-bit partial products:
//   low32(x * y) = ((x_hi * y_lo + x_lo * y_hi) << 16) + x_lo * y_lo
inline void mul(const rgbaint_t& color)
{
// vec_sl only uses the low 5 bits of each count, so splatting -16 shifts by 16
const vector unsigned int shift = vec_splat_u32(-16);
// sum of the two cross products (x_hi * y_lo) + (x_lo * y_hi)
const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(color.m_value, shift)), vec_mule((vector unsigned short)vec_sl(m_value, shift), (vector unsigned short)color.m_value));
// cross products moved up 16 bits, plus the low-half product x_lo * y_lo
m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)color.m_value));
}
inline void mul_imm(const UINT32 imm)
{
const vector unsigned int value = { imm, imm, imm, imm };
const vector unsigned int shift = vec_splat_u32(-16);
const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(value, shift)), vec_mule((vector unsigned short)vec_sl(m_value, shift), (vector unsigned short)value));
m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)value));
}
inline void mul_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
{
const vector unsigned int value = { a, r, g, b };
const vector unsigned int shift = vec_splat_u32(-16);
const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(value, shift)), vec_mule((vector unsigned short)vec_sl(m_value, shift), (vector unsigned short)value));
m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)value));
}
inline void shl(const rgbaint_t& shift)
{
const vector unsigned int limit = { 32, 32, 32, 32 };
const vector unsigned int temp = vec_splat(shift.m_value, 3);
m_value = vec_and(vec_sl(m_value, temp), vec_cmpgt(limit, temp));
}
inline void shl_imm(const UINT8 shift)
{
const vector unsigned int temp = { shift, shift, shift, shift };
m_value = vec_sl(m_value, temp);
}
// Shifts the whole 128-bit vector left, treating `shift` as a bit count.
// vec_slo moves whole octets only: it uses (shift >> 3) & 0xF bytes, taken from
// the LAST byte of its second operand. The result is forced to zero when
// shift >= 128.
// NOTE(review): the residual (shift & 7) bits are not applied here; confirm
// whether callers expect a bit-exact shift.
inline void shl_imm_all(const UINT8 shift)
{
	// A vector unsigned char has 16 elements. All of them must be initialised:
	// vec_slo reads the count from element 15, and the limit comparison below
	// builds a mask covering all 16 bytes.
	const vector unsigned char limit = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
	const vector unsigned char temp = { shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift };
	m_value = vec_and(vec_slo(m_value, temp), (vector unsigned int)vec_cmpgt(limit, temp));
}
inline void shr(const rgbaint_t& shift)
{
const vector unsigned int limit = { 32, 32, 32, 32 };
const vector unsigned int temp = vec_splat(shift.m_value, 3);
m_value = vec_and(vec_sr(m_value, temp), vec_cmpgt(limit, temp));
}
inline void shr_imm(const UINT8 shift)
{
const vector unsigned int temp = { shift, shift, shift, shift };
m_value = vec_sr(m_value, temp);
}
// Shifts the whole 128-bit vector right, treating `shift` as a bit count.
// vec_sro moves whole octets only: it uses (shift >> 3) & 0xF bytes, taken from
// the LAST byte of its second operand. The result is forced to zero when
// shift >= 128.
// NOTE(review): the residual (shift & 7) bits are not applied here; confirm
// whether callers expect a bit-exact shift.
inline void shr_imm_all(const UINT8 shift)
{
	// A vector unsigned char has 16 elements. All of them must be initialised:
	// vec_sro reads the count from element 15, and the limit comparison below
	// builds a mask covering all 16 bytes.
	const vector unsigned char limit = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
	const vector unsigned char temp = { shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift };
	m_value = vec_and(vec_sro(m_value, temp), (vector unsigned int)vec_cmpgt(limit, temp));
}
inline void sra(const rgbaint_t& shift)
{
const vector unsigned int limit = { 31, 31, 31, 31 };
m_value = vec_sra(m_value, vec_min(vec_splat(shift.m_value, 3), limit));
}
inline void sra_imm(const UINT8 shift)
{
const vector unsigned int temp = { shift, shift, shift, shift };
m_value = vec_sra(m_value, temp);
}
inline void or_reg(const rgbaint_t& color2)
{
m_value = vec_or(m_value, color2.m_value);
}
inline void or_imm(const UINT32 value)
{
const vector unsigned int temp = { value, value, value, value };
m_value = vec_or(m_value, temp);
}
inline void or_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
{
const vector unsigned int temp = { a, r, g, b };
m_value = vec_or(m_value, temp);
}
inline void and_reg(const rgbaint_t& color)
{
m_value = vec_and(m_value, color.m_value);
}
inline void and_imm(const UINT32 value)
{
const vector unsigned int temp = { value, value, value, value };
m_value = vec_and(m_value, temp);
}
inline void and_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
{
const vector unsigned int temp = { a, r, g, b };
m_value = vec_and(m_value, temp);
}
inline void xor_reg(const rgbaint_t& color2)
{
m_value = vec_xor(m_value, color2.m_value);
}
inline void xor_imm(const INT32 value)
{
const vector unsigned int temp = { value, value, value, value };
m_value = vec_xor(m_value, temp);
}
inline void xor_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
{
const vector unsigned int temp = { a, r, g, b };
m_value = vec_xor(m_value, temp);
}
inline void clamp_and_clear(const UINT32 sign)
{
const vector unsigned int vzero = vec_splat_u32(0);
vector unsigned int vsign = { sign, sign, sign, sign };
m_value = vec_and(m_value, vec_cmpeq(vec_and(m_value, vsign), vzero));
vsign = vec_nor(vec_sra(vsign, vec_splat_u32(1)), vzero);
const vector unsigned int mask = vec_cmpgt(m_value, vsign);
m_value = vec_or(vec_and(vsign, mask), vec_and(m_value, vec_nor(mask, vzero)));
}
inline void sign_extend(const UINT32 compare, const UINT32 sign)
{
const vector unsigned int compare_vec = { compare, compare, compare, compare };
const vector unsigned int compare_mask = vec_cmpeq(vec_and(m_value, compare_vec), compare_vec);
const vector unsigned int sign_vec = { sign, sign, sign, sign };
m_value = vec_or(m_value, vec_and(sign_vec, compare_mask));
}
inline void min(const UINT32 value)
{
const vector unsigned int temp = { value, value, value, value };
m_value = vec_min(m_value, temp);
}
// Out-of-line operations: only the prototypes appear here, the bodies are defined elsewhere.
// NOTE(review): the descriptions below are inferred from the names and signatures alone;
// confirm them against the definitions.
// Presumably mixes this colour with `other` using the 8-bit weight `factor`.
void blend(const rgbaint_t& other, UINT8 factor);
// Presumably multiplies by a per-channel scale and clamps the result.
void scale_and_clamp(const rgbaint_t& scale);
// Presumably multiplies every channel by the same scalar scale and clamps the result.
void scale_imm_and_clamp(const INT32 scale);
// Presumably computes this * scale + other * scale2, clamped.
void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2);
// Presumably computes this * scale + other, clamped.
void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other);
// Presumably computes this * scale + other with a scalar scale, clamped.
void scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& other);
inline void cmpeq(const rgbaint_t& value)
{
m_value = vec_cmpeq(m_value, value.m_value);
}
inline void cmpeq_imm(const UINT32 value)
{
const vector unsigned int temp = { value, value, value, value };
m_value = vec_cmpeq(m_value, temp);
}
inline void cmpeq_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
{
const vector unsigned int temp = { a, r, g, b };
m_value = vec_cmpeq(m_value, temp);
}
inline void cmpgt(const rgbaint_t& value)
{
m_value = vec_cmpgt(m_value, value.m_value);
}
inline void cmpgt_imm(const UINT32 value)
{
const vector unsigned int temp = { value, value, value, value };
m_value = vec_cmpgt(m_value, temp);
}
inline void cmpgt_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
{
const vector unsigned int temp = { a, r, g, b };
m_value = vec_cmpgt(m_value, temp);
}
inline void cmplt(const rgbaint_t& value)
{
m_value = vec_cmplt(m_value, value.m_value);
}
inline void cmplt_imm(const UINT32 value)
{
const vector unsigned int temp = { value, value, value, value };
m_value = vec_cmplt(m_value, temp);
}
inline void cmplt_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
{
const vector unsigned int temp = { a, r, g, b };
m_value = vec_cmplt(m_value, temp);
}
// Copy assignment: takes over all four lanes of `other`.
// Returns *this by reference (as the compound assignment operators of this
// class do) so that each assignment does not also construct a temporary copy.
inline rgbaint_t& operator=(const rgbaint_t& other)
{
	m_value = other.m_value;
	return *this;
}
// Lane-by-lane addition of `other`; returns *this.
inline rgbaint_t& operator+=(const rgbaint_t& other)
{
	const vector unsigned int addend = other.m_value;
	m_value = vec_add(m_value, addend);
	return *this;
}
// Adds the scalar `other` to every lane; returns *this.
inline rgbaint_t& operator+=(const INT32 other)
{
	// The parameter is signed while the lanes are unsigned; convert once, explicitly.
	const UINT32 scalar = other;
	const vector unsigned int addend = { scalar, scalar, scalar, scalar };
	m_value = vec_add(m_value, addend);
	return *this;
}
// Lane-by-lane subtraction of `other`; returns *this.
inline rgbaint_t& operator-=(const rgbaint_t& other)
{
	const vector unsigned int subtrahend = other.m_value;
	m_value = vec_sub(m_value, subtrahend);
	return *this;
}
// Per-lane 32-bit multiply by `other`, keeping the low 32 bits of each product; returns *this.
// Built from 16x16 multiplies: ((ah*bl + al*bh) << 16) + al*bl.
inline rgbaint_t& operator*=(const rgbaint_t& other)
{
	// vec_splat_u32(-16) yields 0xFFFFFFF0 per lane; vec_sl uses only the low five bits, i.e. 16.
	const vector unsigned int sixteen = vec_splat_u32(-16);
	const vector unsigned short lhs = (vector unsigned short)m_value;
	const vector unsigned short rhs = (vector unsigned short)other.m_value;
	// Low halves moved up into the even (high) halfword slots so vec_mule can reach them.
	const vector unsigned short lhs_low_up = (vector unsigned short)vec_sl(m_value, sixteen);
	const vector unsigned short rhs_low_up = (vector unsigned short)vec_sl(other.m_value, sixteen);
	const vector unsigned int cross = vec_add(vec_mule(lhs, rhs_low_up), vec_mule(lhs_low_up, rhs));
	m_value = vec_add(vec_sl(cross, sixteen), vec_mulo(lhs, rhs));
	return *this;
}
// Per-lane 32-bit multiply by the scalar `other`, keeping the low 32 bits of each product; returns *this.
// Built from 16x16 multiplies: ((ah*bl + al*bh) << 16) + al*bl.
inline rgbaint_t& operator*=(const INT32 other)
{
	// The parameter is signed while the lanes are unsigned; convert once, explicitly.
	const UINT32 scalar = other;
	const vector unsigned int factor = { scalar, scalar, scalar, scalar };
	// vec_splat_u32(-16) yields 0xFFFFFFF0 per lane; vec_sl uses only the low five bits, i.e. 16.
	const vector unsigned int sixteen = vec_splat_u32(-16);
	const vector unsigned short lhs = (vector unsigned short)m_value;
	const vector unsigned short rhs = (vector unsigned short)factor;
	// Low halves moved up into the even (high) halfword slots so vec_mule can reach them.
	const vector unsigned short lhs_low_up = (vector unsigned short)vec_sl(m_value, sixteen);
	const vector unsigned short rhs_low_up = (vector unsigned short)vec_sl(factor, sixteen);
	const vector unsigned int cross = vec_add(vec_mule(lhs, rhs_low_up), vec_mule(lhs_low_up, rhs));
	m_value = vec_add(vec_sl(cross, sixteen), vec_mulo(lhs, rhs));
	return *this;
}
// Arithmetic right shift of every lane by `shift` bits; returns *this.
// No capping is applied; vec_sra uses the count modulo 32.
inline rgbaint_t& operator>>=(const INT32 shift)
{
	// The parameter is signed while the lanes are unsigned; convert once, explicitly.
	const UINT32 bits = shift;
	const vector unsigned int count = { bits, bits, bits, bits };
	m_value = vec_sra(m_value, count);
	return *this;
}
// Copies the alpha lane of `alpha` into this colour; the r, g and b lanes are left untouched.
//
// The previous body called SSE intrinsics (_mm_insert_epi16 / _mm_extract_epi16),
// which do not exist in the VMX build. This version uses alpha_mask the same way
// set_a() does: bits set in the mask keep this colour's value, bits clear in the
// mask mark the alpha lane to be replaced.
inline void merge_alpha(rgbaint_t& alpha)
{
	// vec_sel(a, b, m) takes bits from b where m is 1 and from a where m is 0.
	m_value = vec_sel(alpha.m_value, m_value, alpha_mask);
}
// Static helper, declared here and defined out of line.
// Takes four 32-bit values and two 8-bit weights and returns one 32-bit value.
// NOTE(review): presumably four packed pixels blended by horizontal weight u and vertical weight v; confirm against the definition.
static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v);
protected:
	// Shorthand for the AltiVec vector types used by this class.
	typedef vector unsigned char    VECU8;
	typedef vector unsigned short   VECU16;
	typedef vector unsigned int     VECU32;

	// The colour itself: four 32-bit lanes ordered { a, r, g, b }.
	// VECU32 already names a vector type, so it must not be prefixed with
	// `vector` a second time.
	VECU32 m_value;

	// Shared constants, defined out of line.
	static const VECU16 maxbyte;
	static const VECU32 alpha_mask;
	static const VECU32 red_mask;
	static const VECU32 green_mask;
	static const VECU32 blue_mask;
	static const VECU16 scale_table[256];
};
/*-------------------------------------------------
    rgba_to_rgbaint - expands a packed quad of
    8-bit ARGB components into an rgbaint, one
    16-bit element per component
-------------------------------------------------*/
INLINE void rgba_to_rgbaint(rgbaint *rgb, rgb_t const &color)
{
	const vector signed int zero = vec_splat_s32(0);

	/* vec_lde drops the word into the element matching its address;
	   the vec_lvsl permute map rotates it to the front of the vector */
	const vector signed int loaded = (vector signed int)vec_lde(0, color.ptr());
	const vector signed char packed = (vector signed char)vec_perm(loaded, zero, vec_lvsl(0, color.ptr()));

	/* interleave with zero bytes to widen each component to 16 bits */
	*rgb = (rgbaint)vec_mergeh((vector signed char)zero, packed);
}
/*-------------------------------------------------
    rgbint_to_rgb - converts an rgbint back to
    a packed trio of RGB values (vec_packsu
    saturates each component to a byte)
-------------------------------------------------*/
INLINE rgb_t rgbint_to_rgb(const rgbint *color)
{
	/* replicate the packed word so vec_ste stores it whatever the
	   alignment of the destination */
	const vector unsigned int replicated = vec_splat((vector unsigned int)vec_packsu(*color, *color), 0);
	UINT32 packed;
	vec_ste(replicated, 0, &packed);
	return packed;
}
/*-------------------------------------------------
    rgbaint_to_rgba - converts an rgbaint back to
    a packed quad of RGB values (vec_packsu
    saturates each component to a byte)
-------------------------------------------------*/
INLINE rgb_t rgbaint_to_rgba(const rgbaint *color)
{
	/* replicate the packed word so vec_ste stores it whatever the
	   alignment of the destination */
	const vector unsigned int replicated = vec_splat((vector unsigned int)vec_packsu(*color, *color), 0);
	UINT32 packed;
	vec_ste(replicated, 0, &packed);
	return packed;
}
/*-------------------------------------------------
    rgbint_to_rgb_clamp - converts an rgbint back
    to a packed trio of RGB values, clamping them
    to bytes first (the clamp is the saturation
    performed by vec_packsu)
-------------------------------------------------*/
INLINE rgb_t rgbint_to_rgb_clamp(const rgbint *color)
{
	/* replicate the packed word so vec_ste stores it whatever the
	   alignment of the destination */
	const vector unsigned int replicated = vec_splat((vector unsigned int)vec_packsu(*color, *color), 0);
	UINT32 packed;
	vec_ste(replicated, 0, &packed);
	return packed;
}
/*-------------------------------------------------
    rgbaint_to_rgba_clamp - converts an rgbaint
    back to a packed quad of RGB values, clamping
    them to bytes first (the clamp is the
    saturation performed by vec_packsu)
-------------------------------------------------*/
INLINE rgb_t rgbaint_to_rgba_clamp(const rgbaint *color)
{
	/* replicate the packed word so vec_ste stores it whatever the
	   alignment of the destination */
	const vector unsigned int replicated = vec_splat((vector unsigned int)vec_packsu(*color, *color), 0);
	UINT32 packed;
	vec_ste(replicated, 0, &packed);
	return packed;
}
/***************************************************************************
CORE MATH
***************************************************************************/
/*-------------------------------------------------
    rgbint_add - add color2 to color1 in place,
    element by element
-------------------------------------------------*/
INLINE void rgbint_add(rgbint *color1, const rgbint *color2)
{
	const rgbint sum = vec_add(*color1, *color2);
	*color1 = sum;
}
/*-------------------------------------------------
    rgbaint_add - add color2 to color1 in place,
    element by element
-------------------------------------------------*/
INLINE void rgbaint_add(rgbaint *color1, const rgbaint *color2)
{
	const rgbaint sum = vec_add(*color1, *color2);
	*color1 = sum;
}
/*-------------------------------------------------
    rgbint_sub - subtract color2 from color1 in
    place, element by element
-------------------------------------------------*/
INLINE void rgbint_sub(rgbint *color1, const rgbint *color2)
{
	const rgbint difference = vec_sub(*color1, *color2);
	*color1 = difference;
}
/*-------------------------------------------------
    rgbaint_sub - subtract color2 from color1 in
    place, element by element
-------------------------------------------------*/
INLINE void rgbaint_sub(rgbaint *color1, const rgbaint *color2)
{
	const rgbaint difference = vec_sub(*color1, *color2);
	*color1 = difference;
}
/*-------------------------------------------------
    rgbint_subr - reverse subtract: color1 becomes
    (color2 - color1), element by element
-------------------------------------------------*/
INLINE void rgbint_subr(rgbint *color1, const rgbint *color2)
{
	const rgbint difference = vec_sub(*color2, *color1);
	*color1 = difference;
}
/*-------------------------------------------------
    rgbaint_subr - reverse subtract: color1
    becomes (color2 - color1), element by element
-------------------------------------------------*/
INLINE void rgbaint_subr(rgbaint *color1, const rgbaint *color2)
{
	const rgbaint difference = vec_sub(*color2, *color1);
	*color1 = difference;
}
/***************************************************************************
TABLES
***************************************************************************/
/* Constant tables shared by the rgbint/rgbaint helpers in this file.
   Only declared here (extern); the storage is defined in another translation unit. */
extern const struct _rgbvmx_statics
{
/* upper bound applied with vec_min by the *_and_clamp helpers */
rgbaint maxbyte;
/* one weight vector per 8-bit factor, fed to vec_msum by the blend and filter helpers */
/* NOTE(review): presumably entry n holds alternating { n, 256 - n } pairs; confirm against the table definition */
rgbaint scale_table[256];
} rgbvmx_statics;
/***************************************************************************
HIGHER LEVEL OPERATIONS
***************************************************************************/
/*-------------------------------------------------
    rgbint_blend - blend color2 into color1,
    weighting the pair with the
    rgbvmx_statics.scale_table entry selected by
    color1scale; the result replaces color1
-------------------------------------------------*/
INLINE void rgbint_blend(rgbint *color1, const rgbint *color2, UINT8 color1scale)
{
	/* pair up matching components: { c1[0], c2[0], c1[1], c2[1], ... } */
	const rgbint paired = vec_mergeh(*color1, *color2);

	/* multiply each pair by its weights and add the two products */
	vector signed int weighted = vec_msum(paired, rgbvmx_statics.scale_table[color1scale], vec_splat_s32(0));

	/* drop the 8 fractional bits, then narrow back to 16-bit elements */
	weighted = (vector signed int)vec_sr(weighted, vec_splat_u32(8));
	*color1 = vec_packs(weighted, weighted);
}
/*-------------------------------------------------
    rgbaint_blend - blend color2 into color1,
    weighting the pair with the
    rgbvmx_statics.scale_table entry selected by
    color1scale; the result replaces color1
-------------------------------------------------*/
INLINE void rgbaint_blend(rgbaint *color1, const rgbaint *color2, UINT8 color1scale)
{
	/* pair up matching components: { c1[0], c2[0], c1[1], c2[1], ... } */
	const rgbaint paired = vec_mergeh(*color1, *color2);

	/* multiply each pair by its weights and add the two products */
	vector signed int weighted = vec_msum(paired, rgbvmx_statics.scale_table[color1scale], vec_splat_s32(0));

	/* drop the 8 fractional bits, then narrow back to 16-bit elements */
	weighted = (vector signed int)vec_sr(weighted, vec_splat_u32(8));
	*color1 = vec_packs(weighted, weighted);
}
/*-------------------------------------------------
    rgbint_scale_immediate_and_clamp - scale the
    given color by a single 8.8 scale factor and
    cap the result at rgbvmx_statics.maxbyte
-------------------------------------------------*/
INLINE void rgbint_scale_immediate_and_clamp(rgbint *color, INT16 colorscale)
{
/* permute map that replicates the halfword found at colorscale's address offset into every element */
rgbint splatmap = vec_splat((rgbint)vec_lvsl(0, &colorscale), 0);
/* vec_lde places the scalar in the element that matches its address */
rgbint vecscale = vec_lde(0, &colorscale);
vector signed int temp;
/* broadcast the scale factor to all eight elements */
vecscale = (rgbint)vec_perm(vecscale, vecscale, (vector unsigned char)splatmap);
/* pair each of the first four components with a zero so vec_msum yields component * scale */
*color = (rgbint)vec_mergeh(*color, (rgbint)vec_splat_s32(0));
temp = vec_msum(*color, vecscale, vec_splat_s32(0));
/* drop the 8 fractional bits */
temp = (vector signed int)vec_sr(temp, vec_splat_u32(8));
/* narrow with saturation, then cap at maxbyte */
*color = vec_min(vec_packs(temp, temp), rgbvmx_statics.maxbyte);
}
/*-------------------------------------------------
    rgbint_scale_channel_and_clamp - scale each
    component of color by the matching 8.8 factor
    in colorscale and cap the result at
    rgbvmx_statics.maxbyte
-------------------------------------------------*/
INLINE void rgbint_scale_channel_and_clamp(rgbint *color, const rgbint *colorscale)
{
/* interleave the first four factors with zeros so they line up with the widened color below */
rgbint vecscale = (rgbint)vec_mergeh(*colorscale, (rgbint)vec_splat_s32(0));
vector signed int temp;
/* pair each of the first four components with a zero so vec_msum yields component * factor */
*color = (rgbint)vec_mergeh(*color, (rgbint)vec_splat_s32(0));
temp = vec_msum(*color, vecscale, vec_splat_s32(0));
/* drop the 8 fractional bits */
temp = (vector signed int)vec_sr(temp, vec_splat_u32(8));
/* narrow with saturation, then cap at maxbyte */
*color = vec_min(vec_packs(temp, temp), rgbvmx_statics.maxbyte);
}
/*-------------------------------------------------
    rgbaint_scale_immediate_and_clamp - scale the
    given color by a single 8.8 scale factor and
    cap the result at rgbvmx_statics.maxbyte
-------------------------------------------------*/
INLINE void rgbaint_scale_immediate_and_clamp(rgbaint *color, INT16 colorscale)
{
/* permute map that replicates the halfword found at colorscale's address offset into every element */
rgbaint splatmap = vec_splat((rgbaint)vec_lvsl(0, &colorscale), 0);
/* vec_lde places the scalar in the element that matches its address */
rgbaint vecscale = vec_lde(0, &colorscale);
vector signed int temp;
/* broadcast the scale factor to all eight elements */
vecscale = (rgbaint)vec_perm(vecscale, vecscale, (vector unsigned char)splatmap);
/* pair each of the first four components with a zero so vec_msum yields component * scale */
*color = (rgbaint)vec_mergeh(*color, (rgbaint)vec_splat_s32(0));
temp = vec_msum(*color, vecscale, vec_splat_s32(0));
/* drop the 8 fractional bits */
temp = (vector signed int)vec_sr(temp, vec_splat_u32(8));
/* narrow with saturation, then cap at maxbyte */
*color = vec_min(vec_packs(temp, temp), rgbvmx_statics.maxbyte);
}
/*-------------------------------------------------
    rgbaint_scale_channel_and_clamp - scale each
    component of color by the matching 8.8 factor
    in colorscale and cap the result at
    rgbvmx_statics.maxbyte
-------------------------------------------------*/
INLINE void rgbaint_scale_channel_and_clamp(rgbaint *color, const rgbint *colorscale)
{
	/* interleave the first four factors with zeros so they line up with
	   the widened color below; the factors must come from colorscale,
	   not from color itself (which would square the color) */
	rgbaint vecscale = (rgbaint)vec_mergeh(*colorscale, (rgbint)vec_splat_s32(0));
	vector signed int temp;

	/* pair each of the first four components with a zero so vec_msum
	   yields component * factor */
	*color = (rgbaint)vec_mergeh(*color, (rgbaint)vec_splat_s32(0));
	temp = vec_msum(*color, vecscale, vec_splat_s32(0));

	/* drop the 8 fractional bits */
	temp = (vector signed int)vec_sr(temp, vec_splat_u32(8));

	/* narrow with saturation, then cap at maxbyte */
	*color = vec_min(vec_packs(temp, temp), rgbvmx_statics.maxbyte);
}
/*-------------------------------------------------
    rgb_bilinear_filter - bilinear filter between
    four pixel values; u and v select the weight
    vectors from rgbvmx_statics.scale_table and
    the packed 32-bit result is returned
-------------------------------------------------*/
INLINE rgb_t rgb_bilinear_filter(rgb_t const &rgb00, rgb_t const &rgb01, rgb_t const &rgb10, rgb_t const &rgb11, UINT8 u, UINT8 v)
{
/* load each pixel with vec_lde and rotate it to the front of the vector with the vec_lvsl permute map; the second permute source is zero */
rgbint color00 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb00.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb00.ptr()));
rgbint color01 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb01.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb01.ptr()));
rgbint color10 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb10.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb10.ptr()));
rgbint color11 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb11.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb11.ptr()));
/* interleave color01 and color00 at the byte level */
color01 = (rgbint)vec_mergeh((vector signed char)color01, (vector signed char)color00);
color11 = (rgbint)vec_mergeh((vector signed char)color11, (vector signed char)color10);
/* widen every byte to 16 bits by interleaving with zero bytes */
color01 = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color01);
color11 = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color11);
/* horizontal pass: weight each adjacent pair with scale_table[u] and add the two products */
color01 = (rgbint)vec_msum(color01, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
color11 = (rgbint)vec_msum(color11, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
/* NOTE(review): the >>1, <<15 and vec_max steps appear to place the two row results in the low and high halfword of each word so that a single vec_msum can weight them by v; confirm */
color01 = (rgbint)vec_sr((vector signed int)color01, vec_splat_u32(1));
color11 = (rgbint)vec_sl((vector signed int)color11, vec_splat_u32(15));
color01 = vec_max(color01, color11);
/* vertical pass with scale_table[v], then drop the 15 fractional bits */
color01 = (rgbint)vec_msum(color01, rgbvmx_statics.scale_table[v], vec_splat_s32(0));
color01 = (rgbint)vec_sr((vector signed int)color01, vec_splat_u32(15));
/* narrow 32 -> 16 -> 8 bits with saturation; packing the vector with itself repeats the pixel in every word */
color01 = vec_packs((vector signed int)color01, (vector signed int)color01);
color01 = (rgbint)vec_packsu(color01, color01);
UINT32 result;
vec_ste((vector unsigned int)color01, 0, &result);
return result;
}
/*-------------------------------------------------
    rgba_bilinear_filter - bilinear filter between
    four pixel values; u and v select the weight
    vectors from rgbvmx_statics.scale_table and
    the packed 32-bit result is returned
-------------------------------------------------*/
INLINE rgb_t rgba_bilinear_filter(rgb_t const &rgb00, rgb_t const &rgb01, rgb_t const &rgb10, rgb_t const &rgb11, UINT8 u, UINT8 v)
{
/* load each pixel with vec_lde and rotate it to the front of the vector with the vec_lvsl permute map; the second permute source is zero */
rgbaint color00 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb00.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb00.ptr()));
rgbaint color01 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb01.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb01.ptr()));
rgbaint color10 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb10.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb10.ptr()));
rgbaint color11 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb11.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb11.ptr()));
/* interleave color01 and color00 at the byte level */
color01 = (rgbaint)vec_mergeh((vector signed char)color01, (vector signed char)color00);
color11 = (rgbaint)vec_mergeh((vector signed char)color11, (vector signed char)color10);
/* widen every byte to 16 bits by interleaving with zero bytes */
color01 = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color01);
color11 = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color11);
/* horizontal pass: weight each adjacent pair with scale_table[u] and add the two products */
color01 = (rgbaint)vec_msum(color01, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
color11 = (rgbaint)vec_msum(color11, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
/* NOTE(review): the >>1, <<15 and vec_max steps appear to place the two row results in the low and high halfword of each word so that a single vec_msum can weight them by v; confirm */
color01 = (rgbaint)vec_sr((vector signed int)color01, vec_splat_u32(1));
color11 = (rgbaint)vec_sl((vector signed int)color11, vec_splat_u32(15));
color01 = vec_max(color01, color11);
/* vertical pass with scale_table[v], then drop the 15 fractional bits */
color01 = (rgbaint)vec_msum(color01, rgbvmx_statics.scale_table[v], vec_splat_s32(0));
color01 = (rgbaint)vec_sr((vector signed int)color01, vec_splat_u32(15));
/* narrow 32 -> 16 -> 8 bits with saturation; packing the vector with itself repeats the pixel in every word */
color01 = vec_packs((vector signed int)color01, (vector signed int)color01);
color01 = (rgbaint)vec_packsu(color01, color01);
UINT32 result;
vec_ste((vector unsigned int)color01, 0, &result);
return result;
}
/*-------------------------------------------------
    rgbint_bilinear_filter - bilinear filter
    between four pixel values; u and v select the
    weight vectors from rgbvmx_statics.scale_table
    and the result is written to *color as 16-bit
    elements
-------------------------------------------------*/
INLINE void rgbint_bilinear_filter(rgbint *color, rgb_t const &rgb00, rgb_t const &rgb01, rgb_t const &rgb10, rgb_t const &rgb11, UINT8 u, UINT8 v)
{
/* load each pixel with vec_lde and rotate it to the front of the vector with the vec_lvsl permute map; the second permute source is zero */
rgbint color00 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb00.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb00.ptr()));
rgbint color01 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb01.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb01.ptr()));
rgbint color10 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb10.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb10.ptr()));
rgbint color11 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb11.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb11.ptr()));
/* interleave color01 and color00 at the byte level */
color01 = (rgbint)vec_mergeh((vector signed char)color01, (vector signed char)color00);
color11 = (rgbint)vec_mergeh((vector signed char)color11, (vector signed char)color10);
/* widen every byte to 16 bits by interleaving with zero bytes */
color01 = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color01);
color11 = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color11);
/* horizontal pass: weight each adjacent pair with scale_table[u] and add the two products */
color01 = (rgbint)vec_msum(color01, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
color11 = (rgbint)vec_msum(color11, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
/* NOTE(review): the >>1, <<15 and vec_max steps appear to place the two row results in the low and high halfword of each word so that a single vec_msum can weight them by v; confirm */
color01 = (rgbint)vec_sr((vector signed int)color01, vec_splat_u32(1));
color11 = (rgbint)vec_sl((vector signed int)color11, vec_splat_u32(15));
color01 = vec_max(color01, color11);
/* vertical pass with scale_table[v], then drop the 15 fractional bits */
color01 = (rgbint)vec_msum(color01, rgbvmx_statics.scale_table[v], vec_splat_s32(0));
color01 = (rgbint)vec_sr((vector signed int)color01, vec_splat_u32(15));
/* narrow the 32-bit results to 16-bit elements with saturation */
*color = vec_packs((vector signed int)color01, (vector signed int)color01);
}
/*-------------------------------------------------
    rgbaint_bilinear_filter - bilinear filter
    between four pixel values; u and v select the
    weight vectors from rgbvmx_statics.scale_table
    and the result is written to *color as 16-bit
    elements
-------------------------------------------------*/
INLINE void rgbaint_bilinear_filter(rgbaint *color, rgb_t const &rgb00, rgb_t const &rgb01, rgb_t const &rgb10, rgb_t const &rgb11, UINT8 u, UINT8 v)
{
/* load each pixel with vec_lde and rotate it to the front of the vector with the vec_lvsl permute map; the second permute source is zero */
rgbaint color00 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb00.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb00.ptr()));
rgbaint color01 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb01.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb01.ptr()));
rgbaint color10 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb10.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb10.ptr()));
rgbaint color11 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb11.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb11.ptr()));
/* interleave color01 and color00 at the byte level */
color01 = (rgbaint)vec_mergeh((vector signed char)color01, (vector signed char)color00);
color11 = (rgbaint)vec_mergeh((vector signed char)color11, (vector signed char)color10);
/* widen every byte to 16 bits by interleaving with zero bytes */
color01 = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color01);
color11 = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color11);
/* horizontal pass: weight each adjacent pair with scale_table[u] and add the two products */
color01 = (rgbaint)vec_msum(color01, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
color11 = (rgbaint)vec_msum(color11, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
/* NOTE(review): the >>1, <<15 and vec_max steps appear to place the two row results in the low and high halfword of each word so that a single vec_msum can weight them by v; confirm */
color01 = (rgbaint)vec_sr((vector signed int)color01, vec_splat_u32(1));
color11 = (rgbaint)vec_sl((vector signed int)color11, vec_splat_u32(15));
color01 = vec_max(color01, color11);
/* vertical pass with scale_table[v], then drop the 15 fractional bits */
color01 = (rgbaint)vec_msum(color01, rgbvmx_statics.scale_table[v], vec_splat_s32(0));
color01 = (rgbaint)vec_sr((vector signed int)color01, vec_splat_u32(15));
/* narrow the 32-bit results to 16-bit elements with saturation */
*color = vec_packs((vector signed int)color01, (vector signed int)color01);
}
// altivec.h somehow redefines "bool" in a bad way on PowerPC Mac OS X. really.
#ifdef OSX_PPC