mirror of
https://github.com/mamedev/mame.git
synced 2024-11-18 10:06:19 +01:00
Pick the low-hanging fruit (nw)
Implemented most of MooglyGuy's new RGB intrinsics for VMX/Altivec. Still need to do blend, bilinear filter and merge alpha.
This commit is contained in:
parent
55c2e3ef5d
commit
84aa21184b
5 changed files with 701 additions and 556 deletions
|
@ -314,6 +314,7 @@ files {
|
|||
MAME_DIR .. "src/emu/video/rgbgen.h",
|
||||
MAME_DIR .. "src/emu/video/rgbsse.c",
|
||||
MAME_DIR .. "src/emu/video/rgbsse.h",
|
||||
MAME_DIR .. "src/emu/video/rgbvmx.c",
|
||||
MAME_DIR .. "src/emu/video/rgbvmx.h",
|
||||
MAME_DIR .. "src/emu/video/vector.c",
|
||||
MAME_DIR .. "src/emu/video/vector.h",
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
***************************************************************************/
|
||||
|
||||
#if defined(__SSE2__) || defined(_MSC_VER)
|
||||
|
||||
#include "emu.h"
|
||||
#include <emmintrin.h>
|
||||
#include "rgbutil.h"
|
||||
|
@ -90,3 +92,5 @@ UINT32 rgbaint_t::bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT
|
|||
color01 = _mm_packus_epi16(color01, color01);
|
||||
return _mm_cvtsi128_si32(color01);
|
||||
}
|
||||
|
||||
#endif // defined(__SSE2__) || defined(_MSC_VER)
|
||||
|
|
|
@ -22,7 +22,7 @@ const struct _rgbsse_statics rgbsse_statics =
|
|||
{
|
||||
{ 0 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000},
|
||||
{ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000 },
|
||||
{ 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff },
|
||||
{ 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff },
|
||||
{ 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff },
|
||||
|
@ -158,147 +158,3 @@ const struct _rgbsse_statics rgbsse_statics =
|
|||
}
|
||||
};
|
||||
#endif // defined(__SSE2__)
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
VMX/ALTIVEC TABLES
|
||||
***************************************************************************/
|
||||
|
||||
#if defined(__ALTIVEC__)
|
||||
#include <altivec.h>
|
||||
const struct _rgbvmx_statics rgbvmx_statics =
|
||||
{
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{
|
||||
{ 0, 256, 0, 256, 0, 256, 0, 256 }, { 1, 255, 1, 255, 1, 255, 1, 255 },
|
||||
{ 2, 254, 2, 254, 2, 254, 2, 254 }, { 3, 253, 3, 253, 3, 253, 3, 253 },
|
||||
{ 4, 252, 4, 252, 4, 252, 4, 252 }, { 5, 251, 5, 251, 5, 251, 5, 251 },
|
||||
{ 6, 250, 6, 250, 6, 250, 6, 250 }, { 7, 249, 7, 249, 7, 249, 7, 249 },
|
||||
{ 8, 248, 8, 248, 8, 248, 8, 248 }, { 9, 247, 9, 247, 9, 247, 9, 247 },
|
||||
{ 10, 246, 10, 246, 10, 246, 10, 246 }, { 11, 245, 11, 245, 11, 245, 11, 245 },
|
||||
{ 12, 244, 12, 244, 12, 244, 12, 244 }, { 13, 243, 13, 243, 13, 243, 13, 243 },
|
||||
{ 14, 242, 14, 242, 14, 242, 14, 242 }, { 15, 241, 15, 241, 15, 241, 15, 241 },
|
||||
{ 16, 240, 16, 240, 16, 240, 16, 240 }, { 17, 239, 17, 239, 17, 239, 17, 239 },
|
||||
{ 18, 238, 18, 238, 18, 238, 18, 238 }, { 19, 237, 19, 237, 19, 237, 19, 237 },
|
||||
{ 20, 236, 20, 236, 20, 236, 20, 236 }, { 21, 235, 21, 235, 21, 235, 21, 235 },
|
||||
{ 22, 234, 22, 234, 22, 234, 22, 234 }, { 23, 233, 23, 233, 23, 233, 23, 233 },
|
||||
{ 24, 232, 24, 232, 24, 232, 24, 232 }, { 25, 231, 25, 231, 25, 231, 25, 231 },
|
||||
{ 26, 230, 26, 230, 26, 230, 26, 230 }, { 27, 229, 27, 229, 27, 229, 27, 229 },
|
||||
{ 28, 228, 28, 228, 28, 228, 28, 228 }, { 29, 227, 29, 227, 29, 227, 29, 227 },
|
||||
{ 30, 226, 30, 226, 30, 226, 30, 226 }, { 31, 225, 31, 225, 31, 225, 31, 225 },
|
||||
{ 32, 224, 32, 224, 32, 224, 32, 224 }, { 33, 223, 33, 223, 33, 223, 33, 223 },
|
||||
{ 34, 222, 34, 222, 34, 222, 34, 222 }, { 35, 221, 35, 221, 35, 221, 35, 221 },
|
||||
{ 36, 220, 36, 220, 36, 220, 36, 220 }, { 37, 219, 37, 219, 37, 219, 37, 219 },
|
||||
{ 38, 218, 38, 218, 38, 218, 38, 218 }, { 39, 217, 39, 217, 39, 217, 39, 217 },
|
||||
{ 40, 216, 40, 216, 40, 216, 40, 216 }, { 41, 215, 41, 215, 41, 215, 41, 215 },
|
||||
{ 42, 214, 42, 214, 42, 214, 42, 214 }, { 43, 213, 43, 213, 43, 213, 43, 213 },
|
||||
{ 44, 212, 44, 212, 44, 212, 44, 212 }, { 45, 211, 45, 211, 45, 211, 45, 211 },
|
||||
{ 46, 210, 46, 210, 46, 210, 46, 210 }, { 47, 209, 47, 209, 47, 209, 47, 209 },
|
||||
{ 48, 208, 48, 208, 48, 208, 48, 208 }, { 49, 207, 49, 207, 49, 207, 49, 207 },
|
||||
{ 50, 206, 50, 206, 50, 206, 50, 206 }, { 51, 205, 51, 205, 51, 205, 51, 205 },
|
||||
{ 52, 204, 52, 204, 52, 204, 52, 204 }, { 53, 203, 53, 203, 53, 203, 53, 203 },
|
||||
{ 54, 202, 54, 202, 54, 202, 54, 202 }, { 55, 201, 55, 201, 55, 201, 55, 201 },
|
||||
{ 56, 200, 56, 200, 56, 200, 56, 200 }, { 57, 199, 57, 199, 57, 199, 57, 199 },
|
||||
{ 58, 198, 58, 198, 58, 198, 58, 198 }, { 59, 197, 59, 197, 59, 197, 59, 197 },
|
||||
{ 60, 196, 60, 196, 60, 196, 60, 196 }, { 61, 195, 61, 195, 61, 195, 61, 195 },
|
||||
{ 62, 194, 62, 194, 62, 194, 62, 194 }, { 63, 193, 63, 193, 63, 193, 63, 193 },
|
||||
{ 64, 192, 64, 192, 64, 192, 64, 192 }, { 65, 191, 65, 191, 65, 191, 65, 191 },
|
||||
{ 66, 190, 66, 190, 66, 190, 66, 190 }, { 67, 189, 67, 189, 67, 189, 67, 189 },
|
||||
{ 68, 188, 68, 188, 68, 188, 68, 188 }, { 69, 187, 69, 187, 69, 187, 69, 187 },
|
||||
{ 70, 186, 70, 186, 70, 186, 70, 186 }, { 71, 185, 71, 185, 71, 185, 71, 185 },
|
||||
{ 72, 184, 72, 184, 72, 184, 72, 184 }, { 73, 183, 73, 183, 73, 183, 73, 183 },
|
||||
{ 74, 182, 74, 182, 74, 182, 74, 182 }, { 75, 181, 75, 181, 75, 181, 75, 181 },
|
||||
{ 76, 180, 76, 180, 76, 180, 76, 180 }, { 77, 179, 77, 179, 77, 179, 77, 179 },
|
||||
{ 78, 178, 78, 178, 78, 178, 78, 178 }, { 79, 177, 79, 177, 79, 177, 79, 177 },
|
||||
{ 80, 176, 80, 176, 80, 176, 80, 176 }, { 81, 175, 81, 175, 81, 175, 81, 175 },
|
||||
{ 82, 174, 82, 174, 82, 174, 82, 174 }, { 83, 173, 83, 173, 83, 173, 83, 173 },
|
||||
{ 84, 172, 84, 172, 84, 172, 84, 172 }, { 85, 171, 85, 171, 85, 171, 85, 171 },
|
||||
{ 86, 170, 86, 170, 86, 170, 86, 170 }, { 87, 169, 87, 169, 87, 169, 87, 169 },
|
||||
{ 88, 168, 88, 168, 88, 168, 88, 168 }, { 89, 167, 89, 167, 89, 167, 89, 167 },
|
||||
{ 90, 166, 90, 166, 90, 166, 90, 166 }, { 91, 165, 91, 165, 91, 165, 91, 165 },
|
||||
{ 92, 164, 92, 164, 92, 164, 92, 164 }, { 93, 163, 93, 163, 93, 163, 93, 163 },
|
||||
{ 94, 162, 94, 162, 94, 162, 94, 162 }, { 95, 161, 95, 161, 95, 161, 95, 161 },
|
||||
{ 96, 160, 96, 160, 96, 160, 96, 160 }, { 97, 159, 97, 159, 97, 159, 97, 159 },
|
||||
{ 98, 158, 98, 158, 98, 158, 98, 158 }, { 99, 157, 99, 157, 99, 157, 99, 157 },
|
||||
{ 100, 156, 100, 156, 100, 156, 100, 156 }, { 101, 155, 101, 155, 101, 155, 101, 155 },
|
||||
{ 102, 154, 102, 154, 102, 154, 102, 154 }, { 103, 153, 103, 153, 103, 153, 103, 153 },
|
||||
{ 104, 152, 104, 152, 104, 152, 104, 152 }, { 105, 151, 105, 151, 105, 151, 105, 151 },
|
||||
{ 106, 150, 106, 150, 106, 150, 106, 150 }, { 107, 149, 107, 149, 107, 149, 107, 149 },
|
||||
{ 108, 148, 108, 148, 108, 148, 108, 148 }, { 109, 147, 109, 147, 109, 147, 109, 147 },
|
||||
{ 110, 146, 110, 146, 110, 146, 110, 146 }, { 111, 145, 111, 145, 111, 145, 111, 145 },
|
||||
{ 112, 144, 112, 144, 112, 144, 112, 144 }, { 113, 143, 113, 143, 113, 143, 113, 143 },
|
||||
{ 114, 142, 114, 142, 114, 142, 114, 142 }, { 115, 141, 115, 141, 115, 141, 115, 141 },
|
||||
{ 116, 140, 116, 140, 116, 140, 116, 140 }, { 117, 139, 117, 139, 117, 139, 117, 139 },
|
||||
{ 118, 138, 118, 138, 118, 138, 118, 138 }, { 119, 137, 119, 137, 119, 137, 119, 137 },
|
||||
{ 120, 136, 120, 136, 120, 136, 120, 136 }, { 121, 135, 121, 135, 121, 135, 121, 135 },
|
||||
{ 122, 134, 122, 134, 122, 134, 122, 134 }, { 123, 133, 123, 133, 123, 133, 123, 133 },
|
||||
{ 124, 132, 124, 132, 124, 132, 124, 132 }, { 125, 131, 125, 131, 125, 131, 125, 131 },
|
||||
{ 126, 130, 126, 130, 126, 130, 126, 130 }, { 127, 129, 127, 129, 127, 129, 127, 129 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128 }, { 129, 127, 129, 127, 129, 127, 129, 127 },
|
||||
{ 130, 126, 130, 126, 130, 126, 130, 126 }, { 131, 125, 131, 125, 131, 125, 131, 125 },
|
||||
{ 132, 124, 132, 124, 132, 124, 132, 124 }, { 133, 123, 133, 123, 133, 123, 133, 123 },
|
||||
{ 134, 122, 134, 122, 134, 122, 134, 122 }, { 135, 121, 135, 121, 135, 121, 135, 121 },
|
||||
{ 136, 120, 136, 120, 136, 120, 136, 120 }, { 137, 119, 137, 119, 137, 119, 137, 119 },
|
||||
{ 138, 118, 138, 118, 138, 118, 138, 118 }, { 139, 117, 139, 117, 139, 117, 139, 117 },
|
||||
{ 140, 116, 140, 116, 140, 116, 140, 116 }, { 141, 115, 141, 115, 141, 115, 141, 115 },
|
||||
{ 142, 114, 142, 114, 142, 114, 142, 114 }, { 143, 113, 143, 113, 143, 113, 143, 113 },
|
||||
{ 144, 112, 144, 112, 144, 112, 144, 112 }, { 145, 111, 145, 111, 145, 111, 145, 111 },
|
||||
{ 146, 110, 146, 110, 146, 110, 146, 110 }, { 147, 109, 147, 109, 147, 109, 147, 109 },
|
||||
{ 148, 108, 148, 108, 148, 108, 148, 108 }, { 149, 107, 149, 107, 149, 107, 149, 107 },
|
||||
{ 150, 106, 150, 106, 150, 106, 150, 106 }, { 151, 105, 151, 105, 151, 105, 151, 105 },
|
||||
{ 152, 104, 152, 104, 152, 104, 152, 104 }, { 153, 103, 153, 103, 153, 103, 153, 103 },
|
||||
{ 154, 102, 154, 102, 154, 102, 154, 102 }, { 155, 101, 155, 101, 155, 101, 155, 101 },
|
||||
{ 156, 100, 156, 100, 156, 100, 156, 100 }, { 157, 99, 157, 99, 157, 99, 157, 99 },
|
||||
{ 158, 98, 158, 98, 158, 98, 158, 98 }, { 159, 97, 159, 97, 159, 97, 159, 97 },
|
||||
{ 160, 96, 160, 96, 160, 96, 160, 96 }, { 161, 95, 161, 95, 161, 95, 161, 95 },
|
||||
{ 162, 94, 162, 94, 162, 94, 162, 94 }, { 163, 93, 163, 93, 163, 93, 163, 93 },
|
||||
{ 164, 92, 164, 92, 164, 92, 164, 92 }, { 165, 91, 165, 91, 165, 91, 165, 91 },
|
||||
{ 166, 90, 166, 90, 166, 90, 166, 90 }, { 167, 89, 167, 89, 167, 89, 167, 89 },
|
||||
{ 168, 88, 168, 88, 168, 88, 168, 88 }, { 169, 87, 169, 87, 169, 87, 169, 87 },
|
||||
{ 170, 86, 170, 86, 170, 86, 170, 86 }, { 171, 85, 171, 85, 171, 85, 171, 85 },
|
||||
{ 172, 84, 172, 84, 172, 84, 172, 84 }, { 173, 83, 173, 83, 173, 83, 173, 83 },
|
||||
{ 174, 82, 174, 82, 174, 82, 174, 82 }, { 175, 81, 175, 81, 175, 81, 175, 81 },
|
||||
{ 176, 80, 176, 80, 176, 80, 176, 80 }, { 177, 79, 177, 79, 177, 79, 177, 79 },
|
||||
{ 178, 78, 178, 78, 178, 78, 178, 78 }, { 179, 77, 179, 77, 179, 77, 179, 77 },
|
||||
{ 180, 76, 180, 76, 180, 76, 180, 76 }, { 181, 75, 181, 75, 181, 75, 181, 75 },
|
||||
{ 182, 74, 182, 74, 182, 74, 182, 74 }, { 183, 73, 183, 73, 183, 73, 183, 73 },
|
||||
{ 184, 72, 184, 72, 184, 72, 184, 72 }, { 185, 71, 185, 71, 185, 71, 185, 71 },
|
||||
{ 186, 70, 186, 70, 186, 70, 186, 70 }, { 187, 69, 187, 69, 187, 69, 187, 69 },
|
||||
{ 188, 68, 188, 68, 188, 68, 188, 68 }, { 189, 67, 189, 67, 189, 67, 189, 67 },
|
||||
{ 190, 66, 190, 66, 190, 66, 190, 66 }, { 191, 65, 191, 65, 191, 65, 191, 65 },
|
||||
{ 192, 64, 192, 64, 192, 64, 192, 64 }, { 193, 63, 193, 63, 193, 63, 193, 63 },
|
||||
{ 194, 62, 194, 62, 194, 62, 194, 62 }, { 195, 61, 195, 61, 195, 61, 195, 61 },
|
||||
{ 196, 60, 196, 60, 196, 60, 196, 60 }, { 197, 59, 197, 59, 197, 59, 197, 59 },
|
||||
{ 198, 58, 198, 58, 198, 58, 198, 58 }, { 199, 57, 199, 57, 199, 57, 199, 57 },
|
||||
{ 200, 56, 200, 56, 200, 56, 200, 56 }, { 201, 55, 201, 55, 201, 55, 201, 55 },
|
||||
{ 202, 54, 202, 54, 202, 54, 202, 54 }, { 203, 53, 203, 53, 203, 53, 203, 53 },
|
||||
{ 204, 52, 204, 52, 204, 52, 204, 52 }, { 205, 51, 205, 51, 205, 51, 205, 51 },
|
||||
{ 206, 50, 206, 50, 206, 50, 206, 50 }, { 207, 49, 207, 49, 207, 49, 207, 49 },
|
||||
{ 208, 48, 208, 48, 208, 48, 208, 48 }, { 209, 47, 209, 47, 209, 47, 209, 47 },
|
||||
{ 210, 46, 210, 46, 210, 46, 210, 46 }, { 211, 45, 211, 45, 211, 45, 211, 45 },
|
||||
{ 212, 44, 212, 44, 212, 44, 212, 44 }, { 213, 43, 213, 43, 213, 43, 213, 43 },
|
||||
{ 214, 42, 214, 42, 214, 42, 214, 42 }, { 215, 41, 215, 41, 215, 41, 215, 41 },
|
||||
{ 216, 40, 216, 40, 216, 40, 216, 40 }, { 217, 39, 217, 39, 217, 39, 217, 39 },
|
||||
{ 218, 38, 218, 38, 218, 38, 218, 38 }, { 219, 37, 219, 37, 219, 37, 219, 37 },
|
||||
{ 220, 36, 220, 36, 220, 36, 220, 36 }, { 221, 35, 221, 35, 221, 35, 221, 35 },
|
||||
{ 222, 34, 222, 34, 222, 34, 222, 34 }, { 223, 33, 223, 33, 223, 33, 223, 33 },
|
||||
{ 224, 32, 224, 32, 224, 32, 224, 32 }, { 225, 31, 225, 31, 225, 31, 225, 31 },
|
||||
{ 226, 30, 226, 30, 226, 30, 226, 30 }, { 227, 29, 227, 29, 227, 29, 227, 29 },
|
||||
{ 228, 28, 228, 28, 228, 28, 228, 28 }, { 229, 27, 229, 27, 229, 27, 229, 27 },
|
||||
{ 230, 26, 230, 26, 230, 26, 230, 26 }, { 231, 25, 231, 25, 231, 25, 231, 25 },
|
||||
{ 232, 24, 232, 24, 232, 24, 232, 24 }, { 233, 23, 233, 23, 233, 23, 233, 23 },
|
||||
{ 234, 22, 234, 22, 234, 22, 234, 22 }, { 235, 21, 235, 21, 235, 21, 235, 21 },
|
||||
{ 236, 20, 236, 20, 236, 20, 236, 20 }, { 237, 19, 237, 19, 237, 19, 237, 19 },
|
||||
{ 238, 18, 238, 18, 238, 18, 238, 18 }, { 239, 17, 239, 17, 239, 17, 239, 17 },
|
||||
{ 240, 16, 240, 16, 240, 16, 240, 16 }, { 241, 15, 241, 15, 241, 15, 241, 15 },
|
||||
{ 242, 14, 242, 14, 242, 14, 242, 14 }, { 243, 13, 243, 13, 243, 13, 243, 13 },
|
||||
{ 244, 12, 244, 12, 244, 12, 244, 12 }, { 245, 11, 245, 11, 245, 11, 245, 11 },
|
||||
{ 246, 10, 246, 10, 246, 10, 246, 10 }, { 247, 9, 247, 9, 247, 9, 247, 9 },
|
||||
{ 248, 8, 248, 8, 248, 8, 248, 8 }, { 249, 7, 249, 7, 249, 7, 249, 7 },
|
||||
{ 250, 6, 250, 6, 250, 6, 250, 6 }, { 251, 5, 251, 5, 251, 5, 251, 5 },
|
||||
{ 252, 4, 252, 4, 252, 4, 252, 4 }, { 253, 3, 253, 3, 253, 3, 253, 3 },
|
||||
{ 254, 2, 254, 2, 254, 2, 254, 2 }, { 255, 1, 255, 1, 255, 1, 255, 1 }
|
||||
}
|
||||
};
|
||||
#endif // defined(__ALTIVEC__)
|
||||
|
|
238
src/emu/video/rgbvmx.c
Normal file
238
src/emu/video/rgbvmx.c
Normal file
|
@ -0,0 +1,238 @@
|
|||
// license:BSD-3-Clause
|
||||
// copyright-holders:Vas Crabb, Ryan Holtz
|
||||
/***************************************************************************
|
||||
|
||||
rgbvmx.c
|
||||
|
||||
VMX/Altivec optimised RGB utilities.
|
||||
|
||||
***************************************************************************/
|
||||
|
||||
#if defined(__ALTIVEC__)
|
||||
|
||||
#include "emu.h"
|
||||
#include <emmintrin.h>
|
||||
#include "rgbutil.h"
|
||||
|
||||
/***************************************************************************
|
||||
TABLES
|
||||
***************************************************************************/
|
||||
|
||||
// Static vector constants for rgbaint_t.
// maxbyte: eight elements, each 255.
const rgbaint_t::VECU16 rgbaint_t::maxbyte = { 255, 255, 255, 255, 255, 255, 255, 255 };
|
||||
// Channel masks: four 32-bit elements each, with exactly one element zero and
// the other three all-ones.
// NOTE(review): element order looks like { a, r, g, b }, matching the
// initialiser used by rgbaint_t::set(a, r, g, b); ANDing with a mask would then
// clear the named channel - confirm against the code that uses them.
const rgbaint_t::VECU32 rgbaint_t::alpha_mask = { 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff };
|
||||
const rgbaint_t::VECU32 rgbaint_t::red_mask = { 0xffffffff, 0x00000000, 0xffffffff, 0xffffffff };
|
||||
const rgbaint_t::VECU32 rgbaint_t::green_mask = { 0xffffffff, 0xffffffff, 0x00000000, 0xffffffff };
|
||||
const rgbaint_t::VECU32 rgbaint_t::blue_mask = { 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000 };
|
||||
const rgbaint_t::VECU16 rgbaint_t::scale_table[256] = {
|
||||
{ 0, 256, 0, 256, 0, 256, 0, 256 }, { 1, 255, 1, 255, 1, 255, 1, 255 },
|
||||
{ 2, 254, 2, 254, 2, 254, 2, 254 }, { 3, 253, 3, 253, 3, 253, 3, 253 },
|
||||
{ 4, 252, 4, 252, 4, 252, 4, 252 }, { 5, 251, 5, 251, 5, 251, 5, 251 },
|
||||
{ 6, 250, 6, 250, 6, 250, 6, 250 }, { 7, 249, 7, 249, 7, 249, 7, 249 },
|
||||
{ 8, 248, 8, 248, 8, 248, 8, 248 }, { 9, 247, 9, 247, 9, 247, 9, 247 },
|
||||
{ 10, 246, 10, 246, 10, 246, 10, 246 }, { 11, 245, 11, 245, 11, 245, 11, 245 },
|
||||
{ 12, 244, 12, 244, 12, 244, 12, 244 }, { 13, 243, 13, 243, 13, 243, 13, 243 },
|
||||
{ 14, 242, 14, 242, 14, 242, 14, 242 }, { 15, 241, 15, 241, 15, 241, 15, 241 },
|
||||
{ 16, 240, 16, 240, 16, 240, 16, 240 }, { 17, 239, 17, 239, 17, 239, 17, 239 },
|
||||
{ 18, 238, 18, 238, 18, 238, 18, 238 }, { 19, 237, 19, 237, 19, 237, 19, 237 },
|
||||
{ 20, 236, 20, 236, 20, 236, 20, 236 }, { 21, 235, 21, 235, 21, 235, 21, 235 },
|
||||
{ 22, 234, 22, 234, 22, 234, 22, 234 }, { 23, 233, 23, 233, 23, 233, 23, 233 },
|
||||
{ 24, 232, 24, 232, 24, 232, 24, 232 }, { 25, 231, 25, 231, 25, 231, 25, 231 },
|
||||
{ 26, 230, 26, 230, 26, 230, 26, 230 }, { 27, 229, 27, 229, 27, 229, 27, 229 },
|
||||
{ 28, 228, 28, 228, 28, 228, 28, 228 }, { 29, 227, 29, 227, 29, 227, 29, 227 },
|
||||
{ 30, 226, 30, 226, 30, 226, 30, 226 }, { 31, 225, 31, 225, 31, 225, 31, 225 },
|
||||
{ 32, 224, 32, 224, 32, 224, 32, 224 }, { 33, 223, 33, 223, 33, 223, 33, 223 },
|
||||
{ 34, 222, 34, 222, 34, 222, 34, 222 }, { 35, 221, 35, 221, 35, 221, 35, 221 },
|
||||
{ 36, 220, 36, 220, 36, 220, 36, 220 }, { 37, 219, 37, 219, 37, 219, 37, 219 },
|
||||
{ 38, 218, 38, 218, 38, 218, 38, 218 }, { 39, 217, 39, 217, 39, 217, 39, 217 },
|
||||
{ 40, 216, 40, 216, 40, 216, 40, 216 }, { 41, 215, 41, 215, 41, 215, 41, 215 },
|
||||
{ 42, 214, 42, 214, 42, 214, 42, 214 }, { 43, 213, 43, 213, 43, 213, 43, 213 },
|
||||
{ 44, 212, 44, 212, 44, 212, 44, 212 }, { 45, 211, 45, 211, 45, 211, 45, 211 },
|
||||
{ 46, 210, 46, 210, 46, 210, 46, 210 }, { 47, 209, 47, 209, 47, 209, 47, 209 },
|
||||
{ 48, 208, 48, 208, 48, 208, 48, 208 }, { 49, 207, 49, 207, 49, 207, 49, 207 },
|
||||
{ 50, 206, 50, 206, 50, 206, 50, 206 }, { 51, 205, 51, 205, 51, 205, 51, 205 },
|
||||
{ 52, 204, 52, 204, 52, 204, 52, 204 }, { 53, 203, 53, 203, 53, 203, 53, 203 },
|
||||
{ 54, 202, 54, 202, 54, 202, 54, 202 }, { 55, 201, 55, 201, 55, 201, 55, 201 },
|
||||
{ 56, 200, 56, 200, 56, 200, 56, 200 }, { 57, 199, 57, 199, 57, 199, 57, 199 },
|
||||
{ 58, 198, 58, 198, 58, 198, 58, 198 }, { 59, 197, 59, 197, 59, 197, 59, 197 },
|
||||
{ 60, 196, 60, 196, 60, 196, 60, 196 }, { 61, 195, 61, 195, 61, 195, 61, 195 },
|
||||
{ 62, 194, 62, 194, 62, 194, 62, 194 }, { 63, 193, 63, 193, 63, 193, 63, 193 },
|
||||
{ 64, 192, 64, 192, 64, 192, 64, 192 }, { 65, 191, 65, 191, 65, 191, 65, 191 },
|
||||
{ 66, 190, 66, 190, 66, 190, 66, 190 }, { 67, 189, 67, 189, 67, 189, 67, 189 },
|
||||
{ 68, 188, 68, 188, 68, 188, 68, 188 }, { 69, 187, 69, 187, 69, 187, 69, 187 },
|
||||
{ 70, 186, 70, 186, 70, 186, 70, 186 }, { 71, 185, 71, 185, 71, 185, 71, 185 },
|
||||
{ 72, 184, 72, 184, 72, 184, 72, 184 }, { 73, 183, 73, 183, 73, 183, 73, 183 },
|
||||
{ 74, 182, 74, 182, 74, 182, 74, 182 }, { 75, 181, 75, 181, 75, 181, 75, 181 },
|
||||
{ 76, 180, 76, 180, 76, 180, 76, 180 }, { 77, 179, 77, 179, 77, 179, 77, 179 },
|
||||
{ 78, 178, 78, 178, 78, 178, 78, 178 }, { 79, 177, 79, 177, 79, 177, 79, 177 },
|
||||
{ 80, 176, 80, 176, 80, 176, 80, 176 }, { 81, 175, 81, 175, 81, 175, 81, 175 },
|
||||
{ 82, 174, 82, 174, 82, 174, 82, 174 }, { 83, 173, 83, 173, 83, 173, 83, 173 },
|
||||
{ 84, 172, 84, 172, 84, 172, 84, 172 }, { 85, 171, 85, 171, 85, 171, 85, 171 },
|
||||
{ 86, 170, 86, 170, 86, 170, 86, 170 }, { 87, 169, 87, 169, 87, 169, 87, 169 },
|
||||
{ 88, 168, 88, 168, 88, 168, 88, 168 }, { 89, 167, 89, 167, 89, 167, 89, 167 },
|
||||
{ 90, 166, 90, 166, 90, 166, 90, 166 }, { 91, 165, 91, 165, 91, 165, 91, 165 },
|
||||
{ 92, 164, 92, 164, 92, 164, 92, 164 }, { 93, 163, 93, 163, 93, 163, 93, 163 },
|
||||
{ 94, 162, 94, 162, 94, 162, 94, 162 }, { 95, 161, 95, 161, 95, 161, 95, 161 },
|
||||
{ 96, 160, 96, 160, 96, 160, 96, 160 }, { 97, 159, 97, 159, 97, 159, 97, 159 },
|
||||
{ 98, 158, 98, 158, 98, 158, 98, 158 }, { 99, 157, 99, 157, 99, 157, 99, 157 },
|
||||
{ 100, 156, 100, 156, 100, 156, 100, 156 }, { 101, 155, 101, 155, 101, 155, 101, 155 },
|
||||
{ 102, 154, 102, 154, 102, 154, 102, 154 }, { 103, 153, 103, 153, 103, 153, 103, 153 },
|
||||
{ 104, 152, 104, 152, 104, 152, 104, 152 }, { 105, 151, 105, 151, 105, 151, 105, 151 },
|
||||
{ 106, 150, 106, 150, 106, 150, 106, 150 }, { 107, 149, 107, 149, 107, 149, 107, 149 },
|
||||
{ 108, 148, 108, 148, 108, 148, 108, 148 }, { 109, 147, 109, 147, 109, 147, 109, 147 },
|
||||
{ 110, 146, 110, 146, 110, 146, 110, 146 }, { 111, 145, 111, 145, 111, 145, 111, 145 },
|
||||
{ 112, 144, 112, 144, 112, 144, 112, 144 }, { 113, 143, 113, 143, 113, 143, 113, 143 },
|
||||
{ 114, 142, 114, 142, 114, 142, 114, 142 }, { 115, 141, 115, 141, 115, 141, 115, 141 },
|
||||
{ 116, 140, 116, 140, 116, 140, 116, 140 }, { 117, 139, 117, 139, 117, 139, 117, 139 },
|
||||
{ 118, 138, 118, 138, 118, 138, 118, 138 }, { 119, 137, 119, 137, 119, 137, 119, 137 },
|
||||
{ 120, 136, 120, 136, 120, 136, 120, 136 }, { 121, 135, 121, 135, 121, 135, 121, 135 },
|
||||
{ 122, 134, 122, 134, 122, 134, 122, 134 }, { 123, 133, 123, 133, 123, 133, 123, 133 },
|
||||
{ 124, 132, 124, 132, 124, 132, 124, 132 }, { 125, 131, 125, 131, 125, 131, 125, 131 },
|
||||
{ 126, 130, 126, 130, 126, 130, 126, 130 }, { 127, 129, 127, 129, 127, 129, 127, 129 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128 }, { 129, 127, 129, 127, 129, 127, 129, 127 },
|
||||
{ 130, 126, 130, 126, 130, 126, 130, 126 }, { 131, 125, 131, 125, 131, 125, 131, 125 },
|
||||
{ 132, 124, 132, 124, 132, 124, 132, 124 }, { 133, 123, 133, 123, 133, 123, 133, 123 },
|
||||
{ 134, 122, 134, 122, 134, 122, 134, 122 }, { 135, 121, 135, 121, 135, 121, 135, 121 },
|
||||
{ 136, 120, 136, 120, 136, 120, 136, 120 }, { 137, 119, 137, 119, 137, 119, 137, 119 },
|
||||
{ 138, 118, 138, 118, 138, 118, 138, 118 }, { 139, 117, 139, 117, 139, 117, 139, 117 },
|
||||
{ 140, 116, 140, 116, 140, 116, 140, 116 }, { 141, 115, 141, 115, 141, 115, 141, 115 },
|
||||
{ 142, 114, 142, 114, 142, 114, 142, 114 }, { 143, 113, 143, 113, 143, 113, 143, 113 },
|
||||
{ 144, 112, 144, 112, 144, 112, 144, 112 }, { 145, 111, 145, 111, 145, 111, 145, 111 },
|
||||
{ 146, 110, 146, 110, 146, 110, 146, 110 }, { 147, 109, 147, 109, 147, 109, 147, 109 },
|
||||
{ 148, 108, 148, 108, 148, 108, 148, 108 }, { 149, 107, 149, 107, 149, 107, 149, 107 },
|
||||
{ 150, 106, 150, 106, 150, 106, 150, 106 }, { 151, 105, 151, 105, 151, 105, 151, 105 },
|
||||
{ 152, 104, 152, 104, 152, 104, 152, 104 }, { 153, 103, 153, 103, 153, 103, 153, 103 },
|
||||
{ 154, 102, 154, 102, 154, 102, 154, 102 }, { 155, 101, 155, 101, 155, 101, 155, 101 },
|
||||
{ 156, 100, 156, 100, 156, 100, 156, 100 }, { 157, 99, 157, 99, 157, 99, 157, 99 },
|
||||
{ 158, 98, 158, 98, 158, 98, 158, 98 }, { 159, 97, 159, 97, 159, 97, 159, 97 },
|
||||
{ 160, 96, 160, 96, 160, 96, 160, 96 }, { 161, 95, 161, 95, 161, 95, 161, 95 },
|
||||
{ 162, 94, 162, 94, 162, 94, 162, 94 }, { 163, 93, 163, 93, 163, 93, 163, 93 },
|
||||
{ 164, 92, 164, 92, 164, 92, 164, 92 }, { 165, 91, 165, 91, 165, 91, 165, 91 },
|
||||
{ 166, 90, 166, 90, 166, 90, 166, 90 }, { 167, 89, 167, 89, 167, 89, 167, 89 },
|
||||
{ 168, 88, 168, 88, 168, 88, 168, 88 }, { 169, 87, 169, 87, 169, 87, 169, 87 },
|
||||
{ 170, 86, 170, 86, 170, 86, 170, 86 }, { 171, 85, 171, 85, 171, 85, 171, 85 },
|
||||
{ 172, 84, 172, 84, 172, 84, 172, 84 }, { 173, 83, 173, 83, 173, 83, 173, 83 },
|
||||
{ 174, 82, 174, 82, 174, 82, 174, 82 }, { 175, 81, 175, 81, 175, 81, 175, 81 },
|
||||
{ 176, 80, 176, 80, 176, 80, 176, 80 }, { 177, 79, 177, 79, 177, 79, 177, 79 },
|
||||
{ 178, 78, 178, 78, 178, 78, 178, 78 }, { 179, 77, 179, 77, 179, 77, 179, 77 },
|
||||
{ 180, 76, 180, 76, 180, 76, 180, 76 }, { 181, 75, 181, 75, 181, 75, 181, 75 },
|
||||
{ 182, 74, 182, 74, 182, 74, 182, 74 }, { 183, 73, 183, 73, 183, 73, 183, 73 },
|
||||
{ 184, 72, 184, 72, 184, 72, 184, 72 }, { 185, 71, 185, 71, 185, 71, 185, 71 },
|
||||
{ 186, 70, 186, 70, 186, 70, 186, 70 }, { 187, 69, 187, 69, 187, 69, 187, 69 },
|
||||
{ 188, 68, 188, 68, 188, 68, 188, 68 }, { 189, 67, 189, 67, 189, 67, 189, 67 },
|
||||
{ 190, 66, 190, 66, 190, 66, 190, 66 }, { 191, 65, 191, 65, 191, 65, 191, 65 },
|
||||
{ 192, 64, 192, 64, 192, 64, 192, 64 }, { 193, 63, 193, 63, 193, 63, 193, 63 },
|
||||
{ 194, 62, 194, 62, 194, 62, 194, 62 }, { 195, 61, 195, 61, 195, 61, 195, 61 },
|
||||
{ 196, 60, 196, 60, 196, 60, 196, 60 }, { 197, 59, 197, 59, 197, 59, 197, 59 },
|
||||
{ 198, 58, 198, 58, 198, 58, 198, 58 }, { 199, 57, 199, 57, 199, 57, 199, 57 },
|
||||
{ 200, 56, 200, 56, 200, 56, 200, 56 }, { 201, 55, 201, 55, 201, 55, 201, 55 },
|
||||
{ 202, 54, 202, 54, 202, 54, 202, 54 }, { 203, 53, 203, 53, 203, 53, 203, 53 },
|
||||
{ 204, 52, 204, 52, 204, 52, 204, 52 }, { 205, 51, 205, 51, 205, 51, 205, 51 },
|
||||
{ 206, 50, 206, 50, 206, 50, 206, 50 }, { 207, 49, 207, 49, 207, 49, 207, 49 },
|
||||
{ 208, 48, 208, 48, 208, 48, 208, 48 }, { 209, 47, 209, 47, 209, 47, 209, 47 },
|
||||
{ 210, 46, 210, 46, 210, 46, 210, 46 }, { 211, 45, 211, 45, 211, 45, 211, 45 },
|
||||
{ 212, 44, 212, 44, 212, 44, 212, 44 }, { 213, 43, 213, 43, 213, 43, 213, 43 },
|
||||
{ 214, 42, 214, 42, 214, 42, 214, 42 }, { 215, 41, 215, 41, 215, 41, 215, 41 },
|
||||
{ 216, 40, 216, 40, 216, 40, 216, 40 }, { 217, 39, 217, 39, 217, 39, 217, 39 },
|
||||
{ 218, 38, 218, 38, 218, 38, 218, 38 }, { 219, 37, 219, 37, 219, 37, 219, 37 },
|
||||
{ 220, 36, 220, 36, 220, 36, 220, 36 }, { 221, 35, 221, 35, 221, 35, 221, 35 },
|
||||
{ 222, 34, 222, 34, 222, 34, 222, 34 }, { 223, 33, 223, 33, 223, 33, 223, 33 },
|
||||
{ 224, 32, 224, 32, 224, 32, 224, 32 }, { 225, 31, 225, 31, 225, 31, 225, 31 },
|
||||
{ 226, 30, 226, 30, 226, 30, 226, 30 }, { 227, 29, 227, 29, 227, 29, 227, 29 },
|
||||
{ 228, 28, 228, 28, 228, 28, 228, 28 }, { 229, 27, 229, 27, 229, 27, 229, 27 },
|
||||
{ 230, 26, 230, 26, 230, 26, 230, 26 }, { 231, 25, 231, 25, 231, 25, 231, 25 },
|
||||
{ 232, 24, 232, 24, 232, 24, 232, 24 }, { 233, 23, 233, 23, 233, 23, 233, 23 },
|
||||
{ 234, 22, 234, 22, 234, 22, 234, 22 }, { 235, 21, 235, 21, 235, 21, 235, 21 },
|
||||
{ 236, 20, 236, 20, 236, 20, 236, 20 }, { 237, 19, 237, 19, 237, 19, 237, 19 },
|
||||
{ 238, 18, 238, 18, 238, 18, 238, 18 }, { 239, 17, 239, 17, 239, 17, 239, 17 },
|
||||
{ 240, 16, 240, 16, 240, 16, 240, 16 }, { 241, 15, 241, 15, 241, 15, 241, 15 },
|
||||
{ 242, 14, 242, 14, 242, 14, 242, 14 }, { 243, 13, 243, 13, 243, 13, 243, 13 },
|
||||
{ 244, 12, 244, 12, 244, 12, 244, 12 }, { 245, 11, 245, 11, 245, 11, 245, 11 },
|
||||
{ 246, 10, 246, 10, 246, 10, 246, 10 }, { 247, 9, 247, 9, 247, 9, 247, 9 },
|
||||
{ 248, 8, 248, 8, 248, 8, 248, 8 }, { 249, 7, 249, 7, 249, 7, 249, 7 },
|
||||
{ 250, 6, 250, 6, 250, 6, 250, 6 }, { 251, 5, 251, 5, 251, 5, 251, 5 },
|
||||
{ 252, 4, 252, 4, 252, 4, 252, 4 }, { 253, 3, 253, 3, 253, 3, 253, 3 },
|
||||
{ 254, 2, 254, 2, 254, 2, 254, 2 }, { 255, 1, 255, 1, 255, 1, 255, 1 }
|
||||
};
|
||||
|
||||
// Declares (without defining) a const object of an empty struct type.
// NOTE(review): nothing in the visible part of this file refers to
// rgbvmx_statics - bilinear_filter reads rgbsse_statics instead - so this looks
// like a leftover placeholder; confirm before relying on or removing it.
extern const struct _rgbvmx_statics
|
||||
{
|
||||
} rgbvmx_statics;
|
||||
|
||||
/***************************************************************************
|
||||
HIGHER LEVEL OPERATIONS
|
||||
***************************************************************************/
|
||||
|
||||
// blend - combine this colour with `other` using the table row selected by
// `factor`, storing the result back into m_value.
// NOTE(review): the body mixes an SSE2 intrinsic (_mm_unpacklo_epi16) with
// AltiVec ones (vec_add, vec_sr) inside an __ALTIVEC__-only file, and the
// commit message lists blend as still to be done - treat it as unfinished.
void rgbaint_t::blend(const rgbaint_t& other, UINT8 factor)
|
||||
{
|
||||
// Interleave the low 16-bit elements of this colour and `other`.
m_value = _mm_unpacklo_epi16(m_value, other.m_value);
|
||||
// Adds the { factor, 256 - factor, ... } table row to the interleaved values.
// NOTE(review): an element-wise add does not weight the two colours; a
// multiply-and-sum of adjacent pairs was presumably intended - confirm.
m_value = vec_add((VECU16)m_value, scale_table[factor]);
|
||||
// Shift every 32-bit element right by 8 bits.
m_value = vec_sr(m_value, vec_splat_u32(8));
|
||||
}
|
||||
|
||||
void rgbaint_t::scale_and_clamp(const rgbaint_t& scale)
|
||||
{
|
||||
mul(scale);
|
||||
shr(8);
|
||||
min(255);
|
||||
}
|
||||
|
||||
void rgbaint_t::scale_imm_and_clamp(const INT32 scale)
|
||||
{
|
||||
mul_imm(scale);
|
||||
shr(8);
|
||||
min(255);
|
||||
}
|
||||
|
||||
void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2)
|
||||
{
|
||||
mul(scale);
|
||||
rgbaint_t color2(other);
|
||||
color2.mul(scale2);
|
||||
|
||||
mul(scale);
|
||||
add(color2);
|
||||
shr(8);
|
||||
min(255);
|
||||
}
|
||||
|
||||
void rgbaint_t::scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& other)
|
||||
{
|
||||
mul_imm(scale);
|
||||
add(other);
|
||||
shr(8);
|
||||
min(255);
|
||||
}
|
||||
|
||||
void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other)
|
||||
{
|
||||
mul(scale);
|
||||
add(other);
|
||||
shr(8);
|
||||
min(255);
|
||||
}
|
||||
|
||||
// bilinear_filter - combine four packed 32-bit pixels into one, weighting the
// pairs (rgb00, rgb01) and (rgb10, rgb11) by table row `u` and then the two
// intermediate results by table row `v`. Returns the packed 32-bit pixel.
// NOTE(review): this body uses SSE2 intrinsics and rgbsse_statics although the
// file is compiled only under __ALTIVEC__; the commit message lists the
// bilinear filter as not yet ported - treat it as a placeholder.
UINT32 rgbaint_t::bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
|
||||
{
|
||||
// Place each packed pixel in the low 32 bits of its own 128-bit register.
__m128i color00 = _mm_cvtsi32_si128(rgb00);
|
||||
__m128i color01 = _mm_cvtsi32_si128(rgb01);
|
||||
__m128i color10 = _mm_cvtsi32_si128(rgb10);
|
||||
__m128i color11 = _mm_cvtsi32_si128(rgb11);
|
||||
|
||||
/* interleave color01 and color00 at the byte level */
|
||||
color01 = _mm_unpacklo_epi8(color01, color00);
|
||||
color11 = _mm_unpacklo_epi8(color11, color10);
|
||||
// Widen the interleaved bytes to 16 bits by interleaving with zero.
color01 = _mm_unpacklo_epi8(color01, _mm_setzero_si128());
|
||||
color11 = _mm_unpacklo_epi8(color11, _mm_setzero_si128());
|
||||
// Multiply adjacent 16-bit pairs by the table row for `u` and add each pair
// into one 32-bit element.
// NOTE(review): assumes rgbsse_statics.scale_table rows are { n, 256 - n, ... }
// like the scale_table defined in this file - confirm.
color01 = _mm_madd_epi16(color01, *(__m128i *)&rgbsse_statics.scale_table[u][0]);
|
||||
color11 = _mm_madd_epi16(color11, *(__m128i *)&rgbsse_statics.scale_table[u][0]);
|
||||
// Move the first result into the upper 16 bits of each 32-bit element and
// halve the second in the lower 16 bits, so the signed 16-bit max below can
// merge both into one register.
color01 = _mm_slli_epi32(color01, 15);
|
||||
color11 = _mm_srli_epi32(color11, 1);
|
||||
color01 = _mm_max_epi16(color01, color11);
|
||||
// Second weighting pass, using the table row for `v`.
color01 = _mm_madd_epi16(color01, *(__m128i *)&rgbsse_statics.scale_table[v][0]);
|
||||
color01 = _mm_srli_epi32(color01, 15);
|
||||
// Narrow 32 -> 16 -> 8 bits with saturation and return the low 32 bits.
color01 = _mm_packs_epi32(color01, color01);
|
||||
color01 = _mm_packus_epi16(color01, color01);
|
||||
return _mm_cvtsi128_si32(color01);
|
||||
}
|
||||
|
||||
#endif // defined(__ALTIVEC__)
|
|
@ -1,5 +1,5 @@
|
|||
// license:BSD-3-Clause
|
||||
// copyright-holders:Vas Crabb
|
||||
// copyright-holders:Vas Crabb, Ryan Holtz
|
||||
/***************************************************************************
|
||||
|
||||
rgbvmx.h
|
||||
|
@ -11,425 +11,471 @@
|
|||
#ifndef __RGBVMX__
|
||||
#define __RGBVMX__
|
||||
|
||||
#if defined(__ALTIVEC__)
|
||||
#include <altivec.h>
|
||||
#endif
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
TYPE DEFINITIONS
|
||||
***************************************************************************/
|
||||
|
||||
/* intermediate RGB values are stored in a vector */
|
||||
typedef vector signed short rgbint;
|
||||
|
||||
/* intermediate RGB values are stored in a vector */
|
||||
typedef vector signed short rgbaint;
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
BASIC CONVERSIONS
|
||||
***************************************************************************/
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgb_comp_to_rgbint - converts a trio of RGB
|
||||
components to an rgbint type
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgb_comp_to_rgbint(rgbint *rgb, INT16 r, INT16 g, INT16 b)
|
||||
class rgbaint_t
|
||||
{
|
||||
rgbint result = { 0, r, g, b, 0, 0, 0, 0 };
|
||||
*rgb = result;
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgba_comp_to_rgbint - converts a quad of RGB
|
||||
components to an rgbint type
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgba_comp_to_rgbaint(rgbaint *rgb, INT16 a, INT16 r, INT16 g, INT16 b)
|
||||
{
|
||||
rgbaint result = { a, r, g, b, 0, 0, 0, 0 };
|
||||
*rgb = result;
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgb_to_rgbint - converts a packed trio of RGB
|
||||
components to an rgbint type
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgb_to_rgbint(rgbint *rgb, rgb_t const &color)
|
||||
{
|
||||
vector signed char temp = (vector signed char)vec_perm((vector signed int)vec_lde(0, color.ptr()), vec_splat_s32(0), vec_lvsl(0, color.ptr()));
|
||||
*rgb = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), temp);
|
||||
}
|
||||
public:
|
||||
inline rgbaint_t() { }
|
||||
inline rgbaint_t(UINT32 rgba) { set(rgba); }
|
||||
inline rgbaint_t(UINT32 a, UINT32 r, UINT32 g, UINT32 b) { set(a, r, g, b); }
|
||||
inline rgbaint_t(rgb_t& rgb) { set(rgb); }
|
||||
|
||||
inline void set(rgbaint_t& other) { m_value = other.m_value; }
|
||||
|
||||
// Unpack a packed 32-bit ARGB value into four 32-bit lanes (a, r, g, b).
inline void set(UINT32 rgba)
{
	const vector unsigned int zero = vec_splat_u32(0);
	// vec_lde leaves the word in whichever lane matches its address; the
	// vec_lvsl permute rotates those four bytes to the front of the vector.
	// The permute result has the type of its inputs, so cast it explicitly.
	const vector unsigned char temp = (vector unsigned char)vec_perm(vec_lde(0, &rgba), zero, vec_lvsl(0, &rgba));
	// Two zero-interleaving merges widen each byte to 16 and then 32 bits.
	// vec_mergeh yields a vector of shorts here, hence the final cast.
	m_value = (vector unsigned int)vec_mergeh((vector unsigned short)zero, (vector unsigned short)vec_mergeh((vector unsigned char)zero, temp));
}
|
||||
|
||||
inline void set(UINT32 a, UINT32 r, UINT32 g, UINT32 b)
|
||||
{
|
||||
vector unsigned int result = { a, r, g, b };
|
||||
m_value = result;
|
||||
}
|
||||
|
||||
// Unpack the packed colour held by an rgb_t into four 32-bit lanes.
inline void set(rgb_t& rgb)
{
	const vector unsigned int zero = vec_splat_u32(0);
	// Load the word via rgb.ptr() and rotate its bytes to the front of the
	// vector; the permute result is cast explicitly to a byte vector.
	const vector unsigned char temp = (vector unsigned char)vec_perm(vec_lde(0, rgb.ptr()), zero, vec_lvsl(0, rgb.ptr()));
	// Widen byte -> short -> word by merging with zero twice; vec_mergeh
	// returns a vector of shorts, so cast back to the member's type.
	m_value = (vector unsigned int)vec_mergeh((vector unsigned short)zero, (vector unsigned short)vec_mergeh((vector unsigned char)zero, temp));
}
|
||||
|
||||
inline rgb_t to_rgba()
|
||||
{
|
||||
const vector unsigned int temp = vec_splat((vector unsigned int)vec_pack(vec_pack(m_value, m_value), vec_splat_u16(0)), 0);
|
||||
UINT32 result;
|
||||
vec_ste(temp, 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline rgb_t to_rgba_clamp()
|
||||
{
|
||||
const vector unsigned int temp = vec_splat((vector unsigned int)vec_packsu(vec_packsu(m_value, m_value), vec_splat_u16(0)), 0);
|
||||
UINT32 result;
|
||||
vec_ste(temp, 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline void add(const rgbaint_t& color2)
|
||||
{
|
||||
m_value = vec_add(m_value, color2.m_value);
|
||||
}
|
||||
|
||||
inline void add_imm(const UINT32 imm)
|
||||
{
|
||||
const vector unsigned int temp = { imm, imm, imm, imm };
|
||||
m_value = vec_add(m_value, temp);
|
||||
}
|
||||
|
||||
inline void add_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
|
||||
{
|
||||
const vector unsigned int temp = { a, r, g, b };
|
||||
m_value = vec_add(m_value, temp);
|
||||
}
|
||||
|
||||
inline void sub(const rgbaint_t& color2)
|
||||
{
|
||||
m_value = vec_sub(m_value, color2.m_value);
|
||||
}
|
||||
|
||||
inline void sub_imm(const UINT32 imm)
|
||||
{
|
||||
const vector unsigned int temp = { imm, imm, imm, imm };
|
||||
m_value = vec_sub(m_value, temp);
|
||||
}
|
||||
|
||||
inline void sub_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
|
||||
{
|
||||
const vector unsigned int temp = { a, r, g, b };
|
||||
m_value = vec_sub(m_value, temp);
|
||||
}
|
||||
|
||||
inline void subr(rgbaint_t& color2)
|
||||
{
|
||||
m_value = vec_sub(color2.m_value, m_value);
|
||||
}
|
||||
|
||||
inline void subr_imm(const UINT32 imm)
|
||||
{
|
||||
const vector unsigned int temp = { imm, imm, imm, imm };
|
||||
m_value = vec_sub(temp, m_value);
|
||||
}
|
||||
|
||||
inline void subr_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
|
||||
{
|
||||
const vector unsigned int temp = { a, r, g, b };
|
||||
m_value = vec_sub(temp, m_value);
|
||||
}
|
||||
|
||||
inline void set_a(const UINT32 value)
|
||||
{
|
||||
const vector unsigned int temp = { value, 0, 0, 0 };
|
||||
m_value = vec_or(vec_and(m_value, alpha_mask), temp);
|
||||
}
|
||||
|
||||
inline void set_r(const UINT32 value)
|
||||
{
|
||||
const vector unsigned int temp = { 0, value, 0, 0 };
|
||||
m_value = vec_or(vec_and(m_value, red_mask), temp);
|
||||
}
|
||||
|
||||
inline void set_g(const UINT32 value)
|
||||
{
|
||||
const vector unsigned int temp = { 0, 0, value, 0 };
|
||||
m_value = vec_or(vec_and(m_value, green_mask), temp);
|
||||
}
|
||||
|
||||
inline void set_b(const UINT32 value)
|
||||
{
|
||||
const vector unsigned int temp = { 0, 0, 0, value };
|
||||
m_value = vec_or(vec_and(m_value, blue_mask), temp);
|
||||
}
|
||||
|
||||
inline UINT8 get_a()
|
||||
{
|
||||
UINT8 result;
|
||||
vec_ste(vec_splat((vector unsigned char)m_value, 3), 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline UINT8 get_r()
|
||||
{
|
||||
UINT8 result;
|
||||
vec_ste(vec_splat((vector unsigned char)m_value, 7), 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline UINT8 get_g()
|
||||
{
|
||||
UINT8 result;
|
||||
vec_ste(vec_splat((vector unsigned char)m_value, 11), 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline UINT8 get_b()
|
||||
{
|
||||
UINT8 result;
|
||||
vec_ste(vec_splat((vector unsigned char)m_value, 15), 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline UINT32 get_a32()
|
||||
{
|
||||
UINT32 result;
|
||||
vec_ste(vec_splat(m_value, 0), 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline UINT32 get_r32()
|
||||
{
|
||||
UINT32 result;
|
||||
vec_ste(vec_splat(m_value, 1), 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline UINT32 get_g32()
|
||||
{
|
||||
UINT32 result;
|
||||
vec_ste(vec_splat(m_value, 2), 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline UINT32 get_b32()
|
||||
{
|
||||
UINT32 result;
|
||||
vec_ste(vec_splat(m_value, 3), 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline void mul(const rgbaint_t& color)
|
||||
{
|
||||
const vector unsigned int shift = vec_splat_u32(-16);
|
||||
const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(color.m_value, shift)), vec_mule((vector unsigned short)vec_sl(m_value, shift), (vector unsigned short)color.m_value));
|
||||
m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)color.m_value));
|
||||
}
|
||||
|
||||
inline void mul_imm(const UINT32 imm)
|
||||
{
|
||||
const vector unsigned int value = { imm, imm, imm, imm };
|
||||
const vector unsigned int shift = vec_splat_u32(-16);
|
||||
const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(value, shift)), vec_mule((vector unsigned short)vec_sl(m_value, shift), (vector unsigned short)value));
|
||||
m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)value));
|
||||
}
|
||||
|
||||
inline void mul_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
|
||||
{
|
||||
const vector unsigned int value = { a, r, g, b };
|
||||
const vector unsigned int shift = vec_splat_u32(-16);
|
||||
const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(value, shift)), vec_mule((vector unsigned short)vec_sl(m_value, shift), (vector unsigned short)value));
|
||||
m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)value));
|
||||
}
|
||||
|
||||
inline void shl(const rgbaint_t& shift)
|
||||
{
|
||||
const vector unsigned int limit = { 32, 32, 32, 32 };
|
||||
const vector unsigned int temp = vec_splat(shift.m_value, 3);
|
||||
m_value = vec_and(vec_sl(m_value, temp), vec_cmpgt(limit, temp));
|
||||
}
|
||||
|
||||
inline void shl_imm(const UINT8 shift)
|
||||
{
|
||||
const vector unsigned int temp = { shift, shift, shift, shift };
|
||||
m_value = vec_sl(m_value, temp);
|
||||
}
|
||||
|
||||
inline void shl_imm_all(const UINT8 shift)
|
||||
{
|
||||
const vector unsigned char limit = { 128, 128, 128, 128, 128, 128, 128, 128 };
|
||||
const vector unsigned char temp = { shift, shift, shift, shift, shift, shift, shift, shift };
|
||||
m_value = vec_and(vec_slo(m_value, temp), (vector unsigned int)vec_cmpgt(limit, temp));
|
||||
}
|
||||
|
||||
inline void shr(const rgbaint_t& shift)
|
||||
{
|
||||
const vector unsigned int limit = { 32, 32, 32, 32 };
|
||||
const vector unsigned int temp = vec_splat(shift.m_value, 3);
|
||||
m_value = vec_and(vec_sr(m_value, temp), vec_cmpgt(limit, temp));
|
||||
}
|
||||
|
||||
inline void shr_imm(const UINT8 shift)
|
||||
{
|
||||
const vector unsigned int temp = { shift, shift, shift, shift };
|
||||
m_value = vec_sr(m_value, temp);
|
||||
}
|
||||
|
||||
inline void shr_imm_all(const UINT8 shift)
|
||||
{
|
||||
const vector unsigned char limit = { 128, 128, 128, 128, 128, 128, 128, 128 };
|
||||
const vector unsigned char temp = { shift, shift, shift, shift, shift, shift, shift, shift };
|
||||
m_value = vec_and(vec_sro(m_value, temp), (vector unsigned int)vec_cmpgt(limit, temp));
|
||||
}
|
||||
|
||||
inline void sra(const rgbaint_t& shift)
|
||||
{
|
||||
const vector unsigned int limit = { 31, 31, 31, 31 };
|
||||
m_value = vec_sra(m_value, vec_min(vec_splat(shift.m_value, 3), limit));
|
||||
}
|
||||
|
||||
inline void sra_imm(const UINT8 shift)
|
||||
{
|
||||
const vector unsigned int temp = { shift, shift, shift, shift };
|
||||
m_value = vec_sra(m_value, temp);
|
||||
}
|
||||
|
||||
inline void or_reg(const rgbaint_t& color2)
|
||||
{
|
||||
m_value = vec_or(m_value, color2.m_value);
|
||||
}
|
||||
|
||||
inline void or_imm(const UINT32 value)
|
||||
{
|
||||
const vector unsigned int temp = { value, value, value, value };
|
||||
m_value = vec_or(m_value, temp);
|
||||
}
|
||||
|
||||
inline void or_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
|
||||
{
|
||||
const vector unsigned int temp = { a, r, g, b };
|
||||
m_value = vec_or(m_value, temp);
|
||||
}
|
||||
|
||||
inline void and_reg(const rgbaint_t& color)
|
||||
{
|
||||
m_value = vec_and(m_value, color.m_value);
|
||||
}
|
||||
|
||||
inline void and_imm(const UINT32 value)
|
||||
{
|
||||
const vector unsigned int temp = { value, value, value, value };
|
||||
m_value = vec_and(m_value, temp);
|
||||
}
|
||||
|
||||
inline void and_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
|
||||
{
|
||||
const vector unsigned int temp = { a, r, g, b };
|
||||
m_value = vec_and(m_value, temp);
|
||||
}
|
||||
|
||||
inline void xor_reg(const rgbaint_t& color2)
|
||||
{
|
||||
m_value = vec_xor(m_value, color2.m_value);
|
||||
}
|
||||
|
||||
inline void xor_imm(const INT32 value)
|
||||
{
|
||||
const vector unsigned int temp = { value, value, value, value };
|
||||
m_value = vec_xor(m_value, temp);
|
||||
}
|
||||
|
||||
inline void xor_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
|
||||
{
|
||||
const vector unsigned int temp = { a, r, g, b };
|
||||
m_value = vec_xor(m_value, temp);
|
||||
}
|
||||
|
||||
inline void clamp_and_clear(const UINT32 sign)
|
||||
{
|
||||
const vector unsigned int vzero = vec_splat_u32(0);
|
||||
vector unsigned int vsign = { sign, sign, sign, sign };
|
||||
m_value = vec_and(m_value, vec_cmpeq(vec_and(m_value, vsign), vzero));
|
||||
vsign = vec_nor(vec_sra(vsign, vec_splat_u32(1)), vzero);
|
||||
const vector unsigned int mask = vec_cmpgt(m_value, vsign);
|
||||
m_value = vec_or(vec_and(vsign, mask), vec_and(m_value, vec_nor(mask, vzero)));
|
||||
}
|
||||
|
||||
inline void sign_extend(const UINT32 compare, const UINT32 sign)
|
||||
{
|
||||
const vector unsigned int compare_vec = { compare, compare, compare, compare };
|
||||
const vector unsigned int compare_mask = vec_cmpeq(vec_and(m_value, compare_vec), compare_vec);
|
||||
const vector unsigned int sign_vec = { sign, sign, sign, sign };
|
||||
m_value = vec_or(m_value, vec_and(sign_vec, compare_mask));
|
||||
}
|
||||
|
||||
inline void min(const UINT32 value)
|
||||
{
|
||||
const vector unsigned int temp = { value, value, value, value };
|
||||
m_value = vec_min(m_value, temp);
|
||||
}
|
||||
|
||||
void blend(const rgbaint_t& other, UINT8 factor);
|
||||
|
||||
void scale_and_clamp(const rgbaint_t& scale);
|
||||
void scale_imm_and_clamp(const INT32 scale);
|
||||
void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2);
|
||||
void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other);
|
||||
void scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& other);
|
||||
|
||||
inline void cmpeq(const rgbaint_t& value)
|
||||
{
|
||||
m_value = vec_cmpeq(m_value, value.m_value);
|
||||
}
|
||||
|
||||
inline void cmpeq_imm(const UINT32 value)
|
||||
{
|
||||
const vector unsigned int temp = { value, value, value, value };
|
||||
m_value = vec_cmpeq(m_value, temp);
|
||||
}
|
||||
|
||||
inline void cmpeq_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
|
||||
{
|
||||
const vector unsigned int temp = { a, r, g, b };
|
||||
m_value = vec_cmpeq(m_value, temp);
|
||||
}
|
||||
|
||||
inline void cmpgt(const rgbaint_t& value)
|
||||
{
|
||||
m_value = vec_cmpgt(m_value, value.m_value);
|
||||
}
|
||||
|
||||
inline void cmpgt_imm(const UINT32 value)
|
||||
{
|
||||
const vector unsigned int temp = { value, value, value, value };
|
||||
m_value = vec_cmpgt(m_value, temp);
|
||||
}
|
||||
|
||||
inline void cmpgt_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
|
||||
{
|
||||
const vector unsigned int temp = { a, r, g, b };
|
||||
m_value = vec_cmpgt(m_value, temp);
|
||||
}
|
||||
|
||||
inline void cmplt(const rgbaint_t& value)
|
||||
{
|
||||
m_value = vec_cmplt(m_value, value.m_value);
|
||||
}
|
||||
|
||||
inline void cmplt_imm(const UINT32 value)
|
||||
{
|
||||
const vector unsigned int temp = { value, value, value, value };
|
||||
m_value = vec_cmplt(m_value, temp);
|
||||
}
|
||||
|
||||
inline void cmplt_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
|
||||
{
|
||||
const vector unsigned int temp = { a, r, g, b };
|
||||
m_value = vec_cmplt(m_value, temp);
|
||||
}
|
||||
|
||||
// Copy-assign the four lanes from another colour.
// Returns a reference to this object, as the compound assignment operators
// in this class do, so chained assignment does not create a temporary copy.
inline rgbaint_t& operator=(const rgbaint_t& other)
{
	m_value = other.m_value;
	return *this;
}
|
||||
|
||||
inline rgbaint_t& operator+=(const rgbaint_t& other)
|
||||
{
|
||||
m_value = vec_add(m_value, other.m_value);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Add the same scalar to every lane (modulo 2^32) and return *this.
inline rgbaint_t& operator+=(const INT32 other)
{
	// The lanes are unsigned; convert explicitly rather than relying on a
	// narrowing conversion inside the brace initialiser. Modular addition
	// makes a negative addend behave as a subtraction.
	const UINT32 addend = static_cast<UINT32>(other);
	const vector unsigned int temp = { addend, addend, addend, addend };
	m_value = vec_add(m_value, temp);
	return *this;
}
|
||||
|
||||
inline rgbaint_t& operator-=(const rgbaint_t& other)
|
||||
{
|
||||
m_value = vec_sub(m_value, other.m_value);
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline rgbaint_t& operator*=(const rgbaint_t& other)
|
||||
{
|
||||
const vector unsigned int shift = vec_splat_u32(-16);
|
||||
const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(other.m_value, shift)), vec_mule((vector unsigned short)vec_sl(m_value, shift), (vector unsigned short)other.m_value));
|
||||
m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)other.m_value));
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Multiply every lane by the same scalar, keeping the low 32 bits of each
// product, and return *this.
inline rgbaint_t& operator*=(const INT32 other)
{
	// The lanes are unsigned; convert explicitly rather than relying on a
	// narrowing conversion inside the brace initialiser.
	const UINT32 factor = static_cast<UINT32>(other);
	const vector unsigned int value = { factor, factor, factor, factor };
	// vec_splat_u32(-16) sets the low five bits of each lane to 16, and
	// vec_sl only looks at those bits, so this is a shift by 16.
	const vector unsigned int shift = vec_splat_u32(-16);
	// AltiVec has no 32x32 multiply, so build it from 16-bit halves:
	// the cross terms hi(m)*lo(v) + lo(m)*hi(v) are computed here...
	const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(value, shift)), vec_mule((vector unsigned short)vec_sl(m_value, shift), (vector unsigned short)value));
	// ...then moved up 16 bits and added to lo(m)*lo(v).
	m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)value));
	return *this;
}
|
||||
|
||||
// Arithmetic (sign-propagating) right shift of every lane by the same
// count, returning *this.
inline rgbaint_t& operator>>=(const INT32 shift)
{
	// The lanes are unsigned; convert explicitly rather than relying on a
	// narrowing conversion inside the brace initialiser.
	const UINT32 count = static_cast<UINT32>(shift);
	const vector unsigned int temp = { count, count, count, count };
	m_value = vec_sra(m_value, temp);
	return *this;
}
|
||||
|
||||
// Replace this colour's alpha lane with the alpha lane of 'alpha', leaving
// the red, green and blue lanes untouched.
inline void merge_alpha(rgbaint_t& alpha)
{
	// The previous body used SSE2 intrinsics (_mm_insert_epi16 /
	// _mm_extract_epi16), which do not exist for AltiVec vectors.
	// vec_sel takes bits from its first operand where the mask is 0 and from
	// its second where the mask is 1. This relies on alpha_mask being clear
	// in the alpha lane and set elsewhere, which is how set_a() uses it.
	m_value = vec_sel(alpha.m_value, m_value, alpha_mask);
}
|
||||
|
||||
static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v);
|
||||
|
||||
protected:
|
||||
typedef vector unsigned char VECU8;
|
||||
typedef vector unsigned short VECU16;
|
||||
typedef vector unsigned int VECU32;
|
||||
|
||||
vector VECU32 m_value;
|
||||
|
||||
static const VECU16 maxbyte;
|
||||
static const VECU32 alpha_mask;
|
||||
static const VECU32 red_mask;
|
||||
static const VECU32 green_mask;
|
||||
static const VECU32 blue_mask;
|
||||
static const VECU16 scale_table[256];
|
||||
};
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgba_to_rgbaint - converts a packed quad of RGB
|
||||
components to an rgbaint type
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgba_to_rgbaint(rgbaint *rgb, rgb_t const &color)
|
||||
{
|
||||
vector signed char temp = (vector signed char)vec_perm((vector signed int)vec_lde(0, color.ptr()), vec_splat_s32(0), vec_lvsl(0, color.ptr()));
|
||||
*rgb = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), temp);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbint_to_rgb - converts an rgbint back to
|
||||
a packed trio of RGB values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE rgb_t rgbint_to_rgb(const rgbint *color)
|
||||
{
|
||||
vector unsigned int temp = vec_splat((vector unsigned int)vec_packsu(*color, *color), 0);
|
||||
UINT32 result;
|
||||
vec_ste(temp, 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbaint_to_rgba - converts an rgbaint back to
|
||||
a packed quad of RGB values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE rgb_t rgbaint_to_rgba(const rgbaint *color)
|
||||
{
|
||||
vector unsigned int temp = vec_splat((vector unsigned int)vec_packsu(*color, *color), 0);
|
||||
UINT32 result;
|
||||
vec_ste(temp, 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbint_to_rgb_clamp - converts an rgbint back
|
||||
to a packed trio of RGB values, clamping them
|
||||
to bytes first
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE rgb_t rgbint_to_rgb_clamp(const rgbint *color)
|
||||
{
|
||||
vector unsigned int temp = vec_splat((vector unsigned int)vec_packsu(*color, *color), 0);
|
||||
UINT32 result;
|
||||
vec_ste(temp, 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbaint_to_rgba_clamp - converts an rgbaint back
|
||||
to a packed quad of RGB values, clamping them
|
||||
to bytes first
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE rgb_t rgbaint_to_rgba_clamp(const rgbaint *color)
|
||||
{
|
||||
vector unsigned int temp = vec_splat((vector unsigned int)vec_packsu(*color, *color), 0);
|
||||
UINT32 result;
|
||||
vec_ste(temp, 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
CORE MATH
|
||||
***************************************************************************/
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbint_add - add two rgbint values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbint_add(rgbint *color1, const rgbint *color2)
|
||||
{
|
||||
*color1 = vec_add(*color1, *color2);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbaint_add - add two rgbaint values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbaint_add(rgbaint *color1, const rgbaint *color2)
|
||||
{
|
||||
*color1 = vec_add(*color1, *color2);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbint_sub - subtract two rgbint values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbint_sub(rgbint *color1, const rgbint *color2)
|
||||
{
|
||||
*color1 = vec_sub(*color1, *color2);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbaint_sub - subtract two rgbaint values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbaint_sub(rgbaint *color1, const rgbaint *color2)
|
||||
{
|
||||
*color1 = vec_sub(*color1, *color2);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbint_subr - reverse subtract two rgbint
|
||||
values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbint_subr(rgbint *color1, const rgbint *color2)
|
||||
{
|
||||
*color1 = vec_sub(*color2, *color1);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbaint_subr - reverse subtract two rgbaint
|
||||
values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbaint_subr(rgbaint *color1, const rgbaint *color2)
|
||||
{
|
||||
*color1 = vec_sub(*color2, *color1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
TABLES
|
||||
***************************************************************************/
|
||||
|
||||
extern const struct _rgbvmx_statics
|
||||
{
|
||||
rgbaint maxbyte;
|
||||
rgbaint scale_table[256];
|
||||
} rgbvmx_statics;
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
HIGHER LEVEL OPERATIONS
|
||||
***************************************************************************/
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbint_blend - blend two colors by the given
|
||||
scale factor
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbint_blend(rgbint *color1, const rgbint *color2, UINT8 color1scale)
|
||||
{
|
||||
vector signed int temp;
|
||||
*color1 = vec_mergeh(*color1, *color2);
|
||||
temp = vec_msum(*color1, rgbvmx_statics.scale_table[color1scale], vec_splat_s32(0));
|
||||
temp = (vector signed int)vec_sr(temp, vec_splat_u32(8));
|
||||
*color1 = vec_packs(temp, temp);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbaint_blend - blend two colors by the given
|
||||
scale factor
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbaint_blend(rgbaint *color1, const rgbaint *color2, UINT8 color1scale)
|
||||
{
|
||||
vector signed int temp;
|
||||
*color1 = vec_mergeh(*color1, *color2);
|
||||
temp = vec_msum(*color1, rgbvmx_statics.scale_table[color1scale], vec_splat_s32(0));
|
||||
temp = (vector signed int)vec_sr(temp, vec_splat_u32(8));
|
||||
*color1 = vec_packs(temp, temp);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbint_scale_and_clamp - scale the given
|
||||
color by an 8.8 scale factor, immediate or
|
||||
per channel, and clamp to byte values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbint_scale_immediate_and_clamp(rgbint *color, INT16 colorscale)
|
||||
{
|
||||
rgbint splatmap = vec_splat((rgbint)vec_lvsl(0, &colorscale), 0);
|
||||
rgbint vecscale = vec_lde(0, &colorscale);
|
||||
vector signed int temp;
|
||||
vecscale = (rgbint)vec_perm(vecscale, vecscale, (vector unsigned char)splatmap);
|
||||
*color = (rgbint)vec_mergeh(*color, (rgbint)vec_splat_s32(0));
|
||||
temp = vec_msum(*color, vecscale, vec_splat_s32(0));
|
||||
temp = (vector signed int)vec_sr(temp, vec_splat_u32(8));
|
||||
*color = vec_min(vec_packs(temp, temp), rgbvmx_statics.maxbyte);
|
||||
}
|
||||
|
||||
INLINE void rgbint_scale_channel_and_clamp(rgbint *color, const rgbint *colorscale)
|
||||
{
|
||||
rgbint vecscale = (rgbint)vec_mergeh(*colorscale, (rgbint)vec_splat_s32(0));
|
||||
vector signed int temp;
|
||||
*color = (rgbint)vec_mergeh(*color, (rgbint)vec_splat_s32(0));
|
||||
temp = vec_msum(*color, vecscale, vec_splat_s32(0));
|
||||
temp = (vector signed int)vec_sr(temp, vec_splat_u32(8));
|
||||
*color = vec_min(vec_packs(temp, temp), rgbvmx_statics.maxbyte);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbaint_scale_and_clamp - scale the given
|
||||
color by an 8.8 scale factor, immediate or
|
||||
per channel, and clamp to byte values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbaint_scale_immediate_and_clamp(rgbaint *color, INT16 colorscale)
|
||||
{
|
||||
rgbaint splatmap = vec_splat((rgbaint)vec_lvsl(0, &colorscale), 0);
|
||||
rgbaint vecscale = vec_lde(0, &colorscale);
|
||||
vector signed int temp;
|
||||
vecscale = (rgbaint)vec_perm(vecscale, vecscale, (vector unsigned char)splatmap);
|
||||
*color = (rgbaint)vec_mergeh(*color, (rgbaint)vec_splat_s32(0));
|
||||
temp = vec_msum(*color, vecscale, vec_splat_s32(0));
|
||||
temp = (vector signed int)vec_sr(temp, vec_splat_u32(8));
|
||||
*color = vec_min(vec_packs(temp, temp), rgbvmx_statics.maxbyte);
|
||||
}
|
||||
|
||||
/* Scale each channel of *color by the matching channel of *colorscale
   (8.8 fixed point, per the section header) and clamp the result against
   rgbvmx_statics.maxbyte. The result is written back to *color. */
INLINE void rgbaint_scale_channel_and_clamp(rgbaint *color, const rgbint *colorscale)
{
	/* Interleave the scale factors with zero. This previously read *color,
	   which ignored colorscale and scaled the colour by itself. */
	rgbaint vecscale = (rgbaint)vec_mergeh(*colorscale, (rgbint)vec_splat_s32(0));
	vector signed int temp;

	/* Interleave the colour with zero the same way, so each pair that
	   vec_msum multiplies and sums is channel*scale + 0*0. */
	*color = (rgbaint)vec_mergeh(*color, (rgbaint)vec_splat_s32(0));
	temp = vec_msum(*color, vecscale, vec_splat_s32(0));

	/* Drop the eight fractional bits of the product. */
	temp = (vector signed int)vec_sr(temp, vec_splat_u32(8));

	/* Pack back to 16-bit lanes with saturation, then apply the upper clamp. */
	*color = vec_min(vec_packs(temp, temp), rgbvmx_statics.maxbyte);
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgb_bilinear_filter - bilinear filter between
|
||||
four pixel values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE rgb_t rgb_bilinear_filter(rgb_t const &rgb00, rgb_t const &rgb01, rgb_t const &rgb10, rgb_t const &rgb11, UINT8 u, UINT8 v)
|
||||
{
|
||||
rgbint color00 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb00.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb00.ptr()));
|
||||
rgbint color01 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb01.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb01.ptr()));
|
||||
rgbint color10 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb10.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb10.ptr()));
|
||||
rgbint color11 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb11.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb11.ptr()));
|
||||
|
||||
/* interleave color01 and color00 at the byte level */
|
||||
color01 = (rgbint)vec_mergeh((vector signed char)color01, (vector signed char)color00);
|
||||
color11 = (rgbint)vec_mergeh((vector signed char)color11, (vector signed char)color10);
|
||||
color01 = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color01);
|
||||
color11 = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color11);
|
||||
color01 = (rgbint)vec_msum(color01, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
|
||||
color11 = (rgbint)vec_msum(color11, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
|
||||
color01 = (rgbint)vec_sr((vector signed int)color01, vec_splat_u32(1));
|
||||
color11 = (rgbint)vec_sl((vector signed int)color11, vec_splat_u32(15));
|
||||
color01 = vec_max(color01, color11);
|
||||
color01 = (rgbint)vec_msum(color01, rgbvmx_statics.scale_table[v], vec_splat_s32(0));
|
||||
color01 = (rgbint)vec_sr((vector signed int)color01, vec_splat_u32(15));
|
||||
color01 = vec_packs((vector signed int)color01, (vector signed int)color01);
|
||||
color01 = (rgbint)vec_packsu(color01, color01);
|
||||
|
||||
UINT32 result;
|
||||
vec_ste((vector unsigned int)color01, 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgba_bilinear_filter - bilinear filter between
|
||||
four pixel values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE rgb_t rgba_bilinear_filter(rgb_t const &rgb00, rgb_t const &rgb01, rgb_t const &rgb10, rgb_t const &rgb11, UINT8 u, UINT8 v)
|
||||
{
|
||||
rgbaint color00 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb00.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb00.ptr()));
|
||||
rgbaint color01 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb01.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb01.ptr()));
|
||||
rgbaint color10 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb10.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb10.ptr()));
|
||||
rgbaint color11 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb11.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb11.ptr()));
|
||||
|
||||
/* interleave color01 and color00 at the byte level */
|
||||
color01 = (rgbaint)vec_mergeh((vector signed char)color01, (vector signed char)color00);
|
||||
color11 = (rgbaint)vec_mergeh((vector signed char)color11, (vector signed char)color10);
|
||||
color01 = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color01);
|
||||
color11 = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color11);
|
||||
color01 = (rgbaint)vec_msum(color01, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
|
||||
color11 = (rgbaint)vec_msum(color11, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
|
||||
color01 = (rgbaint)vec_sr((vector signed int)color01, vec_splat_u32(1));
|
||||
color11 = (rgbaint)vec_sl((vector signed int)color11, vec_splat_u32(15));
|
||||
color01 = vec_max(color01, color11);
|
||||
color01 = (rgbaint)vec_msum(color01, rgbvmx_statics.scale_table[v], vec_splat_s32(0));
|
||||
color01 = (rgbaint)vec_sr((vector signed int)color01, vec_splat_u32(15));
|
||||
color01 = vec_packs((vector signed int)color01, (vector signed int)color01);
|
||||
color01 = (rgbaint)vec_packsu(color01, color01);
|
||||
|
||||
UINT32 result;
|
||||
vec_ste((vector unsigned int)color01, 0, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbint_bilinear_filter - bilinear filter between
|
||||
four pixel values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbint_bilinear_filter(rgbint *color, rgb_t const &rgb00, rgb_t const &rgb01, rgb_t const &rgb10, rgb_t const &rgb11, UINT8 u, UINT8 v)
|
||||
{
|
||||
rgbint color00 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb00.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb00.ptr()));
|
||||
rgbint color01 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb01.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb01.ptr()));
|
||||
rgbint color10 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb10.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb10.ptr()));
|
||||
rgbint color11 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb11.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb11.ptr()));
|
||||
|
||||
/* interleave color01 and color00 at the byte level */
|
||||
color01 = (rgbint)vec_mergeh((vector signed char)color01, (vector signed char)color00);
|
||||
color11 = (rgbint)vec_mergeh((vector signed char)color11, (vector signed char)color10);
|
||||
color01 = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color01);
|
||||
color11 = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color11);
|
||||
color01 = (rgbint)vec_msum(color01, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
|
||||
color11 = (rgbint)vec_msum(color11, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
|
||||
color01 = (rgbint)vec_sr((vector signed int)color01, vec_splat_u32(1));
|
||||
color11 = (rgbint)vec_sl((vector signed int)color11, vec_splat_u32(15));
|
||||
color01 = vec_max(color01, color11);
|
||||
color01 = (rgbint)vec_msum(color01, rgbvmx_statics.scale_table[v], vec_splat_s32(0));
|
||||
color01 = (rgbint)vec_sr((vector signed int)color01, vec_splat_u32(15));
|
||||
*color = vec_packs((vector signed int)color01, (vector signed int)color01);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbaint_bilinear_filter - bilinear filter between
|
||||
four pixel values
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbaint_bilinear_filter(rgbaint *color, rgb_t const &rgb00, rgb_t const &rgb01, rgb_t const &rgb10, rgb_t const &rgb11, UINT8 u, UINT8 v)
|
||||
{
|
||||
rgbaint color00 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb00.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb00.ptr()));
|
||||
rgbaint color01 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb01.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb01.ptr()));
|
||||
rgbaint color10 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb10.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb10.ptr()));
|
||||
rgbaint color11 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb11.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb11.ptr()));
|
||||
|
||||
/* interleave color01 and color00 at the byte level */
|
||||
color01 = (rgbaint)vec_mergeh((vector signed char)color01, (vector signed char)color00);
|
||||
color11 = (rgbaint)vec_mergeh((vector signed char)color11, (vector signed char)color10);
|
||||
color01 = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color01);
|
||||
color11 = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color11);
|
||||
color01 = (rgbaint)vec_msum(color01, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
|
||||
color11 = (rgbaint)vec_msum(color11, rgbvmx_statics.scale_table[u], vec_splat_s32(0));
|
||||
color01 = (rgbaint)vec_sr((vector signed int)color01, vec_splat_u32(1));
|
||||
color11 = (rgbaint)vec_sl((vector signed int)color11, vec_splat_u32(15));
|
||||
color01 = vec_max(color01, color11);
|
||||
color01 = (rgbaint)vec_msum(color01, rgbvmx_statics.scale_table[v], vec_splat_s32(0));
|
||||
color01 = (rgbaint)vec_sr((vector signed int)color01, vec_splat_u32(15));
|
||||
*color = vec_packs((vector signed int)color01, (vector signed int)color01);
|
||||
}
|
||||
|
||||
// altivec.h somehow redefines "bool" in a bad way on PowerPC Mac OS X. really.
|
||||
#ifdef OSX_PPC
|
||||
|
|
Loading…
Reference in a new issue