root/ext/pcre/pcrelib/sljit/sljitNativeX86_common.c


DEFINITIONS

This source file includes the following definitions:
  1. sljit_get_platform_name
  2. get_cpu_features
  3. get_jump_code
  4. generate_near_jump_code
  5. sljit_generate_code
  6. emit_save_flags
  7. emit_restore_flags
  8. sljit_grow_stack
  9. emit_mov
  10. sljit_emit_op0
  11. emit_mov_byte
  12. emit_mov_half
  13. emit_unary
  14. emit_not_with_flags
  15. emit_clz
  16. sljit_emit_op1
  17. emit_cum_binary
  18. emit_non_cum_binary
  19. emit_mul
  20. emit_lea_binary
  21. emit_cmp_binary
  22. emit_test_binary
  23. emit_shift
  24. emit_shift_with_flags
  25. sljit_emit_op2
  26. sljit_get_register_index
  27. sljit_get_float_register_index
  28. sljit_emit_op_custom
  29. init_compiler
  30. sljit_is_fpu_available
  31. emit_sse2
  32. emit_sse2_logic
  33. emit_sse2_load
  34. emit_sse2_store
  35. sljit_emit_fop1_convw_fromd
  36. sljit_emit_fop1_convd_fromw
  37. sljit_emit_fop1_cmp
  38. sljit_emit_fop1
  39. sljit_emit_fop2
  40. sljit_emit_label
  41. sljit_emit_jump
  42. sljit_emit_ijump
  43. sljit_emit_op_flags
  44. sljit_get_local_base
  45. sljit_emit_const
  46. sljit_set_jump_addr
  47. sljit_set_const
  48. sljit_x86_is_sse2_available
  49. sljit_x86_is_cmov_available
  50. sljit_x86_emit_cmov

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
        return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - none
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - none
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1        (SLJIT_NUMBER_OF_REGISTERS + 2)

static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
        0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

#define CHECK_EXTRA_REGS(p, w, do) \
        if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
                w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
                p = SLJIT_MEM1(SLJIT_SP); \
                do; \
        }

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1        (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2        (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3        (SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present";
   r13 & 0x7 == 0b101, which requires a displacement byte when mod == 0.
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
        0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
        0, 0, 6, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 7, 1
};
#else
/* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
        0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
        0, 0, 2, 1, 3,  4,  5,  5, 6,  7,  7, 6, 3, 4, 2,  0, 1
};
#endif
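
/* Illustrative note (not part of the original file): reg_lmap holds the low
   three bits of reg_map, i.e. reg_lmap[i] == (reg_map[i] & 0x7) for every
   entry; the dropped high bit is exactly what the REX.R/REX.X/REX.B prefix
   bits supply for r8-r15. A hypothetical startup self-check could be: */
#if 0
static void check_reg_lmap(void)
{
        sljit_si i;
        for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS + 5; i++)
                SLJIT_ASSERT(reg_lmap[i] == (reg_map[i] & 0x7));
}
#endif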

#define REX_W           0x48
#define REX_R           0x44
#define REX_X           0x42
#define REX_B           0x41
#define REX             0x40

#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

#define IS_HALFWORD(x)          ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)         ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define TMP_FREG        (0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS            0x0010
#define EX86_SHIFT_INS          0x0020
#define EX86_REX                0x0040
#define EX86_NO_REXW            0x0080
#define EX86_BYTE_ARG           0x0100
#define EX86_HALF_ARG           0x0200
#define EX86_PREF_66            0x0400
#define EX86_PREF_F2            0x0800
#define EX86_PREF_F3            0x1000
#define EX86_SSE2_OP1           0x2000
#define EX86_SSE2_OP2           0x4000
#define EX86_SSE2               (EX86_SSE2_OP1 | EX86_SSE2_OP2)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

#define ADD             (/* BINARY */ 0 << 3)
#define ADD_EAX_i32     0x05
#define ADD_r_rm        0x03
#define ADD_rm_r        0x01
#define ADDSD_x_xm      0x58
#define ADC             (/* BINARY */ 2 << 3)
#define ADC_EAX_i32     0x15
#define ADC_r_rm        0x13
#define ADC_rm_r        0x11
#define AND             (/* BINARY */ 4 << 3)
#define AND_EAX_i32     0x25
#define AND_r_rm        0x23
#define AND_rm_r        0x21
#define ANDPD_x_xm      0x54
#define BSR_r_rm        (/* GROUP_0F */ 0xbd)
#define CALL_i32        0xe8
#define CALL_rm         (/* GROUP_FF */ 2 << 3)
#define CDQ             0x99
#define CMOVNE_r_rm     (/* GROUP_0F */ 0x45)
#define CMP             (/* BINARY */ 7 << 3)
#define CMP_EAX_i32     0x3d
#define CMP_r_rm        0x3b
#define CMP_rm_r        0x39
#define CVTPD2PS_x_xm   0x5a
#define CVTSI2SD_x_rm   0x2a
#define CVTTSD2SI_r_xm  0x2c
#define DIV             (/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm      0x5e
#define INT3            0xcc
#define IDIV            (/* GROUP_F7 */ 7 << 3)
#define IMUL            (/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm       (/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8    0x6b
#define IMUL_r_rm_i32   0x69
#define JE_i8           0x74
#define JNE_i8          0x75
#define JMP_i8          0xeb
#define JMP_i32         0xe9
#define JMP_rm          (/* GROUP_FF */ 4 << 3)
#define LEA_r_m         0x8d
#define MOV_r_rm        0x8b
#define MOV_r_i32       0xb8
#define MOV_rm_r        0x89
#define MOV_rm_i32      0xc7
#define MOV_rm8_i8      0xc6
#define MOV_rm8_r8      0x88
#define MOVSD_x_xm      0x10
#define MOVSD_xm_x      0x11
#define MOVSXD_r_rm     0x63
#define MOVSX_r_rm8     (/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16    (/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8     (/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16    (/* GROUP_0F */ 0xb7)
#define MUL             (/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm      0x59
#define NEG_rm          (/* GROUP_F7 */ 3 << 3)
#define NOP             0x90
#define NOT_rm          (/* GROUP_F7 */ 2 << 3)
#define OR              (/* BINARY */ 1 << 3)
#define OR_r_rm         0x0b
#define OR_EAX_i32      0x0d
#define OR_rm_r         0x09
#define OR_rm8_r8       0x08
#define POP_r           0x58
#define POP_rm          0x8f
#define POPF            0x9d
#define PUSH_i32        0x68
#define PUSH_r          0x50
#define PUSH_rm         (/* GROUP_FF */ 6 << 3)
#define PUSHF           0x9c
#define RET_near        0xc3
#define RET_i16         0xc2
#define SBB             (/* BINARY */ 3 << 3)
#define SBB_EAX_i32     0x1d
#define SBB_r_rm        0x1b
#define SBB_rm_r        0x19
#define SAR             (/* SHIFT */ 7 << 3)
#define SHL             (/* SHIFT */ 4 << 3)
#define SHR             (/* SHIFT */ 5 << 3)
#define SUB             (/* BINARY */ 5 << 3)
#define SUB_EAX_i32     0x2d
#define SUB_r_rm        0x2b
#define SUB_rm_r        0x29
#define SUBSD_x_xm      0x5c
#define TEST_EAX_i32    0xa9
#define TEST_rm_r       0x85
#define UCOMISD_x_xm    0x2e
#define UNPCKLPD_x_xm   0x14
#define XCHG_EAX_r      0x90
#define XCHG_r_rm       0x87
#define XOR             (/* BINARY */ 6 << 3)
#define XOR_EAX_i32     0x35
#define XOR_r_rm        0x33
#define XOR_rm_r        0x31
#define XORPD_x_xm      0x57

#define GROUP_0F        0x0f
#define GROUP_F7        0xf7
#define GROUP_FF        0xff
#define GROUP_BINARY_81 0x81
#define GROUP_BINARY_83 0x83
#define GROUP_SHIFT_1   0xd1
#define GROUP_SHIFT_N   0xc1
#define GROUP_SHIFT_CL  0xd3

#define MOD_REG         0xc0
#define MOD_DISP8       0x40

#define INC_SIZE(s)                     (*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)                     (*inst++ = (PUSH_r + (r)))
#define POP_REG(r)                      (*inst++ = (POP_r + (r)))
#define RET()                           (*inst++ = (RET_near))
#define RET_I16(n)                      (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)            (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
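
/* Illustrative note (not part of the original file): MOV_RM packs the ModRM
   byte as mod:2 | reg:3 | rm:3 after the 0x8b (mov r32, r/m32) opcode. For
   example, a hypothetical register-to-register move of SLJIT_R1 into
   SLJIT_R0 would be MOV_RM(0x3, reg_map[SLJIT_R0], reg_map[SLJIT_R1]),
   which on x86-32 emits 0x8b 0xc2, i.e. "mov eax, edx". */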

/* Multithreading does not affect these static variables, since they store
   built-in CPU features. They may be written by several threads that detect
   the CPU features at the same time, but every writer stores the same
   values, so the race is harmless. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;

#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

static void get_cpu_features(void)
{
        sljit_ui features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

        int CPUInfo[4];
        __cpuid(CPUInfo, 1);
        features = (sljit_ui)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

        /* AT&T syntax. */
        __asm__ (
                "movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                /* On x86-32, there is no red zone, so this
                   should work (no need for a local variable). */
                "push %%ebx\n"
#endif
                "cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                "pop %%ebx\n"
#endif
                "movl %%edx, %0\n"
                : "=g" (features)
                :
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                : "%eax", "%ecx", "%edx"
#else
                : "%rax", "%rbx", "%rcx", "%rdx"
#endif
        );

#else /* _MSC_VER && _MSC_VER >= 1400 */

        /* Intel syntax. */
        __asm {
                mov eax, 1
                cpuid
                mov features, edx
        }

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
        cpu_has_sse2 = (features >> 26) & 0x1;
#endif
        cpu_has_cmov = (features >> 15) & 0x1;
}
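
/* Illustrative note (not part of the original file): CPUID leaf 1 reports
   the feature flags in EDX; bit 26 is SSE2 and bit 15 is CMOV, which is
   what the two shifts above extract. The rest of this file queries the
   cached values lazily, along the lines of this hypothetical helper: */
#if 0
static sljit_si has_cmov(void)
{
        if (cpu_has_cmov == -1)
                get_cpu_features();
        return cpu_has_cmov;
}
#endif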

static sljit_ub get_jump_code(sljit_si type)
{
        switch (type) {
        case SLJIT_EQUAL:
        case SLJIT_D_EQUAL:
                return 0x84 /* je */;

        case SLJIT_NOT_EQUAL:
        case SLJIT_D_NOT_EQUAL:
                return 0x85 /* jne */;

        case SLJIT_LESS:
        case SLJIT_D_LESS:
                return 0x82 /* jc */;

        case SLJIT_GREATER_EQUAL:
        case SLJIT_D_GREATER_EQUAL:
                return 0x83 /* jae */;

        case SLJIT_GREATER:
        case SLJIT_D_GREATER:
                return 0x87 /* jnbe */;

        case SLJIT_LESS_EQUAL:
        case SLJIT_D_LESS_EQUAL:
                return 0x86 /* jbe */;

        case SLJIT_SIG_LESS:
                return 0x8c /* jl */;

        case SLJIT_SIG_GREATER_EQUAL:
                return 0x8d /* jnl */;

        case SLJIT_SIG_GREATER:
                return 0x8f /* jnle */;

        case SLJIT_SIG_LESS_EQUAL:
                return 0x8e /* jle */;

        case SLJIT_OVERFLOW:
        case SLJIT_MUL_OVERFLOW:
                return 0x80 /* jo */;

        case SLJIT_NOT_OVERFLOW:
        case SLJIT_MUL_NOT_OVERFLOW:
                return 0x81 /* jno */;

        case SLJIT_D_UNORDERED:
                return 0x8a /* jp */;

        case SLJIT_D_ORDERED:
                return 0x8b /* jpo */;
        }
        return 0;
}
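
/* Illustrative note (not part of the original file): the values returned
   above are the 0x0F-prefixed near forms (0x0f 0x8x rel32); the short forms
   are exactly 0x10 lower (0x7x rel8), which is why generate_near_jump_code
   below emits a short conditional jump as get_jump_code(type) - 0x10, e.g.
   0x84 (je rel32) becomes 0x74 (je rel8). */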

static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
#endif

static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
{
        sljit_si short_jump;
        sljit_uw label_addr;

        if (jump->flags & JUMP_LABEL)
                label_addr = (sljit_uw)(code + jump->u.label->size);
        else
                label_addr = jump->u.target;
        short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
                return generate_far_jump_code(jump, code_ptr, type);
#endif

        if (type == SLJIT_JUMP) {
                if (short_jump)
                        *code_ptr++ = JMP_i8;
                else
                        *code_ptr++ = JMP_i32;
                jump->addr++;
        }
        else if (type >= SLJIT_FAST_CALL) {
                short_jump = 0;
                *code_ptr++ = CALL_i32;
                jump->addr++;
        }
        else if (short_jump) {
                *code_ptr++ = get_jump_code(type) - 0x10;
                jump->addr++;
        }
        else {
                *code_ptr++ = GROUP_0F;
                *code_ptr++ = get_jump_code(type);
                jump->addr += 2;
        }

        if (short_jump) {
                jump->flags |= PATCH_MB;
                code_ptr += sizeof(sljit_sb);
        } else {
                jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                code_ptr += sizeof(sljit_sw);
#else
                code_ptr += sizeof(sljit_si);
#endif
        }

        return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
        struct sljit_memory_fragment *buf;
        sljit_ub *code;
        sljit_ub *code_ptr;
        sljit_ub *buf_ptr;
        sljit_ub *buf_end;
        sljit_ub len;

        struct sljit_label *label;
        struct sljit_jump *jump;
        struct sljit_const *const_;

        CHECK_ERROR_PTR();
        CHECK_PTR(check_sljit_generate_code(compiler));
        reverse_buf(compiler);

        /* Second code generation pass. */
        code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
        PTR_FAIL_WITH_EXEC_IF(code);
        buf = compiler->buf;

        code_ptr = code;
        label = compiler->labels;
        jump = compiler->jumps;
        const_ = compiler->consts;
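
        /* A summary (not part of the original file), reconstructed from the
           branches below: each record in the instruction buffer starts with
           a length byte. A non-zero length is followed by that many already
           encoded machine-code bytes; a zero length is followed by a tag
           byte, where 0 marks a label, 1 marks a constant, 2 and 3 mark
           fixed calls/jumps, and a tag >= 4 marks a jump whose sljit type
           is (tag - 4). */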
        do {
                buf_ptr = buf->memory;
                buf_end = buf_ptr + buf->used_size;
                do {
                        len = *buf_ptr++;
                        if (len > 0) {
                                /* The code is already generated. */
                                SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
                                code_ptr += len;
                                buf_ptr += len;
                        }
                        else {
                                if (*buf_ptr >= 4) {
                                        jump->addr = (sljit_uw)code_ptr;
                                        if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
                                                code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
                                        else
                                                code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
                                        jump = jump->next;
                                }
                                else if (*buf_ptr == 0) {
                                        label->addr = (sljit_uw)code_ptr;
                                        label->size = code_ptr - code;
                                        label = label->next;
                                }
                                else if (*buf_ptr == 1) {
                                        const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
                                        const_ = const_->next;
                                }
                                else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                                        *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
                                        buf_ptr++;
                                        *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
                                        code_ptr += sizeof(sljit_sw);
                                        buf_ptr += sizeof(sljit_sw) - 1;
#else
                                        code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
                                        buf_ptr += sizeof(sljit_sw);
#endif
                                }
                                buf_ptr++;
                        }
                } while (buf_ptr < buf_end);
                SLJIT_ASSERT(buf_ptr == buf_end);
                buf = buf->next;
        } while (buf);

        SLJIT_ASSERT(!label);
        SLJIT_ASSERT(!jump);
        SLJIT_ASSERT(!const_);

        jump = compiler->jumps;
        while (jump) {
                if (jump->flags & PATCH_MB) {
                        SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
                        *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
                } else if (jump->flags & PATCH_MW) {
                        if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                                *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
#else
                                SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
                                *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
#endif
                        }
                        else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                                *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
#else
                                SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
                                *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
#endif
                        }
                }
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
                else if (jump->flags & PATCH_MD)
                        *(sljit_sw*)jump->addr = jump->u.label->addr;
#endif

                jump = jump->next;
        }

        /* Maybe we waste some space because of short jumps. */
        SLJIT_ASSERT(code_ptr <= code + compiler->size);
        compiler->error = SLJIT_ERR_COMPILED;
        compiler->executable_size = code_ptr - code;
        return (void*)code;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
        sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
        sljit_si dst, sljit_sw dstw,
        sljit_si src1, sljit_sw src1w,
        sljit_si src2, sljit_sw src2w);

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
        sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
        sljit_si dst, sljit_sw dstw,
        sljit_si src1, sljit_sw src1w,
        sljit_si src2, sljit_sw src2w);

static sljit_si emit_mov(struct sljit_compiler *compiler,
        sljit_si dst, sljit_sw dstw,
        sljit_si src, sljit_sw srcw);

static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
{
        sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
        FAIL_IF(!inst);
        INC_SIZE(5);
#else
        inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
        FAIL_IF(!inst);
        INC_SIZE(6);
        *inst++ = REX_W;
#endif
        *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
        *inst++ = 0x64;
        *inst++ = 0x24;
        *inst++ = (sljit_ub)sizeof(sljit_sw);
        *inst++ = PUSHF;
        compiler->flags_saved = 1;
        return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
{
        sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
        FAIL_IF(!inst);
        INC_SIZE(5);
        *inst++ = POPF;
#else
        inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
        FAIL_IF(!inst);
        INC_SIZE(6);
        *inst++ = POPF;
        *inst++ = REX_W;
#endif
        *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
        *inst++ = 0x64;
        *inst++ = 0x24;
        *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
        compiler->flags_saved = keep_flags;
        return SLJIT_SUCCESS;
}
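
/* Illustrative note (not part of the original file): in both helpers above,
   the bytes 0x64 0x24 <imm8> are the ModRM/SIB/disp8 encoding of an
   [esp/rsp + imm8] operand, so the emitted sequences are
       lea esp, [esp + sizeof(sljit_sw)]; pushf
   and
       popf; lea esp, [esp - sizeof(sljit_sw)]
   which leave the stack pointer net-unchanged while the saved flags word
   occupies the slot at the top of the stack. */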

#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
        /* Workaround for calling the internal _chkstk() function on Windows.
        This function touches all 4k pages that belong to the requested stack
        space, whose size is passed in local_size. This is necessary on
        Windows, where the stack can only grow in 4k steps. If the stack is
        already large enough the call just burns CPU cycles; but since that
        cannot be known in advance, it must always be made. I think this is
        a bad design in general, even if it has its reasons. */
        *(volatile sljit_si*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_si emit_mov(struct sljit_compiler *compiler,
        sljit_si dst, sljit_sw dstw,
        sljit_si src, sljit_sw srcw)
{
        sljit_ub* inst;

        if (dst == SLJIT_UNUSED) {
                /* No destination: no need to set up flags. */
                if (src & SLJIT_MEM) {
                        inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
                        FAIL_IF(!inst);
                        *inst = MOV_r_rm;
                }
                return SLJIT_SUCCESS;
        }
        if (FAST_IS_REG(src)) {
                inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
                FAIL_IF(!inst);
                *inst = MOV_rm_r;
                return SLJIT_SUCCESS;
        }
        if (src & SLJIT_IMM) {
                if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                        return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
                        if (!compiler->mode32) {
                                if (NOT_HALFWORD(srcw))
                                        return emit_load_imm64(compiler, dst, srcw);
                        }
                        else
                                return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
                }
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
                if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
                        FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
                        inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
                        FAIL_IF(!inst);
                        *inst = MOV_rm_r;
                        return SLJIT_SUCCESS;
                }
#endif
                inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
                FAIL_IF(!inst);
                *inst = MOV_rm_i32;
                return SLJIT_SUCCESS;
        }
        if (FAST_IS_REG(dst)) {
                inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
                FAIL_IF(!inst);
                *inst = MOV_r_rm;
                return SLJIT_SUCCESS;
        }

        /* Memory to memory move. Requires two instructions. */
        inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
        FAIL_IF(!inst);
        *inst = MOV_r_rm;
        inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
        FAIL_IF(!inst);
        *inst = MOV_rm_r;
        return SLJIT_SUCCESS;
}

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
        FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
        sljit_ub *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        sljit_si size;
#endif

        CHECK_ERROR();
        CHECK(check_sljit_emit_op0(compiler, op));

        switch (GET_OPCODE(op)) {
        case SLJIT_BREAKPOINT:
                inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
                FAIL_IF(!inst);
                INC_SIZE(1);
                *inst = INT3;
                break;
        case SLJIT_NOP:
                inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
                FAIL_IF(!inst);
                INC_SIZE(1);
                *inst = NOP;
                break;
        case SLJIT_LUMUL:
        case SLJIT_LSMUL:
        case SLJIT_UDIVMOD:
        case SLJIT_SDIVMOD:
        case SLJIT_UDIVI:
        case SLJIT_SDIVI:
                compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
                SLJIT_COMPILE_ASSERT(
                        reg_map[SLJIT_R0] == 0
                        && reg_map[SLJIT_R1] == 2
                        && reg_map[TMP_REG1] > 7,
                        invalid_register_assignment_for_div_mul);
#else
                SLJIT_COMPILE_ASSERT(
                        reg_map[SLJIT_R0] == 0
                        && reg_map[SLJIT_R1] < 7
                        && reg_map[TMP_REG1] == 2,
                        invalid_register_assignment_for_div_mul);
#endif
                compiler->mode32 = op & SLJIT_INT_OP;
#endif
                SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments);

                op = GET_OPCODE(op);
                if ((op | 0x2) == SLJIT_UDIVI) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
                        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
                        inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
                        inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
                        FAIL_IF(!inst);
                        *inst = XOR_r_rm;
                }

                if ((op | 0x2) == SLJIT_SDIVI) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
                        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                        inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
                        FAIL_IF(!inst);
                        INC_SIZE(1);
                        *inst = CDQ;
#else
                        if (compiler->mode32) {
                                inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
                                FAIL_IF(!inst);
                                INC_SIZE(1);
                                *inst = CDQ;
                        } else {
                                inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
                                FAIL_IF(!inst);
                                INC_SIZE(2);
                                *inst++ = REX_W;
                                *inst = CDQ;
                        }
#endif
                }

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
                FAIL_IF(!inst);
                INC_SIZE(2);
                *inst++ = GROUP_F7;
                *inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
                size = (!compiler->mode32 || op >= SLJIT_UDIVMOD) ? 3 : 2;
#else
                size = (!compiler->mode32) ? 3 : 2;
#endif
                inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
                FAIL_IF(!inst);
                INC_SIZE(size);
#ifdef _WIN64
                if (!compiler->mode32)
                        *inst++ = REX_W | ((op >= SLJIT_UDIVMOD) ? REX_B : 0);
                else if (op >= SLJIT_UDIVMOD)
                        *inst++ = REX_B;
                *inst++ = GROUP_F7;
                *inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
                if (!compiler->mode32)
                        *inst++ = REX_W;
                *inst++ = GROUP_F7;
                *inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
                switch (op) {
                case SLJIT_LUMUL:
                        *inst |= MUL;
                        break;
                case SLJIT_LSMUL:
                        *inst |= IMUL;
                        break;
                case SLJIT_UDIVMOD:
                case SLJIT_UDIVI:
                        *inst |= DIV;
                        break;
                case SLJIT_SDIVMOD:
                case SLJIT_SDIVI:
                        *inst |= IDIV;
                        break;
                }
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
                if (op <= SLJIT_SDIVMOD)
                        EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
                if (op >= SLJIT_UDIVI)
                        EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
                break;
        }

        return SLJIT_SUCCESS;
}
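
/* Illustrative note (not part of the original file): the multiply/divide
   group above follows the one-operand x86 MUL/IMUL/DIV/IDIV convention,
   where EAX/RAX carries one operand and EDX/RDX the high half/remainder.
   The compile-time asserts and register moves map that pair onto
   SLJIT_R0/SLJIT_R1 on every ABI; for example, after a hypothetical
   sljit_emit_op0(compiler, SLJIT_LUMUL), SLJIT_R0 holds the low half of
   the product of SLJIT_R0 and SLJIT_R1, and SLJIT_R1 the high half. */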

#define ENCODE_PREFIX(prefix) \
        do { \
                inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
                FAIL_IF(!inst); \
                INC_SIZE(1); \
                *inst = (prefix); \
        } while (0)

static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
        sljit_si dst, sljit_sw dstw,
        sljit_si src, sljit_sw srcw)
{
        sljit_ub* inst;
        sljit_si dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        sljit_si work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        compiler->mode32 = 0;
#endif

        if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
                return SLJIT_SUCCESS; /* Empty instruction. */

        if (src & SLJIT_IMM) {
                if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                        return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
                        inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
                        FAIL_IF(!inst);
                        *inst = MOV_rm_i32;
                        return SLJIT_SUCCESS;
#endif
                }
                inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
                FAIL_IF(!inst);
                *inst = MOV_rm8_i8;
                return SLJIT_SUCCESS;
        }

        dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

        if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                if (reg_map[src] >= 4) {
                        SLJIT_ASSERT(dst_r == TMP_REG1);
                        EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
                } else
                        dst_r = src;
#else
                dst_r = src;
#endif
        }
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
                /* src and dst are both registers. */
                SLJIT_ASSERT(SLOW_IS_REG(dst));
                if (reg_map[dst] < 4) {
                        if (dst != src)
                                EMIT_MOV(compiler, dst, 0, src, 0);
                        inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
                        FAIL_IF(!inst);
                        *inst++ = GROUP_0F;
                        *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
                }
                else {
                        if (dst != src)
                                EMIT_MOV(compiler, dst, 0, src, 0);
                        if (sign) {
                                /* shl reg, 24 */
                                inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
                                FAIL_IF(!inst);
                                *inst |= SHL;
                                /* sar reg, 24 */
                                inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
                                FAIL_IF(!inst);
                                *inst |= SAR;
                        }
                        else {
                                inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
                                FAIL_IF(!inst);
                                *(inst + 1) |= AND;
                        }
                }
                return SLJIT_SUCCESS;
        }
#endif
        else {
                /* src is either a memory operand or, on x86-32, a register with reg_map[src] < 4. */
                inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
                FAIL_IF(!inst);
                *inst++ = GROUP_0F;
                *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
        }

        if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                if (dst_r == TMP_REG1) {
                        /* Find an unused register whose reg_map entry is < 4. */
                        if ((dst & REG_MASK) == SLJIT_R0) {
                                if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
                                        work_r = SLJIT_R2;
                                else
                                        work_r = SLJIT_R1;
                        }
                        else {
                                if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
                                        work_r = SLJIT_R0;
                                else if ((dst & REG_MASK) == SLJIT_R1)
                                        work_r = SLJIT_R2;
                                else
                                        work_r = SLJIT_R1;
                        }

                        if (work_r == SLJIT_R0) {
                                ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
                        }
                        else {
                                inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
                                FAIL_IF(!inst);
                                *inst = XCHG_r_rm;
                        }

                        inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
                        FAIL_IF(!inst);
                        *inst = MOV_rm8_r8;

                        if (work_r == SLJIT_R0) {
                                ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
                        }
                        else {
                                inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
                                FAIL_IF(!inst);
                                *inst = XCHG_r_rm;
                        }
                }
                else {
                        inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
                        FAIL_IF(!inst);
                        *inst = MOV_rm8_r8;
                }
#else
                inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
                FAIL_IF(!inst);
                *inst = MOV_rm8_r8;
#endif
        }

        return SLJIT_SUCCESS;
}
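
/* Illustrative note (not part of the original file): the reg_map[...] >= 4
   special cases above exist because on x86-32 only EAX, ECX, EDX and EBX
   (map values 0-3) have addressable low bytes; register encodings 4-7 in a
   ModRM field select AH, CH, DH and BH instead, so a value kept in ESI or
   EDI must first be moved (or shifted and masked) through a register with
   a usable low byte. On x86-64 a REX prefix makes SIL, DIL etc.
   addressable, which is what the EX86_REX flag requests. */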

static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
        sljit_si dst, sljit_sw dstw,
        sljit_si src, sljit_sw srcw)
{
        sljit_ub* inst;
        sljit_si dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        compiler->mode32 = 0;
#endif

        if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
                return SLJIT_SUCCESS; /* Empty instruction. */

        if (src & SLJIT_IMM) {
                if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                        return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
                        inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
                        FAIL_IF(!inst);
                        *inst = MOV_rm_i32;
                        return SLJIT_SUCCESS;
#endif
                }
                inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
                FAIL_IF(!inst);
                *inst = MOV_rm_i32;
                return SLJIT_SUCCESS;
        }

        dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

        if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
                dst_r = src;
        else {
                inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
                FAIL_IF(!inst);
                *inst++ = GROUP_0F;
                *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
        }

        if (dst & SLJIT_MEM) {
                inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
                FAIL_IF(!inst);
                *inst = MOV_rm_r;
        }

        return SLJIT_SUCCESS;
}

static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
        sljit_si dst, sljit_sw dstw,
        sljit_si src, sljit_sw srcw)
{
        sljit_ub* inst;

        if (dst == SLJIT_UNUSED) {
                EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
                inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
                FAIL_IF(!inst);
                *inst++ = GROUP_F7;
                *inst |= opcode;
                return SLJIT_SUCCESS;
        }
        if (dst == src && dstw == srcw) {
                /* Same input and output */
                inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
                FAIL_IF(!inst);
                *inst++ = GROUP_F7;
                *inst |= opcode;
                return SLJIT_SUCCESS;
        }
        if (FAST_IS_REG(dst)) {
                EMIT_MOV(compiler, dst, 0, src, srcw);
                inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
                FAIL_IF(!inst);
                *inst++ = GROUP_F7;
                *inst |= opcode;
                return SLJIT_SUCCESS;
        }
        EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
        inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
        FAIL_IF(!inst);
        *inst++ = GROUP_F7;
        *inst |= opcode;
        EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
        return SLJIT_SUCCESS;
}

static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
        sljit_si dst, sljit_sw dstw,
        sljit_si src, sljit_sw srcw)
{
        sljit_ub* inst;

        if (dst == SLJIT_UNUSED) {
                EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
                inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
                FAIL_IF(!inst);
                *inst++ = GROUP_F7;
                *inst |= NOT_rm;
                inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
                FAIL_IF(!inst);
                *inst = OR_r_rm;
                return SLJIT_SUCCESS;
        }
        if (FAST_IS_REG(dst)) {
                EMIT_MOV(compiler, dst, 0, src, srcw);
                inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
                FAIL_IF(!inst);
                *inst++ = GROUP_F7;
                *inst |= NOT_rm;
                inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
                FAIL_IF(!inst);
                *inst = OR_r_rm;
                return SLJIT_SUCCESS;
        }
        EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
        inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
        FAIL_IF(!inst);
        *inst++ = GROUP_F7;
        *inst |= NOT_rm;
        inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
        FAIL_IF(!inst);
        *inst = OR_r_rm;
        EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
        return SLJIT_SUCCESS;
}

static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
        sljit_si dst, sljit_sw dstw,
        sljit_si src, sljit_sw srcw)
{
        sljit_ub* inst;
        sljit_si dst_r;

        SLJIT_UNUSED_ARG(op_flags);
        if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
                /* Just set the zero flag. */
                EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
                inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
                FAIL_IF(!inst);
                *inst++ = GROUP_F7;
                *inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
                inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
#endif
                FAIL_IF(!inst);
                *inst |= SHR;
                return SLJIT_SUCCESS;
        }

        if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
                EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
                src = TMP_REG1;
                srcw = 0;
        }

        inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
        FAIL_IF(!inst);
        *inst++ = GROUP_0F;
        *inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        if (FAST_IS_REG(dst))
                dst_r = dst;
        else {
                /* Find an unused temporary register. */
                if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
                        dst_r = SLJIT_R0;
                else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
                        dst_r = SLJIT_R1;
                else
                        dst_r = SLJIT_R2;
                EMIT_MOV(compiler, dst, dstw, dst_r, 0);
        }
        EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
        dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
        compiler->mode32 = 0;
        EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
        compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

        if (cpu_has_cmov == -1)
                get_cpu_features();

        if (cpu_has_cmov) {
                inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
                FAIL_IF(!inst);
                *inst++ = GROUP_0F;
                *inst = CMOVNE_r_rm;
        } else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
                FAIL_IF(!inst);
                INC_SIZE(4);

                *inst++ = JE_i8;
                *inst++ = 2;
                *inst++ = MOV_r_rm;
                *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
                inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
                FAIL_IF(!inst);
                INC_SIZE(5);

                *inst++ = JE_i8;
                *inst++ = 3;
                *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
                *inst++ = MOV_r_rm;
                *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
        }

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
        inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
        FAIL_IF(!inst);
        *(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        if (dst & SLJIT_MEM) {
                inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
                FAIL_IF(!inst);
                *inst = XCHG_r_rm;
        }
#else
        if (dst & SLJIT_MEM)
                EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
        return SLJIT_SUCCESS;
}
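
/* Illustrative note (not part of the original file): BSR returns the index
   of the highest set bit but is undefined for a zero source, so dst_r is
   preloaded with 32 + 31 (or 64 + 63) and CMOVNE (or the JE-guarded MOV)
   overwrites it only when the source is non-zero. The final XOR with 31
   (or 63) turns a valid BSR index into 31 - index and the preloaded
   constant into 32 (or 64), i.e. the count of leading zeros: */
#if 0
static sljit_si reference_clz32(sljit_ui x)
{
        sljit_si i;
        for (i = 31; i >= 0; i--)
                if (x & ((sljit_ui)1 << i))
                        return 31 - i; /* == bsr(x) ^ 31 */
        return 32; /* == (32 + 31) ^ 31 */
}
#endif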
1257 
1258 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
1259         sljit_si dst, sljit_sw dstw,
1260         sljit_si src, sljit_sw srcw)
1261 {
1262         sljit_ub* inst;
1263         sljit_si update = 0;
1264         sljit_si op_flags = GET_ALL_FLAGS(op);
1265 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1266         sljit_si dst_is_ereg = 0;
1267         sljit_si src_is_ereg = 0;
1268 #else
1269 #       define src_is_ereg 0
1270 #endif
1271 
1272         CHECK_ERROR();
1273         CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1274         ADJUST_LOCAL_OFFSET(dst, dstw);
1275         ADJUST_LOCAL_OFFSET(src, srcw);
1276 
1277         CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1278         CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
1279 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1280         compiler->mode32 = op_flags & SLJIT_INT_OP;
1281 #endif
1282 
1283         op = GET_OPCODE(op);
1284         if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1285 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1286                 compiler->mode32 = 0;
1287 #endif
1288 
1289                 if (op_flags & SLJIT_INT_OP) {
1290                         if (FAST_IS_REG(src) && src == dst) {
1291                                 if (!TYPE_CAST_NEEDED(op))
1292                                         return SLJIT_SUCCESS;
1293                         }
1294 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1295                         if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
1296                                 op = SLJIT_MOV_UI;
1297                         if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
1298                                 op = SLJIT_MOVU_UI;
1299                         if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
1300                                 op = SLJIT_MOV_SI;
1301                         if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
1302                                 op = SLJIT_MOVU_SI;
1303 #endif
1304                 }
1305 
1306                 SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
1307                 if (op >= SLJIT_MOVU) {
1308                         update = 1;
1309                         op -= 8;
1310                 }
1311 
1312                 if (src & SLJIT_IMM) {
1313                         switch (op) {
1314                         case SLJIT_MOV_UB:
1315                                 srcw = (sljit_ub)srcw;
1316                                 break;
1317                         case SLJIT_MOV_SB:
1318                                 srcw = (sljit_sb)srcw;
1319                                 break;
1320                         case SLJIT_MOV_UH:
1321                                 srcw = (sljit_uh)srcw;
1322                                 break;
1323                         case SLJIT_MOV_SH:
1324                                 srcw = (sljit_sh)srcw;
1325                                 break;
1326 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1327                         case SLJIT_MOV_UI:
1328                                 srcw = (sljit_ui)srcw;
1329                                 break;
1330                         case SLJIT_MOV_SI:
1331                                 srcw = (sljit_si)srcw;
1332                                 break;
1333 #endif
1334                         }
1335 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1336                         if (SLJIT_UNLIKELY(dst_is_ereg))
1337                                 return emit_mov(compiler, dst, dstw, src, srcw);
1338 #endif
1339                 }
1340 
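                     /* MOVU (move with update) forms advance the base register with an
                        LEA: before the access for a memory source (the move below then
                        uses offset 0), and after the access for a memory destination. */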
1341                 if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
1342                         inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
1343                         FAIL_IF(!inst);
1344                         *inst = LEA_r_m;
1345                         src &= SLJIT_MEM | 0xf;
1346                         srcw = 0;
1347                 }
1348 
1349 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1350                 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1351                         SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1352                         dst = TMP_REG1;
1353                 }
1354 #endif
1355 
1356                 switch (op) {
1357                 case SLJIT_MOV:
1358                 case SLJIT_MOV_P:
1359 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1360                 case SLJIT_MOV_UI:
1361                 case SLJIT_MOV_SI:
1362 #endif
1363                         FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1364                         break;
1365                 case SLJIT_MOV_UB:
1366                         FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1367                         break;
1368                 case SLJIT_MOV_SB:
1369                         FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1370                         break;
1371                 case SLJIT_MOV_UH:
1372                         FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1373                         break;
1374                 case SLJIT_MOV_SH:
1375                         FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1376                         break;
1377 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1378                 case SLJIT_MOV_UI:
1379                         FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1380                         break;
1381                 case SLJIT_MOV_SI:
1382                         FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1383                         break;
1384 #endif
1385                 }
1386 
1387 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1388                 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1389                         return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1390 #endif
1391 
1392                 if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
1393                         inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
1394                         FAIL_IF(!inst);
1395                         *inst = LEA_r_m;
1396                 }
1397                 return SLJIT_SUCCESS;
1398         }
1399 
1400         if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
1401                 compiler->flags_saved = 0;
1402 
1403         switch (op) {
1404         case SLJIT_NOT:
1405                 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
1406                         return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1407                 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1408 
1409         case SLJIT_NEG:
1410                 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1411                         FAIL_IF(emit_save_flags(compiler));
1412                 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1413 
1414         case SLJIT_CLZ:
1415                 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1416                         FAIL_IF(emit_save_flags(compiler));
1417                 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1418         }
1419 
1420         return SLJIT_SUCCESS;
1421 
1422 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1423 #       undef src_is_ereg
1424 #endif
1425 }
1426 
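     /* BINARY_IMM emits the group opcode with an immediate operand when the
        immediate fits into a sign-extended 32-bit field (always the case on
        x86-32); on x86-64 a wider immediate is first loaded into TMP_REG2.
        BINARY_EAX_IMM selects the short EAX/RAX accumulator encoding. */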
1427 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1428 
1429 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1430         if (IS_HALFWORD(immw) || compiler->mode32) { \
1431                 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1432                 FAIL_IF(!inst); \
1433                 *(inst + 1) |= (op_imm); \
1434         } \
1435         else { \
1436                 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
1437                 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
1438                 FAIL_IF(!inst); \
1439                 *inst = (op_mr); \
1440         }
1441 
1442 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1443         FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
1444 
1445 #else
1446 
1447 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1448         inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1449         FAIL_IF(!inst); \
1450         *(inst + 1) |= (op_imm);
1451 
1452 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1453         FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1454 
1455 #endif
1456 
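     /* Commutative ("cumulative") two-operand ALU helper for ADD, ADC, AND,
        OR and XOR. The four opcode arguments select the reg,r/m / r/m,reg /
        r/m,imm / accumulator,imm encodings. Illustrative sketch: front-ends
        reach this through sljit_emit_op2 below, e.g. R0 = R0 + 1 as
            sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1);
        which dispatches here with ADD_r_rm, ADD_rm_r, ADD and ADD_EAX_i32. */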
1457 static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
1458         sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1459         sljit_si dst, sljit_sw dstw,
1460         sljit_si src1, sljit_sw src1w,
1461         sljit_si src2, sljit_sw src2w)
1462 {
1463         sljit_ub* inst;
1464 
1465         if (dst == SLJIT_UNUSED) {
1466                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1467                 if (src2 & SLJIT_IMM) {
1468                         BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1469                 }
1470                 else {
1471                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1472                         FAIL_IF(!inst);
1473                         *inst = op_rm;
1474                 }
1475                 return SLJIT_SUCCESS;
1476         }
1477 
1478         if (dst == src1 && dstw == src1w) {
1479                 if (src2 & SLJIT_IMM) {
1480 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1481                         if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1482 #else
1483                         if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1484 #endif
1485                                 BINARY_EAX_IMM(op_eax_imm, src2w);
1486                         }
1487                         else {
1488                                 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1489                         }
1490                 }
1491                 else if (FAST_IS_REG(dst)) {
1492                         inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1493                         FAIL_IF(!inst);
1494                         *inst = op_rm;
1495                 }
1496                 else if (FAST_IS_REG(src2)) {
1497                         /* Special exception for sljit_emit_op_flags. */
1498                         inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1499                         FAIL_IF(!inst);
1500                         *inst = op_mr;
1501                 }
1502                 else {
1503                         EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1504                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1505                         FAIL_IF(!inst);
1506                         *inst = op_mr;
1507                 }
1508                 return SLJIT_SUCCESS;
1509         }
1510 
1511         /* Only for cumulative operations. */
1512         if (dst == src2 && dstw == src2w) {
1513                 if (src1 & SLJIT_IMM) {
1514 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1515                         if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1516 #else
1517                         if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
1518 #endif
1519                                 BINARY_EAX_IMM(op_eax_imm, src1w);
1520                         }
1521                         else {
1522                                 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1523                         }
1524                 }
1525                 else if (FAST_IS_REG(dst)) {
1526                         inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1527                         FAIL_IF(!inst);
1528                         *inst = op_rm;
1529                 }
1530                 else if (FAST_IS_REG(src1)) {
1531                         inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1532                         FAIL_IF(!inst);
1533                         *inst = op_mr;
1534                 }
1535                 else {
1536                         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1537                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1538                         FAIL_IF(!inst);
1539                         *inst = op_mr;
1540                 }
1541                 return SLJIT_SUCCESS;
1542         }
1543 
1544         /* General version. */
1545         if (FAST_IS_REG(dst)) {
1546                 EMIT_MOV(compiler, dst, 0, src1, src1w);
1547                 if (src2 & SLJIT_IMM) {
1548                         BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1549                 }
1550                 else {
1551                         inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1552                         FAIL_IF(!inst);
1553                         *inst = op_rm;
1554                 }
1555         }
1556         else {
1557                 /* This version requires fewer memory writes. */
1558                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1559                 if (src2 & SLJIT_IMM) {
1560                         BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1561                 }
1562                 else {
1563                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1564                         FAIL_IF(!inst);
1565                         *inst = op_rm;
1566                 }
1567                 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1568         }
1569 
1570         return SLJIT_SUCCESS;
1571 }
1572 
1573 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
1574         sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1575         sljit_si dst, sljit_sw dstw,
1576         sljit_si src1, sljit_sw src1w,
1577         sljit_si src2, sljit_sw src2w)
1578 {
1579         sljit_ub* inst;
1580 
1581         if (dst == SLJIT_UNUSED) {
1582                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1583                 if (src2 & SLJIT_IMM) {
1584                         BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1585                 }
1586                 else {
1587                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1588                         FAIL_IF(!inst);
1589                         *inst = op_rm;
1590                 }
1591                 return SLJIT_SUCCESS;
1592         }
1593 
1594         if (dst == src1 && dstw == src1w) {
1595                 if (src2 & SLJIT_IMM) {
1596 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1597                         if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1598 #else
1599                         if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1600 #endif
1601                                 BINARY_EAX_IMM(op_eax_imm, src2w);
1602                         }
1603                         else {
1604                                 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1605                         }
1606                 }
1607                 else if (FAST_IS_REG(dst)) {
1608                         inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1609                         FAIL_IF(!inst);
1610                         *inst = op_rm;
1611                 }
1612                 else if (FAST_IS_REG(src2)) {
1613                         inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1614                         FAIL_IF(!inst);
1615                         *inst = op_mr;
1616                 }
1617                 else {
1618                         EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1619                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1620                         FAIL_IF(!inst);
1621                         *inst = op_mr;
1622                 }
1623                 return SLJIT_SUCCESS;
1624         }
1625 
1626         /* General version. */
1627         if (FAST_IS_REG(dst) && dst != src2) {
1628                 EMIT_MOV(compiler, dst, 0, src1, src1w);
1629                 if (src2 & SLJIT_IMM) {
1630                         BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1631                 }
1632                 else {
1633                         inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1634                         FAIL_IF(!inst);
1635                         *inst = op_rm;
1636                 }
1637         }
1638         else {
1639                 /* This version requires fewer memory writes. */
1640                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1641                 if (src2 & SLJIT_IMM) {
1642                         BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1643                 }
1644                 else {
1645                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1646                         FAIL_IF(!inst);
1647                         *inst = op_rm;
1648                 }
1649                 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1650         }
1651 
1652         return SLJIT_SUCCESS;
1653 }
1654 
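     /* Multiplication: uses the two-byte IMUL_r_rm form when one source
        already lives in the destination register, and the IMUL_r_rm_i8 /
        IMUL_r_rm_i32 immediate forms otherwise; on x86-64, immediates that
        do not fit in 32 bits go through TMP_REG2. */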
1655 static sljit_si emit_mul(struct sljit_compiler *compiler,
1656         sljit_si dst, sljit_sw dstw,
1657         sljit_si src1, sljit_sw src1w,
1658         sljit_si src2, sljit_sw src2w)
1659 {
1660         sljit_ub* inst;
1661         sljit_si dst_r;
1662 
1663         dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1664 
1665         /* Register destination. */
1666         if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1667                 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1668                 FAIL_IF(!inst);
1669                 *inst++ = GROUP_0F;
1670                 *inst = IMUL_r_rm;
1671         }
1672         else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1673                 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1674                 FAIL_IF(!inst);
1675                 *inst++ = GROUP_0F;
1676                 *inst = IMUL_r_rm;
1677         }
1678         else if (src1 & SLJIT_IMM) {
1679                 if (src2 & SLJIT_IMM) {
1680                         EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1681                         src2 = dst_r;
1682                         src2w = 0;
1683                 }
1684 
1685                 if (src1w <= 127 && src1w >= -128) {
1686                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1687                         FAIL_IF(!inst);
1688                         *inst = IMUL_r_rm_i8;
1689                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1690                         FAIL_IF(!inst);
1691                         INC_SIZE(1);
1692                         *inst = (sljit_sb)src1w;
1693                 }
1694 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1695                 else {
1696                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1697                         FAIL_IF(!inst);
1698                         *inst = IMUL_r_rm_i32;
1699                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1700                         FAIL_IF(!inst);
1701                         INC_SIZE(4);
1702                         *(sljit_sw*)inst = src1w;
1703                 }
1704 #else
1705                 else if (IS_HALFWORD(src1w)) {
1706                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1707                         FAIL_IF(!inst);
1708                         *inst = IMUL_r_rm_i32;
1709                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1710                         FAIL_IF(!inst);
1711                         INC_SIZE(4);
1712                         *(sljit_si*)inst = (sljit_si)src1w;
1713                 }
1714                 else {
1715                         EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1716                         if (dst_r != src2)
1717                                 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1718                         inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1719                         FAIL_IF(!inst);
1720                         *inst++ = GROUP_0F;
1721                         *inst = IMUL_r_rm;
1722                 }
1723 #endif
1724         }
1725         else if (src2 & SLJIT_IMM) {
1726                 /* Note: src1 is NOT immediate. */
1727 
1728                 if (src2w <= 127 && src2w >= -128) {
1729                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1730                         FAIL_IF(!inst);
1731                         *inst = IMUL_r_rm_i8;
1732                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1733                         FAIL_IF(!inst);
1734                         INC_SIZE(1);
1735                         *inst = (sljit_sb)src2w;
1736                 }
1737 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1738                 else {
1739                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1740                         FAIL_IF(!inst);
1741                         *inst = IMUL_r_rm_i32;
1742                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1743                         FAIL_IF(!inst);
1744                         INC_SIZE(4);
1745                         *(sljit_sw*)inst = src2w;
1746                 }
1747 #else
1748                 else if (IS_HALFWORD(src2w)) {
1749                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1750                         FAIL_IF(!inst);
1751                         *inst = IMUL_r_rm_i32;
1752                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1753                         FAIL_IF(!inst);
1754                         INC_SIZE(4);
1755                         *(sljit_si*)inst = (sljit_si)src2w;
1756                 }
1757                 else {
1758                         EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
1759                         if (dst_r != src1)
1760                                 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1761                         inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1762                         FAIL_IF(!inst);
1763                         *inst++ = GROUP_0F;
1764                         *inst = IMUL_r_rm;
1765                 }
1766 #endif
1767         }
1768         else {
1769                 /* Neither argument is immediate. */
1770                 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1771                         dst_r = TMP_REG1;
1772                 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1773                 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1774                 FAIL_IF(!inst);
1775                 *inst++ = GROUP_0F;
1776                 *inst = IMUL_r_rm;
1777         }
1778 
1779         if (dst_r == TMP_REG1)
1780                 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1781 
1782         return SLJIT_SUCCESS;
1783 }
1784 
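     /* Flag-preserving add: reg+reg and reg+imm sums can be computed with LEA,
        which never modifies the flags. Returns SLJIT_ERR_UNSUPPORTED when no
        LEA form applies, so the caller falls back to the generic path. */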
1785 static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
1786         sljit_si dst, sljit_sw dstw,
1787         sljit_si src1, sljit_sw src1w,
1788         sljit_si src2, sljit_sw src2w)
1789 {
1790         sljit_ub* inst;
1791         sljit_si dst_r, done = 0;
1792 
1793         /* These cases are better left to the normal code path. */
1794         if (!keep_flags) {
1795                 if (dst == src1 && dstw == src1w)
1796                         return SLJIT_ERR_UNSUPPORTED;
1797                 if (dst == src2 && dstw == src2w)
1798                         return SLJIT_ERR_UNSUPPORTED;
1799         }
1800 
1801         dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1802 
1803         if (FAST_IS_REG(src1)) {
1804                 if (FAST_IS_REG(src2)) {
1805                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1806                         FAIL_IF(!inst);
1807                         *inst = LEA_r_m;
1808                         done = 1;
1809                 }
1810 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1811                 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1812                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
1813 #else
1814                 if (src2 & SLJIT_IMM) {
1815                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1816 #endif
1817                         FAIL_IF(!inst);
1818                         *inst = LEA_r_m;
1819                         done = 1;
1820                 }
1821         }
1822         else if (FAST_IS_REG(src2)) {
1823 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1824                 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1825                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
1826 #else
1827                 if (src1 & SLJIT_IMM) {
1828                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1829 #endif
1830                         FAIL_IF(!inst);
1831                         *inst = LEA_r_m;
1832                         done = 1;
1833                 }
1834         }
1835 
1836         if (done) {
1837                 if (dst_r == TMP_REG1)
1838                         return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1839                 return SLJIT_SUCCESS;
1840         }
1841         return SLJIT_ERR_UNSUPPORTED;
1842 }
1843 
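     /* Compare without storing a result; sljit_emit_op2 routes SLJIT_SUB with
        an unused destination here so that a single CMP sets the flags. */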
1844 static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
1845         sljit_si src1, sljit_sw src1w,
1846         sljit_si src2, sljit_sw src2w)
1847 {
1848         sljit_ub* inst;
1849 
1850 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1851         if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1852 #else
1853         if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1854 #endif
1855                 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1856                 return SLJIT_SUCCESS;
1857         }
1858 
1859         if (FAST_IS_REG(src1)) {
1860                 if (src2 & SLJIT_IMM) {
1861                         BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1862                 }
1863                 else {
1864                         inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1865                         FAIL_IF(!inst);
1866                         *inst = CMP_r_rm;
1867                 }
1868                 return SLJIT_SUCCESS;
1869         }
1870 
1871         if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1872                 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1873                 FAIL_IF(!inst);
1874                 *inst = CMP_rm_r;
1875                 return SLJIT_SUCCESS;
1876         }
1877 
1878         if (src2 & SLJIT_IMM) {
1879                 if (src1 & SLJIT_IMM) {
1880                         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1881                         src1 = TMP_REG1;
1882                         src1w = 0;
1883                 }
1884                 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1885         }
1886         else {
1887                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1888                 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1889                 FAIL_IF(!inst);
1890                 *inst = CMP_r_rm;
1891         }
1892         return SLJIT_SUCCESS;
1893 }
1894 
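     /* Bitwise test without storing a result; sljit_emit_op2 routes SLJIT_AND
        with an unused destination here so that a single TEST sets the flags. */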
1895 static sljit_si emit_test_binary(struct sljit_compiler *compiler,
1896         sljit_si src1, sljit_sw src1w,
1897         sljit_si src2, sljit_sw src2w)
1898 {
1899         sljit_ub* inst;
1900 
1901 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1902         if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1903 #else
1904         if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1905 #endif
1906                 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1907                 return SLJIT_SUCCESS;
1908         }
1909 
1910 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1911         if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1912 #else
1913         if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1914 #endif
1915                 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1916                 return SLJIT_SUCCESS;
1917         }
1918 
1919         if (!(src1 & SLJIT_IMM)) {
1920                 if (src2 & SLJIT_IMM) {
1921 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1922                         if (IS_HALFWORD(src2w) || compiler->mode32) {
1923                                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1924                                 FAIL_IF(!inst);
1925                                 *inst = GROUP_F7;
1926                         }
1927                         else {
1928                                 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1929                                 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
1930                                 FAIL_IF(!inst);
1931                                 *inst = TEST_rm_r;
1932                         }
1933 #else
1934                         inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1935                         FAIL_IF(!inst);
1936                         *inst = GROUP_F7;
1937 #endif
1938                         return SLJIT_SUCCESS;
1939                 }
1940                 else if (FAST_IS_REG(src1)) {
1941                         inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1942                         FAIL_IF(!inst);
1943                         *inst = TEST_rm_r;
1944                         return SLJIT_SUCCESS;
1945                 }
1946         }
1947 
1948         if (!(src2 & SLJIT_IMM)) {
1949                 if (src1 & SLJIT_IMM) {
1950 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1951                         if (IS_HALFWORD(src1w) || compiler->mode32) {
1952                                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
1953                                 FAIL_IF(!inst);
1954                                 *inst = GROUP_F7;
1955                         }
1956                         else {
1957                                 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1958                                 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
1959                                 FAIL_IF(!inst);
1960                                 *inst = TEST_rm_r;
1961                         }
1962 #else
1963                         inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
1964                         FAIL_IF(!inst);
1965                         *inst = GROUP_F7;
1966 #endif
1967                         return SLJIT_SUCCESS;
1968                 }
1969                 else if (FAST_IS_REG(src2)) {
1970                         inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1971                         FAIL_IF(!inst);
1972                         *inst = TEST_rm_r;
1973                         return SLJIT_SUCCESS;
1974                 }
1975         }
1976 
1977         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1978         if (src2 & SLJIT_IMM) {
1979 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1980                 if (IS_HALFWORD(src2w) || compiler->mode32) {
1981                         inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1982                         FAIL_IF(!inst);
1983                         *inst = GROUP_F7;
1984                 }
1985                 else {
1986                         FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1987                         inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1988                         FAIL_IF(!inst);
1989                         *inst = TEST_rm_r;
1990                 }
1991 #else
1992                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1993                 FAIL_IF(!inst);
1994                 *inst = GROUP_F7;
1995 #endif
1996         }
1997         else {
1998                 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1999                 FAIL_IF(!inst);
2000                 *inst = TEST_rm_r;
2001         }
2002         return SLJIT_SUCCESS;
2003 }
2004 
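     /* Variable shift counts must be in ECX (SLJIT_PREF_SHIFT_REG) on x86, so
        the non-immediate cases below shuffle operands through TMP_REG1 and
        TMP_REG2 (or a stack slot on x86-32) to free ECX without losing its
        current value. */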
2005 static sljit_si emit_shift(struct sljit_compiler *compiler,
2006         sljit_ub mode,
2007         sljit_si dst, sljit_sw dstw,
2008         sljit_si src1, sljit_sw src1w,
2009         sljit_si src2, sljit_sw src2w)
2010 {
2011         sljit_ub* inst;
2012 
2013         if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2014                 if (dst == src1 && dstw == src1w) {
2015                         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2016                         FAIL_IF(!inst);
2017                         *inst |= mode;
2018                         return SLJIT_SUCCESS;
2019                 }
2020                 if (dst == SLJIT_UNUSED) {
2021                         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2022                         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2023                         FAIL_IF(!inst);
2024                         *inst |= mode;
2025                         return SLJIT_SUCCESS;
2026                 }
2027                 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2028                         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2029                         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2030                         FAIL_IF(!inst);
2031                         *inst |= mode;
2032                         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2033                         return SLJIT_SUCCESS;
2034                 }
2035                 if (FAST_IS_REG(dst)) {
2036                         EMIT_MOV(compiler, dst, 0, src1, src1w);
2037                         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2038                         FAIL_IF(!inst);
2039                         *inst |= mode;
2040                         return SLJIT_SUCCESS;
2041                 }
2042 
2043                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2044                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2045                 FAIL_IF(!inst);
2046                 *inst |= mode;
2047                 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2048                 return SLJIT_SUCCESS;
2049         }
2050 
2051         if (dst == SLJIT_PREF_SHIFT_REG) {
2052                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2053                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2054                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2055                 FAIL_IF(!inst);
2056                 *inst |= mode;
2057                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2058         }
2059         else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2060                 if (src1 != dst)
2061                         EMIT_MOV(compiler, dst, 0, src1, src1w);
2062                 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2063                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2064                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2065                 FAIL_IF(!inst);
2066                 *inst |= mode;
2067                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2068         }
2069         else {
2070                 /* This case is really difficult, since ecx itself may be used
2071                    for addressing, and we must keep it working even in that case. */
2072                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2073 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2074                 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2075 #else
2076                 /* [esp+0] contains the flags. */
2077                 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
2078 #endif
2079                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2080                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2081                 FAIL_IF(!inst);
2082                 *inst |= mode;
2083 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2084                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2085 #else
2086                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
2087 #endif
2088                 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2089         }
2090 
2091         return SLJIT_SUCCESS;
2092 }
2093 
2094 static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
2095         sljit_ub mode, sljit_si set_flags,
2096         sljit_si dst, sljit_sw dstw,
2097         sljit_si src1, sljit_sw src1w,
2098         sljit_si src2, sljit_sw src2w)
2099 {
2100         /* The CPU does not set flags if the shift count is 0. */
2101         if (src2 & SLJIT_IMM) {
2102 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2103                 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2104                         return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2105 #else
2106                 if ((src2w & 0x1f) != 0)
2107                         return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2108 #endif
2109                 if (!set_flags)
2110                         return emit_mov(compiler, dst, dstw, src1, src1w);
2111                 /* OR dst, src, 0 */
2112                 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2113                         dst, dstw, src1, src1w, SLJIT_IMM, 0);
2114         }
2115 
2116         if (!set_flags)
2117                 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2118 
2119         if (!FAST_IS_REG(dst))
2120                 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2121 
2122         FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2123 
2124         if (FAST_IS_REG(dst))
2125                 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2126         return SLJIT_SUCCESS;
2127 }
2128 
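     /* Main two-operand entry point. Illustrative sketch: R0 = R1 & 0xff
        would be emitted as
            sljit_emit_op2(compiler, SLJIT_AND, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_IMM, 0xff);
        and lands in emit_cum_binary through the SLJIT_AND case below. */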
2129 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
2130         sljit_si dst, sljit_sw dstw,
2131         sljit_si src1, sljit_sw src1w,
2132         sljit_si src2, sljit_sw src2w)
2133 {
2134         CHECK_ERROR();
2135         CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2136         ADJUST_LOCAL_OFFSET(dst, dstw);
2137         ADJUST_LOCAL_OFFSET(src1, src1w);
2138         ADJUST_LOCAL_OFFSET(src2, src2w);
2139 
2140         CHECK_EXTRA_REGS(dst, dstw, (void)0);
2141         CHECK_EXTRA_REGS(src1, src1w, (void)0);
2142         CHECK_EXTRA_REGS(src2, src2w, (void)0);
2143 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2144         compiler->mode32 = op & SLJIT_INT_OP;
2145 #endif
2146 
2147         if (GET_OPCODE(op) >= SLJIT_MUL) {
2148                 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2149                         compiler->flags_saved = 0;
2150                 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2151                         FAIL_IF(emit_save_flags(compiler));
2152         }
2153 
2154         switch (GET_OPCODE(op)) {
2155         case SLJIT_ADD:
2156                 if (!GET_FLAGS(op)) {
2157                         if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2158                                 return compiler->error;
2159                 }
2160                 else
2161                         compiler->flags_saved = 0;
2162                 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2163                         FAIL_IF(emit_save_flags(compiler));
2164                 return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
2165                         dst, dstw, src1, src1w, src2, src2w);
2166         case SLJIT_ADDC:
2167                 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2168                         FAIL_IF(emit_restore_flags(compiler, 1));
2169                 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2170                         FAIL_IF(emit_save_flags(compiler));
2171                 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2172                         compiler->flags_saved = 0;
2173                 return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
2174                         dst, dstw, src1, src1w, src2, src2w);
2175         case SLJIT_SUB:
2176                 if (!GET_FLAGS(op)) {
2177                         if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2178                                 return compiler->error;
2179                 }
2180                 else
2181                         compiler->flags_saved = 0;
2182                 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2183                         FAIL_IF(emit_save_flags(compiler));
2184                 if (dst == SLJIT_UNUSED)
2185                         return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2186                 return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
2187                         dst, dstw, src1, src1w, src2, src2w);
2188         case SLJIT_SUBC:
2189                 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2190                         FAIL_IF(emit_restore_flags(compiler, 1));
2191                 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2192                         FAIL_IF(emit_save_flags(compiler));
2193                 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2194                         compiler->flags_saved = 0;
2195                 return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
2196                         dst, dstw, src1, src1w, src2, src2w);
2197         case SLJIT_MUL:
2198                 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2199         case SLJIT_AND:
2200                 if (dst == SLJIT_UNUSED)
2201                         return emit_test_binary(compiler, src1, src1w, src2, src2w);
2202                 return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
2203                         dst, dstw, src1, src1w, src2, src2w);
2204         case SLJIT_OR:
2205                 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2206                         dst, dstw, src1, src1w, src2, src2w);
2207         case SLJIT_XOR:
2208                 return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
2209                         dst, dstw, src1, src1w, src2, src2w);
2210         case SLJIT_SHL:
2211                 return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
2212                         dst, dstw, src1, src1w, src2, src2w);
2213         case SLJIT_LSHR:
2214                 return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
2215                         dst, dstw, src1, src1w, src2, src2w);
2216         case SLJIT_ASHR:
2217                 return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
2218                         dst, dstw, src1, src1w, src2, src2w);
2219         }
2220 
2221         return SLJIT_SUCCESS;
2222 }
2223 
2224 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
2225 {
2226         CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2227 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2228         if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
2229                 return -1;
2230 #endif
2231         return reg_map[reg];
2232 }
2233 
2234 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
2235 {
2236         CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2237         return reg;
2238 }
2239 
2240 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
2241         void *instruction, sljit_si size)
2242 {
2243         sljit_ub *inst;
2244 
2245         CHECK_ERROR();
2246         CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2247 
2248         inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
2249         FAIL_IF(!inst);
2250         INC_SIZE(size);
2251         SLJIT_MEMMOVE(inst, instruction, size);
2252         return SLJIT_SUCCESS;
2253 }
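
     /* Illustrative sketch (not part of this file's logic): sljit_emit_op_custom
        copies raw machine bytes into the stream, e.g. a single-byte NOP (0x90):
            sljit_ub nop = 0x90;
            sljit_emit_op_custom(compiler, &nop, 1); */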
2254 
2255 /* --------------------------------------------------------------------- */
2256 /*  Floating point operators                                             */
2257 /* --------------------------------------------------------------------- */
2258 
2259 /* Up to 12 bytes of alignment slack + four 16-byte constants. */
2260 static sljit_si sse2_data[3 + (4 + 4) * 2];
2261 static sljit_si *sse2_buffer;
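     /* sse2_buffer holds the sign-bit and absolute-value masks used by
        SLJIT_DNEG (XORPD) and SLJIT_DABS (ANDPD) in sljit_emit_fop1 below,
        aligned to 16 bytes for SSE2 operands. */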
2262 
2263 static void init_compiler(void)
2264 {
2265         sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
2266         /* Single precision constants. */
2267         sse2_buffer[0] = 0x80000000;
2268         sse2_buffer[4] = 0x7fffffff;
2269         /* Double precision constants. */
2270         sse2_buffer[8] = 0;
2271         sse2_buffer[9] = 0x80000000;
2272         sse2_buffer[12] = 0xffffffff;
2273         sse2_buffer[13] = 0x7fffffff;
2274 }
2275 
2276 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
2277 {
2278 #ifdef SLJIT_IS_FPU_AVAILABLE
2279         return SLJIT_IS_FPU_AVAILABLE;
2280 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2281         if (cpu_has_sse2 == -1)
2282                 get_cpu_features();
2283         return cpu_has_sse2;
2284 #else /* SLJIT_DETECT_SSE2 */
2285         return 1;
2286 #endif /* SLJIT_DETECT_SSE2 */
2287 }
2288 
2289 static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2290         sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2291 {
2292         sljit_ub *inst;
2293 
2294         inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2295         FAIL_IF(!inst);
2296         *inst++ = GROUP_0F;
2297         *inst = opcode;
2298         return SLJIT_SUCCESS;
2299 }
2300 
2301 static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2302         sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2303 {
2304         sljit_ub *inst;
2305 
2306         inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2307         FAIL_IF(!inst);
2308         *inst++ = GROUP_0F;
2309         *inst = opcode;
2310         return SLJIT_SUCCESS;
2311 }
2312 
2313 static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
2314         sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
2315 {
2316         return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2317 }
2318 
2319 static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
2320         sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
2321 {
2322         return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2323 }
2324 
2325 static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
2326         sljit_si dst, sljit_sw dstw,
2327         sljit_si src, sljit_sw srcw)
2328 {
2329         sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2330         sljit_ub *inst;
2331 
2332 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2333         if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
2334                 compiler->mode32 = 0;
2335 #endif
2336 
2337         inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2338         FAIL_IF(!inst);
2339         *inst++ = GROUP_0F;
2340         *inst = CVTTSD2SI_r_xm;
2341 
2342         if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
2343                 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2344         return SLJIT_SUCCESS;
2345 }
2346 
2347 static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
2348         sljit_si dst, sljit_sw dstw,
2349         sljit_si src, sljit_sw srcw)
2350 {
2351         sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2352         sljit_ub *inst;
2353 
2354 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2355         if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
2356                 compiler->mode32 = 0;
2357 #endif
2358 
2359         if (src & SLJIT_IMM) {
2360 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2361                 if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
2362                         srcw = (sljit_si)srcw;
2363 #endif
2364                 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2365                 src = TMP_REG1;
2366                 srcw = 0;
2367         }
2368 
2369         inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2370         FAIL_IF(!inst);
2371         *inst++ = GROUP_0F;
2372         *inst = CVTSI2SD_x_rm;
2373 
2374 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2375         compiler->mode32 = 1;
2376 #endif
2377         if (dst_r == TMP_FREG)
2378                 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2379         return SLJIT_SUCCESS;
2380 }
2381 
2382 static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
2383         sljit_si src1, sljit_sw src1w,
2384         sljit_si src2, sljit_sw src2w)
2385 {
2386         compiler->flags_saved = 0;
2387         if (!FAST_IS_REG(src1)) {
2388                 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2389                 src1 = TMP_FREG;
2390         }
2391         return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
2392 }
2393 
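     /* Single-operand floating point entry point. SLJIT_DNEG and SLJIT_DABS
        flip or clear the sign bit by XOR-ing/AND-ing with the precomputed
        masks in sse2_buffer; moves and conversions use plain SSE2 loads,
        stores and CVT* instructions. */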
2394 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
2395         sljit_si dst, sljit_sw dstw,
2396         sljit_si src, sljit_sw srcw)
2397 {
2398         sljit_si dst_r;
2399 
2400 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2401         compiler->mode32 = 1;
2402 #endif
2403 
2404         CHECK_ERROR();
2405         SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2406 
2407         if (GET_OPCODE(op) == SLJIT_DMOV) {
2408                 if (FAST_IS_REG(dst))
2409                         return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
2410                 if (FAST_IS_REG(src))
2411                         return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
2412                 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
2413                 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2414         }
2415 
2416         if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
2417                 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2418                 if (FAST_IS_REG(src)) {
2419                         /* We overwrite the high bits of the source register. From the
2420                            SLJIT point of view, this is not an issue.
2421                            Note: in SSE3, we could also use MOVDDUP and MOVSLDUP. */
2422                         FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
2423                 }
2424                 else {
2425                         FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
2426                         src = TMP_FREG;
2427                 }
2428 
2429                 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
2430                 if (dst_r == TMP_FREG)
2431                         return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2432                 return SLJIT_SUCCESS;
2433         }
2434 
2435         if (SLOW_IS_REG(dst)) {
2436                 dst_r = dst;
2437                 if (dst != src)
2438                         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2439         }
2440         else {
2441                 dst_r = TMP_FREG;
2442                 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2443         }
2444 
2445         switch (GET_OPCODE(op)) {
2446         case SLJIT_DNEG:
2447                 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
2448                 break;
2449 
2450         case SLJIT_DABS:
2451                 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2452                 break;
2453         }
2454 
2455         if (dst_r == TMP_FREG)
2456                 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2457         return SLJIT_SUCCESS;
2458 }
2459 
2460 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
2461         sljit_si dst, sljit_sw dstw,
2462         sljit_si src1, sljit_sw src1w,
2463         sljit_si src2, sljit_sw src2w)
2464 {
2465         sljit_si dst_r;
2466 
2467         CHECK_ERROR();
2468         CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2469         ADJUST_LOCAL_OFFSET(dst, dstw);
2470         ADJUST_LOCAL_OFFSET(src1, src1w);
2471         ADJUST_LOCAL_OFFSET(src2, src2w);
2472 
2473 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2474         compiler->mode32 = 1;
2475 #endif
2476 
2477         if (FAST_IS_REG(dst)) {
2478                 dst_r = dst;
2479                 if (dst == src1)
2480                         ; /* Do nothing here. */
2481                 else if (dst == src2 && (op == SLJIT_DADD || op == SLJIT_DMUL)) {
2482                         /* Swap arguments. */
2483                         src2 = src1;
2484                         src2w = src1w;
2485                 }
2486                 else if (dst != src2)
2487                         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
2488                 else {
2489                         dst_r = TMP_FREG;
2490                         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2491                 }
2492         }
2493         else {
2494                 dst_r = TMP_FREG;
2495                 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2496         }
2497 
2498         switch (GET_OPCODE(op)) {
2499         case SLJIT_DADD:
2500                 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2501                 break;
2502 
2503         case SLJIT_DSUB:
2504                 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2505                 break;
2506 
2507         case SLJIT_DMUL:
2508                 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2509                 break;
2510 
2511         case SLJIT_DDIV:
2512                 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2513                 break;
2514         }
2515 
2516         if (dst_r == TMP_FREG)
2517                 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2518         return SLJIT_SUCCESS;
2519 }
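
     /* Illustrative sketch: FR0 = FR0 + FR1 in double precision would be
            sljit_emit_fop2(compiler, SLJIT_DADD, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_FR1, 0);
        which selects ADDSD_x_xm above. */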
2520 
2521 /* --------------------------------------------------------------------- */
2522 /*  Conditional instructions                                             */
2523 /* --------------------------------------------------------------------- */
2524 
2525 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2526 {
2527         sljit_ub *inst;
2528         struct sljit_label *label;
2529 
2530         CHECK_ERROR_PTR();
2531         CHECK_PTR(check_sljit_emit_label(compiler));
2532 
2533         /* We should restore the flags before the label,
2534            since other incoming jumps have their own flags as well. */
2535         if (SLJIT_UNLIKELY(compiler->flags_saved))
2536                 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2537 
2538         if (compiler->last_label && compiler->last_label->size == compiler->size)
2539                 return compiler->last_label;
2540 
2541         label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2542         PTR_FAIL_IF(!label);
2543         set_label(label, compiler);
2544 
2545         inst = (sljit_ub*)ensure_buf(compiler, 2);
2546         PTR_FAIL_IF(!inst);
2547 
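             /* A zero size byte followed by 0 records the label position in
                the intermediate buffer, so that sljit_generate_code can
                resolve its final address. */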
2548         *inst++ = 0;
2549         *inst++ = 0;
2550 
2551         return label;
2552 }
2553 
2554 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
2555 {
2556         sljit_ub *inst;
2557         struct sljit_jump *jump;
2558 
2559         CHECK_ERROR_PTR();
2560         CHECK_PTR(check_sljit_emit_jump(compiler, type));
2561 
2562         if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2563                 if ((type & 0xff) <= SLJIT_JUMP)
2564                         PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2565                 compiler->flags_saved = 0;
2566         }
2567 
2568         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2569         PTR_FAIL_IF_NULL(jump);
2570         set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2571         type &= 0xff;
2572 
2573         if (type >= SLJIT_CALL1)
2574                 PTR_FAIL_IF(call_with_args(compiler, type));
2575 
2576         /* Worst case size. */
2577 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2578         compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2579 #else
2580         compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2581 #endif
2582 
2583         inst = (sljit_ub*)ensure_buf(compiler, 2);
2584         PTR_FAIL_IF_NULL(inst);
2585 
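             /* Zero-length marker: type + 4 keeps jump records distinct
                from the label (0) and const (1) markers emitted elsewhere
                in this file. */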
2586         *inst++ = 0;
2587         *inst++ = type + 4;
2588         return jump;
2589 }
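
     /* Illustrative usage (a hedged sketch, not part of the original
        source): a backward branch on a hypothetical compiler object C:

            struct sljit_label *loop = sljit_emit_label(C);
            ...
            struct sljit_jump *j = sljit_emit_jump(C, SLJIT_NOT_ZERO);
            sljit_set_label(j, loop);

        SLJIT_REWRITABLE_JUMP may be or-ed into the type when the target
        will later be replaced via sljit_set_jump_addr(). */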
2590 
2591 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
2592 {
2593         sljit_ub *inst;
2594         struct sljit_jump *jump;
2595 
2596         CHECK_ERROR();
2597         CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2598         ADJUST_LOCAL_OFFSET(src, srcw);
2599 
2600         CHECK_EXTRA_REGS(src, srcw, (void)0);
2601 
2602         if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2603                 if (type <= SLJIT_JUMP)
2604                         FAIL_IF(emit_restore_flags(compiler, 0));
2605                 compiler->flags_saved = 0;
2606         }
2607 
2608         if (type >= SLJIT_CALL1) {
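                     /* call_with_args loads the outgoing arguments, so a
                        target address held in SLJIT_R2 (an argument
                        register on the ABIs handled below) is first saved
                        to TMP_REG1. Under x86-32 fastcall, a third
                        argument is passed on the stack, which shifts
                        SLJIT_SP-relative sources by one word. */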
2609 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2610 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
2611                 if (src == SLJIT_R2) {
2612                         EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2613                         src = TMP_REG1;
2614                 }
2615                 if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
2616                         srcw += sizeof(sljit_sw);
2617 #endif
2618 #endif
2619 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2620                 if (src == SLJIT_R2) {
2621                         EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2622                         src = TMP_REG1;
2623                 }
2624 #endif
2625                 FAIL_IF(call_with_args(compiler, type));
2626         }
2627 
2628         if (src == SLJIT_IMM) {
2629                 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2630                 FAIL_IF_NULL(jump);
2631                 set_jump(jump, compiler, JUMP_ADDR);
2632                 jump->u.target = srcw;
2633 
2634                 /* Worst case size. */
2635 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2636                 compiler->size += 5;
2637 #else
2638                 compiler->size += 10 + 3;
2639 #endif
2640 
2641                 inst = (sljit_ub*)ensure_buf(compiler, 2);
2642                 FAIL_IF_NULL(inst);
2643 
2644                 *inst++ = 0;
2645                 *inst++ = type + 4;
2646         }
2647         else {
2648 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2649                 /* REX_W is not necessary (src is not immediate). */
2650                 compiler->mode32 = 1;
2651 #endif
2652                 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2653                 FAIL_IF(!inst);
2654                 *inst++ = GROUP_FF;
2655                 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2656         }
2657         return SLJIT_SUCCESS;
2658 }
2659 
2660 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
2661         sljit_si dst, sljit_sw dstw,
2662         sljit_si src, sljit_sw srcw,
2663         sljit_si type)
2664 {
2665         sljit_ub *inst;
2666         sljit_ub cond_set = 0;
2667 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2668         sljit_si reg;
2669 #else
2670         /* CHECK_EXTRA_REGS might overwrite these values. */
2671         sljit_si dst_save = dst;
2672         sljit_sw dstw_save = dstw;
2673 #endif
2674 
2675         CHECK_ERROR();
2676         CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
2677         SLJIT_UNUSED_ARG(srcw);
2678 
2679         if (dst == SLJIT_UNUSED)
2680                 return SLJIT_SUCCESS;
2681 
2682         ADJUST_LOCAL_OFFSET(dst, dstw);
2683         CHECK_EXTRA_REGS(dst, dstw, (void)0);
2684         if (SLJIT_UNLIKELY(compiler->flags_saved))
2685                 FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
2686 
2687         type &= 0xff;
2688         /* setcc = jcc + 0x10. */
2689         cond_set = get_jump_code(type) + 0x10;
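             /* Jcc near is encoded as 0F 80+cc and SETcc as 0F 90+cc,
                hence the 0x10 offset from the jump opcode. */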
2690 
2691 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2692         if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
2693                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
2694                 FAIL_IF(!inst);
2695                 INC_SIZE(4 + 3);
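             /* Without a REX prefix, byte-register encodings 4-7 select
                AH/CH/DH/BH; always emitting REX (or REX_B for r8-r15)
                guarantees that the low byte of the intended register is
                written. */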
2696                 /* Set low register to conditional flag. */
2697                 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2698                 *inst++ = GROUP_0F;
2699                 *inst++ = cond_set;
2700                 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
2701                 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2702                 *inst++ = OR_rm8_r8;
2703                 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2704                 return SLJIT_SUCCESS;
2705         }
2706 
2707         reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2708 
2709         inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
2710         FAIL_IF(!inst);
2711         INC_SIZE(4 + 4);
2712         /* Set low register to conditional flag. */
2713         *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2714         *inst++ = GROUP_0F;
2715         *inst++ = cond_set;
2716         *inst++ = MOD_REG | reg_lmap[reg];
2717         *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2718         *inst++ = GROUP_0F;
2719         *inst++ = MOVZX_r_rm8;
2720         *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2721 
2722         if (reg != TMP_REG1)
2723                 return SLJIT_SUCCESS;
2724 
2725         if (GET_OPCODE(op) < SLJIT_ADD) {
2726                 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2727                 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2728         }
2729 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2730                 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2731         compiler->skip_checks = 1;
2732 #endif
2733         return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
2734 #else /* SLJIT_CONFIG_X86_64 */
2735         if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2736                 if (reg_map[dst] <= 4) {
2737                         /* Low byte is accessible. */
2738                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
2739                         FAIL_IF(!inst);
2740                         INC_SIZE(3 + 3);
2741                         /* Set low byte to conditional flag. */
2742                         *inst++ = GROUP_0F;
2743                         *inst++ = cond_set;
2744                         *inst++ = MOD_REG | reg_map[dst];
2745 
2746                         *inst++ = GROUP_0F;
2747                         *inst++ = MOVZX_r_rm8;
2748                         *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2749                         return SLJIT_SUCCESS;
2750                 }
2751 
2752                 /* Low byte is not accessible. */
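                     /* Only encodings 0-3 (EAX..EBX) have byte forms on
                        x86-32. For the other registers, either CMOV picks
                        between a pre-loaded 0/1 pair, or EAX is freed by
                        temporarily exchanging it with TMP_REG1 so that
                        SETcc can target AL. */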
2753                 if (cpu_has_cmov == -1)
2754                         get_cpu_features();
2755 
2756                 if (cpu_has_cmov) {
2757                         EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2758                         /* A "xor reg, reg" here would overwrite the flags, so zero is loaded with a mov instead. */
2759                         EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2760 
2761                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
2762                         FAIL_IF(!inst);
2763                         INC_SIZE(3);
2764 
2765                         *inst++ = GROUP_0F;
2766                         /* cmovcc = setcc - 0x50. */
2767                         *inst++ = cond_set - 0x50;
2768                         *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2769                         return SLJIT_SUCCESS;
2770                 }
2771 
2772                 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2773                 FAIL_IF(!inst);
2774                 INC_SIZE(1 + 3 + 3 + 1);
2775                 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2776                 /* Set al to conditional flag. */
2777                 *inst++ = GROUP_0F;
2778                 *inst++ = cond_set;
2779                 *inst++ = MOD_REG | 0 /* eax */;
2780 
2781                 *inst++ = GROUP_0F;
2782                 *inst++ = MOVZX_r_rm8;
2783                 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2784                 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2785                 return SLJIT_SUCCESS;
2786         }
2787 
2788         if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
2789                 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
2790                 if (dst != SLJIT_R0) {
2791                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2792                         FAIL_IF(!inst);
2793                         INC_SIZE(1 + 3 + 2 + 1);
2794                         /* Set low register to conditional flag. */
2795                         *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2796                         *inst++ = GROUP_0F;
2797                         *inst++ = cond_set;
2798                         *inst++ = MOD_REG | 0 /* eax */;
2799                         *inst++ = OR_rm8_r8;
2800                         *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2801                         *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2802                 }
2803                 else {
2804                         inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2805                         FAIL_IF(!inst);
2806                         INC_SIZE(2 + 3 + 2 + 2);
2807                         /* Set low register to conditional flag. */
2808                         *inst++ = XCHG_r_rm;
2809                         *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2810                         *inst++ = GROUP_0F;
2811                         *inst++ = cond_set;
2812                         *inst++ = MOD_REG | 1 /* ecx */;
2813                         *inst++ = OR_rm8_r8;
2814                         *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2815                         *inst++ = XCHG_r_rm;
2816                         *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2817                 }
2818                 return SLJIT_SUCCESS;
2819         }
2820 
2821         /* Set TMP_REG1 to the bit. */
2822         inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2823         FAIL_IF(!inst);
2824         INC_SIZE(1 + 3 + 3 + 1);
2825         *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2826         /* Set al to conditional flag. */
2827         *inst++ = GROUP_0F;
2828         *inst++ = cond_set;
2829         *inst++ = MOD_REG | 0 /* eax */;
2830 
2831         *inst++ = GROUP_0F;
2832         *inst++ = MOVZX_r_rm8;
2833         *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2834 
2835         *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2836 
2837         if (GET_OPCODE(op) < SLJIT_ADD)
2838                 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2839 
2840 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2841                 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2842         compiler->skip_checks = 1;
2843 #endif
2844         return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2845 #endif /* SLJIT_CONFIG_X86_64 */
2846 }
2847 
2848 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
2849 {
2850         CHECK_ERROR();
2851         CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
2852         ADJUST_LOCAL_OFFSET(dst, dstw);
2853 
2854         CHECK_EXTRA_REGS(dst, dstw, (void)0);
2855 
2856 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2857         compiler->mode32 = 0;
2858 #endif
2859 
2860         ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2861 
2862 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2863         if (NOT_HALFWORD(offset)) {
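                     /* LEA cannot encode a displacement wider than 32 bits,
                        so the offset is first loaded into TMP_REG1 and the
                        base is formed with an lea of SLJIT_SP plus
                        TMP_REG1. */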
2864                 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2865 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2866                 SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2867                 return compiler->error;
2868 #else
2869                 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2870 #endif
2871         }
2872 #endif
2873 
2874         if (offset != 0)
2875                 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2876         return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2877 }
2878 
2879 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
2880 {
2881         sljit_ub *inst;
2882         struct sljit_const *const_;
2883 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2884         sljit_si reg;
2885 #endif
2886 
2887         CHECK_ERROR_PTR();
2888         CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2889         ADJUST_LOCAL_OFFSET(dst, dstw);
2890 
2891         CHECK_EXTRA_REGS(dst, dstw, (void)0);
2892 
2893         const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2894         PTR_FAIL_IF(!const_);
2895         set_const(const_, compiler);
2896 
2897 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2898         compiler->mode32 = 0;
2899         reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2900 
2901         if (emit_load_imm64(compiler, reg, init_value))
2902                 return NULL;
2903 #else
2904         if (dst == SLJIT_UNUSED)
2905                 dst = TMP_REG1;
2906 
2907         if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2908                 return NULL;
2909 #endif
2910 
2911         inst = (sljit_ub*)ensure_buf(compiler, 2);
2912         PTR_FAIL_IF(!inst);
2913 
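             /* Zero-length marker: (0, 1) tags the address of the just
                emitted immediate so sljit_set_const() can patch it later. */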
2914         *inst++ = 0;
2915         *inst++ = 1;
2916 
2917 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2918         if (dst & SLJIT_MEM)
2919                 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2920                         return NULL;
2921 #endif
2922 
2923         return const_;
2924 }
2925 
2926 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2927 {
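     /* On x86-32 the 4-byte field at addr holds a rel32 displacement,
        measured from the end of the field; on x86-64 it is the absolute
        8-byte operand of a mov, so the target is stored directly. */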
2928 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2929         *(sljit_sw*)addr = new_addr - (addr + 4);
2930 #else
2931         *(sljit_uw*)addr = new_addr;
2932 #endif
2933 }
2934 
2935 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2936 {
2937         *(sljit_sw*)addr = new_constant;
2938 }
2939 
2940 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void)
2941 {
2942 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2943         if (cpu_has_sse2 == -1)
2944                 get_cpu_features();
2945         return cpu_has_sse2;
2946 #else
2947         return 1;
2948 #endif
2949 }
2950 
2951 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void)
2952 {
2953         if (cpu_has_cmov == -1)
2954                 get_cpu_features();
2955         return cpu_has_cmov;
2956 }
2957 
2958 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
2959         sljit_si type,
2960         sljit_si dst_reg,
2961         sljit_si src, sljit_sw srcw)
2962 {
2963         sljit_ub* inst;
2964 
2965         CHECK_ERROR();
2966 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2967         CHECK_ARGUMENT(sljit_x86_is_cmov_available());
2968         CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP)));
2969         CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED);
2970         CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP));
2971         FUNCTION_CHECK_SRC(src, srcw);
2972 #endif
2973 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
2974         if (SLJIT_UNLIKELY(!!compiler->verbose)) {
2975                 fprintf(compiler->verbose, "  x86_cmov%s %s%s, ",
2976                         !(dst_reg & SLJIT_INT_OP) ? "" : ".i",
2977                         JUMP_PREFIX(type), jump_names[type & 0xff]);
2978                 sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP);
2979                 fprintf(compiler->verbose, ", ");
2980                 sljit_verbose_param(compiler, src, srcw);
2981                 fprintf(compiler->verbose, "\n");
2982         }
2983 #endif
2984 
2985         ADJUST_LOCAL_OFFSET(src, srcw);
2986         CHECK_EXTRA_REGS(src, srcw, (void)0);
2987 
2988 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2989         compiler->mode32 = dst_reg & SLJIT_INT_OP;
2990 #endif
2991         dst_reg &= ~SLJIT_INT_OP;
2992 
2993         if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
2994                 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
2995                 src = TMP_REG1;
2996                 srcw = 0;
2997         }
2998 
2999         inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
3000         FAIL_IF(!inst);
3001         *inst++ = GROUP_0F;
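             /* cmovcc = jcc - 0x40 (CMOVcc is 0F 40+cc, Jcc near is 0F 80+cc). */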
3002         *inst = get_jump_code(type & 0xff) - 0x40;
3003         return SLJIT_SUCCESS;
3004 }
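
     /* Illustrative usage (a hedged sketch, not part of the original
        source): after a compare, conditionally replace R0 with R1 on a
        hypothetical compiler object C:

            sljit_x86_emit_cmov(C, SLJIT_LESS, SLJIT_R0, SLJIT_R1, 0);

        SLJIT_INT_OP may be or-ed into dst_reg for a 32-bit move. */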
