diff --git a/NEWS b/NEWS index 5671a4381333..120e717e59f7 100644 --- a/NEWS +++ b/NEWS @@ -162,6 +162,7 @@ PHP NEWS - PCRE: . Upgrade bundled pcre2lib to version 10.43. (nielsdos) . Add "/r" modifier. (Ayesh) + . Upgrade bundled pcre2lib to version 10.44. (Ayesh) - PDO: . Fixed setAttribute and getAttribute. (SakiTakamachi) diff --git a/UPGRADING b/UPGRADING index 8d127d094a06..158c6be93afe 100644 --- a/UPGRADING +++ b/UPGRADING @@ -90,7 +90,7 @@ PHP 8.4 UPGRADE NOTES of JIT startup initialization issues. - PCRE: - . The bundled pcre2lib has been updated to version 10.43. + . The bundled pcre2lib has been updated to version 10.44. As a consequence, this means {,3} is now recognized as a quantifier instead of as text. Furthermore, the meaning of some character classes in UCP mode has changed. Consult https://github.com/PCRE2Project/pcre2/blob/master/NEWS @@ -243,10 +243,12 @@ PHP 8.4 UPGRADE NOTES . Added support for the unix timestamp extension for zip archives. - PCRE: - . The bundled pcre2lib has been updated to version 10.43. + . The bundled pcre2lib has been updated to version 10.44. As a consequence, LoongArch JIT support has been added, spaces are now allowed between braces in Perl-compatible items, and variable-length lookbehind assertions are now supported. + . With pcre2lib version 10.44, the maximum length of named capture groups + has changed from 32 to 128. . Added support for the "r" (PCRE2_EXTRA_CASELESS_RESTRICT) modifier, as well as the (?r) mode modifier. When enabled along with the case-insensitive modifier ("i"), the expression locks out mixing of ASCII and non-ASCII diff --git a/ext/pcre/pcre2lib/pcre2.h b/ext/pcre/pcre2lib/pcre2.h index d7a8ff5201e1..a322d9f2d56f 100644 --- a/ext/pcre/pcre2lib/pcre2.h +++ b/ext/pcre/pcre2lib/pcre2.h @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ #define PCRE2_MAJOR 10 -#define PCRE2_MINOR 43 +#define PCRE2_MINOR 44 #define PCRE2_PRERELEASE -#define PCRE2_DATE 2024-02-16 +#define PCRE2_DATE 2024-06-07 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate @@ -603,6 +603,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ @@ -901,6 +903,7 @@ pcre2_compile are called by application code. */ #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) #define pcre2_set_max_varlookbehind PCRE2_SUFFIX(pcre2_set_max_varlookbehind_) #define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_) +#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_) #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) diff --git a/ext/pcre/pcre2lib/pcre2_compile.c b/ext/pcre/pcre2lib/pcre2_compile.c index 8b364977c464..8e6787aba38e 100644 --- a/ext/pcre/pcre2lib/pcre2_compile.c +++ b/ext/pcre/pcre2lib/pcre2_compile.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -808,7 +808,8 @@ enum { ERR0 = COMPILE_ERROR_BASE, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90, - ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100 }; + ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100, + ERR101 }; /* This is a table of start-of-pattern options such as (*UTF) and settings such as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward @@ -7549,7 +7550,8 @@ for (;; pptr++) if (lengthptr != NULL) { PCRE2_SIZE delta; - if (PRIV(ckd_smul)(&delta, repeat_min - 1, length_prevgroup) || + if (PRIV(ckd_smul)(&delta, repeat_min - 1, + (int)length_prevgroup) || OFLOW_MAX - *lengthptr < delta) { *errorcodeptr = ERR20; @@ -7599,7 +7601,7 @@ for (;; pptr++) { PCRE2_SIZE delta; if (PRIV(ckd_smul)(&delta, repeat_max, - length_prevgroup + 1 + 2 + 2*LINK_SIZE) || + (int)length_prevgroup + 1 + 2 + 2*LINK_SIZE) || OFLOW_MAX + (2 + 2*LINK_SIZE) - *lengthptr < delta) { *errorcodeptr = ERR20; @@ -9908,7 +9910,7 @@ do *bptr |= branchlength; /* branchlength never more than 65535 */ bptr = *pptrptr; } -while (*bptr == META_ALT); +while (META_CODE(*bptr) == META_ALT); /* If any branch is of variable length, the whole lookbehind is of variable length. If the maximum length of any branch exceeds the maximum for variable @@ -10601,14 +10603,21 @@ if (length > MAX_PATTERN_SIZE) goto HAD_CB_ERROR; } -/* Compute the size of, and then get and initialize, the data block for storing -the compiled pattern and names table. Integer overflow should no longer be -possible because nowadays we limit the maximum value of cb.names_found and -cb.name_entry_size. */ +/* Compute the size of, then, if not too large, get and initialize the data +block for storing the compiled pattern and names table. Integer overflow should +no longer be possible because nowadays we limit the maximum value of +cb.names_found and cb.name_entry_size. */ re_blocksize = sizeof(pcre2_real_code) + CU2BYTES(length + (PCRE2_SIZE)cb.names_found * (PCRE2_SIZE)cb.name_entry_size); + +if (re_blocksize > ccontext->max_pattern_compiled_length) + { + errorcode = ERR101; + goto HAD_CB_ERROR; + } + re = (pcre2_real_code *) ccontext->memctl.malloc(re_blocksize, ccontext->memctl.memory_data); if (re == NULL) diff --git a/ext/pcre/pcre2lib/pcre2_context.c b/ext/pcre/pcre2lib/pcre2_context.c index 0bc2ea0b047c..9edbd1b2ae86 100644 --- a/ext/pcre/pcre2lib/pcre2_context.c +++ b/ext/pcre/pcre2lib/pcre2_context.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -136,6 +136,7 @@ const pcre2_compile_context PRIV(default_compile_context) = { NULL, /* Stack guard data */ PRIV(default_tables), /* Character tables */ PCRE2_UNSET, /* Max pattern length */ + PCRE2_UNSET, /* Max pattern compiled length */ BSR_DEFAULT, /* Backslash R default */ NEWLINE_DEFAULT, /* Newline convention */ PARENS_NEST_LIMIT, /* As it says */ @@ -352,6 +353,13 @@ ccontext->max_pattern_length = length; return 0; } +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_max_pattern_compiled_length(pcre2_compile_context *ccontext, PCRE2_SIZE length) +{ +ccontext->max_pattern_compiled_length = length; +return 0; +} + PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline) { diff --git a/ext/pcre/pcre2lib/pcre2_error.c b/ext/pcre/pcre2lib/pcre2_error.c index 1569f6315f5c..7fa997aa9510 100644 --- a/ext/pcre/pcre2lib/pcre2_error.c +++ b/ext/pcre/pcre2lib/pcre2_error.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -189,6 +189,7 @@ static const unsigned char compile_error_texts[] = "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0" /* 100 */ "branch too long in variable-length lookbehind assertion\0" + "compiled pattern would be longer than the limit set by the application\0" ; /* Match-time and UTF error texts are in the same format. */ diff --git a/ext/pcre/pcre2lib/pcre2_extuni.c b/ext/pcre/pcre2lib/pcre2_extuni.c index b23946b0d16e..4ed9f00c55a1 100644 --- a/ext/pcre/pcre2lib/pcre2_extuni.c +++ b/ext/pcre/pcre2lib/pcre2_extuni.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -75,7 +75,11 @@ return NULL; * Match an extended grapheme sequence * *************************************************/ -/* +/* NOTE: The logic contained in this function is replicated in three special- +purpose functions in the pcre2_jit_compile.c module. If the logic below is +changed, they must be kept in step so that the interpreter and the JIT have the +same behaviour. + Arguments: c the first character eptr pointer to next character @@ -92,6 +96,7 @@ PCRE2_SPTR PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject, PCRE2_SPTR end_subject, BOOL utf, int *xcount) { +BOOL was_ep_ZWJ = FALSE; int lgb = UCD_GRAPHBREAK(c); while (eptr < end_subject) @@ -102,6 +107,12 @@ while (eptr < end_subject) rgb = UCD_GRAPHBREAK(c); if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break; + /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was + preceded by Extended Pictographic. */ + + if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) + break; + /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ @@ -129,12 +140,15 @@ while (eptr < end_subject) if ((ricount & 1) != 0) break; /* Grapheme break required */ } - /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this - allows any number of them before a following Extended_Pictographic. */ + /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in + between; see next statement). */ + + was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); + + /* If Extend follows Extended_Pictographic, do not update lgb; this allows + any number of them before a following ZWJ. */ - if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || - lgb != ucp_gbExtended_Pictographic) - lgb = rgb; + if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb; eptr += len; if (xcount != NULL) *xcount += 1; diff --git a/ext/pcre/pcre2lib/pcre2_intmodedep.h b/ext/pcre/pcre2lib/pcre2_intmodedep.h index 5fcddce5fe1f..9bd9e694a496 100644 --- a/ext/pcre/pcre2lib/pcre2_intmodedep.h +++ b/ext/pcre/pcre2lib/pcre2_intmodedep.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -568,6 +568,7 @@ typedef struct pcre2_real_compile_context { void *stack_guard_data; const uint8_t *tables; PCRE2_SIZE max_pattern_length; + PCRE2_SIZE max_pattern_compiled_length; uint16_t bsr_convention; uint16_t newline_convention; uint32_t parens_nest_limit; diff --git a/ext/pcre/pcre2lib/pcre2_jit_compile.c b/ext/pcre/pcre2lib/pcre2_jit_compile.c index 050063ec6d1f..92f4fb858b10 100644 --- a/ext/pcre/pcre2lib/pcre2_jit_compile.c +++ b/ext/pcre/pcre2lib/pcre2_jit_compile.c @@ -8,7 +8,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel This module by Zoltan Herczeg Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -288,7 +288,7 @@ typedef struct bracket_backtrack { /* For OP_ONCE. Less than 0 if not needed. */ int framesize; /* For brackets with >3 alternatives. */ - struct sljit_put_label *matching_put_label; + struct sljit_jump *matching_mov_addr; } u; /* Points to our private memory word on the stack. */ int private_data_ptr; @@ -5891,7 +5891,7 @@ while (TRUE) chr++; } while (byte != 0); - chr = (chr + 7) & ~7; + chr = (chr + 7) & (sljit_u32)(~7); } } while (chars->count != 255 && bytes < bytes_end); @@ -5951,7 +5951,10 @@ while (TRUE) chr = *cc; #ifdef SUPPORT_UNICODE if (common->ucp && chr > 127) - othercase[0] = UCD_OTHERCASE(chr); + { + chr = UCD_OTHERCASE(chr); + othercase[0] = (chr == (PCRE2_UCHAR)chr) ? chr : *cc; + } else #endif othercase[0] = TABLE_GET(chr, common->fcc, chr); @@ -6183,25 +6186,34 @@ if (max < 1) /* Convert last_count to priority. */ for (i = 0; i < max; i++) { - SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count); + SLJIT_ASSERT(chars[i].last_count <= chars[i].count); - if (chars[i].count == 1) + switch (chars[i].count) { + case 0: + chars[i].count = 255; + chars[i].last_count = 0; + break; + + case 1: chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5; /* Simplifies algorithms later. */ chars[i].chars[1] = chars[i].chars[0]; - } - else if (chars[i].count == 2) - { + break; + + case 2: SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]); if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1])) chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4; else chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2; - } - else + break; + + default: chars[i].last_count = (chars[i].count == 255) ? 0 : 1; + break; + } } #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD @@ -6941,7 +6953,7 @@ int i, byte, length = 0; bit = bits[0] & 0x1; /* All bits will be zero or one (since bit is zero or one). */ -all = -bit; +all = (sljit_u8)-bit; for (i = 0; i < 256; ) { @@ -6958,7 +6970,7 @@ for (i = 0; i < 256; ) ranges[length] = i; length++; bit = cbit; - all = -cbit; + all = (sljit_u8)-cbit; /* sign extend bit into byte */ } i++; } @@ -7102,7 +7114,7 @@ for (i = 0; i < 32; i++) byte = bits[i]; if (nclass) - byte = ~byte; + byte = (sljit_u8)~byte; j = 0; while (byte != 0) @@ -8003,7 +8015,7 @@ if (unicode_status & XCLASS_NEEDS_UCD) if (cc[-1] == XCL_NOTPROP) invertcmp ^= 0x1; - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f)); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f))); add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); } cc += 2; @@ -8114,7 +8126,7 @@ if (unicode_status & XCLASS_NEEDS_UCD) invertcmp ^= 0x1; } - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f)); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f))); add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); if (jump != NULL) @@ -8685,6 +8697,10 @@ return cc; #if PCRE2_CODE_UNIT_WIDTH != 32 +/* The code in this function copies the logic of the interpreter function that +is defined in the pcre2_extuni.c source. If that code is updated, this +function, and those below it, must be kept in step (note by PH, June 2024). */ + static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc) { PCRE2_SPTR start_subject = args->begin; @@ -8692,6 +8708,7 @@ PCRE2_SPTR end_subject = args->end; int lgb, rgb, ricount; PCRE2_SPTR prevcc, endcc, bptr; BOOL first = TRUE; +BOOL was_ep_ZWJ = FALSE; uint32_t c; prevcc = cc; @@ -8712,6 +8729,12 @@ do if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; + /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was + preceded by Extended Pictographic. */ + + if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) + break; + /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ @@ -8736,11 +8759,15 @@ do if ((ricount & 1) != 0) break; /* Grapheme break required */ } - /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this - allows any number of them before a following Extended_Pictographic. */ + /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in + between; see next statement). */ + + was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); - if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || - lgb != ucp_gbExtended_Pictographic) + /* If Extend follows Extended_Pictographic, do not update lgb; this allows + any number of them before a following ZWJ. */ + + if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb; prevcc = endcc; @@ -8753,6 +8780,10 @@ return endcc; #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ +/* The code in this function copies the logic of the interpreter function that +is defined in the pcre2_extuni.c source. If that code is updated, this +function, and the one below it, must be kept in step (note by PH, June 2024). */ + static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc) { PCRE2_SPTR start_subject = args->begin; @@ -8760,6 +8791,7 @@ PCRE2_SPTR end_subject = args->end; int lgb, rgb, ricount; PCRE2_SPTR prevcc, endcc, bptr; BOOL first = TRUE; +BOOL was_ep_ZWJ = FALSE; uint32_t c; prevcc = cc; @@ -8780,6 +8812,12 @@ do if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; + /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was + preceded by Extended Pictographic. */ + + if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) + break; + /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ @@ -8803,11 +8841,15 @@ do break; /* Grapheme break required */ } - /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this - allows any number of them before a following Extended_Pictographic. */ + /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in + between; see next statement). */ + + was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); - if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || - lgb != ucp_gbExtended_Pictographic) + /* If Extend follows Extended_Pictographic, do not update lgb; this allows + any number of them before a following ZWJ. */ + + if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb; prevcc = endcc; @@ -8818,6 +8860,10 @@ while (cc < end_subject); return endcc; } +/* The code in this function copies the logic of the interpreter function that +is defined in the pcre2_extuni.c source. If that code is updated, this +function must be kept in step (note by PH, June 2024). */ + static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc) { PCRE2_SPTR start_subject = args->begin; @@ -8825,6 +8871,7 @@ PCRE2_SPTR end_subject = args->end; int lgb, rgb, ricount; PCRE2_SPTR bptr; uint32_t c; +BOOL was_ep_ZWJ = FALSE; /* Patch by PH */ /* GETCHARINC(c, cc); */ @@ -8848,6 +8895,12 @@ while (cc < end_subject) if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; + /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was + preceded by Extended Pictographic. */ + + if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) + break; + /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ @@ -8875,11 +8928,15 @@ while (cc < end_subject) break; /* Grapheme break required */ } - /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this - allows any number of them before a following Extended_Pictographic. */ + /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in + between; see next statement). */ + + was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); - if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || - lgb != ucp_gbExtended_Pictographic) + /* If Extend follows Extended_Pictographic, do not update lgb; this allows + any number of them before a following ZWJ. */ + + if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb; cc++; @@ -9836,7 +9893,7 @@ BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL(); return cc + 1 + LINK_SIZE; } -static sljit_s32 SLJIT_FUNC SLJIT_FUNC_ATTRIBUTE do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) +static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) { PCRE2_SPTR begin; PCRE2_SIZE *ovector; @@ -11227,7 +11284,7 @@ if (has_alternatives) if (i <= 3) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); else - BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); + BACKTRACK_AS(bracket_backtrack)->u.matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); } if (ket != OP_KETRMAX) BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); @@ -11314,17 +11371,22 @@ if (bra == OP_BRAMINZERO) /* Continue to the normal backtrack. */ } -if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO) +if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT)) count_match(common); cc += 1 + LINK_SIZE; if (opcode == OP_ONCE) { + int data; + int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize; + + SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2); /* We temporarily encode the needs_control_head in the lowest bit. - Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns - the same value for small signed numbers (including negative numbers). */ - BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0); + The real value should be short enough for this operation to work + without triggering Undefined Behaviour. */ + data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 1 : 0)); + BACKTRACK_AS(bracket_backtrack)->u.framesize = data; } return cc + repeat_length; } @@ -13005,7 +13067,7 @@ struct sljit_jump *once = NULL; struct sljit_jump *cond = NULL; struct sljit_label *rmin_label = NULL; struct sljit_label *exact_label = NULL; -struct sljit_put_label *put_label = NULL; +struct sljit_jump *mov_addr = NULL; if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) { @@ -13166,8 +13228,8 @@ else if (has_alternatives) { sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); - SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label); - sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL()); + SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr); + sljit_set_label(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr, LABEL()); sljit_emit_op0(compiler, SLJIT_ENDBR); } else @@ -13320,7 +13382,7 @@ if (has_alternatives) if (alt_max <= 3) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count); else - put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); + mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); } if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0) @@ -13346,7 +13408,7 @@ if (has_alternatives) } else { - sljit_set_put_label(put_label, LABEL()); + sljit_set_label(mov_addr, LABEL()); sljit_emit_op0(compiler, SLJIT_ENDBR); } } @@ -13878,7 +13940,7 @@ jump_list *match = NULL; struct sljit_jump *next_alt = NULL; struct sljit_jump *accept_exit = NULL; struct sljit_label *quit; -struct sljit_put_label *put_label = NULL; +struct sljit_jump *mov_addr = NULL; /* Recurse captures then. */ common->then_trap = NULL; @@ -13941,7 +14003,7 @@ while (1) if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) { if (alt_max > 3) - put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1)); + mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(1)); else OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count); } @@ -13974,7 +14036,7 @@ while (1) if (alt_max > 3) { sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); - sljit_set_put_label(put_label, LABEL()); + sljit_set_label(mov_addr, LABEL()); sljit_emit_op0(compiler, SLJIT_ENDBR); } else @@ -13985,7 +14047,7 @@ while (1) } else if (alt_max > 3) { - sljit_set_put_label(put_label, LABEL()); + sljit_set_label(mov_addr, LABEL()); sljit_emit_op0(compiler, SLJIT_ENDBR); } else @@ -14303,7 +14365,7 @@ if (common->has_then) set_then_offsets(common, common->start, NULL); } -compiler = sljit_create_compiler(allocator_data, NULL); +compiler = sljit_create_compiler(allocator_data); if (!compiler) { SLJIT_FREE(common->optimized_cbracket, allocator_data); @@ -14718,7 +14780,7 @@ if (common->getucdtype != NULL) SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); -executable_func = sljit_generate_code(compiler); +executable_func = sljit_generate_code(compiler, 0, NULL); executable_size = sljit_get_generated_code_size(compiler); sljit_free_compiler(compiler); diff --git a/ext/pcre/pcre2lib/pcre2_jit_misc.c b/ext/pcre/pcre2lib/pcre2_jit_misc.c index bb6a5589cb71..c3abc0b33bd4 100644 --- a/ext/pcre/pcre2lib/pcre2_jit_misc.c +++ b/ext/pcre/pcre2lib/pcre2_jit_misc.c @@ -141,8 +141,8 @@ if (startsize == 0 || maxsize == 0 || maxsize > SIZE_MAX - STACK_GROWTH_RATE) return NULL; if (startsize > maxsize) startsize = maxsize; -startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); -maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); +startsize = (startsize + STACK_GROWTH_RATE - 1) & (size_t)(~(STACK_GROWTH_RATE - 1)); +maxsize = (maxsize + STACK_GROWTH_RATE - 1) & (size_t)(~(STACK_GROWTH_RATE - 1)); jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext); if (jit_stack == NULL) return NULL; diff --git a/ext/pcre/pcre2lib/pcre2_jit_simd_inc.h b/ext/pcre/pcre2lib/pcre2_jit_simd_inc.h index 783a85f50e2a..502977fc3204 100644 --- a/ext/pcre/pcre2lib/pcre2_jit_simd_inc.h +++ b/ext/pcre/pcre2lib/pcre2_jit_simd_inc.h @@ -2176,7 +2176,7 @@ struct sljit_label *restart; struct sljit_jump *jump[2]; SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); -SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset())); +SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset())); /* Initialize. */ if (common->match_end_ptr != 0) diff --git a/ext/pcre/pcre2lib/pcre2_match.c b/ext/pcre/pcre2lib/pcre2_match.c index b4a970313d72..6c422c2e5ef9 100644 --- a/ext/pcre/pcre2lib/pcre2_match.c +++ b/ext/pcre/pcre2lib/pcre2_match.c @@ -5862,7 +5862,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, { ptrdiff_t diff = Feptr - mb->start_subject; - uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? diff : 0); + uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? (int)diff : 0); if (Lmin > available) RRETURN(MATCH_NOMATCH); if (Lmax > available) Lmax = available; Feptr -= Lmax; diff --git a/ext/pcre/pcre2lib/pcre2_tables.c b/ext/pcre/pcre2lib/pcre2_tables.c index e00252f1ebc0..097a1acca871 100644 --- a/ext/pcre/pcre2lib/pcre2_tables.c +++ b/ext/pcre/pcre2lib/pcre2_tables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -171,9 +171,9 @@ are implementing). 6. Do not break after Prepend characters. 7. Do not break within emoji modifier sequences or emoji zwj sequences. That - is, do not break between characters with the Extended_Pictographic property. - Extend and ZWJ characters are allowed between the characters; this cannot be - represented in this table, the code has to deal with it. + is, do not break between characters with the Extended_Pictographic property + if a ZWJ intervenes. Extend characters are allowed between the characters; + this cannot be represented in this table, the code has to deal with it. 8. Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there are an odd number of RI characters @@ -203,8 +203,8 @@ const uint32_t PRIV(ucp_gbtable)[] = { ESZ|(1u< #include -#define SLJIT_MAP_JIT (get_map_jit_flag()) #define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) +#ifdef MAP_JIT +#define SLJIT_MAP_JIT (get_map_jit_flag()) static SLJIT_INLINE int get_map_jit_flag(void) { size_t page_size; @@ -70,6 +71,9 @@ static SLJIT_INLINE int get_map_jit_flag(void) } return map_jit_flag; } +#else /* !defined(MAP_JIT) */ +#define SLJIT_MAP_JIT (0) +#endif #elif defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM diff --git a/ext/pcre/pcre2lib/sljit/allocator_src/sljitExecAllocatorCore.c b/ext/pcre/pcre2lib/sljit/allocator_src/sljitExecAllocatorCore.c index 6cd391104c22..4e1119bc40c9 100644 --- a/ext/pcre/pcre2lib/sljit/allocator_src/sljitExecAllocatorCore.c +++ b/ext/pcre/pcre2lib/sljit/allocator_src/sljitExecAllocatorCore.c @@ -181,8 +181,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) header->executable_offset = free_block->header.executable_offset; #endif /* SLJIT_HAS_EXECUTABLE_OFFSET */ AS_BLOCK_HEADER(header, size)->prev_size = size; - } - else { + } else { sljit_remove_free_block(free_block); header = (struct block_header*)free_block; size = chunk_size; @@ -230,26 +229,25 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) #endif /* SLJIT_HAS_EXECUTABLE_OFFSET */ sljit_insert_free_block(free_block, chunk_size); next_header = AS_BLOCK_HEADER(free_block, chunk_size); - } - else { + } else { /* All space belongs to this allocation. */ allocated_size += chunk_size; header->size = chunk_size; next_header = AS_BLOCK_HEADER(header, chunk_size); } - SLJIT_ALLOCATOR_UNLOCK(); next_header->size = 1; next_header->prev_size = chunk_size; #ifdef SLJIT_HAS_EXECUTABLE_OFFSET next_header->executable_offset = executable_offset; #endif /* SLJIT_HAS_EXECUTABLE_OFFSET */ + SLJIT_ALLOCATOR_UNLOCK(); return MEM_START(header); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void *ptr) { struct block_header *header; - struct free_block* free_block; + struct free_block *free_block; SLJIT_ALLOCATOR_LOCK(); header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header)); @@ -269,8 +267,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) free_block->size += header->size; header = AS_BLOCK_HEADER(free_block, free_block->size); header->prev_size = free_block->size; - } - else { + } else { free_block = (struct free_block*)header; sljit_insert_free_block(free_block, header->size); } @@ -308,7 +305,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) free_block = free_blocks; while (free_block) { next_free_block = free_block->next; - if (!free_block->header.prev_size && + if (!free_block->header.prev_size && AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) { total_size -= free_block->size; sljit_remove_free_block(free_block); @@ -317,14 +314,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) free_block = next_free_block; } - SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks)); + SLJIT_ASSERT(total_size || (!total_size && !free_blocks)); SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1); SLJIT_ALLOCATOR_UNLOCK(); } #ifdef SLJIT_HAS_EXECUTABLE_OFFSET -SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr) +SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code) { - return ((struct block_header *)(ptr))[-1].executable_offset; + return ((struct block_header*)SLJIT_CODE_TO_PTR(code))[-1].executable_offset; } #endif /* SLJIT_HAS_EXECUTABLE_OFFSET */ diff --git a/ext/pcre/pcre2lib/sljit/sljitConfigInternal.h b/ext/pcre/pcre2lib/sljit/sljitConfigInternal.h index ce4e7b04ec42..de06dd8e0c02 100644 --- a/ext/pcre/pcre2lib/sljit/sljitConfigInternal.h +++ b/ext/pcre/pcre2lib/sljit/sljitConfigInternal.h @@ -49,8 +49,8 @@ extern "C" { sljit_s16, sljit_u16 : signed and unsigned 16 bit integer type sljit_s32, sljit_u32 : signed and unsigned 32 bit integer type sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer - sljit_p : unsgined pointer value (usually the same as sljit_uw, but - some 64 bit ABIs may use 32 bit pointers) + sljit_sp, sljit_up : signed and unsigned pointer value (usually the same as + sljit_uw, but some 64 bit ABIs may use 32 bit pointers) sljit_f32 : 32 bit single precision floating point value sljit_f64 : 64 bit double precision floating point value @@ -98,6 +98,10 @@ extern "C" { SLJIT_TMP_R(i) : accessing temporary registers SLJIT_TMP_FR0 .. FR9 : accessing temporary floating point registers SLJIT_TMP_FR(i) : accessing temporary floating point registers + SLJIT_TMP_DEST_REG : a temporary register for results + SLJIT_TMP_MEM_REG : a temporary base register for accessing memory + (can be the same as SLJIT_TMP_DEST_REG) + SLJIT_TMP_DEST_FREG : a temporary register for float results SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper) SLJIT_F64_SECOND(reg) : provides the register index of the second 32 bit part of a 64 bit @@ -132,23 +136,23 @@ extern "C" { */ #ifndef SLJIT_MALLOC -#define SLJIT_MALLOC(size, allocator_data) malloc(size) +#define SLJIT_MALLOC(size, allocator_data) (malloc(size)) #endif #ifndef SLJIT_FREE -#define SLJIT_FREE(ptr, allocator_data) free(ptr) +#define SLJIT_FREE(ptr, allocator_data) (free(ptr)) #endif #ifndef SLJIT_MEMCPY -#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len) +#define SLJIT_MEMCPY(dest, src, len) (memcpy(dest, src, len)) #endif #ifndef SLJIT_MEMMOVE -#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len) +#define SLJIT_MEMMOVE(dest, src, len) (memmove(dest, src, len)) #endif #ifndef SLJIT_ZEROMEM -#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len) +#define SLJIT_ZEROMEM(dest, len) (memset(dest, 0, len)) #endif /***************************/ @@ -198,7 +202,7 @@ extern "C" { /* Type of public API functions. */ /*********************************/ -#ifndef SLJIT_API_FUNC_ATTRIBUTE +#ifndef SLJIT_API_FUNC_ATTRIBUTE #if (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) /* Static ABI functions. For all-in-one programs. */ @@ -358,7 +362,8 @@ typedef long int sljit_sw; #endif /* _WIN32 */ #endif -typedef sljit_uw sljit_p; +typedef sljit_sw sljit_sp; +typedef sljit_uw sljit_up; /* Floating point types. */ typedef float sljit_f32; @@ -399,6 +404,10 @@ typedef double sljit_f64; #define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT #define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT #define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_ZERO #else #error "Result for float to integer conversion is not defined" #endif @@ -522,19 +531,6 @@ typedef double sljit_f64; #define SLJIT_FUNC #endif /* !SLJIT_FUNC */ -/* Disable instrumentation for these functions as they may not be sound */ -#ifndef SLJIT_FUNC_ATTRIBUTE -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) -#define SLJIT_FUNC_ATTRIBUTE __attribute__((no_sanitize("memory"))) -#endif /* __has_feature(memory_sanitizer) */ -#endif /* defined(__has_feature) */ -#endif - -#ifndef SLJIT_FUNC_ATTRIBUTE -#define SLJIT_FUNC_ATTRIBUTE -#endif - #ifndef SLJIT_INDIRECT_CALL #if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (!defined _CALL_ELF || _CALL_ELF == 1)) \ || ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX) @@ -570,9 +566,9 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); #endif /* SLJIT_FREE_EXEC */ #if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) -SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); -#define SLJIT_EXEC_OFFSET(ptr) sljit_exec_offset(ptr) -#endif +SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); +#define SLJIT_EXEC_OFFSET(code) sljit_exec_offset(code) +#endif /* SLJIT_PROT_EXECUTABLE_ALLOCATOR */ #endif /* SLJIT_EXECUTABLE_ALLOCATOR */ @@ -592,6 +588,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R0 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE (8 * SSIZE_OF(sw)) #define SLJIT_PREF_SHIFT_REG SLJIT_R2 #define SLJIT_MASKED_SHIFT 1 @@ -612,6 +611,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10 #define SLJIT_LOCALS_OFFSET_BASE (4 * SSIZE_OF(sw)) #endif /* !_WIN64 */ +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R0 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_PREF_SHIFT_REG SLJIT_R3 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 @@ -624,6 +626,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE 0 #elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) @@ -634,6 +639,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R0 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw)) #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 @@ -646,6 +654,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX) #define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw)) #elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) @@ -670,6 +681,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #endif #define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 3 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 @@ -681,6 +695,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE 0 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 @@ -714,6 +731,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R2 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE #define SLJIT_MASKED_SHIFT 1 @@ -725,6 +745,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE 0 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 @@ -738,6 +761,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 0 +#define SLJIT_TMP_DEST_REG 0 +#define SLJIT_TMP_MEM_REG 0 +#define SLJIT_TMP_DEST_FREG 0 #define SLJIT_LOCALS_OFFSET_BASE 0 #endif diff --git a/ext/pcre/pcre2lib/sljit/sljitLir.c b/ext/pcre/pcre2lib/sljit/sljitLir.c index 6f19300081a1..2dca17cd6f07 100644 --- a/ext/pcre/pcre2lib/sljit/sljitLir.c +++ b/ext/pcre/pcre2lib/sljit/sljitLir.c @@ -152,28 +152,43 @@ #define SLJIT_SIMD_TYPE_MASK2(m) ((sljit_s32)0xc0000fff & ~(SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST | (m))) /* Jump flags. */ -#define JUMP_LABEL 0x1 -#define JUMP_ADDR 0x2 +#define JUMP_ADDR 0x1 +#define JUMP_MOV_ADDR 0x2 /* SLJIT_REWRITABLE_JUMP is 0x1000. */ #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) -# define PATCH_MB 0x4 -# define PATCH_MW 0x8 +# define PATCH_MB 0x04 +# define PATCH_MW 0x08 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) # define PATCH_MD 0x10 -#endif +# define MOV_ADDR_HI 0x20 +# define JUMP_MAX_SIZE ((sljit_uw)(10 + 3)) +# define CJUMP_MAX_SIZE ((sljit_uw)(2 + 10 + 3)) +#else /* !SLJIT_CONFIG_X86_64 */ +# define JUMP_MAX_SIZE ((sljit_uw)5) +# define CJUMP_MAX_SIZE ((sljit_uw)6) +#endif /* SLJIT_CONFIG_X86_64 */ # define TYPE_SHIFT 13 +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +/* Bits 7..12 is for debug jump size, SLJIT_REWRITABLE_JUMP is 0x1000 */ +# define JUMP_SIZE_SHIFT 7 +#endif /* SLJIT_DEBUG */ #endif /* SLJIT_CONFIG_X86 */ #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) -# define IS_BL 0x4 -# define PATCH_B 0x8 -#endif /* SLJIT_CONFIG_ARM_V6 || SLJIT_CONFIG_ARM_V6 */ +# define IS_BL 0x04 +# define PATCH_B 0x08 +#endif /* SLJIT_CONFIG_ARM_V6 || SLJIT_CONFIG_ARM_V7 */ #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) # define CPOOL_SIZE 512 #endif /* SLJIT_CONFIG_ARM_V6 */ +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +# define JUMP_SIZE_SHIFT 26 +# define JUMP_MAX_SIZE ((sljit_uw)3) +#endif /* SLJIT_CONFIG_ARM_V7 */ + #if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) # define IS_COND 0x04 # define IS_BL 0x08 @@ -181,25 +196,30 @@ # define PATCH_TYPE1 0x10 /* conditional + imm20 */ # define PATCH_TYPE2 0x20 - /* IT + imm24 */ -# define PATCH_TYPE3 0x30 /* imm11 */ -# define PATCH_TYPE4 0x40 +# define PATCH_TYPE3 0x30 /* imm24 */ -# define PATCH_TYPE5 0x50 +# define PATCH_TYPE4 0x40 /* BL + imm24 */ -# define PATCH_BL 0x60 +# define PATCH_TYPE5 0x50 + /* addwi/subwi */ +# define PATCH_TYPE6 0x60 /* 0xf00 cc code for branches */ +# define JUMP_SIZE_SHIFT 26 +# define JUMP_MAX_SIZE ((sljit_uw)5) #endif /* SLJIT_CONFIG_ARM_THUMB2 */ #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) # define IS_COND 0x004 # define IS_CBZ 0x008 # define IS_BL 0x010 -# define PATCH_B 0x020 -# define PATCH_COND 0x040 -# define PATCH_ABS48 0x080 -# define PATCH_ABS64 0x100 +# define PATCH_COND 0x020 +# define PATCH_B 0x040 +# define PATCH_B32 0x080 +# define PATCH_ABS48 0x100 +# define PATCH_ABS64 0x200 +# define JUMP_SIZE_SHIFT 58 +# define JUMP_MAX_SIZE ((sljit_uw)5) #endif /* SLJIT_CONFIG_ARM_64 */ #if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) @@ -210,8 +230,12 @@ #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) # define PATCH_ABS32 0x040 # define PATCH_ABS48 0x080 +# define JUMP_SIZE_SHIFT 58 +# define JUMP_MAX_SIZE ((sljit_uw)7) +#else /* !SLJIT_CONFIG_PPC_64 */ +# define JUMP_SIZE_SHIFT 26 +# define JUMP_MAX_SIZE ((sljit_uw)4) #endif /* SLJIT_CONFIG_PPC_64 */ -# define REMOVE_COND 0x100 #endif /* SLJIT_CONFIG_PPC */ #if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) @@ -253,8 +277,11 @@ # define PATCH_ABS32 0x080 # define PATCH_ABS44 0x100 # define PATCH_ABS52 0x200 +# define JUMP_SIZE_SHIFT 58 +# define JUMP_MAX_SIZE ((sljit_uw)6) #else /* !SLJIT_CONFIG_RISCV_64 */ -# define PATCH_REL32 0x0 +# define JUMP_SIZE_SHIFT 26 +# define JUMP_MAX_SIZE ((sljit_uw)2) #endif /* SLJIT_CONFIG_RISCV_64 */ #endif /* SLJIT_CONFIG_RISCV */ @@ -268,6 +295,8 @@ # define PATCH_REL32 0x040 # define PATCH_ABS32 0x080 # define PATCH_ABS52 0x100 +# define JUMP_SIZE_SHIFT 58 +# define JUMP_MAX_SIZE ((sljit_uw)4) #endif /* SLJIT_CONFIG_LOONGARCH */ /* Stack management. */ @@ -289,6 +318,14 @@ /* Utils can still be used even if SLJIT_CONFIG_UNSUPPORTED is set. */ #include "sljitUtils.c" +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#define SLJIT_CODE_TO_PTR(code) ((void*)((sljit_up)(code) & ~(sljit_up)0x1)) +#elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +#define SLJIT_CODE_TO_PTR(code) ((void*)(*(sljit_up*)code)) +#else /* !SLJIT_CONFIG_ARM_THUMB2 && !SLJIT_INDIRECT_CALL */ +#define SLJIT_CODE_TO_PTR(code) ((void*)(code)) +#endif /* SLJIT_CONFIG_ARM_THUMB2 || SLJIT_INDIRECT_CALL */ + #if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) @@ -417,7 +454,7 @@ static sljit_s32 compiler_initialized = 0; static void init_compiler(void); #endif -SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data, void *exec_allocator_data) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data) { struct sljit_compiler *compiler = (struct sljit_compiler*)SLJIT_MALLOC(sizeof(struct sljit_compiler), allocator_data); if (!compiler) @@ -428,10 +465,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo sizeof(sljit_s8) == 1 && sizeof(sljit_u8) == 1 && sizeof(sljit_s16) == 2 && sizeof(sljit_u16) == 2 && sizeof(sljit_s32) == 4 && sizeof(sljit_u32) == 4 - && (sizeof(sljit_p) == 4 || sizeof(sljit_p) == 8) - && sizeof(sljit_p) <= sizeof(sljit_sw) + && (sizeof(sljit_up) == 4 || sizeof(sljit_up) == 8) + && sizeof(sljit_up) <= sizeof(sljit_sw) + && sizeof(sljit_up) == sizeof(sljit_sp) && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8) - && (sizeof(sljit_uw) == 4 || sizeof(sljit_uw) == 8), + && (sizeof(sljit_uw) == sizeof(sljit_sw)), invalid_integer_types); SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_32, rewritable_jump_and_single_op_must_not_be_the_same); @@ -442,7 +480,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo compiler->error = SLJIT_SUCCESS; compiler->allocator_data = allocator_data; - compiler->exec_allocator_data = exec_allocator_data; compiler->buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, allocator_data); compiler->abuf = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, allocator_data); @@ -537,37 +574,17 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compi compiler->error = SLJIT_ERR_ALLOC_FAILED; } -#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) { SLJIT_UNUSED_ARG(exec_allocator_data); - /* Remove thumb mode flag. */ - SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~(sljit_uw)0x1), exec_allocator_data); + SLJIT_FREE_EXEC(SLJIT_CODE_TO_PTR(code), exec_allocator_data); } -#elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) -{ - SLJIT_UNUSED_ARG(exec_allocator_data); - - /* Resolve indirection. */ - code = (void*)(*(sljit_uw*)code); - SLJIT_FREE_EXEC(code, exec_allocator_data); -} -#else -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) -{ - SLJIT_UNUSED_ARG(exec_allocator_data); - - SLJIT_FREE_EXEC(code, exec_allocator_data); -} -#endif SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) { if (SLJIT_LIKELY(!!jump) && SLJIT_LIKELY(!!label)) { jump->flags &= (sljit_uw)~JUMP_ADDR; - jump->flags |= JUMP_LABEL; jump->u.label = label; } } @@ -575,18 +592,11 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) { if (SLJIT_LIKELY(!!jump)) { - jump->flags &= (sljit_uw)~JUMP_LABEL; jump->flags |= JUMP_ADDR; jump->u.target = target; } } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label) -{ - if (SLJIT_LIKELY(!!put_label)) - put_label->label = label; -} - #define SLJIT_CURRENT_FLAGS_ALL \ (SLJIT_CURRENT_FLAGS_32 | SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE) @@ -681,31 +691,66 @@ static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) compiler->buf = prev; } -/* Only used in RISC architectures where the instruction size is constant */ -#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ - && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) - -static SLJIT_INLINE sljit_uw compute_next_addr(struct sljit_label *label, struct sljit_jump *jump, - struct sljit_const *const_, struct sljit_put_label *put_label) +static SLJIT_INLINE void* allocate_executable_memory(sljit_uw size, sljit_s32 options, + void *exec_allocator_data, sljit_sw *executable_offset) { - sljit_uw result = ~(sljit_uw)0; + void *code; + struct sljit_generate_code_buffer *buffer; + + if (SLJIT_LIKELY(!(options & SLJIT_GENERATE_CODE_BUFFER))) { + code = SLJIT_MALLOC_EXEC(size, exec_allocator_data); + *executable_offset = SLJIT_EXEC_OFFSET(code); + return code; + } + + buffer = (struct sljit_generate_code_buffer*)exec_allocator_data; + + if (size <= buffer->size) { + *executable_offset = buffer->executable_offset; + return buffer->buffer; + } + + return NULL; +} + +#define SLJIT_MAX_ADDRESS ~(sljit_uw)0 + +#define SLJIT_GET_NEXT_SIZE(ptr) (ptr != NULL) ? ((ptr)->size) : SLJIT_MAX_ADDRESS +#define SLJIT_GET_NEXT_ADDRESS(ptr) (ptr != NULL) ? ((ptr)->addr) : SLJIT_MAX_ADDRESS + +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +#define SLJIT_NEXT_DEFINE_TYPES \ + sljit_uw next_label_size; \ + sljit_uw next_jump_addr; \ + sljit_uw next_const_addr; \ + sljit_uw next_min_addr + +#define SLJIT_NEXT_INIT_TYPES() \ + next_label_size = SLJIT_GET_NEXT_SIZE(label); \ + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); \ + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); - if (label) - result = label->size; +#define SLJIT_GET_NEXT_MIN() \ + next_min_addr = sljit_get_next_min(next_label_size, next_jump_addr, next_const_addr); - if (jump && jump->addr < result) - result = jump->addr; +static SLJIT_INLINE sljit_uw sljit_get_next_min(sljit_uw next_label_size, + sljit_uw next_jump_addr, sljit_uw next_const_addr) +{ + sljit_uw result = next_jump_addr; + + SLJIT_ASSERT(result == SLJIT_MAX_ADDRESS || result != next_const_addr); - if (const_ && const_->addr < result) - result = const_->addr; + if (next_const_addr < result) + result = next_const_addr; - if (put_label && put_label->addr < result) - result = put_label->addr; + if (next_label_size < result) + result = next_label_size; return result; } -#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_S390X */ +#endif /* !SLJIT_CONFIG_X86 */ static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, @@ -746,8 +791,9 @@ static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler, static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compiler *compiler) { label->next = NULL; + label->u.index = compiler->label_count++; label->size = compiler->size; - if (compiler->last_label) + if (compiler->last_label != NULL) compiler->last_label->next = label; else compiler->labels = label; @@ -758,7 +804,21 @@ static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler { jump->next = NULL; jump->flags = flags; - if (compiler->last_jump) + jump->u.label = NULL; + if (compiler->last_jump != NULL) + compiler->last_jump->next = jump; + else + compiler->jumps = jump; + compiler->last_jump = jump; +} + +static SLJIT_INLINE void set_mov_addr(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_uw offset) +{ + jump->next = NULL; + jump->addr = compiler->size - offset; + jump->flags = JUMP_MOV_ADDR; + jump->u.label = NULL; + if (compiler->last_jump != NULL) compiler->last_jump->next = jump; else compiler->jumps = jump; @@ -769,26 +829,13 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp { const_->next = NULL; const_->addr = compiler->size; - if (compiler->last_const) + if (compiler->last_const != NULL) compiler->last_const->next = const_; else compiler->consts = const_; compiler->last_const = const_; } -static SLJIT_INLINE void set_put_label(struct sljit_put_label *put_label, struct sljit_compiler *compiler, sljit_uw offset) -{ - put_label->next = NULL; - put_label->label = NULL; - put_label->addr = compiler->size - offset; - put_label->flags = 0; - if (compiler->last_put_label) - compiler->last_put_label->next = put_label; - else - compiler->put_labels = put_label; - compiler->last_put_label = put_label; -} - #define ADDRESSING_DEPENDS_ON(exp, reg) \ (((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg)) @@ -1106,6 +1153,10 @@ static const char* op2_names[] = { "ashr", "mashr", "rotl", "rotr" }; +static const char* op2r_names[] = { + "muladd" +}; + static const char* op_src_dst_names[] = { "fast_return", "skip_frames_before_fast_return", "prefetch_l1", "prefetch_l2", @@ -1187,13 +1238,19 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_com jump = compiler->jumps; while (jump) { /* All jumps have target. */ - CHECK_ARGUMENT(jump->flags & (JUMP_LABEL | JUMP_ADDR)); + CHECK_ARGUMENT((jump->flags & JUMP_ADDR) || jump->u.label != NULL); jump = jump->next; } #endif CHECK_RETURN_OK; } +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#define SLJIT_ENTER_CPU_SPECIFIC_OPTIONS (SLJIT_ENTER_USE_VEX) +#else /* !SLJIT_CONFIG_X86 */ +#define SLJIT_ENTER_CPU_SPECIFIC_OPTIONS (0) +#endif /* !SLJIT_CONFIG_X86 */ + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compiler *compiler, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) @@ -1202,9 +1259,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) if (options & SLJIT_ENTER_REG_ARG) { - CHECK_ARGUMENT(!(options & ~(0x3 | SLJIT_ENTER_REG_ARG))); + CHECK_ARGUMENT(!(options & ~(0x3 | SLJIT_ENTER_REG_ARG | SLJIT_ENTER_CPU_SPECIFIC_OPTIONS))); } else { - CHECK_ARGUMENT(options == 0); + CHECK_ARGUMENT((options & ~SLJIT_ENTER_CPU_SPECIFIC_OPTIONS) == 0); } CHECK_ARGUMENT(SLJIT_KEPT_SAVEDS_COUNT(options) <= 3 && SLJIT_KEPT_SAVEDS_COUNT(options) <= saveds); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); @@ -1238,11 +1295,17 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil fprintf(compiler->verbose, "],"); if (options & SLJIT_ENTER_REG_ARG) { - fprintf(compiler->verbose, " enter:reg_arg,"); - if (SLJIT_KEPT_SAVEDS_COUNT(options) > 0) - fprintf(compiler->verbose, " keep:%d,", SLJIT_KEPT_SAVEDS_COUNT(options)); + fprintf(compiler->verbose, " opt:reg_arg(%d),", SLJIT_KEPT_SAVEDS_COUNT(options)); + else + fprintf(compiler->verbose, " opt:reg_arg,"); + } + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + if (options & SLJIT_ENTER_USE_VEX) { + fprintf(compiler->verbose, " opt:use_vex,"); } +#endif /* !SLJIT_CONFIG_X86 */ fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", scratches, saveds, fscratches, fsaveds, local_size); @@ -1259,9 +1322,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) if (options & SLJIT_ENTER_REG_ARG) { - CHECK_ARGUMENT(!(options & ~(0x3 | SLJIT_ENTER_REG_ARG))); + CHECK_ARGUMENT(!(options & ~(0x3 | SLJIT_ENTER_REG_ARG | SLJIT_ENTER_CPU_SPECIFIC_OPTIONS))); } else { - CHECK_ARGUMENT(options == 0); + CHECK_ARGUMENT((options & ~SLJIT_ENTER_CPU_SPECIFIC_OPTIONS) == 0); } CHECK_ARGUMENT(SLJIT_KEPT_SAVEDS_COUNT(options) <= 3 && SLJIT_KEPT_SAVEDS_COUNT(options) <= saveds); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); @@ -1295,11 +1358,17 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi fprintf(compiler->verbose, "],"); if (options & SLJIT_ENTER_REG_ARG) { - fprintf(compiler->verbose, " enter:reg_arg,"); - if (SLJIT_KEPT_SAVEDS_COUNT(options) > 0) - fprintf(compiler->verbose, " keep:%d,", SLJIT_KEPT_SAVEDS_COUNT(options)); + fprintf(compiler->verbose, " opt:reg_arg(%d),", SLJIT_KEPT_SAVEDS_COUNT(options)); + else + fprintf(compiler->verbose, " opt:reg_arg,"); + } + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + if (options & SLJIT_ENTER_USE_VEX) { + fprintf(compiler->verbose, " opt:use_vex,"); } +#endif /* !SLJIT_CONFIG_X86 */ fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", scratches, saveds, fscratches, fsaveds, local_size); @@ -1308,6 +1377,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi CHECK_RETURN_OK; } +#undef SLJIT_ENTER_CPU_SPECIFIC_OPTIONS + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return_void(struct sljit_compiler *compiler) { if (SLJIT_UNLIKELY(compiler->skip_checks)) { @@ -1636,6 +1707,33 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler CHECK_RETURN_OK; } +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((op | SLJIT_32) == SLJIT_MULADD32); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg)); + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s ", op2r_names[GET_OPCODE(op) - SLJIT_OP2R_BASE], !(op & SLJIT_32) ? "" : "32"); + + sljit_verbose_reg(compiler, dst_reg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -1721,11 +1819,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 t #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) if (type == SLJIT_GP_REGISTER) { CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS) - || (reg >= SLJIT_TMP_REGISTER_BASE && reg <= (SLJIT_TMP_REGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS))); + || (reg >= SLJIT_TMP_REGISTER_BASE && reg < (SLJIT_TMP_REGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS))); } else { CHECK_ARGUMENT(type == SLJIT_FLOAT_REGISTER || ((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6))); CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS) - || (reg >= SLJIT_TMP_FREGISTER_BASE && reg <= (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS))); + || (reg >= SLJIT_TMP_FREGISTER_BASE && reg < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS))); } #endif CHECK_RETURN_OK; @@ -2927,14 +3025,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " put_label "); + fprintf(compiler->verbose, " mov_addr "); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, "\n"); } @@ -3058,6 +3156,8 @@ static sljit_s32 sljit_emit_fmem_unaligned(struct sljit_compiler *compiler, slji # include "sljitNativeLOONGARCH_64.c" #endif +#include "sljitSerialize.c" + static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) @@ -3288,7 +3388,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler #if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ && !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ - && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + && !(defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, diff --git a/ext/pcre/pcre2lib/sljit/sljitLir.h b/ext/pcre/pcre2lib/sljit/sljitLir.h index 2ba6683c74b8..8b6fa69a0a29 100644 --- a/ext/pcre/pcre2lib/sljit/sljitLir.h +++ b/ext/pcre/pcre2lib/sljit/sljitLir.h @@ -427,7 +427,10 @@ struct sljit_memory_fragment { struct sljit_label { struct sljit_label *next; - sljit_uw addr; + union { + sljit_uw index; + sljit_uw addr; + } u; /* The maximum size difference. */ sljit_uw size; }; @@ -443,36 +446,35 @@ struct sljit_jump { } u; }; -struct sljit_put_label { - struct sljit_put_label *next; - struct sljit_label *label; - sljit_uw addr; - sljit_uw flags; -}; - struct sljit_const { struct sljit_const *next; sljit_uw addr; }; +struct sljit_generate_code_buffer { + void *buffer; + sljit_uw size; + sljit_sw executable_offset; +}; + struct sljit_compiler { sljit_s32 error; sljit_s32 options; struct sljit_label *labels; struct sljit_jump *jumps; - struct sljit_put_label *put_labels; struct sljit_const *consts; struct sljit_label *last_label; struct sljit_jump *last_jump; struct sljit_const *last_const; - struct sljit_put_label *last_put_label; void *allocator_data; - void *exec_allocator_data; + void *user_data; struct sljit_memory_fragment *buf; struct sljit_memory_fragment *abuf; + /* Number of labels created by the compiler. */ + sljit_uw label_count; /* Available scratch registers. */ sljit_s32 scratches; /* Available saved registers. */ @@ -492,15 +494,16 @@ struct sljit_compiler { #if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) sljit_s32 status_flags_state; -#endif +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_s32 args_size; -#endif +#endif /* SLJIT_CONFIG_X86_32 */ #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* Temporary fields. */ sljit_s32 mode32; -#endif +#endif /* SLJIT_CONFIG_X86_64 */ #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) /* Constant pool handling. */ @@ -511,7 +514,7 @@ struct sljit_compiler { /* Other members. */ /* Contains pointer, "ldr pc, [...]" pairs. */ sljit_uw patches; -#endif +#endif /* SLJIT_CONFIG_ARM_V6 */ #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) /* Temporary fields. */ @@ -520,40 +523,45 @@ struct sljit_compiler { #if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__) sljit_uw args_size; -#endif +#endif /* SLJIT_CONFIG_ARM_32 && __SOFTFP__ */ #if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + /* Temporary fields. */ sljit_u32 imm; -#endif +#endif /* SLJIT_CONFIG_PPC */ #if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) sljit_s32 delay_slot; + /* Temporary fields. */ sljit_s32 cache_arg; sljit_sw cache_argw; -#endif +#endif /* SLJIT_CONFIG_MIPS */ #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) sljit_uw args_size; -#endif +#endif /* SLJIT_CONFIG_MIPS_32 */ #if (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) + /* Temporary fields. */ sljit_s32 cache_arg; sljit_sw cache_argw; -#endif +#endif /* SLJIT_CONFIG_RISCV */ #if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) /* Need to allocate register save area to make calls. */ + /* Temporary fields. */ sljit_s32 mode; -#endif +#endif /* SLJIT_CONFIG_S390X */ #if (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) + /* Temporary fields. */ sljit_s32 cache_arg; sljit_sw cache_argw; -#endif +#endif /* SLJIT_CONFIG_LOONGARCH */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) FILE* verbose; -#endif +#endif /* SLJIT_VERBOSE */ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) @@ -564,7 +572,7 @@ struct sljit_compiler { sljit_s32 last_return; /* Local size passed to entry functions. */ sljit_s32 logical_local_size; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ @@ -572,7 +580,7 @@ struct sljit_compiler { /* Trust arguments when an API function is called. Used internally for calling API functions. */ sljit_s32 skip_checks; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG || SLJIT_VERBOSE */ }; /* --------------------------------------------------------------------- */ @@ -583,12 +591,10 @@ struct sljit_compiler { custom memory managers. This pointer is passed to SLJIT_MALLOC and SLJIT_FREE macros. Most allocators (including the default one) ignores this value, and it is recommended to pass NULL - as a dummy value for allocator_data. The exec_allocator_data - has the same purpose but this one is passed to SLJIT_MALLOC_EXEC / - SLJIT_MALLOC_FREE functions. + as a dummy value for allocator_data. Returns NULL if failed. */ -SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data, void *exec_allocator_data); +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data); /* Frees everything except the compiled machine code. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler); @@ -619,20 +625,31 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compi of the compiler to out-of-memory status). */ SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size); -/* Returns the allocator data passed to sljit_create_compiler. These pointers - may contain context data even if the normal/exec allocator ignores it. */ -static SLJIT_INLINE void* sljit_get_allocator_data(struct sljit_compiler *compiler) { return compiler->allocator_data; } -static SLJIT_INLINE void* sljit_get_exec_allocator_data(struct sljit_compiler *compiler) { return compiler->exec_allocator_data; } +/* Returns the allocator data passed to sljit_create_compiler. */ +static SLJIT_INLINE void* sljit_compiler_get_allocator_data(struct sljit_compiler *compiler) { return compiler->allocator_data; } +/* Sets/get the user data for a compiler. */ +static SLJIT_INLINE void sljit_compiler_set_user_data(struct sljit_compiler *compiler, void *user_data) { compiler->user_data = user_data; } +static SLJIT_INLINE void* sljit_compiler_get_user_data(struct sljit_compiler *compiler) { return compiler->user_data; } #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) /* Passing NULL disables verbose. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose); #endif +/* Option bits for sljit_generate_code. */ + +/* The exec_allocator_data points to a pre-allocated + buffer which type is sljit_generate_code_buffer. */ +#define SLJIT_GENERATE_CODE_BUFFER 0x1 + /* Create executable code from the instruction stream. This is the final step - of the code generation so no more instructions can be emitted after this call. */ + of the code generation, and no more instructions can be emitted after this call. + + options is the combination of SLJIT_GENERATE_CODE_* bits + exec_allocator_data is passed to SLJIT_MALLOC_EXEC and + SLJIT_MALLOC_FREE functions */ -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler); +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data); /* Free executable code. */ @@ -708,6 +725,11 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler #define SLJIT_HAS_AVX2 101 #endif +#if (defined SLJIT_CONFIG_LOONGARCH) +/* [Not emulated] LASX support is available on LoongArch */ +#define SLJIT_HAS_LASX 201 +#endif + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type); /* If type is between SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL, @@ -777,17 +799,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type); global / local context pointers) across function calls. The value of n must be between 1 and 3. This option is only supported by SLJIT_ENTER_REG_ARG calling convention. */ -#define SLJIT_ENTER_KEEP(n) (n) +#define SLJIT_ENTER_KEEP(n) (n) /* The compiled function uses an SLJIT specific register argument calling convention. This is a lightweight function call type where both the caller and the called functions must be compiled by SLJIT. The type argument of the call must be SLJIT_CALL_REG_ARG and all arguments must be stored in scratch registers. */ -#define SLJIT_ENTER_REG_ARG 0x00000004 +#define SLJIT_ENTER_REG_ARG 0x00000004 /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ -#define SLJIT_MAX_LOCAL_SIZE 1048576 +#define SLJIT_MAX_LOCAL_SIZE 1048576 + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +/* Use VEX prefix for all SIMD operations on x86. */ +#define SLJIT_ENTER_USE_VEX 0x00010000 +#endif /* !SLJIT_CONFIG_X86 */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, @@ -855,7 +882,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c int | 4 byte (physical_address & 0x3 == 0) word | 4 byte if SLJIT_32BIT_ARCHITECTURE is defined and its value is 1 | 8 byte if SLJIT_64BIT_ARCHITECTURE is defined and its value is 1 - pointer | size of sljit_p type (4 byte on 32 bit machines, 4 or 8 byte + pointer | size of sljit_up type (4 byte on 32 bit machines, 4 or 8 byte | on 64 bit machines) Note: Different architectures have different addressing limitations. @@ -913,6 +940,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c #define SLJIT_IS_IMM(arg) ((arg) == SLJIT_IMM) #define SLJIT_IS_REG_PAIR(arg) (!((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1)) +/* Macros for extracting registers from operands. */ +/* Support operands which contains a single register or + constructed using SLJIT_MEM1, SLJIT_MEM2, or SLJIT_REG_PAIR. */ +#define SLJIT_EXTRACT_REG(arg) ((arg) & 0x7f) +/* Support operands which constructed using SLJIT_MEM2, or SLJIT_REG_PAIR. */ +#define SLJIT_EXTRACT_SECOND_REG(arg) ((arg) >> 8) + /* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on 32 bit CPUs. When this option is set for an arithmetic operation, only the lower 32 bits of the input registers are used, and the CPU status @@ -1084,7 +1118,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile S16 - signed 16 bit data transfer U32 - unsigned int (32 bit) data transfer S32 - signed int (32 bit) data transfer - P - pointer (sljit_p) data transfer + P - pointer (sljit_up) data transfer */ /* Flags: - (does not modify flags) */ @@ -1251,6 +1285,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w); +/* Starting index of opcodes for sljit_emit_op2r. */ +#define SLJIT_OP2R_BASE 96 + +/* Flags: - (may destroy flags) */ +#define SLJIT_MULADD (SLJIT_OP2R_BASE + 0) +#define SLJIT_MULADD32 (SLJIT_MULADD | SLJIT_32) + +/* Similar to sljit_emit_fop2, except the destination is always a register. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + /* Emit a left or right shift operation, where the bits shifted in comes from a separate source operand. All operands are interpreted as unsigned integers. @@ -1298,7 +1345,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * /* Starting index of opcodes for sljit_emit_op_src and sljit_emit_op_dst. */ -#define SLJIT_OP_SRC_DST_BASE 96 +#define SLJIT_OP_SRC_DST_BASE 112 /* Fast return, see SLJIT_FAST_CALL for more details. Note: src cannot be an immedate value @@ -1350,7 +1397,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *comp sljit_s32 dst, sljit_sw dstw); /* Starting index of opcodes for sljit_emit_fop1. */ -#define SLJIT_FOP1_BASE 128 +#define SLJIT_FOP1_BASE 144 /* Flags: - (does not modify flags) */ #define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0) @@ -1395,7 +1442,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil sljit_s32 src, sljit_sw srcw); /* Starting index of opcodes for sljit_emit_fop2. */ -#define SLJIT_FOP2_BASE 160 +#define SLJIT_FOP2_BASE 176 /* Flags: - (may destroy flags) */ #define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0) @@ -1416,7 +1463,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil sljit_s32 src2, sljit_sw src2w); /* Starting index of opcodes for sljit_emit_fop2r. */ -#define SLJIT_FOP2R_BASE 168 +#define SLJIT_FOP2R_BASE 192 /* Flags: - (may destroy flags) */ #define SLJIT_COPYSIGN_F64 (SLJIT_FOP2R_BASE + 0) @@ -2138,17 +2185,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c Flags: - (does not modify flags) */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value); -/* Store the value of a label (see: sljit_set_put_label) +/* Store the value of a label (see: sljit_set_label / sljit_set_target) Flags: - (does not modify flags) */ -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw); - -/* Set the value stored by put_label to this label. */ -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label); +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw); -/* After the code generation the address for label, jump and const instructions - are computed. Since these structures are freed by sljit_free_compiler, the - addresses must be preserved by the user program elsewere. */ -static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { return label->addr; } +/* Provides the address of label, jump and const instructions after sljit_generate_code + is called. The returned value is unspecified before the sljit_generate_code call. + Since these structures are freed by sljit_free_compiler, the addresses must be + preserved by the user program elsewere. */ +static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { return label->u.addr; } static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; } static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; } @@ -2221,6 +2266,98 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags); +/* --------------------------------------------------------------------- */ +/* Serialization functions */ +/* --------------------------------------------------------------------- */ + +/* Label/jump/const enumeration functions. The items in each group + are enumerated in creation order. Serialization / deserialization + preserves this order for each group. For example the fifth label + after deserialization refers to the same machine code location as + the fifth label before the serialization. */ +static SLJIT_INLINE struct sljit_label *sljit_get_first_label(struct sljit_compiler *compiler) { return compiler->labels; } +static SLJIT_INLINE struct sljit_jump *sljit_get_first_jump(struct sljit_compiler *compiler) { return compiler->jumps; } +static SLJIT_INLINE struct sljit_const *sljit_get_first_const(struct sljit_compiler *compiler) { return compiler->consts; } + +static SLJIT_INLINE struct sljit_label *sljit_get_next_label(struct sljit_label *label) { return label->next; } +static SLJIT_INLINE struct sljit_jump *sljit_get_next_jump(struct sljit_jump *jump) { return jump->next; } +static SLJIT_INLINE struct sljit_const *sljit_get_next_const(struct sljit_const *const_) { return const_->next; } + +/* A number starting from 0 is assigned to each label, which +represents its creation index. The first label created by the +compiler has index 0, the second has index 1, the third has +index 2, and so on. The returned value is unspecified after +sljit_generate_code() is called. */ +static SLJIT_INLINE sljit_uw sljit_get_label_index(struct sljit_label *label) { return label->u.index; } + +/* The sljit_jump_has_label() and sljit_jump_has_target() functions +returns non-zero value if a label or target is set for the jump +respectively. Both may return with a zero value. The other two +functions return the value assigned to the jump. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_label(struct sljit_jump *jump); +static SLJIT_INLINE struct sljit_label *sljit_jump_get_label(struct sljit_jump *jump) { return jump->u.label; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_target(struct sljit_jump *jump); +static SLJIT_INLINE sljit_uw sljit_jump_get_target(struct sljit_jump *jump) { return jump->u.target; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_is_mov_addr(struct sljit_jump *jump); + +/* Option bits for sljit_serialize_compiler. */ + +/* When debugging is enabled, the serialized buffer contains +debugging information unless this option is specified. */ +#define SLJIT_SERIALIZE_IGNORE_DEBUG 0x1 + +/* Serialize the internal structure of the compiler into a buffer. +If the serialization is successful, the returned value is a newly +allocated buffer which is allocated by the memory allocator assigned +to the compiler. Otherwise the returned value is NULL. Unlike +sljit_generate_code(), serialization does not modify the internal +state of the compiler, so the code generation can be continued. + + options must be the combination of SLJIT_SERIALIZE_* option bits + size is an output argument, which is set to the byte size of + the result buffer if the operation is successful + +Notes: + - This function is useful for ahead-of-time compilation (AOT). + - The returned buffer must be freed later by the caller. + The SLJIT_FREE() macro is suitable for this purpose: + SLJIT_FREE(returned_buffer, sljit_get_allocator_data(compiler)) + - Memory allocated by sljit_alloc_memory() is not serialized. + - The type of the returned buffer is sljit_uw* to emphasize that + the buffer is word aligned. However, the 'size' output argument + contains the byte size, so this value is always divisible by + sizeof(sljit_uw). +*/ +SLJIT_API_FUNC_ATTRIBUTE sljit_uw* sljit_serialize_compiler(struct sljit_compiler *compiler, + sljit_s32 options, sljit_uw *size); + +/* Construct a new compiler instance from a buffer produced by +sljit_serialize_compiler(). If the operation is successful, the new +compiler instance is returned. Otherwise the returned value is NULL. + + buffer points to a word aligned memory data which was + created by sljit_serialize_compiler() + size is the byte size of the buffer + options must be 0 + allocator_data specify an allocator specific data, see + sljit_create_compiler() for further details + +Notes: + - Labels assigned to jumps are restored with their + corresponding label in the label set created by + the deserializer. Target addresses assigned to + jumps are also restored. Uninitialized jumps + remain uninitialized. + - After the deserialization, sljit_generate_code() does + not need to be the next operation on the returned + compiler, the code generation can be continued. + Even sljit_serialize_compiler() can be called again. + - When debugging is enabled, a buffers without debug + information cannot be deserialized. +*/ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler *sljit_deserialize_compiler(sljit_uw* buffer, sljit_uw size, + sljit_s32 options, void *allocator_data); + /* --------------------------------------------------------------------- */ /* Miscellaneous utility functions */ /* --------------------------------------------------------------------- */ diff --git a/ext/pcre/pcre2lib/sljit/sljitNativeARM_32.c b/ext/pcre/pcre2lib/sljit/sljitNativeARM_32.c index d44616d800f5..a253c06f0100 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativeARM_32.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativeARM_32.c @@ -120,6 +120,7 @@ static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) #define LDREX 0xe1900f9f #define LDREXB 0xe1d00f9f #define LDREXH 0xe1f00f9f +#define MLA 0xe0200090 #define MOV 0xe1a00000 #define MUL 0xe0000090 #define MVN 0xe1e00000 @@ -482,11 +483,12 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) if (jump->flags & IS_BL) code_ptr--; +#endif /* SLJIT_CONFIG_ARM_V6 */ if (jump->flags & JUMP_ADDR) diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset); else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(jump->u.label != NULL); diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)); } @@ -494,6 +496,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw if (diff & 0x3) return 0; +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) if (jump->flags & IS_BL) { if (diff <= 0x01ffffff && diff >= -0x02000000) { *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK); @@ -508,20 +511,8 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw } } #else /* !SLJIT_CONFIG_ARM_V6 */ - if (jump->flags & JUMP_ADDR) - diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset); - else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr); - } - - /* Branch to Thumb code has not been optimized yet. */ - if (diff & 0x3) - return 0; - if (diff <= 0x01ffffff && diff >= -0x02000000) { - code_ptr -= 2; - *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK); + *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (*code_ptr & COND_MASK); jump->flags |= PATCH_B; return 1; } @@ -529,7 +520,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw return 0; } -static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache) +static void set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache) { #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) sljit_ins *ptr = (sljit_ins*)jump_ptr; @@ -628,7 +619,7 @@ static sljit_uw get_imm(sljit_uw imm); static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm); static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg); -static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache) +static void set_const_value(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache) { #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) sljit_ins *ptr = (sljit_ins*)addr; @@ -720,18 +711,120 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_off #endif /* SLJIT_CONFIG_ARM_V6 */ } -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + /* The pc+8 offset is represented by the 2 * SSIZE_OF(ins) below. */ + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + if ((diff & 0x3) == 0 && diff <= (0x3fc + 2 * SSIZE_OF(ins)) && diff >= (-0x3fc + 2 * SSIZE_OF(ins))) { + jump->flags |= PATCH_B; + return 0; + } + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + return 0; +#else /* !SLJIT_CONFIG_ARM_V6 */ + return 1; +#endif /* SLJIT_CONFIG_ARM_V6 */ +} + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE - 1; + + if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2; + + if (diff <= (0x01ffffff / SSIZE_OF(ins)) && diff >= (-0x02000000 / SSIZE_OF(ins))) + total_size = 1 - 1; + } + + size_reduce += JUMP_MAX_SIZE - 1 - total_size; + } else { + /* Real size minus 1. Unit size: instruction. */ + total_size = 1; + + if (!(jump->flags & JUMP_ADDR)) { + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (diff <= 0xff + 2 && diff >= -0xff + 2) + total_size = 0; + } + + size_reduce += 1 - total_size; + } + + jump->flags |= total_size << JUMP_SIZE_SHIFT; + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} + +#endif /* SLJIT_CONFIG_ARM_V7 */ + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) { struct sljit_memory_fragment *buf; sljit_ins *code; sljit_ins *code_ptr; sljit_ins *buf_ptr; sljit_ins *buf_end; - sljit_uw size; sljit_uw word_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; sljit_sw executable_offset; sljit_uw addr; + sljit_sw diff; #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) sljit_uw cpool_size; sljit_uw cpool_skip_alignment; @@ -744,22 +837,22 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); /* Second code generation pass. */ #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) - size = compiler->size + (compiler->patches << 1); + compiler->size += (compiler->patches << 1); if (compiler->cpool_fill > 0) - size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1; + compiler->size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1; #else /* !SLJIT_CONFIG_ARM_V6 */ - size = compiler->size; + reduce_code_size(compiler); #endif /* SLJIT_CONFIG_ARM_V6 */ - code = (sljit_ins*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_ins), compiler->exec_allocator_data); + code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) @@ -773,33 +866,24 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = code; word_count = 0; - next_addr = 1; - executable_offset = SLJIT_EXEC_OFFSET(code); - label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; - - if (label && label->size == 0) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); - label = label->next; - } + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); do { - word_count++; #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) if (cpool_size > 0) { if (cpool_skip_alignment > 0) { buf_ptr++; cpool_skip_alignment--; - } - else { + } else { if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { - SLJIT_FREE_EXEC(code, compiler->exec_allocator_data); + SLJIT_FREE_EXEC(code, exec_allocator_data); compiler->error = SLJIT_ERR_ALLOC_FAILED; return NULL; } @@ -807,59 +891,59 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (++cpool_current_index >= cpool_size) { SLJIT_ASSERT(!first_patch); cpool_size = 0; - if (label && label->size == word_count) { - /* Points after the current instruction. */ - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = (sljit_uw)(code_ptr - code); - label = label->next; - - next_addr = compute_next_addr(label, jump, const_, put_label); - } } } - } - else if ((*buf_ptr & 0xff000000) != PUSH_POOL) { + } else if ((*buf_ptr & 0xff000000) != PUSH_POOL) { #endif /* SLJIT_CONFIG_ARM_V6 */ *code_ptr = *buf_ptr++; - if (next_addr == word_count) { + if (next_min_addr == word_count) { SLJIT_ASSERT(!label || label->size >= word_count); SLJIT_ASSERT(!jump || jump->addr >= word_count); SLJIT_ASSERT(!const_ || const_->addr >= word_count); - SLJIT_ASSERT(!put_label || put_label->addr >= word_count); - /* These structures are ordered by their address. */ - if (jump && jump->addr == word_count) { -#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) - if (detect_jump_type(jump, code_ptr, code, executable_offset)) - code_ptr--; - jump->addr = (sljit_uw)code_ptr; -#else /* !SLJIT_CONFIG_ARM_V6 */ - jump->addr = (sljit_uw)(code_ptr - 2); - if (detect_jump_type(jump, code_ptr, code, executable_offset)) - code_ptr -= 2; -#endif /* SLJIT_CONFIG_ARM_V6 */ - jump = jump->next; - } - if (label && label->size == word_count) { - /* code_ptr can be affected above. */ - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset); - label->size = (sljit_uw)((code_ptr + 1) - code); + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (const_ && const_->addr == word_count) { + + /* These structures are ordered by their address. */ + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) - const_->addr = (sljit_uw)code_ptr; + if (detect_jump_type(jump, code_ptr, code, executable_offset)) + code_ptr--; + jump->addr = (sljit_uw)code_ptr; #else /* !SLJIT_CONFIG_ARM_V6 */ - const_->addr = (sljit_uw)(code_ptr - 1); + word_count += jump->flags >> JUMP_SIZE_SHIFT; + jump->addr = (sljit_uw)code_ptr; + if (!detect_jump_type(jump, code_ptr, code, executable_offset)) { + code_ptr[2] = code_ptr[0]; + addr = ((code_ptr[0] & 0xf) << 12); + code_ptr[0] = MOVW | addr; + code_ptr[1] = MOVT | addr; + code_ptr += 2; + } + SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr <= (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)); #endif /* SLJIT_CONFIG_ARM_V6 */ + } else { +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + word_count += jump->flags >> JUMP_SIZE_SHIFT; +#endif /* SLJIT_CONFIG_ARM_V7 */ + addr = (sljit_uw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = addr; + } + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { + const_->addr = (sljit_uw)code_ptr; const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == word_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); + + SLJIT_GET_NEXT_MIN(); } code_ptr++; #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) @@ -879,14 +963,20 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil last_pc_patch = code_ptr; } #endif /* SLJIT_CONFIG_ARM_V6 */ + word_count++; } while (buf_ptr < buf_end); buf = buf->next; } while (buf); + if (label && label->size == word_count) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) SLJIT_ASSERT(cpool_size == 0); @@ -901,7 +991,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil cpool_current_index = 0; while (buf_ptr < buf_end) { if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { - SLJIT_FREE_EXEC(code, compiler->exec_allocator_data); + SLJIT_FREE_EXEC(code, exec_allocator_data); compiler->error = SLJIT_ERR_ALLOC_FAILED; return NULL; } @@ -914,43 +1004,64 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { + addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; buf_ptr = (sljit_ins*)jump->addr; - if (jump->flags & PATCH_B) { - addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); - if (!(jump->flags & JUMP_ADDR)) { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - addr) <= 0x01ffffff && (sljit_sw)(jump->u.label->addr - addr) >= -0x02000000); - *buf_ptr |= ((jump->u.label->addr - addr) >> 2) & 0x00ffffff; - } - else { - SLJIT_ASSERT((sljit_sw)(jump->u.target - addr) <= 0x01ffffff && (sljit_sw)(jump->u.target - addr) >= -0x02000000); - *buf_ptr |= ((jump->u.target - addr) >> 2) & 0x00ffffff; - } - } - else if (jump->flags & SLJIT_REWRITABLE_JUMP) { + if (jump->flags & JUMP_MOV_ADDR) { #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) - jump->addr = (sljit_uw)code_ptr; - code_ptr[0] = (sljit_ins)buf_ptr; - code_ptr[1] = *buf_ptr; - inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); - code_ptr += 2; + SLJIT_ASSERT((buf_ptr[0] & (sljit_ins)0xffff0000) == 0xe59f0000); #else /* !SLJIT_CONFIG_ARM_V6 */ - inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + SLJIT_ASSERT((buf_ptr[0] & ~(sljit_ins)0xf000) == 0); #endif /* SLJIT_CONFIG_ARM_V6 */ + + if (jump->flags & PATCH_B) { + SLJIT_ASSERT((((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) & 0x3) == 0); + diff = ((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) >> 2; + + SLJIT_ASSERT(diff <= 0xff && diff >= -0xff); + + addr = ADD; + if (diff < 0) { + diff = -diff; + addr = SUB; + } + + buf_ptr[0] = addr | (buf_ptr[0] & 0xf000) | RN(TMP_PC) | (1 << 25) | (0xf << 8) | (sljit_ins)(diff & 0xff); + } else { +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr; +#else /* !SLJIT_CONFIG_ARM_V6 */ + buf_ptr[1] = MOVT | buf_ptr[0] | ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff); + buf_ptr[0] = MOVW | buf_ptr[0] | ((addr << 4) & 0xf0000) | (addr & 0xfff); +#endif /* SLJIT_CONFIG_ARM_V6 */ + } + } else if (jump->flags & PATCH_B) { + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); + SLJIT_ASSERT(diff <= 0x01ffffff && diff >= -0x02000000); + *buf_ptr |= (diff >> 2) & 0x00ffffff; } else { #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) if (jump->flags & IS_BL) buf_ptr--; - if (*buf_ptr & (1 << 23)) - buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; - else - buf_ptr += 1; - *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) { + jump->addr = (sljit_uw)code_ptr; + code_ptr[0] = (sljit_ins)buf_ptr; + code_ptr[1] = *buf_ptr; + set_jump_addr((sljit_uw)code_ptr, executable_offset, addr, 0); + code_ptr += 2; + } else { + if (*buf_ptr & (1 << 23)) + buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; + else + buf_ptr += 1; + *buf_ptr = addr; + } #else /* !SLJIT_CONFIG_ARM_V6 */ - inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + set_jump_addr((sljit_uw)buf_ptr, executable_offset, addr, 0); #endif /* SLJIT_CONFIG_ARM_V6 */ } + jump = jump->next; } @@ -967,30 +1078,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil else buf_ptr += 1; /* Set the value again (can be a simple constant). */ - inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0); + set_const_value((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0); code_ptr += 2; const_ = const_->next; } #endif /* SLJIT_CONFIG_ARM_V6 */ - put_label = compiler->put_labels; - while (put_label) { - addr = put_label->label->addr; - buf_ptr = (sljit_ins*)put_label->addr; - -#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) - SLJIT_ASSERT((buf_ptr[0] & 0xffff0000) == 0xe59f0000); - buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr; -#else /* !SLJIT_CONFIG_ARM_V6 */ - SLJIT_ASSERT((buf_ptr[-1] & 0xfff00000) == MOVW && (buf_ptr[0] & 0xfff00000) == MOVT); - buf_ptr[-1] |= ((addr << 4) & 0xf0000) | (addr & 0xfff); - buf_ptr[0] |= ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff); -#endif /* SLJIT_CONFIG_ARM_V6 */ - put_label = put_label->next; - } - - SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size); + SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size); compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; @@ -1113,7 +1208,7 @@ static const sljit_ins data_transfer_insts[16] = { /* Inverted immediate. */ #define INV_IMM 0x02 /* Source and destination is register. */ -#define MOVE_REG_CONV 0x04 +#define REGISTER_OP 0x04 /* Unused return value. */ #define UNUSED_RETURN 0x08 /* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */ @@ -1498,7 +1593,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_uw dst, sljit_uw src1, sljit_uw src2) { - sljit_s32 is_masked; + sljit_s32 reg, is_masked; sljit_uw shift_type; switch (op) { @@ -1515,7 +1610,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_MOV_U8: case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if (flags & MOVE_REG_CONV) + if (flags & REGISTER_OP) return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2)); if (dst != src2) { @@ -1527,7 +1622,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_MOV_U16: case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if (flags & MOVE_REG_CONV) + if (flags & REGISTER_OP) return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2)); if (dst != src2) { @@ -1543,11 +1638,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_CTZ: SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + SLJIT_ASSERT(src1 == TMP_REG1 && src2 != TMP_REG2 && !(flags & ARGS_SWAPPED)); #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) - FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG1) | RN(src2) | 0)); - FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | RM(TMP_REG1))); - FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG2))); + FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0)); + FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RN(src2) | RM(TMP_REG2))); + FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG1))); FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32)); return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f); #else /* !SLJIT_CONFIG_ARM_V6 */ @@ -1563,9 +1658,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_REV_U16: case SLJIT_REV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED) && src2 != TMP_REG1 && dst != TMP_REG1); + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2))); - if (dst == TMP_REG2 || (src2 == TMP_REG2 && op == SLJIT_REV_U16)) + if (!(flags & REGISTER_OP)) return SLJIT_SUCCESS; return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst)); case SLJIT_ADD: @@ -1601,10 +1696,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (!(flags & SET_FLAGS)) return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1)); - FAIL_IF(push_inst(compiler, SMULL | RN(TMP_REG1) | RD(dst) | RM8(src2) | RM(src1))); + reg = dst == TMP_REG1 ? TMP_REG2 : TMP_REG1; + FAIL_IF(push_inst(compiler, SMULL | RN(reg) | RD(dst) | RM8(src2) | RM(src1))); /* cmp TMP_REG1, dst asr #31. */ - return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0); + return push_inst(compiler, CMP | SET_FLAGS | RN(reg) | RM(dst) | 0xfc0); case SLJIT_AND: if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN) @@ -1654,6 +1750,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl is_masked = 0; break; + case SLJIT_MULADD: + return push_inst(compiler, MLA | RN(dst) | RD(dst) | RM8(src2) | RM(src1)); + default: SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; @@ -1973,6 +2072,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 dst_reg; sljit_s32 src1_reg = 0; sljit_s32 src2_reg = 0; + sljit_s32 src2_tmp_reg = 0; sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; sljit_s32 neg_op = 0; sljit_u32 imm2; @@ -1982,7 +2082,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (flags & SET_FLAGS) inp_flags &= ~ALLOW_DOUBLE_IMM; - if (dst == TMP_REG2) + if (dst == TMP_REG1) flags |= UNUSED_RETURN; SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); @@ -2068,17 +2168,6 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } } while(0); - /* Source 1. */ - if (FAST_IS_REG(src1)) - src1_reg = src1; - else if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); - src1_reg = TMP_REG1; - } else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) { - FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); - src1_reg = TMP_REG1; - } - /* Destination. */ dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2; @@ -2088,21 +2177,44 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 inp_flags &= ~SIGNED; if (FAST_IS_REG(src2)) - return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2); + return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG1); } if (FAST_IS_REG(src2) && dst_reg != TMP_REG2) - flags |= MOVE_REG_CONV; + flags |= REGISTER_OP; + + src2_tmp_reg = dst_reg; + } else { + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { + if (!(dst & SLJIT_MEM) && (!(src2 & SLJIT_MEM) || op == SLJIT_REV_S16)) + flags |= REGISTER_OP; + } + + src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; + } + + if (src2_reg == 0 && (src2 & SLJIT_MEM)) { + src2_reg = src2_tmp_reg; + FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG1)); + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) + src1_reg = src1; + else if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); + src1_reg = TMP_REG1; + } else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); + src1_reg = TMP_REG1; } /* Source 2. */ if (src2_reg == 0) { - src2_reg = (op <= SLJIT_MOV_P) ? dst_reg : TMP_REG2; + src2_reg = src2_tmp_reg; if (FAST_IS_REG(src2)) src2_reg = src2; - else if (src2 & SLJIT_MEM) - FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2)); else if (!(inp_flags & ALLOW_DOUBLE_IMM)) FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w)); else { @@ -2122,8 +2234,8 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } if (src2_reg == 0) { - FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)src2w)); - src2_reg = TMP_REG2; + FAIL_IF(load_immediate(compiler, src2_tmp_reg, (sljit_uw)src2w)); + src2_reg = src2_tmp_reg; } else { FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg)); src1_reg = dst_reg; @@ -2368,7 +2480,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); SLJIT_SKIP_CHECKS(compiler); - return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + return emit_op(compiler, op, 0, dst_reg, 0, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, @@ -2533,8 +2663,8 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, arg &= ~SLJIT_MEM; if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7))); - arg = TMP_REG2; + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7))); + arg = TMP_REG1; argw = 0; } @@ -2547,25 +2677,25 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc); if (imm) { - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2)); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | imm)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2)); } imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc); if (imm) { argw = -argw; - FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2)); + FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg & REG_MASK) | imm)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2)); } } if (arg) { - FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw)); - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2))); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw)); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(TMP_REG1))); } else - FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw)); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw)); - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0)); } static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, @@ -2675,7 +2805,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0))); else dst_r = src; @@ -2745,7 +2875,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return push_inst(compiler, EMIT_FPU_OPERATION((VNEG_F32 & ~COND_MASK) | 0xb0000000, op & SLJIT_32, dst_r, dst_r, 0)); } - if (dst_r == TMP_FREG1) + if (dst_r != dst) FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw)); return SLJIT_SUCCESS; @@ -2974,27 +3104,25 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) if (type >= SLJIT_FAST_CALL) PTR_FAIL_IF(prepare_blx(compiler)); + + jump->addr = compiler->size; PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0)); - if (jump->flags & SLJIT_REWRITABLE_JUMP) { - jump->addr = compiler->size; + if (jump->flags & SLJIT_REWRITABLE_JUMP) compiler->patches++; - } if (type >= SLJIT_FAST_CALL) { jump->flags |= IS_BL; + jump->addr = compiler->size; PTR_FAIL_IF(emit_blx(compiler)); } - - if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) - jump->addr = compiler->size; #else /* !SLJIT_CONFIG_ARM_V6 */ + jump->addr = compiler->size; if (type >= SLJIT_FAST_CALL) jump->flags |= IS_BL; - PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type))); - jump->addr = compiler->size; + compiler->size += JUMP_MAX_SIZE - 1; #endif /* SLJIT_CONFIG_ARM_V6 */ return jump; } @@ -3264,14 +3392,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) if (type >= SLJIT_FAST_CALL) FAIL_IF(prepare_blx(compiler)); + jump->addr = compiler->size; FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0)); - if (type >= SLJIT_FAST_CALL) + if (type >= SLJIT_FAST_CALL) { + jump->addr = compiler->size; FAIL_IF(emit_blx(compiler)); + } #else /* !SLJIT_CONFIG_ARM_V6 */ - FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1))); + compiler->size += JUMP_MAX_SIZE - 1; #endif /* SLJIT_CONFIG_ARM_V6 */ - jump->addr = compiler->size; return SLJIT_SUCCESS; } @@ -3425,7 +3556,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG2)); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG1)); if (src2_reg != dst_reg) { src1 = src2_reg; @@ -3488,8 +3619,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); - src1 = TMP_FREG1; + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w)); + src1 = TMP_FREG2; } cc = get_cc(compiler, type & ~SLJIT_32); @@ -4444,6 +4575,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) @@ -4454,22 +4589,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value)); #endif /* SLJIT_CONFIG_ARM_V6 */ - const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); - PTR_FAIL_IF(!const_); - set_const(const_, compiler); - if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1)); return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; @@ -4478,24 +4609,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0)); compiler->patches++; #else /* !SLJIT_CONFIG_ARM_V6 */ - PTR_FAIL_IF(emit_imm(compiler, dst_r, 0)); + PTR_FAIL_IF(push_inst(compiler, RD(dst_r))); #endif /* SLJIT_CONFIG_ARM_V6 */ - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 1); + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + compiler->size += 1; +#endif /* SLJIT_CONFIG_ARM_V7 */ if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1)); - return put_label; + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - inline_set_jump_addr(addr, executable_offset, new_target, 1); + set_jump_addr(addr, executable_offset, new_target, 1); } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - inline_set_const(addr, executable_offset, (sljit_uw)new_constant, 1); + set_const_value(addr, executable_offset, (sljit_uw)new_constant, 1); } diff --git a/ext/pcre/pcre2lib/sljit/sljitNativeARM_64.c b/ext/pcre/pcre2lib/sljit/sljitNativeARM_64.c index b268582f42ce..5331ebdf4296 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativeARM_64.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativeARM_64.c @@ -71,6 +71,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define ADD 0x8b000000 #define ADDE 0x8b200000 #define ADDI 0x91000000 +#define ADR 0x10000000 +#define ADRP 0x90000000 #define AND 0x8a000000 #define ANDI 0x92000000 #define AND_v 0x0e201c00 @@ -202,77 +204,263 @@ static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, return push_inst(compiler, MOVK | RD(dst) | ((sljit_ins)(imm >> 48) << 5) | (3 << 21)); } -static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; - if (jump->flags & SLJIT_REWRITABLE_JUMP) { - jump->flags |= PATCH_ABS64; - return 0; - } + if (jump->flags & SLJIT_REWRITABLE_JUMP) + goto exit; if (jump->flags & JUMP_ADDR) target_addr = jump->u.target; else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(jump->u.label != NULL); target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } - diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr - 4) - executable_offset; + diff = (sljit_sw)target_addr - (sljit_sw)code_ptr - executable_offset; if (jump->flags & IS_COND) { diff += SSIZE_OF(ins); if (diff <= 0xfffff && diff >= -0x100000) { - code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; - jump->addr -= sizeof(sljit_ins); + *(--code_ptr) ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; jump->flags |= PATCH_COND; - return 5; + jump->addr -= sizeof(sljit_ins); + return code_ptr; } diff -= SSIZE_OF(ins); } if (diff <= 0x7ffffff && diff >= -0x8000000) { + if (jump->flags & IS_COND) + code_ptr[-1] -= (4 << 5); jump->flags |= PATCH_B; - return 4; + return code_ptr; } if (target_addr < 0x100000000l) { if (jump->flags & IS_COND) - code_ptr[-5] -= (2 << 5); - code_ptr[-2] = code_ptr[0]; - return 2; + code_ptr[-1] -= (2 << 5); + code_ptr[2] = code_ptr[0]; + return code_ptr + 2; + } + + if (diff <= 0xfffff000l && diff >= -0x100000000l) { + if (jump->flags & IS_COND) + code_ptr[-1] -= (2 << 5); + jump->flags |= PATCH_B32; + code_ptr[2] = code_ptr[0]; + return code_ptr + 2; } if (target_addr < 0x1000000000000l) { if (jump->flags & IS_COND) - code_ptr[-5] -= (1 << 5); + code_ptr[-1] -= (1 << 5); jump->flags |= PATCH_ABS48; - code_ptr[-1] = code_ptr[0]; - return 1; + code_ptr[3] = code_ptr[0]; + return code_ptr + 3; } +exit: jump->flags |= PATCH_ABS64; - return 0; + code_ptr[4] = code_ptr[0]; + return code_ptr + 4; } -static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { - if (max_label < 0x100000000l) { - put_label->flags = 0; - return 2; + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT(jump->flags < ((sljit_uw)4 << JUMP_SIZE_SHIFT)); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + if (diff <= 0xfffff && diff >= -0x100000) { + jump->flags |= PATCH_B; + return 0; } - if (max_label < 0x1000000000000l) { - put_label->flags = 1; + if (diff <= 0xfffff000l && diff >= -0x100000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_B32; return 1; } - put_label->flags = 2; - return 0; + if (addr < 0x100000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + return 1; + } + + if (addr < 0x1000000000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS48; + return 2; + } + + SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS64; + return 3; +} + +static SLJIT_INLINE void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) +{ + sljit_sw addr = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr); + sljit_ins* buf_ptr = (sljit_ins*)jump->addr; + sljit_u32 dst; + SLJIT_UNUSED_ARG(executable_offset); + + if (!(jump->flags & JUMP_MOV_ADDR)) { + if (jump->flags & PATCH_COND) { + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x3ffff && addr >= -0x40000); + buf_ptr[0] = (buf_ptr[0] & ~(sljit_ins)0xffffe0) | (sljit_ins)((addr & 0x7ffff) << 5); + return; + } + + if (jump->flags & PATCH_B) { + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x1ffffff && addr >= -0x2000000); + buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (sljit_ins)(addr & 0x3ffffff); + return; + } + + dst = (buf_ptr[0] >> 5) & 0x1f; + + if (jump->flags & PATCH_B32) { + addr -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) & ~(sljit_sw)0xfff; + SLJIT_ASSERT(addr <= 0xfffff000l && addr >= -0x100000000l); + buf_ptr[0] = ADRP | (((sljit_ins)(addr >> 12) & 0x3) << 29) | (((sljit_ins)(addr >> 14) & 0x7ffff) << 5) | dst; + buf_ptr[1] = ADDI | dst | (dst << 5) | ((sljit_ins)(addr & 0xfff) << 10); + return; + } + } else { + dst = *buf_ptr; + + if (jump->flags & PATCH_B) { + addr -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); + SLJIT_ASSERT(addr <= 0xfffff && addr >= -0x100000); + buf_ptr[0] = ADR | (((sljit_ins)addr & 0x3) << 29) | (((sljit_ins)(addr >> 2) & 0x7ffff) << 5) | dst; + return; + } + + if (jump->flags & PATCH_B32) { + addr -= ((sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) & ~(sljit_sw)0xfff; + SLJIT_ASSERT(addr <= 0xffffffffl && addr >= -0x100000000l); + buf_ptr[0] = ADRP | (((sljit_ins)(addr >> 12) & 0x3) << 29) | (((sljit_ins)(addr >> 14) & 0x7ffff) << 5) | dst; + buf_ptr[1] = ADDI | dst | (dst << 5) | ((sljit_ins)(addr & 0xfff) << 10); + return; + } + } + + SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || (sljit_uw)addr <= (sljit_uw)0xffffffff); + SLJIT_ASSERT((jump->flags & PATCH_ABS64) || (sljit_uw)addr <= (sljit_uw)0xffffffffffff); + + buf_ptr[0] = MOVZ | (((sljit_ins)addr & 0xffff) << 5) | dst; + buf_ptr[1] = MOVK | (((sljit_ins)(addr >> 16) & 0xffff) << 5) | (1 << 21) | dst; + if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) + buf_ptr[2] = MOVK | (((sljit_ins)(addr >> 32) & 0xffff) << 5) | (2 << 21) | dst; + + if (jump->flags & PATCH_ABS64) + buf_ptr[3] = MOVK | ((sljit_ins)((sljit_uw)addr >> 48) << 5) | (3 << 21) | dst; +} + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE; + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { + if (jump->u.target < 0x100000000l) + total_size = 3; + else if (jump->u.target < 0x1000000000000l) + total_size = 4; + } else { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if ((jump->flags & IS_COND) && (diff + 1) <= (0xfffff / SSIZE_OF(ins)) && (diff + 1) >= (-0x100000 / SSIZE_OF(ins))) + total_size = 0; + else if (diff <= (0x7ffffff / SSIZE_OF(ins)) && diff >= (-0x8000000 / SSIZE_OF(ins))) + total_size = 1; + else if (diff <= (0xfffff000l / SSIZE_OF(ins)) && diff >= (-0x100000000l / SSIZE_OF(ins))) + total_size = 3; + } + } + + size_reduce += JUMP_MAX_SIZE - total_size; + } else { + /* Real size minus 1. Unit size: instruction. */ + total_size = 3; + + if (!(jump->flags & JUMP_ADDR)) { + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (diff <= (0xfffff / SSIZE_OF(ins)) && diff >= (-0x100000 / SSIZE_OF(ins))) + total_size = 0; + else if (diff <= (0xfffff000l / SSIZE_OF(ins)) && diff >= (-0x100000000l / SSIZE_OF(ins))) + total_size = 1; + } else if (jump->u.target < 0x100000000l) + total_size = 1; + else if (jump->u.target < 0x1000000000000l) + total_size = 2; + + size_reduce += 3 - total_size; + } + + jump->flags |= total_size << JUMP_SIZE_SHIFT; + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; } -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) { struct sljit_memory_fragment *buf; sljit_ins *code; @@ -280,67 +468,73 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; sljit_sw executable_offset; sljit_sw addr; - sljit_u32 dst; struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); - code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + reduce_code_size(compiler); + + code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; code_ptr = code; word_count = 0; - next_addr = 0; - executable_offset = SLJIT_EXEC_OFFSET(code); - label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); do { *code_ptr = *buf_ptr++; - if (next_addr == word_count) { + if (next_min_addr == word_count) { SLJIT_ASSERT(!label || label->size >= word_count); SLJIT_ASSERT(!jump || jump->addr >= word_count); SLJIT_ASSERT(!const_ || const_->addr >= word_count); - SLJIT_ASSERT(!put_label || put_label->addr >= word_count); /* These structures are ordered by their address. */ - if (label && label->size == word_count) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (jump && jump->addr == word_count) { - jump->addr = (sljit_uw)(code_ptr - 4); - code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); - jump = jump->next; - } - if (const_ && const_->addr == word_count) { + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); + SLJIT_ASSERT((jump->flags & PATCH_COND) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); + } else { + word_count += jump->flags >> JUMP_SIZE_SHIFT; + addr = (sljit_sw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = (sljit_uw)addr; + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { const_->addr = (sljit_uw)code_ptr; const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == word_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)(code_ptr - 3); - code_ptr -= put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); + + SLJIT_GET_NEXT_MIN(); } code_ptr++; word_count++; @@ -350,7 +544,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -358,61 +552,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); jump = compiler->jumps; while (jump) { - do { - addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); - buf_ptr = (sljit_ins *)jump->addr; - - if (jump->flags & PATCH_B) { - addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT(addr <= 0x1ffffff && addr >= -0x2000000); - buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (sljit_ins)(addr & 0x3ffffff); - if (jump->flags & IS_COND) - buf_ptr[-1] -= (4 << 5); - break; - } - if (jump->flags & PATCH_COND) { - addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT(addr <= 0x3ffff && addr >= -0x40000); - buf_ptr[0] = (buf_ptr[0] & ~(sljit_ins)0xffffe0) | (sljit_ins)((addr & 0x7ffff) << 5); - break; - } - - SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || (sljit_uw)addr <= (sljit_uw)0xffffffff); - SLJIT_ASSERT((jump->flags & PATCH_ABS64) || (sljit_uw)addr <= (sljit_uw)0xffffffffffff); - - dst = buf_ptr[0] & 0x1f; - buf_ptr[0] = MOVZ | dst | (((sljit_ins)addr & 0xffff) << 5); - buf_ptr[1] = MOVK | dst | (((sljit_ins)(addr >> 16) & 0xffff) << 5) | (1 << 21); - if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) - buf_ptr[2] = MOVK | dst | (((sljit_ins)(addr >> 32) & 0xffff) << 5) | (2 << 21); - if (jump->flags & PATCH_ABS64) - buf_ptr[3] = MOVK | dst | ((sljit_ins)(addr >> 48) << 5) | (3 << 21); - } while (0); + generate_jump_or_mov_addr(jump, executable_offset); jump = jump->next; } - put_label = compiler->put_labels; - while (put_label) { - addr = (sljit_sw)put_label->label->addr; - buf_ptr = (sljit_ins*)put_label->addr; - - buf_ptr[0] |= ((sljit_ins)addr & 0xffff) << 5; - buf_ptr[1] |= ((sljit_ins)(addr >> 16) & 0xffff) << 5; - - if (put_label->flags >= 1) - buf_ptr[2] |= ((sljit_ins)(addr >> 32) & 0xffff) << 5; - - if (put_label->flags >= 2) - buf_ptr[3] |= (sljit_ins)(addr >> 48) << 5; - - put_label = put_label->next; - } - compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); @@ -693,7 +840,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s imm = (flags & ARG2_IMM) ? arg2 : arg1; switch (op) { - case SLJIT_MUL: case SLJIT_CLZ: case SLJIT_CTZ: case SLJIT_REV: @@ -703,6 +849,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_REV_S32: case SLJIT_ADDC: case SLJIT_SUBC: + case SLJIT_MUL: + case SLJIT_MULADD: /* No form with immediate operand (except imm 0, which is represented by a ZERO register). */ break; @@ -957,6 +1105,9 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s /* fallthrough */ case SLJIT_ROTR: return push_inst(compiler, (RORV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_MULADD: + compiler->status_flags_state = 0; + return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(dst)); default: SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; @@ -1031,14 +1182,20 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s if (argw <= 0xff && argw >= -0x100) return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); - if (argw >= 0) { - if (argw <= 0xfff0ff && ((argw + 0x100) & 0xfff) <= 0x1ff) { + if (((argw + 0x100) & 0xfff) <= 0x1ff && argw <= 0xfff0ff && argw >= -0xfff100) { + if (argw >= 0) { + if (argw & 0x100) + argw += 0x1000; + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12)); + } else { + if (!(argw & 0x100)) + argw -= 0x1000; + + FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)-argw >> 12) << 10))); + return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12)); } - } else if (argw >= -0xfff100 && ((-argw + 0xff) & 0xfff) <= 0x1ff) { - FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)-argw >> 12) << 10))); - return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12)); } FAIL_IF(load_immediate(compiler, tmp_reg, argw)); @@ -1422,7 +1579,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile op = GET_OPCODE(op); if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { /* Both operands are registers. */ - if (dst_r != TMP_REG1 && FAST_IS_REG(src)) + if (FAST_IS_REG(dst) && FAST_IS_REG(src)) return emit_op_imm(compiler, op | ((op_flags & SLJIT_32) ? INT_OP : 0), dst_r, TMP_REG1, src); switch (op) { @@ -1472,7 +1629,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile else if (!(src & SLJIT_MEM)) dst_r = src; else - FAIL_IF(emit_op_mem(compiler, mem_flags, dst_r, src, srcw, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, mem_flags, dst_r, src, srcw, TMP_REG2)); if (dst & SLJIT_MEM) return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2); @@ -1534,7 +1691,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile mem_flags = INT_SIZE; } - if (dst == TMP_REG1) + if (dst == TMP_REG2) flags |= UNUSED_RETURN; if (src1 & SLJIT_MEM) { @@ -1572,7 +1729,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); SLJIT_SKIP_CHECKS(compiler); - return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, @@ -1753,18 +1927,18 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); - return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1)); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG2)); } arg &= REG_MASK; if (!arg) { - FAIL_IF(load_immediate(compiler, TMP_REG1, argw & ~(0xfff << shift))); + FAIL_IF(load_immediate(compiler, TMP_REG2, argw & ~(0xfff << shift))); argw = (argw >> shift) & 0xfff; - return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10)); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG2) | ((sljit_ins)argw << 10)); } if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) { @@ -1772,18 +1946,18 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); if (argw <= 0xffffff) { - FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG2) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); argw = ((argw & 0xfff) >> shift); - return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10)); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG2) | ((sljit_ins)argw << 10)); } } if (argw <= 255 && argw >= -256) return push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); - FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); - return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg) | RM(TMP_REG1)); + FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); + return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg) | RM(TMP_REG2)); } static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, @@ -1910,7 +2084,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); else dst_r = src; @@ -2180,14 +2354,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (type < SLJIT_JUMP) { jump->flags |= IS_COND; PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(compiler, type))); - } - else if (type >= SLJIT_FAST_CALL) + } else if (type >= SLJIT_FAST_CALL) jump->flags |= IS_BL; - PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; - PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1))); + PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG2))); + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2236,9 +2410,11 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi inv_bits |= 1 << 24; PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src))); - PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; - PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1))); + PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG2))); + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2252,8 +2428,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi if (src != SLJIT_IMM) { if (src & SLJIT_MEM) { ADJUST_LOCAL_OFFSET(src, srcw); - FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); - src = TMP_REG1; + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2)); + src = TMP_REG2; } return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src)); } @@ -2264,9 +2440,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); jump->u.target = (sljit_uw)srcw; - FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; - return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)); + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; + return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG2)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, @@ -2314,7 +2491,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co if (GET_OPCODE(op) < SLJIT_ADD) { FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO))); - if (dst_r == TMP_REG1) { + if (dst & SLJIT_MEM) { mem_flags = (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE; return emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG2); } @@ -2361,11 +2538,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp if (src1 == SLJIT_IMM) { if (type & SLJIT_32) src1w = (sljit_s32)src1w; - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1 = TMP_REG1; + FAIL_IF(load_immediate(compiler, TMP_REG2, src1w)); + src1 = TMP_REG2; } else if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG2)); - src1 = TMP_REG1; + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src1, src1w, TMP_REG2)); + src1 = TMP_REG2; } cc = get_cc(compiler, type & ~SLJIT_32); @@ -2386,8 +2563,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com ADJUST_LOCAL_OFFSET(src1, src1w); if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src1, src1w)); - src1 = TMP_FREG1; + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG2, src1, src1w)); + src1 = TMP_FREG2; } cc = get_cc(compiler, type & ~SLJIT_32); @@ -2554,13 +2731,13 @@ static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, slj sljit_s32 mem = *mem_ptr; if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { - *mem_ptr = TMP_REG1; - return push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10)); + *mem_ptr = TMP_REG2; + return push_inst(compiler, ADD | RD(TMP_REG2) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10)); } if (!(mem & REG_MASK)) { - *mem_ptr = TMP_REG1; - return load_immediate(compiler, TMP_REG1, memw); + *mem_ptr = TMP_REG2; + return load_immediate(compiler, TMP_REG2, memw); } mem &= REG_MASK; @@ -2570,11 +2747,11 @@ static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, slj return SLJIT_SUCCESS; } - *mem_ptr = TMP_REG1; + *mem_ptr = TMP_REG2; if (memw < -0xffffff || memw > 0xffffff) { - FAIL_IF(load_immediate(compiler, TMP_REG1, memw)); - return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(mem)); + FAIL_IF(load_immediate(compiler, TMP_REG2, memw)); + return push_inst(compiler, ADD | RD(TMP_REG2) | RN(TMP_REG2) | RM(mem)); } ins = ADDI; @@ -2585,16 +2762,16 @@ static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, slj } if (memw > 0xfff) { - FAIL_IF(push_inst(compiler, ins | (1 << 22) | RD(TMP_REG1) | RN(mem) | ((sljit_ins)(memw >> 12) << 10))); + FAIL_IF(push_inst(compiler, ins | (1 << 22) | RD(TMP_REG2) | RN(mem) | ((sljit_ins)(memw >> 12) << 10))); memw &= 0xfff; if (memw == 0) return SLJIT_SUCCESS; - mem = TMP_REG1; + mem = TMP_REG2; } - return push_inst(compiler, ins | RD(TMP_REG1) | RN(mem) | ((sljit_ins)memw << 10)); + return push_inst(compiler, ins | RD(TMP_REG2) | RN(mem) | ((sljit_ins)memw << 10)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, @@ -2802,8 +2979,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil return push_inst(compiler, MOVI | imm | VD(freg)); } - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - src = TMP_REG1; + FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); + src = TMP_REG2; } return push_inst(compiler, DUP_g | ins | VD(freg) | RN(src)); @@ -2872,8 +3049,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (elem_size < 3) srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; - FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw)); - srcdst = TMP_REG1; + FAIL_IF(load_immediate(compiler, TMP_REG2, srcdstw)); + srcdst = TMP_REG2; } if (type & SLJIT_SIMD_STORE) { @@ -3030,7 +3207,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c FAIL_IF(push_inst(compiler, USRA | (1 << 30) | (imms << 16) | VD(TMP_FREG1) | VN(TMP_FREG1))); - dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; ins = (0x1 << 16); if (reg_size == 4 && elem_size == 0) { @@ -3040,8 +3217,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c FAIL_IF(push_inst(compiler, UMOV | ins | RD(dst_r) | VN(TMP_FREG1))); - if (dst_r == TMP_REG1) - return emit_op_mem(compiler, STORE | ((type & SLJIT_32) ? INT_SIZE : WORD_SIZE), TMP_REG1, dst, dstw, TMP_REG2); + if (dst_r == TMP_REG2) + return emit_op_mem(compiler, STORE | ((type & SLJIT_32) ? INT_SIZE : WORD_SIZE), TMP_REG2, dst, dstw, TMP_REG1); return SLJIT_SUCCESS; } @@ -3264,26 +3441,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; - PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, 0)); + PTR_FAIL_IF(push_inst(compiler, RD(dst_r))); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 1); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 1); + + compiler->size += 3; if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); - return put_label; + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) diff --git a/ext/pcre/pcre2lib/sljit/sljitNativeARM_T2_32.c b/ext/pcre/pcre2lib/sljit/sljitNativeARM_T2_32.c index c27c50ddb3a0..799954a85996 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativeARM_T2_32.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativeARM_T2_32.c @@ -157,6 +157,7 @@ static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) #define LSRSI 0x0800 #define LSR_W 0xfa20f000 #define LSR_WI 0xea4f0010 +#define MLA 0xfb000000 #define MOV 0x4600 #define MOVS 0x0000 #define MOVSI 0x2000 @@ -292,7 +293,7 @@ static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) +static sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) { FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); @@ -300,137 +301,262 @@ static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); } -static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) +/* Dst must be in bits[11-8] */ +static void set_imm32_const(sljit_u16 *inst, sljit_ins dst, sljit_uw new_imm) { - sljit_ins dst = inst[1] & 0x0f00; - SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00)); inst[0] = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1)); inst[1] = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff)); inst[2] = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1)); inst[3] = (sljit_u16)(dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16)); } -static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) +static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) +{ + sljit_ins dst = inst[1] & 0x0f00; + SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00)); + set_imm32_const(inst, dst, new_imm); +} + +static SLJIT_INLINE sljit_u16* detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) { sljit_sw diff; if (jump->flags & SLJIT_REWRITABLE_JUMP) - return 0; + goto exit; if (jump->flags & JUMP_ADDR) { /* Branch to ARM code is not optimized yet. */ if (!(jump->u.target & 0x1)) - return 0; - diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset) >> 1; - } - else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1; + goto exit; + diff = (sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset; + } else { + SLJIT_ASSERT(jump->u.label != NULL); + diff = (sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2); } if (jump->flags & IS_COND) { SLJIT_ASSERT(!(jump->flags & IS_BL)); - if (diff <= 127 && diff >= -128) { + /* Size of the prefix IT instruction. */ + diff += SSIZE_OF(u16); + if (diff <= 0xff && diff >= -0x100) { jump->flags |= PATCH_TYPE1; - return 5; + jump->addr = (sljit_uw)(code_ptr - 1); + return code_ptr - 1; } - if (diff <= 524287 && diff >= -524288) { + if (diff <= 0xfffff && diff >= -0x100000) { jump->flags |= PATCH_TYPE2; - return 4; + jump->addr = (sljit_uw)(code_ptr - 1); + return code_ptr; } - /* +1 comes from the prefix IT instruction. */ - diff--; - if (diff <= 8388607 && diff >= -8388608) { - jump->flags |= PATCH_TYPE3; - return 3; + diff -= SSIZE_OF(u16); + } else if (jump->flags & IS_BL) { + /* Branch and link. */ + if (diff <= 0xffffff && diff >= -0x1000000) { + jump->flags |= PATCH_TYPE5; + return code_ptr + 1; } + goto exit; + } else if (diff <= 0x7ff && diff >= -0x800) { + jump->flags |= PATCH_TYPE3; + return code_ptr; } - else if (jump->flags & IS_BL) { - if (diff <= 8388607 && diff >= -8388608) { - jump->flags |= PATCH_BL; - return 3; - } + + if (diff <= 0xffffff && diff >= -0x1000000) { + jump->flags |= PATCH_TYPE4; + return code_ptr + 1; } - else { - if (diff <= 1023 && diff >= -1024) { - jump->flags |= PATCH_TYPE4; - return 4; - } - if (diff <= 8388607 && diff >= -8388608) { - jump->flags |= PATCH_TYPE5; - return 3; - } + +exit: + code_ptr[4] = code_ptr[0]; + + if (jump->flags & IS_COND) { + code_ptr[3] = code_ptr[-1]; + jump->addr = (sljit_uw)(code_ptr - 1); + } + + return code_ptr + 4; +} + +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) +{ + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + /* The pc+4 offset is represented by the 2 * SSIZE_OF(sljit_u16) below. */ + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + /* Note: ADR with imm8 does not set the last bit (Thumb2 flag). */ + + if (diff <= 0xffd + 2 * SSIZE_OF(u16) && diff >= -0xfff + 2 * SSIZE_OF(u16)) { + jump->flags |= PATCH_TYPE6; + return 1; } - return 0; + return 3; } -static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw executable_offset) +static SLJIT_INLINE void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) { sljit_s32 type = (jump->flags >> 4) & 0xf; + sljit_u16 *jump_inst = (sljit_u16*)jump->addr; sljit_sw diff; - sljit_u16 *jump_inst; - sljit_s32 s, j1, j2; + sljit_ins ins; + + diff = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr); if (SLJIT_UNLIKELY(type == 0)) { - modify_imm32_const((sljit_u16*)jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); + ins = (jump->flags & JUMP_MOV_ADDR) ? *jump_inst : RDN3(TMP_REG1); + set_imm32_const((sljit_u16*)jump->addr, ins, (sljit_uw)diff); return; } - if (jump->flags & JUMP_ADDR) { - SLJIT_ASSERT(jump->u.target & 0x1); - diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; - } - else { - SLJIT_ASSERT(jump->u.label->addr & 0x1); - diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; + if (SLJIT_UNLIKELY(type == 6)) { + SLJIT_ASSERT(jump->flags & JUMP_MOV_ADDR); + diff -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_inst + 2, executable_offset) & ~(sljit_sw)0x3; + + SLJIT_ASSERT(diff <= 0xfff && diff >= -0xfff); + + ins = ADDWI >> 16; + if (diff <= 0) { + diff = -diff; + ins = SUBWI >> 16; + } + + jump_inst[1] = (sljit_u16)(jump_inst[0] | COPY_BITS(diff, 8, 12, 3) | (diff & 0xff)); + jump_inst[0] = (sljit_u16)(ins | 0xf | COPY_BITS(diff, 11, 10, 1)); + return; } - jump_inst = (sljit_u16*)jump->addr; + + SLJIT_ASSERT((diff & 0x1) != 0 && !(jump->flags & JUMP_MOV_ADDR)); + diff = (diff - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; switch (type) { case 1: /* Encoding T1 of 'B' instruction */ - SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND)); + SLJIT_ASSERT(diff <= 0x7f && diff >= -0x80 && (jump->flags & IS_COND)); jump_inst[0] = (sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff)); return; case 2: /* Encoding T3 of 'B' instruction */ - SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND)); + SLJIT_ASSERT(diff <= 0x7ffff && diff >= -0x80000 && (jump->flags & IS_COND)); jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1)); jump_inst[1] = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff)); return; case 3: - SLJIT_ASSERT(jump->flags & IS_COND); - *jump_inst++ = (sljit_u16)(IT | ((jump->flags >> 4) & 0xf0) | 0x8); - diff--; - type = 5; - break; - case 4: /* Encoding T2 of 'B' instruction */ - SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND)); + SLJIT_ASSERT(diff <= 0x3ff && diff >= -0x400 && !(jump->flags & IS_COND)); jump_inst[0] = (sljit_u16)(0xe000 | (diff & 0x7ff)); return; } - SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608); + SLJIT_ASSERT(diff <= 0x7fffff && diff >= -0x800000); + + /* Really complex instruction form for branches. Negate with sign bit. */ + diff ^= ((diff >> 2) & 0x600000) ^ 0x600000; - /* Really complex instruction form for branches. */ - s = (diff >> 23) & 0x1; - j1 = (~(diff >> 22) ^ s) & 0x1; - j2 = (~(diff >> 21) ^ s) & 0x1; - jump_inst[0] = (sljit_u16)(0xf000 | ((sljit_ins)s << 10) | COPY_BITS(diff, 11, 0, 10)); - jump_inst[1] = (sljit_u16)((j1 << 13) | (j2 << 11) | (diff & 0x7ff)); + jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(diff, 11, 0, 10) | COPY_BITS(diff, 23, 10, 1)); + jump_inst[1] = (sljit_u16)((diff & 0x7ff) | COPY_BITS(diff, 22, 13, 1) | COPY_BITS(diff, 21, 11, 1)); + + SLJIT_ASSERT(type == 4 || type == 5); /* The others have a common form. */ - if (type == 5) /* Encoding T4 of 'B' instruction */ + if (type == 4) /* Encoding T4 of 'B' instruction */ jump_inst[1] |= 0x9000; - else if (type == 6) /* Encoding T1 of 'BL' instruction */ + else /* Encoding T1 of 'BL' instruction */ jump_inst[1] |= 0xd000; - else - SLJIT_UNREACHABLE(); } -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE; + + if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2; + + if (jump->flags & IS_COND) { + diff++; + + if (diff <= (0xff / SSIZE_OF(u16)) && diff >= (-0x100 / SSIZE_OF(u16))) + total_size = 0; + else if (diff <= (0xfffff / SSIZE_OF(u16)) && diff >= (-0x100000 / SSIZE_OF(u16))) + total_size = 1; + diff--; + } else if (!(jump->flags & IS_BL) && diff <= (0x7ff / SSIZE_OF(u16)) && diff >= (-0x800 / SSIZE_OF(u16))) + total_size = 1; + + if (total_size == JUMP_MAX_SIZE && diff <= (0xffffff / SSIZE_OF(u16)) && diff >= (-0x1000000 / SSIZE_OF(u16))) + total_size = 2; + } + + size_reduce += JUMP_MAX_SIZE - total_size; + } else { + /* Real size minus 1. Unit size: instruction. */ + total_size = 3; + + if (!(jump->flags & JUMP_ADDR)) { + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (diff <= (0xffd / SSIZE_OF(u16)) && diff >= (-0xfff / SSIZE_OF(u16))) + total_size = 1; + } + + size_reduce += 3 - total_size; + } + + jump->flags |= total_size << JUMP_SIZE_SHIFT; + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) { struct sljit_memory_fragment *buf; sljit_u16 *code; @@ -438,64 +564,74 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_u16 *buf_ptr; sljit_u16 *buf_end; sljit_uw half_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; + sljit_sw addr; sljit_sw executable_offset; struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); - code = (sljit_u16*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_u16), compiler->exec_allocator_data); + reduce_code_size(compiler); + + code = (sljit_u16*)allocate_executable_memory(compiler->size * sizeof(sljit_u16), options, exec_allocator_data, &executable_offset); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; code_ptr = code; half_count = 0; - next_addr = 0; - executable_offset = SLJIT_EXEC_OFFSET(code); - label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { buf_ptr = (sljit_u16*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 1); do { *code_ptr = *buf_ptr++; - if (next_addr == half_count) { + if (next_min_addr == half_count) { SLJIT_ASSERT(!label || label->size >= half_count); SLJIT_ASSERT(!jump || jump->addr >= half_count); SLJIT_ASSERT(!const_ || const_->addr >= half_count); - SLJIT_ASSERT(!put_label || put_label->addr >= half_count); /* These structures are ordered by their address. */ - if (label && label->size == half_count) { - label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; + if (next_min_addr == next_label_size) { + label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (jump && jump->addr == half_count) { - jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8); - code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); - jump = jump->next; - } - if (const_ && const_->addr == half_count) { + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + half_count = half_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); + SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr < + ((jump->flags >> JUMP_SIZE_SHIFT) + ((jump->flags & 0xf0) <= PATCH_TYPE2)) * sizeof(sljit_u16)); + } else { + half_count += jump->flags >> JUMP_SIZE_SHIFT; + addr = (sljit_sw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = (sljit_uw)addr; + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { const_->addr = (sljit_uw)code_ptr; const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == half_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); + + SLJIT_GET_NEXT_MIN(); } code_ptr++; half_count++; @@ -505,7 +641,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == half_count) { - label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; + label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -513,21 +649,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); jump = compiler->jumps; while (jump) { - set_jump_instruction(jump, executable_offset); + generate_jump_or_mov_addr(jump, executable_offset); jump = jump->next; } - put_label = compiler->put_labels; - while (put_label) { - modify_imm32_const((sljit_u16 *)put_label->addr, put_label->label->addr); - put_label = put_label->next; - } - compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_u16); @@ -657,10 +786,11 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, /* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */ #define SET_FLAGS 0x0100000 #define UNUSED_RETURN 0x0200000 +#define REGISTER_OP 0x0400000 static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2) { - /* dst must be register, TMP_REG1 + /* dst must be register arg1 must be register, imm arg2 must be register, imm */ sljit_s32 reg; @@ -686,6 +816,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_REV_U32: case SLJIT_REV_S32: case SLJIT_MUL: + case SLJIT_MULADD: /* No form with immediate operand. */ break; case SLJIT_MOV: @@ -921,17 +1052,17 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2)); case SLJIT_REV_U16: case SLJIT_REV_S16: - SLJIT_ASSERT(arg1 == TMP_REG2 && dst != TMP_REG2); + SLJIT_ASSERT(arg1 == TMP_REG2); - flags &= 0xffff; if (IS_2_LO_REGS(dst, arg2)) FAIL_IF(push_inst16(compiler, REV16 | RD3(dst) | RN3(arg2))); else FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2))); - if (dst == TMP_REG1 || (arg2 == TMP_REG1 && flags == SLJIT_REV_U16)) + if (!(flags & REGISTER_OP)) return SLJIT_SUCCESS; + flags &= 0xffff; if (reg_map[dst] <= 7) return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst)); return push_inst32(compiler, (flags == SLJIT_REV_U16 ? UXTH_W : SXTH_W) | RD4(dst) | RM4(dst)); @@ -966,10 +1097,10 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s compiler->status_flags_state = 0; if (!(flags & SET_FLAGS)) return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2)); - SLJIT_ASSERT(dst != TMP_REG2); - FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2))); + reg = (dst == TMP_REG2) ? TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(reg) | RN4(arg1) | RM4(arg2))); /* cmp TMP_REG2, dst asr #31. */ - return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst)); + return push_inst32(compiler, CMP_W | RN4(reg) | 0x70e0 | RM4(dst)); case SLJIT_AND: if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); @@ -985,37 +1116,44 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_MSHL: - FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f)); - arg2 = TMP_REG2; + reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f)); + arg2 = (sljit_uw)reg; /* fallthrough */ case SLJIT_SHL: if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_MLSHR: - FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f)); - arg2 = TMP_REG2; + reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f)); + arg2 = (sljit_uw)reg; /* fallthrough */ case SLJIT_LSHR: if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_MASHR: - FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f)); - arg2 = TMP_REG2; + reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f)); + arg2 = (sljit_uw)reg; /* fallthrough */ case SLJIT_ASHR: if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_ROTL: - FAIL_IF(push_inst32(compiler, RSB_WI | RD4(TMP_REG2) | RN4(arg2) | 0)); - arg2 = TMP_REG2; + reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, RSB_WI | RD4(reg) | RN4(arg2) | 0)); + arg2 = (sljit_uw)reg; /* fallthrough */ case SLJIT_ROTR: if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, RORS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ROR_W | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MULADD: + compiler->status_flags_state = 0; + return push_inst32(compiler, MLA | RD4(dst) | RN4(arg1) | RM4(arg2) | RT4(dst)); } SLJIT_UNREACHABLE(); @@ -1779,14 +1917,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r, flags; - sljit_s32 op_flags = GET_ALL_FLAGS(op); CHECK_ERROR(); CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); - dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; op = GET_OPCODE(op); if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { @@ -1826,35 +1963,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile if (src == SLJIT_IMM) FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw)); - else if (src & SLJIT_MEM) { + else if (src & SLJIT_MEM) FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1)); - } else { - if (dst_r != TMP_REG1) - return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src); + else if (FAST_IS_REG(dst)) + return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src); + else dst_r = src; - } if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; - return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); + return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1); } SLJIT_COMPILE_ASSERT(WORD_SIZE == 0, word_size_must_be_0); - flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; + flags = WORD_SIZE; - if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { + if (!(dst & SLJIT_MEM) && (!(src & SLJIT_MEM) || op == SLJIT_REV_S16)) + op |= REGISTER_OP; flags |= HALF_SIZE; + } if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, flags, TMP_REG1, src, srcw, TMP_REG1)); src = TMP_REG1; } - emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, (sljit_uw)src); + emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src); if (SLJIT_UNLIKELY(dst & SLJIT_MEM)) - return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); + return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1); return SLJIT_SUCCESS; } @@ -1863,7 +2002,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 dst_reg, flags, src2_reg; + sljit_s32 dst_reg, src2_tmp_reg, flags; CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); @@ -1871,36 +2010,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2; flags = HAS_FLAGS(op) ? SET_FLAGS : 0; if (dst == TMP_REG1) flags |= UNUSED_RETURN; + if (src2 == SLJIT_IMM) + flags |= ARG2_IMM; + else if (src2 & SLJIT_MEM) { + src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; + emit_op_mem(compiler, WORD_SIZE, src2_tmp_reg, src2, src2w, TMP_REG1); + src2w = src2_tmp_reg; + } else + src2w = src2; + if (src1 == SLJIT_IMM) flags |= ARG1_IMM; else if (src1 & SLJIT_MEM) { emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1); src1w = TMP_REG1; - } - else + } else src1w = src1; - if (src2 == SLJIT_IMM) - flags |= ARG2_IMM; - else if (src2 & SLJIT_MEM) { - src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? TMP_REG2 : TMP_REG1; - emit_op_mem(compiler, WORD_SIZE, src2_reg, src2, src2w, src2_reg); - src2w = src2_reg; - } - else - src2w = src2; - emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, (sljit_uw)src1w, (sljit_uw)src2w); if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; - return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2); + return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, @@ -1914,6 +2051,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -2228,7 +2382,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src))); else dst_r = src; @@ -2519,7 +2673,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); type &= 0xff; - PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); if (type < SLJIT_JUMP) { jump->flags |= IS_COND; cc = get_cc(compiler, type); @@ -2535,6 +2688,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1))); } + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2800,8 +2955,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); jump->u.target = (sljit_uw)srcw; - FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1)); } @@ -2968,7 +3124,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG2)); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG1)); if (src2_reg != dst_reg) { src1 = src2_reg; @@ -3040,8 +3196,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); - src1 = TMP_FREG1; + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w)); + src1 = TMP_FREG2; } FAIL_IF(push_inst16(compiler, IT | (get_cc(compiler, type & ~SLJIT_32) << 4) | 0x8)); @@ -4106,25 +4262,26 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; - PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, 0)); + PTR_FAIL_IF(push_inst16(compiler, RDN3(dst_r))); + compiler->size += 3; if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); - return put_label; + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) diff --git a/ext/pcre/pcre2lib/sljit/sljitNativeLOONGARCH_64.c b/ext/pcre/pcre2lib/sljit/sljitNativeLOONGARCH_64.c index dbd760540fb5..2e1d742aee74 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativeLOONGARCH_64.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativeLOONGARCH_64.c @@ -85,10 +85,12 @@ lower parts in the instruction word, denoted by the “L” and “H” suffixes #define FRK(fk) ((sljit_ins)freg_map[fk] << 10) #define FRA(fa) ((sljit_ins)freg_map[fa] << 15) +#define IMM_V(imm) ((sljit_ins)(imm) << 10) #define IMM_I8(imm) (((sljit_ins)(imm)&0xff) << 10) #define IMM_I12(imm) (((sljit_ins)(imm)&0xfff) << 10) #define IMM_I14(imm) (((sljit_ins)(imm)&0xfff3) << 10) #define IMM_I16(imm) (((sljit_ins)(imm)&0xffff) << 10) +#define IMM_I20(imm) (((sljit_ins)(imm)&0xffffffff) >> 12 << 5) #define IMM_I21(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x1f)) #define IMM_I26(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x3ff)) @@ -318,6 +320,36 @@ lower parts in the instruction word, denoted by the “L” and “H” suffixes #define FSTX_S OPC_3R(0x7070) #define FSTX_D OPC_3R(0x7078) +/* Vector Instructions */ + +/* Vector Arithmetic Instructions */ +#define VOR_V OPC_3R(0xe24d) +#define VXOR_V OPC_3R(0xe24e) +#define VAND_V OPC_3R(0xe24c) +#define VMSKLTZ OPC_2R(0x1ca710) + +/* Vector Memory Access Instructions */ +#define VLD OPC_2RI12(0xb0) +#define VST OPC_2RI12(0xb1) +#define XVLD OPC_2RI12(0xb2) +#define XVST OPC_2RI12(0xb3) +#define VSTELM OPC_2RI8(0xc40) + +/* Vector Float Conversion Instructions */ +#define VFCVTL_D_S OPC_2R(0x1ca77c) + +/* Vector Bit Manipulate Instructions */ +#define VSLLWIL OPC_2R(0x1cc200) + +/* Vector Move And Shuffle Instructions */ +#define VLDREPL OPC_2R(0xc0000) +#define VINSGR2VR OPC_2R(0x1cbac0) +#define VPICKVE2GR_U OPC_2R(0x1cbce0) +#define VREPLGR2VR OPC_2R(0x1ca7c0) +#define VREPLVE OPC_3R(0xe244) +#define VREPLVEI OPC_2R(0x1cbde0) +#define XVPERMI OPC_2RI8(0x1dfa) + #define I12_MAX (0x7ff) #define I12_MIN (-0x800) #define BRANCH16_MAX (0x7fff << 2) @@ -337,16 +369,32 @@ lower parts in the instruction word, denoted by the “L” and “H” suffixes /* LoongArch CPUCFG register for feature detection */ #define LOONGARCH_CFG2 0x02 -#define LOONGARCH_FEATURE_LAMCAS (1 << 28) +#define LOONGARCH_CFG2_LAMCAS (1 << 28) -static sljit_u32 cpu_feature_list = 0; +static sljit_u32 cfg2_feature_list = 0; -static SLJIT_INLINE sljit_u32 get_cpu_features(void) -{ - if (cpu_feature_list == 0) - __asm__ ("cpucfg %0, %1" : "+&r"(cpu_feature_list) : "r"(LOONGARCH_CFG2)); - return cpu_feature_list; -} +/* According to Software Development and Build Convention for LoongArch Architectures, ++ the status of LSX and LASX extension must be checked through HWCAP */ +#include + +#define LOONGARCH_HWCAP_LSX (1 << 4) +#define LOONGARCH_HWCAP_LASX (1 << 5) + +static sljit_u32 hwcap_feature_list = 0; + +/* Feature type */ +#define GET_CFG2 0 +#define GET_HWCAP 1 + +static SLJIT_INLINE sljit_u32 get_cpu_features(sljit_u32 feature_type) + { + if (cfg2_feature_list == 0) + __asm__ ("cpucfg %0, %1" : "+&r"(cfg2_feature_list) : "r"(LOONGARCH_CFG2)); + if (hwcap_feature_list == 0) + hwcap_feature_list = (sljit_u32)getauxval(AT_HWCAP); + + return feature_type ? hwcap_feature_list : cfg2_feature_list; + } static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) { @@ -371,24 +419,23 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i if (jump->flags & JUMP_ADDR) target_addr = jump->u.target; else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(jump->u.label != NULL); target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset; if (jump->flags & IS_COND) { - inst--; diff += SSIZE_OF(ins); if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) { - jump->flags |= PATCH_B; + inst--; inst[0] = (inst[0] & 0xfc0003ff) ^ 0x4000000; + jump->flags |= PATCH_B; jump->addr = (sljit_uw)inst; return inst; } - inst++; diff -= SSIZE_OF(ins); } @@ -435,65 +482,179 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i return inst + 3; } -static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { - if (max_label <= (sljit_uw)S32_MAX) { - put_label->flags = PATCH_ABS32; + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT)); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + if (diff >= S32_MIN && diff <= S32_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_REL32; + return 1; + } + + if (addr <= S32_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS32; return 1; } - if (max_label <= S52_MAX) { - put_label->flags = PATCH_ABS52; + if (addr <= S52_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS52; return 2; } - put_label->flags = 0; + SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT)); return 3; } -static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg) +static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw executable_offset) { - struct sljit_jump *jump = NULL; - struct sljit_put_label *put_label; - sljit_uw flags; - sljit_ins *inst; - sljit_uw addr; + sljit_uw flags = jump->flags; + sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; + sljit_ins *ins = (sljit_ins*)jump->addr; + sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : TMP_REG1; + SLJIT_UNUSED_ARG(executable_offset); - if (reg != 0) { - jump = (struct sljit_jump*)dst; - flags = jump->flags; - inst = (sljit_ins*)jump->addr; - addr = (flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - } else { - put_label = (struct sljit_put_label*)dst; - flags = put_label->flags; - inst = (sljit_ins*)put_label->addr; - addr = put_label->label->addr; - reg = *inst; + if (flags & PATCH_REL32) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset); + + SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX); + + if ((addr & 0x800) != 0) + addr += 0x1000; + + ins[0] = PCADDU12I | RD(reg) | IMM_I20(addr); + + if (!(flags & JUMP_MOV_ADDR)) { + SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL); + ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2); + } else + ins[1] = ADDI_D | RD(reg) | RJ(reg) | IMM_I12(addr); + return; } if (flags & PATCH_ABS32) { SLJIT_ASSERT(addr <= S32_MAX); - inst[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5); + ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5); } else if (flags & PATCH_ABS52) { - inst[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5); - inst[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5); - inst += 1; + ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5); + ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5); + ins += 1; } else { - inst[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5); - inst[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5); - inst[2] = LU52I_D | RD(reg) | RJ(reg) | IMM_I12(addr >> 52); - inst += 2; + ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5); + ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5); + ins[2] = LU52I_D | RD(reg) | RJ(reg) | IMM_I12(addr >> 52); + ins += 2; } - if (jump != NULL) { - SLJIT_ASSERT((inst[1] & OPC_2RI16(0x3f)) == JIRL); - inst[1] = (inst[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2); + if (!(flags & JUMP_MOV_ADDR)) { + SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL); + ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2); } else - inst[1] = ORI | RD(reg) | RJ(reg) | IMM_I12(addr); + ins[1] = ORI | RD(reg) | RJ(reg) | IMM_I12(addr); +} + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE; + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { + if (jump->u.target <= S32_MAX) + total_size = 2; + else if (jump->u.target <= S52_MAX) + total_size = 3; + } else { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins))) + total_size = 0; + else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins))) + total_size = 1; + else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins))) + total_size = 2; + } + } + + size_reduce += JUMP_MAX_SIZE - total_size; + jump->flags |= total_size << JUMP_SIZE_SHIFT; + } else { + total_size = 3; + + if (!(jump->flags & JUMP_ADDR)) { + /* Real size minus 1. Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins))) + total_size = 1; + } else if (jump->u.target < S32_MAX) + total_size = 1; + else if (jump->u.target <= S52_MAX) + total_size = 2; + + size_reduce += 3 - total_size; + jump->flags |= total_size << JUMP_SIZE_SHIFT; + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; } -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) { struct sljit_memory_fragment *buf; sljit_ins *code; @@ -501,70 +662,72 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; sljit_sw executable_offset; sljit_uw addr; struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); - code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + reduce_code_size(compiler); + + code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; code_ptr = code; word_count = 0; - next_addr = 0; - executable_offset = SLJIT_EXEC_OFFSET(code); - label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); do { *code_ptr = *buf_ptr++; - if (next_addr == word_count) { + if (next_min_addr == word_count) { SLJIT_ASSERT(!label || label->size >= word_count); SLJIT_ASSERT(!jump || jump->addr >= word_count); SLJIT_ASSERT(!const_ || const_->addr >= word_count); - SLJIT_ASSERT(!put_label || put_label->addr >= word_count); /* These structures are ordered by their address. */ - if (label && label->size == word_count) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (jump && jump->addr == word_count) { - word_count += 3; - jump->addr = (sljit_uw)code_ptr; - code_ptr = detect_jump_type(jump, code, executable_offset); + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code, executable_offset); + SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); + } else { + word_count += jump->flags >> JUMP_SIZE_SHIFT; + addr = (sljit_uw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = addr; + } jump = jump->next; - } - if (const_ && const_->addr == word_count) { + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { const_->addr = (sljit_uw)code_ptr; const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == word_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; - - code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); - word_count += 3; - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); + SLJIT_GET_NEXT_MIN(); } code_ptr++; word_count++; @@ -574,7 +737,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->u.addr = (sljit_uw)code_ptr; label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -582,18 +745,17 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); jump = compiler->jumps; while (jump) { do { - if (!(jump->flags & (PATCH_B | PATCH_J | PATCH_REL32))) { - load_addr_to_reg(jump, TMP_REG1); + if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) { + load_addr_to_reg(jump, executable_offset); break; } - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; buf_ptr = (sljit_ins *)jump->addr; addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); @@ -603,15 +765,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil break; } - if (jump->flags & PATCH_REL32) { - SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX); - - buf_ptr[0] = PCADDU12I | RD(TMP_REG1) | (sljit_ins)((sljit_sw)addr & ~0xfff); - SLJIT_ASSERT((buf_ptr[1] & OPC_2RI16(0x3f)) == JIRL); - buf_ptr[1] |= IMM_I16((addr & 0xfff) >> 2); - break; - } - SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX); if (jump->flags & IS_CALL) buf_ptr[0] = BL | (sljit_ins)IMM_I26(addr >> 2); @@ -621,12 +774,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = jump->next; } - put_label = compiler->put_labels; - while (put_label) { - load_addr_to_reg(put_label, 0); - put_label = put_label->next; - } - compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); @@ -651,8 +798,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) return 1; #endif + case SLJIT_HAS_LASX: + return (LOONGARCH_HWCAP_LASX & get_cpu_features(GET_HWCAP)); + + case SLJIT_HAS_SIMD: + return (LOONGARCH_HWCAP_LSX & get_cpu_features(GET_HWCAP)); + case SLJIT_HAS_ATOMIC: - return (LOONGARCH_FEATURE_LAMCAS & get_cpu_features()); + return (LOONGARCH_CFG2_LAMCAS & get_cpu_features(GET_CFG2)); case SLJIT_HAS_CLZ: case SLJIT_HAS_CTZ: @@ -707,6 +860,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) #define SLOW_SRC1 0x08000 #define SLOW_SRC2 0x10000 #define SLOW_DEST 0x20000 +#define MEM_USE_TMP2 0x40000 #define STACK_STORE ST_D #define STACK_LOAD LD_D @@ -1039,7 +1193,7 @@ static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, slj static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { sljit_s32 base = arg & REG_MASK; - sljit_s32 tmp_r = TMP_REG1; + sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1; sljit_sw offset; SLJIT_ASSERT(arg & SLJIT_MEM); @@ -1048,11 +1202,6 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl next_argw = 0; } - /* Since tmp can be the same as base or offset registers, - * these might be unavailable after modifying tmp. */ - if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) - tmp_r = reg; - if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { argw &= 0x3; @@ -1149,8 +1298,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, slji FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(dst))); \ } \ } \ - } \ - else { \ + } else { \ if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); \ if (!(flags & UNUSED_DEST)) \ @@ -1165,88 +1313,88 @@ static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, slji static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { - sljit_s32 is_overflow, is_carry, carry_src_r, is_handled; + sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg; sljit_ins op_imm, op_reg; sljit_ins word_size = ((op & SLJIT_32) ? 32 : 64); switch (GET_OPCODE(op)) { case SLJIT_MOV: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src2) | IMM_I12(0)); return SLJIT_SUCCESS; case SLJIT_MOV_U8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, ANDI | RD(dst) | RJ(src2) | IMM_I12(0xff)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_S8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, EXT_W_B | RD(dst) | RJ(src2)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(src2) | (15 << 16)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, EXT_W_H | RD(dst) | RJ(src2)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(src2) | (31 << 16)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_S32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, SLLI_W | RD(dst) | RJ(src2) | IMM_I12(0)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_CLZ: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return push_inst(compiler, INST(CLZ, op) | RD(dst) | RJ(src2)); case SLJIT_CTZ: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return push_inst(compiler, INST(CTZ, op) | RD(dst) | RJ(src2)); case SLJIT_REV: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return push_inst(compiler, ((op & SLJIT_32) ? REVB_2W : REVB_D) | RD(dst) | RJ(src2)); case SLJIT_REV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2))); return push_inst(compiler, EXT_W_H | RD(dst) | RJ(dst)); case SLJIT_REV_U16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2))); return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16)); case SLJIT_REV_S32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1); FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2))); return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0)); case SLJIT_REV_U32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1); FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2))); return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16)); @@ -1263,15 +1411,13 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1))); FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); } - } - else if (op & SLJIT_SET_Z) + } else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2))); /* Only the zero flag is needed. */ if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(src2))); - } - else { + } else { if (is_overflow) FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); else if (op & SLJIT_SET_Z) @@ -1361,8 +1507,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (GET_FLAG_TYPE(op) == SLJIT_LESS) { FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2))); is_handled = 1; - } - else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) { + } else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) { FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2))); is_handled = 1; } @@ -1372,8 +1517,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl is_handled = 1; if (flags & SRC2_IMM) { - FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2))); - src2 = TMP_REG2; + reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(push_inst(compiler, ADDI_D | RD(reg) | RJ(TMP_ZERO) | IMM_I12(src2))); + src2 = reg; flags &= ~SRC2_IMM; } @@ -1399,8 +1545,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2))); if (!(flags & UNUSED_DEST)) return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)); - } - else { + } else { if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); if (!(flags & UNUSED_DEST)) @@ -1420,8 +1565,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-1))); FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); } - } - else if (op & SLJIT_SET_Z) + } else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2))); if (is_overflow || is_carry) @@ -1430,8 +1574,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl /* Only the zero flag is needed. */ if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2))); - } - else { + } else { if (is_overflow) FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); else if (op & SLJIT_SET_Z) @@ -1468,8 +1611,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2))); FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2))); - } - else { + } else { if (is_carry) FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); @@ -1550,7 +1692,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (GET_OPCODE(op) == SLJIT_ROTL) src2 = word_size - src2; return push_inst(compiler, INST(ROTRI, op) | RD(dst) | RJ(src1) | IMM_I12(src2)); - } if (src2 == TMP_ZERO) { @@ -1601,7 +1742,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 dst_r = TMP_REG2; sljit_s32 src1_r; sljit_sw src2_r = 0; - sljit_s32 sugg_src2_r = TMP_REG2; + sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2; if (!(flags & ALT_KEEP_CACHE)) { compiler->cache_arg = 0; @@ -1612,22 +1753,19 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 SLJIT_ASSERT(HAS_FLAGS(op)); flags |= UNUSED_DEST; dst = TMP_REG2; - } - else if (FAST_IS_REG(dst)) { + } else if (FAST_IS_REG(dst)) { dst_r = dst; flags |= REG_DEST; if (flags & MOVE_OP) - sugg_src2_r = dst_r; - } - else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) + src2_tmp_reg = dst_r; + } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) flags |= SLOW_DEST; if (flags & IMM_OP) { if (src2 == SLJIT_IMM && src2w != 0 && src2w <= I12_MAX && src2w >= I12_MIN) { flags |= SRC2_IMM; src2_r = src2w; - } - else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) { + } else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) { flags |= SRC2_IMM; src2_r = src1w; @@ -1643,16 +1781,14 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (FAST_IS_REG(src1)) { src1_r = src1; flags |= REG1_SOURCE; - } - else if (src1 == SLJIT_IMM) { + } else if (src1 == SLJIT_IMM) { if (src1w) { FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); src1_r = TMP_REG1; } else src1_r = TMP_ZERO; - } - else { + } else { if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) FAIL_IF(compiler->error); else @@ -1666,14 +1802,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 flags |= REG2_SOURCE; if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) dst_r = (sljit_s32)src2_r; - } - else if (src2 == SLJIT_IMM) { + } else if (src2 == SLJIT_IMM) { if (!(flags & SRC2_IMM)) { if (src2w) { - FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); - src2_r = sugg_src2_r; - } - else { + FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w)); + src2_r = src2_tmp_reg; + } else { src2_r = TMP_ZERO; if (flags & MOVE_OP) { if (dst & SLJIT_MEM) @@ -1683,31 +1817,29 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } } } - } - else { - if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w)) + } else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, src2_tmp_reg, src2, src2w)) FAIL_IF(compiler->error); else flags |= SLOW_SRC2; - src2_r = sugg_src2_r; + src2_r = src2_tmp_reg; } if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { SLJIT_ASSERT(src2_r == TMP_REG2); - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw)); + } else { FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); } - else { - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); - } } else if (flags & SLOW_SRC1) FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((src1_r == TMP_REG1) ? MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, dst, dstw)); FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); @@ -1778,40 +1910,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_MOV: case SLJIT_MOV_P: - return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw); case SLJIT_MOV_U32: - return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); case SLJIT_MOV_S32: /* Logical operators have no W variant, so sign extended input is necessary for them. */ case SLJIT_MOV32: - return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); case SLJIT_MOV_U8: - return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); case SLJIT_MOV_S8: - return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); case SLJIT_MOV_U16: - return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); case SLJIT_MOV_S16: - return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_CLZ: case SLJIT_CTZ: case SLJIT_REV: - return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw); case SLJIT_REV_U16: case SLJIT_REV_S16: - return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw); case SLJIT_REV_U32: case SLJIT_REV_S32: - return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw); } SLJIT_UNREACHABLE(); @@ -1893,6 +2025,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, ADD_D | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG2)); + } + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -2045,7 +2195,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, slji if (type == SLJIT_GP_REGISTER) return reg_map[reg]; - if (type != SLJIT_FLOAT_REGISTER) + if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256) return -1; return freg_map[reg]; @@ -2292,7 +2442,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst(compiler, FINST(FMOV, op) | FRD(dst_r) | FRJ(src))); else dst_r = src; @@ -2351,11 +2501,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil } if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); - } - else { + } else { FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); } @@ -2385,7 +2534,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil break; } - if (dst_r == TMP_FREG2) + if (dst_r != dst) FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); return SLJIT_SUCCESS; } @@ -2573,8 +2722,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(push_inst(compiler, inst)); /* Maximum number of instructions required for generating a constant. */ - compiler->size += 3; - + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2601,6 +2749,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler struct sljit_jump *jump; sljit_s32 flags; sljit_ins inst; + sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); @@ -2618,8 +2767,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler } if (src2 & SLJIT_MEM) { - PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG2, src2, src2w, 0, 0)); - src2 = TMP_REG2; + PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0)); + src2 = src2_tmp_reg; } if (src1 == SLJIT_IMM) { @@ -2633,8 +2782,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler if (src2 == SLJIT_IMM) { if (src2w != 0) { - PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w)); - src2 = TMP_REG2; + PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w)); + src2 = src2_tmp_reg; } else src2 = TMP_ZERO; @@ -2687,7 +2836,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler PTR_FAIL_IF(push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0))); /* Maximum number of instructions required for generating a constant. */ - compiler->size += 3; + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2718,7 +2867,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0))); /* Maximum number of instructions required for generating a constant. */ - compiler->size += 3; + compiler->size += JUMP_MAX_SIZE - 1; return SLJIT_SUCCESS; } @@ -2860,13 +3009,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp type ^= 0x1; } else { if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { - FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(dst_reg) | IMM_I12(0))); + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(dst_reg) | IMM_I12(0))); if ((src1 & REG_MASK) == dst_reg) - src1 = (src1 & ~REG_MASK) | TMP_REG2; + src1 = (src1 & ~REG_MASK) | TMP_REG1; if (OFFS_REG(src1) == dst_reg) - src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2); + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); } FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src2_reg) | IMM_I12(0))); @@ -2908,15 +3057,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com if ((type & ~SLJIT_32) == SLJIT_EQUAL) invert = 1; FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(EQUAL_FLAG))); - } - else + } else { + if (get_jump_instruction(type & ~SLJIT_32) == (BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO))) + invert = 1; FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(OTHER_FLAG))); + } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, dst_freg, src1, src1w)); + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src1, src1w)); if (invert) - return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(dst_freg) | FRK(src2_freg) | FCA(F_OTHER_FLAG)); - return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(dst_freg) | FCA(F_OTHER_FLAG)); + return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(TMP_FREG2) | FRK(src2_freg) | FCA(F_OTHER_FLAG)); + return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(TMP_FREG2) | FCA(F_OTHER_FLAG)); } else { if (invert) return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src1) | FRK(src2_freg) | FCA(F_OTHER_FLAG)); @@ -2982,6 +3133,468 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile #undef TO_ARGW_HI +static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw) +{ + sljit_s32 mem = *mem_ptr; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + *mem_ptr = TMP_REG3; + FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(mem)) | IMM_I12(memw & 0x3))); + return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(mem & REG_MASK)); + } + + if (!(mem & REG_MASK)) { + *mem_ptr = TMP_REG3; + return load_immediate(compiler, TMP_REG3, memw); + } + + mem &= REG_MASK; + + if (memw == 0) { + *mem_ptr = mem; + return SLJIT_SUCCESS; + } + + *mem_ptr = TMP_REG3; + + FAIL_IF(load_immediate(compiler, TMP_REG3, memw)); + return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(mem)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (!(srcdst & SLJIT_MEM)) { + if (type & SLJIT_SIMD_STORE) + ins = FRD(srcdst) | FRJ(freg) | FRK(freg); + else + ins = FRD(freg) | FRJ(srcdst) | FRK(srcdst); + + if (reg_size == 5) + ins |= VOR_V | (sljit_ins)1 << 26; + else + ins |= VOR_V; + + return push_inst(compiler, ins); + } + + ins = (type & SLJIT_SIMD_STORE) ? VST : VLD; + + if (reg_size == 5) + ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD; + + if (FAST_IS_REG(srcdst) && srcdst >= 0 && (srcdstw >= I12_MIN && srcdstw <= I12_MAX)) + return push_inst(compiler, ins | FRD(freg) | RJ((sljit_u8)srcdst) | IMM_I12(srcdstw)); + else { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + return push_inst(compiler, ins | FRD(freg) | RJ(srcdst) | IMM_I12(0)); + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + + if (reg_size == 5) + ins = (sljit_ins)1 << 25; + + return push_inst(compiler, VLDREPL | ins | FRD(freg) | RJ(src) | (sljit_ins)1 << (23 - elem_size)); + } + + if (reg_size == 5) + ins = (sljit_ins)1 << 26; + + if (type & SLJIT_SIMD_FLOAT) { + if (src == SLJIT_IMM) + return push_inst(compiler, VREPLGR2VR | ins | FRD(freg) | RJ(TMP_ZERO) | (sljit_ins)elem_size << 10); + + FAIL_IF(push_inst(compiler, VREPLVE | ins | FRD(freg) | FRJ(src) | RK(TMP_ZERO) | (sljit_ins)elem_size << 15)); + + if (reg_size == 5) { + ins = (sljit_ins)(0x44 << 10); + return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg)); + } + + return SLJIT_SUCCESS; + } + + ins |= VREPLGR2VR | (sljit_ins)elem_size << 10; + + if (src == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); + src = TMP_REG2; + } + + return push_inst(compiler, ins | FRD(freg) | RJ(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (type & SLJIT_SIMD_LANE_ZERO) { + ins = (reg_size == 5) ? ((sljit_ins)1 << 26) : 0; + + if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { + FAIL_IF(push_inst(compiler, VOR_V | ins | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg))); + srcdst = TMP_FREG1; + srcdstw = 0; + } + + FAIL_IF(push_inst(compiler, VXOR_V | ins | FRD(freg) | FRJ(freg) | FRK(freg))); + } + + if (srcdst & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + if (reg_size == 5) + ins = (sljit_ins)1 << 25; + + if (type & SLJIT_SIMD_STORE) { + ins |= (sljit_ins)lane_index << 18 | (sljit_ins)(1 << (23 - elem_size)); + return push_inst(compiler, VSTELM | ins | FRD(freg) | RJ(srcdst)); + } else { + emit_op_mem(compiler, (elem_size == 3 ? WORD_DATA : (elem_size == 2 ? INT_DATA : (elem_size == 1 ? HALF_DATA : BYTE_DATA))) | LOAD_DATA, TMP_REG1, srcdst | SLJIT_MEM, 0); + srcdst = TMP_REG1; + ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; + + if (reg_size == 5) { + if (elem_size < 2) { + FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg))); + if (lane_index >= (2 << (3 - elem_size))) { + FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1))); + FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size))))); + return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2)); + } else { + FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index))); + return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18)); + } + } else + ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26; + } + + return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)); + } + } + + if (type & SLJIT_SIMD_FLOAT) { + ins = (reg_size == 5) ? (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26 : (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; + + if (type & SLJIT_SIMD_STORE) { + FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(freg) | IMM_V(lane_index))); + return push_inst(compiler, VINSGR2VR | ins | FRD(srcdst) | RJ(TMP_REG1) | IMM_V(0)); + } else { + FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(srcdst) | IMM_V(0))); + return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(TMP_REG1) | IMM_V(lane_index)); + } + } + + if (srcdst == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw)); + srcdst = TMP_REG1; + } + + if (type & SLJIT_SIMD_STORE) { + ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; + + if (type & SLJIT_SIMD_LANE_SIGNED) + ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18)); + else + ins |= VPICKVE2GR_U; + + if (reg_size == 5) { + if (elem_size < 2) { + if (lane_index >= (2 << (3 - elem_size))) { + if (type & SLJIT_SIMD_LANE_SIGNED) + ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18)); + else + ins |= VPICKVE2GR_U; + + FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg))); + FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1))); + return push_inst(compiler, ins | RD(srcdst) | FRJ(TMP_FREG1) | IMM_V(lane_index % (2 << (3 - elem_size)))); + } + } else { + ins ^= (sljit_ins)1 << (15 - elem_size); + ins |= (sljit_ins)1 << 26; + } + } + + return push_inst(compiler, ins | RD(srcdst) | FRJ(freg) | IMM_V(lane_index)); + } else { + ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; + + if (reg_size == 5) { + if (elem_size < 2) { + FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg))); + if (lane_index >= (2 << (3 - elem_size))) { + FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1))); + FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size))))); + return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2)); + } else { + FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index))); + return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18)); + } + } else + ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26; + } + + return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)); + } + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; + + if (reg_size == 5) { + FAIL_IF(push_inst(compiler, VREPLVEI | (sljit_ins)1 << 26 | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index % (2 << (3 - elem_size))))); + + ins = (src_lane_index < (2 << (3 - elem_size))) ? (sljit_ins)(0x44 << 10) : (sljit_ins)(0xee << 10); + + return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg)); + } + + return push_inst(compiler, VREPLVEI | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + ins = (type & SLJIT_SIMD_STORE) ? VST : VLD; + + if (reg_size == 5) + ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD; + + if (FAST_IS_REG(src) && src >= 0 && (srcw >= I12_MIN && srcw <= I12_MAX)) + FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(srcw))); + else { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(0))); + } + src = freg; + } + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size != 2 || elem2_size != 3) + return SLJIT_ERR_UNSUPPORTED; + + ins = 0; + if (reg_size == 5) { + ins = (sljit_ins)1 << 26; + FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16))); + } + + return push_inst(compiler, VFCVTL_D_S | ins | FRD(freg) | FRJ(src)); + } + + ins = (type & SLJIT_SIMD_EXTEND_SIGNED) ? VSLLWIL : (VSLLWIL | (sljit_ins)1 << 18); + + if (reg_size == 5) + ins |= (sljit_ins)1 << 26; + + do { + if (reg_size == 5) + FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16))); + + FAIL_IF(push_inst(compiler, ins | ((sljit_ins)1 << (13 + elem_size)) | FRD(freg) | FRJ(src))); + src = freg; + } while (++elem_size < elem2_size); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + + if (reg_size == 5) + ins = (sljit_ins)1 << 26; + + FAIL_IF(push_inst(compiler, VMSKLTZ | ins | (sljit_ins)(elem_size << 10) | FRD(TMP_FREG1) | FRJ(freg))); + + FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x3c << 10) | RD(dst_r) | FRJ(TMP_FREG1))); + + if (reg_size == 5) { + FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x38 << 10) | ins | RD(TMP_REG3) | FRJ(TMP_FREG1) | IMM_V(2))); + FAIL_IF(push_inst(compiler, SLLI_W | RD(TMP_REG3) | RJ(TMP_REG3) | IMM_I12(2 << (3 - elem_size)))); + FAIL_IF(push_inst(compiler, OR | RD(dst_r) | RJ(dst_r) | RK(TMP_REG3))); + } + + if (dst_r == TMP_REG2) + return emit_op_mem(compiler, ((type & SLJIT_32) ? INT_DATA : WORD_DATA), TMP_REG2, dst, dstw); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + ins = VAND_V; + break; + case SLJIT_SIMD_OP2_OR: + ins = VOR_V; + break; + case SLJIT_SIMD_OP2_XOR: + ins = VXOR_V; + break; + } + + if (reg_size == 5) + ins |= (sljit_ins)1 << 26; + + return push_inst(compiler, ins | FRD(dst_freg) | FRJ(src1_freg) | FRK(src2_freg)); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, @@ -2992,9 +3605,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler CHECK_ERROR(); CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); - if (!(LOONGARCH_FEATURE_LAMCAS & get_cpu_features())) - return SLJIT_ERR_UNSUPPORTED; - switch(GET_OPCODE(op)) { case SLJIT_MOV_U8: ins = LD_BU; @@ -3029,9 +3639,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler CHECK_ERROR(); CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); - if (!(LOONGARCH_FEATURE_LAMCAS & get_cpu_features())) - return SLJIT_ERR_UNSUPPORTED; - switch (GET_OPCODE(op)) { case SLJIT_MOV_U8: ins = AMCAS_B; @@ -3128,28 +3735,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); - compiler->size += 3; + compiler->size += JUMP_MAX_SIZE - 1; if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); - return put_label; + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) diff --git a/ext/pcre/pcre2lib/sljit/sljitNativeMIPS_32.c b/ext/pcre/pcre2lib/sljit/sljitNativeMIPS_32.c index 9620b945f655..91153e5f2580 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativeMIPS_32.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativeMIPS_32.c @@ -225,7 +225,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t sljit_ins f64_hi = TA(6), f64_lo = TA(7); #endif /* SLJIT_LITTLE_ENDIAN */ - SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); + SLJIT_ASSERT(reg_map[TMP_REG2] == 4 && freg_map[TMP_FREG1] == 12); arg_types >>= SLJIT_ARG_SHIFT; @@ -370,7 +370,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile } else if (type & SLJIT_CALL_RETURN) PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); - SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25); if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; @@ -441,7 +441,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi return sljit_emit_ijump(compiler, type, src, srcw); } - SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25); if (src == SLJIT_IMM) FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); diff --git a/ext/pcre/pcre2lib/sljit/sljitNativeMIPS_64.c b/ext/pcre/pcre2lib/sljit/sljitNativeMIPS_64.c index 52a0d3fb7adf..b9f03a7bd244 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativeMIPS_64.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativeMIPS_64.c @@ -225,7 +225,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t sljit_ins prev_ins = *ins_ptr; sljit_ins ins = NOP; - SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); + SLJIT_ASSERT(reg_map[TMP_REG2] == 4 && freg_map[TMP_FREG1] == 12); arg_types >>= SLJIT_ARG_SHIFT; @@ -309,7 +309,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile if ((type & 0xff) != SLJIT_CALL_REG_ARG) PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); - SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25); if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; @@ -366,7 +366,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi return sljit_emit_ijump(compiler, type, src, srcw); } - SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG1); if (src == SLJIT_IMM) FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); diff --git a/ext/pcre/pcre2lib/sljit/sljitNativeMIPS_common.c b/ext/pcre/pcre2lib/sljit/sljitNativeMIPS_common.c index 807b3474eadd..88eb30b7f163 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativeMIPS_common.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativeMIPS_common.c @@ -83,7 +83,7 @@ typedef sljit_u32 sljit_ins; #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) /* For position independent code, t9 must contain the function address. */ -#define PIC_ADDR_REG TMP_REG2 +#define PIC_ADDR_REG TMP_REG1 /* Floating point status register. */ #define FCSR_REG 31 @@ -95,7 +95,7 @@ typedef sljit_u32 sljit_ins; #define OTHER_FLAG 1 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { - 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31, 3, 1 + 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 25, 4, 31, 3, 1 }; #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) @@ -504,7 +504,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i if (jump->flags & JUMP_ADDR) target_addr = jump->u.target; else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(jump->u.label != NULL); target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } @@ -635,75 +635,66 @@ static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_ #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) -static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) { - if (max_label < 0x80000000l) { - put_label->flags = PATCH_ABS32; + sljit_uw addr; + SLJIT_UNUSED_ARG(executable_offset); + + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + if (addr < 0x80000000l) { + jump->flags |= PATCH_ABS32; return 1; } - if (max_label < 0x800000000000l) { - put_label->flags = PATCH_ABS48; + if (addr < 0x800000000000l) { + jump->flags |= PATCH_ABS48; return 3; } - put_label->flags = 0; return 5; } #endif /* SLJIT_CONFIG_MIPS_64 */ -static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg) +static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump) { - struct sljit_jump *jump; - struct sljit_put_label *put_label; - sljit_uw flags; - sljit_ins *inst; - sljit_uw addr; - - if (reg != 0) { - jump = (struct sljit_jump*)dst; - flags = jump->flags; - inst = (sljit_ins*)jump->addr; - addr = (flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - } else { - put_label = (struct sljit_put_label*)dst; -#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - flags = put_label->flags; -#endif - inst = (sljit_ins*)put_label->addr; - addr = put_label->label->addr; - reg = *inst; - } + sljit_uw flags = jump->flags; + sljit_ins *ins = (sljit_ins*)jump->addr; + sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; + sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : PIC_ADDR_REG; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - inst[0] = LUI | T(reg) | IMM(addr >> 16); + ins[0] = LUI | T(reg) | IMM(addr >> 16); #else /* !SLJIT_CONFIG_MIPS_32 */ if (flags & PATCH_ABS32) { SLJIT_ASSERT(addr < 0x80000000l); - inst[0] = LUI | T(reg) | IMM(addr >> 16); + ins[0] = LUI | T(reg) | IMM(addr >> 16); } else if (flags & PATCH_ABS48) { SLJIT_ASSERT(addr < 0x800000000000l); - inst[0] = LUI | T(reg) | IMM(addr >> 32); - inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); - inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); - inst += 2; + ins[0] = LUI | T(reg) | IMM(addr >> 32); + ins[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); + ins[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); + ins += 2; } else { - inst[0] = LUI | T(reg) | IMM(addr >> 48); - inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 32) & 0xffff); - inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); - inst[3] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); - inst[4] = DSLL | T(reg) | D(reg) | SH_IMM(16); - inst += 4; + ins[0] = LUI | T(reg) | IMM(addr >> 48); + ins[1] = ORI | S(reg) | T(reg) | IMM((addr >> 32) & 0xffff); + ins[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); + ins[3] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); + ins[4] = DSLL | T(reg) | D(reg) | SH_IMM(16); + ins += 4; } #endif /* SLJIT_CONFIG_MIPS_32 */ - inst[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff); + ins[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff); } -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) { struct sljit_memory_fragment *buf; sljit_ins *code; @@ -711,77 +702,76 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; sljit_sw executable_offset; sljit_uw addr; - struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); reverse_buf(compiler); - code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset); PTR_FAIL_WITH_EXEC_IF(code); buf = compiler->buf; code_ptr = code; word_count = 0; - next_addr = 0; - executable_offset = SLJIT_EXEC_OFFSET(code); - label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); do { *code_ptr = *buf_ptr++; - if (next_addr == word_count) { + if (next_min_addr == word_count) { SLJIT_ASSERT(!label || label->size >= word_count); SLJIT_ASSERT(!jump || jump->addr >= word_count); SLJIT_ASSERT(!const_ || const_->addr >= word_count); - SLJIT_ASSERT(!put_label || put_label->addr >= word_count); /* These structures are ordered by their address. */ - if (label && label->size == word_count) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (jump && jump->addr == word_count) { + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - word_count += 2; -#else - word_count += 6; -#endif - jump->addr = (sljit_uw)(code_ptr - 1); - code_ptr = detect_jump_type(jump, code, executable_offset); + word_count += 2; +#else /* !SLJIT_CONFIG_MIPS_32 */ + word_count += 6; +#endif /* SLJIT_CONFIG_MIPS_32 */ + jump->addr = (sljit_uw)(code_ptr - 1); + code_ptr = detect_jump_type(jump, code, executable_offset); + } else { + jump->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + code_ptr += 1; + word_count += 1; +#else /* !SLJIT_CONFIG_MIPS_32 */ + code_ptr += mov_addr_get_length(jump, code, executable_offset); + word_count += 5; +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + jump = jump->next; - } - if (const_ && const_->addr == word_count) { + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { const_->addr = (sljit_uw)code_ptr; const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == word_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - code_ptr += 1; - word_count += 1; -#else - code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); - word_count += 5; -#endif - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); + + SLJIT_GET_NEXT_MIN(); } code_ptr++; word_count++; @@ -791,7 +781,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->u.addr = (sljit_uw)code_ptr; label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -799,13 +789,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); jump = compiler->jumps; while (jump) { do { - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_B) { @@ -821,15 +810,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil break; } - load_addr_to_reg(jump, PIC_ADDR_REG); + load_addr_to_reg(jump); } while (0); - jump = jump->next; - } - put_label = compiler->put_labels; - while (put_label) { - load_addr_to_reg(put_label, 0); - put_label = put_label->next; + jump = jump->next; } compiler->error = SLJIT_ERR_COMPILED; @@ -932,9 +916,9 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define SELECT_OP(a, b) (b) +#define SELECT_OP(d, w) (w) #else -#define SELECT_OP(a, b) (!(op & SLJIT_32) ? a : b) +#define SELECT_OP(d, w) (!(op & SLJIT_32) ? (d) : (w)) #endif #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -1001,9 +985,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi offset = local_size - SSIZE_OF(sw); } else { FAIL_IF(load_immediate(compiler, OTHER_FLAG, local_size)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | TA(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP))); - base = S(TMP_REG2); + base = S(TMP_REG1); offset = -SSIZE_OF(sw); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) local_size = 0; @@ -1212,8 +1196,8 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit if (tmp < frame_size) tmp = frame_size; - FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size - tmp)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP))); + FAIL_IF(load_immediate(compiler, DR(TMP_REG2), local_size - tmp)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG2) | D(SLJIT_SP), DR(SLJIT_SP))); local_size = tmp; } @@ -1711,7 +1695,7 @@ static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { - sljit_s32 is_overflow, is_carry, carry_src_ar, is_handled; + sljit_s32 is_overflow, is_carry, carry_src_ar, is_handled, reg; sljit_ins op_imm, op_v; #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) sljit_ins ins, op_dimm, op_dimm32, op_dv; @@ -1963,8 +1947,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl is_handled = 1; if (flags & SRC2_IMM) { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); - src2 = TMP_REG2; + reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(reg) | IMM(src2), DR(reg))); + src2 = reg; flags &= ~SRC2_IMM; } @@ -2283,7 +2268,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 dst_r = TMP_REG2; sljit_s32 src1_r; sljit_sw src2_r = 0; - sljit_s32 sugg_src2_r = TMP_REG2; + sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2; if (!(flags & ALT_KEEP_CACHE)) { compiler->cache_arg = 0; @@ -2299,7 +2284,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 dst_r = dst; flags |= REG_DEST; if (flags & MOVE_OP) - sugg_src2_r = dst_r; + src2_tmp_reg = dst_r; } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw)) flags |= SLOW_DEST; @@ -2351,8 +2336,8 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 else if (src2 == SLJIT_IMM) { if (!(flags & SRC2_IMM)) { if (src2w) { - FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w)); - src2_r = sugg_src2_r; + FAIL_IF(load_immediate(compiler, DR(src2_tmp_reg), src2w)); + src2_r = src2_tmp_reg; } else { src2_r = 0; @@ -2366,16 +2351,16 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } } else { - if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w)) + if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(src2_tmp_reg), src2, src2w)) FAIL_IF(compiler->error); else flags |= SLOW_SRC2; - src2_r = sugg_src2_r; + src2_r = src2_tmp_reg; } if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { SLJIT_ASSERT(src2_r == TMP_REG2); - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + if ((flags & SLOW_DEST) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, src1, src1w)); FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); } @@ -2387,7 +2372,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 else if (flags & SLOW_SRC1) FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w, dst, dstw)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(src2_tmp_reg), src2, src2w, dst, dstw)); FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); @@ -2659,12 +2644,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) #define SELECT_OP3(op, src2w, D, D32, W) (((op & SLJIT_32) ? (W) : ((src2w) < 32) ? (D) : (D32)) | (((sljit_ins)src2w & 0x1f) << 6)) -#define SELECT_OP2(op, D, W) ((op & SLJIT_32) ? (W) : (D)) #else /* !SLJIT_CONFIG_MIPS_64 */ #define SELECT_OP3(op, src2w, D, D32, W) ((W) | ((sljit_ins)(src2w) << 6)) -#define SELECT_OP2(op, D, W) (W) #endif /* SLJIT_CONFIG_MIPS_64 */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst_reg) | T(TMP_REG2) | D(dst_reg), DR(dst_reg)); + } + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -2718,18 +2719,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src3, src3w)); src3 = TMP_REG2; } else if (dst_reg == src3) { - FAIL_IF(push_inst(compiler, SELECT_OP2(op, DADDU, ADDU) | S(src3) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src3) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); src3 = TMP_REG2; } if (is_left) { - ins1 = SELECT_OP2(op, DSRL, SRL); - ins2 = SELECT_OP2(op, DSLLV, SLLV); - ins3 = SELECT_OP2(op, DSRLV, SRLV); + ins1 = SELECT_OP(DSRL, SRL); + ins2 = SELECT_OP(DSLLV, SLLV); + ins3 = SELECT_OP(DSRLV, SRLV); } else { - ins1 = SELECT_OP2(op, DSLL, SLL); - ins2 = SELECT_OP2(op, DSRLV, SRLV); - ins3 = SELECT_OP2(op, DSLLV, SLLV); + ins1 = SELECT_OP(DSLL, SLL); + ins2 = SELECT_OP(DSRLV, SRLV); + ins3 = SELECT_OP(DSLLV, SLLV); } FAIL_IF(push_inst(compiler, ins2 | S(src3) | T(src1_reg) | D(dst_reg), DR(dst_reg))); @@ -2739,14 +2740,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * FAIL_IF(push_inst(compiler, XORI | S(src3) | T(TMP_REG2) | ((sljit_ins)bit_length - 1), DR(TMP_REG2))); src2_reg = TMP_REG1; } else - FAIL_IF(push_inst(compiler, SELECT_OP2(op, DSUBU, SUBU) | SA(0) | T(src3) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(0) | T(src3) | D(TMP_REG2), DR(TMP_REG2))); FAIL_IF(push_inst(compiler, ins3 | S(TMP_REG2) | T(src2_reg) | D(TMP_REG1), DR(TMP_REG1))); return push_inst(compiler, OR | S(dst_reg) | T(TMP_REG1) | D(dst_reg), DR(dst_reg)); } #undef SELECT_OP3 -#undef SELECT_OP2 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) @@ -3103,7 +3103,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst(compiler, MOV_fmt(FMT(op)) | FS(src) | FD(dst_r), MOVABLE_INS)); else dst_r = src; @@ -3162,11 +3162,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil } if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, src1, src1w)); FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw)); - } - else { + } else { FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w)); FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, dst, dstw)); } @@ -3361,10 +3360,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS)); if (type <= SLJIT_JUMP) - PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); else { jump->flags |= IS_JAL; - PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); } jump->addr = compiler->size; @@ -3392,8 +3391,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile #define RESOLVE_IMM2() \ if (src2 == SLJIT_IMM) { \ if (src2w) { \ - PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \ - src2 = TMP_REG2; \ + PTR_FAIL_IF(load_immediate(compiler, DR(src2_tmp_reg), src2w)); \ + src2 = src2_tmp_reg; \ } \ else \ src2 = 0; \ @@ -3406,6 +3405,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler struct sljit_jump *jump; sljit_s32 flags; sljit_ins inst; + sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); @@ -3426,8 +3426,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler } if (src2 & SLJIT_MEM) { - PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0)); - src2 = TMP_REG2; + PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(src2_tmp_reg), src2, src2w, 0, 0)); + src2 = src2_tmp_reg; } jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); @@ -3515,7 +3515,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | BRANCH_LENGTH, UNMOVABLE_INS)); } - PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); @@ -3553,11 +3553,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi if (compiler->delay_slot != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; - src = TMP_REG2; + src = PIC_ADDR_REG; } else if (src & SLJIT_MEM) { ADJUST_LOCAL_OFFSET(src, srcw); - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(TMP_REG2), src, srcw)); - src = TMP_REG2; + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + src = PIC_ADDR_REG; } if (type <= SLJIT_JUMP) @@ -3755,8 +3755,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src1, src1w)); - src1 = TMP_REG2; + FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG1), src1, src1w)); + src1 = TMP_REG1; } else if (src1 == SLJIT_IMM) { #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) if (type & SLJIT_32) @@ -3784,13 +3784,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp type ^= 0x1; } else { if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { - FAIL_IF(push_inst(compiler, ADDU_W | S(dst_reg) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, ADDU_W | S(dst_reg) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); if ((src1 & REG_MASK) == dst_reg) - src1 = (src1 & ~REG_MASK) | TMP_REG2; + src1 = (src1 & ~REG_MASK) | TMP_REG1; if (OFFS_REG(src1) == dst_reg) - src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2); + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); } FAIL_IF(push_inst(compiler, mov_ins | S(src2_reg) | TA(0) | D(dst_reg), DR(dst_reg))); @@ -3847,8 +3847,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, FR(TMP_FREG1), src1, src1w)); - src1 = TMP_FREG1; + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, FR(TMP_FREG2), src1, src1w)); + src1 = TMP_FREG2; } return push_inst(compiler, get_select_cc(type, 1) | FMT(type) | FS(src1) | FD(dst_freg), MOVABLE_INS); @@ -4231,18 +4231,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r, UNMOVABLE_INS)); @@ -4255,5 +4255,5 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, DR(TMP_REG2), dst, dstw)); - return put_label; + return jump; } diff --git a/ext/pcre/pcre2lib/sljit/sljitNativePPC_common.c b/ext/pcre/pcre2lib/sljit/sljitNativePPC_common.c index 54977f02e3e7..1f17d90423eb 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativePPC_common.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativePPC_common.c @@ -98,7 +98,7 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) #define TMP_CALL_REG (SLJIT_NUMBER_OF_REGISTERS + 5) #else -#define TMP_CALL_REG TMP_REG2 +#define TMP_CALL_REG TMP_REG1 #endif #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) @@ -310,24 +310,23 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; - sljit_uw extra_jump_flags; #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) - return 0; + goto exit; #else if (jump->flags & SLJIT_REWRITABLE_JUMP) - return 0; + goto exit; #endif if (jump->flags & JUMP_ADDR) target_addr = jump->u.target; else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(jump->u.label != NULL); target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } @@ -336,101 +335,256 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in goto keep_address; #endif - diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr) - executable_offset) & ~0x3l; + diff = (sljit_sw)target_addr - (sljit_sw)code_ptr - executable_offset; - extra_jump_flags = 0; if (jump->flags & IS_COND) { if (diff <= 0x7fff && diff >= -0x8000) { jump->flags |= PATCH_B; - return 1; + return code_ptr; } if (target_addr <= 0xffff) { jump->flags |= PATCH_B | PATCH_ABS_B; - return 1; + return code_ptr; } - extra_jump_flags = REMOVE_COND; diff -= SSIZE_OF(ins); } if (diff <= 0x01ffffff && diff >= -0x02000000) { - jump->flags |= PATCH_B | extra_jump_flags; - return 1; + jump->flags |= PATCH_B; + } else if (target_addr <= 0x01ffffff) { + jump->flags |= PATCH_B | PATCH_ABS_B; } - if (target_addr <= 0x03ffffff) { - jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags; - return 1; + if (jump->flags & PATCH_B) { + if (!(jump->flags & IS_COND)) + return code_ptr; + + code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001); + code_ptr[1] = Bx; + jump->addr += sizeof(sljit_ins); + jump->flags -= IS_COND; + return code_ptr + 1; } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) keep_address: -#endif - if (target_addr <= 0x7fffffff) { +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + if (target_addr < 0x80000000l) { jump->flags |= PATCH_ABS32; - return 1; + code_ptr[2] = MTCTR | S(TMP_CALL_REG); + code_ptr[3] = code_ptr[0]; + return code_ptr + 3; } - if (target_addr <= 0x7fffffffffffl) { + if (target_addr < 0x800000000000l) { jump->flags |= PATCH_ABS48; - return 1; + code_ptr[4] = MTCTR | S(TMP_CALL_REG); + code_ptr[5] = code_ptr[0]; + return code_ptr + 5; } -#endif +#endif /* SLJIT_CONFIG_PPC_64 */ - return 0; +exit: +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + code_ptr[2] = MTCTR | S(TMP_CALL_REG); + code_ptr[3] = code_ptr[0]; +#else /* !SLJIT_CONFIG_PPC_32 */ + code_ptr[5] = MTCTR | S(TMP_CALL_REG); + code_ptr[6] = code_ptr[0]; +#endif /* SLJIT_CONFIG_PPC_32 */ + return code_ptr + JUMP_MAX_SIZE - 1; } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) { - if (max_label < 0x100000000l) { - put_label->flags = 0; + sljit_uw addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT(jump->flags < ((sljit_uw)5 << JUMP_SIZE_SHIFT)); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + if (addr < 0x80000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS32; return 1; } - if (max_label < 0x1000000000000l) { - put_label->flags = 1; + if (addr < 0x800000000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS48; return 3; } - put_label->flags = 2; + SLJIT_ASSERT(jump->flags >= ((sljit_uw)4 << JUMP_SIZE_SHIFT)); return 4; } -static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label) -{ - sljit_uw addr = put_label->label->addr; - sljit_ins *inst = (sljit_ins *)put_label->addr; - sljit_u32 reg = *inst; +#endif /* SLJIT_CONFIG_PPC_64 */ - if (put_label->flags == 0) { - SLJIT_ASSERT(addr < 0x100000000l); - inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 16); - } - else { - if (put_label->flags == 1) { - SLJIT_ASSERT(addr < 0x1000000000000l); - inst[0] = ORI | S(TMP_ZERO) | A(reg) | IMM(addr >> 32); +static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) +{ + sljit_uw flags = jump->flags; + sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; + sljit_ins *ins = (sljit_ins*)jump->addr; + sljit_s32 reg; + SLJIT_UNUSED_ARG(executable_offset); + + if (flags & PATCH_B) { + if (flags & IS_COND) { + if (!(flags & PATCH_ABS_B)) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); + ins[0] = BCx | ((sljit_ins)addr & 0xfffc) | (ins[0] & 0x03ff0001); + } else { + SLJIT_ASSERT(addr <= 0xffff); + ins[0] = BCx | ((sljit_ins)addr & 0xfffc) | 0x2 | ((*ins) & 0x03ff0001); + } + return; } - else { - inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 48); - inst[1] = ORI | S(reg) | A(reg) | IMM((addr >> 32) & 0xffff); - inst++; + + if (!(flags & PATCH_ABS_B)) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); + ins[0] = Bx | ((sljit_ins)addr & 0x03fffffc) | (ins[0] & 0x1); + } else { + SLJIT_ASSERT(addr <= 0x03ffffff); + ins[0] = Bx | ((sljit_ins)addr & 0x03fffffc) | 0x2 | (ins[0] & 0x1); } + return; + } + + reg = (flags & JUMP_MOV_ADDR) ? (sljit_s32)ins[0] : TMP_CALL_REG; + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + ins[0] = ADDIS | D(reg) | A(0) | IMM(addr >> 16); + ins[1] = ORI | S(reg) | A(reg) | IMM(addr); +#else /* !SLJIT_CONFIG_PPC_32 */ + + /* The TMP_ZERO cannot be used because it is restored for tail calls. */ + if (flags & PATCH_ABS32) { + SLJIT_ASSERT(addr < 0x80000000l); + ins[0] = ADDIS | D(reg) | A(0) | IMM(addr >> 16); + ins[1] = ORI | S(reg) | A(reg) | IMM(addr); + return; + } - inst[1] = SLDI(32) | S(reg) | A(reg); - inst[2] = ORIS | S(reg) | A(reg) | IMM((addr >> 16) & 0xffff); - inst += 2; + if (flags & PATCH_ABS48) { + SLJIT_ASSERT(addr < 0x800000000000l); + ins[0] = ADDIS | D(reg) | A(0) | IMM(addr >> 32); + ins[1] = ORI | S(reg) | A(reg) | IMM(addr >> 16); + ins[2] = SLDI(16) | S(reg) | A(reg); + ins[3] = ORI | S(reg) | A(reg) | IMM(addr); + return; } - inst[1] = ORI | S(reg) | A(reg) | IMM(addr & 0xffff); + ins[0] = ADDIS | D(reg) | A(0) | IMM(addr >> 48); + ins[1] = ORI | S(reg) | A(reg) | IMM(addr >> 32); + ins[2] = SLDI(32) | S(reg) | A(reg); + ins[3] = ORIS | S(reg) | A(reg) | IMM(addr >> 16); + ins[4] = ORI | S(reg) | A(reg) | IMM(addr); +#endif /* SLJIT_CONFIG_PPC_32 */ } +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE - 1; + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { + if (jump->u.target <= 0x01ffffff) + total_size = 1 - 1; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + else if (jump->u.target < 0x80000000l) + total_size = 4 - 1; + else if (jump->u.target < 0x800000000000l) + total_size = 6 - 1; +#endif /* SLJIT_CONFIG_PPC_64 */ + } else { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (jump->flags & IS_COND) { + if (diff <= (0x7fff / SSIZE_OF(ins)) && diff >= (-0x8000 / SSIZE_OF(ins))) + total_size = 1 - 1; + else if ((diff - 1) <= (0x01ffffff / SSIZE_OF(ins)) && (diff - 1) >= (-0x02000000 / SSIZE_OF(ins))) + total_size = 2 - 1; + } else if (diff <= (0x01ffffff / SSIZE_OF(ins)) && diff >= (-0x02000000 / SSIZE_OF(ins))) + total_size = 1 - 1; + } + } + + size_reduce += (JUMP_MAX_SIZE - 1) - total_size; + jump->flags |= total_size << JUMP_SIZE_SHIFT; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + } else { + total_size = (sljit_uw)4 << JUMP_SIZE_SHIFT; + + if (jump->flags & JUMP_ADDR) { + if (jump->u.target < 0x80000000l) { + total_size = (sljit_uw)1 << JUMP_SIZE_SHIFT; + size_reduce += 3; + } else if (jump->u.target < 0x800000000000l) { + total_size = (sljit_uw)3 << JUMP_SIZE_SHIFT; + size_reduce += 1; + } + } + jump->flags |= total_size; #endif /* SLJIT_CONFIG_PPC_64 */ + } -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) { struct sljit_memory_fragment *buf; sljit_ins *code; @@ -438,18 +592,17 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; sljit_sw executable_offset; - sljit_uw addr; struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); + + reduce_code_size(compiler); #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) /* add to compiler->size additional instruction space to hold the trampoline and padding */ @@ -459,93 +612,64 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins)); #endif #endif - code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; code_ptr = code; word_count = 0; - next_addr = 0; - executable_offset = SLJIT_EXEC_OFFSET(code); - label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); do { *code_ptr = *buf_ptr++; - if (next_addr == word_count) { + if (next_min_addr == word_count) { SLJIT_ASSERT(!label || label->size >= word_count); SLJIT_ASSERT(!jump || jump->addr >= word_count); SLJIT_ASSERT(!const_ || const_->addr >= word_count); - SLJIT_ASSERT(!put_label || put_label->addr >= word_count); /* These structures are ordered by their address. */ - if (label && label->size == word_count) { + if (next_min_addr == next_label_size) { /* Just recording the address. */ - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (jump && jump->addr == word_count) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - jump->addr = (sljit_uw)(code_ptr - 3); -#else - jump->addr = (sljit_uw)(code_ptr - 6); -#endif - if (detect_jump_type(jump, code_ptr, code, executable_offset)) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - code_ptr[-3] = code_ptr[0]; - code_ptr -= 3; -#else - if (jump->flags & PATCH_ABS32) { - code_ptr -= 3; - code_ptr[-1] = code_ptr[2]; - code_ptr[0] = code_ptr[3]; - } - else if (jump->flags & PATCH_ABS48) { - code_ptr--; - code_ptr[-1] = code_ptr[0]; - code_ptr[0] = code_ptr[1]; - /* rldicr rX,rX,32,31 -> rX,rX,16,47 */ - SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6); - code_ptr[-3] ^= 0x8422; - /* oris -> ori */ - code_ptr[-2] ^= 0x4000000; - } - else { - code_ptr[-6] = code_ptr[0]; - code_ptr -= 6; - } -#endif - if (jump->flags & REMOVE_COND) { - code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001); - code_ptr++; - jump->addr += sizeof(sljit_ins); - code_ptr[0] = Bx; - jump->flags -= IS_COND; - } + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + word_count += jump->flags >> JUMP_SIZE_SHIFT; + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); + SLJIT_ASSERT(((sljit_uw)code_ptr - jump->addr <= (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); + } else { + jump->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + word_count += jump->flags >> JUMP_SIZE_SHIFT; + code_ptr += mov_addr_get_length(jump, code, executable_offset); +#else /* !SLJIT_CONFIG_PPC_64 */ + word_count++; + code_ptr++; +#endif /* SLJIT_CONFIG_PPC_64 */ } jump = jump->next; - } - if (const_ && const_->addr == word_count) { + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { const_->addr = (sljit_uw)code_ptr; const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == word_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); - word_count += 4; -#endif - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); + + SLJIT_GET_NEXT_MIN(); } code_ptr++; word_count++; @@ -555,7 +679,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -563,7 +687,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) SLJIT_ASSERT(code_ptr - code <= (sljit_sw)(compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)))); @@ -573,84 +696,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { - do { - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins *)jump->addr; - - if (jump->flags & PATCH_B) { - if (jump->flags & IS_COND) { - if (!(jump->flags & PATCH_ABS_B)) { - addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); - SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); - *buf_ptr = BCx | ((sljit_ins)addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); - } - else { - SLJIT_ASSERT(addr <= 0xffff); - *buf_ptr = BCx | ((sljit_ins)addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); - } - } - else { - if (!(jump->flags & PATCH_ABS_B)) { - addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); - SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); - *buf_ptr = Bx | ((sljit_ins)addr & 0x03fffffc) | ((*buf_ptr) & 0x1); - } - else { - SLJIT_ASSERT(addr <= 0x03ffffff); - *buf_ptr = Bx | ((sljit_ins)addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); - } - } - break; - } - - /* Set the fields of immediate loads. */ -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); - buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; - buf_ptr[1] |= (sljit_ins)addr & 0xffff; -#else - if (jump->flags & PATCH_ABS32) { - SLJIT_ASSERT(addr <= 0x7fffffff); - SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); - buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; - buf_ptr[1] |= (sljit_ins)addr & 0xffff; - break; - } - - if (jump->flags & PATCH_ABS48) { - SLJIT_ASSERT(addr <= 0x7fffffffffff); - SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3]) & 0xffff) == 0); - buf_ptr[0] |= (sljit_ins)(addr >> 32) & 0xffff; - buf_ptr[1] |= (sljit_ins)(addr >> 16) & 0xffff; - buf_ptr[3] |= (sljit_ins)addr & 0xffff; - break; - } - - SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3] | buf_ptr[4]) & 0xffff) == 0); - buf_ptr[0] |= (sljit_ins)(addr >> 48) & 0xffff; - buf_ptr[1] |= (sljit_ins)(addr >> 32) & 0xffff; - buf_ptr[3] |= (sljit_ins)(addr >> 16) & 0xffff; - buf_ptr[4] |= (sljit_ins)addr & 0xffff; -#endif - } while (0); + generate_jump_or_mov_addr(jump, executable_offset); jump = jump->next; } - put_label = compiler->put_labels; - while (put_label) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - addr = put_label->label->addr; - buf_ptr = (sljit_ins *)put_label->addr; - - SLJIT_ASSERT((buf_ptr[0] & 0xfc1f0000) == ADDIS && (buf_ptr[1] & 0xfc000000) == ORI); - buf_ptr[0] |= (addr >> 16) & 0xffff; - buf_ptr[1] |= addr & 0xffff; -#else - put_label_set(put_label); -#endif - put_label = put_label->next; - } - compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; @@ -671,11 +720,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins) + sizeof(struct sljit_function_context); - return code_ptr; #else compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); - return code; #endif } @@ -937,14 +984,16 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit sljit_s32 i, tmp, base, offset; sljit_s32 local_size = compiler->local_size; + SLJIT_ASSERT(TMP_CALL_REG != TMP_REG2); + base = SLJIT_SP; if (local_size > STACK_MAX_DISTANCE) { - base = TMP_REG1; + base = TMP_REG2; if (local_size > 2 * STACK_MAX_DISTANCE + LR_SAVE_OFFSET) { FAIL_IF(push_inst(compiler, STACK_LOAD | D(base) | A(SLJIT_SP) | IMM(0))); local_size = 0; } else { - FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(SLJIT_SP) | IMM(local_size - STACK_MAX_DISTANCE))); + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(SLJIT_SP) | IMM(local_size - STACK_MAX_DISTANCE))); local_size = STACK_MAX_DISTANCE; } } @@ -986,7 +1035,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit if (local_size > 0) return push_inst(compiler, ADDI | D(SLJIT_SP) | A(base) | IMM(local_size)); - SLJIT_ASSERT(base == TMP_REG1); + SLJIT_ASSERT(base == TMP_REG2); return push_inst(compiler, OR | S(base) | A(SLJIT_SP) | B(base)); } @@ -1253,7 +1302,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 dst_r = TMP_REG2; sljit_s32 src1_r; sljit_s32 src2_r; - sljit_s32 sugg_src2_r = TMP_REG2; + sljit_s32 src2_tmp_reg = (!(input_flags & ALT_SIGN_EXT) && GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2; sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS); /* Destination check. */ @@ -1264,24 +1313,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 flags |= REG_DEST; if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) - sugg_src2_r = dst_r; - } - - /* Source 1. */ - if (FAST_IS_REG(src1)) { - src1_r = src1; - flags |= REG1_SOURCE; - } - else if (src1 == SLJIT_IMM) { - src1_r = TMP_ZERO; - if (src1w != 0) { - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1_r = TMP_REG1; - } - } - else { - FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); - src1_r = TMP_REG1; + src2_tmp_reg = dst_r; } /* Source 2. */ @@ -1291,17 +1323,30 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P) dst_r = src2_r; - } - else if (src2 == SLJIT_IMM) { + } else if (src2 == SLJIT_IMM) { src2_r = TMP_ZERO; if (src2w != 0) { - FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); - src2_r = sugg_src2_r; + FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w)); + src2_r = src2_tmp_reg; } + } else { + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, src2_tmp_reg, src2, src2w, TMP_REG1)); + src2_r = src2_tmp_reg; } - else { - FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, TMP_REG2)); - src2_r = sugg_src2_r; + + /* Source 1. */ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } else if (src1 == SLJIT_IMM) { + src1_r = TMP_ZERO; + if (src1w != 0) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } + } else { + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); + src1_r = TMP_REG1; } FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); @@ -1757,7 +1802,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) { - if (dst == TMP_REG2) { + if (dst == TMP_REG1) { if (TEST_UL_IMM(src2, src2w)) { compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1772,7 +1817,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); } - if (dst == TMP_REG2 && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + if (dst == TMP_REG1 && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { if (TEST_SL_IMM(src2, src2w)) { compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1911,13 +1956,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); SLJIT_SKIP_CHECKS(compiler); - return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); } #undef TEST_ADD_FORM1 #undef TEST_SUB_FORM2 #undef TEST_SUB_FORM3 +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, ADD | D(dst_reg) | A(dst_reg) | B(TMP_REG2)); + } + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -2228,7 +2291,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil /* Fall through. */ case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src))); else dst_r = src; @@ -2268,7 +2331,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil } if (src2 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2)); + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG1)); src2 = TMP_FREG2; } @@ -2451,7 +2514,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile type &= 0xff; if ((type | 0x1) == SLJIT_NOT_CARRY) - PTR_FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO))); + PTR_FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG2) | A(TMP_ZERO) | B(TMP_ZERO))); /* In PPC, we don't need to touch the arguments. */ if (type < SLJIT_JUMP) @@ -2461,10 +2524,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile jump->flags |= IS_CALL; #endif - PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); - PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG))); jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0))); + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2498,18 +2562,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); - if (FAST_IS_REG(src)) { -#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) - if (type >= SLJIT_CALL && src != TMP_CALL_REG) { - FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); - src_r = TMP_CALL_REG; - } - else - src_r = src; -#else /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ - src_r = src; -#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ - } else if (src == SLJIT_IMM) { + if (src == SLJIT_IMM) { /* These jumps are converted to jump/call instructions when possible. */ jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); @@ -2521,8 +2574,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump->flags |= IS_CALL; #endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ - FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); - src_r = TMP_CALL_REG; + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0))); + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src)) { +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL && src != TMP_CALL_REG) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src_r = TMP_CALL_REG; + } else + src_r = src; +#else /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + src_r = src; +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ } else { ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG)); @@ -2530,8 +2599,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi } FAIL_IF(push_inst(compiler, MTCTR | S(src_r))); - if (jump) - jump->addr = compiler->size; return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0)); } @@ -2748,13 +2815,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp type ^= 0x1; } else { if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { - FAIL_IF(push_inst(compiler, OR | S(dst_reg) | A(TMP_REG2) | B(dst_reg))); + FAIL_IF(push_inst(compiler, OR | S(dst_reg) | A(TMP_REG1) | B(dst_reg))); if ((src1 & REG_MASK) == dst_reg) - src1 = (src1 & ~REG_MASK) | TMP_REG2; + src1 = (src1 & ~REG_MASK) | TMP_REG1; if (OFFS_REG(src1) == dst_reg) - src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2); + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); } FAIL_IF(push_inst(compiler, OR | S(src2_reg) | A(dst_reg) | B(src2_reg))); @@ -3061,31 +3128,31 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - PTR_FAIL_IF(emit_const(compiler, dst_r, 0)); + compiler->size++; #else - PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); compiler->size += 4; #endif if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); - return put_label; + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) diff --git a/ext/pcre/pcre2lib/sljit/sljitNativeRISCV_common.c b/ext/pcre/pcre2lib/sljit/sljitNativeRISCV_common.c index 64bd411d9d3a..d86100a80cef 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativeRISCV_common.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativeRISCV_common.c @@ -181,24 +181,23 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i if (jump->flags & JUMP_ADDR) target_addr = jump->u.target; else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(jump->u.label != NULL); target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset; if (jump->flags & IS_COND) { - inst--; diff += SSIZE_OF(ins); if (diff >= BRANCH_MIN && diff <= BRANCH_MAX) { - jump->flags |= PATCH_B; + inst--; inst[0] = (inst[0] & 0x1fff07f) ^ 0x1000; + jump->flags |= PATCH_B; jump->addr = (sljit_uw)inst; return inst; } - inst++; diff -= SSIZE_OF(ins); } @@ -265,83 +264,109 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) -static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { - if (max_label <= (sljit_uw)S32_MAX) { - put_label->flags = PATCH_ABS32; + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT)); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + if (diff >= S32_MIN && diff <= S32_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_REL32; + return 1; + } + + if (addr <= S32_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS32; return 1; } - if (max_label <= S44_MAX) { - put_label->flags = PATCH_ABS44; + if (addr <= S44_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS44; return 3; } - if (max_label <= S52_MAX) { - put_label->flags = PATCH_ABS52; + if (addr <= S52_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)4 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS52; return 4; } - put_label->flags = 0; + SLJIT_ASSERT(jump->flags >= ((sljit_uw)5 << JUMP_SIZE_SHIFT)); return 5; } #endif /* SLJIT_CONFIG_RISCV_64 */ -static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg) +static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw executable_offset) { - struct sljit_jump *jump = NULL; - struct sljit_put_label *put_label; - sljit_uw flags; - sljit_ins *inst; + sljit_uw flags = jump->flags; + sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; + sljit_ins *ins = (sljit_ins*)jump->addr; + sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : TMP_REG1; #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) sljit_sw high; #endif - sljit_uw addr; + SLJIT_UNUSED_ARG(executable_offset); - if (reg != 0) { - jump = (struct sljit_jump*)dst; - flags = jump->flags; - inst = (sljit_ins*)jump->addr; - addr = (flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - } else { - put_label = (struct sljit_put_label*)dst; #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) - flags = put_label->flags; -#endif - inst = (sljit_ins*)put_label->addr; - addr = put_label->label->addr; - reg = *inst; + if (flags & PATCH_REL32) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset); + + SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX); + + if ((addr & 0x800) != 0) + addr += 0x1000; + + ins[0] = AUIPC | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + + if (!(flags & JUMP_MOV_ADDR)) { + SLJIT_ASSERT((ins[1] & 0x707f) == JALR); + ins[1] = (ins[1] & 0xfffff) | IMM_I(addr); + } else + ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr); + return; } +#endif if ((addr & 0x800) != 0) addr += 0x1000; #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - inst[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + ins[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); #else /* !SLJIT_CONFIG_RISCV_32 */ if (flags & PATCH_ABS32) { SLJIT_ASSERT(addr <= S32_MAX); - inst[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + ins[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); } else if (flags & PATCH_ABS44) { high = (sljit_sw)addr >> 12; SLJIT_ASSERT((sljit_uw)high <= 0x7fffffff); if (high > S32_MAX) { SLJIT_ASSERT((high & 0x800) != 0); - inst[0] = LUI | RD(reg) | (sljit_ins)0x80000000u; - inst[1] = XORI | RD(reg) | RS1(reg) | IMM_I(high); + ins[0] = LUI | RD(reg) | (sljit_ins)0x80000000u; + ins[1] = XORI | RD(reg) | RS1(reg) | IMM_I(high); } else { if ((high & 0x800) != 0) high += 0x1000; - inst[0] = LUI | RD(reg) | (sljit_ins)(high & ~0xfff); - inst[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(high); + ins[0] = LUI | RD(reg) | (sljit_ins)(high & ~0xfff); + ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(high); } - inst[2] = SLLI | RD(reg) | RS1(reg) | IMM_I(12); - inst += 2; + ins[2] = SLLI | RD(reg) | RS1(reg) | IMM_I(12); + ins += 2; } else { high = (sljit_sw)addr >> 32; @@ -350,30 +375,128 @@ static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg) if (flags & PATCH_ABS52) { SLJIT_ASSERT(addr <= S52_MAX); - inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high << 12); + ins[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high << 12); } else { if ((high & 0x800) != 0) high += 0x1000; - inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff); - inst[1] = ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high); - inst++; + ins[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff); + ins[1] = ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high); + ins++; } - inst[1] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); - inst[2] = SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I((flags & PATCH_ABS52) ? 20 : 32); - inst[3] = XOR | RD(reg) | RS1(reg) | RS2(TMP_REG3); - inst += 3; + ins[1] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + ins[2] = SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I((flags & PATCH_ABS52) ? 20 : 32); + ins[3] = XOR | RD(reg) | RS1(reg) | RS2(TMP_REG3); + ins += 3; } #endif /* !SLJIT_CONFIG_RISCV_32 */ - if (jump != NULL) { - SLJIT_ASSERT((inst[1] & 0x707f) == JALR); - inst[1] = (inst[1] & 0xfffff) | IMM_I(addr); + if (!(flags & JUMP_MOV_ADDR)) { + SLJIT_ASSERT((ins[1] & 0x707f) == JALR); + ins[1] = (ins[1] & 0xfffff) | IMM_I(addr); } else - inst[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr); + ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr); +} + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE; + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (jump->u.target <= S32_MAX) + total_size = 2; + else if (jump->u.target <= S44_MAX) + total_size = 4; + else if (jump->u.target <= S52_MAX) + total_size = 5; +#endif /* SLJIT_CONFIG_RISCV_64 */ + } else { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH_MIN / SSIZE_OF(ins))) + total_size = 0; + else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins))) + total_size = 1; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins))) + total_size = 2; +#endif /* SLJIT_CONFIG_RISCV_64 */ + } + } + + size_reduce += JUMP_MAX_SIZE - total_size; + jump->flags |= total_size << JUMP_SIZE_SHIFT; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + } else { + total_size = 5; + + if (!(jump->flags & JUMP_ADDR)) { + /* Real size minus 1. Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins))) + total_size = 1; + } else if (jump->u.target < S32_MAX) + total_size = 1; + else if (jump->u.target < S44_MAX) + total_size = 3; + else if (jump->u.target <= S52_MAX) + total_size = 4; + + size_reduce += 5 - total_size; + jump->flags |= total_size << JUMP_SIZE_SHIFT; +#endif /* !SLJIT_CONFIG_RISCV_64 */ + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; } -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) { struct sljit_memory_fragment *buf; sljit_ins *code; @@ -381,77 +504,78 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; sljit_sw executable_offset; sljit_uw addr; struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); - code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + reduce_code_size(compiler); + + code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; code_ptr = code; word_count = 0; - next_addr = 0; - executable_offset = SLJIT_EXEC_OFFSET(code); - label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); do { *code_ptr = *buf_ptr++; - if (next_addr == word_count) { + if (next_min_addr == word_count) { SLJIT_ASSERT(!label || label->size >= word_count); SLJIT_ASSERT(!jump || jump->addr >= word_count); SLJIT_ASSERT(!const_ || const_->addr >= word_count); - SLJIT_ASSERT(!put_label || put_label->addr >= word_count); /* These structures are ordered by their address. */ - if (label && label->size == word_count) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (jump && jump->addr == word_count) { + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code, executable_offset); + SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); + } else { #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - word_count += 1; -#else - word_count += 5; -#endif - jump->addr = (sljit_uw)code_ptr; - code_ptr = detect_jump_type(jump, code, executable_offset); + word_count += 1; + jump->addr = (sljit_uw)code_ptr; + code_ptr += 1; +#else /* !SLJIT_CONFIG_RISCV_32 */ + word_count += jump->flags >> JUMP_SIZE_SHIFT; + addr = (sljit_uw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = addr; +#endif /* SLJIT_CONFIG_RISCV_32 */ + } jump = jump->next; - } - if (const_ && const_->addr == word_count) { + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { const_->addr = (sljit_uw)code_ptr; const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == word_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; -#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - code_ptr += 1; - word_count += 1; -#else - code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); - word_count += 5; -#endif - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); + + SLJIT_GET_NEXT_MIN(); } code_ptr++; word_count++; @@ -461,7 +585,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->u.addr = (sljit_uw)code_ptr; label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -469,18 +593,17 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); jump = compiler->jumps; while (jump) { do { - if (!(jump->flags & (PATCH_B | PATCH_J | PATCH_REL32))) { - load_addr_to_reg(jump, TMP_REG1); + if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) { + load_addr_to_reg(jump, executable_offset); break; } - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; buf_ptr = (sljit_ins *)jump->addr; addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); @@ -491,31 +614,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil break; } -#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) - if (jump->flags & PATCH_REL32) { - SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX); - - if ((addr & 0x800) != 0) - addr += 0x1000; - - buf_ptr[0] = AUIPC | RD(TMP_REG1) | (sljit_ins)((sljit_sw)addr & ~0xfff); - SLJIT_ASSERT((buf_ptr[1] & 0x707f) == JALR); - buf_ptr[1] |= IMM_I(addr); - break; - } -#endif - SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX); addr = (addr & 0xff000) | ((addr & 0x800) << 9) | ((addr & 0x7fe) << 20) | ((addr & 0x100000) << 11); buf_ptr[0] = JAL | RD((jump->flags & IS_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | (sljit_ins)addr; } while (0); - jump = jump->next; - } - put_label = compiler->put_labels; - while (put_label) { - load_addr_to_reg(put_label, 0); - put_label = put_label->next; + jump = jump->next; } compiler->error = SLJIT_ERR_COMPILED; @@ -599,6 +703,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) #define SLOW_SRC1 0x08000 #define SLOW_SRC2 0x10000 #define SLOW_DEST 0x20000 +#define MEM_USE_TMP2 0x40000 #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) #define STACK_STORE SW @@ -883,7 +988,6 @@ static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, /* Can perform an operation using at most 1 instruction. */ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { - SLJIT_ASSERT(arg & SLJIT_MEM); if (!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) { @@ -928,7 +1032,7 @@ static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, slj static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { sljit_s32 base = arg & REG_MASK; - sljit_s32 tmp_r = TMP_REG1; + sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1; sljit_sw offset, argw_hi; SLJIT_ASSERT(arg & SLJIT_MEM); @@ -937,11 +1041,6 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl next_argw = 0; } - /* Since tmp can be the same as base or offset registers, - * these might be unavailable after modifying tmp. */ - if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA) && reg == TMP_REG2) - tmp_r = reg; - if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { argw &= 0x3; @@ -1187,7 +1286,7 @@ static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { - sljit_s32 is_overflow, is_carry, carry_src_r, is_handled; + sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg; sljit_ins op_imm, op_reg; #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; @@ -1197,20 +1296,20 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl switch (GET_OPCODE(op)) { case SLJIT_MOV: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, ADDI | RD(dst) | RS1(src2) | IMM_I(0)); return SLJIT_SUCCESS; case SLJIT_MOV_U8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, ANDI | RD(dst) | RS1(src2) | IMM_I(0xff)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_S8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(24))); return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(24)); @@ -1219,7 +1318,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; case SLJIT_MOV_U16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16))); return push_inst(compiler, SRLI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16)); @@ -1228,7 +1327,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; case SLJIT_MOV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16))); return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16)); @@ -1238,7 +1337,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) case SLJIT_MOV_U32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(src2) | IMM_I(32))); return push_inst(compiler, SRLI | RD(dst) | RS1(dst) | IMM_I(32)); @@ -1247,7 +1346,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; case SLJIT_MOV_S32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, ADDI | 0x8 | RD(dst) | RS1(src2) | IMM_I(0)); SLJIT_ASSERT(dst == src2); @@ -1256,7 +1355,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_CLZ: case SLJIT_CTZ: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return emit_clz_ctz(compiler, op, dst, src2); case SLJIT_REV: @@ -1264,17 +1363,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) case SLJIT_REV_U32: #endif /* SLJIT_CONFIG_RISCV_32 */ - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return emit_rev(compiler, op, dst, src2); case SLJIT_REV_U16: case SLJIT_REV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return emit_rev16(compiler, op, dst, src2); #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) case SLJIT_REV_U32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1); FAIL_IF(emit_rev(compiler, op, dst, src2)); if (dst == TMP_REG2) return SLJIT_SUCCESS; @@ -1402,8 +1501,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl is_handled = 1; if (flags & SRC2_IMM) { - FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2))); - src2 = TMP_REG2; + reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(push_inst(compiler, ADDI | RD(reg) | RS1(TMP_ZERO) | IMM_I(src2))); + src2 = reg; flags &= ~SRC2_IMM; } @@ -1631,7 +1731,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 dst_r = TMP_REG2; sljit_s32 src1_r; sljit_sw src2_r = 0; - sljit_s32 sugg_src2_r = TMP_REG2; + sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2; if (!(flags & ALT_KEEP_CACHE)) { compiler->cache_arg = 0; @@ -1647,7 +1747,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 dst_r = dst; flags |= REG_DEST; if (flags & MOVE_OP) - sugg_src2_r = dst_r; + src2_tmp_reg = dst_r; } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) flags |= SLOW_DEST; @@ -1673,16 +1773,14 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (FAST_IS_REG(src1)) { src1_r = src1; flags |= REG1_SOURCE; - } - else if (src1 == SLJIT_IMM) { + } else if (src1 == SLJIT_IMM) { if (src1w) { FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); src1_r = TMP_REG1; } else src1_r = TMP_ZERO; - } - else { + } else { if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) FAIL_IF(compiler->error); else @@ -1696,14 +1794,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 flags |= REG2_SOURCE; if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) dst_r = (sljit_s32)src2_r; - } - else if (src2 == SLJIT_IMM) { + } else if (src2 == SLJIT_IMM) { if (!(flags & SRC2_IMM)) { if (src2w) { - FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w, TMP_REG3)); - src2_r = sugg_src2_r; - } - else { + FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w, TMP_REG3)); + src2_r = src2_tmp_reg; + } else { src2_r = TMP_ZERO; if (flags & MOVE_OP) { if (dst & SLJIT_MEM) @@ -1713,30 +1809,28 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } } } - } - else { - if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w)) + } else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, src2_tmp_reg, src2, src2w)) FAIL_IF(compiler->error); else flags |= SLOW_SRC2; - src2_r = sugg_src2_r; + src2_r = src2_tmp_reg; } if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { SLJIT_ASSERT(src2_r == TMP_REG2); - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw)); + } else { FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); } - else { - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); - } } else if (flags & SLOW_SRC1) FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((src1_r == TMP_REG1) ? MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, dst, dstw)); FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); @@ -1819,42 +1913,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_MOV32: #endif case SLJIT_MOV_P: - return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw); #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) case SLJIT_MOV_U32: - return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); case SLJIT_MOV_S32: /* Logical operators have no W variant, so sign extended input is necessary for them. */ case SLJIT_MOV32: - return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); #endif case SLJIT_MOV_U8: - return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); case SLJIT_MOV_S8: - return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); case SLJIT_MOV_U16: - return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); case SLJIT_MOV_S16: - return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_CLZ: case SLJIT_CTZ: case SLJIT_REV: - return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw); case SLJIT_REV_U16: case SLJIT_REV_S16: - return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw); case SLJIT_REV_U32: case SLJIT_REV_S32: - return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw); } SLJIT_UNREACHABLE(); @@ -1941,6 +2035,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + SLJIT_ASSERT(WORD == 0 || WORD == 0x8); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, ADD | WORD | RD(dst_reg) | RS1(dst_reg) | RS2(TMP_REG2)); + } + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -2292,7 +2410,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src))); else dst_r = src; @@ -2351,11 +2469,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil } if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); - } - else { + } else { FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); } @@ -2391,7 +2508,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)); } - if (dst_r == TMP_FREG2) + if (dst_r != dst) FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); return SLJIT_SUCCESS; @@ -2522,11 +2639,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(push_inst(compiler, inst)); /* Maximum number of instructions required for generating a constant. */ -#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - compiler->size += 1; -#else - compiler->size += 5; -#endif + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2553,6 +2666,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler struct sljit_jump *jump; sljit_s32 flags; sljit_ins inst; + sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); @@ -2573,8 +2687,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler } if (src2 & SLJIT_MEM) { - PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG2, src2, src2w, 0, 0)); - src2 = TMP_REG2; + PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0)); + src2 = src2_tmp_reg; } if (src1 == SLJIT_IMM) { @@ -2588,8 +2702,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler if (src2 == SLJIT_IMM) { if (src2w != 0) { - PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w, TMP_REG3)); - src2 = TMP_REG2; + PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w, TMP_REG3)); + src2 = src2_tmp_reg; } else src2 = TMP_ZERO; @@ -2639,11 +2753,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler PTR_FAIL_IF(push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0))); /* Maximum number of instructions required for generating a constant. */ -#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - compiler->size += 1; -#else - compiler->size += 5; -#endif + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2675,11 +2785,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0))); /* Maximum number of instructions required for generating a constant. */ -#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - compiler->size += 1; -#else - compiler->size += 5; -#endif + compiler->size += JUMP_MAX_SIZE - 1; return SLJIT_SUCCESS; } @@ -2826,13 +2932,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp type ^= 0x1; } else { if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { - FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(dst_reg) | IMM_I(0))); + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(dst_reg) | IMM_I(0))); if ((src1 & REG_MASK) == dst_reg) - src1 = (src1 & ~REG_MASK) | TMP_REG2; + src1 = (src1 & ~REG_MASK) | TMP_REG1; if (OFFS_REG(src1) == dst_reg) - src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2); + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); } FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst_reg) | RS1(src2_reg) | IMM_I(0))); @@ -2856,7 +2962,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp } else FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst_reg) | RS1(src1) | IMM_I(0))); - *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 9); + size = compiler->size - size; + *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((size & 0x7) << 9) | (sljit_ins)((size >> 3) << 25); return SLJIT_SUCCESS; } @@ -2895,7 +3002,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com else FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(type) | FRD(dst_freg) | FRS1(src1) | FRS2(src1))); - *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 9); + size = compiler->size - size; + *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((size & 0x7) << 9) | (sljit_ins)((size >> 3) << 25); return SLJIT_SUCCESS; } @@ -2980,31 +3088,31 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) compiler->size += 1; -#else +#else /* !SLJIT_CONFIG_RISCV_32 */ compiler->size += 5; -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); - return put_label; + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) diff --git a/ext/pcre/pcre2lib/sljit/sljitNativeS390X.c b/ext/pcre/pcre2lib/sljit/sljitNativeS390X.c index 67516f9b320f..99e846350fdd 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativeS390X.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativeS390X.c @@ -38,12 +38,9 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) return "s390x" SLJIT_CPUINFO; } -/* Instructions. */ +/* Instructions are stored as 64 bit values regardless their size. */ typedef sljit_uw sljit_ins; -/* Instruction tags (most significant halfword). */ -static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; - #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) @@ -140,50 +137,21 @@ static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r) return reg_map[r]; } -/* Size of instruction in bytes. Tags must already be cleared. */ -static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins) -{ - /* keep faulting instructions */ - if (ins == 0) - return 2; - - if ((ins & 0x00000000ffffL) == ins) - return 2; - if ((ins & 0x0000ffffffffL) == ins) - return 4; - if ((ins & 0xffffffffffffL) == ins) - return 6; - - SLJIT_UNREACHABLE(); - return (sljit_uw)-1; -} - static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) { sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ibuf); *ibuf = ins; + + SLJIT_ASSERT(ins <= 0xffffffffffffL); + compiler->size++; - return SLJIT_SUCCESS; -} + if (ins & 0xffff00000000L) + compiler->size++; -static sljit_s32 encode_inst(void **ptr, sljit_ins ins) -{ - sljit_u16 *ibuf = (sljit_u16 *)*ptr; - sljit_uw size = sizeof_ins(ins); + if (ins & 0xffffffff0000L) + compiler->size++; - SLJIT_ASSERT((size & 6) == size); - switch (size) { - case 6: - *ibuf++ = (sljit_u16)(ins >> 32); - /* fallthrough */ - case 4: - *ibuf++ = (sljit_u16)(ins >> 16); - /* fallthrough */ - case 2: - *ibuf++ = (sljit_u16)(ins); - } - *ptr = (void*)ibuf; return SLJIT_SUCCESS; } @@ -1424,97 +1392,60 @@ static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const str return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w); } -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) { struct sljit_label *label; struct sljit_jump *jump; - struct sljit_s390x_const *const_; - struct sljit_put_label *put_label; + struct sljit_const *const_; sljit_sw executable_offset; - sljit_uw ins_size = 0; /* instructions */ + sljit_uw ins_size = compiler->size << 1; sljit_uw pool_size = 0; /* literal pool */ sljit_uw pad_size; - sljit_uw i, j = 0; + sljit_uw half_count; + SLJIT_NEXT_DEFINE_TYPES; struct sljit_memory_fragment *buf; - void *code, *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_u16 *code; + sljit_u16 *code_ptr; sljit_uw *pool, *pool_ptr; - sljit_sw source, offset; /* TODO(carenas): only need 32 bit */ + sljit_ins ins; + sljit_sw source, offset; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); reverse_buf(compiler); - /* branch handling */ - label = compiler->labels; jump = compiler->jumps; - put_label = compiler->put_labels; - - /* TODO(carenas): compiler->executable_size could be calculated - * before to avoid the following loop (except for - * pool_size) - */ - /* calculate the size of the code */ - for (buf = compiler->buf; buf != NULL; buf = buf->next) { - sljit_uw len = buf->used_size / sizeof(sljit_ins); - sljit_ins *ibuf = (sljit_ins *)buf->memory; - for (i = 0; i < len; ++i, ++j) { - sljit_ins ins = ibuf[i]; - - /* TODO(carenas): instruction tag vs size/addr == j - * using instruction tags for const is creative - * but unlike all other architectures, and is not - * done consistently for all other objects. - * This might need reviewing later. - */ - if (ins & sljit_ins_const) { - pool_size += sizeof(*pool); - ins &= ~sljit_ins_const; - } - if (label && label->size == j) { - label->size = ins_size; - label = label->next; - } - if (jump && jump->addr == j) { - if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) { - /* encoded: */ - /* brasl %r14, (or brcl , ) */ - /* replace with: */ - /* lgrl %r1, */ - /* bras %r14, %r1 (or bcr , %r1) */ - pool_size += sizeof(*pool); - ins_size += 2; - } - jump = jump->next; - } - if (put_label && put_label->addr == j) { - pool_size += sizeof(*pool); - put_label = put_label->next; - } - ins_size += sizeof_ins(ins); + while (jump != NULL) { + if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) { + /* encoded: */ + /* brasl %r14, (or brcl , ) */ + /* replace with: */ + /* lgrl %r1, */ + /* bras %r14, %r1 (or bcr , %r1) */ + pool_size += sizeof(*pool); + if (!(jump->flags & JUMP_MOV_ADDR)) + ins_size += 2; } + jump = jump->next; } - /* emit trailing label */ - if (label && label->size == j) { - label->size = ins_size; - label = label->next; + const_ = compiler->consts; + while (const_) { + pool_size += sizeof(*pool); + const_ = const_->next; } - SLJIT_ASSERT(!label); - SLJIT_ASSERT(!jump); - SLJIT_ASSERT(!put_label); - /* pad code size to 8 bytes so is accessible with half word offsets */ /* the literal pool needs to be doubleword aligned */ pad_size = ((ins_size + 7UL) & ~7UL) - ins_size; SLJIT_ASSERT(pad_size < 8UL); /* allocate target buffer */ - code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size, - compiler->exec_allocator_data); + code = (sljit_u16*)allocate_executable_memory(ins_size + pad_size + pool_size, options, exec_allocator_data, &executable_offset); PTR_FAIL_WITH_EXEC_IF(code); code_ptr = code; - executable_offset = SLJIT_EXEC_OFFSET(code); /* TODO(carenas): pool is optional, and the ABI recommends it to * be created before the function code, instead of @@ -1523,130 +1454,166 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil */ pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size); pool_ptr = pool; - const_ = (struct sljit_s390x_const *)compiler->consts; + buf = compiler->buf; + half_count = 0; - /* update label addresses */ label = compiler->labels; - while (label) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET( - (sljit_uw)code_ptr + label->size, executable_offset); - label = label->next; - } - - /* reset jumps */ jump = compiler->jumps; - put_label = compiler->put_labels; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); - /* emit the code */ - j = 0; - for (buf = compiler->buf; buf != NULL; buf = buf->next) { - sljit_uw len = buf->used_size / sizeof(sljit_ins); - sljit_ins *ibuf = (sljit_ins *)buf->memory; - for (i = 0; i < len; ++i, ++j) { - sljit_ins ins = ibuf[i]; - if (ins & sljit_ins_const) { - /* clear the const tag */ - ins &= ~sljit_ins_const; + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 3); + do { + ins = *buf_ptr++; + + if (next_min_addr == half_count) { + SLJIT_ASSERT(!label || label->size >= half_count); + SLJIT_ASSERT(!jump || jump->addr >= half_count); + SLJIT_ASSERT(!const_ || const_->addr >= half_count); + + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } - /* update instruction with relative address of constant */ - source = (sljit_sw)code_ptr; - offset = (sljit_sw)pool_ptr - source; + if (next_min_addr == next_jump_addr) { + if (SLJIT_UNLIKELY(jump->flags & JUMP_MOV_ADDR)) { + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + jump->addr = (sljit_uw)pool_ptr; + + /* store target into pool */ + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + pool_ptr++; + + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); + ins |= (sljit_ins)offset & 0xffffffff; + } else if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) { + sljit_ins arg; + + jump->addr = (sljit_uw)pool_ptr; + + /* load address into tmp1 */ + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); + + code_ptr[0] = (sljit_u16)(0xc408 | R4A(tmp1) /* lgrl */); + code_ptr[1] = (sljit_u16)(offset >> 16); + code_ptr[2] = (sljit_u16)offset; + code_ptr += 3; + pool_ptr++; + + /* branch to tmp1 */ + arg = (ins >> 36) & 0xf; + if (((ins >> 32) & 0xf) == 4) { + /* brcl -> bcr */ + ins = bcr(arg, tmp1); + } else { + SLJIT_ASSERT(((ins >> 32) & 0xf) == 5); + /* brasl -> basr */ + ins = basr(arg, tmp1); + } + + /* Adjust half_count. */ + half_count += 2; + } else + jump->addr = (sljit_uw)code_ptr; + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { + /* update instruction with relative address of constant */ + source = (sljit_sw)code_ptr; + offset = (sljit_sw)pool_ptr - source; + + SLJIT_ASSERT(!(offset & 0x1)); + offset >>= 1; /* halfword (not byte) offset */ + SLJIT_ASSERT(is_s32(offset)); - SLJIT_ASSERT(!(offset & 1)); - offset >>= 1; /* halfword (not byte) offset */ - SLJIT_ASSERT(is_s32(offset)); + ins |= (sljit_ins)offset & 0xffffffff; - ins |= (sljit_ins)offset & 0xffffffff; + /* update address */ + const_->addr = (sljit_uw)pool_ptr; - /* update address */ - const_->const_.addr = (sljit_uw)pool_ptr; + /* store initial value into pool and update pool address */ + *(pool_ptr++) = (sljit_uw)(((struct sljit_s390x_const*)const_)->init_value); - /* store initial value into pool and update pool address */ - *(pool_ptr++) = (sljit_uw)const_->init_value; + /* move to next constant */ + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + } - /* move to next constant */ - const_ = (struct sljit_s390x_const *)const_->const_.next; + SLJIT_GET_NEXT_MIN(); } - if (jump && jump->addr == j) { - sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); - if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) { - sljit_ins op, arg; - jump->addr = (sljit_uw)pool_ptr; + if (ins & 0xffff00000000L) { + *code_ptr++ = (sljit_u16)(ins >> 32); + half_count++; + } - /* load address into tmp1 */ - source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + if (ins & 0xffffffff0000L) { + *code_ptr++ = (sljit_u16)(ins >> 16); + half_count++; + } - SLJIT_ASSERT(!(offset & 1)); - offset >>= 1; - SLJIT_ASSERT(is_s32(offset)); + *code_ptr++ = (sljit_u16)ins; + half_count++; + } while (buf_ptr < buf_end); - encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff)); - - /* store jump target into pool and update pool address */ - *(pool_ptr++) = (sljit_uw)target; - - /* branch to tmp1 */ - op = (ins >> 32) & 0xf; - arg = (ins >> 36) & 0xf; - switch (op) { - case 4: /* brcl -> bcr */ - ins = bcr(arg, tmp1); - break; - case 5: /* brasl -> basr */ - ins = basr(arg, tmp1); - break; - default: - abort(); - } - } - else { - jump->addr = (sljit_uw)code_ptr + 2; - source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - offset = target - source; + buf = buf->next; + } while (buf); - /* offset must be halfword aligned */ - SLJIT_ASSERT(!(offset & 1)); - offset >>= 1; - SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */ + if (next_label_size == half_count) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label = label->next; + } - /* patch jump target */ - ins |= (sljit_ins)offset & 0xffffffff; - } - jump = jump->next; - } - if (put_label && put_label->addr == j) { - source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code + (ins_size >> 1) == code_ptr); + SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr); - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; + jump = compiler->jumps; + while (jump != NULL) { + offset = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr); - /* store target into pool */ - *pool_ptr = put_label->label->addr; - offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; - pool_ptr++; + if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) { + /* Store jump target into pool. */ + *(sljit_uw*)(jump->addr) = (sljit_uw)offset; + } else { + code_ptr = (sljit_u16*)jump->addr; + offset -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - SLJIT_ASSERT(!(offset & 1)); - offset >>= 1; - SLJIT_ASSERT(is_s32(offset)); - ins |= (sljit_ins)offset & 0xffffffff; + /* offset must be halfword aligned */ + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */ - put_label = put_label->next; - } - encode_inst(&code_ptr, ins); + code_ptr[1] = (sljit_u16)(offset >> 16); + code_ptr[2] = (sljit_u16)offset; } + jump = jump->next; } - SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr); - SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr); compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; compiler->executable_size = ins_size; if (pool_size) compiler->executable_size += (pad_size + pool_size); - code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset); - code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); SLJIT_CACHE_FLUSH(code, code_ptr); SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); return code; @@ -2497,7 +2464,7 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, const struct ins_forms *forms; sljit_ins ins; - if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) { + if (dst == TMP_REG2 && flag_type <= SLJIT_SIG_LESS_EQUAL) { int compare_signed = flag_type >= SLJIT_SIG_LESS; compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE; @@ -2597,7 +2564,7 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, - the first operand is less if the sign bit of the result is not set The -result operation sets the corrent sign, because the result cannot be zero. The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */ - FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2))); + FAIL_IF(push_inst(compiler, brc(0xe, (op & SLJIT_32) ? (2 + 1) : (2 + 2)))); FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); } else if (op & SLJIT_SET_Z) @@ -2759,7 +2726,7 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o sljit_s32 type = GET_OPCODE(op); const struct ins_forms *forms; - if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) { + if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == TMP_REG2))) { sljit_s32 count16 = 0; sljit_uw imm = (sljit_uw)src2w; @@ -2775,13 +2742,13 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o if ((imm & 0xffff000000000000ull) != 0) count16++; - if (type == SLJIT_AND && dst == (sljit_s32)tmp0 && count16 == 1) { - sljit_gpr src_r = tmp0; + if (type == SLJIT_AND && dst == TMP_REG2 && count16 == 1) { + sljit_gpr src_r = tmp1; if (FAST_IS_REG(src1)) src_r = gpr(src1 & REG_MASK); else - FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + FAIL_IF(emit_move(compiler, tmp1, src1, src1w)); if ((imm & 0x000000000000ffffull) != 0 || imm == 0) return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm); @@ -3002,11 +2969,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { + sljit_s32 dst_reg = (GET_OPCODE(op) == SLJIT_SUB || GET_OPCODE(op) == SLJIT_AND) ? TMP_REG2 : TMP_REG1; + CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); SLJIT_SKIP_CHECKS(compiler); - return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w); + return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), 0 /* tmp0 */, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, ((op & SLJIT_32) ? 0x1a00 /* ar */ : 0xb9080000 /* agr */) | R4A(gpr(dst_reg)) | R0A(tmp0)); + } + + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, @@ -3415,12 +3402,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src))); } - if (!(dst & SLJIT_MEM)) - return SLJIT_SUCCESS; - - SLJIT_ASSERT(dst_r == TMP_FREG1); + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); - return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; } #define FLOAT_MOV(op, dst_r, src_r) \ @@ -3491,7 +3476,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil if (dst & SLJIT_MEM) return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); - SLJIT_ASSERT(dst_r != TMP_FREG1); return SLJIT_SUCCESS; } @@ -3738,8 +3722,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co FAIL_IF(push_inst(compiler, WHEN2(op & SLJIT_32, lochi, locghi))); } else { - /* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */ - abort(); + FAIL_IF(push_load_imm_inst(compiler, loc_r, 1)); + FAIL_IF(push_inst(compiler, brc(mask, 2 + 2))); + FAIL_IF(push_load_imm_inst(compiler, loc_r, 0)); } #undef LEVAL @@ -3837,8 +3822,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16); } - FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w)); - src_r = tmp0; + FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w)); + src_r = tmp1; } else src_r = gpr(src1); @@ -4474,9 +4459,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; if (have_genext()) - PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0))); + PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0))); else { - PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0))); + PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0))); PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1))); } @@ -4503,20 +4488,18 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label( - struct sljit_compiler *compiler, - sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_gpr dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; @@ -4530,7 +4513,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label( if (dst & SLJIT_MEM) PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0)); - return put_label; + return jump; } /* TODO(carenas): EVAL probably should move up or be refactored */ diff --git a/ext/pcre/pcre2lib/sljit/sljitNativeX86_32.c b/ext/pcre/pcre2lib/sljit/sljitNativeX86_32.c index ba4a1ebbc20f..59ea04a5c81b 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativeX86_32.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativeX86_32.c @@ -283,28 +283,25 @@ static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw /* Enter / return */ /* --------------------------------------------------------------------- */ -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset) +static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset) { sljit_uw type = jump->flags >> TYPE_SHIFT; if (type == SLJIT_JUMP) { *code_ptr++ = JMP_i32; - jump->addr++; - } - else if (type >= SLJIT_FAST_CALL) { + } else if (type >= SLJIT_FAST_CALL) { *code_ptr++ = CALL_i32; - jump->addr++; - } - else { + } else { *code_ptr++ = GROUP_0F; *code_ptr++ = get_jump_code(type); - jump->addr += 2; } - if (jump->flags & JUMP_LABEL) - jump->flags |= PATCH_MW; - else + jump->addr = (sljit_uw)code_ptr; + + if (jump->flags & JUMP_ADDR) sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset)); + else + jump->flags |= PATCH_MW; code_ptr += 4; return code_ptr; @@ -1249,6 +1246,68 @@ static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler, /* Other operations */ /* --------------------------------------------------------------------- */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + sljit_s32 dst = dst_reg; + sljit_sw dstw = 0; + sljit_sw src2w = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2_reg, src2w, (void)0); + + type &= ~SLJIT_32; + + if (dst & SLJIT_MEM) { + if (src1 == SLJIT_IMM || (!(src1 & SLJIT_MEM) && (src2_reg & SLJIT_MEM))) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + src1 = src2_reg; + src1w = src2w; + type ^= 0x1; + } else + EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w); + + dst_reg = TMP_REG1; + } else { + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = src2w; + type ^= 0x1; + } else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + EMIT_MOV(compiler, dst_reg, 0, src1, src1w); + src1 = src2_reg; + src1w = src2w; + type ^= 0x1; + } else + EMIT_MOV(compiler, dst_reg, 0, src2_reg, src2w); + } + } + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && (src1 != SLJIT_IMM || dst_reg != TMP_REG1)) { + if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + src1 = TMP_REG1; + src1w = 0; + } + + FAIL_IF(emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w)); + } else + FAIL_IF(emit_cmov_generic(compiler, type, dst_reg, src1, src1w)); + + if (dst & SLJIT_MEM) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) @@ -1449,10 +1508,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *comp if (u.imm == 0) { inst[2] = PXOR_x_xm; - inst[3] = U8(freg | (freg << 3) | MOD_REG); + inst[3] = U8(freg_map[freg] | (freg_map[freg] << 3) | MOD_REG); } else { inst[2] = MOVD_x_rm; - inst[3] = U8(reg_map[TMP_REG1] | (freg << 3) | MOD_REG); + inst[3] = U8(reg_map[TMP_REG1] | (freg_map[freg] << 3) | MOD_REG); } return SLJIT_SUCCESS; @@ -1462,7 +1521,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *comp sljit_s32 freg, sljit_f64 value) { sljit_u8 *inst; - sljit_s32 tmp_freg = freg; union { sljit_s32 imm[2]; sljit_f64 value; @@ -1478,8 +1536,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *comp return emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]); - } else + } else { + SLJIT_ASSERT(cpu_feature_list != 0); + + if (!(cpu_feature_list & CPU_FEATURE_SSE41) && u.imm[1] != 0 && u.imm[0] != u.imm[1]) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, u.imm[0]); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_IMM, u.imm[1]); + + return emit_groupf(compiler, MOVLPD_x_m | EX86_SSE2, freg, SLJIT_MEM1(SLJIT_SP), 0); + } + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[0]); + } FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, TMP_REG1, 0)); @@ -1493,23 +1561,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *comp inst[0] = GROUP_0F; inst[1] = SHUFPS_x_xm; - inst[2] = U8(MOD_REG | (freg << 3) | freg); + inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[freg]); inst[3] = 0x51; return SLJIT_SUCCESS; } if (u.imm[0] != u.imm[1]) { - SLJIT_ASSERT(u.imm[1] != 0 && cpu_feature_list != 0); - + SLJIT_ASSERT(cpu_feature_list & CPU_FEATURE_SSE41); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]); - if (cpu_feature_list & CPU_FEATURE_SSE41) { - FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0)); - return emit_byte(compiler, 1); - } - - FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, TMP_REG1, 0)); - tmp_freg = TMP_FREG; + FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0)); + return emit_byte(compiler, 1); } inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); @@ -1518,7 +1580,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *comp inst[0] = GROUP_0F; inst[1] = UNPCKLPS_x_xm; - inst[2] = U8(MOD_REG | (freg << 3) | tmp_freg); + inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[freg]); return SLJIT_SUCCESS; } @@ -1581,7 +1643,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi inst[0] = GROUP_66; inst[1] = GROUP_0F; inst[2] = PSHUFD_x_xm; - inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg); + inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg_map[freg]); inst[4] = 1; } else if (reg != 0) FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw)); @@ -1597,7 +1659,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi inst[0] = GROUP_0F; inst[1] = UNPCKLPS_x_xm; - inst[2] = U8(MOD_REG | (freg << 3) | (reg == 0 ? freg : TMP_FREG)); + inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[reg == 0 ? freg : TMP_FREG]); } else FAIL_IF(emit_groupf(compiler, MOVD_rm_x | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw)); diff --git a/ext/pcre/pcre2lib/sljit/sljitNativeX86_64.c b/ext/pcre/pcre2lib/sljit/sljitNativeX86_64.c index f313f3f038f7..1ab79293c7ef 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativeX86_64.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativeX86_64.c @@ -358,26 +358,28 @@ static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw /* Enter / return */ /* --------------------------------------------------------------------- */ -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr) +static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr) { sljit_uw type = jump->flags >> TYPE_SHIFT; - int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff); + int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && (jump->flags & JUMP_ADDR) && (jump->u.target <= 0xffffffff); /* The relative jump below specialized for this case. */ - SLJIT_ASSERT(reg_map[TMP_REG2] >= 8); + SLJIT_ASSERT(reg_map[TMP_REG2] >= 8 && TMP_REG2 != SLJIT_TMP_DEST_REG); if (type < SLJIT_JUMP) { /* Invert type. */ - *code_ptr++ = U8(get_jump_code(type ^ 0x1) - 0x10); - *code_ptr++ = short_addr ? (6 + 3) : (10 + 3); + code_ptr[0] = U8(get_jump_code(type ^ 0x1) - 0x10); + code_ptr[1] = short_addr ? (6 + 3) : (10 + 3); + code_ptr += 2; } - *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B); - *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2]; + code_ptr[0] = short_addr ? REX_B : (REX_W | REX_B); + code_ptr[1] = MOV_r_i32 | reg_lmap[TMP_REG2]; + code_ptr += 2; jump->addr = (sljit_uw)code_ptr; - if (jump->flags & JUMP_LABEL) + if (!(jump->flags & JUMP_ADDR)) jump->flags |= PATCH_MD; else if (short_addr) sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target); @@ -386,60 +388,62 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw); - *code_ptr++ = REX_B; - *code_ptr++ = GROUP_FF; - *code_ptr++ = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]); + code_ptr[0] = REX_B; + code_ptr[1] = GROUP_FF; + code_ptr[2] = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]); - return code_ptr; + return code_ptr + 3; } -static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label) +static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) { - if (max_label > HALFWORD_MAX) { - put_label->addr -= put_label->flags; - put_label->flags = PATCH_MD; - return code_ptr; - } + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); - if (put_label->flags == 0) { - /* Destination is register. */ - code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw); + SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) <= 10); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + jump->u.label->size; - SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); - SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32); + if (addr > 0xffffffffl) { + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - if ((code_ptr[0] & 0x07) != 0) { - code_ptr[0] = U8(code_ptr[0] & ~0x08); - code_ptr += 2 + sizeof(sljit_s32); - } - else { - code_ptr[0] = code_ptr[1]; - code_ptr += 1 + sizeof(sljit_s32); + if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) { + SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 7); + code_ptr -= SSIZE_OF(s32) - 1; + + SLJIT_ASSERT((code_ptr[-3 - SSIZE_OF(s32)] & 0xf8) == REX_W); + SLJIT_ASSERT((code_ptr[-2 - SSIZE_OF(s32)] & 0xf8) == MOV_r_i32); + + code_ptr[-3 - SSIZE_OF(s32)] = U8(REX_W | ((code_ptr[-3 - SSIZE_OF(s32)] & 0x1) << 2)); + code_ptr[-1 - SSIZE_OF(s32)] = U8(((code_ptr[-2 - SSIZE_OF(s32)] & 0x7) << 3) | 0x5); + code_ptr[-2 - SSIZE_OF(s32)] = LEA_r_m; + + jump->flags |= PATCH_MW; + return code_ptr; } - put_label->addr = (sljit_uw)code_ptr; + jump->flags |= PATCH_MD; return code_ptr; } - code_ptr -= put_label->flags + (2 + sizeof(sljit_uw)); - SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags); + code_ptr -= 2 + sizeof(sljit_uw); SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32); - if ((code_ptr[1] & 0xf8) == MOV_r_i32) { - code_ptr += 2 + sizeof(sljit_uw); - SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + if ((code_ptr[0] & 0x07) != 0) { + SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 6); + code_ptr[0] = U8(code_ptr[0] & ~0x08); + code_ptr += 2 + sizeof(sljit_s32); + } else { + SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 5); + code_ptr[0] = code_ptr[1]; + code_ptr += 1 + sizeof(sljit_s32); } - SLJIT_ASSERT(code_ptr[1] == MOV_rm_r); - - code_ptr[0] = U8(code_ptr[0] & ~0x4); - code_ptr[1] = MOV_rm_i32; - code_ptr[2] = U8(code_ptr[2] & ~(0x7 << 3)); - - code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32)); - put_label->addr = (sljit_uw)code_ptr; - put_label->flags = 0; return code_ptr; } @@ -1003,6 +1007,46 @@ static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler, /* Other operations */ /* --------------------------------------------------------------------- */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + compiler->mode32 = type & SLJIT_32; + type &= ~SLJIT_32; + + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + EMIT_MOV(compiler, dst_reg, 0, src1, src1w); + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else + EMIT_MOV(compiler, dst_reg, 0, src2_reg, 0); + } + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) { + if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG2, 0, src1, src1w); + src1 = TMP_REG2; + src1w = 0; + } + + return emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w); + } + + return emit_cmov_generic(compiler, type, dst_reg, src1, src1w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) diff --git a/ext/pcre/pcre2lib/sljit/sljitNativeX86_common.c b/ext/pcre/pcre2lib/sljit/sljitNativeX86_common.c index c2c0421349b8..ecb7e9be3b0e 100644 --- a/ext/pcre/pcre2lib/sljit/sljitNativeX86_common.c +++ b/ext/pcre/pcre2lib/sljit/sljitNativeX86_common.c @@ -24,12 +24,6 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) -#include -#endif /* __has_feature(memory_sanitizer) */ -#endif /* defined(__has_feature) */ - SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { return "x86" SLJIT_CPUINFO; @@ -72,7 +66,6 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 3 }; @@ -379,6 +372,11 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { #define RET() (*inst++ = RET_near) #define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0) +#define SLJIT_INST_LABEL 255 +#define SLJIT_INST_JUMP 254 +#define SLJIT_INST_MOV_ADDR 253 +#define SLJIT_INST_CONST 252 + /* Multithreading does not affect these static variables, since they store built-in CPU features. Therefore they can be overwritten by different threads if they detect the CPU features in the same time. */ @@ -392,6 +390,7 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { #define CPU_FEATURE_CMOV 0x020 #define CPU_FEATURE_AVX 0x040 #define CPU_FEATURE_AVX2 0x080 +#define CPU_FEATURE_OSXSAVE 0x100 static sljit_u32 cpu_feature_list = 0; @@ -490,22 +489,50 @@ static void execute_cpu_id(sljit_u32 info[4]) } #endif /* _MSC_VER && _MSC_VER >= 1400 */ +} -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) -__msan_unpoison(info, 4 * sizeof(sljit_u32)); -#endif /* __has_feature(memory_sanitizer) */ -#endif /* defined(__has_feature) */ +static sljit_u32 execute_get_xcr0_low(void) +{ + sljit_u32 xcr0; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + xcr0 = (sljit_u32)_xgetbv(0); + +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__) + + /* AT&T syntax. */ + __asm__ ( + "xorl %%ecx, %%ecx\n" + "xgetbv\n" + : "=a" (xcr0) + : +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + : "ecx", "edx" +#else /* !SLJIT_CONFIG_X86_32 */ + : "rcx", "rdx" +#endif /* SLJIT_CONFIG_X86_32 */ + ); + +#else /* _MSC_VER < 1400 */ + + /* Intel syntax. */ + __asm { + mov ecx, 0 + xgetbv + mov xcr0, eax + } + +#endif /* _MSC_VER && _MSC_VER >= 1400 */ + return xcr0; } static void get_cpu_features(void) { sljit_u32 feature_list = CPU_FEATURE_DETECTED; - sljit_u32 info[4]; + sljit_u32 info[4] = {0}; sljit_u32 max_id; - info[0] = 0; execute_cpu_id(info); max_id = info[0]; @@ -526,6 +553,8 @@ static void get_cpu_features(void) if (info[2] & 0x80000) feature_list |= CPU_FEATURE_SSE41; + if (info[2] & 0x8000000) + feature_list |= CPU_FEATURE_OSXSAVE; if (info[2] & 0x10000000) feature_list |= CPU_FEATURE_AVX; #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) @@ -537,12 +566,14 @@ static void get_cpu_features(void) } info[0] = 0x80000001; - info[2] = 0; /* Silences an incorrect compiler warning. */ execute_cpu_id(info); if (info[2] & 0x20) feature_list |= CPU_FEATURE_LZCNT; + if ((feature_list & CPU_FEATURE_OSXSAVE) && (execute_get_xcr0_low() & 0x4) == 0) + feature_list &= ~(sljit_u32)(CPU_FEATURE_AVX | CPU_FEATURE_AVX2); + cpu_feature_list = feature_list; } @@ -617,52 +648,47 @@ static sljit_u8 get_jump_code(sljit_uw type) } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset); -#else -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr); -static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label); -#endif +static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset); +#else /* !SLJIT_CONFIG_X86_32 */ +static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr); +static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset); +#endif /* SLJIT_CONFIG_X86_32 */ -static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) +static sljit_u8* detect_near_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) { sljit_uw type = jump->flags >> TYPE_SHIFT; sljit_s32 short_jump; sljit_uw label_addr; - if (jump->flags & JUMP_LABEL) - label_addr = (sljit_uw)(code + jump->u.label->size); - else + if (jump->flags & JUMP_ADDR) label_addr = jump->u.target - (sljit_uw)executable_offset; + else + label_addr = (sljit_uw)(code + jump->u.label->size); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((sljit_sw)(label_addr - (jump->addr + 2)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 6)) < HALFWORD_MIN) - return generate_far_jump_code(jump, code_ptr); -#endif + if ((sljit_sw)(label_addr - (sljit_uw)(code_ptr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 5)) < HALFWORD_MIN) + return detect_far_jump_type(jump, code_ptr); +#endif /* SLJIT_CONFIG_X86_64 */ - short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; + short_jump = (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) >= -0x80 && (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) <= 0x7f; if (type == SLJIT_JUMP) { if (short_jump) *code_ptr++ = JMP_i8; else *code_ptr++ = JMP_i32; - jump->addr++; - } - else if (type >= SLJIT_FAST_CALL) { + } else if (type > SLJIT_JUMP) { short_jump = 0; *code_ptr++ = CALL_i32; - jump->addr++; - } - else if (short_jump) { + } else if (short_jump) { *code_ptr++ = U8(get_jump_code(type) - 0x10); - jump->addr++; - } - else { + } else { *code_ptr++ = GROUP_0F; *code_ptr++ = get_jump_code(type); - jump->addr += 2; } + jump->addr = (sljit_uw)code_ptr; + if (short_jump) { jump->flags |= PATCH_MB; code_ptr += sizeof(sljit_s8); @@ -674,7 +700,172 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code return code_ptr; } -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) +{ + sljit_uw flags = jump->flags; + sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; + sljit_uw jump_addr = jump->addr; + SLJIT_UNUSED_ARG(executable_offset); + + if (SLJIT_UNLIKELY(flags & JUMP_MOV_ADDR)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr); +#else /* SLJIT_CONFIG_X86_32 */ + if (flags & PATCH_MD) { + SLJIT_ASSERT(addr > HALFWORD_MAX); + sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr); + return; + } + + if (flags & PATCH_MW) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN); + } else { + SLJIT_ASSERT(addr <= HALFWORD_MAX); + } + sljit_unaligned_store_s32((void*)(jump_addr - sizeof(sljit_s32)), (sljit_s32)addr); +#endif /* !SLJIT_CONFIG_X86_32 */ + return; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (SLJIT_UNLIKELY(flags & PATCH_MD)) { + SLJIT_ASSERT(!(flags & JUMP_ADDR)); + sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr); + return; + } +#endif /* SLJIT_CONFIG_X86_64 */ + + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset); + + if (flags & PATCH_MB) { + addr -= sizeof(sljit_s8); + SLJIT_ASSERT((sljit_sw)addr <= 0x7f && (sljit_sw)addr >= -0x80); + *(sljit_u8*)jump_addr = U8(addr); + return; + } else if (flags & PATCH_MW) { + addr -= sizeof(sljit_s32); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr); +#else /* !SLJIT_CONFIG_X86_32 */ + SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN); + sljit_unaligned_store_s32((void*)jump_addr, (sljit_s32)addr); +#endif /* SLJIT_CONFIG_X86_32 */ + } +} + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + sljit_uw next_label_size; + sljit_uw next_jump_addr; + sljit_uw next_min_addr; + sljit_uw size_reduce = 0; + sljit_sw diff; + sljit_uw type; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + sljit_uw size_reduce_max; +#endif /* SLJIT_DEBUG */ + + label = compiler->labels; + jump = compiler->jumps; + + next_label_size = SLJIT_GET_NEXT_SIZE(label); + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + + while (1) { + next_min_addr = next_label_size; + if (next_jump_addr < next_min_addr) + next_min_addr = next_jump_addr; + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr != next_jump_addr) + continue; + + if (!(jump->flags & JUMP_MOV_ADDR)) { +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + size_reduce_max = size_reduce + (((jump->flags >> TYPE_SHIFT) < SLJIT_JUMP) ? CJUMP_MAX_SIZE : JUMP_MAX_SIZE); +#endif /* SLJIT_DEBUG */ + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (jump->u.target <= 0xffffffffl) + size_reduce += sizeof(sljit_s32); +#endif /* SLJIT_CONFIG_X86_64 */ + } else { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce); + type = jump->flags >> TYPE_SHIFT; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (type == SLJIT_JUMP) { + if (diff <= 0x7f + 2 && diff >= -0x80 + 2) + size_reduce += JUMP_MAX_SIZE - 2; + else if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5) + size_reduce += JUMP_MAX_SIZE - 5; + } else if (type < SLJIT_JUMP) { + if (diff <= 0x7f + 2 && diff >= -0x80 + 2) + size_reduce += CJUMP_MAX_SIZE - 2; + else if (diff <= HALFWORD_MAX + 6 && diff >= HALFWORD_MIN + 6) + size_reduce += CJUMP_MAX_SIZE - 6; + } else { + if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5) + size_reduce += JUMP_MAX_SIZE - 5; + } +#else /* !SLJIT_CONFIG_X86_64 */ + if (type == SLJIT_JUMP) { + if (diff <= 0x7f + 2 && diff >= -0x80 + 2) + size_reduce += JUMP_MAX_SIZE - 2; + } else if (type < SLJIT_JUMP) { + if (diff <= 0x7f + 2 && diff >= -0x80 + 2) + size_reduce += CJUMP_MAX_SIZE - 2; + } +#endif /* SLJIT_CONFIG_X86_64 */ + } + } + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT; +#endif /* SLJIT_DEBUG */ +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + } else { +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + size_reduce_max = size_reduce + 10; +#endif /* SLJIT_DEBUG */ + + if (!(jump->flags & JUMP_ADDR)) { + diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce - 3); + + if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) + size_reduce += 3; + } else if (jump->u.target <= 0xffffffffl) + size_reduce += (jump->flags & MOV_ADDR_HI) ? 4 : 5; + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT; +#endif /* SLJIT_DEBUG */ +#endif /* SLJIT_CONFIG_X86_64 */ + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) { struct sljit_memory_fragment *buf; sljit_u8 *code; @@ -683,77 +874,82 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_u8 *buf_end; sljit_u8 len; sljit_sw executable_offset; - sljit_uw jump_addr; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + sljit_uw addr; +#endif /* SLJIT_DEBUG */ struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); + + reduce_code_size(compiler); /* Second code generation pass. */ - code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size, compiler->exec_allocator_data); + code = (sljit_u8*)allocate_executable_memory(compiler->size, options, exec_allocator_data, &executable_offset); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; code_ptr = code; label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; - executable_offset = SLJIT_EXEC_OFFSET(code); do { buf_ptr = buf->memory; buf_end = buf_ptr + buf->used_size; do { len = *buf_ptr++; - if (len > 0) { + SLJIT_ASSERT(len > 0); + if (len < SLJIT_INST_CONST) { /* The code is already generated. */ SLJIT_MEMCPY(code_ptr, buf_ptr, len); code_ptr += len; buf_ptr += len; - } - else { - switch (*buf_ptr) { - case 0: - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + } else { + switch (len) { + case SLJIT_INST_LABEL: + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; break; - case 1: - jump->addr = (sljit_uw)code_ptr; + case SLJIT_INST_JUMP: +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + addr = (sljit_uw)code_ptr; +#endif /* SLJIT_DEBUG */ if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) - code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset); + code_ptr = detect_near_jump_type(jump, code_ptr, code, executable_offset); else { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset); -#else - code_ptr = generate_far_jump_code(jump, code_ptr); -#endif + code_ptr = detect_far_jump_type(jump, code_ptr, executable_offset); +#else /* !SLJIT_CONFIG_X86_32 */ + code_ptr = detect_far_jump_type(jump, code_ptr); +#endif /* SLJIT_CONFIG_X86_32 */ } + + SLJIT_ASSERT((sljit_uw)code_ptr - addr <= ((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f)); jump = jump->next; break; - case 2: - const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); - const_ = const_->next; + case SLJIT_INST_MOV_ADDR: +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + code_ptr = generate_mov_addr_code(jump, code_ptr, code, executable_offset); +#endif /* SLJIT_CONFIG_X86_64 */ + jump->addr = (sljit_uw)code_ptr; + jump = jump->next; break; default: - SLJIT_ASSERT(*buf_ptr == 3); - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size); -#endif - put_label = put_label->next; + SLJIT_ASSERT(len == SLJIT_INST_CONST); + const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); + const_ = const_->next; break; } - buf_ptr++; } } while (buf_ptr < buf_end); + SLJIT_ASSERT(buf_ptr == buf_end); buf = buf->next; } while (buf); @@ -761,61 +957,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); SLJIT_ASSERT(code_ptr <= code + compiler->size); jump = compiler->jumps; while (jump) { - if (jump->flags & (PATCH_MB | PATCH_MW)) { - if (jump->flags & JUMP_LABEL) - jump_addr = jump->u.label->addr; - else - jump_addr = jump->u.target; - - jump_addr -= jump->addr + (sljit_uw)executable_offset; - - if (jump->flags & PATCH_MB) { - jump_addr -= sizeof(sljit_s8); - SLJIT_ASSERT((sljit_sw)jump_addr >= -128 && (sljit_sw)jump_addr <= 127); - *(sljit_u8*)jump->addr = U8(jump_addr); - } else { - jump_addr -= sizeof(sljit_s32); -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump_addr); -#else - SLJIT_ASSERT((sljit_sw)jump_addr >= HALFWORD_MIN && (sljit_sw)jump_addr <= HALFWORD_MAX); - sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)jump_addr); -#endif - } - } -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - else if (jump->flags & PATCH_MD) { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr); - } -#endif - + generate_jump_or_mov_addr(jump, executable_offset); jump = jump->next; } - put_label = compiler->put_labels; - while (put_label) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr); -#else - if (put_label->flags & PATCH_MD) { - SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX); - sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr); - } - else { - SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX); - sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr); - } -#endif - - put_label = put_label->next; - } - compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; compiler->executable_size = (sljit_uw)(code_ptr - code); @@ -921,8 +1070,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) BINARY_IMM32(op_imm, immw, arg, argw); \ } \ else { \ - FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \ - inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ + FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, immw)); \ + inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ FAIL_IF(!inst); \ *inst = (op_mr); \ } \ @@ -2248,10 +2397,9 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); FAIL_IF(!inst); *inst = GROUP_F7; - } - else { - FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w)); - inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w); + } else { + FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, src2w)); + inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, 0, src1, src1w); FAIL_IF(!inst); *inst = TEST_rm_r; } @@ -2488,8 +2636,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile compiler->mode32 = op & SLJIT_32; #endif - SLJIT_ASSERT(dst != TMP_REG1 || HAS_FLAGS(op)); - switch (GET_OPCODE(op)) { case SLJIT_ADD: if (!HAS_FLAGS(op)) { @@ -2583,12 +2729,44 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil compiler->mode32 = op & SLJIT_32; #endif - if (opcode == SLJIT_SUB) { + if (opcode == SLJIT_SUB) return emit_cmp_binary(compiler, src1, src1w, src2, src2w); - } + return emit_test_binary(compiler, src1, src1w, src2, src2w); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_sw dstw = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + CHECK_EXTRA_REGS(dst_reg, dstw, (void)0); + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2, src2w, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_32; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + FAIL_IF(emit_mul(compiler, TMP_REG1, 0, src1, src1w, src2, src2w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst_reg, dstw); + FAIL_IF(!inst); + *inst = ADD_rm_r; + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -3117,19 +3295,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil dst_r = dst; if (dst == src1) ; /* Do nothing here. */ - else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) { + else if (dst == src2 && (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64)) { /* Swap arguments. */ src2 = src1; src2w = src1w; - } - else if (dst != src2) + } else if (dst != src2) FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w)); else { dst_r = TMP_FREG; FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); } - } - else { + } else { dst_r = TMP_FREG; FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); } @@ -3152,7 +3328,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil break; } - if (dst_r == TMP_FREG) + if (dst_r != dst) return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } @@ -3215,10 +3391,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi PTR_FAIL_IF(!label); set_label(label, compiler); - inst = (sljit_u8*)ensure_buf(compiler, 2); + inst = (sljit_u8*)ensure_buf(compiler, 1); PTR_FAIL_IF(!inst); - inst[0] = 0; - inst[1] = 0; + inst[0] = SLJIT_INST_LABEL; return label; } @@ -3236,18 +3411,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT))); type &= 0xff; + jump->addr = compiler->size; /* Worst case size. */ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - compiler->size += (type >= SLJIT_JUMP) ? 5 : 6; -#else - compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3); -#endif - - inst = (sljit_u8*)ensure_buf(compiler, 2); + compiler->size += (type >= SLJIT_JUMP) ? JUMP_MAX_SIZE : CJUMP_MAX_SIZE; + inst = (sljit_u8*)ensure_buf(compiler, 1); PTR_FAIL_IF_NULL(inst); - inst[0] = 0; - inst[1] = 1; + inst[0] = SLJIT_INST_JUMP; return jump; } @@ -3268,20 +3438,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT))); jump->u.target = (sljit_uw)srcw; + jump->addr = compiler->size; /* Worst case size. */ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - compiler->size += 5; -#else - compiler->size += 10 + 3; -#endif - - inst = (sljit_u8*)ensure_buf(compiler, 2); + compiler->size += JUMP_MAX_SIZE; + inst = (sljit_u8*)ensure_buf(compiler, 1); FAIL_IF_NULL(inst); - inst[0] = 0; - inst[1] = 1; - } - else { + inst[0] = SLJIT_INST_JUMP; + } else { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) /* REX_W is not necessary (src is not immediate). */ compiler->mode32 = 1; @@ -3414,82 +3578,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co #endif /* SLJIT_CONFIG_X86_64 */ } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_reg, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2_reg) -{ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_s32 dst = dst_reg; - sljit_sw dstw = 0; -#endif /* SLJIT_CONFIG_X86_32 */ - sljit_sw src2w = 0; - - CHECK_ERROR(); - CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); - - ADJUST_LOCAL_OFFSET(src1, src1w); - - CHECK_EXTRA_REGS(dst, dstw, (void)0); - CHECK_EXTRA_REGS(src1, src1w, (void)0); - CHECK_EXTRA_REGS(src2_reg, src2w, (void)0); - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = type & SLJIT_32; -#endif /* SLJIT_CONFIG_X86_64 */ - type &= ~SLJIT_32; - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (dst & SLJIT_MEM) { - if (src1 == SLJIT_IMM || (!(src1 & SLJIT_MEM) && (src2_reg & SLJIT_MEM))) { - EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - src1 = src2_reg; - src1w = src2w; - type ^= 0x1; - } else - EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w); - - dst_reg = TMP_REG1; - } else { -#endif /* SLJIT_CONFIG_X86_32 */ - if (dst_reg != src2_reg) { - if (dst_reg == src1) { - src1 = src2_reg; - src1w = src2w; - type ^= 0x1; - } else { - if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { - EMIT_MOV(compiler, dst_reg, 0, src1, src1w); - src1 = src2_reg; - src1w = src2w; - type ^= 0x1; - } else - EMIT_MOV(compiler, dst_reg, 0, src2_reg, src2w); - } - } - - if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) { - SLJIT_ASSERT(dst_reg != TMP_REG1); - EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - src1 = TMP_REG1; - src1w = 0; - } -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - } -#endif /* SLJIT_CONFIG_X86_32 */ - - if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) - FAIL_IF(emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w)); - else - FAIL_IF(emit_cmov_generic(compiler, type, dst_reg, src1, src1w)); - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (dst_reg == TMP_REG1) - return emit_mov(compiler, dst, dstw, TMP_REG1, 0); -#endif /* SLJIT_CONFIG_X86_32 */ - return SLJIT_SUCCESS; -} - SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_freg, sljit_s32 src1, sljit_sw src1w, @@ -3581,7 +3669,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co if (type & SLJIT_SIMD_TEST) return SLJIT_SUCCESS; - if (op & VEX_256) + if ((op & VEX_256) || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX))) return emit_vex_instruction(compiler, op, freg, 0, srcdst, srcdstw); return emit_groupf(compiler, op, freg, srcdst, srcdstw); @@ -3593,9 +3681,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); sljit_u8 *inst; sljit_u8 opcode = 0; - sljit_uw size; + sljit_uw op; CHECK_ERROR(); CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); @@ -3616,20 +3705,55 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil return SLJIT_ERR_UNSUPPORTED; #endif /* SLJIT_CONFIG_X86_32 */ - if (cpu_feature_list & CPU_FEATURE_AVX2) { - if (reg_size < 4 || reg_size > 5) - return SLJIT_ERR_UNSUPPORTED; + if (reg_size != 4 && (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2))) + return SLJIT_ERR_UNSUPPORTED; - if (src != SLJIT_IMM && (reg_size == 5 || elem_size < 3 || !(type & SLJIT_SIMD_FLOAT))) { - if (type & SLJIT_SIMD_TEST) - return SLJIT_SUCCESS; + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 5) + use_vex = 1; + if (use_vex && src != SLJIT_IMM) { + op = 0; + + switch (elem_size) { + case 0: + if (cpu_feature_list & CPU_FEATURE_AVX2) + op = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + case 1: + if (cpu_feature_list & CPU_FEATURE_AVX2) + op = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + case 2: + if (type & SLJIT_SIMD_FLOAT) { + if ((cpu_feature_list & CPU_FEATURE_AVX2) || ((cpu_feature_list & CPU_FEATURE_AVX) && (src & SLJIT_MEM))) + op = VBROADCASTSS_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + } else if (cpu_feature_list & CPU_FEATURE_AVX2) + op = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + default: +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (!(type & SLJIT_SIMD_FLOAT)) { + if (cpu_feature_list & CPU_FEATURE_AVX2) + op = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + } +#endif /* SLJIT_CONFIG_X86_64 */ + + if (reg_size == 5) + op = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + } + + if (op != 0) { if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (elem_size >= 3) compiler->mode32 = 0; #endif /* SLJIT_CONFIG_X86_64 */ - FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, src, srcw)); + FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw)); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 1; #endif /* SLJIT_CONFIG_X86_64 */ @@ -3637,51 +3761,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil srcw = 0; } - switch (elem_size) { - case 0: - size = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; - break; - case 1: - size = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; - break; - case 2: - size = ((type & SLJIT_SIMD_FLOAT) ? VBROADCASTSS_x_xm : VPBROADCASTD_x_xm) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; - break; - default: -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - size = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; -#else /* !SLJIT_CONFIG_X86_32 */ - size = ((type & SLJIT_SIMD_FLOAT) ? VBROADCASTSD_x_xm : VPBROADCASTQ_x_xm) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; -#endif /* SLJIT_CONFIG_X86_32 */ - break; - } - if (reg_size == 5) - size |= VEX_256; + op |= VEX_256; - return emit_vex_instruction(compiler, size, freg, 0, src, srcw); + return emit_vex_instruction(compiler, op, freg, 0, src, srcw); } - } else if (reg_size != 4) - return SLJIT_ERR_UNSUPPORTED; - - if (type & SLJIT_SIMD_TEST) - return SLJIT_SUCCESS; + } if (type & SLJIT_SIMD_FLOAT) { if (src == SLJIT_IMM) { - if (reg_size == 5) - return emit_vex_instruction(compiler, XORPD_x_xm | VEX_256 | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0); + if (use_vex) + return emit_vex_instruction(compiler, XORPD_x_xm | (reg_size == 5 ? VEX_256 : 0) | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0); return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, freg, freg, 0); } + SLJIT_ASSERT(reg_size == 4); + + if (use_vex) { + if (elem_size == 3) + return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, srcw); + + SLJIT_ASSERT(!(src & SLJIT_MEM)); + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0)); + return emit_byte(compiler, 0); + } + if (elem_size == 2 && freg != src) { FAIL_IF(emit_sse2_load(compiler, 1, freg, src, srcw)); src = freg; srcw = 0; } - FAIL_IF(emit_groupf(compiler, (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2, freg, src, srcw)); + op = (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2; + FAIL_IF(emit_groupf(compiler, op, freg, src, srcw)); if (elem_size == 2) return emit_byte(compiler, 0); @@ -3706,8 +3819,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil #endif /* SLJIT_CONFIG_X86_64 */ if (srcw == 0 || srcw == -1) { - if (reg_size == 5) - return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0); + if (use_vex) + return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0); return emit_groupf(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, freg, freg, 0); } @@ -3721,16 +3834,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil src = TMP_REG1; srcw = 0; + } - size = 2; + op = 2; opcode = MOVD_x_rm; switch (elem_size) { case 0: if (!FAST_IS_REG(src)) { opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */; - size = 3; + op = 3; } break; case 1: @@ -3747,44 +3861,66 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil #endif /* SLJIT_CONFIG_X86_64 */ } - inst = emit_x86_instruction(compiler, size | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = opcode; + if (use_vex) { + if (opcode != MOVD_x_rm) { + op = (opcode == 0x3a) ? (PINSRB_x_rm_i8 | VEX_OP_0F3A) : opcode; + FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1 | VEX_SSE2_OPV, freg, freg, src, srcw)); + } else + FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw)); + } else { + inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw); + FAIL_IF(!inst); + inst[0] = GROUP_0F; + inst[1] = opcode; - if (reg_size == 5) { - SLJIT_ASSERT(opcode == MOVD_x_rm); + if (op == 3) { + SLJIT_ASSERT(opcode == 0x3a); + inst[2] = PINSRB_x_rm_i8; + } + } + + if (use_vex && elem_size >= 2) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - size = VPBROADCASTD_x_xm; + op = VPBROADCASTD_x_xm; #else /* !SLJIT_CONFIG_X86_32 */ - size = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm; + op = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm; #endif /* SLJIT_CONFIG_X86_32 */ - return emit_vex_instruction(compiler, size | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0); + return emit_vex_instruction(compiler, op | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0); } - if (size == 3) { - SLJIT_ASSERT(opcode == 0x3a); - inst[2] = PINSRB_x_rm_i8; - } + SLJIT_ASSERT(reg_size == 4); if (opcode != MOVD_x_rm) FAIL_IF(emit_byte(compiler, 0)); switch (elem_size) { case 0: + if (use_vex) { + FAIL_IF(emit_vex_instruction(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0)); + return emit_vex_instruction(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, TMP_FREG, 0); + } FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0)); return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0); case 1: - FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, freg, 0)); + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, freg, 0)); + else + FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, freg, 0)); FAIL_IF(emit_byte(compiler, 0)); /* fallthrough */ default: - FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0)); + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0)); + else + FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0)); return emit_byte(compiler, 0); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) case 3: compiler->mode32 = 1; - FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0)); + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0)); + else + FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0)); return emit_byte(compiler, 0x44); #endif /* SLJIT_CONFIG_X86_64 */ } @@ -3796,9 +3932,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); sljit_u8 *inst; sljit_u8 opcode = 0; - sljit_uw size; + sljit_uw op; sljit_s32 freg_orig = freg; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_s32 srcdst_is_ereg = 0; @@ -3814,6 +3951,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (reg_size == 5) { if (!(cpu_feature_list & CPU_FEATURE_AVX2)) return SLJIT_ERR_UNSUPPORTED; + use_vex = 1; } else if (reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -3865,20 +4003,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile } if (elem_size == 2) { - if (reg_size == 4) - return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, srcdst, srcdstw); - return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw); + if (use_vex) + return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw); + return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, srcdst, srcdstw); } } else if (srcdst & SLJIT_MEM) { SLJIT_ASSERT(elem_size == 2 || elem_size == 3); - if (reg_size == 4) - return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, srcdst, srcdstw); - return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, 0, srcdst, srcdstw); + if (use_vex) + return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, 0, srcdst, srcdstw); + return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, srcdst, srcdstw); } else if (elem_size == 3) { - if (reg_size == 4) - return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, srcdst, 0); - return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, 0, srcdst, 0); + if (use_vex) + return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, 0, srcdst, 0); + return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, srcdst, 0); + } else if (use_vex) { + FAIL_IF(emit_vex_instruction(compiler, XORPD_x_xm | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0)); + return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F3 | EX86_SSE2 | VEX_SSE2_OPV, freg, TMP_FREG, srcdst, 0); } } @@ -3886,18 +4027,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile freg = TMP_FREG; lane_index -= (1 << (4 - elem_size)); } else if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { - FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw)); + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, srcdst, srcdstw)); + else + FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw)); srcdst = TMP_FREG; srcdstw = 0; } - size = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0) + op = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0) | ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | EX86_SSE2; - if (reg_size == 5) - FAIL_IF(emit_vex_instruction(compiler, size | VEX_256 | VEX_SSE2_OPV, freg, freg, freg, 0)); + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, op | (reg_size == 5 ? VEX_256 : 0) | VEX_SSE2_OPV, freg, freg, freg, 0)); else - FAIL_IF(emit_groupf(compiler, size, freg, freg, 0)); + FAIL_IF(emit_groupf(compiler, op, freg, freg, 0)); } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) { FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0)); FAIL_IF(emit_byte(compiler, 1)); @@ -3910,58 +4054,80 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (elem_size == 3) { if (srcdst & SLJIT_MEM) { if (type & SLJIT_SIMD_STORE) - size = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x; + op = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x; else - size = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m; + op = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m; - FAIL_IF(emit_groupf(compiler, size | EX86_PREF_66 | EX86_SSE2, freg, srcdst, srcdstw)); + /* VEX prefix clears upper bits of the target register. */ + if (use_vex && ((type & SLJIT_SIMD_STORE) || reg_size == 4 || freg == TMP_FREG)) + FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2 + | ((type & SLJIT_SIMD_STORE) ? 0 : VEX_SSE2_OPV), freg, (type & SLJIT_SIMD_STORE) ? 0 : freg, srcdst, srcdstw)); + else + FAIL_IF(emit_groupf(compiler, op | EX86_PREF_66 | EX86_SSE2, freg, srcdst, srcdstw)); /* In case of store, freg is not TMP_FREG. */ } else if (type & SLJIT_SIMD_STORE) { - if (lane_index == 1) + if (lane_index == 1) { + if (use_vex) + return emit_vex_instruction(compiler, MOVHLPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, freg, 0); return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, freg, 0); + } + if (use_vex) + return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, freg, 0); return emit_sse2_load(compiler, 0, srcdst, freg, 0); + } else if (use_vex && (reg_size == 4 || freg == TMP_FREG)) { + if (lane_index == 1) + FAIL_IF(emit_vex_instruction(compiler, MOVLHPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, srcdst, 0)); + else + FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, srcdst, 0)); } else { if (lane_index == 1) FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, freg, srcdst, 0)); else - FAIL_IF(emit_sse2_store(compiler, 0, freg, 0, srcdst)); + FAIL_IF(emit_sse2_load(compiler, 0, freg, srcdst, 0)); } } else if (type & SLJIT_SIMD_STORE) { - if (lane_index == 0) + if (lane_index == 0) { + if (use_vex) + return emit_vex_instruction(compiler, ((srcdst & SLJIT_MEM) ? MOVSD_xm_x : MOVSD_x_xm) | EX86_PREF_F3 | EX86_SSE2 + | ((srcdst & SLJIT_MEM) ? 0 : VEX_SSE2_OPV), freg, ((srcdst & SLJIT_MEM) ? 0 : freg), srcdst, srcdstw); return emit_sse2_store(compiler, 1, srcdst, srcdstw, freg); + } if (srcdst & SLJIT_MEM) { - FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw)); + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, srcdst, srcdstw)); + else + FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw)); + return emit_byte(compiler, U8(lane_index)); + } + + if (use_vex) { + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, freg, freg, 0)); return emit_byte(compiler, U8(lane_index)); } if (srcdst == freg) - size = SHUFPS_x_xm | EX86_SSE2; + op = SHUFPS_x_xm | EX86_SSE2; else { - if (cpu_feature_list & CPU_FEATURE_AVX) { - FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, freg, freg, 0)); - return emit_byte(compiler, U8(lane_index)); - } - switch (lane_index) { case 1: - size = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2; + op = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2; break; case 2: - size = MOVHLPS_x_x | EX86_SSE2; + op = MOVHLPS_x_x | EX86_SSE2; break; default: SLJIT_ASSERT(lane_index == 3); - size = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2; + op = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2; break; } } - FAIL_IF(emit_groupf(compiler, size, srcdst, freg, 0)); + FAIL_IF(emit_groupf(compiler, op, srcdst, freg, 0)); - size &= 0xff; - if (size == SHUFPS_x_xm || size == PSHUFD_x_xm) + op &= 0xff; + if (op == SHUFPS_x_xm || op == PSHUFD_x_xm) return emit_byte(compiler, U8(lane_index)); return SLJIT_SUCCESS; @@ -3993,7 +4159,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile srcdstw = 0; } - size = 3; + op = 3; switch (elem_size) { case 0: @@ -4001,7 +4167,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile break; case 1: if (!(type & SLJIT_SIMD_STORE)) { - size = 2; + op = 2; opcode = PINSRW_x_rm_i8; } else opcode = PEXTRW_rm_x_i8; @@ -4018,15 +4184,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile #endif /* SLJIT_CONFIG_X86_64 */ } - inst = emit_x86_instruction(compiler, size | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw); - FAIL_IF(!inst); - inst[0] = GROUP_0F; + if (use_vex && (type & SLJIT_SIMD_STORE)) { + op = opcode | ((op == 3) ? VEX_OP_0F3A : 0); + FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | VEX_AUTO_W | EX86_SSE2_OP1 | VEX_SSE2_OPV, freg, 0, srcdst, srcdstw)); + } else { + inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw); + FAIL_IF(!inst); + inst[0] = GROUP_0F; - if (size == 3) { - inst[1] = 0x3a; - inst[2] = opcode; - } else - inst[1] = opcode; + if (op == 3) { + inst[1] = 0x3a; + inst[2] = opcode; + } else + inst[1] = opcode; + } FAIL_IF(emit_byte(compiler, U8(lane_index))); @@ -4056,23 +4227,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile compiler->mode32 = (type & SLJIT_32); - size = 2; + op = 2; if (elem_size == 0) - size |= EX86_REX; + op |= EX86_REX; if (elem_size == 2) { if (type & SLJIT_32) return SLJIT_SUCCESS; SLJIT_ASSERT(!(compiler->mode32)); - size = 1; + op = 1; } - inst = emit_x86_instruction(compiler, size, srcdst, 0, srcdst, 0); + inst = emit_x86_instruction(compiler, op, srcdst, 0, srcdst, 0); FAIL_IF(!inst); - if (size != 1) { + if (op != 1) { inst[0] = GROUP_0F; inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16); } else @@ -4096,6 +4267,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); sljit_uw pref; sljit_u8 byte; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -4115,6 +4287,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c if (reg_size == 5) { if (!(cpu_feature_list & CPU_FEATURE_AVX2)) return SLJIT_ERR_UNSUPPORTED; + use_vex = 1; } else if (reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -4136,8 +4309,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c return emit_byte(compiler, U8(byte | (byte << 4))); } - if (src_lane_index == 0) + if (src_lane_index == 0) { + if (use_vex) + return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, 0); return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, src, 0); + } /* Changes it to SHUFPD_x_xm. */ pref = EX86_PREF_66; @@ -4163,7 +4339,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c FAIL_IF(emit_byte(compiler, byte)); FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0)); byte = U8(src_lane_index); - } else if (freg != src && (cpu_feature_list & CPU_FEATURE_AVX)) { + } else if (use_vex) { FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0)); } else { if (freg != src) @@ -4192,7 +4368,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c src = freg; } - if ((freg != src && !(cpu_feature_list & CPU_FEATURE_AVX2)) || src_lane_index != 0) { + if (src_lane_index != 0 || (freg != src && (!(cpu_feature_list & CPU_FEATURE_AVX2) || !use_vex))) { pref = 0; if ((src_lane_index & 0x3) == 0) { @@ -4202,7 +4378,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c pref = EX86_PREF_F2; byte = U8(src_lane_index >> 1); } else { - if (freg == src || !(cpu_feature_list & CPU_FEATURE_AVX2)) { + if (!use_vex) { if (freg != src) FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0)); @@ -4214,14 +4390,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c } if (pref != 0) { - FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0)); + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, 0, src, 0)); + else + FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0)); FAIL_IF(emit_byte(compiler, byte)); } src = freg; } - if (cpu_feature_list & CPU_FEATURE_AVX2) + if (use_vex && (cpu_feature_list & CPU_FEATURE_AVX2)) return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0); SLJIT_ASSERT(reg_size == 4); @@ -4229,7 +4408,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0); } - if ((cpu_feature_list & CPU_FEATURE_AVX2) && src_lane_index == 0 && elem_size <= 3) { + if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && src_lane_index == 0 && elem_size <= 3) { switch (elem_size) { case 1: pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; @@ -4291,11 +4470,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c src_lane_index >>= 1; pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3; - FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0)); + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, 0, src, 0)); + else + FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0)); byte = U8(byte | (byte << 2)); FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4)))); - if ((cpu_feature_list & CPU_FEATURE_AVX2) && pref == EX86_PREF_F2) + if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && pref == EX86_PREF_F2) return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0); src = freg; @@ -4310,7 +4492,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c break; } - FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0)); + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, src, 0)); + else + FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0)); return emit_byte(compiler, U8(byte | (byte << 4))); } @@ -4321,6 +4506,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); sljit_u8 opcode; CHECK_ERROR(); @@ -4335,6 +4521,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler if (reg_size == 5) { if (!(cpu_feature_list & CPU_FEATURE_AVX2)) return SLJIT_ERR_UNSUPPORTED; + use_vex = 1; } else if (reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -4345,9 +4532,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler if (type & SLJIT_SIMD_TEST) return SLJIT_SUCCESS; - if (reg_size == 4) - return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, freg, src, srcw); - return emit_vex_instruction(compiler, CVTPS2PD_x_xm | VEX_256 | EX86_SSE2, freg, 0, src, srcw); + if (use_vex) + return emit_vex_instruction(compiler, CVTPS2PD_x_xm | ((reg_size == 5) ? VEX_256 : 0) | EX86_SSE2, freg, 0, src, srcw); + return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, freg, src, srcw); } switch (elem_size) { @@ -4382,9 +4569,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler if (type & SLJIT_SIMD_TEST) return SLJIT_SUCCESS; - if (reg_size == 4) - return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, src, srcw); - return emit_vex_instruction(compiler, opcode | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, srcw); + if (use_vex) + return emit_vex_instruction(compiler, opcode | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, srcw); + return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, src, srcw); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, @@ -4393,8 +4580,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); sljit_s32 dst_r; - sljit_uw pref; + sljit_uw op; sljit_u8 *inst; CHECK_ERROR(); @@ -4414,20 +4602,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c if (type & SLJIT_SIMD_TEST) return SLJIT_SUCCESS; - pref = EX86_PREF_66 | EX86_SSE2_OP2; + op = EX86_PREF_66 | EX86_SSE2_OP2; switch (elem_size) { case 1: - FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, freg, 0)); + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, freg, 0)); + else + FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, freg, 0)); freg = TMP_FREG; break; case 2: - pref = EX86_SSE2_OP2; + op = EX86_SSE2_OP2; break; } dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; - FAIL_IF(emit_groupf(compiler, (elem_size < 2 ? PMOVMSKB_r_x : MOVMSKPS_r_x) | pref, dst_r, freg, 0)); + op |= (elem_size < 2) ? PMOVMSKB_r_x : MOVMSKPS_r_x; + + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, freg, 0)); + else + FAIL_IF(emit_groupf(compiler, op, dst_r, freg, 0)); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = type & SLJIT_32; @@ -4459,14 +4655,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, TMP_FREG, 0)); FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0)); } else { - pref = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2; + op = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2; if (elem_size == 0) - pref = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2; + op = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2; else if (elem_size == 3) - pref |= EX86_PREF_66; + op |= EX86_PREF_66; - FAIL_IF(emit_vex_instruction(compiler, pref, dst_r, 0, freg, 0)); + FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, freg, 0)); } if (dst_r == TMP_REG1) { @@ -4497,7 +4693,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *co { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); - sljit_s32 needs_move = 0; sljit_uw op = 0; CHECK_ERROR(); @@ -4540,20 +4735,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *co if (type & SLJIT_SIMD_TEST) return SLJIT_SUCCESS; - needs_move = dst_freg != src1_freg && dst_freg != src2_freg; - - if (reg_size == 5 || (needs_move && (cpu_feature_list & CPU_FEATURE_AVX2))) { + if (reg_size == 5 || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX))) { if (reg_size == 5) op |= VEX_256; return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_freg, src1_freg, src2_freg, 0); } - if (needs_move) { - FAIL_IF(emit_simd_mov(compiler, type, dst_freg, src1_freg)); - } else if (dst_freg != src1_freg) { - SLJIT_ASSERT(dst_freg == src2_freg); - src2_freg = src1_freg; + if (dst_freg != src1_freg) { + if (dst_freg == src2_freg) + src2_freg = src1_freg; + else + FAIL_IF(emit_simd_mov(compiler, type, dst_freg, src1_freg)); } FAIL_IF(emit_groupf(compiler, op | EX86_SSE2, dst_freg, src2_freg, 0)); @@ -4727,11 +4920,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return NULL; #endif - inst = (sljit_u8*)ensure_buf(compiler, 2); + inst = (sljit_u8*)ensure_buf(compiler, 1); PTR_FAIL_IF(!inst); - inst[0] = 0; - inst[1] = 2; + inst[0] = SLJIT_INST_CONST; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (dst & SLJIT_MEM) @@ -4742,52 +4934,48 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_u8 *inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) sljit_s32 reg; - sljit_uw start_size; -#endif +#endif /* SLJIT_CONFIG_X86_64 */ CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); CHECK_EXTRA_REGS(dst, dstw, (void)0); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; reg = FAST_IS_REG(dst) ? dst : TMP_REG1; - if (emit_load_imm64(compiler, reg, 0)) - return NULL; -#else - if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0)) - return NULL; -#endif + PTR_FAIL_IF(emit_load_imm64(compiler, reg, 0)); + jump->addr = compiler->size; -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (dst & SLJIT_MEM) { - start_size = compiler->size; - if (emit_mov(compiler, dst, dstw, TMP_REG1, 0)) - return NULL; - put_label->flags = compiler->size - start_size; - } -#endif + if (reg_map[reg] >= 8) + jump->flags |= MOV_ADDR_HI; +#else /* !SLJIT_CONFIG_X86_64 */ + PTR_FAIL_IF(emit_mov(compiler, dst, dstw, SLJIT_IMM, 0)); +#endif /* SLJIT_CONFIG_X86_64 */ - inst = (sljit_u8*)ensure_buf(compiler, 2); + inst = (sljit_u8*)ensure_buf(compiler, 1); PTR_FAIL_IF(!inst); - inst[0] = 0; - inst[1] = 3; + inst[0] = SLJIT_INST_MOV_ADDR; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_mov(compiler, dst, dstw, TMP_REG1, 0)); +#endif /* SLJIT_CONFIG_X86_64 */ - return put_label; + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) diff --git a/ext/pcre/pcre2lib/sljit/sljitSerialize.c b/ext/pcre/pcre2lib/sljit/sljitSerialize.c new file mode 100644 index 000000000000..6ef161fd4982 --- /dev/null +++ b/ext/pcre/pcre2lib/sljit/sljitSerialize.c @@ -0,0 +1,516 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_label(struct sljit_jump *jump) +{ + return !(jump->flags & JUMP_ADDR) && (jump->u.label != NULL); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_target(struct sljit_jump *jump) +{ + return (jump->flags & JUMP_ADDR) != 0; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_is_mov_addr(struct sljit_jump *jump) +{ + return (jump->flags & JUMP_MOV_ADDR) != 0; +} + +#define SLJIT_SERIALIZE_DEBUG ((sljit_u16)0x1) + +struct sljit_serialized_compiler { + sljit_u32 signature; + sljit_u16 version; + sljit_u16 cpu_type; + + sljit_uw buf_segment_count; + sljit_uw label_count; + sljit_uw jump_count; + sljit_uw const_count; + + sljit_s32 options; + sljit_s32 scratches; + sljit_s32 saveds; + sljit_s32 fscratches; + sljit_s32 fsaveds; + sljit_s32 local_size; + sljit_uw size; + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + sljit_s32 status_flags_state; +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 args_size; +#endif /* SLJIT_CONFIG_X86_32 */ + +#if ((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_uw args_size; +#endif /* (SLJIT_CONFIG_ARM_32 && __SOFTFP__) || SLJIT_CONFIG_MIPS_32 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + sljit_uw cpool_diff; + sljit_uw cpool_fill; + sljit_uw patches; +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + sljit_s32 delay_slot; +#endif /* SLJIT_CONFIG_MIPS */ + +}; + +struct sljit_serialized_debug_info { + sljit_sw last_flags; + sljit_s32 last_return; + sljit_s32 logical_local_size; +}; + +struct sljit_serialized_label { + sljit_uw size; +}; + +struct sljit_serialized_jump { + sljit_uw addr; + sljit_uw flags; + sljit_uw value; +}; + +struct sljit_serialized_const { + sljit_uw addr; +}; + +#define SLJIT_SERIALIZE_ALIGN(v) (((v) + sizeof(sljit_uw) - 1) & ~(sljit_uw)(sizeof(sljit_uw) - 1)) +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define SLJIT_SERIALIZE_SIGNATURE 0x534c4a54 +#else /* !SLJIT_LITTLE_ENDIAN */ +#define SLJIT_SERIALIZE_SIGNATURE 0x544a4c53 +#endif /* SLJIT_LITTLE_ENDIAN */ +#define SLJIT_SERIALIZE_VERSION 1 + +SLJIT_API_FUNC_ATTRIBUTE sljit_uw* sljit_serialize_compiler(struct sljit_compiler *compiler, + sljit_s32 options, sljit_uw *size) +{ + sljit_uw serialized_size = sizeof(struct sljit_serialized_compiler); + struct sljit_memory_fragment *buf; + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_serialized_compiler *serialized_compiler; + struct sljit_serialized_label *serialized_label; + struct sljit_serialized_jump *serialized_jump; + struct sljit_serialized_const *serialized_const; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + struct sljit_serialized_debug_info *serialized_debug_info; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + sljit_uw counter, used_size; + sljit_u8 *result; + sljit_u8 *ptr; + SLJIT_UNUSED_ARG(options); + + if (size != NULL) + *size = 0; + + PTR_FAIL_IF(compiler->error); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (!(options & SLJIT_SERIALIZE_IGNORE_DEBUG)) + serialized_size += sizeof(struct sljit_serialized_debug_info); +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + serialized_size += SLJIT_SERIALIZE_ALIGN(compiler->cpool_fill * (sizeof(sljit_uw) + 1)); +#endif /* SLJIT_CONFIG_ARM_V6 */ + + /* Compute the size of the data. */ + buf = compiler->buf; + while (buf != NULL) { + serialized_size += sizeof(sljit_uw) + SLJIT_SERIALIZE_ALIGN(buf->used_size); + buf = buf->next; + } + + serialized_size += compiler->label_count * sizeof(struct sljit_serialized_label); + + jump = compiler->jumps; + while (jump != NULL) { + serialized_size += sizeof(struct sljit_serialized_jump); + jump = jump->next; + } + + const_ = compiler->consts; + while (const_ != NULL) { + serialized_size += sizeof(struct sljit_serialized_const); + const_ = const_->next; + } + + result = (sljit_u8*)SLJIT_MALLOC(serialized_size, compiler->allocator_data); + PTR_FAIL_IF_NULL(result); + + if (size != NULL) + *size = serialized_size; + + ptr = result; + serialized_compiler = (struct sljit_serialized_compiler*)ptr; + ptr += sizeof(struct sljit_serialized_compiler); + + serialized_compiler->signature = SLJIT_SERIALIZE_SIGNATURE; + serialized_compiler->version = SLJIT_SERIALIZE_VERSION; + serialized_compiler->cpu_type = 0; + serialized_compiler->label_count = compiler->label_count; + serialized_compiler->options = compiler->options; + serialized_compiler->scratches = compiler->scratches; + serialized_compiler->saveds = compiler->saveds; + serialized_compiler->fscratches = compiler->fscratches; + serialized_compiler->fsaveds = compiler->fsaveds; + serialized_compiler->local_size = compiler->local_size; + serialized_compiler->size = compiler->size; + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + serialized_compiler->status_flags_state = compiler->status_flags_state; +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + || ((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + serialized_compiler->args_size = compiler->args_size; +#endif /* SLJIT_CONFIG_X86_32 || (SLJIT_CONFIG_ARM_32 && __SOFTFP__) || SLJIT_CONFIG_MIPS_32 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + serialized_compiler->cpool_diff = compiler->cpool_diff; + serialized_compiler->cpool_fill = compiler->cpool_fill; + serialized_compiler->patches = compiler->patches; + + SLJIT_MEMCPY(ptr, compiler->cpool, compiler->cpool_fill * sizeof(sljit_uw)); + SLJIT_MEMCPY(ptr + compiler->cpool_fill * sizeof(sljit_uw), compiler->cpool_unique, compiler->cpool_fill); + ptr += SLJIT_SERIALIZE_ALIGN(compiler->cpool_fill * (sizeof(sljit_uw) + 1)); +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + serialized_compiler->delay_slot = compiler->delay_slot; +#endif /* SLJIT_CONFIG_MIPS */ + + buf = compiler->buf; + counter = 0; + while (buf != NULL) { + used_size = buf->used_size; + *(sljit_uw*)ptr = used_size; + ptr += sizeof(sljit_uw); + SLJIT_MEMCPY(ptr, buf->memory, used_size); + ptr += SLJIT_SERIALIZE_ALIGN(used_size); + buf = buf->next; + counter++; + } + serialized_compiler->buf_segment_count = counter; + + label = compiler->labels; + while (label != NULL) { + serialized_label = (struct sljit_serialized_label*)ptr; + serialized_label->size = label->size; + ptr += sizeof(struct sljit_serialized_label); + label = label->next; + } + + jump = compiler->jumps; + counter = 0; + while (jump != NULL) { + serialized_jump = (struct sljit_serialized_jump*)ptr; + serialized_jump->addr = jump->addr; + serialized_jump->flags = jump->flags; + + if (jump->flags & JUMP_ADDR) + serialized_jump->value = jump->u.target; + else if (jump->u.label != NULL) + serialized_jump->value = jump->u.label->u.index; + else + serialized_jump->value = SLJIT_MAX_ADDRESS; + + ptr += sizeof(struct sljit_serialized_jump); + jump = jump->next; + counter++; + } + serialized_compiler->jump_count = counter; + + const_ = compiler->consts; + counter = 0; + while (const_ != NULL) { + serialized_const = (struct sljit_serialized_const*)ptr; + serialized_const->addr = const_->addr; + ptr += sizeof(struct sljit_serialized_const); + const_ = const_->next; + counter++; + } + serialized_compiler->const_count = counter; + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (!(options & SLJIT_SERIALIZE_IGNORE_DEBUG)) { + serialized_debug_info = (struct sljit_serialized_debug_info*)ptr; + serialized_debug_info->last_flags = compiler->last_flags; + serialized_debug_info->last_return = compiler->last_return; + serialized_debug_info->logical_local_size = compiler->logical_local_size; + serialized_compiler->cpu_type |= SLJIT_SERIALIZE_DEBUG; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + ptr += sizeof(struct sljit_serialized_debug_info); +#endif /* SLJIT_DEBUG */ + } +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + + SLJIT_ASSERT((sljit_uw)(ptr - result) == serialized_size); + return (sljit_uw*)result; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler *sljit_deserialize_compiler(sljit_uw* buffer, sljit_uw size, + sljit_s32 options, void *allocator_data) +{ + struct sljit_compiler *compiler; + struct sljit_serialized_compiler *serialized_compiler; + struct sljit_serialized_label *serialized_label; + struct sljit_serialized_jump *serialized_jump; + struct sljit_serialized_const *serialized_const; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + struct sljit_serialized_debug_info *serialized_debug_info; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *last_buf; + struct sljit_label *label; + struct sljit_label *last_label; + struct sljit_label **label_list = NULL; + struct sljit_jump *jump; + struct sljit_jump *last_jump; + struct sljit_const *const_; + struct sljit_const *last_const; + sljit_u8 *ptr = (sljit_u8*)buffer; + sljit_u8 *end = ptr + size; + sljit_uw i, used_size, aligned_size, label_count; + SLJIT_UNUSED_ARG(options); + + if (size < sizeof(struct sljit_serialized_compiler) || (size & (sizeof(sljit_uw) - 1)) != 0) + return NULL; + + serialized_compiler = (struct sljit_serialized_compiler*)ptr; + + if (serialized_compiler->signature != SLJIT_SERIALIZE_SIGNATURE || serialized_compiler->version != SLJIT_SERIALIZE_VERSION) + return NULL; + + compiler = sljit_create_compiler(allocator_data); + PTR_FAIL_IF(compiler == NULL); + + compiler->label_count = serialized_compiler->label_count; + compiler->options = serialized_compiler->options; + compiler->scratches = serialized_compiler->scratches; + compiler->saveds = serialized_compiler->saveds; + compiler->fscratches = serialized_compiler->fscratches; + compiler->fsaveds = serialized_compiler->fsaveds; + compiler->local_size = serialized_compiler->local_size; + compiler->size = serialized_compiler->size; + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + compiler->status_flags_state = serialized_compiler->status_flags_state; +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + || ((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->args_size = serialized_compiler->args_size; +#endif /* SLJIT_CONFIG_X86_32 || (SLJIT_CONFIG_ARM_32 && __SOFTFP__) || SLJIT_CONFIG_MIPS_32 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + used_size = serialized_compiler->cpool_fill; + aligned_size = SLJIT_SERIALIZE_ALIGN(used_size * (sizeof(sljit_uw) + 1)); + compiler->cpool_diff = serialized_compiler->cpool_diff; + compiler->cpool_fill = used_size; + compiler->patches = serialized_compiler->patches; + + if ((sljit_uw)(end - ptr) < aligned_size) + goto error; + + SLJIT_MEMCPY(compiler->cpool, ptr, used_size * sizeof(sljit_uw)); + SLJIT_MEMCPY(compiler->cpool_unique, ptr + used_size * sizeof(sljit_uw), used_size); + ptr += aligned_size; +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + compiler->delay_slot = serialized_compiler->delay_slot; +#endif /* SLJIT_CONFIG_MIPS */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (!(serialized_compiler->cpu_type & SLJIT_SERIALIZE_DEBUG)) + goto error; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + + ptr += sizeof(struct sljit_serialized_compiler); + i = serialized_compiler->buf_segment_count; + last_buf = NULL; + while (i > 0) { + if ((sljit_uw)(end - ptr) < sizeof(sljit_uw)) + goto error; + + used_size = *(sljit_uw*)ptr; + aligned_size = SLJIT_SERIALIZE_ALIGN(used_size); + ptr += sizeof(sljit_uw); + + if ((sljit_uw)(end - ptr) < aligned_size) + goto error; + + if (last_buf == NULL) { + SLJIT_ASSERT(compiler->buf != NULL && compiler->buf->next == NULL); + buf = compiler->buf; + } else { + buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, allocator_data); + if (!buf) + goto error; + buf->next = NULL; + } + + buf->used_size = used_size; + SLJIT_MEMCPY(buf->memory, ptr, used_size); + + if (last_buf != NULL) + last_buf->next = buf; + last_buf = buf; + + ptr += aligned_size; + i--; + } + + last_label = NULL; + label_count = serialized_compiler->label_count; + if ((sljit_uw)(end - ptr) < label_count * sizeof(struct sljit_serialized_label)) + goto error; + + label_list = (struct sljit_label **)SLJIT_MALLOC(label_count * sizeof(struct sljit_label*), allocator_data); + if (label_list == NULL) + goto error; + + for (i = 0; i < label_count; i++) { + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + if (label == NULL) + goto error; + + serialized_label = (struct sljit_serialized_label*)ptr; + label->next = NULL; + label->u.index = i; + label->size = serialized_label->size; + + if (last_label != NULL) + last_label->next = label; + else + compiler->labels = label; + last_label = label; + + label_list[i] = label; + ptr += sizeof(struct sljit_serialized_label); + } + compiler->last_label = last_label; + + last_jump = NULL; + i = serialized_compiler->jump_count; + if ((sljit_uw)(end - ptr) < i * sizeof(struct sljit_serialized_jump)) + goto error; + + while (i > 0) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + if (jump == NULL) + goto error; + + serialized_jump = (struct sljit_serialized_jump*)ptr; + jump->next = NULL; + jump->addr = serialized_jump->addr; + jump->flags = serialized_jump->flags; + + if (!(serialized_jump->flags & JUMP_ADDR)) { + if (serialized_jump->value != SLJIT_MAX_ADDRESS) { + if (serialized_jump->value >= label_count) + goto error; + jump->u.label = label_list[serialized_jump->value]; + } else + jump->u.label = NULL; + } else + jump->u.target = serialized_jump->value; + + if (last_jump != NULL) + last_jump->next = jump; + else + compiler->jumps = jump; + last_jump = jump; + + ptr += sizeof(struct sljit_serialized_jump); + i--; + } + compiler->last_jump = last_jump; + + SLJIT_FREE(label_list, allocator_data); + label_list = NULL; + + last_const = NULL; + i = serialized_compiler->const_count; + if ((sljit_uw)(end - ptr) < i * sizeof(struct sljit_serialized_const)) + goto error; + + while (i > 0) { + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + if (const_ == NULL) + goto error; + + serialized_const = (struct sljit_serialized_const*)ptr; + const_->next = NULL; + const_->addr = serialized_const->addr; + + if (last_const != NULL) + last_const->next = const_; + else + compiler->consts = const_; + last_const = const_; + + ptr += sizeof(struct sljit_serialized_const); + i--; + } + compiler->last_const = last_const; + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if ((sljit_uw)(end - ptr) < sizeof(struct sljit_serialized_debug_info)) + goto error; + + serialized_debug_info = (struct sljit_serialized_debug_info*)ptr; + compiler->last_flags = (sljit_s32)serialized_debug_info->last_flags; + compiler->last_return = serialized_debug_info->last_return; + compiler->logical_local_size = serialized_debug_info->logical_local_size; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + + return compiler; + +error: + sljit_free_compiler(compiler); + if (label_list != NULL) + SLJIT_FREE(label_list, allocator_data); + return NULL; +}