piler, instruction, 4);

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);

JUMPHERE(jump[0]);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 3);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));

  jump[0] = jump_if_utf_char_start(compiler, TMP1);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);

  add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));

  JUMPHERE(jump[0]);
  }
#endif

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}

#endif /* !_WIN64 */

#undef SSE2_COMPARE_TYPE_INDEX

#endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */

#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))

#include <arm_neon.h>

typedef union {
  unsigned int x;
  struct { unsigned char c1, c2, c3, c4; } c;
} int_char;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
static SLJIT_INLINE int utf_continue(sljit_u8 *s)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
return (*s & 0xc0) == 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
return (*s & 0xfc00) == 0xdc00;
#else
#error "Unknown code width"
#endif
}
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

#if PCRE2_CODE_UNIT_WIDTH == 8
# define VECTOR_FACTOR 16
# define vect_t uint8x16_t
# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
# define VCEQQ vceqq_u8
# define VORRQ vorrq_u8
# define VST1Q vst1q_u8
# define VDUPQ vdupq_n_u8
# define VEXTQ vextq_u8
# define VANDQ vandq_u8
typedef union {
       uint8_t mem[16];
       uint64_t dw[2];
} quad_word;
#elif PCRE2_CODE_UNIT_WIDTH == 16
# define VECTOR_FACTOR 8
# define vect_t uint16x8_t
# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
# define VCEQQ vceqq_u16
# define VORRQ vorrq_u16
# define VST1Q vst1q_u16
# define VDUPQ vdupq_n_u16
# define VEXTQ vextq_u16
# define VANDQ vandq_u16
typedef union {
       uint16_t mem[8];
       uint64_t dw[2];
} quad_word;
#else
# define VECTOR_FACTOR 4
# define vect_t uint32x4_t
# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
# define VCEQQ vceqq_u32
# define VORRQ vorrq_u32
# define VST1Q vst1q_u32
# define VDUPQ vdupq_n_u32
# define VEXTQ vextq_u32
# define VANDQ vandq_u32
typedef union {
       uint32_t mem[4];
       uint64_t dw[2];
} quad_word;
#endif

#define FFCS
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS

#define FFCS_2
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS_2

#define FFCS_MASK
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS_MASK

#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1

static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
int_char ic;
struct sljit_jump *partial_quit;
/* Save temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0);

/* Prepare function arguments */
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);

if (char1 == char2)
  {
    ic.c.c1 = char1;
    ic.c.c2 = char2;
    OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf && offset > 0)
    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_utf));
  else
    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs));
#else
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                   SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs));
#endif
  }
else
  {
  PCRE2_UCHAR mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    ic.c.c1 = char1 | mask;
    ic.c.c2 = mask;
    OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf && offset > 0)
      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask_utf));
    else
      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask));
#else
    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask));
#endif
    }
  else
    {
      ic.c.c1 = char1;
      ic.c.c2 = char2;
      OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf && offset > 0)
      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2_utf));
    else
      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2));
#else
    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2));
#endif
    }
  }
/* Restore registers. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);

/* Check return value. */
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Fast forward STR_PTR to the result of memchr. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);
}

typedef enum {
  compare_match1,
  compare_match1i,
  compare_match2,
} compare_type;

static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
{
if (ctype == compare_match2)
  {
  vect_t tmp = dst;
  dst = VCEQQ(dst, cmp1);
  tmp = VCEQQ(tmp, cmp2);
  dst = VORRQ(dst, tmp);
  return dst;
  }

if (ctype == compare_match1i)
  dst = VORRQ(dst, cmp2);
dst = VCEQQ(dst, cmp1);
return dst;
}

static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
return 15;
#elif PCRE2_CODE_UNIT_WIDTH == 16
return 7;
#elif PCRE2_CODE_UNIT_WIDTH == 32
return 3;
#else
#error "Unsupported unit width"
#endif
}

/* ARM doesn't have a shift left across lanes. */
static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
{
vect_t zero = VDUPQ(0);
SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
/* VEXTQ takes an immediate as last argument. */
#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
switch (n)
  {
  C(1); C(2); C(3);
#if PCRE2_CODE_UNIT_WIDTH != 32
  C(4); C(5); C(6); C(7);
# if PCRE2_CODE_UNIT_WIDTH != 16
  C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
# endif
#endif
  default:
    /* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
       happen. The return is still here for compilers to not warn. */
    return a;
  }
}

#define FFCPS
#define FFCPS_DIFF1
#define FFCPS_CHAR1A2A

#define FFCPS_0
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCPS_0

#undef FFCPS_CHAR1A2A

#define FFCPS_1
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
#