@@ -89,9 +89,6 @@
 #define _sse2neon_likely(x) __builtin_expect(!!(x), 1)
 #define _sse2neon_unlikely(x) __builtin_expect(!!(x), 0)
 #elif defined(_MSC_VER)
-#if _MSVC_TRADITIONAL
-#error Using the traditional MSVC preprocessor is not supported! Use /Zc:preprocessor instead.
-#endif
 #ifndef FORCE_INLINE
 #define FORCE_INLINE static inline
 #endif
@@ -184,6 +181,10 @@
 } while (0)
 #endif
 
+#ifdef _M_ARM
+#define vst1q_lane_s64(a, b, c)
+#endif
+
 /* Memory barriers
  * __atomic_thread_fence does not include a compiler barrier; instead,
  * the barrier is part of __atomic_load/__atomic_store's "volatile-like"
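Note: the empty vst1q_lane_s64(a, b, c) definition above only lets references to the
intrinsic compile on MSVC for ARM32; it expands to nothing, so no store is performed.
A hypothetical working fallback (not part of this patch, assuming MSVC's arm_neon.h
exposes vgetq_lane_s64) could write the selected lane explicitly:

/* Sketch: store lane `lane` of a 64x2 vector to memory (lane must be a constant). */
#define sse2neon_vst1q_lane_s64(ptr, vec, lane) \
    (*(ptr) = vgetq_lane_s64((vec), (lane)))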
@@ -202,8 +203,12 @@ FORCE_INLINE void _sse2neon_smp_mb(void)
 #elif defined(__GNUC__) || defined(__clang__)
     __atomic_thread_fence(__ATOMIC_SEQ_CST);
 #else /* MSVC */
+#ifdef _M_ARM
+    __dmb(_ARM_BARRIER_ISH);
+#else
     __dmb(_ARM64_BARRIER_ISH);
 #endif
+#endif
 }
 
 /* Architecture-specific build options */
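For reference, _ARM_BARRIER_ISH selects the AArch32 "DMB ISH" (inner-shareable data
memory barrier), the same fence the GCC/Clang path obtains through
__atomic_thread_fence. A minimal sketch of the equivalent inline-asm form on
GCC/Clang for AArch32 (illustrative only; the header itself uses the atomic builtin):

/* Sketch: inner-shareable data memory barrier plus compiler barrier. */
static inline void demo_dmb_ish(void)
{
    __asm__ __volatile__("dmb ish" ::: "memory");
}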
@@ -268,7 +273,7 @@ FORCE_INLINE void _sse2neon_smp_mb(void)
  * we have to perform syscall instead.
  */
 #if (!defined(__aarch64__) && !defined(_M_ARM64))
-#include <sys/time.h>
+#include <time.h>
 #endif
 
 /* "__has_builtin" can be used to query support for built-in functions
@@ -574,10 +579,10 @@ FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t, uint8_t);
 /* Backwards compatibility for compilers with lack of specific type support */
 
 // Older gcc does not define vld1q_u8_x4 type
-#if defined(__GNUC__) && !defined(__clang__) && \
+#if defined(_M_ARM) || (defined(__GNUC__) && !defined(__clang__) && \
     ((__GNUC__ <= 12 && defined(__arm__)) || \
      (__GNUC__ == 10 && __GNUC_MINOR__ < 3 && defined(__aarch64__)) || \
-     (__GNUC__ <= 9 && defined(__aarch64__)))
+     (__GNUC__ <= 9 && defined(__aarch64__))))
 FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4(const uint8_t *p)
 {
     uint8x16x4_t ret;
@@ -610,6 +615,9 @@ FORCE_INLINE uint8_t _sse2neon_vaddv_u8(uint8x8_t v8)
 }
 #endif
 
+#if defined(_M_ARM)
+#pragma message("TODO: Windows ARM32: Port many SSE2NEON functions")
+#else
 #if !defined(__aarch64__) && !defined(_M_ARM64)
 /* emulate vaddvq u8 variant */
 FORCE_INLINE uint8_t _sse2neon_vaddvq_u8(uint8x16_t a)
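The _M_ARM branch above stubs out the horizontal-add helpers behind a pragma for now.
A hypothetical ARM32-compatible emulation (not part of this patch) can be built from
widening pairwise adds, which AArch32 NEON does provide:

/* Sketch: sum all 16 unsigned bytes by repeatedly widening and pairwise-adding. */
FORCE_INLINE uint8_t sse2neon_vaddvq_u8_arm32(uint8x16_t a)
{
    uint16x8_t s16 = vpaddlq_u8(a);    /* 16 x u8 -> 8 x u16 partial sums */
    uint32x4_t s32 = vpaddlq_u16(s16); /* 8 x u16 -> 4 x u32 */
    uint64x2_t s64 = vpaddlq_u32(s32); /* 4 x u32 -> 2 x u64 */
    return (uint8_t) (vgetq_lane_u64(s64, 0) + vgetq_lane_u64(s64, 1));
}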
@@ -645,6 +653,7 @@ FORCE_INLINE uint16_t _sse2neon_vaddvq_u16(uint16x8_t a)
     return vaddvq_u16(a);
 }
 #endif
+#endif
 
 /* Function Naming Conventions
  * The naming convention of SSE intrinsics is straightforward. A generic SSE
@@ -1765,6 +1774,7 @@ FORCE_INLINE void _mm_free(void *addr)
 }
 #endif
 
+#ifndef _M_ARM
 FORCE_INLINE uint64_t _sse2neon_get_fpcr()
 {
     uint64_t value;
@@ -1808,6 +1818,7 @@ FORCE_INLINE unsigned int _sse2neon_mm_get_flush_zero_mode()
 
     return r.field.bit24 ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF;
 }
+#endif
 
 // Macro: Get the rounding mode bits from the MXCSR control and status register.
 // The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST,
@@ -1826,6 +1837,8 @@ FORCE_INLINE unsigned int _MM_GET_ROUNDING_MODE()
 
 #if defined(__aarch64__) || defined(_M_ARM64)
     r.value = _sse2neon_get_fpcr();
+#elif defined(_M_ARM)
+    r.value = _MoveFromCoprocessor(10, 7, 1, 0, 0);
 #else
     __asm__ __volatile__("vmrs %0, FPSCR" : "=r"(r.value)); /* read */
 #endif
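MSVC for ARM32 has no inline assembly, so the FPSCR is read through the MRC intrinsic:
_MoveFromCoprocessor(10, 7, 1, 0, 0) encodes the same coprocessor access that VMRS
performs. A minimal sketch of pulling the rounding mode out of that raw value
(assuming the usual FPSCR layout with RMode in bits [23:22]; names are illustrative):

/* Sketch: map FPSCR RMode bits to the SSE-style rounding constants. */
static unsigned int fpscr_rounding_mode(uint32_t fpscr)
{
    switch ((fpscr >> 22) & 0x3) {
    case 0: return _MM_ROUND_NEAREST;
    case 1: return _MM_ROUND_UP;
    case 2: return _MM_ROUND_DOWN;
    default: return _MM_ROUND_TOWARD_ZERO;
    }
}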
@@ -2247,7 +2260,7 @@ FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b)
 FORCE_INLINE void _mm_prefetch(char const *p, int i)
 {
     (void) i;
-#if defined(_MSC_VER)
+#ifdef _M_ARM64
     switch (i) {
     case _MM_HINT_NTA:
         __prefetch2(p, 1);
@@ -2262,6 +2275,8 @@ FORCE_INLINE void _mm_prefetch(char const *p, int i)
         __prefetch2(p, 4);
         break;
     }
+#elif defined(_M_ARM)
+    // TODO
 #else
     switch (i) {
     case _MM_HINT_NTA:
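The ARM32 MSVC branch is left as a TODO here. One possible way to fill it later (a
sketch, assuming MSVC's ARM32 __prefetch(const void *) intrinsic, which takes no hint
argument) would be to issue a plain prefetch regardless of the SSE hint:

/* Sketch: hint-agnostic fallback that the _M_ARM branch could use. */
static void prefetch_arm32(char const *p)
{
    __prefetch(p); /* best-effort prefetch; the _MM_HINT_* value is ignored */
}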
@@ -2348,6 +2363,7 @@ FORCE_INLINE __m64 _mm_sad_pu8(__m64 a, __m64 b)
         vset_lane_u16((int) vget_lane_u64(t, 0), vdup_n_u16(0), 0));
 }
 
+#ifndef _M_ARM
 // Macro: Set the flush zero bits of the MXCSR control and status register to
 // the value in unsigned 32-bit integer a. The flush zero may contain any of the
 // following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF
@@ -2379,6 +2395,7 @@ FORCE_INLINE void _sse2neon_mm_set_flush_zero_mode(unsigned int flag)
     __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */
 #endif
 }
+#endif
 
 // Set packed single-precision (32-bit) floating-point elements in dst with the
 // supplied values.
@@ -2404,6 +2421,7 @@ FORCE_INLINE __m128 _mm_set_ps1(float _w)
 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_ROUNDING_MODE
 FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding)
 {
+#ifndef _M_ARM
     union {
         fpcr_bitfield field;
 #if defined(__aarch64__) || defined(_M_ARM64)
@@ -2442,6 +2460,7 @@ FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding)
 #else
     __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */
 #endif
+#endif
 }
 
 // Copy single-precision (32-bit) floating-point element a to the lower element
@@ -3206,6 +3225,7 @@ FORCE_INLINE __m128d _mm_cmpeq_sd(__m128d a, __m128d b)
     return _mm_move_sd(a, _mm_cmpeq_pd(a, b));
 }
 
+#ifndef _M_ARM
 // Compare packed double-precision (64-bit) floating-point elements in a and b
 // for greater-than-or-equal, and store the results in dst.
 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd
@@ -3247,6 +3267,7 @@ FORCE_INLINE __m128d _mm_cmpge_sd(__m128d a, __m128d b)
     return vreinterpretq_m128d_u64(vld1q_u64(d));
 #endif
 }
+#endif
 
 // Compare packed signed 16-bit integers in a and b for greater-than, and store
 // the results in dst.
@@ -3275,6 +3296,7 @@ FORCE_INLINE __m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
         vcgtq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b)));
 }
 
+#ifndef _M_ARM
 // Compare packed double-precision (64-bit) floating-point elements in a and b
 // for greater-than, and store the results in dst.
 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd
@@ -3358,6 +3380,7 @@ FORCE_INLINE __m128d _mm_cmple_sd(__m128d a, __m128d b)
     return vreinterpretq_m128d_u64(vld1q_u64(d));
 #endif
 }
+#endif
 
 // Compare packed signed 16-bit integers in a and b for less-than, and store the
 // results in dst. Note: This intrinsic emits the pcmpgtw instruction with the
@@ -3389,6 +3412,7 @@ FORCE_INLINE __m128i _mm_cmplt_epi8(__m128i a, __m128i b)
         vcltq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b)));
 }
 
+#ifndef _M_ARM
 // Compare packed double-precision (64-bit) floating-point elements in a and b
 // for less-than, and store the results in dst.
 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd
@@ -3429,6 +3453,7 @@ FORCE_INLINE __m128d _mm_cmplt_sd(__m128d a, __m128d b)
     return vreinterpretq_m128d_u64(vld1q_u64(d));
 #endif
 }
+#endif
 
 // Compare packed double-precision (64-bit) floating-point elements in a and b
 // for not-equal, and store the results in dst.
@@ -3456,6 +3481,7 @@ FORCE_INLINE __m128d _mm_cmpneq_sd(__m128d a, __m128d b)
     return _mm_move_sd(a, _mm_cmpneq_pd(a, b));
 }
 
+#ifndef _M_ARM
 // Compare packed double-precision (64-bit) floating-point elements in a and b
 // for not-greater-than-or-equal, and store the results in dst.
 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd
@@ -3756,6 +3782,7 @@ FORCE_INLINE int _mm_comilt_sd(__m128d a, __m128d b)
     return (*(double *) &a0 < *(double *) &b0);
 #endif
 }
+#endif
 
 // Compare the lower double-precision (64-bit) floating-point element in a and b
 // for equality, and return the boolean result (0 or 1).
@@ -4401,6 +4428,7 @@ FORCE_INLINE __m128i _mm_max_epu8(__m128i a, __m128i b)
         vmaxq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b)));
 }
 
+#ifndef _M_ARM
 // Compare packed double-precision (64-bit) floating-point elements in a and b,
 // and store packed maximum values in dst.
 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd
@@ -4487,6 +4515,7 @@ FORCE_INLINE __m128d _mm_min_pd(__m128d a, __m128d b)
     return vreinterpretq_m128d_u64(vld1q_u64(d));
 #endif
 }
+#endif
 
 // Compare the lower double-precision (64-bit) floating-point elements in a and
 // b, store the minimum value in the lower element of dst, and copy the upper
@@ -4793,7 +4822,11 @@ FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b)
 FORCE_INLINE void _mm_pause()
 {
 #if defined(_MSC_VER)
+#ifdef _M_ARM
+    __isb(_ARM_BARRIER_SY);
+#else
     __isb(_ARM64_BARRIER_SY);
+#endif
 #else
     __asm__ __volatile__("isb\n");
 #endif
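_mm_pause maps the x86 PAUSE hint onto an ISB on both MSVC ARM targets, with the
AArch32 or AArch64 barrier constant selected above. A typical spin-wait usage sketch
(names are illustrative, not from sse2neon):

/* Sketch: back off inside a spin loop while waiting for a flag to be set. */
static void spin_until_ready(volatile int *ready)
{
    while (!*ready)
        _mm_pause(); /* cheap back-off hint between polls of the flag */
}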
@@ -7622,6 +7655,7 @@ FORCE_INLINE int _mm_testz_si128(__m128i a, __m128i b)
 }
 
 /* SSE4.2 */
+#ifndef _M_ARM
 
 const static uint16_t ALIGN_STRUCT(16) _sse2neon_cmpestr_mask16b[8] = {
     0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
@@ -8463,9 +8497,11 @@ FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t crc, uint8_t v)
     return crc;
 }
 
+#endif
+
 /* AES */
 
-#if !defined(__ARM_FEATURE_CRYPTO) && !defined(_M_ARM64)
+#if !defined(__ARM_FEATURE_CRYPTO) && !defined(_M_ARM64) && !defined(_M_ARM)
 /* clang-format off */
 #define SSE2NEON_AES_SBOX(w) \
     { \
@@ -8913,6 +8949,7 @@ FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
 #undef SSE2NEON_MULTIPLY
 #endif
 
+#elif defined(_M_ARM)
 #else /* __ARM_FEATURE_CRYPTO */
 // Implements equivalent of 'aesenc' by combining AESE (with an empty key) and
 // AESMC and then manually applying the real key as an xor operation. This
@@ -9034,6 +9071,7 @@ FORCE_INLINE __m128i _mm_clmulepi64_si128(__m128i _a, __m128i _b, const int imm)
     }
 }
 
+#ifndef _M_ARM
 FORCE_INLINE unsigned int _sse2neon_mm_get_denormals_zero_mode()
 {
     union {
@@ -9053,6 +9091,7 @@ FORCE_INLINE unsigned int _sse2neon_mm_get_denormals_zero_mode()
 
     return r.field.bit24 ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF;
 }
+#endif
 
 // Count the number of bits set to 1 in unsigned 32-bit integer a, and
 // return that count in dst.
@@ -9113,6 +9152,7 @@ FORCE_INLINE int64_t _mm_popcnt_u64(uint64_t a)
 #endif
 }
 
+#ifndef _M_ARM
 FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag)
 {
     // AArch32 Advanced SIMD arithmetic always uses the Flush-to-zero setting,
@@ -9140,6 +9180,7 @@ FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag)
     __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */
 #endif
 }
+#endif
 
 // Return the current 64-bit value of the processor's time-stamp counter.
 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=rdtsc
@@ -9161,6 +9202,9 @@ FORCE_INLINE uint64_t _rdtsc(void)
 #endif
 
     return val;
+#elif defined(_M_ARM)
+    uint32_t val = _MoveFromCoprocessor(15, 0, 9, 13, 0);
+    return ((uint64_t) val) << 6;
 #else
     uint32_t pmccntr, pmuseren, pmcntenset;
     // Read the user mode Performance Monitoring Unit (PMU)
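On Windows ARM32 the time-stamp counter is approximated from the PMU cycle counter:
_MoveFromCoprocessor(15, 0, 9, 13, 0) is the MRC read of PMCCNTR (CP15 c9, c13, 0),
and the shift by 6 assumes the PMCR.D divider is set so the register ticks once every
64 cycles. A rough usage sketch, assuming the counter is accessible from user mode:

/* Sketch: measure an interval in (approximate) CPU cycles. */
static uint64_t cycles_elapsed(void (*fn)(void))
{
    uint64_t start = _rdtsc();
    fn();
    return _rdtsc() - start; /* wraps if the 32-bit PMCCNTR overflows in between */
}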