Merge pull request #1958 from fireice-uk/dev

release 2.5.1
fireice-uk · Oct 17, 2018 · 4e72408 · 4e72408
2 parents 9012512 + 5e66d4c
commit 4e72408
Show file tree

Hide file tree

Showing 11 changed files with 69 additions and 23 deletions.
diff --git a/.gitignore b/.gitignore
@@ -22,3 +22,11 @@ xmr-stak.kdev4
 cmake-build-release/
 cmake-build-debug/
 \.idea/
+
+# MacOS files
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Thumbnails
+._*
diff --git a/doc/compile_Linux.md b/doc/compile_Linux.md
@@ -2,10 +2,17 @@
 
 ## Install Dependencies
 
-### AMD APP SDK 3.0 (only needed to use AMD GPUs)
+### AMD Driver (only needed to use AMD GPUs)
 
-- download and install the latest version from http://debian.nullivex.com/amd/AMD-APP-SDKInstaller-v3.0.130.136-GA-linux64.tar.bz2 (see https://github.com/fireice-uk/xmr-stak/issues/1511#issuecomment-385120692)
-  (do not wonder why it is a link to a dropbox but AMD has removed the SDK downloads, see https://community.amd.com/thread/228059)
+- the AMD APP SDK is no longer needed (everything is included in the driver package)
+- download & unzip the AMD driver: https://www.amd.com/en/support
+- run `./amdgpu-pro-install --opencl=legacy,pal` from the unzipped folder
+- set the OpenCL environment variable: `export AMDAPPSDKROOT=/opt/amdgpu-pro/`
+
+**ATTENTION** The Linux driver 18.3 is creating invalid shares.
+If you have an issue with `invalid shares` please downgrade your driver or switch to ROCm.
+
+On Linux, the open-source driver ROCm 1.9.X+ is also a well-working alternative, see https://rocm.github.io/ROCmInstall.html
 
 ### Cuda 8.0+ (only needed to use NVIDIA GPUs)
 

diff --git a/doc/compile_Windows.md b/doc/compile_Windows.md
@@ -30,7 +30,12 @@
     - CUDA/Runtime
     - Driver components
 
-### AMD APP SDK 3.0 (only needed for AMD GPUs)
+### AMD DRIVER/APP SDK 3.0 (only needed for AMD GPUs)
+
+- Download & install the AMD driver: https://www.amd.com/en/support
+
+**ATTENTION** Many Windows drivers 18.5+ are creating invalid shares.
+If you have an issue with `invalid shares` please downgrade your driver.
 
 - Download and install the latest version from http://amd-dev.wpengine.netdna-cdn.com/app-sdk/installers/APPSDKInstaller/3.0.130.135-GA/full/AMD-APP-SDKInstaller-v3.0.130.135-GA-windows-F-x64.exe
   (do not wonder why it is a link to a netdna-cdn.com but AMD has removed the SDK downloads, see https://community.amd.com/thread/222855)

diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -420,6 +420,11 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
 		options += " -DMEMORY=" + std::to_string(hashMemSize);
 		options += " -DALGO=" + std::to_string(miner_algo[ii]);
 		options += " -DCN_UNROLL=" + std::to_string(ctx->unroll);
+		/* AMD driver output is something like: `1445.5 (VM)`
+		 * and is mapped to `14` only. The value is only used for a compiler
+		 * workaround.
+		 */
+		options += " -DOPENCL_DRIVER_MAJOR=" + std::to_string(std::stoi(openCLDriverVer.data()) / 100);
 
 		/* create a hash for the compile time cache
 		 * used data:
@@ -928,13 +933,6 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
 	// create a directory  for the OpenCL compile cache
 	create_directory(get_home() + "/.openclcache");
 
-	// check if cryptonight_monero_v8 is selected for the user or dev pool
-	bool useCryptonight_v8 =
-		::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_monero_v8 ||
-		::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot() == cryptonight_monero_v8 ||
-		::jconf::inst()->GetCurrentCoinSelection().GetDescription(0).GetMiningAlgo() == cryptonight_monero_v8 ||
-		::jconf::inst()->GetCurrentCoinSelection().GetDescription(0).GetMiningAlgoRoot() == cryptonight_monero_v8;
-
 	for(int i = 0; i < num_gpus; ++i)
 	{
 		const std::string backendName = xmrstak::params::inst().openCLVendor;

diff --git a/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl b/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl
@@ -2,6 +2,10 @@ R"===(
 /*
  * @author SChernykh
  */
+
+// cryptonight_monero_v8
+#if(ALGO==11)
+
 static const __constant uint RCP_C[256] =
 {
 	0xfe01be73u,0xfd07ff01u,0xfa118c5au,0xf924fb13u,0xf630cddbu,0xf558f73cu,0xf25f2934u,0xf1a3f37bu,
@@ -68,9 +72,21 @@ inline uint2 fast_div_v2(const __local uint *RCP, ulong a, uint b)
 	const ulong k = mul_hi(as_uint2(a).s0, r) + ((ulong)(r) * as_uint2(a).s1) + a;
 
 	ulong q;
-	((uint*)&q)[0] = as_uint2(k).s1;;
-	((uint*)&q)[1] = (k < a) ? 1 : 0;
-
+	((uint*)&q)[0] = as_uint2(k).s1;
+
+#if defined(cl_amd_device_attribute_query) && (OPENCL_DRIVER_MAJOR == 14)
+	/* The AMD driver 14.XX is not able to compile `(k < a)`
+	 * https://github.com/fireice-uk/xmr-stak/issues/1922
+	 * This is a workaround for the broken compiler.
+	 */
+	 ulong whyAMDwhy;
+	((uint*)&whyAMDwhy)[0] = as_uint2(k).s0;
+	((uint*)&whyAMDwhy)[1] = as_uint2(k).s1;
+	((uint*)&q)[1] = (whyAMDwhy < a) ? 1U : 0U;
+#else
+	((uint*)&q)[1] = (k < a) ? 1U : 0U;
+#endif
+
 	const long tmp = a - q * b;
 	const bool overshoot = (tmp < 0);
 	const bool undershoot = (tmp >= b);
@@ -105,4 +121,7 @@ inline uint fast_sqrt_v2(const ulong n1)
 
 	return result;
 }
+
+#endif
+
 )==="
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
@@ -296,6 +296,7 @@ bool minethd::self_test()
 			bResult = bResult &&  memcmp(out, "\x5a\x24\xa0\x29\xde\x1c\x39\x3f\x3d\x52\x7a\x2f\x9b\x39\xdc\x3d\xb3\xbc\x87\x11\x8b\x84\x52\x9b\x9f\x0\x88\x49\x25\x4b\x5\xce", 32) == 0;
 
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_lite);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\x5a\x24\xa0\x29\xde\x1c\x39\x3f\x3d\x52\x7a\x2f\x9b\x39\xdc\x3d\xb3\xbc\x87\x11\x8b\x84\x52\x9b\x9f\x0\x88\x49\x25\x4b\x5\xce", 32) == 0;
 		}
 		else if(algo == cryptonight_monero)

diff --git a/xmrstak/misc/coinDescription.hpp b/xmrstak/misc/coinDescription.hpp
@@ -10,9 +10,9 @@ namespace xmrstak
 {
 	struct coinDescription
 	{
-		xmrstak_algo algo;
-		xmrstak_algo algo_root;
-		uint8_t fork_version;
+		xmrstak_algo algo = xmrstak_algo::invalid_algo;
+		xmrstak_algo algo_root = xmrstak_algo::invalid_algo;
+		uint8_t fork_version = 0u;
 
 		coinDescription() = default;
 

diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp
@@ -627,8 +627,12 @@ void executor::ex_main()
 			break;
 
 		case EV_GPU_RES_ERROR:
-			log_result_error(std::string(ev.oGpuError.error_str + std::string(" GPU ID ") + std::to_string(ev.oGpuError.idx)));
+		{
+			std::string err_msg = std::string(ev.oGpuError.error_str) + " GPU ID " + std::to_string(ev.oGpuError.idx);
+			printer::inst()->print_msg(L0, err_msg.c_str());
+			log_result_error(std::move(err_msg));
 			break;
+		}
 
 		case EV_PERF_TICK:
 			for (i = 0; i < pvThreads->size(); i++)

diff --git a/xmrstak/misc/telemetry.cpp b/xmrstak/misc/telemetry.cpp
@@ -36,6 +36,7 @@ telemetry::telemetry(size_t iThd)
 	ppHashCounts = new uint64_t*[iThd];
 	ppTimestamps = new uint64_t*[iThd];
 	iBucketTop = new uint32_t[iThd];
+	mtx = new std::mutex[iThd];
 
 	for (size_t i = 0; i < iThd; i++)
 	{
@@ -49,15 +50,17 @@ telemetry::telemetry(size_t iThd)
 
 double telemetry::calc_telemetry_data(size_t iLastMillisec, size_t iThread)
 {
-	std::unique_lock<std::mutex> lk(mtx);
-	uint64_t iTimeNow = get_timestamp_ms();
+
 
 	uint64_t iEarliestHashCnt = 0;
 	uint64_t iEarliestStamp = 0;
 	uint64_t iLatestStamp = 0;
 	uint64_t iLatestHashCnt = 0;
 	bool bHaveFullSet = false;
 
+	std::unique_lock<std::mutex> lk(mtx[iThread]);
+	uint64_t iTimeNow = get_timestamp_ms();
+
 	//Start at 1, buckettop points to next empty
 	for (size_t i = 1; i < iBucketSize; i++)
 	{
@@ -81,6 +84,7 @@ double telemetry::calc_telemetry_data(size_t iLastMillisec, size_t iThread)
 		iEarliestStamp = ppTimestamps[iThread][idx];
 		iEarliestHashCnt = ppHashCounts[iThread][idx];
 	}
+	lk.unlock();
 
 	if (!bHaveFullSet || iEarliestStamp == 0 || iLatestStamp == 0)
 		return nan("");
@@ -99,7 +103,7 @@ double telemetry::calc_telemetry_data(size_t iLastMillisec, size_t iThread)
 
 void telemetry::push_perf_value(size_t iThd, uint64_t iHashCount, uint64_t iTimestamp)
 {
-	std::unique_lock<std::mutex> lk(mtx);
+	std::unique_lock<std::mutex> lk(mtx[iThd]);
 	size_t iTop = iBucketTop[iThd];
 	ppHashCounts[iThd][iTop] = iHashCount;
 	ppTimestamps[iThd][iTop] = iTimestamp;

diff --git a/xmrstak/misc/telemetry.hpp b/xmrstak/misc/telemetry.hpp
@@ -15,7 +15,7 @@ class telemetry
 	double calc_telemetry_data(size_t iLastMillisec, size_t iThread);
 
 private:
-	mutable std::mutex mtx;
+	std::mutex* mtx;
 	constexpr static size_t iBucketSize = 2 << 11; //Power of 2 to simplify calculations
 	constexpr static size_t iBucketMask = iBucketSize - 1;
 	uint32_t* iBucketTop;

diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp
@@ -18,7 +18,7 @@
 #endif
 
 #define XMR_STAK_NAME "xmr-stak"
-#define XMR_STAK_VERSION "2.5.0"
+#define XMR_STAK_VERSION "2.5.1"
 
 #if defined(_WIN32)
 #define OS_TYPE "win"