fix main memory leaks

firoorg · Jan 8, 2019 · 59861c6
1 parent 32e99ff
commit 59861c6
Show file tree

Hide file tree

Showing 10 changed files with 143 additions and 85 deletions.
diff --git a/RUN-ZCOIN-MTP.cmd b/RUN-ZCOIN-MTP.cmd
@@ -1,7 +1,7 @@
 
 rem x64\Release\ccminer -a mtp -o  http://127.0.0.1:8382   -u djm34 -p password --coinbase-addr aDn7MMYjVQqenT11VFDYHfFdwXmSTRUTak -d 1070 --no-getwork -i 18
 rem x64\Release\ccminer -a mtp -o  stratum+tcp://xzc.2miners.com:8080  -u  aDn7MMYjVQqenT11VFDYHfFdwXmSTRUTak.worker -p 0 -i 16 -d 0,1 
-x64\Release\ccminer -a mtp -o  stratum+tcp://zcoin.mintpond.com:3000  -u  aDn7MMYjVQqenT11VFDYHfFdwXmSTRUTak.worker -p 0,strict,verbose,d=2800 -d 0,1 -i 20
+x64\Release\ccminer -a mtp -o  stratum+tcp://zcoin.mintpond.com:3000  -u  aDn7MMYjVQqenT11VFDYHfFdwXmSTRUTak.worker -p 0,strict,verbose,d=280 -d 0,1 -i 20
 
 
 pause
diff --git a/ccminer.cpp b/ccminer.cpp
@@ -1155,7 +1155,8 @@ static bool submit_upstream_work_mtp(CURL *curl, struct work *work, struct mtp *
 			applog(LOG_ERR, "submit_upstream_work stratum_send_line failed");
 			return false;
 		}
-
+		json_decref(MyObject);
+		free(serialized);
 		//		stratum_recv_line_compact(&stratum);
 
 //		free(mtp);
@@ -2868,7 +2869,6 @@ static bool wanna_mine(int thr_id)
 static void *miner_thread(void *userdata)
 {
 
-
 	struct thr_info *mythr = (struct thr_info *)userdata;
 //	struct mtp * mtp = (struct mtp*)malloc(sizeof(struct mtp));
 	int switchn = pool_switch_count;
@@ -3075,6 +3075,8 @@ static void *miner_thread(void *userdata)
 				}
 			}
 			#endif
+
+
 			memcpy(&work, &g_work, sizeof(struct work));
 
 			nonceptr[0] = (UINT32_MAX / opt_n_threads) * thr_id; // 0 if single thr
@@ -3146,6 +3148,7 @@ static void *miner_thread(void *userdata)
 		if (!wanna_mine(thr_id)) {
 
 			// free gpu resources
+printf("*******************freeing gpu ressource here ***********************\n");
 			algo_free_all(thr_id);
 			// clear any free error (algo switch)
 			cuda_clear_lasterror();
@@ -4062,7 +4065,7 @@ static void *stratum_thread(void *userdata)
 		if (stratum.job.job_id &&
 		    (!g_work_time || strncmp(stratum.job.job_id, g_work.job_id + 8, sizeof(g_work.job_id)-8))) {
 			pthread_mutex_lock(&g_work_lock);
-
+//printf("*************************new work found *************************\n");
 			if (opt_algo == ALGO_M7) 
 				if (stratum_gen_work_m7(&stratum, &g_work))	g_work_time = time(NULL);
 			else 
@@ -4072,7 +4075,7 @@ static void *stratum_thread(void *userdata)
 
 				static uint32_t last_bloc_height;
 				if (!opt_quiet && stratum.job.height != last_bloc_height) {
-					last_bloc_height = stratum.job.height;
+					last_bloc_height = stratum.job.height; 
 					if (net_diff > 0.)
 						applog(LOG_BLUE, "%s block %d, diff %.3f", algo_names[opt_algo],
 							stratum.job.height, net_diff);
@@ -4116,7 +4119,7 @@ static void *stratum_thread(void *userdata)
 			MyObject = recode_message(MyObject2);
 			isok = stratum_handle_method_bos_json(ctx, MyObject);
 			json_decref(MyObject2);
-			if (!isok) { // not an answer
+			if (!isok) { // not handled as a method: treat it as a response (e.g. the answer to a share submission)
 				stratum_handle_response_json(MyObject);
 				json_decref(MyObject);
 			}
@@ -4125,7 +4128,7 @@ static void *stratum_thread(void *userdata)
 		free(boserror);
 		ctx->sockbuf[0] = '\0';
 		ctx->sockbuf_bossize = 0;
-
+		ctx->sockbuf = (char*)realloc(ctx->sockbuf, ctx->sockbuf_bossize +1);
 			} else {
 
 			s = stratum_recv_line(&stratum);
@@ -4943,7 +4946,7 @@ int main(int argc, char *argv[])
 	struct thr_info *thr;
 	long flags;
 	int i;
-	 
+	json_set_alloc_funcs(malloc, free);
 	printf("*** ccminer " PACKAGE_VERSION " for nVidia GPUs by djm34 ***\n");
 #ifdef _MSC_VER
 	printf("    Built with VC++ %d and nVidia CUDA SDK %d.%d\n\n", msver(),

diff --git a/compat/ccminer-config.h b/compat/ccminer-config.h
@@ -164,7 +164,7 @@
 #define PACKAGE_URL "http://github.com/djm34/ccminer-msvc2015"
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "1.1.11-djm34"
+#define PACKAGE_VERSION "1.1.12-djm34"
 
 /* If using the C implementation of alloca, define if you know the
    direction of stack growth for your system; otherwise it will be

diff --git a/configure.ac b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([ccminer], [1.1.11-djm34], [], [ccminer], [http://github.com/zcoinofficial/ccminer])
+AC_INIT([ccminer], [1.1.12-djm34], [], [ccminer], [http://github.com/zcoinofficial/ccminer])
 
 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM

diff --git a/cuda_mtp/cuda_mtp.cu b/cuda_mtp/cuda_mtp.cu
@@ -15,7 +15,7 @@ static uint32_t *d_MinNonces[16];
 __constant__ uint32_t pTarget[8];
 __constant__ uint32_t pData[20]; // truncated data
 __constant__ uint4 Elements[1];
-uint4 * HBlock[16];
+ uint4 * HBlock[16];
 /*__device__*/ uint32_t *Header[16];
 /*__device__*/ uint2 *buffer_a[16];
 
@@ -1404,6 +1404,7 @@ __global__ void mtp_fc(uint32_t threads, uint4  *  DBlock, uint2 *a) {
 
 
 __host__ void get_tree(int thr_id, uint8_t* d) {
+	cudaSetDevice(device_map[thr_id]);
 	cudaMemcpy(d, buffer_a[thr_id], sizeof(uint2) * 2 * 1048576 * 4, cudaMemcpyDeviceToHost);
 }
 
@@ -1419,11 +1420,12 @@ __host__ uint8_t* get_tree2(int thr_id) {
 
 
 __host__ void get_block(int thr_id, void* d, uint32_t index) {
+	cudaSetDevice(device_map[thr_id]);
 	cudaMemcpy(d, &HBlock[thr_id][64 * index], sizeof(uint64_t) * 128, cudaMemcpyDeviceToHost);
 }
 __host__ void mtp_i_cpu(int thr_id, uint32_t *block_header) {
 
-
+	cudaSetDevice(device_map[thr_id]);
 	cudaError_t err = cudaMemcpy(Header[thr_id], block_header, 8 * sizeof(uint32_t), cudaMemcpyHostToDevice);
 	if (err != cudaSuccess)
 	{

diff --git a/cuda_mtp/mtp.cu b/cuda_mtp/mtp.cu
@@ -28,8 +28,8 @@ static __thread uint32_t throughput = 0;
 static uint32_t JobId[MAX_GPUS] = {0};
 static uint64_t XtraNonce2[MAX_GPUS] = {0};
 static bool fillGpu[MAX_GPUS] = {false};
-static  MerkleTree::Elements TheElements;
-static  MerkleTree ordered_tree[MAX_GPUS];
+//static  MerkleTree::Elements TheElements;
+static  MerkleTree *ordered_tree[MAX_GPUS];
 static  unsigned char TheMerkleRoot[MAX_GPUS][16];
 static  argon2_context context[MAX_GPUS];
 static argon2_instance_t instance[MAX_GPUS];
@@ -76,7 +76,7 @@ extern "C" int scanhash_mtp(int nthreads,int thr_id, struct work* work, uint32_t
 		cudaDeviceProp props;
 		cudaGetDeviceProperties(&props, dev_id);
 
-		cudaMallocHost(&dx[thr_id], sizeof(uint2) * 2 * 1048576 * 4);
+//		cudaMallocHost(&dx[thr_id], sizeof(uint2) * 2 * 1048576 * 4);
 		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
 
 
@@ -86,7 +86,10 @@ extern "C" int scanhash_mtp(int nthreads,int thr_id, struct work* work, uint32_t
 
 
 	}
-
+//sleep(10);
+//cudaFreeHost(dx[thr_id]);
+//printf("freed\n");
+//sleep(60);
 	uint32_t _ALIGN(128) endiandata[20];
 	((uint32_t*)pdata)[19] = (pdata[20]); //*/0x00100000; // mtp version not the actual nonce
 //	((uint32_t*)pdata)[19] = 0x1000;
@@ -149,12 +152,17 @@ pthread_mutex_unlock(&work_lock);
 */
 
 if (JobId[thr_id] != work->data[17] || XtraNonce2[thr_id] != ((uint64_t*)work->xnonce2)[0]) {
- 
+
 	if (JobId[thr_id] != 0) {
+
 		free_memory(&context[thr_id], (unsigned char *)instance[thr_id].memory, instance[thr_id].memory_blocks, sizeof(block));
-		ordered_tree[thr_id].Destructor();
-	}
+		ordered_tree[thr_id]->Destructor();
+		cudaFreeHost(dx[thr_id]);
+//		ordered_tree[thr_id].Destructor();
 
+		delete  ordered_tree[thr_id];
+	}
+	cudaMallocHost(&dx[thr_id], sizeof(uint2) * 2 * 1048576 * 4);
 	context[thr_id] = init_argon2d_param((const char*)endiandata);
 
 	argon2_ctx_from_mtp(&context[thr_id], &instance[thr_id]);
@@ -165,22 +173,28 @@ if (JobId[thr_id] != work->data[17] || XtraNonce2[thr_id] != ((uint64_t*)work->x
 
 	get_tree(thr_id,dx[thr_id]);
 	printf("Step 2 : Compute the root Φ of the Merkle hash tree \n");
-
-	ordered_tree[thr_id] = MerkleTree(dx[thr_id], true);
-
+//sleep(10);
+	ordered_tree[thr_id] = new MerkleTree(dx[thr_id], true);
+/*
+printf("after ordered tree\n");
+sleep(10);
+printf("delete ordered tree\n");
+ordered_tree[thr_id]->Destructor();
+//delete ordered_tree[thr_id];
+sleep(10);
+printf("deleted ordered tree\n");
+sleep(30);
+*/
 	JobId[thr_id] = work->data[17];
 	XtraNonce2[thr_id] = ((uint64_t*)work->xnonce2)[0];
-	MerkleTree::Buffer root = ordered_tree[thr_id].getRoot();
+	MerkleTree::Buffer root = ordered_tree[thr_id]->getRoot();
 
 	std::copy(root.begin(), root.end(), TheMerkleRoot[thr_id]);
 
 	mtp_setBlockTarget(thr_id, endiandata, ptarget, &TheMerkleRoot[thr_id]);
-
+	root.resize(0);
 }
 
-
-
-
 /*
 if (fillGpu[thr_id]) {
 
@@ -221,7 +235,7 @@ fillGpu[thr_id]=false;
 			blockS nBlockMTP[MTP_L *2];
 			unsigned char nProofMTP[MTP_L * 3 * 353 ];
 
-			uint32_t is_sol = mtp_solver(thr_id,foundNonce, &instance[thr_id], nBlockMTP,nProofMTP, TheMerkleRoot[thr_id], mtpHashValue, ordered_tree[thr_id], endiandata,TheUint256Target[0]);
+			uint32_t is_sol = mtp_solver(thr_id,foundNonce, &instance[thr_id], nBlockMTP,nProofMTP, TheMerkleRoot[thr_id], mtpHashValue, *ordered_tree[thr_id], endiandata,TheUint256Target[0]);
 
 			if (is_sol==1 /*&& fulltest(vhash64, ptarget)*/) {
 				int res = 1;

diff --git a/merkletree/merkle-tree.cpp b/merkletree/merkle-tree.cpp
@@ -20,8 +20,8 @@ MerkleTree::MerkleTree(uint8_t * elements, bool preserveOrder)
     : preserveOrder_(preserveOrder)//, elements_(elements)
 {
 //   mem[0]=(elements);
-
-	mem.resize(0);
+//	uint8_t* Truc(elements);
+
 	mem.push_back(elements);
 
 /*
@@ -83,13 +83,12 @@ MerkleTree::~MerkleTree()
 }
 void MerkleTree::Destructor()
 {
-
-//        printf("destroying tree %d\n",mem.size());
-		mem.resize(0);
-//		printf("destroying tree %d\n", mem.size());
-//	for(int i=0;i<mem.size();i++){
-//		free(mem[i]);
-//	}
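+	for(int i=1;i<mem.size();i++) { // start at 1: element 0 is the caller-supplied buffer pushed by the constructor, not malloc'd by the tree, so it must not be free()'d here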
+		free(mem[i]);
+		};
+//	mem.clear();
+//	mem.shrink_to_fit();
+
 }
 
 MerkleTree::Buffer MerkleTree::hash(const Buffer& data)
@@ -294,8 +293,11 @@ for(int i=0;i<mem.size();i++)
 	size/=2;
 //printf("size %d %d %d\n",size, mem.size(), 1024*1024*4*16);
 uint8_t *new_mem=(uint8_t *)malloc(size);
-mem.push_back(new_mem);
+
 gen_layer(prev_mem, new_mem, size/16);
+mem.push_back(new_mem);
+
+
 //for(;;);
 /*
     const Elements& previous_layer = layers_.back();

diff --git a/merkletree/merkle-tree.hpp b/merkletree/merkle-tree.hpp
@@ -86,12 +86,12 @@ public :
 	return ret;
 //        return layers_.back()[0];
     }
-
+/*
     std::vector<uint8_t*> getMem() const
 	{
 	return mem;
 	}
-
+*/
     /** Compute a root hash given a set of hashes
      *
      * This function builds a temporary Merkle Tree and extracts its root
@@ -221,6 +221,7 @@ private :
      * which is the top-level hash, aka the root. The last layer has a length
      * of one.
      */
+
     typedef std::deque<Elements> Layers;
 
     bool     preserveOrder_; /**< Whether to preserve the initial order */