Replies: 2 comments
- @idealboy @mxnet-label-bot add [Question]
- @mxnet-label-bot add [Question]
I used crossentropy.py from https://github.com/miraclewkf/multilabel-MXNet.git to train a multi-label classifier (1000 classes), but the inference results are always strange. The same cross-entropy loss also lives under example/recommenders in the MXNet source.
(1) The predicted scores are very low, and only a small number of outputs are above 0.5.
(2) When I set actually_calculate_loss = True, the training loss becomes NaN.
Below are the custom accuracy and loss metrics:
```python
def acc(label, pred, label_width=num_class):
    # Fraction of label positions whose rounded prediction matches the 0/1 target.
    return float((label == np.round(pred)).sum()) / label_width / pred.shape[0]

def loss(label, pred):
    # Binary cross entropy summed over the labels of each sample, averaged over
    # the batch; assumes `pred` holds sigmoid probabilities.
    loss_all = 0
    for i in range(len(pred)):
        loss = 0
        loss -= label[i] * np.log(pred[i] + 1e-6) + (1. - label[i]) * np.log(1. + 1e-6 - pred[i])
        loss_all += np.sum(loss)
    loss_all = float(loss_all) / float(len(pred) + 0.000001)
    return loss_all
```
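For clarity, here is the same loss metric in vectorized form with explicit clipping (just a sketch of what I believe it computes, not the code I actually train with; the function name and clip bound are my own):

```python
import numpy as np

def loss_vectorized(label, pred, eps=1e-6):
    # Same quantity as loss() above: per-sample BCE summed over classes,
    # averaged over the batch. Clipping keeps log() away from 0 and assumes
    # `pred` really contains probabilities of shape (batch, num_class).
    p = np.clip(pred, eps, 1. - eps)
    bce = -(label * np.log(p) + (1. - label) * np.log(1. - p))
    return float(bce.sum(axis=1).mean())
```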
And below is the loss layer implementation:
```python
eps = 1e-6  # Avoid -inf when taking log(0)
eps1 = 1. + eps
eps_1 = 1. - eps

def forward(self, is_train, req, in_data, out_data, aux):
    # Shapes:
    #   b = minibatch size
    #   d = number of dimensions
    actually_calculate_loss = False
    if actually_calculate_loss:
        p = in_data[0].asnumpy()  # shape=(b,d)
        y = in_data[1].asnumpy()
        out = y * np.log(p + self.eps) + (1. - y) * np.log(self.eps1 - p)
        self.assign(out_data[0], req[0], mx.nd.array(out))
    else:
        # Just copy the predictions forward
        self.assign(out_data[0], req[0], in_data[0])

def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
    self.approx_backward(req, out_grad, in_data, out_data, in_grad, aux)
    # self.exact_backward(req, out_grad, in_data, out_data, in_grad, aux)

def approx_backward(self, req, out_grad, in_data, out_data, in_grad, aux):
    """Correct grad = (y-p)/(p-p^2)
    But if y is just 1 or 0, then this simplifies to
        grad = 1/(p-1+y)
    which is more numerically stable
    """
    p = in_data[0].asnumpy()  # shape=(b,d)
    y = in_data[1].asnumpy()
    grad = -1. / (p - self.eps_1 + y)
    self.assign(in_grad[0], req[0], mx.nd.array(grad))

def exact_backward(self, req, out_grad, in_data, out_data, in_grad, aux):
    """grad = (y-p)/(p-p^2)"""
    p = in_data[0].asnumpy()  # shape=(b,d)
    y = in_data[1].asnumpy()  # seems right
    grad = (p - y) / ((p + self.eps) * (self.eps1 - p))
    self.assign(in_grad[0], req[0], mx.nd.array(grad))
```
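To make the difference between the two forward branches concrete, here is a small standalone NumPy sketch of what each branch assigns to the output (toy values, not taken from my data):

```python
import numpy as np

eps, eps1 = 1e-6, 1. + 1e-6
p = np.array([[0.9, 0.1], [0.2, 0.8]])  # sigmoid outputs, shape (b, d)
y = np.array([[1., 0.], [0., 1.]])      # multi-hot labels

# actually_calculate_loss = True: the op outputs the per-element
# log-likelihood, which is <= 0 rather than a probability in [0, 1].
out_loss_branch = y * np.log(p + eps) + (1. - y) * np.log(eps1 - p)

# actually_calculate_loss = False: the op just forwards the probabilities.
out_copy_branch = p

print(out_loss_branch)  # all entries negative
print(out_copy_branch)  # entries stay in [0, 1]
```

Note that the acc and loss metrics above take `pred` straight from the module output, so they see whichever of these two arrays the chosen branch produces.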
And below is the training code:
```python
def get_fine_tune_model(sym, arg_params, num_classes, layer_name):

    all_layers = sym.get_internals()
    net = all_layers[layer_name + '_output']
    net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='fc1')
    net = mx.symbol.sigmoid(data=net, name='sig')
    net = mx.symbol.Custom(data=net, name='softmax', op_type='CrossEntropyLoss')
    # net = mx.symbol.LogisticRegressionOutput(data=net, name='softmax')

    # new_args = dict({k: arg_params[k] for k in arg_params if 'fc' not in k})
    # return (net, new_args)
    return (net, arg_params)
    # return net
```
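Before fine-tuning I also check that 'flatten0' really is an internal output of the loaded symbol (an inspection sketch only; it assumes `sym` comes from mx.model.load_checkpoint as in the code below):

```python
# List the internal output names of the pretrained symbol and look for the
# layer that get_fine_tune_model indexes as all_layers[layer_name + '_output'].
internal_names = sym.get_internals().list_outputs()
print([name for name in internal_names if 'flatten' in name])
# Expect something like ['flatten0_output']; the name must match exactly.
```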
```python
def multi_factor_scheduler(begin_epoch, epoch_size, step=[5, 10], factor=0.1):
    step_ = [epoch_size * (x - begin_epoch) for x in step if x - begin_epoch > 0]
    return mx.lr_scheduler.MultiFactorScheduler(step=step_, factor=factor) if len(step_) else None
```
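With made-up numbers, just to check my understanding of the schedule:

```python
# multi_factor_scheduler converts epochs into update counts before handing
# them to MultiFactorScheduler, which multiplies the learning rate by `factor`
# at each of those counts. All numbers below are hypothetical.
begin_epoch, epoch_size = 0, 100          # e.g. 100 batches per epoch
step = [60, 120, 180]
step_ = [epoch_size * (x - begin_epoch) for x in step if x - begin_epoch > 0]
print(step_)  # [6000, 12000, 18000] -> lr is scaled by 0.1 at these updates
```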
```python
train = mx.io.ImageRecordIter(
    path_imgrec        = train_data,
    label_width        = num_class,
    data_shape         = (3, 224, 224),
    batch_size         = args.batch_size,
    rand_crop          = True,
    # ... (other arguments omitted)
    preprocess_threads = 20,
    rand_mirror        = True,
    shuffle            = True,
    num_parts          = kv.num_workers,
    part_index         = kv.rank)

val = mx.io.ImageRecordIter(
    path_imgrec        = val_data,
    label_width        = num_class,
    batch_size         = args.batch_size,
    data_shape         = (3, 224, 224),
    # ... (other arguments omitted)
    rand_crop          = False,
    rand_mirror        = False,
    num_parts          = kv.num_workers,
    part_index         = kv.rank)
```
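Both iterators expect each record to carry num_class label values. This is roughly how one multi-hot row ends up in the .lst file that im2rec packs into the .rec above (a sketch with made-up values; my real list is generated differently):

```python
import numpy as np

num_class = 1000
labels = np.zeros(num_class)
labels[[3, 17]] = 1  # hypothetical image tagged with classes 3 and 17

# .lst layout: index, then num_class label columns, then the image path,
# all tab separated, matching label_width = num_class in ImageRecordIter.
line = '\t'.join(['0'] + ['%g' % v for v in labels] + ['images/img_000001.jpg'])
```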
```python
kv = mx.kvstore.create(args.kv_store)

prefix = model
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
(new_sym, new_args) = get_fine_tune_model(
    sym, arg_params, args.num_classes, 'flatten0')

epoch_size = max(int(args.num_examples / args.batch_size / kv.num_workers), 1)
lr_scheduler = multi_factor_scheduler(args.epoch, epoch_size, step=[60, 120, 180])

optimizer_params = {
    'learning_rate': args.lr,
    'momentum'     : args.mom,
    'wd'           : args.wd,
    'lr_scheduler' : lr_scheduler}
initializer = mx.init.Xavier(
    rnd_type='gaussian', factor_type="in", magnitude=2)

if gpus == '':
    devs = mx.cpu()
else:
    devs = [mx.gpu(int(i)) for i in gpus.split(',')]

model = mx.mod.Module(
    context = devs,
    symbol  = new_sym,
)

checkpoint = mx.callback.do_checkpoint(args.save_result + args.save_name)

def acc(label, pred, label_width=num_class):
    return float((label == np.round(pred)).sum()) / label_width / pred.shape[0]

def loss(label, pred):
    loss_all = 0
    for i in range(len(pred)):
        loss = 0
        loss -= label[i] * np.log(pred[i] + 1e-6) + (1. - label[i]) * np.log(1. + 1e-6 - pred[i])
        loss_all += np.sum(loss)
    loss_all = float(loss_all) / float(len(pred) + 0.000001)
    return loss_all

eval_metric = list()
eval_metric.append(mx.metric.np(acc))
eval_metric.append(mx.metric.np(loss))

model.fit(train,
          begin_epoch=epoch,
          num_epoch=num_epoch,
          eval_data=val,
          eval_metric=eval_metric,
          validation_metric=eval_metric,
          kvstore=kv,
          optimizer='sgd',
          optimizer_params=optimizer_params,
          arg_params=new_args,
          aux_params=aux_params,
          initializer=initializer,
          allow_missing=True,
          batch_end_callback=mx.callback.Speedometer(args.batch_size, 20),
          epoch_end_callback=checkpoint)
```
Does it have an error? Thank you very much for any pointers!
(3) When I use LogisticRegressionOutput instead, the inference scores are lower still.
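For reference, this is roughly how I look at the scores at inference time (a sketch: the checkpoint prefix, epoch and the dummy input are placeholders, not my real evaluation script):

```python
import mxnet as mx

# Load the fine-tuned checkpoint (placeholder prefix/epoch). If loading fails
# on the Custom op, the CrossEntropyLoss operator has to be imported/registered first.
sym, arg_params, aux_params = mx.model.load_checkpoint('model/multilabel', 10)

# Take the sigmoid output directly, so the custom loss op is not needed at
# inference time (in its default branch it only copies the probabilities forward).
sig = sym.get_internals()['sig_output']

mod = mx.mod.Module(symbol=sig, context=mx.cpu(), label_names=None)
mod.bind(for_training=False, data_shapes=[('data', (1, 3, 224, 224))])
mod.set_params(arg_params, aux_params, allow_missing=True)

img = mx.nd.zeros((1, 3, 224, 224))        # dummy input just for the sketch
mod.forward(mx.io.DataBatch(data=[img]), is_train=False)
scores = mod.get_outputs()[0].asnumpy()    # shape (1, num_class), sigmoid probabilities
print(scores.max(), (scores > 0.5).sum())  # peak score and how many labels pass 0.5
```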