Skip to content

Commit d22e194

Browse files
abab (ab) authored and ab committed
dropout convolutions
1 parent 1bea4f2 commit d22e194

4 files changed: +19 −5 lines changed

Sources/convolutional_layer.hpp

+13
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,20 @@ class convolutional_layer : public layer {
4343
assert(in.size() == in_feature_maps_ * in_width_ * in_width_);
4444
assert(weights_.size() == in_feature_maps_*out_feature_maps_*filter_width_*filter_width_);
4545

46+
if (dropout_prob_ > 0.0)
47+
sample_dropout();
48+
4649
for (uint_t out_fm=0; out_fm<out_feature_maps_; out_fm++) {
4750

4851
for (uint_t ox=0; ox<out_width_; ox++) {
4952
for (uint_t oy=0; oy<out_width_; oy++) {
5053

54+
/* dropout while training: [0,1), dropout while testing: 1 */
55+
if (dropout_prob_!= 0 && dropout_prob_!=1 && dropout_sample_[(out_fm*out_width_+ ox)*out_width_ + oy]<dropout_prob_) {
56+
output_[(out_fm*out_width_+ ox)*out_width_ + oy] = 0.0;
57+
continue;
58+
}
59+
5160
float_t sum = 0.0;
5261
for (uint_t in_fm=0; in_fm<in_feature_maps_; in_fm++) {
5362

@@ -61,6 +70,10 @@ class convolutional_layer : public layer {
6170
}
6271
}
6372
}
73+
/* dropout while testing */
74+
if (dropout_prob_==1)
75+
sum = sum*0.5;
76+
6477
output_[(out_fm*out_width_+ ox)*out_width_ + oy] = A_.f(sum + bias_[out_fm]);
6578
}
6679
}

Sources/fullyconnected_layer.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class fullyconnected_layer : public output_layer {
4747

4848
/* dropout while testing */
4949
if (dropout_prob_==1)
50-
sum = sum*0.5;
50+
sum = sum*0.1;
5151

5252
output_[o] = ActFunc.f(sum + bias_[o]);
5353
}

Sources/main.cpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -155,9 +155,8 @@ cnn_training_test_mnist()
155155
vec_t soll {0,0,0,0,0,0,0,0,0,0};
156156
int last_label = 0;
157157
for(int s=1; s<=steps; s++) {
158-
159-
160-
S4.set_dropout_prob(0.5);
158+
159+
S4.set_dropout_prob(0.9);
161160
#if TRAINING_MOMENTUM
162161
if (s%1000==0) {
163162
nn.set_learning_rate( s==0? 0.00085 : nn.learning_rate()*0.85);

Sources/subsampling_layer.hpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@ class subsampling_layer : public layer {
2929
}
3030

3131
void forward(const vec_t& in /*[in_feature_map * in_width_ * in_width_]*/) {
32-
3332
assert(in.size() == feature_maps_ * in_width_ * in_width_);
33+
34+
if (dropout_prob_ > 0.0)
35+
sample_dropout();
3436

3537
for (uint_t fm=0; fm<feature_maps_; fm++) {
3638

0 commit comments

Comments
 (0)