diff --git a/C/Addition.c b/C/Addition.c
new file mode 100644
index 00000000..06f23b88
--- /dev/null
+++ b/C/Addition.c
@@ -0,0 +1,8 @@
+#include<stdio.h>
+int main(void) /* reads two integers from stdin and prints their sum */
+{ int a,b;
+printf("Enter two Numbers : ");
+if (scanf("%d %d",&a,&b) != 2) return 1; /* without two parsed values a and b are indeterminate */
+printf("The Sum is %d",a+b);
+return 0;
+}
diff --git a/C/Address of 1-D array.c b/C/Address of 1-D array.c
new file mode 100644
index 00000000..bad40b3f
--- /dev/null
+++ b/C/Address of 1-D array.c	
@@ -0,0 +1,17 @@
+#include<stdio.h>
+/* Reads up to 100 integers and prints the address of each array element. */
+int main(void)
+{
+	int a[100],i,n;
+	printf("enter the size");
+	if(scanf("%d",&n)!=1 || n<0 || n>100) return 1; /* size must fit the fixed buffer */
+	printf("enter the no");
+	for(i=0;i<n;i++)
+		if(scanf("%d",&a[i])!=1) return 1;
+	for(i=0;i<n;i++)
+	{
+		/* pointer arithmetic is already scaled by sizeof(int): &a[i] == a + i; %p needs a void * */
+		printf("%p\n",(void *)&a[i]);
+	}
+	return 0;
+}
diff --git a/C/AllTempScalesConv.c b/C/AllTempScalesConv.c
new file mode 100644
index 00000000..3480bb1c
--- /dev/null
+++ b/C/AllTempScalesConv.c
@@ -0,0 +1,98 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Converts a temperature between Celsius, Kelvin and Fahrenheit.
+ *
+ * initValue:  temperature to convert.
+ * initScale:  source scale -- 1 = Celsius, 2 = Kelvin, 3 = Fahrenheit.
+ * finalScale: selects one of the two other scales, relative to initScale:
+ *               Celsius:    1 = Kelvin,  2 = Fahrenheit
+ *               Kelvin:     1 = Celsius, 2 = Fahrenheit
+ *               Fahrenheit: 1 = Celsius, 2 = Kelvin
+ *
+ * Returns the converted value; an unsupported scale combination returns
+ * initValue unchanged instead of an uninitialised value.
+ */
+double convertTemp(double initValue, int initScale, int finalScale){
+    double finalValue = initValue;
+    switch(initScale){
+        case 1:
+            // Celsius to Kelvin
+            if(finalScale == 1) finalValue = initValue + 273.15;
+            // Celsius to Fahrenheit
+            else if(finalScale == 2) finalValue = (initValue * 9 / 5) + 32;
+            break;
+        case 2:
+            // Kelvin to Celsius
+            if(finalScale == 1) finalValue = initValue - 273.15;
+            // Kelvin to Fahrenheit
+            else if(finalScale == 2) finalValue = ((initValue - 273.15) * 9 / 5) + 32;
+            break;
+        case 3:
+            // Fahrenheit to Celsius
+            if(finalScale == 1) finalValue = (initValue - 32) * 5 / 9;
+            // Fahrenheit to Kelvin ("273,15" was a comma operator that added only 273)
+            else if(finalScale == 2) finalValue = ((initValue - 32) * 5 / 9) + 273.15;
+            break;
+        default:
+            break;
+    }
+    return finalValue;
+}
+
+// Interactive menu: repeatedly asks for a conversion and a value, then prints the result.
+int main(){
+    int option;
+    double initialValue, finalValue;
+    while(1){
+        // main menu
+        printf("\n0 - Exit\n");
+        printf("1 - Convert from Celsius to Kelvin\n");
+        printf("2 - Convert from Celsius to Fahrenheit\n");
+        printf("3 - Convert from Kelvin to Fahrenheit\n");
+        printf("4 - Convert from Kelvin to Celsius\n");
+        printf("5 - Convert from Fahrenheit to Celsius\n");
+        printf("6 - Convert from Fahrenheit to Kelvin\n");
+        printf("Select a number: ");
+        // Leave on the exit choice and on unreadable input (a failed scanf would otherwise loop forever).
+        if(scanf("%d",&option) != 1 || !option){
+            printf("Ending program\n");
+            return 0;
+        }
+
+        printf("Please enter the initial value: ");
+        if(scanf("%lf",&initialValue) != 1){
+            printf("Invalid value\n");
+            return 1;
+        }
+        // Each case must pass the scale pair that matches its menu entry above.
+        switch(option){
+            case 1:
+                finalValue = convertTemp(initialValue,1,1);
+                printf("Valor em Kelvin: %.2lf",finalValue);
+                break;
+            case 2:
+                finalValue = convertTemp(initialValue,1,2);
+                printf("Valor em Fahrenheit: %.2lf",finalValue);
+                break;
+            case 3:
+                finalValue = convertTemp(initialValue,2,2);
+                printf("Valor em Fahrenheit: %.2lf",finalValue);
+                break;
+            case 4:
+                finalValue = convertTemp(initialValue,2,1);
+                printf("Valor em Celsius: %.2lf",finalValue);
+                break;
+            case 5:
+                finalValue = convertTemp(initialValue,3,1);
+                printf("Valor em Celsius: %.2lf",finalValue);
+                break;
+            case 6:
+                finalValue = convertTemp(initialValue,3,2);
+                printf("Valor em Kelvin: %.2lf",finalValue);
+                break;
+        }
+        printf("\n");
+    }
+}
\ No newline at end of file
diff --git a/C/Anagram-Program-in-C b/C/Anagram-Program-in-C
new file mode 100644
index 00000000..68510848
--- /dev/null
+++ b/C/Anagram-Program-in-C
@@ -0,0 +1,71 @@
+#include <stdio.h>  
+#include <string.h> 
+#include <stdlib.h>  
+
+int checkAnagram(char *str1, char *str2);
+
+// Reads two lines from stdin and reports whether they are anagrams of each other.
+int main()
+{
+    char str1[100], str2[100];
+
+    printf("Function : whether two given strings are anagram :");
+    printf("\nExample : pears and  spare, stone and tones :");
+
+    printf(" Input the  first String : ");
+    if(fgets(str1, sizeof str1, stdin) == NULL)
+        return 1;
+    printf(" Input the  second String : ");
+    if(fgets(str2, sizeof str2, stdin) == NULL)
+        return 1;
+
+    // Cut each line at its newline, if any. strcspn is safe when the newline is
+    // missing (EOF or over-long input), unlike blindly dropping the last byte.
+    str1[strcspn(str1, "\n")] = '\0';
+    str2[strcspn(str2, "\n")] = '\0';
+
+    if(checkAnagram(str1, str2) == 1)
+       printf(" %s and %s are Anagram.\n\n",str1,str2);
+    else
+       printf(" %s and %s are not Anagram.\n\n",str1,str2);
+    return 0;
+}
+ 
+    
+/*
+ * Checks whether two strings are anagrams, i.e. contain exactly the same
+ * bytes with the same multiplicities (case- and whitespace-sensitive).
+ *
+ * str1, str2: NUL-terminated strings to compare; neither is modified.
+ *
+ * Returns 1 when they are anagrams of each other (two empty strings
+ * included), 0 otherwise.
+ */
+int checkAnagram(char *str1, char *str2)
+{
+    int balance[256] = {0};   /* +1 per byte of str1, -1 per byte of str2 */
+    int ctr;
+
+    /* strings of different length can never be anagrams */
+    if(strlen(str1) != strlen(str2))
+    {
+        return 0;
+    }
+
+    for(ctr = 0; str1[ctr] != '\0'; ctr++)
+    {
+        /* Index through unsigned char: plain char may be signed, and a
+           byte >= 0x80 would otherwise become a negative array index. */
+        balance[(unsigned char)str1[ctr]]++;
+        balance[(unsigned char)str2[ctr]]--;
+    }
+
+    /* every count is back at zero only if both strings hold the same bytes */
+    for(ctr = 0; ctr < 256; ctr++)
+    {
+        if(balance[ctr] != 0)
+            return 0;
+    }
+
+    return 1;
+}
diff --git a/C/SwapByRefandCopy.c b/C/SwapByRefandCopy.c
new file mode 100644
index 00000000..f888a2ca
--- /dev/null
+++ b/C/SwapByRefandCopy.c
@@ -0,0 +1,43 @@
+#include <stdio.h>
+
+// swap_ref exchanges the two ints that a and b point to. Because it receives
+// addresses and writes through them, the caller's variables really change.
+void swap_ref(int* a, int* b){
+ int first = *a;
+ int second = *b;
+ *a = second;
+ *b = first;
+}
+
+// swap receives copies of its arguments, so exchanging them only affects the
+// function's own locals; the caller's variables are left untouched.
+void swap(int a, int b) {
+ int saved_b = b;
+ b = a;
+ a = saved_b;
+}
+
+int main(void) {
+  // First pair: exchanged through pointers, so the second printout is reversed
+  int a = 1, b = 2;
+
+  printf("%d\n", a);
+  printf("%d\n", b);
+
+  // swap_ref gets the addresses of a and b
+  swap_ref(&a, &b);
+
+  printf("%d\n", a);
+  printf("%d\n", b);
+
+  // Second pair: passed by value, so both printouts are identical
+  int c = 5, d = 10;
+
+  printf("%d\n", c);
+  printf("%d\n", d);
+
+  swap(c, d);
+
+  printf("%d\n", c);
+  printf("%d\n", d);
+}
diff --git a/C/SwapIntegers.c b/C/SwapIntegers.c
new file mode 100644
index 00000000..b57df938
--- /dev/null
+++ b/C/SwapIntegers.c
@@ -0,0 +1,15 @@
+#include <stdio.h>
+// Swaps two integers without a temporary, using addition and subtraction.
+int main(void)
+{
+	int num1=5, num2=10;
+	// %d expects the int values themselves, not their addresses
+	printf("The numbers are: %d and %d", num1,num2);
+
+	num1=num1+num2; //num1=15 and num2=10
+	num2=num1-num2; //num1=15 and num2=5
+	num1=num1-num2; //num1=10 and num2=5
+
+	printf("The numbers have been swapped with new positions: %d and %d", num1,num2);
+	return 0;
+}
\ No newline at end of file
diff --git a/C/SwapIntegersWithout3rdVariable(Arithmatic).c b/C/SwapIntegersWithout3rdVariable(Arithmatic).c
new file mode 100644
index 00000000..b57df938
--- /dev/null
+++ b/C/SwapIntegersWithout3rdVariable(Arithmatic).c
@@ -0,0 +1,15 @@
+#include <stdio.h>
+// Swaps two integers without a temporary, using addition and subtraction.
+int main(void)
+{
+	int num1=5, num2=10;
+	// %d expects the int values themselves, not their addresses
+	printf("The numbers are: %d and %d", num1,num2);
+
+	num1=num1+num2; //num1=15 and num2=10
+	num2=num1-num2; //num1=15 and num2=5
+	num1=num1-num2; //num1=10 and num2=5
+
+	printf("The numbers have been swapped with new positions: %d and %d", num1,num2);
+	return 0;
+}
\ No newline at end of file
diff --git a/C/SwapValueUsingThirdVariable.c b/C/SwapValueUsingThirdVariable.c
new file mode 100644
index 00000000..9da8e755
--- /dev/null
+++ b/C/SwapValueUsingThirdVariable.c
@@ -0,0 +1,15 @@
+// Swap two integers using third variable
+
+
+#include<stdio.h>
+int main(void) // reads two integers, swaps them through a temporary and prints both
+{
+    int a,b,temp;
+    printf("Enter two no :\n");
+    if(scanf("%d%d",&a,&b) != 2) return 1; // both values are required; otherwise a and b are indeterminate
+    temp=a;
+    a=b;
+    b=temp;
+    printf("After swapping value of a = %d\n b =%d",a,b);
+    return 0;
+}
diff --git a/C/SwapValueWithoutUsingThirdVariable.c b/C/SwapValueWithoutUsingThirdVariable.c
new file mode 100644
index 00000000..d78454fa
--- /dev/null
+++ b/C/SwapValueWithoutUsingThirdVariable.c
@@ -0,0 +1,15 @@
+// Swap two integers without using third variable
+
+
+#include<stdio.h>
+int main(void) // reads two integers, swaps them with three XORs and prints both
+{
+    int a, b;
+    printf("Enter two no :\n");
+    if(scanf("%d%d",&a,&b) != 2) return 1; // both values are required; otherwise a and b are indeterminate
+    a = a^b; // a now holds a^b
+    b = a^b; // (a^b)^b is the original a
+    a = a^b; // (a^b)^a is the original b
+    printf("After swapping value of a and b : %d,%d",a,b);
+    return 0;
+}
diff --git a/C/Swapping(without using extra variable).cpp b/C/Swapping(without using extra variable).cpp
new file mode 100644
index 00000000..cf03d3c5
--- /dev/null
+++ b/C/Swapping(without using extra variable).cpp	
@@ -0,0 +1,13 @@
+#include<stdio.h>
+#include<conio.h>
+int main() // reads a and b, swaps them with add/subtract, then waits for a key press
+{
+	int a,b;
+	printf("Enter a and b\n");
+	if(scanf("%d%d",&a,&b) != 2) return 1; // both values are required; otherwise a and b are indeterminate
+	a=a+b; // NOTE: the sum can overflow int for large inputs (undefined behaviour)
+	b=a-b;
+	a=a-b;
+	printf("After swapping a and b are %d %d",a,b);
+    getch(); // main may fall off the end: that is an implicit return 0
+}
diff --git a/C/Temperature.c b/C/Temperature.c
new file mode 100644
index 00000000..a85a7bb8
--- /dev/null
+++ b/C/Temperature.c
@@ -0,0 +1,8 @@
+#include<stdio.h>
+int main(void) /* converts a Celsius reading from stdin to Fahrenheit */
+{ float c,f;
+printf("Enter the Temperature in Celcius : ");
+if (scanf("%f",&c) != 1) return 1;
+f=c*9.0f/5.0f+32; /* 9/5 in integer arithmetic is 1, so the ratio must be computed in floating point */
+printf("Temperature in Fahernheit is %f",f);
+}
diff --git a/C/TemperatureSwitch.c b/C/TemperatureSwitch.c
new file mode 100644
index 00000000..d997d6bb
--- /dev/null
+++ b/C/TemperatureSwitch.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+
+int main()
+{
+	// The resultant temperatures can be fractional, so we use double
+	double c, f, result;
+	// An integer selects the branch of the switch statement
+	int choice;
+	printf("Select your choice: \n");
+	printf("1. Celcius to Fahrenheit\n");
+	printf("2. Fahrenheit to Celcius\n");
+
+	printf("Enter your choice: ");
+	if (scanf("%d", &choice) != 1) choice = 0; // unreadable input is treated as an invalid choice
+
+	// Compute the requested conversion
+	switch(choice)
+	{
+		case 1:
+			printf("Enter the temperature in Celcius: ");
+			if (scanf("%lf", &c) != 1) return 1;
+			result = 9.0 / 5.0 * c + 32; // 9 / 5 in integer arithmetic truncates to 1
+			break;
+		case 2:
+			printf("Enter the temperature in Fahrenheit: ");
+			if (scanf("%lf", &f) != 1) return 1;
+			result = 5.0 / 9.0 * (f - 32); // 5 / 9 in integer arithmetic truncates to 0
+			break;
+		// Taken when the user enters anything other than 1 or 2
+		default:
+			printf("Invalid case!\n");
+			return 1; // no result was computed, so there is nothing to print
+	}
+
+	// Print the result of the chosen conversion
+	printf("The resultant temperature is: %lf", result);
+	return 0;
+}
diff --git a/C/TernaryOperator.c b/C/TernaryOperator.c
new file mode 100644
index 00000000..5eac1939
--- /dev/null
+++ b/C/TernaryOperator.c
@@ -0,0 +1,13 @@
+//Largest number among 3 numbers using ternary operator
+
+#include<stdio.h>
+int main(void) // reads three numbers and prints the largest
+{
+    float a,b,c,large;
+    printf("Enter any 3 numbers\n");
+    if (scanf("%f%f%f",&a,&b,&c) != 3) return 1; // all three values are needed
+    // nested conditional: the winner of a vs b is then compared against c
+    large = a>b? (a>c?a:c): (b>c?b:c);
+    printf("The larger no is :%f\n", large);
+    return 0;
+}
diff --git a/C/Trif.c b/C/Trif.c
new file mode 100644
index 00000000..242172fa
--- /dev/null
+++ b/C/Trif.c
@@ -0,0 +1,10 @@
+#include<stdio.h>
+#include<math.h>
+int main(void) /* reads an angle in degrees and prints its sine */
+{ double a,b;
+printf("Enter the degree : ");
+if (scanf("%lf",&a) != 1) return 1;
+a=(a*3.14159265358979323846)/180; /* degrees to radians; 3.14 alone is already wrong in the third decimal */
+b=sin(a);
+printf("Sine is %lf",b);
+}
diff --git a/C/UppercaseToLowercase.c b/C/UppercaseToLowercase.c
new file mode 100644
index 00000000..aae975aa
--- /dev/null
+++ b/C/UppercaseToLowercase.c
@@ -0,0 +1,14 @@
+// Uppercase character to lowercase character
+
+
+#include<stdio.h>
+int main(void) // reads one character and prints its lowercase form
+{
+    char a,u;
+    printf("Enter Uppercase letter :\n");
+    if (scanf("%c", &a) != 1) return 1;
+    // Only real uppercase letters are shifted; any other character is echoed unchanged.
+    u = (a >= 'A' && a <= 'Z') ? a + ('a' - 'A') : a;
+    printf("Lowercase is : %c", u);
+    return 0;
+}
diff --git a/C/VowelorConsonant.c b/C/VowelorConsonant.c
new file mode 100644
index 00000000..89e3be23
--- /dev/null
+++ b/C/VowelorConsonant.c
@@ -0,0 +1,27 @@
+// Program to input a character and check whether it is vowel or consonant using switch case
+#include <stdio.h>
+int main(){
+    // Reads one character and reports whether it is a vowel, a consonant or not a letter.
+    char ch;
+    printf("Enter a character\n");
+    if(scanf("%c", &ch) != 1)
+        return 1;
+
+    switch(ch)
+    {
+    // vowels in either case share one branch
+    case 'a': case 'e': case 'i': case 'o': case 'u':
+    case 'A': case 'E': case 'I': case 'O': case 'U':
+        printf("Entered character is a vowel");
+        break;
+    default:
+        // Only the remaining letters are consonants; digits, punctuation and
+        // whitespace used to be reported as consonants too.
+        if((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
+            printf("Entered character is a consonant");
+        else
+            printf("Entered character is not a letter");
+        break;
+    }
+    return 0;
+}
diff --git a/C/alphabetTriangle.cpp b/C/alphabetTriangle.cpp
new file mode 100644
index 00000000..cd6ae215
--- /dev/null
+++ b/C/alphabetTriangle.cpp
@@ -0,0 +1,28 @@
+#include<stdio.h>    
+#include<stdlib.h>  
+
+/*
+     A                                                                                                                                                        
+    ABA                                                                                                                                                       
+   ABCBA                                                                                                                                                      
+  ABCDCBA                                                                                                                                                     
+ ABCDEDCBA 
+ 
+*/
+int main(){
+    int row,col;
+    for(row=1;row<=5;row++)
+    {
+        /* left padding: one space for this row and one per row still to come */
+        for(col=5;col>=row;col--)
+            printf(" ");
+        /* ascending half: 'A' (code 65) up to the row's peak letter */
+        for(col=0;col<row;col++)
+            printf("%c",65+col);
+        /* descending half: from just below the peak back down to 'A' */
+        for(col=row-2;col>=0;col--)
+            printf("%c",65+col);
+        printf("\n");
+    }
+return 0;
+}
\ No newline at end of file
diff --git a/C/swappingwithoutthirdvariable.c b/C/swappingwithoutthirdvariable.c
new file mode 100644
index 00000000..21a41c2f
--- /dev/null
+++ b/C/swappingwithoutthirdvariable.c
@@ -0,0 +1,13 @@
+//Swap Values Without using Third Variable.
+#include<stdio.h>
+int main(void) /* reads two integers, swaps them with add/subtract and prints both */
+{
+    int a,b;  /*declaration of variables*/
+    printf("Enter Two Numbers\n");
+    if(scanf("%d %d",&a,&b) != 2) return 1; /* both values are required; otherwise a and b are indeterminate */
+    a=a+b; /* a holds the sum; NOTE: it can overflow int for large inputs */
+    b=a-b; /* sum minus old b is the old a */
+    a=a-b; /* sum minus old a is the old b */
+    printf("the number after swapping are %d %d\n",a,b);/*printing*/
+    return 0;
+}
\ No newline at end of file
diff --git a/C/transposeOfMatrix.c b/C/transposeOfMatrix.c
new file mode 100644
index 00000000..1d9d0c6c
--- /dev/null
+++ b/C/transposeOfMatrix.c
@@ -0,0 +1,40 @@
+//C program to input a matrix of order MxN and find its transpose
+
+#include <stdio.h>
+#include<conio.h>
+int main()
+{
+    static int array[10][10];
+    int i, j, row, col;
+
+    printf("Enter the order of the matrix \n");
+    // The matrix lives in a fixed 10x10 buffer, so unreadable, negative or larger orders are rejected.
+    if (scanf("%d %d", &row, &col) != 2 || row < 0 || row > 10 || col < 0 || col > 10)
+        return 1;
+    printf("Enter the coefficients of the matrix\n");
+    for (i = 0; i < row; ++i)
+    {
+        for (j = 0; j < col; ++j)
+            if (scanf("%d", &array[i][j]) != 1) return 1;
+    }
+    printf("The given matrix is \n");
+    for (i = 0; i < row; ++i)
+    {
+        for (j = 0; j < col; ++j)
+        {
+            printf(" %d", array[i][j]);
+        }
+        printf("\n");
+    }
+    printf("Transpose of matrix is \n");
+    // Same element access with the loops swapped: columns outermost, so each printed row is a column.
+    for (j = 0; j < col; ++j)
+    {
+        for (i = 0; i < row; ++i)
+        {
+            printf(" %d", array[i][j]);
+        }
+        printf("\n");
+    }
+    return 0;
+}
diff --git a/Contributors.md b/Contributors.md
index 272a6a58..964e40dd 100644
--- a/Contributors.md
+++ b/Contributors.md
@@ -117,5 +117,10 @@ Name: [Muhammad Iqbal R](https://github.com/miqbalrr) <br/>
 Place: Indonesia <br/>
 About: BACKEND Developer <br/>
 
+Name: [Yash Agarwal](https://github.com/yashagarwaldev) <br/>
+Place: India <br/>
+About: Developer <br/>
+
+
 
 
diff --git a/DeepLearning/c/DBN.c b/DeepLearning/c/DBN.c
new file mode 100644
index 00000000..eee1e1ca
--- /dev/null
+++ b/DeepLearning/c/DBN.c
@@ -0,0 +1,596 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "HiddenLayer.h"
+#include "RBM.h"
+#include "LogisticRegression.h"
+#include "DBN.h"
+#include "utils.h"
+
+void test_dbn(void);
+/* Draws a pseudo-random double from [min, max) using rand(). */
+double uniform(double min, double max) {
+  const double width = max - min;
+  return rand() / (RAND_MAX + 1.0) * width + min;
+}
+
+/* Counts the successes in n independent trials that each succeed with
+   probability p, driven by rand(). Returns 0 when p lies outside [0, 1]. */
+int binomial(int n, double p) {
+  int successes = 0, trial;
+
+  if(p < 0 || p > 1) return 0;
+
+  for(trial = n; trial > 0; trial--) {
+    /* one draw in [0, 1); it is a success when it falls below p */
+    if(rand() / (RAND_MAX + 1.0) < p) successes++;
+  }
+
+  return successes;
+}
+/* Logistic function 1 / (1 + e^-x). */
+double sigmoid(double x) {
+  return 1.0 / (exp(-x) + 1.0);
+}
+
+
+
+// DBN
+/* Initialises *this as a DBN: n_layers stacked hidden layers plus a
+   logistic-regression output layer on top of the last one.
+   Layer i gets a HiddenLayer and an RBM; the RBM is handed the HiddenLayer's
+   W and b, so pre-training the RBM updates the same arrays the HiddenLayer
+   reads. hidden_layer_sizes (n_layers entries) is stored by pointer, not
+   copied. N is forwarded unchanged to every sub-layer constructor.
+   n_ins / n_outs are the input and output vector lengths. */
+void DBN__construct(DBN* this, int N, \
+                    int n_ins, int *hidden_layer_sizes, int n_outs, int n_layers) {
+  int layer;
+
+  this->N = N;
+  this->n_ins = n_ins;
+  this->hidden_layer_sizes = hidden_layer_sizes;
+  this->n_outs = n_outs;
+  this->n_layers = n_layers;
+
+  this->sigmoid_layers = (HiddenLayer *)malloc(sizeof(HiddenLayer) * n_layers);
+  this->rbm_layers = (RBM *)malloc(sizeof(RBM) * n_layers);
+
+  for(layer=0; layer<n_layers; layer++) {
+    HiddenLayer *hidden = &(this->sigmoid_layers[layer]);
+    /* the first layer reads the raw input; later ones read the layer below */
+    int fan_in = (layer == 0) ? n_ins : hidden_layer_sizes[layer-1];
+
+    HiddenLayer__construct(hidden, N, fan_in, hidden_layer_sizes[layer], NULL, NULL);
+
+    /* W and b are passed in, so both layers work on the same arrays */
+    RBM__construct(&(this->rbm_layers[layer]), N, fan_in, hidden_layer_sizes[layer], \
+                   hidden->W, hidden->b, NULL);
+  }
+
+  /* output layer: logistic regression fed by the last hidden layer */
+  LogisticRegression__construct(&(this->log_layer), \
+                                N, hidden_layer_sizes[n_layers-1], n_outs);
+}
+/* Releases everything DBN__construct allocated; *this itself is not freed. */
+void DBN__destruct(DBN* this) {
+  for(int i=0; i<this->n_layers; i++) {
+    HiddenLayer__destruct(&(this->sigmoid_layers[i]));
+    RBM__destruct(&(this->rbm_layers[i]));
+  }
+  LogisticRegression__destruct(&(this->log_layer)); /* the output layer used to be leaked */
+  free(this->sigmoid_layers);
+  free(this->rbm_layers);
+}
+
+/*
+ * Greedy layer-wise pre-training of the RBM stack.
+ *
+ * For each layer i, every training sample is propagated (by sampling) through
+ * hidden layers 0..i-1 and the result drives one contrastive-divergence update
+ * of RBM i. This is repeated for `epochs` passes before moving to layer i+1.
+ *
+ * input:  N * n_ins values, one sample per row (row-major).
+ * lr, k:  learning rate and Gibbs step count, forwarded to
+ *         RBM_contrastive_divergence.
+ * epochs: passes over the training set per layer.
+ */
+void DBN_pretrain(DBN* this, int *input, double lr, int k, int epochs) {
+  int i, j, l, n, epoch;
+  int *layer_input;
+  int *prev_layer_input;
+
+  for(i=0; i<this->n_layers; i++) { // layer-wise
+
+    for(epoch=0; epoch<epochs; epoch++) { // training epochs
+
+      for(n=0; n<this->N; n++) { // input x1...xN
+        // start from a private copy of sample n
+        layer_input = (int *)malloc(sizeof(int) * this->n_ins);
+        for(j=0; j<this->n_ins; j++) layer_input[j] = input[n * this->n_ins + j];
+
+        // sample upwards through the layers below layer i
+        for(l=1; l<=i; l++) {
+          prev_layer_input = layer_input;
+          layer_input = (int *)malloc(sizeof(int) * this->hidden_layer_sizes[l-1]);
+
+          HiddenLayer_sample_h_given_v(&(this->sigmoid_layers[l-1]),
+                                       prev_layer_input, layer_input);
+          free(prev_layer_input);
+        }
+
+        RBM_contrastive_divergence(&(this->rbm_layers[i]), layer_input, lr, k);
+
+        // Release this sample's buffer here: a single free after all loops
+        // leaked one buffer per sample and freed an uninitialised pointer
+        // when no iteration ran.
+        free(layer_input);
+      }
+
+    }
+  }
+}
+
+/*
+ * Supervised fine-tuning of the output layer.
+ *
+ * Every sample is propagated (by sampling) through all hidden layers and the
+ * top-level sample is used for one LogisticRegression_train step on log_layer.
+ * The hidden layers themselves are only read here.
+ *
+ * input:  N * n_ins values, label: N * n_outs values (both row-major).
+ * lr:     learning rate forwarded to LogisticRegression_train.
+ * epochs: passes over the training set.
+ */
+void DBN_finetune(DBN* this, int *input, int *label, double lr, int epochs) {
+  int i, j, n, epoch;
+  int *layer_input;
+  int *prev_layer_input;
+
+  for(epoch=0; epoch<epochs; epoch++) {
+
+    for(n=0; n<this->N; n++) { // input x1...xN
+      // start from a private copy of sample n
+      layer_input = (int *)malloc(sizeof(int) * this->n_ins);
+      for(j=0; j<this->n_ins; j++) layer_input[j] = input[n * this->n_ins + j];
+
+      // sample upwards through every hidden layer
+      for(i=0; i<this->n_layers; i++) {
+        prev_layer_input = layer_input;
+        layer_input = (int *)malloc(sizeof(int) * this->hidden_layer_sizes[i]);
+        HiddenLayer_sample_h_given_v(&(this->sigmoid_layers[i]),
+                                     prev_layer_input, layer_input);
+        free(prev_layer_input);
+      }
+
+      // the labels of sample n are only read, so they are passed in place
+      LogisticRegression_train(&(this->log_layer), layer_input,
+                               label + n * this->n_outs, lr);
+
+      // Release this sample's buffer here: a single free after all loops
+      // leaked one buffer per sample and freed an uninitialised pointer
+      // when no iteration ran.
+      free(layer_input);
+    }
+  }
+}
+
+/*
+ * Forward pass for one sample: feeds x through every hidden layer using the
+ * deterministic sigmoid activations (no sampling), applies the output layer
+ * and writes the softmax result into y.
+ *
+ * x: n_ins input values; y: receives log_layer.n_out values.
+ */
+void DBN_predict(DBN* this, int *x, double *y) {
+  int layer, unit, src;
+  double *activation;   /* values entering the current layer */
+  double *next;         /* values the current layer produces */
+
+  activation = (double *)malloc(sizeof(double) * this->n_ins);
+  for(src=0; src<this->n_ins; src++) activation[src] = x[src];
+
+  for(layer=0; layer<this->n_layers; layer++) {
+    HiddenLayer *hl = &(this->sigmoid_layers[layer]);
+    next = (double *)malloc(sizeof(double) * hl->n_out);
+
+    for(unit=0; unit<hl->n_out; unit++) {
+      double linear_output = 0.0;
+      for(src=0; src<hl->n_in; src++) {
+        linear_output += hl->W[unit][src] * activation[src];
+      }
+      linear_output += hl->b[unit];
+      next[unit] = sigmoid(linear_output);
+    }
+
+    /* this layer's output becomes the next layer's input */
+    free(activation);
+    activation = next;
+  }
+
+  for(unit=0; unit<this->log_layer.n_out; unit++) {
+    y[unit] = 0;
+    for(src=0; src<this->log_layer.n_in; src++) {
+      y[unit] += this->log_layer.W[unit][src] * activation[src];
+    }
+    y[unit] += this->log_layer.b[unit];
+  }
+
+  LogisticRegression_softmax(&(this->log_layer), y);
+
+  free(activation);
+}
+
+
+// HiddenLayer
+/* Initialises *this with n_in inputs and n_out units. A NULL W is replaced by
+   a contiguous n_out x n_in matrix filled with uniform(-1/n_in, 1/n_in); a NULL
+   b by a zeroed bias vector. Non-NULL W / b are adopted as-is (not copied). */
+void HiddenLayer__construct(HiddenLayer* this, int N, int n_in, int n_out, \
+                            double **W, double *b) {
+  int i, j;
+  double a = 1.0 / n_in;
+  this->N = N;
+  this->n_in = n_in;
+  this->n_out = n_out;
+  if(W == NULL) {
+    this->W = (double **)malloc(sizeof(double*) * n_out);
+    this->W[0] = (double *)malloc(sizeof(double) * n_in * n_out);
+    for(i=0; i<n_out; i++) this->W[i] = this->W[0] + i * n_in;
+    for(i=0; i<n_out; i++) {
+      for(j=0; j<n_in; j++) {
+        this->W[i][j] = uniform(-a, a);
+      }
+    }
+  } else {
+    this->W = W;
+  }
+
+  if(b == NULL) {
+    this->b = (double *)malloc(sizeof(double) * n_out);
+    for(i=0; i<n_out; i++) this->b[i] = 0;  /* was left uninitialised, yet read by HiddenLayer_output */
+  } else {
+    this->b = b;
+  }
+}
+/* Frees the bias vector and the weights (data block first, then the row table). */
+void HiddenLayer__destruct(HiddenLayer* this) {
+  free(this->b);
+  free(this->W[0]);
+  free(this->W);
+}
+/* Activation of one unit: sigmoid(w . input + b) over this->n_in inputs. */
+double HiddenLayer_output(HiddenLayer* this, int *input, double *w, double b) {
+  double acc = 0.0;
+  int idx;
+  for(idx=0; idx<this->n_in; idx++) {
+    acc += w[idx] * input[idx];
+  }
+  acc += b;
+  return sigmoid(acc);
+}
+/* Draws a 0/1 sample for every unit, with P(1) equal to that unit's activation. */
+void HiddenLayer_sample_h_given_v(HiddenLayer* this, int *input, int *sample) {
+  for(int unit=0; unit<this->n_out; unit++) {
+    double p = HiddenLayer_output(this, input, this->W[unit], this->b[unit]);
+    sample[unit] = binomial(1, p);
+  }
+}
+
+
+// RBM
+/* Initialises *this with n_visible visible and n_hidden hidden units. Non-NULL
+   W / hbias / vbias are adopted as-is (not copied). A NULL W becomes an n_hidden
+   x n_visible block of uniform(-1/n_visible, 1/n_visible); NULL biases are zeroed. */
+void RBM__construct(RBM* this, int N, int n_visible, int n_hidden, \
+                    double **W, double *hbias, double *vbias) {
+  int row, col;
+  const double bound = 1.0 / n_visible;
+  this->N = N;
+  this->n_visible = n_visible;
+  this->n_hidden = n_hidden;
+  if(W != NULL) {
+    this->W = W;
+  } else {
+    this->W = (double **)malloc(sizeof(double*) * n_hidden);
+    this->W[0] = (double *)malloc(sizeof(double) * n_visible * n_hidden);
+    for(row=0; row<n_hidden; row++) this->W[row] = this->W[0] + row * n_visible;
+    for(row=0; row<n_hidden; row++) {
+      for(col=0; col<n_visible; col++) {
+        this->W[row][col] = uniform(-bound, bound);
+      }
+    }
+  }
+
+  if(hbias != NULL) {
+    this->hbias = hbias;
+  } else {
+    this->hbias = (double *)malloc(sizeof(double) * n_hidden);
+    for(row=0; row<n_hidden; row++) this->hbias[row] = 0;
+  }
+
+  if(vbias != NULL) {
+    this->vbias = vbias;
+  } else {
+    this->vbias = (double *)malloc(sizeof(double) * n_visible);
+    for(col=0; col<n_visible; col++) this->vbias[col] = 0;
+  }
+}
+// Releases the RBM's visible-bias vector only.
+void RBM__destruct(RBM* this) {
+  // W and hbias are left alone: DBN__construct passes the HiddenLayer's W and
+  // b into RBM__construct, and HiddenLayer__destruct frees those arrays.
+  // NOTE(review): an RBM constructed with NULL W/hbias owns them and leaks here.
+  free(this->vbias);
+}
+
+/* One CD-k update from a single visible sample: samples the hidden layer
+   from `input`, runs k Gibbs steps (hidden -> visible -> hidden), then moves
+   W, hbias and vbias by lr / N times the difference between the data-driven
+   and the chain-driven statistics. */
+void RBM_contrastive_divergence(RBM* this, int *input, double lr, int k) {
+  int h, v, step;
+  int *chain_start;
+  double *ph_mean = (double *)malloc(sizeof(double) * this->n_hidden);
+  int *ph_sample = (int *)malloc(sizeof(int) * this->n_hidden);
+  double *nv_means = (double *)malloc(sizeof(double) * this->n_visible);
+  int *nv_samples = (int *)malloc(sizeof(int) * this->n_visible);
+  double *nh_means = (double *)malloc(sizeof(double) * this->n_hidden);
+  int *nh_samples = (int *)malloc(sizeof(int) * this->n_hidden);
+
+  /* hidden probabilities and sample driven by the data */
+  RBM_sample_h_given_v(this, input, ph_mean, ph_sample);
+  /* the chain starts from that hidden sample, then feeds on its own output */
+  chain_start = ph_sample;
+  for(step=0; step<k; step++) {
+    RBM_gibbs_hvh(this, chain_start, nv_means, nv_samples, nh_means, nh_samples);
+    chain_start = nh_samples;
+  }
+
+  for(h=0; h<this->n_hidden; h++) {
+    for(v=0; v<this->n_visible; v++) {
+      this->W[h][v] += lr * (ph_mean[h] * input[v] - nh_means[h] * nv_samples[v]) / this->N;
+    }
+    this->hbias[h] += lr * (ph_sample[h] - nh_means[h]) / this->N; /* sample here, mean for W */
+  }
+
+  for(v=0; v<this->n_visible; v++) {
+    this->vbias[v] += lr * (input[v] - nv_samples[v]) / this->N;
+  }
+
+  free(ph_mean);
+  free(ph_sample);
+  free(nv_means);
+  free(nv_samples);
+  free(nh_means);
+  free(nh_samples);
+}
+/* For every hidden unit: stores its activation probability in mean[] and a 0/1 draw in sample[]. */
+void RBM_sample_h_given_v(RBM* this, int *v0_sample, double *mean, int *sample) {
+  int h;
+  for(h=0; h<this->n_hidden; h++) {
+    const double p = RBM_propup(this, v0_sample, this->W[h], this->hbias[h]);
+    mean[h] = p;
+    sample[h] = binomial(1, p);
+  }
+}
+/* For every visible unit: stores its activation probability in mean[] and a 0/1 draw in sample[]. */
+void RBM_sample_v_given_h(RBM* this, int *h0_sample, double *mean, int *sample) {
+  for(int v=0; v<this->n_visible; v++) {
+    const double p = RBM_propdown(this, h0_sample, v, this->vbias[v]);
+    mean[v] = p;
+    sample[v] = binomial(1, p);
+  }
+}
+/* Activation probability of a hidden unit with weights w and bias b, given visible vector v. */
+double RBM_propup(RBM* this, int *v, double *w, double b) {
+  double activation = 0.0;
+  int vis;
+  for(vis=0; vis<this->n_visible; vis++) {
+    activation += w[vis] * v[vis];
+  }
+  activation += b;
+  return sigmoid(activation);
+}
+/* Activation probability of visible unit i given hidden vector h; b is that
+   unit's bias. Reads column i of W, i.e. W[j][i] for every hidden unit j. */
+double RBM_propdown(RBM* this, int *h, int i, double b) {
+  double activation = 0.0;
+  int hid;
+  for(hid=0; hid<this->n_hidden; hid++) {
+    activation += this->W[hid][i] * h[hid];
+  }
+  activation += b;
+  return sigmoid(activation);
+}
+/* One Gibbs step h -> v -> h: samples the visible layer from h0_sample, then the hidden layer from that visible sample. */
+void RBM_gibbs_hvh(RBM* this, int *h0_sample, double *nv_means, int *nv_samples, \
+                   double *nh_means, int *nh_samples) {
+  RBM_sample_v_given_h(this, h0_sample, nv_means, nv_samples);
+  RBM_sample_h_given_v(this, nv_samples, nh_means, nh_samples);
+}
+/* Deterministic reconstruction of v: computes the hidden activation
+   probabilities, then maps them back through W and vbias into reconstructed_v
+   (n_visible values). No sampling is involved. */
+void RBM_reconstruct(RBM* this, int *v, double *reconstructed_v) {
+  int hid, vis;
+  double *hidden_prob = (double *)malloc(sizeof(double) * this->n_hidden);
+
+  for(hid=0; hid<this->n_hidden; hid++) {
+    hidden_prob[hid] = RBM_propup(this, v, this->W[hid], this->hbias[hid]);
+  }
+
+  for(vis=0; vis<this->n_visible; vis++) {
+    double activation = 0.0;
+    for(hid=0; hid<this->n_hidden; hid++) {
+      activation += this->W[hid][vis] * hidden_prob[hid];
+    }
+    activation += this->vbias[vis];
+    reconstructed_v[vis] = sigmoid(activation);
+  }
+
+  free(hidden_prob);
+}
+
+// LogisticRegression
+/* Initialises *this for n_in inputs and n_out outputs with all weights and
+   biases set to zero. W is an n_out x n_in matrix stored in one block. */
+void LogisticRegression__construct(LogisticRegression *this, int N, int n_in, int n_out) {
+  int row, col;
+
+  this->N = N;
+  this->n_in = n_in;
+  this->n_out = n_out;
+
+  this->W = (double **)malloc(sizeof(double*) * n_out);
+  this->W[0] = (double *)malloc(sizeof(double) * n_in * n_out);
+  this->b = (double *)malloc(sizeof(double) * n_out);
+
+  for(row=0; row<n_out; row++) {
+    this->W[row] = this->W[0] + row * n_in;  /* row pointer into the single block */
+    for(col=0; col<n_in; col++) this->W[row][col] = 0;
+    this->b[row] = 0;
+  }
+}
+/* Frees the bias vector and the weights (data block first, then the row table). */
+void LogisticRegression__destruct(LogisticRegression *this) {
+  free(this->b);
+  free(this->W[0]);
+  free(this->W);
+}
+/*
+ * One gradient step on a single example.
+ * x: n_in inputs; y: n_out target values.
+ * Each weight moves by lr * (y - softmax(W x + b)) * x / N, each bias by the same without x.
+ */
+void LogisticRegression_train(LogisticRegression *this, int *x, int *y, double lr) {
+  int out, in;
+  double *p_y_given_x = (double *)malloc(sizeof(double) * this->n_out);
+
+  /* scores W x + b, turned into probabilities by the softmax below */
+  for(out=0; out<this->n_out; out++) {
+    p_y_given_x[out] = 0;
+    for(in=0; in<this->n_in; in++) {
+      p_y_given_x[out] += this->W[out][in] * x[in];
+    }
+    p_y_given_x[out] += this->b[out];
+  }
+  LogisticRegression_softmax(this, p_y_given_x);
+
+  for(out=0; out<this->n_out; out++) {
+    const double err = y[out] - p_y_given_x[out];
+    for(in=0; in<this->n_in; in++) {
+      this->W[out][in] += lr * err * x[in] / this->N;
+    }
+    this->b[out] += lr * err / this->N;
+  }
+  free(p_y_given_x);
+}
+/* In-place softmax over x[0..n_out-1]: replaces each entry by exp(x[i]) normalised so the entries sum to 1. */
+void LogisticRegression_softmax(LogisticRegression *this, double *x) {
+  int i;
+  double max, sum = 0.0;
+  if(this->n_out <= 0) return;
+  /* Seed with x[0], not 0.0: for all-negative input a zero "max" lets every exp() underflow, giving 0/0 = NaN. */
+  max = x[0];
+  for(i=1; i<this->n_out; i++) if(max < x[i]) max = x[i];
+  for(i=0; i<this->n_out; i++) {
+    x[i] = exp(x[i] - max);
+    sum += x[i];
+  }
+  for(i=0; i<this->n_out; i++) x[i] /= sum;
+}
+/* Writes softmax(W x + b) for the n_in inputs in x into y (n_out values). */
+void LogisticRegression_predict(LogisticRegression *this, int *x, double *y) {
+  int out, in;
+
+  for(out=0; out<this->n_out; out++) {
+    y[out] = 0;
+    for(in=0; in<this->n_in; in++) {
+      y[out] += this->W[out][in] * x[in];
+    }
+    y[out] += this->b[out];
+  }
+
+  LogisticRegression_softmax(this, y);
+}
+
+/* Trains a small DBN on a 6-sample toy set and prints the predicted output vector for 4 test inputs. */
+void test_dbn(void) {
+  srand(0);  /* fixed seed for the rand()-driven initialisation and sampling */
+
+  int sample, cls;
+
+  double pretrain_lr = 0.1;
+  int pretraining_epochs = 1000;
+  int k = 1;
+  double finetune_lr = 0.1;
+  int finetune_epochs = 500;
+
+  int train_N = 6;
+  int test_N = 4;
+  int n_ins = 6;
+  int n_outs = 2;
+  int hidden_layer_sizes[] = {3, 3};
+  int n_layers = sizeof(hidden_layer_sizes) / sizeof(hidden_layer_sizes[0]);
+
+
+  // training inputs, one sample per row
+  int train_X[6][6] = {
+    {1, 1, 1, 0, 0, 0},
+    {1, 0, 1, 0, 0, 0},
+    {1, 1, 1, 0, 0, 0},
+    {0, 0, 1, 1, 1, 0},
+    {0, 0, 1, 1, 0, 0},
+    {0, 0, 1, 1, 1, 0}
+  };
+
+  int train_Y[6][2] = {
+    {1, 0},
+    {1, 0},
+    {1, 0},
+    {0, 1},
+    {0, 1},
+    {0, 1}
+  };
+
+  // build the network
+  DBN dbn;
+  DBN__construct(&dbn, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers);
+
+  // unsupervised pre-training; the 2-D array is passed as a flat row-major block
+  DBN_pretrain(&dbn, &train_X[0][0], pretrain_lr, k, pretraining_epochs);
+
+  // supervised fine-tuning of the output layer
+  DBN_finetune(&dbn, &train_X[0][0], &train_Y[0][0], finetune_lr, finetune_epochs);
+
+  // test inputs
+  int test_X[4][6] = {
+    {1, 1, 0, 0, 0, 0},
+    {1, 1, 1, 1, 0, 0},
+    {0, 0, 0, 1, 1, 0},
+    {0, 0, 1, 1, 1, 0}
+  };
+
+  double test_Y[4][2];
+
+  // predict each test row and print its output values on one line
+  for(sample=0; sample<test_N; sample++) {
+    DBN_predict(&dbn, test_X[sample], test_Y[sample]);
+    for(cls=0; cls<n_outs; cls++) {
+      printf("%.5f ", test_Y[sample][cls]);
+    }
+    printf("\n");
+  }
+
+  // release the network
+  DBN__destruct(&dbn);
+
+}
+
+
+/* Program entry point: runs the DBN demo and reports success. */
+int main(void) {
+  test_dbn();
+  return 0;
+}
diff --git a/DeepLearning/c/DBN.h b/DeepLearning/c/DBN.h
new file mode 100644
index 00000000..26890748
--- /dev/null
+++ b/DeepLearning/c/DBN.h
@@ -0,0 +1,21 @@
+#ifndef DBN_H
+#define DBN_H
+
+typedef struct {  // deep belief network; HiddenLayer, RBM and LogisticRegression must be declared before this header
+  int N;                        // NOTE(review): presumably the training-set size (test_dbn passes train_N) - confirm in DBN.c
+  int n_ins;                    // presumably the length of one input row
+  int *hidden_layer_sizes;      // presumably one unit count per hidden layer
+  int n_outs;                   // presumably the number of output classes
+  int n_layers;                 // presumably the number of hidden layers
+  HiddenLayer *sigmoid_layers;  // presumably one per hidden layer
+  RBM *rbm_layers;              // presumably one per hidden layer
+  LogisticRegression log_layer; // output layer
+} DBN;
+
+void DBN__construct(DBN*, int, int, int*, int, int);  // test_dbn passes: N, n_ins, hidden_layer_sizes, n_outs, n_layers
+void DBN__destruct(DBN*);
+void DBN_pretrain(DBN*, int*, double, int, int);      // test_dbn passes: flat train_X, learning rate, k, epochs
+void DBN_finetune(DBN*, int*, int*, double, int);     // test_dbn passes: flat train_X, flat train_Y, learning rate, epochs
+void DBN_predict(DBN*, int*, double*);                // test_dbn passes: one input row, output buffer of n_outs doubles
+
+#endif
diff --git a/DeepLearning/c/HiddenLayer.h b/DeepLearning/c/HiddenLayer.h
new file mode 100644
index 00000000..11cbdfe0
--- /dev/null
+++ b/DeepLearning/c/HiddenLayer.h
@@ -0,0 +1,17 @@
+#ifndef HIDDENLAYER_H
+#define HIDDENLAYER_H
+
+typedef struct {  // fully connected sigmoid layer
+  int N;        // value given to HiddenLayer__construct as N (SdA__construct passes its own N)
+  int n_in;     // number of inputs (length of each W row)
+  int n_out;    // number of units (rows of W, entries of b)
+  double **W;   // weights, indexed W[unit][input]
+  double *b;    // per-unit bias
+} HiddenLayer;
+
+void HiddenLayer__construct(HiddenLayer*, int, int, int, double**, double*);  // (this, N, n_in, n_out, W or NULL, b or NULL)
+void HiddenLayer__destruct(HiddenLayer*);
+double HiddenLayer_output(HiddenLayer*, int*, double*, double);               // (this, input, weight row w, bias b)
+void HiddenLayer_sample_h_given_v(HiddenLayer*, int*, int*);                  // (this, input, sample out)
+
+#endif
diff --git a/DeepLearning/c/LogisticRegression.c b/DeepLearning/c/LogisticRegression.c
new file mode 100644
index 00000000..b55c7079
--- /dev/null
+++ b/DeepLearning/c/LogisticRegression.c
@@ -0,0 +1,170 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "LogisticRegression.h"
+
+void test_lr(void);
+
+
+/* Set up a softmax classifier with n_in inputs and n_out classes.  W is n_out
+   row pointers into one contiguous n_out*n_in block; W and b start at zero. */
+void LogisticRegression__construct(LogisticRegression *this, int N, int n_in, int n_out) {
+  int row, col;
+
+  this->N = N;
+  this->n_in = n_in;
+  this->n_out = n_out;
+
+  this->W = (double **)malloc(sizeof(double*) * n_out);
+  this->W[0] = (double *)malloc(sizeof(double) * n_in * n_out);
+  this->b = (double *)malloc(sizeof(double) * n_out);
+  for(row=0; row<n_out; row++) {
+    this->W[row] = this->W[0] + row * n_in;
+    for(col=0; col<n_in; col++) this->W[row][col] = 0;
+    this->b[row] = 0;
+  }
+}
+
+void LogisticRegression__destruct(LogisticRegression *this) {  /* frees what __construct allocated */
+  free(this->b);
+  free(this->W[0]);  /* the contiguous weight block every row pointer indexes into */
+  free(this->W);     /* the row-pointer table itself */
+}
+
+/* One stochastic-gradient step on a single example.
+   x: n_in input values, y: n_out target values, lr: learning rate.
+   Every update is divided by this->N. */
+void LogisticRegression_train(LogisticRegression *this, int *x, int *y, double lr) {
+  int row, col;
+  double *p_y_given_x = (double *)malloc(sizeof(double) * this->n_out);
+  double *dy = (double *)malloc(sizeof(double) * this->n_out);
+
+  /* forward pass: p_y_given_x = softmax(W x + b) */
+  for(row=0; row<this->n_out; row++) {
+    double activation = 0;
+    for(col=0; col<this->n_in; col++) activation += this->W[row][col] * x[col];
+    p_y_given_x[row] = activation + this->b[row];
+  }
+  LogisticRegression_softmax(this, p_y_given_x);
+
+  /* move W and b along the error (target minus prediction) */
+  for(row=0; row<this->n_out; row++) {
+    double *weights = this->W[row];
+    dy[row] = y[row] - p_y_given_x[row];
+    for(col=0; col<this->n_in; col++) weights[col] += lr * dy[row] * x[col] / this->N;
+    this->b[row] += lr * dy[row] / this->N;
+  }
+
+  free(dy);
+  free(p_y_given_x);
+}
+
+/* In-place softmax over the n_out entries of x; x ends up holding probabilities. */
+void LogisticRegression_softmax(LogisticRegression *this, double *x) {
+  int i;
+  double max, sum = 0.0;
+  if(this->n_out <= 0) return;  /* nothing to normalise */
+  /* Shift by the true maximum.  Seeding max with 0.0 left all-negative inputs
+     unshifted, so every exp() could underflow to 0 and the division gave NaN. */
+  max = x[0];
+  for(i=1; i<this->n_out; i++) if(max < x[i]) max = x[i];
+  for(i=0; i<this->n_out; i++) sum += (x[i] = exp(x[i] - max));
+
+  for(i=0; i<this->n_out; i++) x[i] /= sum;
+}
+
+/* Class probabilities for x: y[0..n_out) = softmax(W x + b). */
+void LogisticRegression_predict(LogisticRegression *this, int *x, double *y) {
+  int row, col;
+
+  for(row=0; row<this->n_out; row++) {
+    double *weights = this->W[row];
+    double activation = 0;
+    for(col=0; col<this->n_in; col++) activation += weights[col] * x[col];
+    y[row] = activation + this->b[row];
+  }
+
+  LogisticRegression_softmax(this, y);
+}
+
+
+
+
+void test_lr(void) {  // demo: train a 2-class softmax classifier on 6 toy rows, then print predictions for 2 test rows
+  int i, j, epoch;
+
+  double learning_rate = 0.1;
+  int n_epochs = 500;
+
+  int train_N = 6;
+  int test_N = 2;
+  int n_in = 6;
+  int n_out = 2;
+
+  // each train_X row is one example of n_in 0/1 values; the matching train_Y row holds its n_out 0/1 targets
+  // training data
+  int train_X[6][6] = {
+    {1, 1, 1, 0, 0, 0},
+    {1, 0, 1, 0, 0, 0},
+    {1, 1, 1, 0, 0, 0},
+    {0, 0, 1, 1, 1, 0},
+    {0, 0, 1, 1, 0, 0},
+    {0, 0, 1, 1, 1, 0}
+  };
+
+  int train_Y[6][2] = {
+    {1, 0},
+    {1, 0},
+    {1, 0},
+    {0, 1},
+    {0, 1},
+    {0, 1}
+  };
+
+
+  // construct LogisticRegression
+  LogisticRegression classifier;
+  LogisticRegression__construct(&classifier, train_N, n_in, n_out);
+
+
+  // train: n_epochs passes, one update per training row
+  for(epoch=0; epoch<n_epochs; epoch++) {
+    for(i=0; i<train_N; i++) {
+      LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate);
+    }
+    // learning_rate *= 0.95;  (per-epoch decay, left disabled)
+  }
+
+
+  // test data
+  int test_X[2][6] = {
+    {1, 0, 1, 0, 0, 0},
+    {0, 0, 1, 1, 1, 0}
+  };
+
+  double test_Y[2][2];
+
+  // one output line per test row: its n_out predicted probabilities
+  // test
+  for(i=0; i<test_N; i++) {
+    LogisticRegression_predict(&classifier, test_X[i], test_Y[i]);
+    for(j=0; j<n_out; j++) {
+      printf("%f ", test_Y[i][j]);
+    }
+    printf("\n");
+  }
+
+
+
+  // destruct LogisticRegression
+  LogisticRegression__destruct(&classifier);
+}
+
+
+
+
+int main(void) {
+  // program entry point: run the logistic-regression demo, then report success
+  test_lr();
+  return 0;
+}
diff --git a/DeepLearning/c/LogisticRegression.h b/DeepLearning/c/LogisticRegression.h
new file mode 100644
index 00000000..34eb99d9
--- /dev/null
+++ b/DeepLearning/c/LogisticRegression.h
@@ -0,0 +1,18 @@
+#ifndef LOGISTICREGRESSION_H
+#define LOGISTICREGRESSION_H
+
+typedef struct {  // multi-class (softmax) logistic-regression model
+  int N;       // divisor applied to every update in LogisticRegression_train; test_lr passes the training-set size
+  int n_in;    // number of input features (length of each W row)
+  int n_out;   // number of classes (rows of W, entries of b)
+  double **W;  // weights, indexed W[class][feature]
+  double *b;   // per-class bias
+} LogisticRegression;
+
+void LogisticRegression__construct(LogisticRegression*, int, int, int);    // (this, N, n_in, n_out)
+void LogisticRegression__destruct(LogisticRegression*);
+void LogisticRegression_train(LogisticRegression*, int*, int*, double);    // (this, x, y, lr): one update from one example
+void LogisticRegression_softmax(LogisticRegression*, double*);             // (this, x): in place over n_out entries
+void LogisticRegression_predict(LogisticRegression*, int*, double*);       // (this, x, y): y receives n_out probabilities
+
+#endif
diff --git a/DeepLearning/c/RBM.c b/DeepLearning/c/RBM.c
new file mode 100644
index 00000000..7e806bb0
--- /dev/null
+++ b/DeepLearning/c/RBM.c
@@ -0,0 +1,254 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "RBM.h"
+#include "utils.h"
+
+
+void test_rbm(void);
+
+
+double uniform(double min, double max) {  /* rand() scaled into the range starting at min with width max - min */
+  return min + (max - min) * (rand() / (RAND_MAX + 1.0));
+}
+
+/* Number of successes in n independent trials that each succeed with
+   probability p; one rand() call per trial.  A p outside [0, 1] gives 0. */
+int binomial(int n, double p) {
+  int successes = 0;
+  int trial;
+
+  if(p > 1 || p < 0) return 0;
+
+  for(trial=0; trial<n; trial++) {
+    if(rand() / (RAND_MAX + 1.0) < p) successes++;
+  }
+
+  return successes;
+}
+
+double sigmoid(double x) {  /* logistic function 1 / (1 + e^-x) */
+  return 1.0 / (exp(-x) + 1.0);
+}
+
+
+/* Initialise an RBM.  Each of W, hbias, vbias that is NULL is allocated here
+   (W filled from uniform(-1/n_visible, 1/n_visible), biases zero); a non-NULL
+   argument is stored as-is, not copied. */
+void RBM__construct(RBM* this, int N, int n_visible, int n_hidden,
+                    double **W, double *hbias, double *vbias) {
+  int h, v;
+  double a = 1.0 / n_visible;
+
+  this->N = N;
+  this->n_visible = n_visible;
+  this->n_hidden = n_hidden;
+
+  /* start from the caller's pointers; NULL ones are replaced below */
+  this->W = W;
+  this->hbias = hbias;
+  this->vbias = vbias;
+
+  if(W == NULL) {
+    /* n_hidden row pointers into one contiguous n_hidden*n_visible block */
+    this->W = (double **)malloc(sizeof(double*) * n_hidden);
+    this->W[0] = (double *)malloc(sizeof(double) * n_visible * n_hidden);
+    for(h=0; h<n_hidden; h++) {
+      this->W[h] = this->W[0] + h * n_visible;
+      for(v=0; v<n_visible; v++) this->W[h][v] = uniform(-a, a);
+    }
+  }
+
+  if(hbias == NULL) {
+    this->hbias = (double *)malloc(sizeof(double) * n_hidden);
+    for(h=0; h<n_hidden; h++) this->hbias[h] = 0;
+  }
+
+  if(vbias == NULL) {
+    this->vbias = (double *)malloc(sizeof(double) * n_visible);
+    for(v=0; v<n_visible; v++) this->vbias[v] = 0;
+  }
+}
+
+void RBM__destruct(RBM* this) {  /* frees W (block, then row table) and both bias vectors */
+  free(this->vbias);
+  free(this->hbias);
+  free(this->W[0]);
+  free(this->W);
+}
+
+/* One CD-k update from a single training vector.
+   input: n_visible values, lr: learning rate, k: number of Gibbs steps.
+   All parameter updates are divided by this->N. */
+void RBM_contrastive_divergence(RBM* this, int *input, double lr, int k) {
+  int h, v, step;
+  int *chain_start;
+
+  double *ph_mean = (double *)malloc(sizeof(double) * this->n_hidden);
+  int *ph_sample = (int *)malloc(sizeof(int) * this->n_hidden);
+  double *nv_means = (double *)malloc(sizeof(double) * this->n_visible);
+  int *nv_samples = (int *)malloc(sizeof(int) * this->n_visible);
+  double *nh_means = (double *)malloc(sizeof(double) * this->n_hidden);
+  int *nh_samples = (int *)malloc(sizeof(int) * this->n_hidden);
+
+  /* positive phase: hidden probabilities and samples given the data */
+  RBM_sample_h_given_v(this, input, ph_mean, ph_sample);
+
+  /* negative phase: the first Gibbs step starts from ph_sample, each later
+     one from the hidden samples produced by the step before it */
+  chain_start = ph_sample;
+  for(step=0; step<k; step++) {
+    RBM_gibbs_hvh(this, chain_start, nv_means, nv_samples, nh_means, nh_samples);
+    chain_start = nh_samples;
+  }
+
+  for(h=0; h<this->n_hidden; h++) {
+    double *weights = this->W[h];
+    for(v=0; v<this->n_visible; v++) {
+      weights[v] += lr * (ph_mean[h] * input[v] - nh_means[h] * nv_samples[v]) / this->N;
+    }
+    this->hbias[h] += lr * (ph_sample[h] - nh_means[h]) / this->N;
+  }
+  for(v=0; v<this->n_visible; v++) this->vbias[v] += lr * (input[v] - nv_samples[v]) / this->N;
+
+  free(nh_samples);
+  free(nh_means);
+  free(nv_samples);
+  free(nv_means);
+  free(ph_sample);
+  free(ph_mean);
+}
+
+
+/* For each hidden unit: mean = RBM_propup probability given v0_sample, sample = one 0/1 draw from it. */
+void RBM_sample_h_given_v(RBM* this, int *v0_sample, double *mean, int *sample) {
+  for(int h=0; h<this->n_hidden; h++) {
+    mean[h] = RBM_propup(this, v0_sample, this->W[h], this->hbias[h]);
+    sample[h] = binomial(1, mean[h]);
+  }
+}
+
+/* For each visible unit: mean = RBM_propdown probability given h0_sample, sample = one 0/1 draw from it. */
+void RBM_sample_v_given_h(RBM* this, int *h0_sample, double *mean, int *sample) {
+  for(int v=0; v<this->n_visible; v++) {
+    mean[v] = RBM_propdown(this, h0_sample, v, this->vbias[v]);
+    sample[v] = binomial(1, mean[v]);
+  }
+}
+
+/* Activation probability of one hidden unit: sigmoid(w . v + b), where w is
+   that unit's weight row and v holds n_visible values. */
+double RBM_propup(RBM* this, int *v, double *w, double b) {
+  double total = 0.0;
+  int idx;
+
+  for(idx=0; idx<this->n_visible; idx++) total += w[idx] * v[idx];
+  return sigmoid(total + b);
+}
+
+/* Activation probability of visible unit i: sigmoid(sum_j W[j][i] * h[j] + b),
+   i.e. column i of W weighted by the n_hidden values in h. */
+double RBM_propdown(RBM* this, int *h, int i, double b) {
+  double total = 0.0;
+  int unit;
+
+  for(unit=0; unit<this->n_hidden; unit++) total += this->W[unit][i] * h[unit];
+
+  return sigmoid(total + b);
+}
+
+/* One Gibbs step h -> v -> h: sample the visibles from h0_sample, then the hiddens from those. */
+void RBM_gibbs_hvh(RBM* this, int *h0_sample, double *nv_means, int *nv_samples, double *nh_means, int *nh_samples) {
+  RBM_sample_v_given_h(this, h0_sample, nv_means, nv_samples);
+  RBM_sample_h_given_v(this, nv_samples, nh_means, nh_samples);
+}
+
+/* Deterministic reconstruction of v: hidden probabilities are computed from v
+   and pushed back through the transposed weights; reconstructed_v receives
+   n_visible probabilities.  No sampling is involved. */
+void RBM_reconstruct(RBM* this, int *v, double *reconstructed_v) {
+  int hid, vis;
+  double *h = (double *)malloc(sizeof(double) * this->n_hidden);
+
+  /* up: probability of each hidden unit given v */
+  for(hid=0; hid<this->n_hidden; hid++) {
+    h[hid] = RBM_propup(this, v, this->W[hid], this->hbias[hid]);
+  }
+
+  /* down: uses the hidden probabilities themselves rather than 0/1 samples */
+  for(vis=0; vis<this->n_visible; vis++) {
+    double total = 0.0;
+    for(hid=0; hid<this->n_hidden; hid++) total += this->W[hid][vis] * h[hid];
+    reconstructed_v[vis] = sigmoid(total + this->vbias[vis]);
+  }
+
+  free(h);
+}
+
+
+
+
+void test_rbm(void) {  // demo: train an RBM on 6 toy rows, then print reconstructions of 2 test rows
+  srand(0);  // fixed seed: rand() yields the same sequence on every run
+
+  int i, j, epoch;
+
+  double learning_rate = 0.1;
+  int training_epochs = 1000;
+  int k = 1;  // Gibbs steps per RBM_contrastive_divergence call
+  // dataset and model dimensions
+  int train_N = 6;
+  int test_N = 2;
+  int n_visible = 6;
+  int n_hidden = 3;
+
+  // training data
+  int train_X[6][6] = {
+    {1, 1, 1, 0, 0, 0},
+    {1, 0, 1, 0, 0, 0},
+    {1, 1, 1, 0, 0, 0},
+    {0, 0, 1, 1, 1, 0},
+    {0, 0, 1, 0, 1, 0},
+    {0, 0, 1, 1, 1, 0}
+  };
+
+  // construct RBM
+  RBM rbm;
+  RBM__construct(&rbm, train_N, n_visible, n_hidden, NULL, NULL, NULL);  // NULLs: the constructor allocates W, hbias and vbias
+
+  // train
+  for(epoch=0; epoch<training_epochs; epoch++) {
+    for(i=0; i<train_N; i++) {
+      RBM_contrastive_divergence(&rbm, train_X[i], learning_rate, k);
+    }
+  }
+
+
+  // test data
+  int test_X[2][6] = {
+    {1, 1, 0, 0, 0, 0},
+    {0, 0, 0, 1, 1, 0}
+  };
+  double reconstructed_X[2][6];
+
+  // test: one output line per test row, n_visible reconstructed values each
+  for(i=0; i<test_N; i++) {
+    RBM_reconstruct(&rbm, test_X[i], reconstructed_X[i]);
+    for(j=0; j<n_visible; j++) {
+      printf("%.5f ", reconstructed_X[i][j]);
+    }
+    printf("\n");
+  }
+
+
+  // destruct RBM
+  RBM__destruct(&rbm);
+}
+
+
+
+int main(void) {
+  // program entry point: run the RBM demo, then report success
+  test_rbm();
+  return 0;
+}
diff --git a/DeepLearning/c/RBM.h b/DeepLearning/c/RBM.h
new file mode 100644
index 00000000..abc1bd72
--- /dev/null
+++ b/DeepLearning/c/RBM.h
@@ -0,0 +1,23 @@
+#ifndef RBM_H
+#define RBM_H
+
+typedef struct {  // restricted Boltzmann machine
+  int N;          // divisor applied to every update in RBM_contrastive_divergence; test_rbm passes the training-set size
+  int n_visible;  // number of visible units (length of each W row, entries of vbias)
+  int n_hidden;   // number of hidden units (rows of W, entries of hbias)
+  double **W;     // weights, indexed W[hidden][visible]
+  double *hbias;  // per-hidden-unit bias
+  double *vbias;  // per-visible-unit bias
+} RBM;
+
+void RBM__construct(RBM*, int, int, int, double**, double*, double*);  // (this, N, n_visible, n_hidden, W, hbias, vbias); NULL = allocate
+void RBM__destruct(RBM*);
+void RBM_contrastive_divergence(RBM*, int*, double, int);               // (this, input, lr, k)
+void RBM_sample_h_given_v(RBM*, int*, double*, int*);                   // (this, v0_sample, mean out, sample out)
+void RBM_sample_v_given_h(RBM*, int*, double*, int*);                   // (this, h0_sample, mean out, sample out)
+double RBM_propup(RBM*, int*, double*, double);                         // (this, v, weight row w, bias b)
+double RBM_propdown(RBM*, int*, int, double);                           // (this, h, visible index i, bias b)
+void RBM_gibbs_hvh(RBM*, int*, double*, int*, double*, int*);           // (this, h0_sample, nv_means, nv_samples, nh_means, nh_samples)
+void RBM_reconstruct(RBM*, int*, double*);                              // (this, v, reconstructed_v out)
+
+#endif
diff --git a/DeepLearning/c/SdA.c b/DeepLearning/c/SdA.c
new file mode 100644
index 00000000..99170ee9
--- /dev/null
+++ b/DeepLearning/c/SdA.c
@@ -0,0 +1,587 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "HiddenLayer.h"
+#include "dA.h"
+#include "LogisticRegression.h"
+#include "SdA.h"
+#include "utils.h"
+
+void test_sda(void);
+
+
+double uniform(double min, double max) {  /* rand() scaled into the range starting at min with width max - min */
+  return min + (max - min) * (rand() / (RAND_MAX + 1.0));
+}
+
+/* Number of successes in n independent trials that each succeed with
+   probability p; one rand() call per trial.  A p outside [0, 1] gives 0. */
+int binomial(int n, double p) {
+  int successes = 0;
+  int trial;
+
+  if(p > 1 || p < 0) return 0;
+
+  for(trial=0; trial<n; trial++) {
+    if(rand() / (RAND_MAX + 1.0) < p) successes++;
+  }
+
+  return successes;
+}
+
+double sigmoid(double x) {  /* logistic function 1 / (1 + e^-x) */
+  return 1.0 / (exp(-x) + 1.0);
+}
+
+
+// SdA
+/* Build a stacked denoising autoencoder: n_layers sigmoid layers, one dA per
+   layer sharing that layer's W and b, and a logistic-regression output layer.
+   hidden_layer_sizes is stored by pointer, not copied, so it must outlive this. */
+void SdA__construct(SdA* this, int N,
+                    int n_ins, int *hidden_layer_sizes, int n_outs, int n_layers) {
+  int layer;
+  int fan_in = n_ins;  /* the first layer reads the raw input */
+
+  this->N = N;
+  this->n_ins = n_ins;
+  this->hidden_layer_sizes = hidden_layer_sizes;
+  this->n_outs = n_outs;
+  this->n_layers = n_layers;
+
+  this->sigmoid_layers = (HiddenLayer *)malloc(sizeof(HiddenLayer) * n_layers);
+  this->dA_layers = (dA *)malloc(sizeof(dA) * n_layers);
+
+  for(layer=0; layer<n_layers; layer++) {
+    HiddenLayer *sigmoid_layer = &(this->sigmoid_layers[layer]);
+    int fan_out = hidden_layer_sizes[layer];
+
+    /* fresh random weights for the sigmoid layer */
+    HiddenLayer__construct(sigmoid_layer, N, fan_in, fan_out, NULL, NULL);
+
+    /* the dA adopts the sigmoid layer's W and b (as its hbias); only vbias is its own */
+    dA__construct(&(this->dA_layers[layer]), N, fan_in, fan_out,
+                  sigmoid_layer->W, sigmoid_layer->b, NULL);
+
+    fan_in = fan_out;  /* the next layer consumes this layer's output */
+  }
+
+  /* output layer on top of the last hidden layer */
+  LogisticRegression__construct(&(this->log_layer),
+                                N, hidden_layer_sizes[n_layers-1], n_outs);
+}
+
+void SdA__destruct(SdA* this) {  /* releases everything SdA__construct allocated */
+  for(int i=0; i<this->n_layers; i++) {
+    HiddenLayer__destruct(&(this->sigmoid_layers[i]));  /* frees the W and b the dA shares */
+    dA__destruct(&(this->dA_layers[i]));                /* frees the dA's own vbias */
+  }
+  free(this->sigmoid_layers);
+  free(this->dA_layers);
+  LogisticRegression__destruct(&(this->log_layer));  /* log_layer owns a W and b as well */
+}
+
+/* Greedy layer-wise pretraining of the dA stack.
+ *
+ * For each layer i (outermost loop) and each epoch, every one of the N
+ * training rows is sampled up through sigmoid layers 0..i-1 and the
+ * resulting binary vector is used for one dA_train step on dA i.
+ *
+ * input            N rows of n_ins ints, stored back to back
+ * lr               learning rate forwarded to dA_train
+ * corruption_level forwarded to dA_train
+ * epochs           passes over the training set, per layer
+ * Nothing is returned; the dA layers (and the W/b they share) are updated in place.
+ */
+void SdA_pretrain(SdA* this, int *input, double lr, double corruption_level, int epochs) {
+  int i, j, l, n, epoch;
+
+  int *layer_input;
+  int *next_input;
+
+  for(i=0; i<this->n_layers; i++) { // layer-wise
+
+    for(epoch=0; epoch<epochs; epoch++) { // training epochs
+
+      for(n=0; n<this->N; n++) { // input x1...xN
+        // start from a private copy of training row n
+        layer_input = (int *)malloc(sizeof(int) * this->n_ins);
+        for(j=0; j<this->n_ins; j++) layer_input[j] = input[n * this->n_ins + j];
+
+        // sample upwards through the layers below i
+        for(l=0; l<i; l++) {
+          next_input = (int *)malloc(sizeof(int) * this->hidden_layer_sizes[l]);
+          HiddenLayer_sample_h_given_v(&(this->sigmoid_layers[l]),
+                                       layer_input, next_input);
+          free(layer_input);
+          layer_input = next_input;
+        }
+
+        dA_train(&(this->dA_layers[i]), layer_input, lr, corruption_level);
+
+        // Release this sample's buffer before the next one is allocated.
+        // A single free after all the loops leaked every earlier buffer and
+        // freed an uninitialised pointer when no sample was processed.
+        free(layer_input);
+      }
+
+    }
+  }
+}
+
+/* Supervised fine-tuning of the output layer.
+ *
+ * For each epoch and each of the N training rows, the row is sampled up
+ * through every sigmoid layer and the top-level binary vector, together with
+ * the row's label, is used for one LogisticRegression_train step.  Only
+ * log_layer is updated; the hidden layers are left as they are.
+ *
+ * input  N rows of n_ins ints, stored back to back
+ * label  N rows of n_outs ints, stored back to back
+ * lr     learning rate forwarded to LogisticRegression_train
+ * epochs passes over the training set
+ */
+void SdA_finetune(SdA* this, int *input, int *label, double lr, int epochs) {
+  int i, j, n, epoch;
+
+  int *layer_input;
+  int *next_input;
+
+  for(epoch=0; epoch<epochs; epoch++) {
+    for(n=0; n<this->N; n++) { // input x1...xN
+      // start from a private copy of training row n
+      layer_input = (int *)malloc(sizeof(int) * this->n_ins);
+      for(j=0; j<this->n_ins; j++) layer_input[j] = input[n * this->n_ins + j];
+
+      // sample upwards through every hidden layer
+      for(i=0; i<this->n_layers; i++) {
+        next_input = (int *)malloc(sizeof(int) * this->hidden_layer_sizes[i]);
+        HiddenLayer_sample_h_given_v(&(this->sigmoid_layers[i]),
+                                     layer_input, next_input);
+        free(layer_input);
+        layer_input = next_input;
+      }
+
+      LogisticRegression_train(&(this->log_layer), layer_input,
+                               label + n * this->n_outs, lr);
+
+      // Release this sample's buffer; it used to leak on every iteration
+      // because only the very last one was freed after the loops.
+      free(layer_input);
+    }
+    // lr *= 0.95;
+  }
+}
+
+/* Forward pass for one input vector.
+ *
+ * Unlike training, no sampling happens here: each sigmoid layer passes its
+ * real-valued activations on to the next, and the last layer's activations
+ * go through the logistic-regression layer and softmax.
+ *
+ * x  n_ins input values
+ * y  receives log_layer.n_out class probabilities
+ */
+void SdA_predict(SdA* this, int *x, double *y) {
+  int i, j, k;
+  double *activation;
+  double *layer_output;
+
+  /* widen the integer input to double so that every layer sees the same type */
+  activation = (double *)malloc(sizeof(double) * this->n_ins);
+  for(j=0; j<this->n_ins; j++) activation[j] = x[j];
+
+  /* layer activation */
+  for(i=0; i<this->n_layers; i++) {
+    HiddenLayer *layer = &(this->sigmoid_layers[i]);
+    layer_output = (double *)malloc(sizeof(double) * layer->n_out);
+
+    for(k=0; k<layer->n_out; k++) {
+      double linear_output = 0.0;
+      for(j=0; j<layer->n_in; j++) linear_output += layer->W[k][j] * activation[j];
+      layer_output[k] = sigmoid(linear_output + layer->b[k]);
+    }
+
+    /* this layer's output is the next layer's input */
+    free(activation);
+    activation = layer_output;
+  }
+
+  for(i=0; i<this->log_layer.n_out; i++) {
+    y[i] = 0;
+    for(j=0; j<this->log_layer.n_in; j++) {
+      y[i] += this->log_layer.W[i][j] * activation[j];
+    }
+    y[i] += this->log_layer.b[i];
+  }
+
+  LogisticRegression_softmax(&(this->log_layer), y);
+  free(activation);
+}
+
+
+// HiddenLayer
+/* Initialise a sigmoid layer.  A NULL W is allocated and filled from
+   uniform(-1/n_in, 1/n_in); a NULL b is allocated and zeroed; non-NULL ones are kept as-is. */
+void HiddenLayer__construct(HiddenLayer* this, int N, int n_in, int n_out, \
+                            double **W, double *b) {
+  int i, j;
+  double a = 1.0 / n_in;
+
+  this->N = N;
+  this->n_in = n_in;
+  this->n_out = n_out;
+
+  if(W == NULL) {
+    this->W = (double **)malloc(sizeof(double*) * n_out);
+    this->W[0] = (double *)malloc(sizeof(double) * n_in * n_out);
+    for(i=0; i<n_out; i++) {
+      this->W[i] = this->W[0] + i * n_in;
+      for(j=0; j<n_in; j++) this->W[i][j] = uniform(-a, a);
+    }
+  } else {
+    this->W = W;
+  }
+
+  if(b == NULL) {
+    this->b = (double *)malloc(sizeof(double) * n_out);
+    for(i=0; i<n_out; i++) this->b[i] = 0;  /* was left uninitialised, yet HiddenLayer_output reads it */
+  } else {
+    this->b = b;
+  }
+}
+
+void HiddenLayer__destruct(HiddenLayer* this) {  /* frees the weight block, its row table and the bias */
+  free(this->b);
+  free(this->W[0]);
+  free(this->W);
+}
+
+/* Output of one unit: sigmoid(w . input + b), with w that unit's n_in weights. */
+double HiddenLayer_output(HiddenLayer* this, int *input, double *w, double b) {
+  double total = 0.0;
+  int idx;
+
+  for(idx=0; idx<this->n_in; idx++) total += w[idx] * input[idx];
+
+  return sigmoid(total + b);
+}
+
+/* For each of the n_out units: sample[u] = one 0/1 draw with probability HiddenLayer_output of unit u. */
+void HiddenLayer_sample_h_given_v(HiddenLayer* this, int *input, int *sample) {
+  for(int unit=0; unit<this->n_out; unit++) {
+    sample[unit] = binomial(1, HiddenLayer_output(this, input, this->W[unit], this->b[unit]));
+  }
+}
+
+
+// dA
+/* Initialise a denoising autoencoder.  Each of W, hbias, vbias that is NULL is
+   allocated here (W filled from uniform(-1/n_visible, 1/n_visible), biases
+   zero); a non-NULL argument is stored as-is, not copied. */
+void dA__construct(dA* this, int N, int n_visible, int n_hidden,
+                   double **W, double *hbias, double *vbias) {
+  int h, v;
+  double a = 1.0 / n_visible;
+
+  this->N = N;
+  this->n_visible = n_visible;
+  this->n_hidden = n_hidden;
+
+  /* start from the caller's pointers; NULL ones are replaced below */
+  this->W = W;
+  this->hbias = hbias;
+  this->vbias = vbias;
+
+  if(W == NULL) {
+    /* n_hidden row pointers into one contiguous n_hidden*n_visible block */
+    this->W = (double **)malloc(sizeof(double*) * n_hidden);
+    this->W[0] = (double *)malloc(sizeof(double) * n_visible * n_hidden);
+    for(h=0; h<n_hidden; h++) {
+      this->W[h] = this->W[0] + h * n_visible;
+      for(v=0; v<n_visible; v++) this->W[h][v] = uniform(-a, a);
+    }
+  }
+
+  if(hbias == NULL) {
+    this->hbias = (double *)malloc(sizeof(double) * n_hidden);
+    for(h=0; h<n_hidden; h++) this->hbias[h] = 0;
+  }
+
+  if(vbias == NULL) {
+    this->vbias = (double *)malloc(sizeof(double) * n_visible);
+    for(v=0; v<n_visible; v++) this->vbias[v] = 0;
+  }
+}
+
+void dA__destruct(dA* this) {
+  // W and hbias are deliberately not freed here: SdA__construct hands each dA
+  // the W and b of its HiddenLayer, and HiddenLayer__destruct frees those.
+  // NOTE(review): a dA built with NULL W/hbias would leak them - confirm no such caller.
+  free(this->vbias);
+}
+
+/* Corrupt x into tilde_x (both n_visible long): zeros stay zero, and every
+   non-zero entry is replaced by binomial(1, p), i.e. a 0/1 draw that is 1
+   with probability p.  Entries that are already zero consume no random numbers. */
+void dA_get_corrupted_input(dA* this, int *x, int *tilde_x, double p) {
+  int idx;
+
+  for(idx=0; idx<this->n_visible; idx++) {
+    tilde_x[idx] = (x[idx] != 0) ? binomial(1, p) : 0;
+  }
+}
+
+// Encode: y[h] = sigmoid(W[h] . x + hbias[h]) for each of the n_hidden units;
+// x holds n_visible values.
+void dA_get_hidden_values(dA* this, int *x, double *y) {
+  int hid, vis;
+
+  for(hid=0; hid<this->n_hidden; hid++) {
+    double *weights = this->W[hid];
+    double total = 0;
+    for(vis=0; vis<this->n_visible; vis++) total += weights[vis] * x[vis];
+    y[hid] = sigmoid(total + this->hbias[hid]);
+  }
+}
+
+// Decode: z[v] = sigmoid(sum_h W[h][v] * y[h] + vbias[v]) for each of the
+// n_visible units, i.e. the encoder weights are reused transposed.
+void dA_get_reconstructed_input(dA* this, double *y, double *z) {
+  int hid, vis;
+
+  for(vis=0; vis<this->n_visible; vis++) {
+    /* accumulate directly in z[vis], then squash */
+    z[vis] = 0;
+    for(hid=0; hid<this->n_hidden; hid++) z[vis] += this->W[hid][vis] * y[hid];
+    z[vis] = sigmoid(z[vis] + this->vbias[vis]);
+  }
+}
+
+
+/* One denoising-autoencoder step on example x (n_visible values): x is
+   corrupted (each non-zero entry survives with probability
+   1 - corruption_level), encoded and decoded, and vbias, hbias and W move
+   along the reconstruction error.  Every update is divided by this->N. */
+void dA_train(dA* this, int *x, double lr, double corruption_level) {
+  int hid, vis;
+  double keep_prob = 1 - corruption_level;
+
+  int *tilde_x = (int *)malloc(sizeof(int) * this->n_visible);
+  double *y = (double *)malloc(sizeof(double) * this->n_hidden);
+  double *z = (double *)malloc(sizeof(double) * this->n_visible);
+  double *L_vbias = (double *)malloc(sizeof(double) * this->n_visible);
+  double *L_hbias = (double *)malloc(sizeof(double) * this->n_hidden);
+
+  /* forward pass: corrupt -> encode -> decode */
+  dA_get_corrupted_input(this, x, tilde_x, keep_prob);
+  dA_get_hidden_values(this, tilde_x, y);
+  dA_get_reconstructed_input(this, y, z);
+
+  /* visible bias: reconstruction error against the uncorrupted input */
+  for(vis=0; vis<this->n_visible; vis++) {
+    L_vbias[vis] = x[vis] - z[vis];
+    this->vbias[vis] += lr * L_vbias[vis] / this->N;
+  }
+
+  /* hidden bias: error sent back through W, times the sigmoid slope y(1-y) */
+  for(hid=0; hid<this->n_hidden; hid++) {
+    double back = 0;
+    for(vis=0; vis<this->n_visible; vis++) back += this->W[hid][vis] * L_vbias[vis];
+    L_hbias[hid] = back * (y[hid] * (1 - y[hid]));
+    this->hbias[hid] += lr * L_hbias[hid] / this->N;
+  }
+
+  /* weights: runs only after every L_hbias entry has been computed from the old W */
+  for(hid=0; hid<this->n_hidden; hid++) {
+    for(vis=0; vis<this->n_visible; vis++) {
+      this->W[hid][vis] += lr * (L_hbias[hid] * tilde_x[vis] + L_vbias[vis] * y[hid]) / this->N;
+    }
+  }
+
+  free(tilde_x);
+  free(y);
+  free(z);
+  free(L_vbias);
+  free(L_hbias);
+}
+
+/* Encode x and decode it again without corruption; z receives the n_visible
+   reconstructed values. */
+void dA_reconstruct(dA* this, int *x, double *z) {
+  double *hidden = (double *)malloc(sizeof(double) * this->n_hidden);
+
+  dA_get_hidden_values(this, x, hidden);
+  dA_get_reconstructed_input(this, hidden, z);
+  free(hidden);
+}
+
+
+// LogisticRegression
+/* Set up a softmax classifier with n_in inputs and n_out classes.  W is n_out
+   row pointers into one contiguous n_out*n_in block; W and b start at zero. */
+void LogisticRegression__construct(LogisticRegression *this, int N, int n_in, int n_out) {
+  int row, col;
+
+  this->N = N;
+  this->n_in = n_in;
+  this->n_out = n_out;
+
+  this->W = (double **)malloc(sizeof(double*) * n_out);
+  this->W[0] = (double *)malloc(sizeof(double) * n_in * n_out);
+  this->b = (double *)malloc(sizeof(double) * n_out);
+  for(row=0; row<n_out; row++) {
+    this->W[row] = this->W[0] + row * n_in;
+    for(col=0; col<n_in; col++) this->W[row][col] = 0;
+    this->b[row] = 0;
+  }
+}
+
+void LogisticRegression__destruct(LogisticRegression *this) {  /* frees what __construct allocated */
+  free(this->b);
+  free(this->W[0]);  /* the contiguous weight block every row pointer indexes into */
+  free(this->W);     /* the row-pointer table itself */
+}
+
+/* One stochastic-gradient step on a single example.
+   x: n_in input values, y: n_out target values, lr: learning rate.
+   Every update is divided by this->N. */
+void LogisticRegression_train(LogisticRegression *this, int *x, int *y, double lr) {
+  int row, col;
+  double *p_y_given_x = (double *)malloc(sizeof(double) * this->n_out);
+  double *dy = (double *)malloc(sizeof(double) * this->n_out);
+
+  /* forward pass: p_y_given_x = softmax(W x + b) */
+  for(row=0; row<this->n_out; row++) {
+    double activation = 0;
+    for(col=0; col<this->n_in; col++) activation += this->W[row][col] * x[col];
+    p_y_given_x[row] = activation + this->b[row];
+  }
+  LogisticRegression_softmax(this, p_y_given_x);
+
+  /* move W and b along the error (target minus prediction) */
+  for(row=0; row<this->n_out; row++) {
+    double *weights = this->W[row];
+    dy[row] = y[row] - p_y_given_x[row];
+    for(col=0; col<this->n_in; col++) weights[col] += lr * dy[row] * x[col] / this->N;
+    this->b[row] += lr * dy[row] / this->N;
+  }
+
+  free(dy);
+  free(p_y_given_x);
+}
+
+/* In-place softmax over the n_out entries of x; x ends up holding probabilities. */
+void LogisticRegression_softmax(LogisticRegression *this, double *x) {
+  int i;
+  double max, sum = 0.0;
+  if(this->n_out <= 0) return;  /* nothing to normalise */
+  /* Shift by the true maximum.  Seeding max with 0.0 left all-negative inputs
+     unshifted, so every exp() could underflow to 0 and the division gave NaN. */
+  max = x[0];
+  for(i=1; i<this->n_out; i++) if(max < x[i]) max = x[i];
+  for(i=0; i<this->n_out; i++) sum += (x[i] = exp(x[i] - max));
+
+  for(i=0; i<this->n_out; i++) x[i] /= sum;
+}
+
+/* Class probabilities for x: y[0..n_out) = softmax(W x + b). */
+void LogisticRegression_predict(LogisticRegression *this, int *x, double *y) {
+  int row, col;
+
+  for(row=0; row<this->n_out; row++) {
+    double *weights = this->W[row];
+    double activation = 0;
+    for(col=0; col<this->n_in; col++) activation += weights[col] * x[col];
+    y[row] = activation + this->b[row];
+  }
+
+  LogisticRegression_softmax(this, y);
+}
+
+
+void test_sda(void) {  // demo: pretrain and finetune an SdA on 10 toy rows, then print predictions for 4 test rows
+  srand(0);  // fixed seed: rand() yields the same sequence on every run
+
+  int i, j;
+
+  double pretrain_lr = 0.1;
+  double corruption_level = 0.3;
+  int pretraining_epochs = 1000;
+  double finetune_lr = 0.1;
+  int finetune_epochs = 500;
+
+  int train_N = 10;
+  int test_N = 4;
+  int n_ins = 28;
+  int n_outs = 2;
+  int hidden_layer_sizes[] = {15, 15};
+  int n_layers = sizeof(hidden_layer_sizes) / sizeof(hidden_layer_sizes[0]);
+
+  // training data: train_N rows of n_ins 0/1 values
+  int train_X[10][28] = {
+    {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}
+  };
+
+  int train_Y[10][2] = {  // one row of n_outs 0/1 values per train_X row
+    {1, 0},
+    {1, 0},
+    {1, 0},
+    {1, 0},
+    {1, 0},
+    {0, 1},
+    {0, 1},
+    {0, 1},
+    {0, 1},
+    {0, 1}
+  };
+
+  // construct SdA
+  SdA sda;
+  SdA__construct(&sda, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers);
+
+  // pretrain
+  SdA_pretrain(&sda, *train_X, pretrain_lr, corruption_level, pretraining_epochs);  // *train_X decays to int*: the rows are passed as one flat array
+
+  // finetune
+  SdA_finetune(&sda, *train_X, *train_Y, finetune_lr, finetune_epochs);
+
+  // test data
+  int test_X[4][28] = {
+    {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}
+  };
+
+  double test_Y[4][28];  // wider than needed: only the first n_outs entries of each row are written and printed
+
+
+  // test: one output line per test row, n_outs values each
+  for(i=0; i<test_N; i++) {
+    SdA_predict(&sda, test_X[i], test_Y[i]);
+    for(j=0; j<n_outs; j++) {
+      printf("%.5f ", test_Y[i][j]);
+    }
+    printf("\n");
+  }
+
+  
+  // destruct SdA
+  SdA__destruct(&sda);
+}
+
+
+int main(void) {  // program entry point: runs the SdA demo
+  test_sda();
+  return 0;
+}
diff --git a/DeepLearning/c/SdA.h b/DeepLearning/c/SdA.h
new file mode 100644
index 00000000..292b6bbc
--- /dev/null
+++ b/DeepLearning/c/SdA.h
@@ -0,0 +1,21 @@
+#ifndef SDA_H
+#define SDA_H
+
+typedef struct {  // stacked denoising autoencoder; HiddenLayer, dA and LogisticRegression must be declared before this header
+  int N;                        // training-set size: SdA_pretrain / SdA_finetune loop over N input rows
+  int n_ins;                    // length of one input row
+  int *hidden_layer_sizes;      // n_layers unit counts; the caller's array is stored by pointer, not copied
+  int n_outs;                   // number of output classes
+  int n_layers;                 // number of hidden layers
+  HiddenLayer *sigmoid_layers;  // n_layers sigmoid layers
+  dA *dA_layers;                // n_layers autoencoders; dA i shares W and b with sigmoid_layers[i]
+  LogisticRegression log_layer; // output classifier fed by the last hidden layer
+} SdA;
+
+void SdA__construct(SdA*, int, int, int*, int, int);   // (this, N, n_ins, hidden_layer_sizes, n_outs, n_layers)
+void SdA__destruct(SdA*);
+void SdA_pretrain(SdA*, int*, double, double, int);    // (this, input, lr, corruption_level, epochs)
+void SdA_finetune(SdA*, int*, int*, double, int);      // (this, input, label, lr, epochs)
+void SdA_predict(SdA*, int*, double*);                 // (this, x, y): y receives n_outs probabilities
+
+#endif
diff --git a/DeepLearning/c/dA.c b/DeepLearning/c/dA.c
new file mode 100644
index 00000000..f822ab13
--- /dev/null
+++ b/DeepLearning/c/dA.c
@@ -0,0 +1,239 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "dA.h"
+#include "utils.h"
+
+
+void test_dbn(void);
+
+
+double uniform(double min, double max) {  // Maps rand() onto [min, max) (for min < max); the +1.0 keeps the upper bound exclusive.
+  return rand() / (RAND_MAX + 1.0) * (max - min) + min;  
+}
+
+int binomial(int n, double p) {  // Returns how many of n rand()-driven trials fall below p, i.e. a count in [0, n].
+  if(p < 0 || p > 1) return 0;  // an out-of-range probability yields 0 rather than an error
+
+  int i;
+  int c = 0;  // number of successes so far
+  double r;
+
+  for(i=0; i<n; i++) {
+    r = rand() / (RAND_MAX + 1.0);  // r lies in [0, 1)
+    if (r < p) c++;
+  }
+
+  return c;
+}
+
+double sigmoid(double x) {  // Logistic function 1 / (1 + e^-x).
+  return 1.0 / (1.0 + exp(-x));
+}
+
+
+void dA__construct(dA* this, int N, int n_visible, int n_hidden, \
+                   double **W, double *hbias, double *vbias) {  // Fills *this; NULL W/hbias/vbias are allocated here, non-NULL ones are stored without copying.
+  int i, j;
+  double a = 1.0 / n_visible;  // half-width of the initial weight range
+  
+  this->N = N;
+  this->n_visible = n_visible;
+  this->n_hidden = n_hidden;
+
+  if(W == NULL) {
+    this->W = (double **)malloc(sizeof(double*) * n_hidden);  // NOTE(review): malloc results are not checked anywhere in this function
+    this->W[0] = (double *)malloc(sizeof(double) * n_visible * n_hidden);  // one contiguous n_hidden x n_visible slab
+    for(i=0; i<n_hidden; i++) this->W[i] = this->W[0] + i * n_visible;  // row pointers into the slab
+
+    for(i=0; i<n_hidden; i++) {
+      for(j=0; j<n_visible; j++) {
+        this->W[i][j] = uniform(-a, a);
+      }
+    }
+  } else {
+    this->W = W;
+  }
+
+  if(hbias == NULL) {
+    this->hbias = (double *)malloc(sizeof(double) * n_hidden);
+    for(i=0; i<n_hidden; i++) this->hbias[i] = 0;
+  } else {
+    this->hbias = hbias;
+  }
+
+  if(vbias == NULL) {
+    this->vbias = (double *)malloc(sizeof(double) * n_visible);
+    for(i=0; i<n_visible; i++) this->vbias[i] = 0;
+  } else {
+    this->vbias = vbias;
+  }
+}
+
+void dA__destruct(dA* this) {  // Frees all four buffers unconditionally, including any that were passed into dA__construct by the caller.
+  free(this->W[0]);  // weight slab; NOTE(review): a caller-supplied W must use the same single-slab layout for this to be valid
+  free(this->W);  // row-pointer table
+  free(this->hbias);
+  free(this->vbias);
+}
+
+void dA_get_corrupted_input(dA* this, int *x, int *tilde_x, double p) {  // Writes a corrupted copy of x (n_visible entries) into tilde_x.
+  int i;
+  for(i=0; i<this->n_visible; i++) {
+    if(x[i] == 0) {
+      tilde_x[i] = 0;  // zeros are never switched on
+    } else {
+      tilde_x[i] = binomial(1, p);  // non-zero entries become 1 with probability p, else 0
+    }
+  }
+}
+
+// Encode: y[i] = sigmoid(hbias[i] + sum_j W[i][j] * x[j]) for each of the n_hidden units.
+void dA_get_hidden_values(dA* this, int *x, double *y) {
+  int i,j;
+  for(i=0; i<this->n_hidden; i++) {
+    y[i] = 0;
+    for(j=0; j<this->n_visible; j++) {
+      y[i] += this->W[i][j] * x[j];
+    }
+    y[i] += this->hbias[i];
+    y[i] = sigmoid(y[i]);
+  }
+}
+
+// Decode: z[i] = sigmoid(vbias[i] + sum_j W[j][i] * y[j]); the encoder's W is reused with swapped indices.
+void dA_get_reconstructed_input(dA* this, double *y, double *z) {
+  int i, j;
+  for(i=0; i<this->n_visible; i++) {
+    z[i] = 0;
+    for(j=0; j<this->n_hidden; j++) {
+      z[i] += this->W[j][i] * y[j];
+    }
+    z[i] += this->vbias[i];
+    z[i] = sigmoid(z[i]);
+  }
+}
+
+
+void dA_train(dA* this, int *x, double lr, double corruption_level) {  // One parameter update from a single sample x (n_visible entries).
+  int i, j;
+  
+  int *tilde_x = (int *)malloc(sizeof(int) * this->n_visible);  // corrupted input; NOTE(review): none of these mallocs is checked
+  double *y = (double *)malloc(sizeof(double) * this->n_hidden);  // hidden activations
+  double *z = (double *)malloc(sizeof(double) * this->n_visible);  // reconstruction
+
+  double *L_vbias = (double *)malloc(sizeof(double) * this->n_visible);  // per-visible-unit error term
+  double *L_hbias = (double *)malloc(sizeof(double) * this->n_hidden);  // per-hidden-unit error term
+
+  double p = 1 - corruption_level;  // probability that an active input survives corruption
+
+  dA_get_corrupted_input(this, x, tilde_x, p);
+  dA_get_hidden_values(this, tilde_x, y);
+  dA_get_reconstructed_input(this, y, z);
+
+  // vbias
+  for(i=0; i<this->n_visible; i++) {
+    L_vbias[i] = x[i] - z[i];  // error against the clean input x, not tilde_x
+    this->vbias[i] += lr * L_vbias[i] / this->N;
+  }
+
+  // hbias
+  for(i=0; i<this->n_hidden; i++) {
+    L_hbias[i] = 0;
+    for(j=0; j<this->n_visible; j++) {
+      L_hbias[i] += this->W[i][j] * L_vbias[j];
+    }
+    L_hbias[i] *= y[i] * (1 - y[i]);  // y * (1 - y) is the sigmoid derivative at y
+
+    this->hbias[i] += lr * L_hbias[i] / this->N;
+  }
+
+  // W
+  for(i=0; i<this->n_hidden; i++) {
+    for(j=0; j<this->n_visible; j++) {
+      this->W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / this->N;  // two terms because W is used by both encoder and decoder
+    }
+  }
+
+  free(L_hbias);
+  free(L_vbias);
+  free(z);
+  free(y);
+  free(tilde_x);
+}
+
+void dA_reconstruct(dA* this, int *x, double *z) {  // Encodes x and decodes it again, without corruption; writes n_visible values into z.
+  // y is scratch space for the n_hidden hidden activations; it is released before returning.
+  double *y = (double *)malloc(sizeof(double) * this->n_hidden);
+
+  dA_get_hidden_values(this, x, y);
+  dA_get_reconstructed_input(this, y, z);
+
+  free(y);
+}
+
+void test_dbn(void) {  // Demo driver; despite the name it exercises only the dA defined in this file.
+  srand(0);  // fixed seed, so runs are repeatable for a given rand() implementation
+  int i, j, epoch;
+  
+  double learning_rate = 0.1;
+  double corruption_level = 0.3;
+  int training_epochs = 100;
+
+  int train_N = 10;
+  int test_N = 2;
+  int n_visible = 20;
+  int n_hidden = 5;
+
+  // training data
+  int train_X[10][20] = {
+    {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0}
+  };
+
+  // construct dA  
+  dA da;
+  dA__construct(&da, train_N, n_visible, n_hidden, NULL, NULL, NULL);  // NULLs: let the dA allocate its own parameters
+
+  // train
+  for(epoch=0; epoch<training_epochs; epoch++) {
+    for(i=0; i<train_N; i++) {
+      dA_train(&da, train_X[i], learning_rate, corruption_level);  // one update per sample
+    }
+  }
+
+  // test data
+  int test_X[2][20] = {
+    {1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0}
+  };
+  double reconstructed_X[2][20];
+
+  
+  // test
+  for(i=0; i<test_N; i++) {
+    dA_reconstruct(&da, test_X[i], reconstructed_X[i]);
+    for(j=0; j<n_visible; j++) {
+      printf("%.5f ", reconstructed_X[i][j]);
+    }
+    printf("\n");
+  }
+
+
+  // destruct dA
+  dA__destruct(&da);
+}
+
+
+int main(void) {  // Program entry point: runs the dA demo and exits with status 0.
+  test_dbn();
+  return 0;
+}
diff --git a/DeepLearning/c/dA.h b/DeepLearning/c/dA.h
new file mode 100644
index 00000000..c5d46aaf
--- /dev/null
+++ b/DeepLearning/c/dA.h
@@ -0,0 +1,21 @@
+#ifndef DA_H
+#define DA_H
+
+typedef struct {  // Parameters of one denoising autoencoder.
+  int N;  // divisor applied to every update in dA_train (the demo passes the training-set size)
+  int n_visible;  // number of input units
+  int n_hidden;  // number of hidden units
+  double **W;  // n_hidden rows of n_visible weights
+  double *hbias;  // n_hidden entries
+  double *vbias;  // n_visible entries
+} dA;
+
+void dA__construct(dA*, int, int, int, double**, double*, double*);  // (this, N, n_visible, n_hidden, W, hbias, vbias); NULL pointers are allocated
+void dA__destruct(dA*);
+void dA_get_corrupted_input(dA*, int*, int*, double);  // (this, x, tilde_x, p)
+void dA_get_hidden_values(dA*, int*, double*);  // (this, x, y)
+void dA_get_reconstructed_input(dA*, double*, double*);  // (this, y, z)
+void dA_train(dA*, int*, double, double);  // (this, x, lr, corruption_level)
+void dA_reconstruct(dA*, int*, double*);  // (this, x, z)
+
+#endif
diff --git a/DeepLearning/c/utils.h b/DeepLearning/c/utils.h
new file mode 100644
index 00000000..2a3414c0
--- /dev/null
+++ b/DeepLearning/c/utils.h
@@ -0,0 +1,8 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+double uniform(double, double);  // (min, max): rand()-based value in [min, max)
+int binomial(int, double);  // (n, p): number of successes in n trials with probability p; 0 if p is outside [0, 1]
+double sigmoid(double);  // 1 / (1 + e^-x)
+
+#endif
diff --git a/DeepLearning/cpp/DBN.cpp b/DeepLearning/cpp/DBN.cpp
new file mode 100644
index 00000000..155e0810
--- /dev/null
+++ b/DeepLearning/cpp/DBN.cpp
@@ -0,0 +1,538 @@
+#include <iostream>
+#include <math.h>
+#include "utils.h"
+#include "HiddenLayer.h"
+#include "RBM.h"
+#include "LogisticRegression.h"
+#include "DBN.h"
+using namespace std;
+using namespace utils;
+
+
+// DBN
+DBN::DBN(int size, int n_i, int *hls, int n_o, int n_l) {  // Builds n_l HiddenLayer/RBM pairs plus a LogisticRegression output layer.
+  int input_size;
+  
+  N = size;
+  n_ins = n_i;
+  hidden_layer_sizes = hls;  // pointer is stored, not copied: the caller's array must outlive this object
+  n_outs = n_o;
+  n_layers = n_l;
+
+  sigmoid_layers = new HiddenLayer*[n_layers];
+  rbm_layers = new RBM*[n_layers];
+
+  // construct multi-layer
+  for(int i=0; i<n_layers; i++) {
+    if(i == 0) {
+      input_size = n_ins;
+    } else {
+      input_size = hidden_layer_sizes[i-1];  // each layer consumes the previous layer's output
+    }
+
+    // construct sigmoid_layer
+    sigmoid_layers[i] = new HiddenLayer(N, input_size, hidden_layer_sizes[i], NULL, NULL);
+
+    // construct rbm_layer
+    rbm_layers[i] = new RBM(N, input_size, hidden_layer_sizes[i],\
+                            sigmoid_layers[i]->W, sigmoid_layers[i]->b, NULL);  // the RBM works on the HiddenLayer's own W and b arrays
+  }
+
+  // layer for output using LogisticRegression
+  log_layer = new LogisticRegression(N, hidden_layer_sizes[n_layers-1], n_outs);  // reads index n_layers-1, so n_l must be >= 1
+}
+
+DBN::~DBN() {  // Deletes the output layer, every HiddenLayer/RBM pair, then the two pointer arrays.
+  delete log_layer;
+
+  for(int i=0; i<n_layers; i++) {
+    delete sigmoid_layers[i];
+    delete rbm_layers[i];
+  }
+  delete[] sigmoid_layers;
+  delete[] rbm_layers;
+}
+
+
+void DBN::pretrain(int *input, double lr, int k, int epochs) {  // Layer-wise CD-k training; input holds N rows of n_ins ints, flattened row by row.
+  int *layer_input = NULL;  // activations fed to the RBM being trained; allocated and released once per sample
+  int prev_layer_input_size;
+  int *prev_layer_input;
+
+  int *train_X = new int[n_ins];  // scratch copy of the current sample
+
+  for(int i=0; i<n_layers; i++) {  // layer-wise
+
+    for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
+
+      for(int n=0; n<N; n++) { // input x1...xN
+        // initial input
+        for(int m=0; m<n_ins; m++) train_X[m] = input[n * n_ins + m];
+
+        // layer input: sample the activations of layers 0..i-1 to obtain the input of layer i
+        for(int l=0; l<=i; l++) {
+
+          if(l == 0) {
+            layer_input = new int[n_ins];
+            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[j];
+          } else {
+            if(l == 1) prev_layer_input_size = n_ins;
+            else prev_layer_input_size = hidden_layer_sizes[l-2];
+
+            prev_layer_input = new int[prev_layer_input_size];
+            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
+            delete[] layer_input;
+
+            layer_input = new int[hidden_layer_sizes[l-1]];
+
+            sigmoid_layers[l-1]->sample_h_given_v(prev_layer_input, layer_input);
+            delete[] prev_layer_input;
+          }
+        }
+
+        rbm_layers[i]->contrastive_divergence(layer_input, lr, k);
+        delete[] layer_input;  // released per sample: it used to be freed only once after all loops, leaking every other buffer
+      }
+
+    }
+  }
+
+  delete[] train_X;
+}
+
+void DBN::finetune(int *input, int *label, double lr, int epochs) {  // Trains only the output layer on sampled top-layer activations; input/label are flattened row by row.
+  int *layer_input = NULL;  // sampled activations of the current layer; released once per sample
+  // int prev_layer_input_size;
+  int *prev_layer_input;
+
+  int *train_X = new int[n_ins];  // scratch copy of the current sample
+  int *train_Y = new int[n_outs];  // scratch copy of the current label row
+
+  for(int epoch=0; epoch<epochs; epoch++) {
+    for(int n=0; n<N; n++) { // input x1...xN
+      // initial input
+      for(int m=0; m<n_ins; m++)  train_X[m] = input[n * n_ins + m];
+      for(int m=0; m<n_outs; m++) train_Y[m] = label[n * n_outs + m];
+
+      // layer input
+      for(int i=0; i<n_layers; i++) {
+        if(i == 0) {
+          prev_layer_input = new int[n_ins];
+          for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[j];
+        } else {
+          prev_layer_input = new int[hidden_layer_sizes[i-1]];
+          for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
+          delete[] layer_input;
+        }
+
+
+        layer_input = new int[hidden_layer_sizes[i]];
+        sigmoid_layers[i]->sample_h_given_v(prev_layer_input, layer_input);
+        delete[] prev_layer_input;
+      }
+
+      log_layer->train(layer_input, train_Y, lr);
+      delete[] layer_input;  // released per sample: it used to be freed only once after all loops, leaking every other buffer
+    }
+    // lr *= 0.95;
+  }
+
+  delete[] train_X;
+  delete[] train_Y;
+}
+
+void DBN::predict(int *x, double *y) {  // Forward pass without sampling; writes log_layer->n_out softmax values into y.
+  double *layer_input;  // only assigned inside the layer loop, so n_layers must be >= 1
+  // int prev_layer_input_size;
+  double *prev_layer_input;
+
+  double linear_output;
+
+  prev_layer_input = new double[n_ins];
+  for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];
+
+  // layer activation
+  for(int i=0; i<n_layers; i++) {
+    layer_input = new double[sigmoid_layers[i]->n_out];
+
+    for(int k=0; k<sigmoid_layers[i]->n_out; k++) {
+      linear_output = 0.0;
+
+      for(int j=0; j<sigmoid_layers[i]->n_in; j++) {
+        linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j];
+      }
+      linear_output += sigmoid_layers[i]->b[k];
+      layer_input[k] = sigmoid(linear_output);  // real-valued activation, unlike the binary samples used during training
+    }
+    delete[] prev_layer_input;
+
+    if(i < n_layers-1) {  // hand this layer's output to the next layer; the last output is kept for the output layer
+      prev_layer_input = new double[sigmoid_layers[i]->n_out];
+      for(int j=0; j<sigmoid_layers[i]->n_out; j++) prev_layer_input[j] = layer_input[j];
+      delete[] layer_input;
+    }
+  }
+  
+  for(int i=0; i<log_layer->n_out; i++) {  // done inline because LogisticRegression::predict takes int inputs
+    y[i] = 0;
+    for(int j=0; j<log_layer->n_in; j++) {
+      y[i] += log_layer->W[i][j] * layer_input[j];
+    }
+    y[i] += log_layer->b[i];
+  }
+  
+  log_layer->softmax(y);
+
+
+  delete[] layer_input;
+}
+
+
+// HiddenLayer
+HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp) {  // NULL w/bp are allocated here; non-NULL ones are stored without copying.
+  N = size;
+  n_in = in;
+  n_out = out;
+
+  if(w == NULL) {
+    W = new double*[n_out];
+    for(int i=0; i<n_out; i++) W[i] = new double[n_in];  // one separately allocated row per output unit
+    double a = 1.0 / n_in;  // half-width of the initial weight range
+
+    for(int i=0; i<n_out; i++) {
+      for(int j=0; j<n_in; j++) {
+        W[i][j] = uniform(-a, a);
+      }
+    }
+  } else {
+    W = w;
+  }
+
+  if(bp == NULL) {
+    b = new double[n_out]();  // () zero-initialises; the bias was previously left indeterminate but read by output()
+  } else {
+    b = bp;
+  }
+}
+
+HiddenLayer::~HiddenLayer() {  // Releases W and b unconditionally, including arrays that were passed to the constructor.
+  for(int i=0; i<n_out; i++) delete[] W[i];  // rows come from new[], so delete[] is required (plain delete is undefined behaviour)
+  delete[] W;
+  delete[] b;
+}
+
+double HiddenLayer::output(int *input, double *w, double b) {  // sigmoid(b + sum_j w[j] * input[j]) over n_in inputs; parameter b shadows the member b.
+  double linear_output = 0.0;
+  for(int j=0; j<n_in; j++) {
+    linear_output += w[j] * input[j];
+  }
+  linear_output += b;
+  return sigmoid(linear_output);
+}
+
+void HiddenLayer::sample_h_given_v(int *input, int *sample) {  // Fills sample[0..n_out) with 0/1 draws.
+  for(int i=0; i<n_out; i++) {
+    sample[i] = binomial(1, output(input, W[i], b[i]));  // single trial whose success probability is unit i's activation
+  }
+}
+
+
+// RBM 
+RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb) {  // NULL w/hb/vb are allocated here; non-NULL ones are stored without copying.
+  N = size;
+  n_visible = n_v;
+  n_hidden = n_h;
+
+  if(w == NULL) {
+    W = new double*[n_hidden];
+    for(int i=0; i<n_hidden; i++) W[i] = new double[n_visible];  // one separately allocated row per hidden unit
+    double a = 1.0 / n_visible;  // half-width of the initial weight range
+
+    for(int i=0; i<n_hidden; i++) {
+      for(int j=0; j<n_visible; j++) {
+        W[i][j] = uniform(-a, a);
+      }
+    }
+  } else {
+    W = w;
+  }
+
+  if(hb == NULL) {
+    hbias = new double[n_hidden];
+    for(int i=0; i<n_hidden; i++) hbias[i] = 0;
+  } else {
+    hbias = hb;
+  }
+
+  if(vb == NULL) {
+    vbias = new double[n_visible];
+    for(int i=0; i<n_visible; i++) vbias[i] = 0;
+  } else {
+    vbias = vb;
+  }
+}
+
+RBM::~RBM() {  // Releases vbias only.
+  // W and hbias are deliberately NOT deleted here: DBN::DBN builds every RBM
+  // on the W and b arrays of the matching HiddenLayer, and ~HiddenLayer()
+  // deletes them. NOTE(review): an RBM built with w == NULL or hb == NULL leaks those arrays.
+  delete[] vbias;
+}
+
+
+void RBM::contrastive_divergence(int *input, double lr, int k) {  // One CD-k parameter update from a single sample (n_visible ints).
+  double *ph_mean = new double[n_hidden];  // hidden probabilities given the data
+  int *ph_sample = new int[n_hidden];  // hidden samples given the data
+  double *nv_means = new double[n_visible];  // the nv_*/nh_* buffers hold the result of the last Gibbs step
+  int *nv_samples = new int[n_visible];
+  double *nh_means = new double[n_hidden];
+  int *nh_samples = new int[n_hidden];
+
+  /* CD-k */
+  sample_h_given_v(input, ph_mean, ph_sample);
+
+  for(int step=0; step<k; step++) {  // NOTE(review): for k < 1 the nv_*/nh_* buffers are read below without ever being written
+    if(step == 0) {
+      gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);
+    } else {
+      gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
+    }
+  }
+
+  for(int i=0; i<n_hidden; i++) {
+    for(int j=0; j<n_visible; j++) {
+      // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+      W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;  // weight update uses ph_mean ...
+    }
+    hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;  // ... while the hidden-bias update uses ph_sample
+  }
+
+  for(int i=0; i<n_visible; i++) {
+    vbias[i] += lr * (input[i] - nv_samples[i]) / N;
+  }
+
+  delete[] ph_mean;
+  delete[] ph_sample;
+  delete[] nv_means;
+  delete[] nv_samples;
+  delete[] nh_means;
+  delete[] nh_samples;
+}
+
+void RBM::sample_h_given_v(int *v0_sample, double *mean, int *sample) {  // For each hidden unit: activation probability into mean, a 0/1 draw into sample.
+  for(int i=0; i<n_hidden; i++) {
+    mean[i] = propup(v0_sample, W[i], hbias[i]);
+    sample[i] = binomial(1, mean[i]);
+  }
+}
+
+void RBM::sample_v_given_h(int *h0_sample, double *mean, int *sample) {  // For each visible unit: activation probability into mean, a 0/1 draw into sample.
+  for(int i=0; i<n_visible; i++) {
+    mean[i] = propdown(h0_sample, i, vbias[i]);
+    sample[i] = binomial(1, mean[i]);
+  }
+}
+
+double RBM::propup(int *v, double *w, double b) {  // sigmoid(b + sum_j w[j] * v[j]) over the n_visible inputs; w is one row of W.
+  double pre_sigmoid_activation = 0.0;
+  for(int j=0; j<n_visible; j++) {
+    pre_sigmoid_activation += w[j] * v[j];
+  }
+  pre_sigmoid_activation += b;
+  return sigmoid(pre_sigmoid_activation);
+}
+
+double RBM::propdown(int *h, int i, double b) {  // sigmoid(b + sum_j W[j][i] * h[j]): activation of visible unit i from column i of W.
+  double pre_sigmoid_activation = 0.0;
+  for(int j=0; j<n_hidden; j++) {
+    pre_sigmoid_activation += W[j][i] * h[j];
+  }
+  pre_sigmoid_activation += b;
+  return sigmoid(pre_sigmoid_activation);
+}
+
+void RBM::gibbs_hvh(int *h0_sample, double *nv_means, int *nv_samples, \
+                    double *nh_means, int *nh_samples) {  // One hidden -> visible -> hidden sampling step.
+  sample_v_given_h(h0_sample, nv_means, nv_samples);
+  sample_h_given_v(nv_samples, nh_means, nh_samples);  // h0_sample may alias nh_samples: it is fully consumed by the call above
+}
+
+void RBM::reconstruct(int *v, double *reconstructed_v) {  // Up-down pass on probabilities (no sampling); writes n_visible values.
+  double *h = new double[n_hidden];  // hidden activation probabilities for v
+  double pre_sigmoid_activation;
+
+  for(int i=0; i<n_hidden; i++) {
+    h[i] = propup(v, W[i], hbias[i]);
+  }
+
+  for(int i=0; i<n_visible; i++) {  // same sum as propdown, written out because h is double rather than int
+    pre_sigmoid_activation = 0.0;
+    for(int j=0; j<n_hidden; j++) {
+      pre_sigmoid_activation += W[j][i] * h[j];
+    }
+    pre_sigmoid_activation += vbias[i];
+
+    reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
+  }
+
+  delete[] h;
+}
+
+
+// LogisticRegression
+LogisticRegression::LogisticRegression(int size, int in, int out) {  // Allocates an out x in weight matrix and out biases, all set to 0.
+  N = size;
+  n_in = in;
+  n_out = out;
+
+  W = new double*[n_out];
+  for(int i=0; i<n_out; i++) W[i] = new double[n_in];  // one separately allocated row per output
+  b = new double[n_out];
+
+  for(int i=0; i<n_out; i++) {
+    for(int j=0; j<n_in; j++) {
+      W[i][j] = 0;
+    }
+    b[i] = 0;
+  }
+}
+
+LogisticRegression::~LogisticRegression() {  // Releases every weight row, the row table and the bias array.
+  for(int i=0; i<n_out; i++) delete[] W[i];
+  delete[] W;
+  delete[] b;
+}
+
+
+void LogisticRegression::train(int *x, int *y, double lr) {  // One update from a single example: x has n_in entries, y has n_out target entries.
+  double *p_y_given_x = new double[n_out];  // softmax(Wx + b) once filled below
+  double *dy = new double[n_out];  // target minus prediction
+
+  for(int i=0; i<n_out; i++) {
+    p_y_given_x[i] = 0;
+    for(int j=0; j<n_in; j++) {
+      p_y_given_x[i] += W[i][j] * x[j];
+    }
+    p_y_given_x[i] += b[i];
+  }
+  softmax(p_y_given_x);
+
+  for(int i=0; i<n_out; i++) {
+    dy[i] = y[i] - p_y_given_x[i];
+
+    for(int j=0; j<n_in; j++) {
+      W[i][j] += lr * dy[i] * x[j] / N;  // every step is scaled by 1/N
+    }
+
+    b[i] += lr * dy[i] / N;
+  }
+  
+  delete[] p_y_given_x;
+  delete[] dy;
+}
+
+void LogisticRegression::softmax(double *x) {  // Replaces x[0..n_out) in place with its softmax.
+  double max = (n_out > 0) ? x[0] : 0.0;  // seed with a real element: a 0.0 seed gave sum == 0 (NaN output) when every x[i] was very negative
+  double sum = 0.0;
+  
+  for(int i=0; i<n_out; i++) if(max < x[i]) max = x[i];
+  for(int i=0; i<n_out; i++) {
+    x[i] = exp(x[i] - max);  // subtracting the maximum keeps exp() from overflowing and makes the largest term exactly 1
+    sum += x[i];
+  } 
+
+  for(int i=0; i<n_out; i++) x[i] /= sum;
+}
+
+void LogisticRegression::predict(int *x, double *y) {  // Writes softmax(Wx + b) for the n_in inputs in x into y (n_out entries).
+  for(int i=0; i<n_out; i++) {
+    y[i] = 0;
+    for(int j=0; j<n_in; j++) {
+      y[i] += W[i][j] * x[j];
+    }
+    y[i] += b[i];
+  }
+
+  softmax(y);
+}
+
+
+
+
+
+void test_dbn() {  // Demo: pretrains and finetunes a two-hidden-layer DBN on 6 hard-coded samples, then prints predictions for 3 test rows.
+  srand(0);  // fixed seed, so runs are repeatable for a given rand() implementation
+
+  double pretrain_lr = 0.1;
+  int pretraining_epochs = 1000;
+  int k = 1;  // number of Gibbs steps per contrastive-divergence update
+  double finetune_lr = 0.1;
+  int finetune_epochs = 500;
+
+  int train_N = 6;
+  int test_N = 3;
+  int n_ins = 6;
+  int n_outs = 2;
+  int hidden_layer_sizes[] = {3, 3};
+  int n_layers = sizeof(hidden_layer_sizes) / sizeof(hidden_layer_sizes[0]);
+
+  // training data
+  int train_X[6][6] = {
+    {1, 1, 1, 0, 0, 0},
+    {1, 0, 1, 0, 0, 0},
+    {1, 1, 1, 0, 0, 0},
+    {0, 0, 1, 1, 1, 0},
+    {0, 0, 1, 1, 0, 0},
+    {0, 0, 1, 1, 1, 0}
+  };
+
+  int train_Y[6][2] = {
+    {1, 0},
+    {1, 0},
+    {1, 0},
+    {0, 1},
+    {0, 1},
+    {0, 1}
+  };
+
+
+  
+  // construct DBN
+  DBN dbn(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers);
+
+  // pretrain
+  dbn.pretrain(*train_X, pretrain_lr, k, pretraining_epochs);  // *train_X: the 2-D array passed as a flat int*
+
+  // finetune
+  dbn.finetune(*train_X, *train_Y, finetune_lr, finetune_epochs);
+  
+
+  // test data
+  int test_X[3][6] = {
+    {1, 1, 0, 0, 0, 0},
+    {0, 0, 0, 1, 1, 0},
+    {1, 1, 1, 1, 1, 0}
+  };
+
+  double test_Y[3][2];
+
+
+  // test
+  for(int i=0; i<test_N; i++) {
+    dbn.predict(test_X[i], test_Y[i]);
+    for(int j=0; j<n_outs; j++) {
+      cout << test_Y[i][j] << " ";
+    }
+    cout << endl;
+  }
+
+}
+
+
+
+
+
+int main() {  // Program entry point: runs the DBN demo and exits with status 0.
+  test_dbn();
+  return 0;
+}
diff --git a/DeepLearning/cpp/DBN.h b/DeepLearning/cpp/DBN.h
new file mode 100644
index 00000000..086ea635
--- /dev/null
+++ b/DeepLearning/cpp/DBN.h
@@ -0,0 +1,17 @@
+class DBN {  // NOTE(review): no include guard, and HiddenLayer, RBM and LogisticRegression must be declared before this header is included.
+
+public:
+  int N;  // forwarded to every layer constructor; also the number of rows pretrain/finetune read from their input
+  int n_ins;  // width of one input row
+  int *hidden_layer_sizes;  // caller-owned array with n_layers entries (stored, not copied)
+  int n_outs;  // width of one label/output row
+  int n_layers;
+  HiddenLayer **sigmoid_layers;  // n_layers entries
+  RBM **rbm_layers;  // n_layers entries; each RBM uses the W and b of the HiddenLayer at the same index
+  LogisticRegression *log_layer;  // output layer
+  DBN(int, int, int*, int, int);  // (N, n_ins, hidden_layer_sizes, n_outs, n_layers)
+  ~DBN();
+  void pretrain(int*, double, int, int);  // (input, lr, k, epochs)
+  void finetune(int*, int*, double, int);  // (input, label, lr, epochs)
+  void predict(int*, double*);  // (x, y)
+};
diff --git a/DeepLearning/cpp/HiddenLayer.cpp b/DeepLearning/cpp/HiddenLayer.cpp
new file mode 100644
index 00000000..fb530c65
--- /dev/null
+++ b/DeepLearning/cpp/HiddenLayer.cpp
@@ -0,0 +1,60 @@
+#include <iostream>
+#include <math.h>
+#include "HiddenLayer.h"
+#include "utils.h"
+using namespace std;
+using namespace utils;
+
+
+HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp) {  // NULL w/bp are allocated here; non-NULL ones are stored without copying.
+  N = size;
+  n_in = in;
+  n_out = out;
+
+  if(w == NULL) {
+    W = new double*[n_out];
+    for(int i=0; i<n_out; i++) W[i] = new double[n_in];  // one separately allocated row per output unit
+    double a = 1.0 / n_in;  // half-width of the initial weight range
+
+    for(int i=0; i<n_out; i++) {
+      for(int j=0; j<n_in; j++) {
+        W[i][j] = uniform(-a, a);
+      }
+    }
+  } else {
+    W = w;
+  }
+
+  if(bp == NULL) {
+    b = new double[n_out]();  // () zero-initialises; the bias was previously left indeterminate but read by output()
+  } else {
+    b = bp;
+  }
+}
+
+HiddenLayer::~HiddenLayer() {  // Releases W and b unconditionally, including arrays that were passed to the constructor.
+  for(int i=0; i<n_out; i++) delete[] W[i];  // rows come from new[], so delete[] is required (plain delete is undefined behaviour)
+  delete[] W;
+  delete[] b;
+}
+
+double HiddenLayer::output(int *input, double *w, double b) {  // sigmoid(b + sum_j w[j] * input[j]) over n_in inputs; parameter b shadows the member b.
+  double linear_output = 0.0;
+  for(int j=0; j<n_in; j++) {
+    linear_output += w[j] * input[j];
+  }
+  linear_output += b;
+  return sigmoid(linear_output);
+}
+
+void HiddenLayer::sample_h_given_v(int *input, int *sample) {  // Fills sample[0..n_out) with 0/1 draws.
+  for(int i=0; i<n_out; i++) {
+    sample[i] = binomial(1, output(input, W[i], b[i]));  // single trial whose success probability is unit i's activation
+  }
+}
+
+
+
+int main() {  // Placeholder entry point: does nothing and returns 0.
+  return 0;
+}
diff --git a/DeepLearning/cpp/HiddenLayer.h b/DeepLearning/cpp/HiddenLayer.h
new file mode 100644
index 00000000..000c4b9a
--- /dev/null
+++ b/DeepLearning/cpp/HiddenLayer.h
@@ -0,0 +1,13 @@
+class HiddenLayer {  // NOTE(review): this header has no include guard.
+
+public:
+  int N;  // stored by the constructor
+  int n_in;  // number of inputs per unit
+  int n_out;  // number of units
+  double **W;  // n_out rows of n_in weights
+  double *b;  // n_out biases
+  HiddenLayer(int, int, int, double**, double*);  // (N, n_in, n_out, W, b); NULL W/b are allocated by the constructor
+  ~HiddenLayer();
+  double output(int*, double*, double);  // (input, weight row, bias) -> sigmoid activation
+  void sample_h_given_v(int*, int*);  // (input, sample out)
+};
diff --git a/DeepLearning/cpp/LogisticRegression.cpp b/DeepLearning/cpp/LogisticRegression.cpp
new file mode 100644
index 00000000..6eca5660
--- /dev/null
+++ b/DeepLearning/cpp/LogisticRegression.cpp
@@ -0,0 +1,154 @@
+#include <iostream>
+#include <string>
+#include <math.h>
+#include "LogisticRegression.h"
+using namespace std;
+
+
+LogisticRegression::LogisticRegression(int size, int in, int out) {  // Allocates an out x in weight matrix and out biases, all set to 0.
+  N = size;
+  n_in = in;
+  n_out = out;
+
+  // initialize W, b
+  W = new double*[n_out];
+  for(int i=0; i<n_out; i++) W[i] = new double[n_in];  // one separately allocated row per output
+  b = new double[n_out];
+
+  for(int i=0; i<n_out; i++) {
+    for(int j=0; j<n_in; j++) {
+      W[i][j] = 0;
+    }
+    b[i] = 0;
+  }
+}
+
+LogisticRegression::~LogisticRegression() {  // Releases every weight row, the row table and the bias array.
+  for(int i=0; i<n_out; i++) delete[] W[i];
+  delete[] W;
+  delete[] b;
+}
+
+
+void LogisticRegression::train(int *x, int *y, double lr) {  // One update from a single example: x has n_in entries, y has n_out target entries.
+  double *p_y_given_x = new double[n_out];  // softmax(Wx + b) once filled below
+  double *dy = new double[n_out];  // target minus prediction
+
+  for(int i=0; i<n_out; i++) {
+    p_y_given_x[i] = 0;
+    for(int j=0; j<n_in; j++) {
+      p_y_given_x[i] += W[i][j] * x[j];
+    }
+    p_y_given_x[i] += b[i];
+  }
+  softmax(p_y_given_x);
+
+  for(int i=0; i<n_out; i++) {
+    dy[i] = y[i] - p_y_given_x[i];
+
+    for(int j=0; j<n_in; j++) {
+      W[i][j] += lr * dy[i] * x[j] / N;  // every step is scaled by 1/N
+    }
+
+    b[i] += lr * dy[i] / N;
+  }
+  delete[] p_y_given_x;
+  delete[] dy;
+}
+
+void LogisticRegression::softmax(double *x) {  // Replaces x[0..n_out) in place with its softmax.
+  double max = (n_out > 0) ? x[0] : 0.0;  // seed with a real element: a 0.0 seed gave sum == 0 (NaN output) when every x[i] was very negative
+  double sum = 0.0;
+  
+  for(int i=0; i<n_out; i++) if(max < x[i]) max = x[i];
+  for(int i=0; i<n_out; i++) {
+    x[i] = exp(x[i] - max);  // subtracting the maximum keeps exp() from overflowing and makes the largest term exactly 1
+    sum += x[i];
+  } 
+
+  for(int i=0; i<n_out; i++) x[i] /= sum;
+}
+
+void LogisticRegression::predict(int *x, double *y) {  // Writes softmax(Wx + b) for the n_in inputs in x into y (n_out entries).
+  for(int i=0; i<n_out; i++) {
+    y[i] = 0;
+    for(int j=0; j<n_in; j++) {
+      y[i] += W[i][j] * x[j];
+    }
+    y[i] += b[i];
+  }
+
+  softmax(y);
+}
+
+
+void test_lr() {  // Demo: trains the classifier on 6 hard-coded samples, then prints predictions for 2 test rows.
+  srand(0);  // NOTE(review): nothing in the visible training/prediction code draws random numbers
+  
+  double learning_rate = 0.1;
+  int n_epochs = 500;
+
+  int train_N = 6;
+  int test_N = 2;
+  int n_in = 6;
+  int n_out = 2;
+
+
+  // training data
+  int train_X[6][6] = {
+    {1, 1, 1, 0, 0, 0},
+    {1, 0, 1, 0, 0, 0},
+    {1, 1, 1, 0, 0, 0},
+    {0, 0, 1, 1, 1, 0},
+    {0, 0, 1, 1, 0, 0},
+    {0, 0, 1, 1, 1, 0}
+  };
+
+  int train_Y[6][2] = {
+    {1, 0},
+    {1, 0},
+    {1, 0},
+    {0, 1},
+    {0, 1},
+    {0, 1}
+  };
+
+
+  // construct LogisticRegression
+  LogisticRegression classifier(train_N, n_in, n_out);
+
+
+  // train online
+  for(int epoch=0; epoch<n_epochs; epoch++) {
+    for(int i=0; i<train_N; i++) {
+      classifier.train(train_X[i], train_Y[i], learning_rate);  // one update per sample
+    }
+    // learning_rate *= 0.95;
+  }
+
+
+  // test data
+  int test_X[2][6] = {
+    {1, 0, 1, 0, 0, 0},
+    {0, 0, 1, 1, 1, 0}
+  };
+
+  double test_Y[2][2];
+
+
+  // test
+  for(int i=0; i<test_N; i++) {
+    classifier.predict(test_X[i], test_Y[i]);
+    for(int j=0; j<n_out; j++) {
+      cout << test_Y[i][j] << " ";
+    }
+    cout << endl;
+  }
+
+}
+
+
+int main() {  // Program entry point: runs the logistic-regression demo and exits with status 0.
+  test_lr();
+  return 0;
+}
diff --git a/DeepLearning/cpp/LogisticRegression.h b/DeepLearning/cpp/LogisticRegression.h
new file mode 100644
index 00000000..5fa3e5f0
--- /dev/null
+++ b/DeepLearning/cpp/LogisticRegression.h
@@ -0,0 +1,14 @@
+class LogisticRegression {  // NOTE(review): this header has no include guard.
+
+public:
+  int N;  // num of inputs; train() divides every update by it
+  int n_in;  // number of features per sample
+  int n_out;  // number of outputs
+  double **W;  // n_out rows of n_in weights
+  double *b;  // n_out biases
+  LogisticRegression(int, int, int);  // (N, n_in, n_out)
+  ~LogisticRegression();
+  void train(int*, int*, double);  // (x, y, lr)
+  void softmax(double*);  // in place over n_out entries
+  void predict(int*, double*);  // (x, y out)
+};
diff --git a/DeepLearning/cpp/RBM.cpp b/DeepLearning/cpp/RBM.cpp
new file mode 100644
index 00000000..1e606eec
--- /dev/null
+++ b/DeepLearning/cpp/RBM.cpp
@@ -0,0 +1,207 @@
+#include <iostream>
+#include <math.h>
+#include "utils.h"
+#include "RBM.h"
+using namespace std;
+using namespace utils;
+
+
+RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb) {  // NULL w/hb/vb are allocated here; non-NULL ones are stored without copying.
+  N = size;
+  n_visible = n_v;
+  n_hidden = n_h;
+
+  if(w == NULL) {
+    W = new double*[n_hidden];
+    for(int i=0; i<n_hidden; i++) W[i] = new double[n_visible];  // one separately allocated row per hidden unit
+    double a = 1.0 / n_visible;  // half-width of the initial weight range
+
+    for(int i=0; i<n_hidden; i++) {
+      for(int j=0; j<n_visible; j++) {
+        W[i][j] = uniform(-a, a);
+      }
+    }
+  } else {
+    W = w;
+  }
+
+  if(hb == NULL) {
+    hbias = new double[n_hidden];
+    for(int i=0; i<n_hidden; i++) hbias[i] = 0;
+  } else {
+    hbias = hb;
+  }
+
+  if(vb == NULL) {
+    vbias = new double[n_visible];
+    for(int i=0; i<n_visible; i++) vbias[i] = 0;
+  } else {
+    vbias = vb;
+  }
+}
+
+RBM::~RBM() {  // Releases W, hbias and vbias unconditionally, including arrays that were passed to the constructor.
+  for(int i=0; i<n_hidden; i++) delete[] W[i];
+  delete[] W;
+  delete[] hbias;
+  delete[] vbias;
+}
+
+
+void RBM::contrastive_divergence(int *input, double lr, int k) {  // One CD-k parameter update from a single sample (n_visible ints).
+  double *ph_mean = new double[n_hidden];  // hidden probabilities given the data
+  int *ph_sample = new int[n_hidden];  // hidden samples given the data
+  double *nv_means = new double[n_visible];  // the nv_*/nh_* buffers hold the result of the last Gibbs step
+  int *nv_samples = new int[n_visible];
+  double *nh_means = new double[n_hidden];
+  int *nh_samples = new int[n_hidden];
+
+  /* CD-k */
+  sample_h_given_v(input, ph_mean, ph_sample);
+
+  for(int step=0; step<k; step++) {  // NOTE(review): for k < 1 the nv_*/nh_* buffers are read below without ever being written
+    if(step == 0) {
+      gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);
+    } else {
+      gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
+    }
+  }
+
+  for(int i=0; i<n_hidden; i++) {
+    for(int j=0; j<n_visible; j++) {
+      // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+      W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;  // weight update uses ph_mean ...
+    }
+    hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;  // ... while the hidden-bias update uses ph_sample
+  }
+
+  for(int i=0; i<n_visible; i++) {
+    vbias[i] += lr * (input[i] - nv_samples[i]) / N;
+  }
+
+  delete[] ph_mean;
+  delete[] ph_sample;
+  delete[] nv_means;
+  delete[] nv_samples;
+  delete[] nh_means;
+  delete[] nh_samples;
+}
+
+void RBM::sample_h_given_v(int *v0_sample, double *mean, int *sample) {  // For each hidden unit: activation probability into mean, a 0/1 draw into sample.
+  for(int i=0; i<n_hidden; i++) {
+    mean[i] = propup(v0_sample, W[i], hbias[i]);
+    sample[i] = binomial(1, mean[i]);
+  }
+}
+
+void RBM::sample_v_given_h(int *h0_sample, double *mean, int *sample) {  // For each visible unit: activation probability into mean, a 0/1 draw into sample.
+  for(int i=0; i<n_visible; i++) {
+    mean[i] = propdown(h0_sample, i, vbias[i]);
+    sample[i] = binomial(1, mean[i]);
+  }
+}
+
+double RBM::propup(int *v, double *w, double b) {  // sigmoid(b + sum_j w[j] * v[j]) over the n_visible inputs; w is one row of W.
+  double pre_sigmoid_activation = 0.0;
+  for(int j=0; j<n_visible; j++) {
+    pre_sigmoid_activation += w[j] * v[j];
+  }
+  pre_sigmoid_activation += b;
+  return sigmoid(pre_sigmoid_activation);
+}
+
+double RBM::propdown(int *h, int i, double b) {  // sigmoid(b + sum_j W[j][i] * h[j]): activation of visible unit i from column i of W.
+  double pre_sigmoid_activation = 0.0;
+  for(int j=0; j<n_hidden; j++) {
+    pre_sigmoid_activation += W[j][i] * h[j];
+  }
+  pre_sigmoid_activation += b;
+  return sigmoid(pre_sigmoid_activation);
+}
+
+void RBM::gibbs_hvh(int *h0_sample, double *nv_means, int *nv_samples, \
+                    double *nh_means, int *nh_samples) {  // One hidden -> visible -> hidden sampling step.
+  sample_v_given_h(h0_sample, nv_means, nv_samples);
+  sample_h_given_v(nv_samples, nh_means, nh_samples);  // h0_sample may alias nh_samples: it is fully consumed by the call above
+}
+
+void RBM::reconstruct(int *v, double *reconstructed_v) {  // Up-down pass on probabilities (no sampling); writes n_visible values.
+  double *h = new double[n_hidden];  // hidden activation probabilities for v
+  double pre_sigmoid_activation;
+
+  for(int i=0; i<n_hidden; i++) {
+    h[i] = propup(v, W[i], hbias[i]);
+  }
+
+  for(int i=0; i<n_visible; i++) {  // same sum as propdown, written out because h is double rather than int
+    pre_sigmoid_activation = 0.0;
+    for(int j=0; j<n_hidden; j++) {
+      pre_sigmoid_activation += W[j][i] * h[j];
+    }
+    pre_sigmoid_activation += vbias[i];
+
+    reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
+  }
+
+  delete[] h;
+}
+
+
+void test_rbm() {  // Demo: trains an RBM on 6 hard-coded samples with CD-1, then prints reconstructions of 2 test rows.
+  srand(0);  // fixed seed, so runs are repeatable for a given rand() implementation
+
+  double learning_rate = 0.1;
+  int training_epochs = 1000;
+  int k = 1;  // number of Gibbs steps per update
+  
+  int train_N = 6;
+  int test_N = 2;
+  int n_visible = 6;
+  int n_hidden = 3;
+
+  // training data
+  int train_X[6][6] = {
+    {1, 1, 1, 0, 0, 0},
+    {1, 0, 1, 0, 0, 0},
+    {1, 1, 1, 0, 0, 0},
+    {0, 0, 1, 1, 1, 0},
+    {0, 0, 1, 0, 1, 0},
+    {0, 0, 1, 1, 1, 0}
+  };
+
+
+  // construct RBM
+  RBM rbm(train_N, n_visible, n_hidden, NULL, NULL, NULL);  // NULLs: let the RBM allocate its own parameters
+
+  // train
+  for(int epoch=0; epoch<training_epochs; epoch++) {
+    for(int i=0; i<train_N; i++) {
+      rbm.contrastive_divergence(train_X[i], learning_rate, k);  // one update per sample
+    }
+  }
+
+  // test data
+  int test_X[2][6] = {
+    {1, 1, 0, 0, 0, 0},
+    {0, 0, 0, 1, 1, 0}
+  };
+  double reconstructed_X[2][6];
+
+
+  // test
+  for(int i=0; i<test_N; i++) {
+    rbm.reconstruct(test_X[i], reconstructed_X[i]);
+    for(int j=0; j<n_visible; j++) {
+      printf("%.5f ", reconstructed_X[i][j]);  // NOTE(review): printf is used although this file includes no <stdio.h>/<cstdio> directly
+    }
+    cout << endl;
+  }
+
+}
+
+
+
+int main() {  // Program entry point: runs the RBM demo and exits with status 0.
+  test_rbm();
+  return 0;
+}
diff --git a/DeepLearning/cpp/RBM.h b/DeepLearning/cpp/RBM.h
new file mode 100644
index 00000000..f34c993d
--- /dev/null
+++ b/DeepLearning/cpp/RBM.h
@@ -0,0 +1,20 @@
+class RBM {  // NOTE(review): this header has no include guard.
+
+public:
+  int N;  // divisor applied to every update in contrastive_divergence
+  int n_visible;  // number of visible units
+  int n_hidden;  // number of hidden units
+  double **W;  // n_hidden rows of n_visible weights
+  double *hbias;  // n_hidden entries
+  double *vbias;  // n_visible entries
+  RBM(int, int, int, double**, double*, double*);  // (N, n_visible, n_hidden, W, hbias, vbias); NULL pointers are allocated
+  ~RBM();
+  void contrastive_divergence(int*, double, int);  // (input, lr, k)
+  void sample_h_given_v(int*, double*, int*);  // (v sample in, mean out, sample out)
+  void sample_v_given_h(int*, double*, int*);  // (h sample in, mean out, sample out)
+  double propup(int*, double*, double);  // (v, weight row, bias)
+  double propdown(int*, int, double);  // (h, visible index, bias)
+  void gibbs_hvh(int*, double*, int*, double*, int*);  // (h0 sample, nv_means, nv_samples, nh_means, nh_samples)
+  void reconstruct(int*, double*);  // (v, reconstructed_v out)
+};
+
diff --git a/DeepLearning/cpp/SdA.cpp b/DeepLearning/cpp/SdA.cpp
new file mode 100644
index 00000000..56323060
--- /dev/null
+++ b/DeepLearning/cpp/SdA.cpp
@@ -0,0 +1,527 @@
+#include <iostream>
+#include <math.h>
+#include "utils.h"
+
+#include "HiddenLayer.h"
+#include "dA.h"
+#include "LogisticRegression.h"
+#include "SdA.h"
+using namespace std;
+using namespace utils;
+
+
+// SdA
+// Builds the stacked network: one HiddenLayer/dA pair per entry of hls,
+// topped by a LogisticRegression output layer.
+//   size - stored as N and forwarded to every layer
+//   n_i  - width of the raw input
+//   hls  - array of n_l hidden-layer widths (the pointer is stored, not copied)
+//   n_o  - number of outputs
+//   n_l  - number of hidden layers
+SdA::SdA(int size, int n_i, int *hls, int n_o, int n_l) {
+  N = size;
+  n_ins = n_i;
+  hidden_layer_sizes = hls;
+  n_outs = n_o;
+  n_layers = n_l;
+
+  sigmoid_layers = new HiddenLayer*[n_layers];
+  dA_layers = new dA*[n_layers];
+
+  for(int layer=0; layer<n_layers; layer++) {
+    // the first layer reads the raw input, deeper layers read the previous hidden layer
+    const int fan_in = (layer == 0) ? n_ins : hidden_layer_sizes[layer-1];
+    const int fan_out = hidden_layer_sizes[layer];
+
+    HiddenLayer *hidden = new HiddenLayer(N, fan_in, fan_out, NULL, NULL);
+    sigmoid_layers[layer] = hidden;
+
+    // the dA receives the hidden layer's own W and b, so both objects
+    // read and update the same parameter arrays
+    dA_layers[layer] = new dA(N, fan_in, fan_out, hidden->W, hidden->b, NULL);
+  }
+
+  // output layer sits on top of the last hidden layer
+  log_layer = new LogisticRegression(N, hidden_layer_sizes[n_layers-1], n_outs);
+}
+
+// Destroys the output layer, then every hidden/dA layer pair, then the two
+// pointer tables that held them.
+SdA::~SdA() {
+  delete log_layer;
+
+  int layer = 0;
+  while(layer < n_layers) {
+    delete sigmoid_layers[layer];
+    delete dA_layers[layer];
+    ++layer;
+  }
+
+  delete[] sigmoid_layers;
+  delete[] dA_layers;
+}
+
+// Greedy layer-wise pretraining of the denoising autoencoders.
+//   input            - N samples of n_ins ints each, stored contiguously
+//   lr               - learning rate forwarded to dA::train
+//   corruption_level - forwarded to dA::train
+//   epochs           - passes over the N samples for each layer
+// For layer i, every sample is first pushed through hidden layers 0..i-1
+// (sampling each one) and the result is used to train dA_layers[i].
+void SdA::pretrain(int *input, double lr, double corruption_level, int epochs) {
+  for(int i=0; i<n_layers; i++) {  // layer-wise
+
+    for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
+
+      for(int n=0; n<N; n++) { // input x1...xN
+        // start from a private copy of sample n
+        int *layer_input = new int[n_ins];
+        for(int j=0; j<n_ins; j++) layer_input[j] = input[n * n_ins + j];
+
+        // propagate through the layers below layer i
+        for(int l=1; l<=i; l++) {
+          int *prev_layer_input = layer_input;
+          layer_input = new int[hidden_layer_sizes[l-1]];
+
+          sigmoid_layers[l-1]->sample_h_given_v(prev_layer_input, layer_input);
+          delete[] prev_layer_input;
+        }
+
+        dA_layers[i]->train(layer_input, lr, corruption_level);
+
+        // release the buffer per sample; freeing it only once after all
+        // loops leaked one buffer per iteration
+        delete[] layer_input;
+      }
+    }
+  }
+}
+
+// Supervised fine-tuning of the output layer.
+//   input  - N samples of n_ins ints each, stored contiguously
+//   label  - N label rows of n_outs ints each, stored contiguously
+//   lr     - learning rate forwarded to LogisticRegression::train
+//   epochs - passes over the N samples
+// Each sample is pushed through every hidden layer (sampling each one) and
+// the top-layer sample is used to train log_layer.
+void SdA::finetune(int *input, int *label, double lr, int epochs) {
+  int *train_Y = new int[n_outs];
+
+  for(int epoch=0; epoch<epochs; epoch++) {
+    for(int n=0; n<N; n++) { // input x1...xN
+      for(int m=0; m<n_outs; m++) train_Y[m] = label[n * n_outs + m];
+
+      // start from a private copy of sample n
+      int *layer_input = new int[n_ins];
+      for(int j=0; j<n_ins; j++) layer_input[j] = input[n * n_ins + j];
+
+      // propagate through all hidden layers
+      for(int i=0; i<n_layers; i++) {
+        int *prev_layer_input = layer_input;
+        layer_input = new int[hidden_layer_sizes[i]];
+
+        sigmoid_layers[i]->sample_h_given_v(prev_layer_input, layer_input);
+        delete[] prev_layer_input;
+      }
+
+      log_layer->train(layer_input, train_Y, lr);
+
+      // release the buffer per sample; the previous code only freed the
+      // final one and leaked the rest
+      delete[] layer_input;
+    }
+    // lr *= 0.95;
+  }
+
+  delete[] train_Y;
+}
+
+// Deterministic forward pass for one sample.
+//   x - n_ins input values
+//   y - output buffer with room for log_layer->n_out values; receives the
+//       softmax of the output layer
+// Hidden layers use their sigmoid activations here (no sampling).
+void SdA::predict(int *x, double *y) {
+  double *activation = new double[n_ins];
+  for(int j=0; j<n_ins; j++) activation[j] = x[j];
+
+  // layer activation
+  for(int i=0; i<n_layers; i++) {
+    HiddenLayer *layer = sigmoid_layers[i];
+    double *next = new double[layer->n_out];
+
+    for(int k=0; k<layer->n_out; k++) {
+      double linear_output = 0.0;
+
+      for(int j=0; j<layer->n_in; j++) {
+        linear_output += layer->W[k][j] * activation[j];
+      }
+      linear_output += layer->b[k];
+      next[k] = sigmoid(linear_output);
+    }
+
+    // hand the new buffer over instead of allocating and copying again
+    delete[] activation;
+    activation = next;
+  }
+
+  for(int i=0; i<log_layer->n_out; i++) {
+    y[i] = 0;
+    for(int j=0; j<log_layer->n_in; j++) {
+      y[i] += log_layer->W[i][j] * activation[j];
+    }
+    y[i] += log_layer->b[i];
+  }
+
+  log_layer->softmax(y);
+
+  delete[] activation;
+}
+
+
+// HiddenLayer
+// Creates a layer with `in` inputs and `out` outputs.
+//   size - stored as N
+//   w    - existing out-by-in weight rows to adopt, or NULL to allocate rows
+//          filled with uniform(-1/in, 1/in)
+//   bp   - existing bias array of `out` entries to adopt, or NULL to allocate
+//          a zero-filled one
+HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp) {
+  N = size;
+  n_in = in;
+  n_out = out;
+
+  if(w == NULL) {
+    W = new double*[n_out];
+    for(int i=0; i<n_out; i++) W[i] = new double[n_in];
+    double a = 1.0 / n_in;
+
+    for(int i=0; i<n_out; i++) {
+      for(int j=0; j<n_in; j++) {
+        W[i][j] = uniform(-a, a);
+      }
+    }
+  } else {
+    W = w;
+  }
+
+  if(bp == NULL) {
+    b = new double[n_out];
+    // new double[] leaves the memory indeterminate; start every bias at zero
+    for(int i=0; i<n_out; i++) b[i] = 0;
+  } else {
+    b = bp;
+  }
+}
+
+// Frees every weight row, the row-pointer table and the bias array.
+// NOTE(review): arrays handed in through the constructor are freed here
+// too, so callers passing their own w/bp give up ownership.
+HiddenLayer::~HiddenLayer() {
+  // rows come from new double[n_in], so they need the array form of delete
+  for(int i=0; i<n_out; i++) delete[] W[i];
+  delete[] W;
+  delete[] b;
+}
+
+// Returns sigmoid(w . input + b) for one unit, reading n_in entries of
+// both w and input.
+double HiddenLayer::output(int *input, double *w, double b) {
+  double activation = 0.0;
+  for(int idx=0; idx<n_in; idx++) activation += w[idx] * input[idx];
+  return sigmoid(activation + b);
+}
+
+// Fills sample[0..n_out) with one binomial(1, p) draw per unit, where p is
+// that unit's output() for the given input.
+void HiddenLayer::sample_h_given_v(int *input, int *sample) {
+  int unit = 0;
+  while(unit < n_out) {
+    const double p = output(input, W[unit], b[unit]);
+    sample[unit] = binomial(1, p);
+    ++unit;
+  }
+}
+
+
+// dA
+// Creates a denoising autoencoder with n_v visible and n_h hidden units.
+//   size - stored as N
+//   w    - n_h-by-n_v weight rows to adopt, or NULL to allocate rows filled
+//          with uniform(-1/n_v, 1/n_v)
+//   hb   - hidden bias array to adopt, or NULL for a zero-filled one
+//   vb   - visible bias array to adopt, or NULL for a zero-filled one
+dA::dA(int size, int n_v, int n_h, double **w, double *hb, double *vb) {
+  N = size;
+  n_visible = n_v;
+  n_hidden = n_h;
+
+  W = w;
+  if(W == NULL) {
+    const double a = 1.0 / n_visible;
+    W = new double*[n_hidden];
+    for(int row=0; row<n_hidden; row++) {
+      W[row] = new double[n_visible];
+      for(int col=0; col<n_visible; col++) W[row][col] = uniform(-a, a);
+    }
+  }
+
+  hbias = hb;
+  if(hbias == NULL) {
+    hbias = new double[n_hidden];
+    for(int row=0; row<n_hidden; row++) hbias[row] = 0;
+  }
+
+  vbias = vb;
+  if(vbias == NULL) {
+    vbias = new double[n_visible];
+    for(int col=0; col<n_visible; col++) vbias[col] = 0;
+  }
+}
+
+// Frees only vbias. W and hbias are deliberately left alone in this file:
+// SdA's constructor passes each dA the W and b of its HiddenLayer, and the
+// HiddenLayer destructor releases those arrays.
+dA::~dA() {
+  // for(int i=0; i<n_hidden; i++) delete[] W[i];
+  // delete[] W;
+  // delete[] hbias;
+  delete[] vbias;
+}
+
+// Writes a corrupted copy of x into tilde_x: zeros stay zero, every other
+// entry is replaced by a binomial(1, p) draw.
+void dA::get_corrupted_input(int *x, int *tilde_x, double p) {
+  for(int idx=0; idx<n_visible; idx++) {
+    // the draw only happens for non-zero inputs
+    tilde_x[idx] = (x[idx] == 0) ? 0 : binomial(1, p);
+  }
+}
+
+// Encode: y[h] = sigmoid(W[h] . x + hbias[h]) for every hidden unit h.
+void dA::get_hidden_values(int *x, double *y) {
+  for(int h=0; h<n_hidden; h++) {
+    double acc = 0;
+    for(int v=0; v<n_visible; v++) acc += W[h][v] * x[v];
+    acc += hbias[h];
+    y[h] = sigmoid(acc);
+  }
+}
+
+// Decode: z[v] = sigmoid(sum_h W[h][v] * y[h] + vbias[v]) for every visible
+// unit v, i.e. the encoder weights are reused transposed.
+void dA::get_reconstructed_input(double *y, double *z) {
+  for(int v=0; v<n_visible; v++) {
+    double acc = 0;
+    for(int h=0; h<n_hidden; h++) acc += W[h][v] * y[h];
+    acc += vbias[v];
+    z[v] = sigmoid(acc);
+  }
+}
+
+// Runs one training step on a single sample x: corrupts x, encodes and
+// decodes it, then adjusts vbias, hbias and W from the reconstruction error.
+//   x                - n_visible input values
+//   lr               - learning rate; every update is also divided by N
+//   corruption_level - non-zero inputs are replaced by binomial(1, 1 - corruption_level)
+void dA::train(int *x, double lr, double corruption_level) {
+  int *tilde_x = new int[n_visible];
+  double *y = new double[n_hidden];
+  double *z = new double[n_visible];
+
+  double *L_vbias = new double[n_visible];
+  double *L_hbias = new double[n_hidden];
+
+  double p = 1 - corruption_level;
+
+  get_corrupted_input(x, tilde_x, p);
+  get_hidden_values(tilde_x, y);
+  get_reconstructed_input(y, z);
+  
+  // vbias: error between the clean input and the reconstruction
+  for(int i=0; i<n_visible; i++) {
+    L_vbias[i] = x[i] - z[i];
+    vbias[i] += lr * L_vbias[i] / N;
+  }
+
+  // hbias: visible error pushed back through W (still the pre-update W),
+  // scaled by y * (1 - y)
+  for(int i=0; i<n_hidden; i++) {
+    L_hbias[i] = 0;
+    for(int j=0; j<n_visible; j++) {
+      L_hbias[i] += W[i][j] * L_vbias[j];
+    }
+    L_hbias[i] *= y[i] * (1 - y[i]);
+
+    hbias[i] += lr * L_hbias[i] / N;
+  }
+  
+  // W: updated last, because the hbias loop above reads the old weights
+  for(int i=0; i<n_hidden; i++) {
+    for(int j=0; j<n_visible; j++) {
+      W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
+    }
+  }
+
+  delete[] L_hbias;
+  delete[] L_vbias;
+  delete[] z;
+  delete[] y;
+  delete[] tilde_x;
+}
+
+// Encodes x and decodes it again, writing the n_visible reconstructed
+// values into z. No corruption is applied.
+void dA::reconstruct(int *x, double *z) {
+  double *hidden = new double[n_hidden];
+  get_hidden_values(x, hidden);
+  get_reconstructed_input(hidden, z);
+  delete[] hidden;
+}
+
+
+// LogisticRegression
+// Creates a classifier with `in` inputs and `out` outputs; size is stored
+// as N. All weights and biases start at zero.
+LogisticRegression::LogisticRegression(int size, int in, int out) {
+  N = size;
+  n_in = in;
+  n_out = out;
+
+  W = new double*[n_out];
+  b = new double[n_out];
+
+  for(int row=0; row<n_out; row++) {
+    W[row] = new double[n_in];
+    for(int col=0; col<n_in; col++) W[row][col] = 0;
+    b[row] = 0;
+  }
+}
+
+// Frees every weight row, the row-pointer table and the bias array.
+LogisticRegression::~LogisticRegression() {
+  for(int row=0; row<n_out; row++) {
+    delete[] W[row];
+  }
+  delete[] W;
+  delete[] b;
+}
+
+
+// One gradient step on a single sample.
+//   x  - n_in input values
+//   y  - n_out target values
+//   lr - learning rate; every update is also divided by N
+void LogisticRegression::train(int *x, int *y, double lr) {
+  double *prob = new double[n_out];
+
+  // class scores, turned into probabilities by softmax
+  for(int c=0; c<n_out; c++) {
+    double score = 0;
+    for(int f=0; f<n_in; f++) score += W[c][f] * x[f];
+    prob[c] = score + b[c];
+  }
+  softmax(prob);
+
+  // move each class's parameters along (target - prediction)
+  for(int c=0; c<n_out; c++) {
+    const double delta = y[c] - prob[c];
+    for(int f=0; f<n_in; f++) W[c][f] += lr * delta * x[f] / N;
+    b[c] += lr * delta / N;
+  }
+
+  delete[] prob;
+}
+
+// Replaces x[0..n_out) in place with its softmax.
+// The largest element is subtracted before exponentiating; seeding that
+// maximum from x[0] (not from 0.0) keeps at least one exponent at exactly
+// zero, so the sum cannot underflow to 0 when every input is very negative.
+void LogisticRegression::softmax(double *x) {
+  if(n_out <= 0) return;  // nothing to normalise
+
+  double max = x[0];
+  for(int i=1; i<n_out; i++) if(max < x[i]) max = x[i];
+
+  double sum = 0.0;
+  for(int i=0; i<n_out; i++) {
+    x[i] = exp(x[i] - max);
+    sum += x[i];
+  }
+
+  for(int i=0; i<n_out; i++) x[i] /= sum;
+}
+
+// Writes the class probabilities for input x (n_in values) into
+// y (n_out values).
+void LogisticRegression::predict(int *x, double *y) {
+  for(int c=0; c<n_out; c++) {
+    double score = 0;
+    for(int f=0; f<n_in; f++) score += W[c][f] * x[f];
+    y[c] = score + b[c];
+  }
+  softmax(y);
+}
+
+
+// Demo driver: builds a two-hidden-layer SdA, pretrains and fine-tunes it
+// on ten 28-bit samples with two classes, then prints the predicted class
+// probabilities for four test samples.
+void test_sda() {
+  srand(0);  // fixed seed for rand()
+
+  double pretrain_lr = 0.1;
+  double corruption_level = 0.3;
+  int pretraining_epochs = 1000;
+  double finetune_lr = 0.1;
+  int finetune_epochs = 500;
+
+  int train_N = 10;
+  int test_N = 4;
+  int n_ins = 28;
+  int n_outs = 2;
+  int hidden_layer_sizes[] = {15, 15};
+  int n_layers = sizeof(hidden_layer_sizes) / sizeof(hidden_layer_sizes[0]);
+
+  // training data
+  int train_X[10][28] = {
+    {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}
+  };
+
+  // one-hot labels, one row per training sample
+  int train_Y[10][2] = {
+    {1, 0},
+    {1, 0},
+    {1, 0},
+    {1, 0},
+    {1, 0},
+    {0, 1},
+    {0, 1},
+    {0, 1},
+    {0, 1},
+    {0, 1}
+  };
+
+  // construct SdA
+  SdA sda(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers);
+
+  // pretrain (*train_X decays the 2-D array to a pointer to its first int)
+  sda.pretrain(*train_X, pretrain_lr, corruption_level, pretraining_epochs);
+
+  // finetune
+  sda.finetune(*train_X, *train_Y, finetune_lr, finetune_epochs);
+
+
+  // test data
+  int test_X[4][28] = {
+    {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}
+  };
+
+  // NOTE(review): rows are 28 wide but only the first n_outs (2) entries of
+  // each row are printed below; the extra width looks unintentional.
+  double test_Y[4][28];
+
+  // test: print n_outs probabilities per test sample
+  for(int i=0; i<test_N; i++) {
+    sda.predict(test_X[i], test_Y[i]);
+    for(int j=0; j<n_outs; j++) {
+      printf("%.5f ", test_Y[i][j]);
+    }
+    cout << endl;
+  }
+  
+}
+
+
+// Program entry point: runs the SdA demo and exits with status 0.
+int main() {
+  test_sda();
+  return 0;
+}
diff --git a/DeepLearning/cpp/SdA.h b/DeepLearning/cpp/SdA.h
new file mode 100644
index 00000000..50ccfe9d
--- /dev/null
+++ b/DeepLearning/cpp/SdA.h
@@ -0,0 +1,17 @@
+// Stacked denoising autoencoder: a stack of HiddenLayer/dA pairs with a
+// LogisticRegression output layer on top.
+class SdA {
+
+public:
+  int N;                        // first constructor argument; forwarded to every layer
+  int n_ins;                    // width of the raw input
+  int *hidden_layer_sizes;      // widths of the hidden layers (caller's array, not copied)
+  int n_outs;                   // number of outputs
+  int n_layers;                 // number of hidden layers
+  HiddenLayer **sigmoid_layers; // n_layers hidden layers
+  dA **dA_layers;               // n_layers autoencoders, one per hidden layer
+  LogisticRegression *log_layer; // output layer
+  // Arguments: N, n_ins, hidden_layer_sizes, n_outs, n_layers.
+  SdA(int, int, int*, int, int);
+  ~SdA();
+  // Arguments: input, learning rate, corruption level, epochs.
+  void pretrain(int*, double, double, int);
+  // Arguments: input, labels, learning rate, epochs.
+  void finetune(int*, int*, double, int);
+  // Arguments: one input sample, output buffer for the class probabilities.
+  void predict(int*, double*);
+};
diff --git a/DeepLearning/cpp/dA.cpp b/DeepLearning/cpp/dA.cpp
new file mode 100644
index 00000000..783327ee
--- /dev/null
+++ b/DeepLearning/cpp/dA.cpp
@@ -0,0 +1,202 @@
+#include <iostream>
+#include <math.h>
+#include "utils.h"
+
+#include "dA.h"
+using namespace std;
+using namespace utils;
+
+
+// Creates a denoising autoencoder with n_v visible and n_h hidden units.
+//   size - stored as N
+//   w    - n_h-by-n_v weight rows to adopt, or NULL to allocate rows filled
+//          with uniform(-1/n_v, 1/n_v)
+//   hb   - hidden bias array to adopt, or NULL for a zero-filled one
+//   vb   - visible bias array to adopt, or NULL for a zero-filled one
+dA::dA(int size, int n_v, int n_h, double **w, double *hb, double *vb) {
+  N = size;
+  n_visible = n_v;
+  n_hidden = n_h;
+
+  W = w;
+  if(W == NULL) {
+    const double a = 1.0 / n_visible;
+    W = new double*[n_hidden];
+    for(int row=0; row<n_hidden; row++) {
+      W[row] = new double[n_visible];
+      for(int col=0; col<n_visible; col++) W[row][col] = uniform(-a, a);
+    }
+  }
+
+  hbias = hb;
+  if(hbias == NULL) {
+    hbias = new double[n_hidden];
+    for(int row=0; row<n_hidden; row++) hbias[row] = 0;
+  }
+
+  vbias = vb;
+  if(vbias == NULL) {
+    vbias = new double[n_visible];
+    for(int col=0; col<n_visible; col++) vbias[col] = 0;
+  }
+}
+
+// Frees the weight rows, the row-pointer table and both bias arrays.
+dA::~dA() {
+  for(int row=0; row<n_hidden; row++) {
+    delete[] W[row];
+  }
+  delete[] W;
+  delete[] hbias;
+  delete[] vbias;
+}
+
+// Writes a corrupted copy of x into tilde_x: zeros stay zero, every other
+// entry is replaced by a binomial(1, p) draw.
+void dA::get_corrupted_input(int *x, int *tilde_x, double p) {
+  for(int idx=0; idx<n_visible; idx++) {
+    // the draw only happens for non-zero inputs
+    tilde_x[idx] = (x[idx] == 0) ? 0 : binomial(1, p);
+  }
+}
+
+// Encode: y[h] = sigmoid(W[h] . x + hbias[h]) for every hidden unit h.
+void dA::get_hidden_values(int *x, double *y) {
+  for(int h=0; h<n_hidden; h++) {
+    double acc = 0;
+    for(int v=0; v<n_visible; v++) acc += W[h][v] * x[v];
+    acc += hbias[h];
+    y[h] = sigmoid(acc);
+  }
+}
+
+// Decode: z[v] = sigmoid(sum_h W[h][v] * y[h] + vbias[v]) for every visible
+// unit v, i.e. the encoder weights are reused transposed.
+void dA::get_reconstructed_input(double *y, double *z) {
+  for(int v=0; v<n_visible; v++) {
+    double acc = 0;
+    for(int h=0; h<n_hidden; h++) acc += W[h][v] * y[h];
+    acc += vbias[v];
+    z[v] = sigmoid(acc);
+  }
+}
+
+// Runs one training step on a single sample x: corrupts x, encodes and
+// decodes it, then adjusts vbias, hbias and W from the reconstruction error.
+//   x                - n_visible input values
+//   lr               - learning rate; every update is also divided by N
+//   corruption_level - non-zero inputs are replaced by binomial(1, 1 - corruption_level)
+void dA::train(int *x, double lr, double corruption_level) {
+  int *tilde_x = new int[n_visible];
+  double *y = new double[n_hidden];
+  double *z = new double[n_visible];
+
+  double *L_vbias = new double[n_visible];
+  double *L_hbias = new double[n_hidden];
+
+  double p = 1 - corruption_level;
+
+  get_corrupted_input(x, tilde_x, p);
+  get_hidden_values(tilde_x, y);
+  get_reconstructed_input(y, z);
+  
+  // vbias: error between the clean input and the reconstruction
+  for(int i=0; i<n_visible; i++) {
+    L_vbias[i] = x[i] - z[i];
+    vbias[i] += lr * L_vbias[i] / N;
+  }
+
+  // hbias: visible error pushed back through W (still the pre-update W),
+  // scaled by y * (1 - y)
+  for(int i=0; i<n_hidden; i++) {
+    L_hbias[i] = 0;
+    for(int j=0; j<n_visible; j++) {
+      L_hbias[i] += W[i][j] * L_vbias[j];
+    }
+    L_hbias[i] *= y[i] * (1 - y[i]);
+
+    hbias[i] += lr * L_hbias[i] / N;
+  }
+  
+  // W: updated last, because the hbias loop above reads the old weights
+  for(int i=0; i<n_hidden; i++) {
+    for(int j=0; j<n_visible; j++) {
+      W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
+    }
+  }
+
+  delete[] L_hbias;
+  delete[] L_vbias;
+  delete[] z;
+  delete[] y;
+  delete[] tilde_x;
+}
+
+// Encodes x and decodes it again, writing the n_visible reconstructed
+// values into z. No corruption is applied.
+void dA::reconstruct(int *x, double *z) {
+  double *hidden = new double[n_hidden];
+  get_hidden_values(x, hidden);
+  get_reconstructed_input(hidden, z);
+  delete[] hidden;
+}
+
+
+
+// Demo driver: trains a denoising autoencoder on ten 20-bit patterns and
+// prints the reconstruction of two test patterns.
+void test_dA() {
+  srand(0);  // fixed seed for rand()
+  
+  double learning_rate = 0.1;
+  double corruption_level = 0.3;
+  int training_epochs = 100;
+
+  int train_N = 10;
+  int test_N = 2;
+  int n_visible = 20;
+  int n_hidden = 5;
+
+  // training data
+  int train_X[10][20] = {
+    {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0}
+  };
+
+  // construct dA (NULL makes the constructor allocate W, hbias and vbias)
+  dA da(train_N, n_visible, n_hidden, NULL, NULL, NULL);
+
+  // train: one train() call per sample, per epoch
+  for(int epoch=0; epoch<training_epochs; epoch++) {
+    for(int i=0; i<train_N; i++) {
+      da.train(train_X[i], learning_rate, corruption_level);
+    }
+  }
+
+  // test data
+  int test_X[2][20] = {
+    {1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0}
+  };
+  double reconstructed_X[2][20];
+
+
+  // test: print one line of n_visible reconstructed values per test pattern
+  for(int i=0; i<test_N; i++) {
+    da.reconstruct(test_X[i], reconstructed_X[i]);
+    for(int j=0; j<n_visible; j++) {
+      printf("%.5f ", reconstructed_X[i][j]);
+    }
+    cout << endl;
+  }
+
+  cout << endl;
+}
+
+
+
+// Program entry point: runs the dA demo and exits with status 0.
+int main() {
+  test_dA();
+  return 0;
+}
diff --git a/DeepLearning/cpp/dA.h b/DeepLearning/cpp/dA.h
new file mode 100644
index 00000000..3c931e04
--- /dev/null
+++ b/DeepLearning/cpp/dA.h
@@ -0,0 +1,17 @@
+// Denoising autoencoder with tied weights: the same W is used for encoding
+// and, transposed, for decoding.
+class dA {
+
+public:
+  int N;          // first constructor argument; every training update is divided by it
+  int n_visible;  // number of visible (input) units
+  int n_hidden;   // number of hidden units
+  double **W;     // weights, indexed [hidden][visible]
+  double *hbias;  // hidden-unit biases, n_hidden entries
+  double *vbias;  // visible-unit biases, n_visible entries
+  // Arguments: N, n_visible, n_hidden, W, hbias, vbias (NULL = allocate).
+  dA(int, int, int , double**, double*, double*);
+  ~dA();
+  // Arguments: input, output buffer, probability passed to binomial().
+  void get_corrupted_input(int*, int*, double);
+  // Encode: input -> hidden activations.
+  void get_hidden_values(int*, double*);
+  // Decode: hidden activations -> reconstructed input.
+  void get_reconstructed_input(double*, double*);
+  // Arguments: one sample, learning rate, corruption level.
+  void train(int*, double, double);
+  // Arguments: one sample, output buffer for the reconstruction.
+  void reconstruct(int*, double*);
+};
diff --git a/DeepLearning/cpp/utils.h b/DeepLearning/cpp/utils.h
new file mode 100644
index 00000000..78fb1828
--- /dev/null
+++ b/DeepLearning/cpp/utils.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <iostream>
+#include <math.h>
+using namespace std;
+
+
+// Small numeric helpers shared by the models. The functions are defined in
+// this header, so they are marked inline: without it, including the header
+// from more than one translation unit produces duplicate-symbol link errors.
+namespace utils {
+  
+  // Returns a rand()-based value in [min, max) (for min < max).
+  inline double uniform(double min, double max) {
+    return rand() / (RAND_MAX + 1.0) * (max - min) + min;
+  }
+
+  // Returns the number of successes in n draws that each succeed with
+  // probability p. Returns 0 when p lies outside [0, 1].
+  inline int binomial(int n, double p) {
+    if(p < 0 || p > 1) return 0;
+  
+    int c = 0;
+    double r;
+  
+    for(int i=0; i<n; i++) {
+      r = rand() / (RAND_MAX + 1.0);  // r is in [0, 1)
+      if (r < p) c++;
+    }
+
+    return c;
+  }
+
+  // Logistic function 1 / (1 + e^-x).
+  inline double sigmoid(double x) {
+    return 1.0 / (1.0 + exp(-x));
+  }
+
+}
diff --git a/DeepLearning/data/.gitkeep b/DeepLearning/data/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/DeepLearning/go/DBN.go b/DeepLearning/go/DBN.go
new file mode 100644
index 00000000..e5522a6e
--- /dev/null
+++ b/DeepLearning/go/DBN.go
@@ -0,0 +1,237 @@
+package main
+
+import (
+	"fmt"
+	"math/rand"
+	u "./utils"
+	H "./HiddenLayer"
+	R "./RBM"
+	L "./LogisticRegression"
+)
+
+// DBN is a deep belief network: a stack of HiddenLayer/RBM pairs with a
+// LogisticRegression output layer on top.
+type DBN struct {
+	N int	// first constructor argument; forwarded to every layer
+	n_ins int	// width of the raw input
+	hidden_layer_sizes []int	// widths of the hidden layers
+	n_outs int	// number of outputs
+	n_layers int	// number of hidden layers
+	sigmoid_layers []H.HiddenLayer	// one per hidden layer
+	rbm_layers []R.RBM	// one per hidden layer, built from the matching sigmoid layer's W and B
+	log_layer L.LogisticRegression	// output layer
+}
+
+
+func DBN__construct(this *DBN, N int, n_ins int, hidden_layer_sizes []int, n_outs int, n_layers int) {
+	var input_size int
+	
+	this.N = N
+	this.n_ins = n_ins
+	this.hidden_layer_sizes = hidden_layer_sizes
+	this.n_outs = n_outs
+	this.n_layers = n_layers
+
+	this.sigmoid_layers = make([]H.HiddenLayer, n_layers)
+	this.rbm_layers = make([]R.RBM, n_layers)
+	
+	// construct multi-layer
+	for i := 0; i < n_layers; i++ {
+		if i == 0 {
+			input_size = n_ins
+		} else {
+			input_size = hidden_layer_sizes[i-1]
+		}
+
+		// construct sigmoid_layer
+		H.HiddenLayer__construct(&(this.sigmoid_layers[i]), N, input_size, hidden_layer_sizes[i], nil, nil)
+
+		// construct rbm_layer
+		R.RBM__construct(&(this.rbm_layers[i]), N, input_size, hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].B, nil)
+	}
+
+	// layer for output using LogisticRegression
+	L.LogisticRegression__construct(&(this.log_layer), N, hidden_layer_sizes[n_layers-1], n_outs)
+}
+
+func DBN_pretrain(this *DBN, train_X [][]int, lr float64, k int, epochs int){
+	var (
+		layer_input []int
+		prev_layer_input_size int
+		prev_layer_input []int
+	)
+
+
+	for i := 0; i < this.n_layers; i++ {	// layer-wise
+		for epoch := 0; epoch < epochs; epoch++ {	 // training epochs
+			for n := 0; n < this.N; n++ {	 // input x1...xN
+
+				// layer input
+				for l := 0; l <= i; l++ {
+					if l == 0 {
+						layer_input = make([]int, this.n_ins)
+						for j := 0; j < this.n_ins; j++ { layer_input[j] = train_X[n][j] }
+					} else {
+						if l == 1 {
+							prev_layer_input_size = this.n_ins
+						} else {
+							prev_layer_input_size = this.hidden_layer_sizes[l-2]
+						}
+
+						prev_layer_input = make([]int, prev_layer_input_size)
+						for j := 0; j < prev_layer_input_size; j++ { prev_layer_input[j] = layer_input[j] }
+
+						layer_input = make([]int, this.hidden_layer_sizes[l-1])
+
+						H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[l-1]), prev_layer_input, layer_input)
+					}
+				}
+
+				R.RBM_contrastive_divergence(&(this.rbm_layers[i]), layer_input, lr, k)
+			}
+		}
+	}
+}
+
+func DBN_finetune(this *DBN, train_X [][]int, train_Y [][]int, lr float64, epochs int) {
+	var (
+		layer_input []int
+		prev_layer_input []int
+	)
+
+	for epoch := 0; epoch < epochs; epoch++ {
+		for n := 0; n < this.N; n++ {	 // input x1...xN
+
+			// layer input
+			for i := 0; i < this.n_layers; i++ {
+				if i == 0 {
+					prev_layer_input = make([]int, this.n_ins)
+					for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = train_X[n][j] }
+				} else {
+					prev_layer_input = make([]int, this.hidden_layer_sizes[i-1])
+					for j:= 0; j < this.hidden_layer_sizes[i-1]; j++ { prev_layer_input[j] = layer_input[j] }
+				}
+
+				layer_input = make([]int, this.hidden_layer_sizes[i])
+				H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[i]), prev_layer_input, layer_input)
+			}
+
+			L.LogisticRegression_train(&(this.log_layer), layer_input, train_Y[n], lr)
+		}
+		// lr *= 0.95
+	}
+}
+
+func DBN_predict(this *DBN, x []int, y []float64) {
+	var (
+		layer_input []float64
+	)	
+	prev_layer_input := make([]float64, this.n_ins)
+	for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = float64(x[j]) }
+
+	
+	// layer activation
+	for i := 0; i < this.n_layers; i++ {
+		layer_input = make([]float64, this.sigmoid_layers[i].N_out)
+
+		for k := 0; k < this.sigmoid_layers[i].N_out; k++ {
+			linear_outuput := 0.0
+
+			for j := 0; j < this.sigmoid_layers[i].N_in; j++ {
+				linear_outuput += this.sigmoid_layers[i].W[k][j] * prev_layer_input[j]
+			}
+			linear_outuput += this.sigmoid_layers[i].B[k]
+			layer_input[k] = u.Sigmoid(linear_outuput)
+		}
+
+		if i < this.n_layers-1 {
+			prev_layer_input = make([]float64, this.sigmoid_layers[i].N_out)
+
+			for j := 0; j < this.sigmoid_layers[i].N_out; j++ {
+				prev_layer_input[j] = layer_input[j]
+			}
+		}
+	}
+
+	for i := 0; i < this.log_layer.N_out; i++ {
+		y[i] = 0
+		for j := 0; j < this.log_layer.N_in; j++ {
+			y[i] += this.log_layer.W[i][j] * layer_input[j]
+		}
+		y[i] += this.log_layer.B[i]
+	}
+
+	L.LogisticRegression_softmax(&(this.log_layer), y)
+}
+
+
+// test_dbn is the demo driver: it builds a DBN with two hidden layers of
+// three units, pretrains and fine-tunes it on six 6-bit samples with two
+// classes, then prints the predicted class probabilities for four samples.
+func test_dbn() {
+	rand.Seed(0)	// fixed seed
+
+	pretrain_lr := 0.1
+	pretraining_epochs := 1000
+	k := 1	// forwarded to RBM_contrastive_divergence by DBN_pretrain
+	finetune_lr := 0.1
+	finetune_epochs := 500
+
+	train_N := 6
+	test_N := 4
+	n_ins := 6
+	n_outs := 2
+	hidden_layer_sizes := []int {3, 3}
+	n_layers := len(hidden_layer_sizes)
+
+
+	// training data
+	train_X := [][]int {
+		{1, 1, 1, 0, 0, 0},
+		{1, 0, 1, 0, 0, 0},
+		{1, 1, 1, 0, 0, 0},
+		{0, 0, 1, 1, 1, 0},
+		{0, 0, 1, 1, 0, 0},
+		{0, 0, 1, 1, 1, 0},
+	}
+
+	// one-hot labels, one row per training sample
+	train_Y := [][]int {
+		{1, 0},
+		{1, 0},
+		{1, 0},
+		{0, 1},
+		{0, 1},
+		{0, 1},
+	}
+
+	// construct DBN
+	var dbn DBN
+	DBN__construct(&dbn, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers)
+
+	// pretrain
+	DBN_pretrain(&dbn, train_X, pretrain_lr, k, pretraining_epochs)
+
+	// finetune
+	DBN_finetune(&dbn, train_X, train_Y, finetune_lr, finetune_epochs)
+
+	// test data
+	test_X := [][]int {
+		{1, 1, 0, 0, 0, 0},
+		{1, 1, 1, 1, 0, 0},
+		{0, 0, 0, 1, 1, 0},
+		{0, 0, 1, 1, 1, 0},
+	}
+
+	// one output row of n_outs probabilities per test sample
+	test_Y := make([][]float64, test_N)
+	for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_outs)}
+
+	// test
+	for i := 0; i < test_N; i++ {
+		DBN_predict(&dbn, test_X[i], test_Y[i])
+		for j := 0; j < n_outs; j++ {
+			fmt.Printf("%.5f ", test_Y[i][j])
+		}
+		fmt.Printf("\n")
+	}
+}
+
+
+
+// main runs the DBN demo.
+func main() {
+	test_dbn()
+}
diff --git a/DeepLearning/go/HiddenLayer/HiddenLayer.go b/DeepLearning/go/HiddenLayer/HiddenLayer.go
new file mode 100644
index 00000000..995ca447
--- /dev/null
+++ b/DeepLearning/go/HiddenLayer/HiddenLayer.go
@@ -0,0 +1,60 @@
+package HiddenLayer
+
+import (
+	u "../utils"
+)
+
+
+// HiddenLayer is a fully connected sigmoid layer.
+type HiddenLayer struct {
+	N int	// first constructor argument; not used by the functions in this file
+	N_in int	// number of inputs
+	N_out int	// number of outputs
+	W [][]float64	// weights, N_out rows of N_in values
+	B []float64	// biases, one per output
+}
+
+
+// HiddenLayer
+// HiddenLayer__construct initialises this with n_in inputs and n_out
+// outputs. A nil W is replaced by an n_out x n_in matrix filled with
+// u.Uniform(-1/n_in, 1/n_in); a nil b is replaced by a zero-filled slice.
+// Non-nil W and b are stored as given (not copied).
+func HiddenLayer__construct(this *HiddenLayer, N int, n_in int, n_out int, W [][]float64, b []float64) {
+	this.N = N
+	this.N_in = n_in
+	this.N_out = n_out
+
+	this.W = W
+	if this.W == nil {
+		bound := 1.0 / float64(n_in)
+		this.W = make([][]float64, n_out)
+		for row := 0; row < n_out; row++ {
+			this.W[row] = make([]float64, n_in)
+			for col := 0; col < n_in; col++ {
+				this.W[row][col] = u.Uniform(-bound, bound)
+			}
+		}
+	}
+
+	this.B = b
+	if this.B == nil {
+		this.B = make([]float64, n_out)
+	}
+}
+
+// HiddenLayer_output returns sigmoid(w . input + b) for one unit, reading
+// N_in entries of both w and input.
+func HiddenLayer_output(this *HiddenLayer, input []int, w []float64, b float64) float64 {
+	activation := 0.0
+	for idx := 0; idx < this.N_in; idx++ {
+		activation += w[idx] * float64(input[idx])
+	}
+	activation += b
+
+	return u.Sigmoid(activation)
+}
+
+// HiddenLayer_sample_h_given_v fills sample[0..N_out) with one
+// u.Binomial(1, p) draw per unit, where p is that unit's output for input.
+func HiddenLayer_sample_h_given_v(this *HiddenLayer, input []int, sample []int) {
+	for unit := 0; unit < this.N_out; unit++ {
+		p := HiddenLayer_output(this, input, this.W[unit], this.B[unit])
+		sample[unit] = u.Binomial(1, p)
+	}
+}
diff --git a/DeepLearning/go/LogisticRegression.go b/DeepLearning/go/LogisticRegression.go
new file mode 100644
index 00000000..cbc7e0e5
--- /dev/null
+++ b/DeepLearning/go/LogisticRegression.go
@@ -0,0 +1,150 @@
+package main
+
+import (
+	"fmt"
+	"math"
+)
+
+// LogisticRegression is a multi-class softmax classifier.
+type LogisticRegression struct {
+	N int	// first constructor argument; every training update is divided by it
+	n_in int	// number of inputs
+	n_out int	// number of outputs (classes)
+	W [][]float64	// weights, n_out rows of n_in values
+	b []float64	// biases, one per output
+}
+
+
+// LogisticRegression__construct initialises this with n_in inputs and n_out
+// outputs; all weights and biases start at zero.
+func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) {
+	this.N, this.n_in, this.n_out = N, n_in, n_out
+
+	this.b = make([]float64, n_out)
+	this.W = make([][]float64, n_out)
+	for row := range this.W {
+		this.W[row] = make([]float64, n_in)
+	}
+}
+
+// LogisticRegression_train performs one gradient step on a single sample:
+// x holds n_in inputs, y holds n_out targets, lr is the learning rate.
+// Every update is also divided by N.
+func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) {
+	prob := make([]float64, this.n_out)
+
+	// class scores, turned into probabilities by softmax
+	for c := 0; c < this.n_out; c++ {
+		score := 0.0
+		for f := 0; f < this.n_in; f++ {
+			score += this.W[c][f] * float64(x[f])
+		}
+		score += this.b[c]
+		prob[c] = score
+	}
+	LogisticRegression_softmax(this, prob)
+
+	// move each class's parameters along (target - prediction)
+	for c := 0; c < this.n_out; c++ {
+		delta := float64(y[c]) - prob[c]
+
+		for f := 0; f < this.n_in; f++ {
+			this.W[c][f] += lr * delta * float64(x[f]) / float64(this.N)
+		}
+
+		this.b[c] += lr * delta / float64(this.N)
+	}
+}
+
+func LogisticRegression_softmax(this *LogisticRegression, x []float64) {
+	var (
+		max float64
+		sum float64
+	)
+
+	for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} }
+	for i := 0; i < this.n_out; i++ {
+		x[i] = math.Exp(x[i] - max)
+		sum += x[i]
+	}
+
+	for i := 0; i < this.n_out; i++ { x[i] /= sum }
+}
+
+// LogisticRegression_predict writes the class probabilities for input x
+// (n_in values) into y (n_out values).
+func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) {
+	for c := 0; c < this.n_out; c++ {
+		score := 0.0
+		for f := 0; f < this.n_in; f++ {
+			score += this.W[c][f] * float64(x[f])
+		}
+		score += this.b[c]
+		y[c] = score
+	}
+
+	LogisticRegression_softmax(this, y)
+}
+
+
+
+// test_lr is the demo driver: it trains a LogisticRegression on six 6-bit
+// samples with two classes and prints the predicted class probabilities for
+// two test samples.
+func test_lr() {
+	
+	learning_rate := 0.1
+	n_epochs := 500
+
+	train_N := 6
+	test_N := 2
+	n_in := 6
+	n_out := 2
+
+	
+	// training data
+	train_X := [][]int {
+		{1, 1, 1, 0, 0, 0},
+		{1, 0, 1, 0, 0, 0},
+		{1, 1, 1, 0, 0, 0},
+		{0, 0, 1, 1, 1, 0},
+		{0, 0, 1, 1, 0, 0},
+		{0, 0, 1, 1, 1, 0},
+	}
+
+
+	// one-hot labels, one row per training sample
+	train_Y := [][]int {
+		{1, 0},
+		{1, 0},
+		{1, 0},
+		{0, 1},
+		{0, 1},
+		{0, 1},
+	}
+
+	
+	// construct LogisticRegression
+	var classifier LogisticRegression
+	LogisticRegression__construct(&classifier, train_N, n_in, n_out)
+
+	// train: one train call per sample, per epoch
+	for epoch := 0; epoch < n_epochs; epoch++ {
+		for i := 0; i < train_N; i++ {
+			LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate)
+		}
+	}
+	
+	// test data
+	test_X := [][]int {
+		{1, 0, 1, 0, 0, 0},
+		{0, 0, 1, 1, 1, 0},
+	}
+	
+	// one output row of n_out probabilities per test sample
+	test_Y := make([][]float64, test_N)
+	for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) }
+
+
+	// test
+	for i := 0; i < test_N; i++ {
+		LogisticRegression_predict(&classifier, test_X[i], test_Y[i])
+		for j := 0; j < n_out; j++ {
+			fmt.Printf("%f ", test_Y[i][j])
+		}
+		fmt.Printf("\n")
+	}
+	
+}
+
+
+// main runs the logistic regression demo.
+func main() {
+	test_lr()
+}
+
diff --git a/DeepLearning/go/LogisticRegression/LogisticRegression.go b/DeepLearning/go/LogisticRegression/LogisticRegression.go
new file mode 100644
index 00000000..2f68ef5a
--- /dev/null
+++ b/DeepLearning/go/LogisticRegression/LogisticRegression.go
@@ -0,0 +1,77 @@
+package LogisticRegression
+
+import (
+	"math"
+)
+
+// LogisticRegression is a multi-class softmax classifier with exported
+// fields.
+type LogisticRegression struct {
+	N int	// first constructor argument; every training update is divided by it
+	N_in int	// number of inputs
+	N_out int	// number of outputs (classes)
+	W [][]float64	// weights, N_out rows of N_in values
+	B []float64	// biases, one per output
+}
+
+
+// LogisticRegression__construct initialises this with n_in inputs and n_out
+// outputs; all weights and biases start at zero.
+func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) {
+	this.N, this.N_in, this.N_out = N, n_in, n_out
+
+	this.B = make([]float64, n_out)
+	this.W = make([][]float64, n_out)
+	for row := range this.W {
+		this.W[row] = make([]float64, n_in)
+	}
+}
+
+// LogisticRegression_train performs one gradient step on a single sample:
+// x holds N_in inputs, y holds N_out targets, lr is the learning rate.
+// Every update is also divided by N.
+func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) {
+	prob := make([]float64, this.N_out)
+
+	// class scores, turned into probabilities by softmax
+	for c := 0; c < this.N_out; c++ {
+		score := 0.0
+		for f := 0; f < this.N_in; f++ {
+			score += this.W[c][f] * float64(x[f])
+		}
+		score += this.B[c]
+		prob[c] = score
+	}
+	LogisticRegression_softmax(this, prob)
+
+	// move each class's parameters along (target - prediction)
+	for c := 0; c < this.N_out; c++ {
+		delta := float64(y[c]) - prob[c]
+
+		for f := 0; f < this.N_in; f++ {
+			this.W[c][f] += lr * delta * float64(x[f]) / float64(this.N)
+		}
+
+		this.B[c] += lr * delta / float64(this.N)
+	}
+}
+
+func LogisticRegression_softmax(this *LogisticRegression, x []float64) {
+	var (
+		max float64
+		sum float64
+	)
+
+	for i := 0; i < this.N_out; i++ { if max < x[i] {max = x[i]} }
+	for i := 0; i < this.N_out; i++ {
+		x[i] = math.Exp(x[i] - max)
+		sum += x[i]
+	}
+
+	for i := 0; i < this.N_out; i++ { x[i] /= sum }
+}
+
+// LogisticRegression_predict writes the class probabilities for input x
+// (N_in values) into y (N_out values).
+func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) {
+	for c := 0; c < this.N_out; c++ {
+		score := 0.0
+		for f := 0; f < this.N_in; f++ {
+			score += this.W[c][f] * float64(x[f])
+		}
+		score += this.B[c]
+		y[c] = score
+	}
+
+	LogisticRegression_softmax(this, y)
+}
diff --git a/DeepLearning/go/RBM.go b/DeepLearning/go/RBM.go
new file mode 100644
index 00000000..6369da5c
--- /dev/null
+++ b/DeepLearning/go/RBM.go
@@ -0,0 +1,200 @@
+package main
+
+import (
+	"fmt"
+	"math/rand"
+	u "./utils"
+)
+
+// RBM holds the parameters of a restricted Boltzmann machine.
+type RBM struct {
+	// N divides every update applied by RBM_contrastive_divergence;
+	// n_visible and n_hidden are the layer sizes.
+	N, n_visible, n_hidden int
+	// W is the weight matrix, indexed W[hidden][visible].
+	W [][]float64
+	// hbias and vbias hold one bias per hidden and per visible unit.
+	hbias, vbias []float64
+}
+
+
+// RBM__construct stores the sizes on this and installs the parameters.
+// A nil W is replaced by an n_hidden x n_visible matrix filled, row by row,
+// with u.Uniform(-a, a) draws where a = 1/n_visible. A nil hbias or vbias is
+// replaced by a zeroed slice of the matching size. Non-nil arguments are
+// stored without copying.
+func RBM__construct(this *RBM, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) {
+	this.N, this.n_visible, this.n_hidden = N, n_visible, n_hidden
+
+	if W == nil {
+		bound := 1.0 / float64(n_visible)
+		W = make([][]float64, n_hidden)
+		for i := range W {
+			row := make([]float64, n_visible)
+			for j := range row {
+				row[j] = u.Uniform(-bound, bound)
+			}
+			W[i] = row
+		}
+	}
+	this.W = W
+
+	if hbias == nil {
+		hbias = make([]float64, n_hidden)
+	}
+	this.hbias = hbias
+
+	if vbias == nil {
+		vbias = make([]float64, n_visible)
+	}
+	this.vbias = vbias
+}
+
+// RBM_contrastive_divergence performs one CD-k update of W, hbias and vbias
+// from a single visible vector. input is the sample, lr the learning rate and
+// k the number of Gibbs steps. Every update is divided by this.N.
+func RBM_contrastive_divergence(this *RBM, input []int, lr float64, k int) {
+	ph_mean := make([]float64, this.n_hidden)
+	ph_sample := make([]int, this.n_hidden)
+	nv_means := make([]float64, this.n_visible)
+	nv_samples := make([]int, this.n_visible)
+	nh_means := make([]float64, this.n_hidden)
+	nh_samples := make([]int, this.n_hidden)
+
+	// Positive phase: hidden units given the data.
+	RBM_sample_h_given_v(this, input, ph_mean, ph_sample)
+
+	// Negative phase: the chain starts from the positive hidden sample and
+	// afterwards feeds on its own latest hidden sample.
+	chain := ph_sample
+	for step := 0; step < k; step++ {
+		RBM_gibbs_hvh(this, chain, nv_means, nv_samples, nh_means, nh_samples)
+		chain = nh_samples
+	}
+
+	n := float64(this.N)
+	for h := 0; h < this.n_hidden; h++ {
+		row := this.W[h]
+		for v := 0; v < this.n_visible; v++ {
+			row[v] += lr * (ph_mean[h] * float64(input[v]) - nh_means[h] * float64(nv_samples[v])) / n
+		}
+		this.hbias[h] += lr * (float64(ph_sample[h]) - nh_means[h]) / n
+	}
+
+	for v := 0; v < this.n_visible; v++ {
+		this.vbias[v] += lr * float64(input[v] - nv_samples[v]) / n
+	}
+}
+
+// RBM_sample_h_given_v fills mean[i] with the activation probability of hidden
+// unit i given v0_sample, and sample[i] with a u.Binomial(1, mean[i]) draw.
+func RBM_sample_h_given_v(this *RBM, v0_sample []int, mean []float64, sample []int) {
+	for unit := 0; unit < this.n_hidden; unit++ {
+		p := RBM_propup(this, v0_sample, this.W[unit], this.hbias[unit])
+		mean[unit] = p
+		sample[unit] = u.Binomial(1, p)
+	}
+}
+
+// RBM_sample_v_given_h fills mean[i] with the activation probability of
+// visible unit i given h0_sample, and sample[i] with a u.Binomial(1, mean[i])
+// draw.
+func RBM_sample_v_given_h(this *RBM, h0_sample []int, mean []float64, sample []int) {
+	for unit := 0; unit < this.n_visible; unit++ {
+		p := RBM_propdown(this, h0_sample, unit, this.vbias[unit])
+		mean[unit] = p
+		sample[unit] = u.Binomial(1, p)
+	}
+}
+
+// RBM_propup returns u.Sigmoid(w*v + b), summing over the first n_visible
+// entries of w and v.
+func RBM_propup(this *RBM, v []int, w []float64, b float64) float64 {
+	total := 0.0
+	for idx := 0; idx < this.n_visible; idx++ {
+		total += w[idx] * float64(v[idx])
+	}
+
+	// The bias is added last, after the weighted sum.
+	return u.Sigmoid(total + b)
+}
+
+// RBM_propdown returns u.Sigmoid(sum_j W[j][i]*h[j] + b), i.e. the activation
+// of visible unit i computed from column i of W over the n_hidden units.
+func RBM_propdown(this *RBM, h []int, i int, b float64) float64 {
+	total := 0.0
+	for hid := 0; hid < this.n_hidden; hid++ {
+		total += this.W[hid][i] * float64(h[hid])
+	}
+
+	// The bias is added last, after the weighted sum.
+	return u.Sigmoid(total + b)
+}
+
+// RBM_gibbs_hvh runs one hidden -> visible -> hidden Gibbs step: it samples the
+// visible layer from h0_sample into nv_means/nv_samples, then samples the
+// hidden layer from nv_samples into nh_means/nh_samples.
+func RBM_gibbs_hvh(this *RBM, h0_sample []int, nv_means []float64, nv_samples []int, nh_means []float64, nh_samples []int) {
+	RBM_sample_v_given_h(this, h0_sample, nv_means, nv_samples)
+	RBM_sample_h_given_v(this, nv_samples, nh_means, nh_samples)
+}
+
+func RBM_reconstruct(this *RBM, v []int, reconstructed_v []float64) {
+	h := make([]float64, this.n_hidden)
+	var pre_sigmoid_activation float64
+
+	for i := 0; i < this.n_hidden; i++ {
+		h[i] = RBM_propup(this, v, this.W[i], this.hbias[i])
+	}
+
+	for i := 0; i < this.n_visible; i++ {
+		pre_sigmoid_activation = 0.0
+		for j := 0; j < this.n_hidden; j++ {
+			pre_sigmoid_activation += this.W[j][i] * h[j]
+		}
+		pre_sigmoid_activation += this.vbias[i]
+
+		reconstructed_v[i] = u.Sigmoid(pre_sigmoid_activation)
+	}
+}
+
+
+// test_rbm trains an RBM on a small fixed data set and prints, one line per
+// test vector, the reconstructed visible activations with five decimals.
+func test_rbm() {
+	rand.Seed(0)
+
+	const (
+		learning_rate   = 0.1
+		training_epochs = 1000
+		k               = 1
+
+		train_N   = 6
+		test_N    = 2
+		n_visible = 6
+		n_hidden  = 3
+	)
+
+	// training data
+	train_X := [][]int{
+		{1, 1, 1, 0, 0, 0},
+		{1, 0, 1, 0, 0, 0},
+		{1, 1, 1, 0, 0, 0},
+		{0, 0, 1, 1, 1, 0},
+		{0, 0, 1, 0, 1, 0},
+		{0, 0, 1, 1, 1, 0},
+	}
+
+	// test data
+	test_X := [][]int{
+		{1, 1, 0, 0, 0, 0},
+		{0, 0, 0, 1, 1, 0},
+	}
+
+	// construct RBM
+	var rbm RBM
+	RBM__construct(&rbm, train_N, n_visible, n_hidden, nil, nil, nil)
+
+	// train
+	for epoch := 0; epoch < training_epochs; epoch++ {
+		for _, sample := range train_X[:train_N] {
+			RBM_contrastive_divergence(&rbm, sample, learning_rate, k)
+		}
+	}
+
+	// test
+	for _, sample := range test_X[:test_N] {
+		reconstructed := make([]float64, n_visible)
+		RBM_reconstruct(&rbm, sample, reconstructed)
+		for _, value := range reconstructed {
+			fmt.Printf("%.5f ", value)
+		}
+		fmt.Println()
+	}
+}
+
+
+// main runs the RBM demo.
+func main() {
+	test_rbm()
+}
diff --git a/DeepLearning/go/RBM/RBM.go b/DeepLearning/go/RBM/RBM.go
new file mode 100644
index 00000000..708f8b7a
--- /dev/null
+++ b/DeepLearning/go/RBM/RBM.go
@@ -0,0 +1,139 @@
+package RBM
+
+import (
+	u "../utils"
+)
+
+// RBM holds the parameters of a restricted Boltzmann machine.
+type RBM struct {
+	// N divides every update applied by RBM_contrastive_divergence;
+	// n_visible and n_hidden are the layer sizes.
+	N, n_visible, n_hidden int
+	// W is the weight matrix, indexed W[hidden][visible].
+	W [][]float64
+	// hbias and vbias hold one bias per hidden and per visible unit.
+	hbias, vbias []float64
+}
+
+
+// RBM__construct stores the sizes on this and installs the parameters.
+// A nil W is replaced by an n_hidden x n_visible matrix filled, row by row,
+// with u.Uniform(-a, a) draws where a = 1/n_visible. A nil hbias or vbias is
+// replaced by a zeroed slice of the matching size. Non-nil arguments are
+// stored without copying.
+func RBM__construct(this *RBM, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) {
+	this.N, this.n_visible, this.n_hidden = N, n_visible, n_hidden
+
+	if W == nil {
+		bound := 1.0 / float64(n_visible)
+		W = make([][]float64, n_hidden)
+		for i := range W {
+			row := make([]float64, n_visible)
+			for j := range row {
+				row[j] = u.Uniform(-bound, bound)
+			}
+			W[i] = row
+		}
+	}
+	this.W = W
+
+	if hbias == nil {
+		hbias = make([]float64, n_hidden)
+	}
+	this.hbias = hbias
+
+	if vbias == nil {
+		vbias = make([]float64, n_visible)
+	}
+	this.vbias = vbias
+}
+
+// RBM_contrastive_divergence performs one CD-k update of W, hbias and vbias
+// from a single visible vector. input is the sample, lr the learning rate and
+// k the number of Gibbs steps. Every update is divided by this.N.
+func RBM_contrastive_divergence(this *RBM, input []int, lr float64, k int) {
+	ph_mean := make([]float64, this.n_hidden)
+	ph_sample := make([]int, this.n_hidden)
+	nv_means := make([]float64, this.n_visible)
+	nv_samples := make([]int, this.n_visible)
+	nh_means := make([]float64, this.n_hidden)
+	nh_samples := make([]int, this.n_hidden)
+
+	// Positive phase: hidden units given the data.
+	RBM_sample_h_given_v(this, input, ph_mean, ph_sample)
+
+	// Negative phase: the chain starts from the positive hidden sample and
+	// afterwards feeds on its own latest hidden sample.
+	chain := ph_sample
+	for step := 0; step < k; step++ {
+		RBM_gibbs_hvh(this, chain, nv_means, nv_samples, nh_means, nh_samples)
+		chain = nh_samples
+	}
+
+	n := float64(this.N)
+	for h := 0; h < this.n_hidden; h++ {
+		row := this.W[h]
+		for v := 0; v < this.n_visible; v++ {
+			row[v] += lr * (ph_mean[h] * float64(input[v]) - nh_means[h] * float64(nv_samples[v])) / n
+		}
+		this.hbias[h] += lr * (float64(ph_sample[h]) - nh_means[h]) / n
+	}
+
+	for v := 0; v < this.n_visible; v++ {
+		this.vbias[v] += lr * float64(input[v] - nv_samples[v]) / n
+	}
+}
+
+// RBM_sample_h_given_v fills mean[i] with the activation probability of hidden
+// unit i given v0_sample, and sample[i] with a u.Binomial(1, mean[i]) draw.
+func RBM_sample_h_given_v(this *RBM, v0_sample []int, mean []float64, sample []int) {
+	for unit := 0; unit < this.n_hidden; unit++ {
+		p := RBM_propup(this, v0_sample, this.W[unit], this.hbias[unit])
+		mean[unit] = p
+		sample[unit] = u.Binomial(1, p)
+	}
+}
+
+// RBM_sample_v_given_h fills mean[i] with the activation probability of
+// visible unit i given h0_sample, and sample[i] with a u.Binomial(1, mean[i])
+// draw.
+func RBM_sample_v_given_h(this *RBM, h0_sample []int, mean []float64, sample []int) {
+	for unit := 0; unit < this.n_visible; unit++ {
+		p := RBM_propdown(this, h0_sample, unit, this.vbias[unit])
+		mean[unit] = p
+		sample[unit] = u.Binomial(1, p)
+	}
+}
+
+// RBM_propup returns u.Sigmoid(w*v + b), summing over the first n_visible
+// entries of w and v.
+func RBM_propup(this *RBM, v []int, w []float64, b float64) float64 {
+	total := 0.0
+	for idx := 0; idx < this.n_visible; idx++ {
+		total += w[idx] * float64(v[idx])
+	}
+
+	// The bias is added last, after the weighted sum.
+	return u.Sigmoid(total + b)
+}
+
+// RBM_propdown returns u.Sigmoid(sum_j W[j][i]*h[j] + b), i.e. the activation
+// of visible unit i computed from column i of W over the n_hidden units.
+func RBM_propdown(this *RBM, h []int, i int, b float64) float64 {
+	total := 0.0
+	for hid := 0; hid < this.n_hidden; hid++ {
+		total += this.W[hid][i] * float64(h[hid])
+	}
+
+	// The bias is added last, after the weighted sum.
+	return u.Sigmoid(total + b)
+}
+
+// RBM_gibbs_hvh runs one hidden -> visible -> hidden Gibbs step: it samples the
+// visible layer from h0_sample into nv_means/nv_samples, then samples the
+// hidden layer from nv_samples into nh_means/nh_samples.
+func RBM_gibbs_hvh(this *RBM, h0_sample []int, nv_means []float64, nv_samples []int, nh_means []float64, nh_samples []int) {
+	RBM_sample_v_given_h(this, h0_sample, nv_means, nv_samples)
+	RBM_sample_h_given_v(this, nv_samples, nh_means, nh_samples)
+}
+
+func RBM_reconstruct(this *RBM, v []int, reconstructed_v []float64) {
+	h := make([]float64, this.n_hidden)
+	var pre_sigmoid_activation float64
+
+	for i := 0; i < this.n_hidden; i++ {
+		h[i] = RBM_propup(this, v, this.W[i], this.hbias[i])
+	}
+
+	for i := 0; i < this.n_visible; i++ {
+		pre_sigmoid_activation = 0.0
+		for j := 0; j < this.n_hidden; j++ {
+			pre_sigmoid_activation += this.W[j][i] * h[j]
+		}
+		pre_sigmoid_activation += this.vbias[i]
+
+		reconstructed_v[i] = u.Sigmoid(pre_sigmoid_activation)
+	}
+}
diff --git a/DeepLearning/go/SdA.go b/DeepLearning/go/SdA.go
new file mode 100644
index 00000000..27ccaf63
--- /dev/null
+++ b/DeepLearning/go/SdA.go
@@ -0,0 +1,241 @@
+package main
+
+import (
+	"fmt"
+	"math/rand"
+	u "./utils"
+	H "./HiddenLayer"
+	D "./dA"
+	L "./LogisticRegression"
+)
+
+// SdA bundles the layers of a stacked denoising autoencoder.
+type SdA struct {
+	// N is the number of samples visited per epoch; n_ins is the input size.
+	N, n_ins int
+	// hidden_layer_sizes gives the output size of each hidden layer.
+	hidden_layer_sizes []int
+	// n_outs is the output size; n_layers the number of hidden layers.
+	n_outs, n_layers int
+	// sigmoid_layers and dA_layers are built with the same W and B per layer.
+	sigmoid_layers []H.HiddenLayer
+	dA_layers []D.DA
+	// log_layer is the output layer on top of the last hidden layer.
+	log_layer L.LogisticRegression
+}
+
+
+func SdA__construct(this *SdA, N int, n_ins int, hidden_layer_sizes []int, n_outs int, n_layers int) {
+	var input_size int
+
+	this.N = N
+	this.n_ins = n_ins
+	this.hidden_layer_sizes = hidden_layer_sizes
+	this.n_outs = n_outs
+	this.n_layers = n_layers
+
+	this.sigmoid_layers = make([]H.HiddenLayer, n_layers)
+	this.dA_layers = make([]D.DA, n_layers)
+
+	// construct multi-layer
+	for i := 0; i < n_layers; i++ {
+		if i == 0 {
+			input_size = n_ins
+		} else {
+			input_size = hidden_layer_sizes[i-1]
+		}
+
+		// construct sigmoid_layer
+		H.HiddenLayer__construct(&(this.sigmoid_layers[i]), N, input_size, hidden_layer_sizes[i], nil, nil)
+
+		// construct dA_layer
+		D.DA__construct(&(this.dA_layers[i]), N, input_size, hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].B, nil)
+	}
+
+	// layer for output using LogisticRegression
+	L.LogisticRegression__construct(&(this.log_layer), N, hidden_layer_sizes[n_layers-1], n_outs)
+}
+
+func SdA_pretrain(this *SdA, train_X [][]int, lr float64, corruption_level float64, epochs int) {
+	var (
+		layer_input []int
+		prev_layer_input_size int
+		prev_layer_input []int
+	)
+	for i := 0; i < this.n_layers; i++ {	// layer-wise
+		for epoch := 0; epoch < epochs; epoch++ {	 // training epochs
+			for n := 0; n < this.N; n++ {	 // input x1...xN
+
+				// layer input
+				for l := 0; l <= i; l++ {
+					if l == 0 {
+						layer_input = make([]int, this.n_ins)
+						for j := 0; j < this.n_ins; j++ { layer_input[j] = train_X[n][j] }
+					} else {
+						if l == 1 {
+							prev_layer_input_size = this.n_ins
+						} else {
+							prev_layer_input_size = this.hidden_layer_sizes[l-2]
+						}
+
+						prev_layer_input = make([]int, prev_layer_input_size)
+						for j := 0; j < prev_layer_input_size; j++ { prev_layer_input[j] = layer_input[j] }
+
+						layer_input = make([]int, this.hidden_layer_sizes[l-1])
+
+						H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[l-1]), prev_layer_input, layer_input)
+					}
+				}
+
+				D.DA_train(&(this.dA_layers[i]), layer_input, lr, corruption_level)
+			}
+		}
+	}
+}
+
+// SdA_finetune trains the output layer. For each of the first this.N rows of
+// train_X it samples through every hidden layer and passes the top-layer
+// sample, with the matching row of train_Y, to L.LogisticRegression_train.
+// This is repeated for the given number of epochs with a constant lr.
+func SdA_finetune(this *SdA, train_X [][]int, train_Y [][]int, lr float64, epochs int) {
+	var layer_input []int
+
+	for epoch := 0; epoch < epochs; epoch++ {
+		for n := 0; n < this.N; n++ {	// input x1...xN
+			for i := 0; i < this.n_layers; i++ {
+				// Layer 0 copies the raw sample, later layers the previous output.
+				size, source := this.n_ins, train_X[n]
+				if i > 0 {
+					size, source = this.hidden_layer_sizes[i-1], layer_input
+				}
+				prev_layer_input := make([]int, size)
+				for j := range prev_layer_input {
+					prev_layer_input[j] = source[j]
+				}
+
+				layer_input = make([]int, this.hidden_layer_sizes[i])
+				H.HiddenLayer_sample_h_given_v(&this.sigmoid_layers[i], prev_layer_input, layer_input)
+			}
+
+			L.LogisticRegression_train(&this.log_layer, layer_input, train_Y[n], lr)
+		}
+	}
+}
+
+// SdA_predict writes the class probabilities for x into y. x is propagated
+// through every hidden layer using sigmoid activations (nothing is sampled),
+// then through the output layer's W and B, and finally normalised with
+// L.LogisticRegression_softmax.
+func SdA_predict(this *SdA, x []int, y []float64) {
+	current := make([]float64, this.n_ins)
+	for j := range current {
+		current[j] = float64(x[j])
+	}
+
+	// layer activation
+	var layer_input []float64
+	for i := 0; i < this.n_layers; i++ {
+		layer := &this.sigmoid_layers[i]
+		layer_input = make([]float64, layer.N_out)
+
+		for k := range layer_input {
+			linear_output := 0.0
+			for j := 0; j < layer.N_in; j++ {
+				linear_output += layer.W[k][j] * current[j]
+			}
+			layer_input[k] = u.Sigmoid(linear_output + layer.B[k])
+		}
+
+		// This layer's output feeds the next layer.
+		current = layer_input
+	}
+
+	out := &this.log_layer
+	for i := 0; i < out.N_out; i++ {
+		total := 0.0
+		for j := 0; j < out.N_in; j++ {
+			total += out.W[i][j] * layer_input[j]
+		}
+		y[i] = total + out.B[i]
+	}
+
+	L.LogisticRegression_softmax(out, y)
+}
+
+// test_SdA pretrains and finetunes an SdA on a small fixed data set and
+// prints, one line per test vector, the predicted class probabilities with
+// five decimals.
+func test_SdA() {
+	rand.Seed(0)
+
+	const (
+		pretrain_lr        = 0.1
+		corruption_level   = 0.3
+		pretraining_epochs = 1000
+		finetune_lr        = 0.1
+		finetune_epochs    = 500
+
+		train_N = 10
+		test_N  = 4
+		n_ins   = 28
+		n_outs  = 2
+	)
+	hidden_layer_sizes := []int{15, 15}
+	n_layers := len(hidden_layer_sizes)
+
+	// training data
+	train_X := [][]int{
+		{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
+	}
+
+	train_Y := [][]int{
+		{1, 0},
+		{1, 0},
+		{1, 0},
+		{1, 0},
+		{1, 0},
+		{0, 1},
+		{0, 1},
+		{0, 1},
+		{0, 1},
+		{0, 1},
+	}
+
+	// test data
+	test_X := [][]int{
+		{1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
+	}
+
+	// construct SdA
+	var sda SdA
+	SdA__construct(&sda, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers)
+
+	// pretrain, then finetune
+	SdA_pretrain(&sda, train_X, pretrain_lr, corruption_level, pretraining_epochs)
+	SdA_finetune(&sda, train_X, train_Y, finetune_lr, finetune_epochs)
+
+	// test
+	for _, sample := range test_X[:test_N] {
+		predicted := make([]float64, n_outs)
+		SdA_predict(&sda, sample, predicted)
+		for _, value := range predicted {
+			fmt.Printf("%.5f ", value)
+		}
+		fmt.Println()
+	}
+}
+
+
+// main runs the SdA demo.
+func main() {
+	test_SdA()
+}
diff --git a/DeepLearning/go/dA.go b/DeepLearning/go/dA.go
new file mode 100644
index 00000000..a36c2267
--- /dev/null
+++ b/DeepLearning/go/dA.go
@@ -0,0 +1,192 @@
+package main
+
+import (
+	"fmt"
+	"math/rand"
+	u "./utils"
+)
+
+// dA holds the parameters of a denoising autoencoder with tied weights.
+type dA struct {
+	// N divides every update applied by dA_train; n_visible and n_hidden are
+	// the layer sizes.
+	N, n_visible, n_hidden int
+	// W is indexed W[hidden][visible] and is used for encoding and decoding.
+	W [][]float64
+	// hbias and vbias hold one bias per hidden and per visible unit.
+	hbias, vbias []float64
+}
+
+
+// dA__construct stores the sizes on this and installs the parameters.
+// A nil W is replaced by an n_hidden x n_visible matrix filled, row by row,
+// with u.Uniform(-a, a) draws where a = 1/n_visible. A nil hbias or vbias is
+// replaced by a zeroed slice of the matching size. Non-nil arguments are
+// stored without copying.
+func dA__construct(this *dA, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) {
+	this.N, this.n_visible, this.n_hidden = N, n_visible, n_hidden
+
+	if W == nil {
+		bound := 1.0 / float64(n_visible)
+		W = make([][]float64, n_hidden)
+		for i := range W {
+			row := make([]float64, n_visible)
+			for j := range row {
+				row[j] = u.Uniform(-bound, bound)
+			}
+			W[i] = row
+		}
+	}
+	this.W = W
+
+	if hbias == nil {
+		hbias = make([]float64, n_hidden)
+	}
+	this.hbias = hbias
+
+	if vbias == nil {
+		vbias = make([]float64, n_visible)
+	}
+	this.vbias = vbias
+}
+
+// dA_get_corrupted_input writes a corrupted copy of x into tilde_x: zero
+// entries stay zero, every other entry becomes a u.Binomial(1, p) draw.
+func dA_get_corrupted_input(this *dA, x []int, tilde_x []int, p float64) {
+	for idx := 0; idx < this.n_visible; idx++ {
+		if x[idx] != 0 {
+			tilde_x[idx] = u.Binomial(1, p)
+			continue
+		}
+		tilde_x[idx] = 0
+	}
+}
+
+// dA_get_hidden_values encodes x: y[i] = u.Sigmoid(W[i]*x + hbias[i]) for each
+// hidden unit i.
+func dA_get_hidden_values(this *dA, x []int, y []float64) {
+	for hid := 0; hid < this.n_hidden; hid++ {
+		total := 0.0
+		for vis := 0; vis < this.n_visible; vis++ {
+			total += this.W[hid][vis] * float64(x[vis])
+		}
+		y[hid] = u.Sigmoid(total + this.hbias[hid])
+	}
+}
+
+// dA_get_reconstructed_input decodes y: z[i] = u.Sigmoid(sum_j W[j][i]*y[j] +
+// vbias[i]) for each visible unit i, reusing W transposed.
+func dA_get_reconstructed_input(this *dA, y []float64, z []float64) {
+	for vis := 0; vis < this.n_visible; vis++ {
+		total := 0.0
+		for hid := 0; hid < this.n_hidden; hid++ {
+			total += this.W[hid][vis] * y[hid]
+		}
+		z[vis] = u.Sigmoid(total + this.vbias[vis])
+	}
+}
+
+// dA_train applies one update to vbias, hbias and W from a single sample x.
+// x is corrupted with keep-probability 1 - corruption_level, encoded and
+// decoded; the parameters then move along the reconstruction error with
+// learning rate lr. Every update is divided by this.N.
+func dA_train(this *dA, x []int, lr float64, corruption_level float64) {
+	n := float64(this.N)
+
+	tilde_x := make([]int, this.n_visible)
+	y := make([]float64, this.n_hidden)
+	z := make([]float64, this.n_visible)
+
+	dA_get_corrupted_input(this, x, tilde_x, 1 - corruption_level)
+	dA_get_hidden_values(this, tilde_x, y)
+	dA_get_reconstructed_input(this, y, z)
+
+	// vbias: error between the clean input and its reconstruction.
+	L_vbias := make([]float64, this.n_visible)
+	for vis := range L_vbias {
+		L_vbias[vis] = float64(x[vis]) - z[vis]
+		this.vbias[vis] += lr * L_vbias[vis] / n
+	}
+
+	// hbias: visible error sent back through W, scaled by y * (1 - y).
+	L_hbias := make([]float64, this.n_hidden)
+	for hid := range L_hbias {
+		for vis := 0; vis < this.n_visible; vis++ {
+			L_hbias[hid] += this.W[hid][vis] * L_vbias[vis]
+		}
+		L_hbias[hid] *= y[hid] * (1 - y[hid])
+		this.hbias[hid] += lr * L_hbias[hid] / n
+	}
+
+	// W: updated only after every L_hbias entry has been computed.
+	for hid := 0; hid < this.n_hidden; hid++ {
+		row := this.W[hid]
+		for vis := 0; vis < this.n_visible; vis++ {
+			row[vis] += lr * (L_hbias[hid] * float64(tilde_x[vis]) + L_vbias[vis] * y[hid]) / n
+		}
+	}
+}
+
+// dA_reconstruct encodes x into a temporary hidden vector and decodes it into
+// z. The input is used as given; no corruption is applied here.
+func dA_reconstruct(this *dA, x []int, z []float64) {
+	y := make([]float64, this.n_hidden)
+
+	dA_get_hidden_values(this, x, y)
+	dA_get_reconstructed_input(this, y, z)
+}
+
+
+
+
+// test_dA trains a denoising autoencoder on a small fixed data set and prints,
+// one line per test vector, the reconstructed activations with five decimals.
+//
+// train_N and test_N are taken from the data. The earlier hard-coded train_N
+// of 6 skipped the last four of the ten training rows, so the second pattern
+// was seen only once.
+func test_dA() {
+	rand.Seed(0)
+
+	learning_rate := 0.1
+	corruption_level := 0.3
+	training_epochs := 1000
+
+	n_visible := 20
+	n_hidden := 5
+
+	// training data
+	train_X := [][]int {
+		{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0},
+	}
+	train_N := len(train_X)
+
+	// construct dA
+	var da dA
+	dA__construct(&da, train_N, n_visible, n_hidden, nil, nil, nil)
+
+	// train
+	for epoch := 0; epoch < training_epochs; epoch++ {
+		for i := 0; i < train_N; i++ {
+			dA_train(&da, train_X[i], learning_rate, corruption_level)
+		}
+	}
+
+	// test data
+	test_X := [][]int {
+		{1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0},
+	}
+	test_N := len(test_X)
+
+	reconstructed_X := make([][]float64, test_N)
+	for i := 0; i < test_N; i++ { reconstructed_X[i] = make([]float64, n_visible)}
+
+	// test
+	for i := 0;  i < test_N; i++ {
+		dA_reconstruct(&da, test_X[i], reconstructed_X[i])
+		for j := 0; j < n_visible; j++ {
+			fmt.Printf("%.5f ", reconstructed_X[i][j])
+		}
+		fmt.Printf("\n")
+	}
+}
+
+// main runs the denoising-autoencoder demo.
+func main() {
+	test_dA()
+}
diff --git a/DeepLearning/go/dA/dA.go b/DeepLearning/go/dA/dA.go
new file mode 100644
index 00000000..b41d1ce7
--- /dev/null
+++ b/DeepLearning/go/dA/dA.go
@@ -0,0 +1,128 @@
+package dA
+
+import (
+	u "../utils"
+)
+
+
+// DA holds the parameters of a denoising autoencoder with tied weights.
+type DA struct {
+	// N divides every update applied by DA_train; n_visible and n_hidden are
+	// the layer sizes.
+	N, n_visible, n_hidden int
+	// W is indexed W[hidden][visible] and is used for encoding and decoding.
+	W [][]float64
+	// hbias and vbias hold one bias per hidden and per visible unit.
+	hbias, vbias []float64
+}
+
+
+// DA__construct stores the sizes on this and installs the parameters.
+// A nil W is replaced by an n_hidden x n_visible matrix filled, row by row,
+// with u.Uniform(-a, a) draws where a = 1/n_visible. A nil hbias or vbias is
+// replaced by a zeroed slice of the matching size. Non-nil arguments are
+// stored without copying, so the caller's slices are shared with this.
+func DA__construct(this *DA, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) {
+	this.N, this.n_visible, this.n_hidden = N, n_visible, n_hidden
+
+	if W == nil {
+		bound := 1.0 / float64(n_visible)
+		W = make([][]float64, n_hidden)
+		for i := range W {
+			row := make([]float64, n_visible)
+			for j := range row {
+				row[j] = u.Uniform(-bound, bound)
+			}
+			W[i] = row
+		}
+	}
+	this.W = W
+
+	if hbias == nil {
+		hbias = make([]float64, n_hidden)
+	}
+	this.hbias = hbias
+
+	if vbias == nil {
+		vbias = make([]float64, n_visible)
+	}
+	this.vbias = vbias
+}
+
+// dA_get_corrupted_input writes a corrupted copy of x into tilde_x: zero
+// entries stay zero, every other entry becomes a u.Binomial(1, p) draw.
+func dA_get_corrupted_input(this *DA, x []int, tilde_x []int, p float64) {
+	for idx := 0; idx < this.n_visible; idx++ {
+		if x[idx] != 0 {
+			tilde_x[idx] = u.Binomial(1, p)
+			continue
+		}
+		tilde_x[idx] = 0
+	}
+}
+
+// dA_get_hidden_values encodes x: y[i] = u.Sigmoid(W[i]*x + hbias[i]) for each
+// hidden unit i.
+func dA_get_hidden_values(this *DA, x []int, y []float64) {
+	for hid := 0; hid < this.n_hidden; hid++ {
+		total := 0.0
+		for vis := 0; vis < this.n_visible; vis++ {
+			total += this.W[hid][vis] * float64(x[vis])
+		}
+		y[hid] = u.Sigmoid(total + this.hbias[hid])
+	}
+}
+
+// dA_get_reconstructed_input decodes y: z[i] = u.Sigmoid(sum_j W[j][i]*y[j] +
+// vbias[i]) for each visible unit i, reusing W transposed.
+func dA_get_reconstructed_input(this *DA, y []float64, z []float64) {
+	for vis := 0; vis < this.n_visible; vis++ {
+		total := 0.0
+		for hid := 0; hid < this.n_hidden; hid++ {
+			total += this.W[hid][vis] * y[hid]
+		}
+		z[vis] = u.Sigmoid(total + this.vbias[vis])
+	}
+}
+
+// DA_train applies one update to vbias, hbias and W from a single sample x.
+// x is corrupted with keep-probability 1 - corruption_level, encoded and
+// decoded; the parameters then move along the reconstruction error with
+// learning rate lr. Every update is divided by this.N.
+func DA_train(this *DA, x []int, lr float64, corruption_level float64) {
+	n := float64(this.N)
+
+	tilde_x := make([]int, this.n_visible)
+	y := make([]float64, this.n_hidden)
+	z := make([]float64, this.n_visible)
+
+	dA_get_corrupted_input(this, x, tilde_x, 1 - corruption_level)
+	dA_get_hidden_values(this, tilde_x, y)
+	dA_get_reconstructed_input(this, y, z)
+
+	// vbias: error between the clean input and its reconstruction.
+	L_vbias := make([]float64, this.n_visible)
+	for vis := range L_vbias {
+		L_vbias[vis] = float64(x[vis]) - z[vis]
+		this.vbias[vis] += lr * L_vbias[vis] / n
+	}
+
+	// hbias: visible error sent back through W, scaled by y * (1 - y).
+	L_hbias := make([]float64, this.n_hidden)
+	for hid := range L_hbias {
+		for vis := 0; vis < this.n_visible; vis++ {
+			L_hbias[hid] += this.W[hid][vis] * L_vbias[vis]
+		}
+		L_hbias[hid] *= y[hid] * (1 - y[hid])
+		this.hbias[hid] += lr * L_hbias[hid] / n
+	}
+
+	// W: updated only after every L_hbias entry has been computed.
+	for hid := 0; hid < this.n_hidden; hid++ {
+		row := this.W[hid]
+		for vis := 0; vis < this.n_visible; vis++ {
+			row[vis] += lr * (L_hbias[hid] * float64(tilde_x[vis]) + L_vbias[vis] * y[hid]) / n
+		}
+	}
+}
+
+// dA_reconstruct encodes x into a temporary hidden vector and decodes it into
+// z. The input is used as given; no corruption is applied here.
+func dA_reconstruct(this *DA, x []int, z []float64) {
+	y := make([]float64, this.n_hidden)
+
+	dA_get_hidden_values(this, x, y)
+	dA_get_reconstructed_input(this, y, z)
+}
diff --git a/DeepLearning/go/utils/utils.go b/DeepLearning/go/utils/utils.go
new file mode 100644
index 00000000..44b3af20
--- /dev/null
+++ b/DeepLearning/go/utils/utils.go
@@ -0,0 +1,28 @@
+package utils
+
+import (
+	"math"
+	"math/rand"
+)
+
+// Uniform returns rand.Float64() scaled by (max - min) and shifted by min.
+func Uniform(min float64, max float64) float64 {
+	width := max - min
+	return rand.Float64() * width + min
+}
+
+func Binomial(n int, p float64) int {
+	if p < 0 || p > 1 { return 0 }
+
+	c := 0
+	var r float64
+	
+	for i := 0; i < n; i++ {
+		r = rand.Float64()		
+		if r < p { c++ }
+	}
+
+	return c
+}
+
+// Sigmoid returns the logistic function 1 / (1 + e^-x).
+func Sigmoid(x float64) float64 {
+	denominator := 1.0 + math.Exp(-x)
+	return 1.0 / denominator
+}
diff --git a/DeepLearning/java/.gitkeep b/DeepLearning/java/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/DeepLearning/java/src/DeepLearning/DBN.java b/DeepLearning/java/src/DeepLearning/DBN.java
new file mode 100644
index 00000000..e070faf7
--- /dev/null
+++ b/DeepLearning/java/src/DeepLearning/DBN.java
@@ -0,0 +1,222 @@
+package DeepLearning;
+
+import java.util.Random;
+import static DeepLearning.utils.*;
+
+/**
+ * Deep belief network: a stack of hidden layers, each paired with an RBM
+ * constructed from that layer's W and b, topped by a logistic-regression
+ * output layer.
+ */
+public class DBN {
+    public int N;
+    public int n_ins;
+    public int[] hidden_layer_sizes;
+    public int n_outs;
+    public int n_layers;
+    public HiddenLayerDiscrete[] sigmoid_layers;
+    public RBM[] rbm_layers;
+    public LogisticRegressionDiscrete log_layer;
+    public Random rng;
+
+
+    /**
+     * Builds the layer stack.
+     *
+     * @param N                  number of training samples visited per epoch
+     * @param n_ins              size of the input vector
+     * @param hidden_layer_sizes output size of each hidden layer
+     * @param n_outs             size of the output vector
+     * @param n_layers           number of hidden layers to build
+     * @param rng                random source; when null a Random seeded with 1234 is used
+     */
+    public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) {
+        int input_size;
+
+        this.N = N;
+        this.n_ins = n_ins;
+        this.hidden_layer_sizes = hidden_layer_sizes;
+        this.n_outs = n_outs;
+        this.n_layers = n_layers;
+
+        this.sigmoid_layers = new HiddenLayerDiscrete[n_layers];
+        this.rbm_layers = new RBM[n_layers];
+
+        if(rng == null)	this.rng = new Random(1234);
+        else this.rng = rng;
+
+        // construct multi-layer
+        for(int i=0; i<this.n_layers; i++) {
+            if(i == 0) {
+                input_size = this.n_ins;
+            } else {
+                input_size = this.hidden_layer_sizes[i-1];
+            }
+
+            // Pass this.rng, not the raw parameter: the parameter may be null,
+            // in which case the layers would not share the generator resolved above.
+
+            // construct sigmoid_layer
+            this.sigmoid_layers[i] = new HiddenLayerDiscrete(this.N, input_size, this.hidden_layer_sizes[i], null, null, this.rng);
+
+            // construct rbm_layer
+            this.rbm_layers[i] = new RBM(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, this.rng);
+        }
+
+        // layer for output using Logistic Regression
+        this.log_layer = new LogisticRegressionDiscrete(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
+    }
+
+    /**
+     * Trains the RBMs one layer at a time. For layer i, each of the first N
+     * rows of train_X is sampled through hidden layers 0..i-1 and the result
+     * is passed to that layer's contrastive_divergence.
+     *
+     * @param train_X training inputs, one row per sample
+     * @param lr      learning rate
+     * @param k       number of Gibbs steps handed to contrastive_divergence
+     * @param epochs  number of passes over the data per layer
+     */
+    public void pretrain(int[][] train_X, double lr, int k, int epochs) {
+        int[] layer_input = new int[0];
+        int prev_layer_input_size;
+        int[] prev_layer_input;
+
+        for(int i=0; i<n_layers; i++) {  // layer-wise
+            for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
+                for(int n=0; n<N; n++) {  // input x1...xN
+                    // layer input
+                    for(int l=0; l<=i; l++) {
+
+                        if(l == 0) {
+                            layer_input = new int[n_ins];
+                            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j];
+                        } else {
+                            if(l == 1) prev_layer_input_size = n_ins;
+                            else prev_layer_input_size = hidden_layer_sizes[l-2];
+
+                            prev_layer_input = new int[prev_layer_input_size];
+                            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
+
+                            layer_input = new int[hidden_layer_sizes[l-1]];
+
+                            sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input);
+                        }
+                    }
+
+                    rbm_layers[i].contrastive_divergence(layer_input, lr, k);
+                }
+            }
+        }
+    }
+
+    /**
+     * Trains the output layer. Each of the first N rows of train_X is sampled
+     * through every hidden layer and the top-layer sample is passed, with the
+     * matching row of train_Y, to log_layer.train.
+     *
+     * @param train_X training inputs, one row per sample
+     * @param train_Y training targets, one row per sample
+     * @param lr      learning rate (kept constant across epochs)
+     * @param epochs  number of passes over the data
+     */
+    public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) {
+        int[] layer_input = new int[0];
+        int[] prev_layer_input = new int[0];
+
+        for(int epoch=0; epoch<epochs; epoch++) {
+            for(int n=0; n<N; n++) {
+
+                // layer input
+                for(int i=0; i<n_layers; i++) {
+                    if(i == 0) {
+                        prev_layer_input = new int[n_ins];
+                        for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j];
+                    } else {
+                        prev_layer_input = new int[hidden_layer_sizes[i-1]];
+                        for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
+                    }
+
+                    layer_input = new int[hidden_layer_sizes[i]];
+                    sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input);
+                }
+
+                log_layer.train(layer_input, train_Y[n], lr);
+            }
+        }
+    }
+
+    /**
+     * Writes the class probabilities for x into y. x is propagated through
+     * every hidden layer using sigmoid activations (nothing is sampled), then
+     * through the output layer's W and b, and normalised with log_layer.softmax.
+     *
+     * @param x input vector (n_ins entries are read)
+     * @param y output buffer (log_layer.n_out entries are written)
+     */
+    public void predict(int[] x, double[] y) {
+        double[] layer_input = new double[0];
+        double[] prev_layer_input = new double[n_ins];
+        for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];
+
+        double linear_output;
+
+
+        // layer activation
+        for(int i=0; i<n_layers; i++) {
+            layer_input = new double[sigmoid_layers[i].n_out];
+
+            for(int k=0; k<sigmoid_layers[i].n_out; k++) {
+                linear_output = 0.0;
+
+                for(int j=0; j<sigmoid_layers[i].n_in; j++) {
+                    linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
+                }
+                linear_output += sigmoid_layers[i].b[k];
+                layer_input[k] = sigmoid(linear_output);
+            }
+
+            if(i < n_layers-1) {
+                prev_layer_input = new double[sigmoid_layers[i].n_out];
+                for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j];
+            }
+        }
+
+        for(int i=0; i<log_layer.n_out; i++) {
+            y[i] = 0;
+            for(int j=0; j<log_layer.n_in; j++) {
+                y[i] += log_layer.W[i][j] * layer_input[j];
+            }
+            y[i] += log_layer.b[i];
+        }
+
+        log_layer.softmax(y);
+    }
+
+    /**
+     * Pretrains and finetunes a DBN on a small fixed data set and prints the
+     * predicted class probabilities, one line per test vector.
+     */
+    private static void test_dbn() {
+        Random rng = new Random(123);
+
+        double pretrain_lr = 0.1;
+        int pretraining_epochs = 1000;
+        int k = 1;
+        double finetune_lr = 0.1;
+        int finetune_epochs = 500;
+
+        int train_N = 6;
+        int test_N = 4;
+        int n_ins = 6;
+        int n_outs = 2;
+        int[] hidden_layer_sizes = {3, 3};
+        int n_layers = hidden_layer_sizes.length;
+
+        // training data
+        int[][] train_X = {
+                {1, 1, 1, 0, 0, 0},
+                {1, 0, 1, 0, 0, 0},
+                {1, 1, 1, 0, 0, 0},
+                {0, 0, 1, 1, 1, 0},
+                {0, 0, 1, 1, 0, 0},
+                {0, 0, 1, 1, 1, 0}
+        };
+
+        int[][] train_Y = {
+                {1, 0},
+                {1, 0},
+                {1, 0},
+                {0, 1},
+                {0, 1},
+                {0, 1},
+        };
+
+
+        // construct DNN.DBN
+        DBN dbn = new DBN(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng);
+
+        // pretrain
+        dbn.pretrain(train_X, pretrain_lr, k, pretraining_epochs);
+
+        // finetune
+        dbn.finetune(train_X, train_Y, finetune_lr, finetune_epochs);
+
+
+        // test data
+        int[][] test_X = {
+                {1, 1, 0, 0, 0, 0},
+                {1, 1, 1, 1, 0, 0},
+                {0, 0, 0, 1, 1, 0},
+                {0, 0, 1, 1, 1, 0},
+        };
+
+        double[][] test_Y = new double[test_N][n_outs];
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            dbn.predict(test_X[i], test_Y[i]);
+            for(int j=0; j<n_outs; j++) {
+                System.out.print(test_Y[i][j] + " ");
+            }
+            System.out.println();
+        }
+    }
+
+    /** Runs the DBN demo. */
+    public static void main(String[] args) {
+        test_dbn();
+    }
+}
diff --git a/DeepLearning/java/src/DeepLearning/Dropout.java b/DeepLearning/java/src/DeepLearning/Dropout.java
new file mode 100644
index 00000000..bb6378fb
--- /dev/null
+++ b/DeepLearning/java/src/DeepLearning/Dropout.java
@@ -0,0 +1,225 @@
+package DeepLearning;
+
+import java.util.Random;
+import java.util.List;
+import java.util.ArrayList;
+
+public class Dropout {
+    public int N;
+    public int n_in;
+    public int[] hidden_layer_sizes;
+    public int n_out;
+    public int n_layers;
+    public HiddenLayer[] hiddenLayers;
+    public LogisticRegression logisticLayer;
+    public Random rng;
+
+
+    /**
+     * Builds a stack of hidden layers topped by a logistic regression output layer.
+     *
+     * @param N                  passed to every layer, which divide their updates by it
+     * @param n_in               width of the input vectors
+     * @param hidden_layer_sizes width of each hidden layer, first to last
+     * @param n_out              number of output units
+     * @param rng                random source; a Random seeded with 1234 is used when null
+     * @param activation         activation name for every HiddenLayer; "ReLU" when null
+     */
+    public Dropout(int N, int n_in, int[] hidden_layer_sizes, int n_out, Random rng, String activation) {
+        // resolve the defaults before anything is stored or forwarded
+        if (rng == null) rng = new Random(1234);
+        if (activation == null) activation = "ReLU";
+
+        this.N = N;
+        this.n_in = n_in;
+        this.n_out = n_out;
+        this.hidden_layer_sizes = hidden_layer_sizes;
+        this.n_layers = hidden_layer_sizes.length;
+        this.rng = rng;
+        this.hiddenLayers = new HiddenLayer[n_layers];
+
+        // layer 0 is fed by the raw input, every later layer by the previous hidden layer
+        for (int layer = 0; layer < n_layers; layer++) {
+            int fan_in = (layer == 0) ? n_in : hidden_layer_sizes[layer - 1];
+            hiddenLayers[layer] = new HiddenLayer(N, fan_in, hidden_layer_sizes[layer], null, null, rng, activation);
+        }
+
+        // the output layer reads the activation of the last hidden layer
+        int top = hidden_layer_sizes[n_layers - 1];
+        this.logisticLayer = new LogisticRegression(N, top, n_out);
+    }
+
+    /**
+     * Trains the network one sample at a time for the given number of epochs.
+     * Each sample is pushed through the hidden layers (masked per layer when dropout is on),
+     * the logistic layer is trained on the top activation, and the error it returns is
+     * propagated back through the hidden layers from last to first.
+     *
+     * @param epochs    number of passes over rows 0..N-1 of train_X
+     * @param train_X   input rows
+     * @param train_Y   target rows, indexed like train_X
+     * @param dropout   when true, a fresh mask is drawn for every hidden layer and sample
+     * @param p_dropout probability argument forwarded to HiddenLayer.dropout
+     * @param lr        learning rate forwarded to every layer
+     */
+    public void train(int epochs, double[][] train_X, int[][] train_Y, boolean dropout, double p_dropout, double lr) {
+        List<int[]> dropout_masks;
+        List<double[]> layer_inputs;
+        double[] layer_input;
+        double[] layer_output = new double[0];
+
+        for(int epoch=0; epoch<epochs; epoch++) {
+            for(int n=0; n<N; n++) {
+                dropout_masks = new ArrayList<>(n_layers);
+                layer_inputs = new ArrayList<>(n_layers+1);  // +1 for logistic layer
+
+                // forward hiddenLayers
+                for(int i=0; i<n_layers; i++) {
+                    if(i == 0) layer_input = train_X[n];
+                    else layer_input = layer_output.clone();
+                    layer_inputs.add(layer_input.clone());
+                    layer_output = new double[hidden_layer_sizes[i]];
+                    hiddenLayers[i].forward(layer_input, layer_output);
+                    if(dropout) {
+                        int[] mask;
+                        mask = hiddenLayers[i].dropout(layer_output.length, p_dropout, rng);
+                        for(int j=0; j<layer_output.length; j++) layer_output[j] *= mask[j];
+                        dropout_masks.add(mask.clone());
+                    }
+                }
+
+                // forward & backward logisticLayer
+                double[] logistic_layer_dy; // = new double[n_out];
+                logistic_layer_dy = logisticLayer.train(layer_output, train_Y[n], lr); //, logistic_layer_dy);
+                layer_inputs.add(layer_output.clone());
+                // backward hiddenLayers
+                double[] prev_dy = logistic_layer_dy;
+                double[][] prev_W;
+                double[] dy = new double[0];
+                for(int i=n_layers-1; i>=0; i--) {
+                    if(i == n_layers-1) {
+                        prev_W = logisticLayer.W;
+                    } else {
+                        prev_dy = dy.clone();
+                        prev_W = hiddenLayers[i+1].W;
+                    }
+                    // NOTE(review): mask i has hidden_layer_sizes[i] entries, while prev_dy has the size of
+                    // the layer above (n_out for the top layer); confirm this indexing is intended.
+                    if(dropout) {
+                        for(int j=0; j<prev_dy.length; j++) {
+                            prev_dy[j] *= dropout_masks.get(i)[j];
+                        }
+                    }
+                    dy = new double[hidden_layer_sizes[i]];
+                    hiddenLayers[i].backward(layer_inputs.get(i), dy, layer_inputs.get(i+1), prev_dy, prev_W, lr);
+                }
+            }
+        }
+    }
+
+
+    public void pretest(double p_dropout) {
+        for(int i=0; i<n_layers; i++) {
+            int in;
+            int out;
+
+            if (i == 0) in = n_in;
+            else in = hidden_layer_sizes[i];
+
+            if (i == n_layers - 1) out = n_out;
+            else out = hidden_layer_sizes[i+1];
+
+
+            for (int l = 0; l < out; l++) {
+                for (int m = 0; m < in; m++) {
+                    hiddenLayers[i].W[l][m] *= 1 - p_dropout;
+                }
+            }
+        }
+    }
+
+
+    public void predict(double[] x, double[] y) {
+        double[] layer_input;
+        double[] layer_output = new double[0];
+
+        for(int i=0; i<n_layers; i++) {
+
+            if(i == 0) layer_input = x;
+            else layer_input = layer_output.clone();
+
+            layer_output = new double[hidden_layer_sizes[i]];
+
+            hiddenLayers[i].forward(layer_input, layer_output);
+        }
+
+        logisticLayer.predict(layer_output, y);
+    }
+
+
+    /** Demo: trains a 2-10-10-2 dropout network on the XOR truth table and prints its outputs for the same four inputs. */
+    private static void test_dropout() {
+        Random rng = new Random(123);
+
+        double learning_rate = 0.1;
+        int n_epochs = 5000;
+
+        int train_N = 4;
+        int test_N = 4;
+        int n_in = 2;
+        int[] hidden_layer_sizes = {10, 10};
+        int n_out = 2;
+
+        boolean dropout = true;
+        double p_dropout = 0.5;
+
+        double[][] train_X = {
+                {0., 0.},
+                {0., 1.},
+                {1., 0.},
+                {1., 1.},
+        };
+
+        int[][] train_Y = {
+                {0, 1},
+                {1, 0},
+                {1, 0},
+                {0, 1},
+        };
+
+        // construct Dropout
+        Dropout classifier = new Dropout(train_N, n_in, hidden_layer_sizes, n_out, rng, "ReLU");
+
+        // train
+        classifier.train(n_epochs, train_X, train_Y, dropout, p_dropout, learning_rate);
+
+        // pretest: rescale the hidden-layer weights by (1 - p_dropout) before predicting
+        if(dropout) classifier.pretest(p_dropout);
+
+
+        // test data
+        double[][] test_X = {
+                {0., 0.},
+                {0., 1.},
+                {1., 0.},
+                {1., 1.},
+        };
+
+        double[][] test_Y = new double[test_N][n_out];
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            classifier.predict(test_X[i], test_Y[i]);
+            for(int j=0; j<n_out; j++) {
+                System.out.print(test_Y[i][j] + " ");
+            }
+            System.out.println();
+        }
+
+    }
+
+
+    public static void main(String[] args) {  // entry point; args are not read
+        test_dropout();  // runs the XOR dropout demo and prints its predictions
+    }
+}
diff --git a/DeepLearning/java/src/DeepLearning/HiddenLayer.java b/DeepLearning/java/src/DeepLearning/HiddenLayer.java
new file mode 100644
index 00000000..028727d9
--- /dev/null
+++ b/DeepLearning/java/src/DeepLearning/HiddenLayer.java
@@ -0,0 +1,106 @@
+package DeepLearning;
+
+import java.util.Random;
+import java.util.function.DoubleFunction;
+import static DeepLearning.utils.*;
+
+public class HiddenLayer {
+    public int N;
+    public int n_in;
+    public int n_out;
+    public double[][] W;
+    public double[] b;
+    public Random rng;
+    public DoubleFunction<Double> activation;
+    public DoubleFunction<Double> dactivation;
+
+    /**
+     * Creates a fully connected layer of n_out units over n_in inputs.
+     * Null arguments select defaults: W is filled via uniform(-1/n_in, 1/n_in, rng), b is all
+     * zeros, rng is a Random seeded with 1234, and activation is "sigmoid".
+     *
+     * @param activation "sigmoid", "tanh" or "ReLU"; anything else throws IllegalArgumentException
+     */
+    public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng, String activation) {
+        this.N = N;
+        this.n_in = n_in;
+        this.n_out = n_out;
+        this.rng = (rng == null) ? new Random(1234) : rng;
+        this.b = (b == null) ? new double[n_out] : b;
+        if (W == null) {
+            this.W = new double[n_out][n_in];
+            double a = 1.0 / this.n_in;
+            for(int i=0; i<n_out; i++) {
+                for(int j=0; j<n_in; j++) {
+                    // this.rng, not rng: the argument is still null when the default was chosen
+                    this.W[i][j] = uniform(-a, a, this.rng);
+                }
+            }
+        } else {
+            this.W = W;
+        }
+        // equals(), not ==: reference comparison only matched interned string literals
+        if (activation == null || "sigmoid".equals(activation)) {
+            this.activation = (double x) -> sigmoid(x);
+            this.dactivation = (double x) -> dsigmoid(x);
+        } else if ("tanh".equals(activation)) {
+            this.activation = (double x) -> tanh(x);
+            this.dactivation = (double x) -> dtanh(x);
+        } else if ("ReLU".equals(activation)) {
+            this.activation = (double x) -> ReLU(x);
+            this.dactivation = (double x) -> dReLU(x);
+        } else {
+            throw new IllegalArgumentException("activation function not supported");
+        }
+    }
+
+    /**
+     * Output of a single unit: activation of (sum of w[j] * input[j] over the first n_in entries, plus b).
+     */
+    public double output(double[] input, double[] w, double b) {
+        double pre_activation = 0.0;
+        for (int k = 0; k < n_in; k++) pre_activation += w[k] * input[k];
+        // the bias is added last, then the configured activation is applied
+        return activation.apply(pre_activation + b);
+    }
+
+
+    /** Fills output[0..n_out) with every unit's activation for the given input. */
+    public void forward(double[] input, double[] output) {
+        for (int unit = 0; unit < n_out; unit++)
+            output[unit] = this.output(input, W[unit], b[unit]);
+    }
+
+    /** Backpropagation step: fills dy with this layer's delta, derived from the next layer's
+     *  delta (prev_layer_dy) and weights (prev_layer_W), then adds lr * delta * input / N to W
+     *  and lr * delta / N to b. Callers pass this layer's own output as prev_layer_input. */
+    public void backward(double[] input, double[] dy, double[] prev_layer_input, double[] prev_layer_dy, double[][] prev_layer_W, double lr) {
+        if(dy == null) dy = new double[n_out];  // MLP passes null: it does not read the delta back
+        int prev_n_in = n_out;
+        int prev_n_out = prev_layer_dy.length;
+        for(int i=0; i<prev_n_in; i++) {
+            dy[i] = 0;
+            for(int j=0; j<prev_n_out; j++) {
+                dy[i] += prev_layer_dy[j] * prev_layer_W[j][i];
+            }
+            dy[i] *= dactivation.apply(prev_layer_input[i]);  // derivative is evaluated on the value the caller passed, i.e. the layer output
+        }
+        // parameter update with the delta computed above
+        for(int i=0; i<n_out; i++) {
+            for(int j=0; j<n_in; j++) {
+                W[i][j] += lr * dy[i] * input[j] / N;
+            }
+            b[i] += lr * dy[i] / N;
+        }
+    }
+
+    /** Draws a dropout mask with entries binomial(1, 1 - p, rng). Treating p as the drop
+     *  probability (and binomial(1, q, rng) as 1 with probability q), units are now kept at the
+     *  (1 - p) rate that Dropout.pretest rescales by; before, they were kept at rate p. */
+    public int[] dropout(int size, double p, Random rng) {
+        int[] mask = new int[size];
+        for (int i = 0; i < size; i++) mask[i] = binomial(1, 1 - p, rng);
+
+        return mask;
+    }
+}
diff --git a/DeepLearning/java/src/DeepLearning/HiddenLayerDiscrete.java b/DeepLearning/java/src/DeepLearning/HiddenLayerDiscrete.java
new file mode 100644
index 00000000..b399db98
--- /dev/null
+++ b/DeepLearning/java/src/DeepLearning/HiddenLayerDiscrete.java
@@ -0,0 +1,56 @@
+package DeepLearning;
+
+import java.util.Random;
+import static DeepLearning.utils.*;
+
+/**
+ * Hidden layer that reads binary (int) inputs and always uses the sigmoid activation.
+ *
+ * All state (N, n_in, n_out, W, b, rng) is inherited from HiddenLayer. This class used to
+ * redeclare those fields, which hid the inherited ones: when W or b was null the super
+ * constructor initialised one array, this class a second one, and inherited methods such as
+ * forward() and backward() worked on different arrays than output(int[], ...) and
+ * sample_h_given_v(). With the duplicates removed there is a single set of parameters, and
+ * callers that read layer.W or layer.b compile unchanged against the inherited public fields.
+ *
+ * Side effect of the fix: weights are drawn from rng once instead of twice, so runs with a
+ * fixed seed start from different initial weights than before.
+ */
+public class HiddenLayerDiscrete extends HiddenLayer {
+
+    /**
+     * @param N     forwarded to HiddenLayer unchanged
+     * @param n_in  number of inputs per unit
+     * @param n_out number of units
+     * @param W     n_out x n_in weights to adopt, or null to let HiddenLayer initialise them
+     * @param b     n_out biases to adopt, or null to let HiddenLayer allocate them
+     * @param rng   random source used for weight initialisation and for sampling
+     */
+    public HiddenLayerDiscrete(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
+        // a null activation name selects the sigmoid in HiddenLayer, matching output() below
+        super(N, n_in, n_out, W, b, rng, null);
+    }
+
+    /**
+     * Sigmoid activation of one unit for an int-valued input vector.
+     *
+     * @return sigmoid of (sum of w[j] * input[j] over the first n_in entries, plus b)
+     */
+    public double output(int[] input, double[] w, double b) {
+        double linear_output = 0.0;
+        for(int j=0; j<n_in; j++) {
+            linear_output += w[j] * input[j];
+        }
+        linear_output += b;
+        return sigmoid(linear_output);
+    }
+
+    /**
+     * Samples every unit: sample[i] = binomial(1, output(input, W[i], b[i]), rng).
+     */
+    public void sample_h_given_v(int[] input, int[] sample) {
+        for(int i=0; i<n_out; i++) {
+            sample[i] = binomial(1, output(input, W[i], b[i]), rng);
+        }
+    }
+}
diff --git a/DeepLearning/java/src/DeepLearning/LogisticRegression.java b/DeepLearning/java/src/DeepLearning/LogisticRegression.java
new file mode 100644
index 00000000..af5a9a51
--- /dev/null
+++ b/DeepLearning/java/src/DeepLearning/LogisticRegression.java
@@ -0,0 +1,131 @@
+package DeepLearning;
+
+public class LogisticRegression {
+    public int N;
+    public int n_in;
+    public int n_out;
+    public double[][] W;
+    public double[] b;
+
+    /** Allocates an n_out x n_in weight matrix W and n_out biases b (Java zero-fills both); train() divides updates by N. */
+    public LogisticRegression(int N, int n_in, int n_out) {
+        this.N = N;
+        this.n_in = n_in;
+        this.n_out = n_out;
+        W = new double[n_out][n_in];
+        b = new double[n_out];
+    }
+
+    /**
+     * One gradient step on a single sample.
+     * @param x  input vector (first n_in entries are read)
+     * @param y  target vector (first n_out entries are read)
+     * @param lr learning rate; each update is additionally divided by N
+     * @return y minus the softmax output, computed before the weights were changed
+     */
+    public double[] train(double[] x, int[] y, double lr) {
+        // linear scores W*x + b, turned into probabilities in place by softmax
+        double[] prob = new double[n_out];
+        for (int row = 0; row < n_out; row++) {
+            double score = 0;
+            for (int col = 0; col < n_in; col++) score += W[row][col] * x[col];
+            prob[row] = score + b[row];
+        }
+        softmax(prob);
+        double[] dy = new double[n_out];
+        for (int row = 0; row < n_out; row++) {
+            dy[row] = y[row] - prob[row];
+            double step = lr * dy[row];
+            for (int col = 0; col < n_in; col++) W[row][col] += step * x[col] / N;
+            b[row] += step / N;
+        }
+        return dy;
+    }
+
+    /** In-place softmax over x[0..n_out). The running max starts at -infinity rather than 0 so
+     *  all-negative inputs are shifted too and cannot underflow into a 0/0 = NaN result. */
+    public void softmax(double[] x) {
+        double max = Double.NEGATIVE_INFINITY;
+        for(int i=0; i<n_out; i++) if(max < x[i]) max = x[i];
+
+        double sum = 0.0;
+        for(int i=0; i<n_out; i++) {
+            x[i] = Math.exp(x[i] - max);
+            sum += x[i];
+        }
+        for(int i=0; i<n_out; i++) x[i] /= sum;
+    }
+
+    public void predict(double[] x, double[] y) {
+        for(int i=0; i<n_out; i++) {
+            y[i] = 0.;
+            for(int j=0; j<n_in; j++) {
+                y[i] += W[i][j] * x[j];
+            }
+            y[i] += b[i];
+        }
+
+        softmax(y);
+    }
+
+    /** Demo: fits the classifier on six 6-element rows in two classes and prints the predicted probabilities for two test rows. */
+    private static void test_lr() {
+        double learning_rate = 0.1;
+        int n_epochs = 500;
+
+        int train_N = 6;
+        int test_N = 2;
+        int n_in = 6;
+        int n_out = 2;
+
+        double[][] train_X = {
+                {1., 1., 1., 0., 0., 0.},
+                {1., 0., 1., 0., 0., 0.},
+                {1., 1., 1., 0., 0., 0.},
+                {0., 0., 1., 1., 1., 0.},
+                {0., 0., 1., 1., 0., 0.},
+                {0., 0., 1., 1., 1., 0.}
+        };
+
+        int[][] train_Y = {
+                {1, 0},
+                {1, 0},
+                {1, 0},
+                {0, 1},
+                {0, 1},
+                {0, 1}
+        };
+
+        // construct
+        LogisticRegression classifier = new LogisticRegression(train_N, n_in, n_out);
+
+        // train
+        for(int epoch=0; epoch<n_epochs; epoch++) {
+            for(int i=0; i<train_N; i++) {
+                classifier.train(train_X[i], train_Y[i], learning_rate);
+            }
+            //learning_rate *= 0.95;
+        }
+
+        // test data
+        double[][] test_X = {
+                {1., 0., 1., 0., 0., 0.},
+                {0., 0., 1., 1., 1., 0.}
+        };
+
+        double[][] test_Y = new double[test_N][n_out];
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            classifier.predict(test_X[i], test_Y[i]);
+            for(int j=0; j<n_out; j++) {
+                System.out.print(test_Y[i][j] + " ");
+            }
+            System.out.println();
+        }
+    }
+
+    public static void main(String[] args) {  // entry point; args are not read
+        test_lr();  // runs the logistic regression demo and prints its predictions
+    }
+}
diff --git a/DeepLearning/java/src/DeepLearning/LogisticRegressionDiscrete.java b/DeepLearning/java/src/DeepLearning/LogisticRegressionDiscrete.java
new file mode 100644
index 00000000..fce51727
--- /dev/null
+++ b/DeepLearning/java/src/DeepLearning/LogisticRegressionDiscrete.java
@@ -0,0 +1,117 @@
+package DeepLearning;
+
+public class LogisticRegressionDiscrete extends LogisticRegression {
+    public int N;
+    public int n_in;
+    public int n_out;
+    public double[][] W;
+    public double[] b;
+
+    public LogisticRegressionDiscrete(int N, int n_in, int n_out) {
+        super(N, n_in, n_out);
+
+        this.N = N;
+        this.n_in = n_in;
+        this.n_out = n_out;
+
+        W = new double[this.n_out][this.n_in];
+        b = new double[this.n_out];
+    }
+
+    public void train(int[] x, int[] y, double lr) {
+        double[] p_y_given_x = new double[n_out];
+        double[] dy = new double[n_out];
+
+        for(int i=0; i<n_out; i++) {
+            p_y_given_x[i] = 0;
+            for(int j=0; j<n_in; j++) {
+                p_y_given_x[i] += W[i][j] * x[j];
+            }
+            p_y_given_x[i] += b[i];
+        }
+        softmax(p_y_given_x);
+
+        for(int i=0; i<n_out; i++) {
+            dy[i] = y[i] - p_y_given_x[i];
+
+            for(int j=0; j<n_in; j++) {
+                W[i][j] += lr * dy[i] * x[j] / N;
+            }
+
+            b[i] += lr * dy[i] / N;
+        }
+    }
+
+    public void predict(int[] x, double[] y) {
+        for(int i=0; i<n_out; i++) {
+            y[i] = 0;
+            for(int j=0; j<n_in; j++) {
+                y[i] += W[i][j] * x[j];
+            }
+            y[i] += b[i];
+        }
+
+        softmax(y);
+    }
+
+    private static void test_lr() {
+        double learning_rate = 0.1;
+        int n_epochs = 500;
+
+        int train_N = 6;
+        int test_N = 2;
+        int n_in = 6;
+        int n_out = 2;
+
+        int[][] train_X = {
+                {1, 1, 1, 0, 0, 0},
+                {1, 0, 1, 0, 0, 0},
+                {1, 1, 1, 0, 0, 0},
+                {0, 0, 1, 1, 1, 0},
+                {0, 0, 1, 1, 0, 0},
+                {0, 0, 1, 1, 1, 0}
+        };
+
+        int[][] train_Y = {
+                {1, 0},
+                {1, 0},
+                {1, 0},
+                {0, 1},
+                {0, 1},
+                {0, 1}
+        };
+
+        // construct
+        LogisticRegressionDiscrete classifier = new LogisticRegressionDiscrete(train_N, n_in, n_out);
+
+        // train
+        for(int epoch=0; epoch<n_epochs; epoch++) {
+            for(int i=0; i<train_N; i++) {
+                classifier.train(train_X[i], train_Y[i], learning_rate);
+            }
+            //learning_rate *= 0.95;
+        }
+
+        // test data
+        int[][] test_X = {
+                {1, 0, 1, 0, 0, 0},
+                {0, 0, 1, 1, 1, 0}
+        };
+
+        double[][] test_Y = new double[test_N][n_out];
+
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            classifier.predict(test_X[i], test_Y[i]);
+            for(int j=0; j<n_out; j++) {
+                System.out.print(test_Y[i][j] + " ");
+            }
+            System.out.println();
+        }
+    }
+
+    public static void main(String[] args) {
+        test_lr();
+    }
+}
diff --git a/DeepLearning/java/src/DeepLearning/MLP.java b/DeepLearning/java/src/DeepLearning/MLP.java
new file mode 100644
index 00000000..7acece84
--- /dev/null
+++ b/DeepLearning/java/src/DeepLearning/MLP.java
@@ -0,0 +1,124 @@
+package DeepLearning;
+
+import java.util.Random;
+
+public class MLP {
+    public int N;
+    public int n_in;
+    public int n_hidden;
+    public int n_out;
+    public HiddenLayer hiddenLayer;
+    public LogisticRegression logisticLayer;
+    public Random rng;
+
+
+    /**
+     * Wires a single tanh hidden layer (n_in -> n_hidden) to a logistic output layer
+     * (n_hidden -> n_out). A null rng is replaced by a Random seeded with 1234.
+     */
+    public MLP(int N, int n_in, int n_hidden, int n_out, Random rng) {
+        Random source = (rng != null) ? rng : new Random(1234);
+
+        this.N = N;
+        this.n_in = n_in;
+        this.n_hidden = n_hidden;
+        this.n_out = n_out;
+        this.rng = source;
+
+        this.hiddenLayer = new HiddenLayer(N, n_in, n_hidden, null, null, source, "tanh");
+        this.logisticLayer = new LogisticRegression(N, n_hidden, n_out);
+    }
+
+
+    public void train(double[][] train_X, int[][] train_Y, double lr) {
+        double[] hidden_layer_input;
+        double[] logistic_layer_input;
+        double[] dy;
+
+        for(int n=0; n<N; n++) {
+            hidden_layer_input = new double[n_in];
+            logistic_layer_input = new double[n_hidden];
+
+            for(int j=0; j<n_in; j++) hidden_layer_input[j] = train_X[n][j];
+
+            // forward hiddenLayer
+            hiddenLayer.forward(hidden_layer_input, logistic_layer_input);
+
+            // forward and backward logisticLayer
+            // dy = new double[n_out];  // define delta of y for backpropagation
+            dy = logisticLayer.train(logistic_layer_input, train_Y[n], lr); //, dy);
+
+            // backward hiddenLayer
+            hiddenLayer.backward(hidden_layer_input, null, logistic_layer_input, dy, logisticLayer.W, lr);
+
+        }
+    }
+
+    public void predict(double[] x, double[] y) {  // fills y with the network output for x
+        double[] logistic_layer_input = new double[n_hidden];  // scratch buffer for the hidden activation
+        hiddenLayer.forward(x, logistic_layer_input);
+        logisticLayer.predict(logistic_layer_input, y);  // result is written into y
+    }
+
+
+
+    /** Demo: trains a 2-3-2 MLP on the XOR truth table for 5000 epochs and prints its outputs for the same four inputs. */
+    private static void test_mlp() {
+        Random rng = new Random(123);
+
+        double learning_rate = 0.1;
+        int n_epochs = 5000;
+
+        int train_N = 4;
+        int test_N = 4;
+        int n_in = 2;
+        int n_hidden = 3;
+        int n_out = 2;
+
+        double[][] train_X = {
+                {0., 0.},
+                {0., 1.},
+                {1., 0.},
+                {1., 1.},
+        };
+
+        int[][] train_Y = {
+                {0, 1},
+                {1, 0},
+                {1, 0},
+                {0, 1},
+        };
+
+        // construct MLP
+        MLP classifier = new MLP(train_N, n_in, n_hidden, n_out, rng);
+
+        // train
+        for(int epoch=0; epoch<n_epochs; epoch++) {
+            classifier.train(train_X, train_Y, learning_rate);
+        }
+
+        // test data
+        double[][] test_X = {
+                {0., 0.},
+                {0., 1.},
+                {1., 0.},
+                {1., 1.},
+        };
+
+        double[][] test_Y = new double[test_N][n_out];
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            classifier.predict(test_X[i], test_Y[i]);
+            for(int j=0; j<n_out; j++) {
+                System.out.print(test_Y[i][j] + " ");
+            }
+            System.out.println();
+        }
+
+    }
+
+    public static void main(String[] args) {  // entry point; args are not read
+        test_mlp();  // runs the XOR MLP demo and prints its predictions
+    }
+}
diff --git a/DeepLearning/java/src/DeepLearning/RBM.java b/DeepLearning/java/src/DeepLearning/RBM.java
new file mode 100644
index 00000000..f3268f84
--- /dev/null
+++ b/DeepLearning/java/src/DeepLearning/RBM.java
@@ -0,0 +1,202 @@
+package DeepLearning;
+
+import java.util.Random;
+import static DeepLearning.utils.*;
+
+public class RBM {
+    public int N;
+    public int n_visible;
+    public int n_hidden;
+    public double[][] W;
+    public double[] hbias;
+    public double[] vbias;
+    public Random rng;
+
+
+    /**
+     * Creates an RBM with n_visible visible and n_hidden hidden units.
+     * Null arguments select defaults: W (n_hidden x n_visible) is filled via
+     * uniform(-1/n_visible, 1/n_visible, rng), both bias vectors are all zeros, and rng is a
+     * Random seeded with 1234.
+     *
+     * @param N     divisor applied to every update in contrastive_divergence()
+     * @param W     weights to adopt (the array is stored, not copied), or null
+     * @param hbias hidden biases to adopt, or null
+     * @param vbias visible biases to adopt, or null
+     */
+    public RBM(int N, int n_visible, int n_hidden,
+               double[][] W, double[] hbias, double[] vbias, Random rng) {
+        this.N = N;
+        this.n_visible = n_visible;
+        this.n_hidden = n_hidden;
+        this.rng = (rng == null) ? new Random(1234) : rng;
+
+        if(W == null) {
+            this.W = new double[this.n_hidden][this.n_visible];
+            double a = 1.0 / this.n_visible;
+
+            for(int i=0; i<this.n_hidden; i++) {
+                for(int j=0; j<this.n_visible; j++) {
+                    // this.rng, not rng: the argument is still null when the default was chosen
+                    this.W[i][j] = uniform(-a, a, this.rng);
+                }
+            }
+        } else {
+            this.W = W;
+        }
+
+        // new double[] arrays are already zero-filled, so no explicit clearing is needed
+        this.hbias = (hbias == null) ? new double[this.n_hidden] : hbias;
+        this.vbias = (vbias == null) ? new double[this.n_visible] : vbias;
+    }
+
+
+    /**
+     * One CD-k update from a single visible vector: k Gibbs steps starting at the hidden
+     * sample drawn for input, then W, hbias and vbias are moved by lr * (positive - negative) / N.
+     */
+    public void contrastive_divergence(int[] input, double lr, int k) {
+        double[] ph_mean = new double[n_hidden];
+        int[] ph_sample = new int[n_hidden];
+        double[] nv_means = new double[n_visible];
+        int[] nv_samples = new int[n_visible];
+        double[] nh_means = new double[n_hidden];
+        int[] nh_samples = new int[n_hidden];
+        // positive phase: hidden means and a hidden sample for the data vector
+        sample_h_given_v(input, ph_mean, ph_sample);
+        // negative phase: step 0 starts from ph_sample, later steps continue from nh_samples
+        for(int step=0; step<k; step++) {
+            if(step == 0) {
+                gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);
+            } else {
+                gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
+            }
+        }
+
+        for(int i=0; i<n_hidden; i++) {
+            for(int j=0; j<n_visible; j++) {
+                // the weight update uses the hidden mean (ph_mean); the hidden bias below uses the sample
+                W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+            }
+            hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
+        }
+        for(int i=0; i<n_visible; i++) {
+            vbias[i] += lr * (input[i] - nv_samples[i]) / N;
+        }
+    }
+
+
+    public void sample_h_given_v(int[] v0_sample, double[] mean, int[] sample) {  // fills mean and sample for all n_hidden units
+        for(int i=0; i<n_hidden; i++) {
+            mean[i] = propup(v0_sample, W[i], hbias[i]);  // sigmoid activation of hidden unit i
+            sample[i] = binomial(1, mean[i], rng);  // binomial(1, p) draw using that activation as p
+        }
+    }
+
+    public void sample_v_given_h(int[] h0_sample, double[] mean, int[] sample) {  // fills mean and sample for all n_visible units
+        for(int i=0; i<n_visible; i++) {
+            mean[i] = propdown(h0_sample, i, vbias[i]);  // sigmoid activation of visible unit i
+            sample[i] = binomial(1, mean[i], rng);  // binomial(1, p) draw using that activation as p
+        }
+    }
+
+    /**
+     * Hidden-unit activation: sigmoid of (sum of w[j] * v[j] over the first n_visible entries, plus b).
+     */
+    public double propup(int[] v, double[] w, double b) {
+        double total = 0.0;
+        for (int k = 0; k < n_visible; k++) total += w[k] * v[k];
+        return sigmoid(total + b);
+    }
+
+    /**
+     * Visible-unit activation for unit i: sigmoid of (column i of W dotted with h, plus b).
+     */
+    public double propdown(int[] h, int i, double b) {
+        double total = 0.0;
+        for (int k = 0; k < n_hidden; k++) total += W[k][i] * h[k];
+        return sigmoid(total + b);
+    }
+
+    public void gibbs_hvh(int[] h0_sample, double[] nv_means, int[] nv_samples, double[] nh_means, int[] nh_samples) {  // one hidden -> visible -> hidden step
+        sample_v_given_h(h0_sample, nv_means, nv_samples);  // fills nv_means / nv_samples from the hidden sample
+        sample_h_given_v(nv_samples, nh_means, nh_samples);  // fills nh_means / nh_samples from the new visible sample
+    }
+
+
+    /**
+     * Deterministic reconstruction: hidden activations are computed from v with propup and
+     * pushed back down (no sampling) to give one value per visible unit.
+     *
+     * @param v               visible vector to reconstruct
+     * @param reconstructed_v output buffer; its first n_visible entries are overwritten
+     */
+    public void reconstruct(int[] v, double[] reconstructed_v) {
+        double[] hidden = new double[n_hidden];
+        for (int unit = 0; unit < n_hidden; unit++) hidden[unit] = propup(v, W[unit], hbias[unit]);
+
+        // same sum as propdown, but over the real-valued hidden activations instead of int samples
+        for (int vis = 0; vis < n_visible; vis++) {
+            double total = 0.0;
+            for (int unit = 0; unit < n_hidden; unit++) total += W[unit][vis] * hidden[unit];
+            reconstructed_v[vis] = sigmoid(total + vbias[vis]);
+        }
+    }
+
+
+
+    /** Demo: trains a 6-visible / 3-hidden RBM with CD-1 on six binary rows and prints the reconstruction of two test rows. */
+    private static void test_rbm() {
+        Random rng = new Random(123);
+
+        double learning_rate = 0.1;
+        int training_epochs = 1000;
+        int k = 1;
+
+        int train_N = 6;
+        int test_N = 2;
+        int n_visible = 6;
+        int n_hidden = 3;
+
+        // training data
+        int[][] train_X = {
+                {1, 1, 1, 0, 0, 0},
+                {1, 0, 1, 0, 0, 0},
+                {1, 1, 1, 0, 0, 0},
+                {0, 0, 1, 1, 1, 0},
+                {0, 0, 1, 0, 1, 0},
+                {0, 0, 1, 1, 1, 0}
+        };
+
+        // construct the RBM; null weights and biases select the constructor defaults
+        RBM rbm = new RBM(train_N, n_visible, n_hidden, null, null, null, rng);
+
+        // train
+        for(int epoch=0; epoch<training_epochs; epoch++) {
+            for(int i=0; i<train_N; i++) {
+                rbm.contrastive_divergence(train_X[i], learning_rate, k);
+            }
+        }
+
+        // test data
+        int[][] test_X = {
+                {1, 1, 0, 0, 0, 0},
+                {0, 0, 0, 1, 1, 0}
+        };
+
+        double[][] reconstructed_X = new double[test_N][n_visible];
+
+        for(int i=0; i<test_N; i++) {
+            rbm.reconstruct(test_X[i], reconstructed_X[i]);
+            for(int j=0; j<n_visible; j++) {
+                System.out.printf("%.5f ", reconstructed_X[i][j]);
+            }
+            System.out.println();
+        }
+    }
+
+    public static void main(String[] args) {  // entry point; args are not read
+        test_rbm();  // runs the RBM demo and prints the reconstructed test rows
+    }
+
+}
diff --git a/DeepLearning/java/src/DeepLearning/SdA.java b/DeepLearning/java/src/DeepLearning/SdA.java
new file mode 100644
index 00000000..af761e70
--- /dev/null
+++ b/DeepLearning/java/src/DeepLearning/SdA.java
@@ -0,0 +1,230 @@
+package DeepLearning;
+
+import java.util.Random;
+import static DeepLearning.utils.*;
+
+public class SdA {
+    public int N;
+    public int n_ins;
+    public int[] hidden_layer_sizes;
+    public int n_outs;
+    public int n_layers;
+    public HiddenLayerDiscrete[] sigmoid_layers;
+    public dA[] dA_layers;
+    public LogisticRegressionDiscrete log_layer;
+    public Random rng;
+
+
+    /**
+     * Builds the stack: one sigmoid layer plus one dA per entry of hidden_layer_sizes
+     * (each dA is handed its sigmoid layer's W and b), topped by a logistic-regression layer.
+     * A null rng falls back to Random(1234), and that generator is what every layer receives.
+     */
+    public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) {
+        this.N = N;
+        this.n_ins = n_ins;
+        this.hidden_layer_sizes = hidden_layer_sizes;
+        this.n_outs = n_outs;
+        this.n_layers = n_layers;
+
+        // the layers must receive this.rng: passing the raw argument would leak a null rng
+        this.rng = (rng == null) ? new Random(1234) : rng;
+
+        this.sigmoid_layers = new HiddenLayerDiscrete[n_layers];
+        this.dA_layers = new dA[n_layers];
+
+        // construct multi-layer
+        for(int i=0; i<this.n_layers; i++) {
+            // layer 0 reads the raw input; layer i > 0 reads the output of layer i-1
+            int input_size = (i == 0) ? this.n_ins : this.hidden_layer_sizes[i-1];
+
+            // construct sigmoid_layer
+            this.sigmoid_layers[i] = new HiddenLayerDiscrete(this.N, input_size, this.hidden_layer_sizes[i], null, null, this.rng);
+
+            // construct dA_layer
+            this.dA_layers[i] = new dA(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, this.rng);
+        }
+
+        // layer for output using Logistic Regression
+        this.log_layer = new LogisticRegressionDiscrete(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
+    }
+
+    public void pretrain(int[][] train_X, double lr, double corruption_level, int epochs) {
+        int[] layer_input = new int[0];
+        int prev_layer_input_size;
+        int[] prev_layer_input;
+
+        for(int i=0; i<n_layers; i++) {  // layer-wise
+            for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
+                for(int n=0; n<N; n++) {  // input x1...xN
+                    // layer input
+                    for(int l=0; l<=i; l++) {
+
+                        if(l == 0) {
+                            layer_input = new int[n_ins];
+                            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j];
+                        } else {
+                            if(l == 1) prev_layer_input_size = n_ins;
+                            else prev_layer_input_size = hidden_layer_sizes[l-2];
+
+                            prev_layer_input = new int[prev_layer_input_size];
+                            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
+
+                            layer_input = new int[hidden_layer_sizes[l-1]];
+
+                            sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input);
+                        }
+                    }
+
+                    dA_layers[i].train(layer_input, lr, corruption_level);
+                }
+            }
+        }
+    }
+
+    /**
+     * Supervised fine-tuning: for every epoch and each of the N samples, samples
+     * upward through all sigmoid layers (sample_h_given_v) and calls log_layer.train
+     * on the top-layer sample with the matching row of train_Y.
+     */
+    public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) {
+        int[] layer_input = new int[0];
+        for(int epoch=0; epoch<epochs; epoch++) {
+            for(int n=0; n<N; n++) {
+                // layer input
+                for(int i=0; i<n_layers; i++) {
+                    int[] below;  // private copy of whatever feeds layer i
+                    if(i == 0) {
+                        below = new int[n_ins];
+                        System.arraycopy(train_X[n], 0, below, 0, n_ins);
+                    } else {
+                        below = layer_input.clone();
+                    }
+                    layer_input = new int[hidden_layer_sizes[i]];
+                    sigmoid_layers[i].sample_h_given_v(below, layer_input);
+                }
+
+                log_layer.train(layer_input, train_Y[n], lr);
+            }
+            // the learning rate is held constant across epochs
+        }
+    }
+
+    public void predict(int[] x, double[] y) {
+        double[] layer_input = new double[0];
+        // int prev_layer_input_size;
+        double[] prev_layer_input = new double[n_ins];
+        for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];
+
+        double linear_output;
+
+
+        // layer activation
+        for(int i=0; i<n_layers; i++) {
+            layer_input = new double[sigmoid_layers[i].n_out];
+
+            for(int k=0; k<sigmoid_layers[i].n_out; k++) {
+                linear_output = 0.0;
+
+                for(int j=0; j<sigmoid_layers[i].n_in; j++) {
+                    linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
+                }
+                linear_output += sigmoid_layers[i].b[k];
+                layer_input[k] = sigmoid(linear_output);
+            }
+
+            if(i < n_layers-1) {
+                prev_layer_input = new double[sigmoid_layers[i].n_out];
+                for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j];
+            }
+        }
+
+        for(int i=0; i<log_layer.n_out; i++) {
+            y[i] = 0;
+            for(int j=0; j<log_layer.n_in; j++) {
+                y[i] += log_layer.W[i][j] * layer_input[j];
+            }
+            y[i] += log_layer.b[i];
+        }
+
+        log_layer.softmax(y);
+    }
+
+
+    private static void test_sda() {
+        Random rng = new Random(123);
+
+        double pretrain_lr = 0.1;
+        double corruption_level = 0.3;
+        int pretraining_epochs = 1000;
+        double finetune_lr = 0.1;
+        int finetune_epochs = 500;
+
+        int train_N = 10;
+        int test_N = 4;
+        int n_ins = 28;
+        int n_outs = 2;
+        int[] hidden_layer_sizes = {15, 15};
+        int n_layers = hidden_layer_sizes.length;
+
+        // training data
+        int[][] train_X = {
+                {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}
+        };
+
+        int[][] train_Y = {
+                {1, 0},
+                {1, 0},
+                {1, 0},
+                {1, 0},
+                {1, 0},
+                {0, 1},
+                {0, 1},
+                {0, 1},
+                {0, 1},
+                {0, 1}
+        };
+
+        // construct SdA
+        SdA sda = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng);
+
+        // pretrain
+        sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs);
+
+        // finetune
+        sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs);
+
+
+        // test data
+        int[][] test_X = {
+                {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}
+        };
+
+        double[][] test_Y = new double[test_N][n_outs];
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            sda.predict(test_X[i], test_Y[i]);
+            for(int j=0; j<n_outs; j++) {
+                System.out.print(test_Y[i][j] + " ");
+            }
+            System.out.println();
+        }
+    }
+
+    public static void main(String[] args) {  // entry point: runs the SdA demo; args are not read
+        test_sda();
+    }
+}
diff --git a/DeepLearning/java/src/DeepLearning/dA.java b/DeepLearning/java/src/DeepLearning/dA.java
new file mode 100644
index 00000000..acf3b397
--- /dev/null
+++ b/DeepLearning/java/src/DeepLearning/dA.java
@@ -0,0 +1,187 @@
+package DeepLearning;
+
+import java.util.Random;
+import static DeepLearning.utils.*;
+
+public class dA {
+    public int n_visible;
+    public int N;
+    public int n_hidden;
+    public double[][] W;
+    public double[] hbias;
+    public double[] vbias;
+    public Random rng;
+
+
+    /**
+     * Creates a denoising autoencoder with n_visible inputs and n_hidden hidden units.
+     * Any of W (n_hidden x n_visible), hbias and vbias may be null: W is then drawn
+     * uniformly from [-1/n_visible, 1/n_visible) and the biases start at zero. Non-null
+     * arrays are stored by reference, not copied. A null rng falls back to Random(1234).
+     */
+    public dA(int N, int n_visible, int n_hidden,
+              double[][] W, double[] hbias, double[] vbias, Random rng) {
+        this.N = N;
+        this.n_visible = n_visible;
+        this.n_hidden = n_hidden;
+
+        if(rng == null)	this.rng = new Random(1234);
+        else this.rng = rng;
+
+        if(W == null) {
+            this.W = new double[this.n_hidden][this.n_visible];
+            double a = 1.0 / this.n_visible;
+
+            for(int i=0; i<this.n_hidden; i++) {
+                for(int j=0; j<this.n_visible; j++) {
+                    // draw from this.rng: the rng argument may be null at this point
+                    this.W[i][j] = uniform(-a, a, this.rng);
+                }
+            }
+        } else {
+            this.W = W;
+        }
+
+        // Java zero-fills new arrays, so the default biases need no explicit loop
+        if(hbias == null) this.hbias = new double[this.n_hidden];
+        else this.hbias = hbias;
+
+        if(vbias == null) this.vbias = new double[this.n_visible];
+        else this.vbias = vbias;
+    }
+
+    /**
+     * Masking noise: a zero input stays zero (no random draw); any other entry is replaced
+     * by binomial(1, p, rng), i.e. it becomes 1 with probability p for p in [0, 1].
+     */
+    public void get_corrupted_input(int[] x, int[] tilde_x, double p) {
+        for(int i=0; i<n_visible; i++) {
+            tilde_x[i] = (x[i] == 0) ? 0 : binomial(1, p, rng);
+        }
+    }
+
+    // Encode
+    public void get_hidden_values(int[] x, double[] y) {
+        for(int i=0; i<n_hidden; i++) {
+            y[i] = 0;
+            for(int j=0; j<n_visible; j++) {
+                y[i] += W[i][j] * x[j];
+            }
+            y[i] += hbias[i];
+            y[i] = sigmoid(y[i]);
+        }
+    }
+
+    // Decode
+    public void get_reconstructed_input(double[] y, double[] z) {
+        for(int i=0; i<n_visible; i++) {
+            z[i] = 0;
+            for(int j=0; j<n_hidden; j++) {
+                z[i] += W[j][i] * y[j];
+            }
+            z[i] += vbias[i];
+            z[i] = sigmoid(z[i]);
+        }
+    }
+
+    // One training step on a single sample x: corrupt, encode, decode, then move vbias,
+    // hbias and W along the reconstruction error (measured against the uncorrupted x).
+    public void train(int[] x, double lr, double corruption_level) {
+        int[] tilde_x = new int[n_visible];
+        double[] y = new double[n_hidden];
+        double[] z = new double[n_visible];
+        double[] L_vbias = new double[n_visible];
+        double[] L_hbias = new double[n_hidden];
+
+        double p = 1 - corruption_level;  // chance that a nonzero input survives corruption
+        get_corrupted_input(x, tilde_x, p);
+        get_hidden_values(tilde_x, y);
+        get_reconstructed_input(y, z);
+
+        // vbias: reconstruction error per visible unit, step scaled by lr / N
+        for(int i=0; i<n_visible; i++) {
+            L_vbias[i] = x[i] - z[i];
+            vbias[i] += lr * L_vbias[i] / N;
+        }
+
+        // hbias: error pushed back through W, times the sigmoid slope y * (1 - y)
+        for(int i=0; i<n_hidden; i++) {
+            L_hbias[i] = 0;
+            for(int j=0; j<n_visible; j++) {
+                L_hbias[i] += W[i][j] * L_vbias[j];
+            }
+            L_hbias[i] *= y[i] * (1 - y[i]);
+            hbias[i] += lr * L_hbias[i] / N;
+        }
+
+        // W: updated last, so the hbias loop above still read the old weights
+        for(int i=0; i<n_hidden; i++) {
+            for(int j=0; j<n_visible; j++) {
+                W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
+            }
+        }
+    }
+
+    // Encodes x as given (no corruption step) and decodes the hidden values into z.
+    public void reconstruct(int[] x, double[] z) {
+        double[] hidden = new double[n_hidden];
+        get_hidden_values(x, hidden);
+        get_reconstructed_input(hidden, z);
+    }
+
+    private static void test_dA() {
+        Random rng = new Random(123);
+
+        double learning_rate = 0.1;
+        double corruption_level = 0.3;
+        int training_epochs = 100;
+
+        int train_N = 10;
+        int test_N = 2;
+        int n_visible = 20;
+        int n_hidden = 5;
+
+        int[][] train_X = {
+                {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0}
+        };
+
+        dA da = new dA(train_N, n_visible, n_hidden, null, null, null, rng);
+
+        // train
+        for(int epoch=0; epoch<training_epochs; epoch++) {
+            for(int i=0; i<train_N; i++) {
+                da.train(train_X[i], learning_rate, corruption_level);
+            }
+        }
+
+        // test data
+        int[][] test_X = {
+                {1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0}
+        };
+
+        double[][] reconstructed_X = new double[test_N][n_visible];
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            da.reconstruct(test_X[i], reconstructed_X[i]);
+            for(int j=0; j<n_visible; j++) {
+                System.out.printf("%.5f ", reconstructed_X[i][j]);
+            }
+            System.out.println();
+        }
+    }
+
+    public static void main(String[] args) {  // entry point: runs the dA demo; args are not read
+        test_dA();
+    }
+}
diff --git a/DeepLearning/java/src/DeepLearning/utils.java b/DeepLearning/java/src/DeepLearning/utils.java
new file mode 100644
index 00000000..95d95f70
--- /dev/null
+++ b/DeepLearning/java/src/DeepLearning/utils.java
@@ -0,0 +1,55 @@
+package DeepLearning;
+
+import java.util.Random;
+
+// Random-number helpers and activation functions (with derivatives) shared by the DeepLearning classes.
+public class utils {
+    // Uniform draw from [min, max), built on rng.nextDouble().
+    public static double uniform(double min, double max, Random rng) {
+        return rng.nextDouble() * (max - min) + min;
+    }
+
+    // Counts the successes among n independent trials that each succeed with probability p.
+    // A p outside [0, 1] yields 0 without consuming any random numbers.
+    public static int binomial(int n, double p, Random rng) {
+        if(p < 0 || p > 1) return 0;
+
+        int c = 0;
+        for(int i=0; i<n; i++) {
+            if (rng.nextDouble() < p) c++;
+        }
+
+        return c;
+    }
+
+    // Logistic function 1 / (1 + e^-x). Math.exp replaces Math.pow(Math.E, -x): it is the
+    // dedicated library call and avoids raising the already-rounded constant Math.E to a power.
+    public static double sigmoid(double x) {
+        return 1. / (1. + Math.exp(-x));
+    }
+
+    // Sigmoid derivative expressed through the sigmoid's output: pass x = sigmoid(a), not a.
+    public static double dsigmoid(double x) {
+        return x * (1. - x);
+    }
+
+    // Thin wrapper over Math.tanh.
+    public static double tanh(double x) {
+        return Math.tanh(x);
+    }
+
+    // tanh derivative expressed through the tanh output: pass x = tanh(a), not a.
+    public static double dtanh(double x) {
+        return 1. - x * x;
+    }
+
+    // Rectified linear unit: x when x > 0, otherwise 0 (NaN also maps to 0).
+    public static double ReLU(double x) {
+        return (x > 0) ? x : 0.;
+    }
+
+    // ReLU slope: 1 when x > 0, otherwise 0 (so 0 at x == 0).
+    public static double dReLU(double x) {
+        return (x > 0) ? 1. : 0.;
+    }
+}
diff --git a/DeepLearning/python/CDBN.py b/DeepLearning/python/CDBN.py
new file mode 100644
index 00000000..dbf6648c
--- /dev/null
+++ b/DeepLearning/python/CDBN.py
@@ -0,0 +1,124 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import numpy
+from HiddenLayer import HiddenLayer
+from LogisticRegression import LogisticRegression
+from RBM import RBM
+from CRBM import CRBM
+from DBN import DBN
+from utils import *
+
+ 
+class CDBN(DBN):
+    def __init__(self, input=None, label=None,\
+                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
+                 rng=None):
+        
+        self.x = input
+        self.y = label
+
+        self.sigmoid_layers = []
+        self.rbm_layers = []
+        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)
+
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
+
+        
+        assert self.n_layers > 0
+
+
+        # construct multi-layer
+        for i in xrange(self.n_layers):
+            # layer_size
+            if i == 0:
+                input_size = n_ins
+            else:
+                input_size = hidden_layer_sizes[i - 1]
+
+            # layer_input
+            if i == 0:
+                layer_input = self.x
+            else:
+                layer_input = self.sigmoid_layers[-1].sample_h_given_v()
+                
+            # construct sigmoid_layer
+            sigmoid_layer = HiddenLayer(input=layer_input,
+                                        n_in=input_size,
+                                        n_out=hidden_layer_sizes[i],
+                                        rng=rng,
+                                        activation=sigmoid)
+            self.sigmoid_layers.append(sigmoid_layer)
+
+            # construct rbm_layer
+            if i == 0:
+                rbm_layer = CRBM(input=layer_input,     # continuous-valued inputs
+                                 n_visible=input_size,
+                                 n_hidden=hidden_layer_sizes[i],
+                                 W=sigmoid_layer.W,     # W, b are shared
+                                 hbias=sigmoid_layer.b)
+            else:
+                rbm_layer = RBM(input=layer_input,
+                                n_visible=input_size,
+                                n_hidden=hidden_layer_sizes[i],
+                                W=sigmoid_layer.W,     # W, b are shared
+                                hbias=sigmoid_layer.b)
+                
+            self.rbm_layers.append(rbm_layer)
+
+
+        # layer for output using Logistic Regression
+        self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(),
+                                            label=self.y,
+                                            n_in=hidden_layer_sizes[-1],
+                                            n_out=n_outs)
+
+        # finetune cost: the negative log likelihood of the logistic regression layer
+        self.finetune_cost = self.log_layer.negative_log_likelihood()
+
+
+
+def test_cdbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \
+             finetune_lr=0.1, finetune_epochs=200):
+
+    x = numpy.array([[0.4, 0.5, 0.5, 0.,  0.,  0.],
+                     [0.5, 0.3,  0.5, 0.,  0.,  0.],
+                     [0.4, 0.5, 0.5, 0.,  0.,  0.],
+                     [0.,  0.,  0.5, 0.3, 0.5, 0.],
+                     [0.,  0.,  0.5, 0.4, 0.5, 0.],
+                     [0.,  0.,  0.5, 0.5, 0.5, 0.]])
+    
+    y = numpy.array([[1, 0],
+                     [1, 0],
+                     [1, 0],
+                     [0, 1],
+                     [0, 1],
+                     [0, 1]])
+
+    
+    rng = numpy.random.RandomState(123)
+
+    # construct DBN
+    dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, rng=rng)
+
+    # pre-training (TrainUnsupervisedDBN)
+    dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs)
+    
+    # fine-tuning (DBNSupervisedFineTuning)
+    dbn.finetune(lr=finetune_lr, epochs=finetune_epochs)
+
+
+    # test
+    x = numpy.array([[0.5, 0.5, 0., 0., 0., 0.],
+                     [0., 0., 0., 0.5, 0.5, 0.],
+                     [0.5, 0.5, 0.5, 0.5, 0.5, 0.]])
+
+    
+    print dbn.predict(x)
+
+
+
+
+if __name__ == "__main__":  # run the demo only when this file is executed as a script
+    test_cdbn()
diff --git a/DeepLearning/python/CRBM.py b/DeepLearning/python/CRBM.py
new file mode 100644
index 00000000..e8700470
--- /dev/null
+++ b/DeepLearning/python/CRBM.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import numpy
+from RBM import RBM
+from utils import *
+
+
+class CRBM(RBM):
+    def propdown(self, h):
+        pre_activation = numpy.dot(h, self.W.T) + self.vbias
+        return pre_activation
+        
+
+
+    def sample_v_given_h(self, h0_sample):
+        a_h = self.propdown(h0_sample)
+        en = numpy.exp(-a_h)
+        ep = numpy.exp(a_h)
+
+        v1_mean = 1 / (1 - en) - 1 / a_h
+        U = numpy.array(self.rng.uniform(
+            low=0,
+            high=1,
+            size=v1_mean.shape))
+
+        v1_sample = numpy.log((1 - U * (1 - ep))) / a_h
+
+        return [v1_mean, v1_sample]
+
+
+
+def test_crbm(learning_rate=0.1, k=1, training_epochs=1000):
+    data = numpy.array([[0.4, 0.5, 0.5, 0.,  0.,  0.],
+                        [0.5, 0.3,  0.5, 0.,  0.,  0.],
+                        [0.4, 0.5, 0.5, 0.,  0.,  0.],
+                        [0.,  0.,  0.5, 0.3, 0.5, 0.],
+                        [0.,  0.,  0.5, 0.4, 0.5, 0.],
+                        [0.,  0.,  0.5, 0.5, 0.5, 0.]])
+
+
+    rng = numpy.random.RandomState(123)
+
+    # construct CRBM
+    rbm = CRBM(input=data, n_visible=6, n_hidden=5, rng=rng)
+
+    # train
+    for epoch in xrange(training_epochs):
+        rbm.contrastive_divergence(lr=learning_rate, k=k)
+        # cost = rbm.get_reconstruction_cross_entropy()
+        # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost
+
+
+    # test
+    v = numpy.array([[0.5, 0.5, 0., 0., 0., 0.],
+                     [0., 0., 0., 0.5, 0.5, 0.]])
+
+    print rbm.reconstruct(v)
+
+
+if __name__ == "__main__":  # run the demo only when this file is executed as a script
+    test_crbm()
diff --git a/DeepLearning/python/DBN.py b/DeepLearning/python/DBN.py
new file mode 100644
index 00000000..b1b351bb
--- /dev/null
+++ b/DeepLearning/python/DBN.py
@@ -0,0 +1,154 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import numpy
+from HiddenLayer import HiddenLayer
+from LogisticRegression import LogisticRegression
+from RBM import RBM
+from utils import *
+
+
+class DBN(object):
+    def __init__(self, input=None, label=None,\
+                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
+                 rng=None):
+        
+        self.x = input
+        self.y = label
+
+        self.sigmoid_layers = []
+        self.rbm_layers = []
+        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)
+
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
+
+        
+        assert self.n_layers > 0
+
+
+        # construct multi-layer
+        for i in xrange(self.n_layers):
+            # layer_size
+            if i == 0:
+                input_size = n_ins
+            else:
+                input_size = hidden_layer_sizes[i - 1]
+
+            # layer_input
+            if i == 0:
+                layer_input = self.x
+            else:
+                layer_input = self.sigmoid_layers[-1].sample_h_given_v()
+                
+            # construct sigmoid_layer
+            sigmoid_layer = HiddenLayer(input=layer_input,
+                                        n_in=input_size,
+                                        n_out=hidden_layer_sizes[i],
+                                        rng=rng,
+                                        activation=sigmoid)
+            self.sigmoid_layers.append(sigmoid_layer)
+
+
+            # construct rbm_layer
+            rbm_layer = RBM(input=layer_input,
+                            n_visible=input_size,
+                            n_hidden=hidden_layer_sizes[i],
+                            W=sigmoid_layer.W,     # W, b are shared
+                            hbias=sigmoid_layer.b)
+            self.rbm_layers.append(rbm_layer)
+
+
+        # layer for output using Logistic Regression
+        self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(),
+                                            label=self.y,
+                                            n_in=hidden_layer_sizes[-1],
+                                            n_out=n_outs)
+
+        # finetune cost: the negative log likelihood of the logistic regression layer
+        self.finetune_cost = self.log_layer.negative_log_likelihood()
+
+
+
+    def pretrain(self, lr=0.1, k=1, epochs=100):
+        # pre-train layer-wise
+        for i in xrange(self.n_layers):
+            if i == 0:
+                layer_input = self.x
+            else:
+                layer_input = self.sigmoid_layers[i-1].sample_h_given_v(layer_input)
+            rbm = self.rbm_layers[i]
+            
+            for epoch in xrange(epochs):
+                rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)
+                # cost = rbm.get_reconstruction_cross_entropy()
+                # print >> sys.stderr, \
+                #        'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost
+
+
+    def finetune(self, lr=0.1, epochs=100):
+        layer_input = self.sigmoid_layers[-1].sample_h_given_v()
+
+        # train log_layer
+        epoch = 0
+        done_looping = False
+        while (epoch < epochs) and (not done_looping):
+            self.log_layer.train(lr=lr, input=layer_input)
+            # self.finetune_cost = self.log_layer.negative_log_likelihood()
+            # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost
+            
+            lr *= 0.95
+            epoch += 1
+
+
+    def predict(self, x):
+        layer_input = x
+        
+        for i in xrange(self.n_layers):
+            sigmoid_layer = self.sigmoid_layers[i]
+            layer_input = sigmoid_layer.output(input=layer_input)
+
+        out = self.log_layer.predict(layer_input)
+        return out
+
+
+
+def test_dbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \
+             finetune_lr=0.1, finetune_epochs=200):
+
+    x = numpy.array([[1,1,1,0,0,0],
+                     [1,0,1,0,0,0],
+                     [1,1,1,0,0,0],
+                     [0,0,1,1,1,0],
+                     [0,0,1,1,0,0],
+                     [0,0,1,1,1,0]])
+    y = numpy.array([[1, 0],
+                     [1, 0],
+                     [1, 0],
+                     [0, 1],
+                     [0, 1],
+                     [0, 1]])
+    
+    rng = numpy.random.RandomState(123)
+
+    # construct DBN
+    dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, rng=rng)
+
+    # pre-training (TrainUnsupervisedDBN)
+    dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs)
+    
+    # fine-tuning (DBNSupervisedFineTuning)
+    dbn.finetune(lr=finetune_lr, epochs=finetune_epochs)
+
+
+    # test
+    x = numpy.array([[1, 1, 0, 0, 0, 0],
+                     [0, 0, 0, 1, 1, 0],
+                     [1, 1, 1, 1, 1, 0]])
+    
+    print dbn.predict(x)
+
+
+
+if __name__ == "__main__":  # run the demo only when this file is executed as a script
+    test_dbn()
diff --git a/DeepLearning/python/Dropout.py b/DeepLearning/python/Dropout.py
new file mode 100644
index 00000000..ba991169
--- /dev/null
+++ b/DeepLearning/python/Dropout.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import numpy
+from HiddenLayer import HiddenLayer
+from LogisticRegression import LogisticRegression
+from utils import *
+
+
+class Dropout(object):
+    def __init__(self, input, label,\
+                 n_in, hidden_layer_sizes, n_out,\
+                 rng=None, activation=ReLU):
+
+        self.x = input
+        self.y = label
+
+        self.hidden_layers = []
+        self.n_layers = len(hidden_layer_sizes)
+        
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
+
+        assert self.n_layers > 0
+
+
+        # construct multi-layer 
+        for i in xrange(self.n_layers):
+
+            # layer_size
+            if i == 0:
+                input_size = n_in
+            else:
+                input_size = hidden_layer_sizes[i-1]
+
+            # layer_input
+            if i == 0:
+                layer_input = self.x
+
+            else:
+                layer_input = self.hidden_layers[-1].output()
+
+            # construct hidden_layer
+            hidden_layer = HiddenLayer(input=layer_input,
+                                       n_in=input_size,
+                                       n_out=hidden_layer_sizes[i],
+                                       rng=rng,
+                                       activation=activation)
+            
+            self.hidden_layers.append(hidden_layer)
+
+
+        # layer for ouput using Logistic Regression (softmax)
+        self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(),
+                                            label=self.y,
+                                            n_in=hidden_layer_sizes[-1],
+                                            n_out=n_out)
+
+
+    def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None):
+
+        for epoch in xrange(epochs):
+            dropout_masks = []  # create different masks in each training epoch
+
+            # forward hidden_layers
+            for i in xrange(self.n_layers):
+                if i == 0:
+                    layer_input = self.x
+
+                layer_input = self.hidden_layers[i].forward(input=layer_input)
+
+                if dropout == True:
+                    mask = self.hidden_layers[i].dropout(input=layer_input, p=p_dropout, rng=rng)
+                    layer_input *= mask
+
+                    dropout_masks.append(mask)
+
+
+            # forward & backward log_layer
+            self.log_layer.train(input=layer_input)
+
+
+            # backward hidden_layers
+            for i in reversed(xrange(0, self.n_layers)):
+                if i == self.n_layers-1:
+                    prev_layer = self.log_layer
+                else:
+                    prev_layer = self.hidden_layers[i+1]
+
+                if dropout == True:
+                    self.hidden_layers[i].backward(prev_layer=prev_layer, dropout=True, mask=dropout_masks[i])
+                else:
+                    self.hidden_layers[i].backward(prev_layer=prev_layer)
+                
+
+
+    def predict(self, x, dropout=True, p_dropout=0.5):
+        layer_input = x
+
+        for i in xrange(self.n_layers):
+            if dropout == True:
+                self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W
+            
+            layer_input = self.hidden_layers[i].output(input=layer_input)
+
+        return self.log_layer.predict(layer_input)
+
+
+
+def test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5):
+
+    x = numpy.array([[0,  0],
+                     [0,  1],
+                     [1,  0],
+                     [1,  1]])
+
+    y = numpy.array([[0, 1],
+                     [1, 0],
+                     [1, 0],
+                     [0, 1]])
+
+    rng = numpy.random.RandomState(123)
+
+
+    # construct Dropout MLP
+    classifier = Dropout(input=x, label=y, \
+                         n_in=2, hidden_layer_sizes=[10, 10], n_out=2, \
+                         rng=rng, activation=ReLU)
+
+
+    # train XOR
+    classifier.train(epochs=n_epochs, dropout=dropout, \
+                     p_dropout=p_dropout, rng=rng)
+
+
+    # test
+    print classifier.predict(x)
+
+
+
+if __name__ == "__main__":  # run the demo only when this file is executed as a script
+    test_dropout()
diff --git a/DeepLearning/python/HiddenLayer.py b/DeepLearning/python/HiddenLayer.py
new file mode 100644
index 00000000..a97bc616
--- /dev/null
+++ b/DeepLearning/python/HiddenLayer.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import numpy
+from utils import *
+
+
+class HiddenLayer(object):
+    """Fully connected layer computing activation(x.W + b), with a manual backward pass."""
+
+    def __init__(self, input, n_in, n_out,\
+                 W=None, b=None, rng=None, activation=tanh):
+        """Store input and parameters; W ~ U(-1/n_in, 1/n_in) and b = 0 unless given.
+
+        Raises ValueError when activation is not tanh, sigmoid or ReLU.
+        """
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
+
+        if W is None:
+            a = 1. / n_in
+            W = numpy.array(rng.uniform(low=-a, high=a, size=(n_in, n_out)))
+
+        if b is None:
+            b = numpy.zeros(n_out)
+
+        self.rng = rng
+        self.x = input
+        self.W = W
+        self.b = b
+
+        # Pair the activation with its derivative from utils.
+        if activation == tanh:
+            self.dactivation = dtanh
+        elif activation == sigmoid:
+            self.dactivation = dsigmoid
+        elif activation == ReLU:
+            self.dactivation = dReLU
+        else:
+            raise ValueError('activation function not supported.')
+        self.activation = activation
+
+    def output(self, input=None):
+        """Return activation(x.W + b); a non-None input replaces the cached self.x first."""
+        if input is not None:
+            self.x = input
+
+        linear_output = numpy.dot(self.x, self.W) + self.b
+        return self.activation(linear_output)
+
+    def forward(self, input=None):
+        """Alias of output()."""
+        return self.output(input=input)
+
+    def backward(self, prev_layer, lr=0.1, input=None, dropout=False, mask=None):
+        """Update W and b in place from the layer that follows this one in the forward pass.
+
+        prev_layer must expose x (its input, i.e. this layer's output), d_y and W.
+        When dropout is True the local gradient is multiplied by mask.
+        """
+        if input is not None:
+            self.x = input
+
+        # dactivation is evaluated on this layer's output, which is what prev_layer.x holds
+        d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T)
+
+        if dropout == True:
+            d_y *= mask
+
+        self.W += lr * numpy.dot(self.x.T, d_y)
+        self.b += lr * numpy.mean(d_y, axis=0)
+        self.d_y = d_y  # kept for the layer below, which reads it as prev_layer.d_y
+
+    def dropout(self, input, p, rng=None):
+        """Return a 0/1 mask shaped like input; each entry is 0 with probability p.
+
+        Without rng the layer's own generator is used. The original built a fresh
+        RandomState(123) on every call, so every default call returned the same mask.
+        """
+        if rng is None:
+            rng = self.rng
+        return rng.binomial(size=input.shape, n=1, p=1-p)
+
+    def sample_h_given_v(self, input=None):
+        """Draw a binary sample with P(1) equal to output(); a non-None input replaces self.x."""
+        if input is not None:
+            self.x = input
+
+        v_mean = self.output()
+        h_sample = self.rng.binomial(size=v_mean.shape,
+                                           n=1,
+                                           p=v_mean)
+        return h_sample
+
+
diff --git a/DeepLearning/python/LogisticRegression.py b/DeepLearning/python/LogisticRegression.py
new file mode 100644
index 00000000..708a1b3e
--- /dev/null
+++ b/DeepLearning/python/LogisticRegression.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import numpy
+from utils import *
+
+
+class LogisticRegression(object):  # multi-class softmax regression trained with manual gradient steps
+    def __init__(self, input, label, n_in, n_out):
+        self.x = input
+        self.y = label
+        # weights are (n_in, n_out) and the bias is (n_out,); both start at zero
+        self.W = numpy.zeros((n_in, n_out))  # initialize W 0
+        self.b = numpy.zeros(n_out)  # initialize bias 0
+
+    # One update step on the stored batch; a non-None input replaces self.x first.
+    def train(self, lr=0.1, input=None, L2_reg=0.00):
+        if input is not None:
+            self.x = input
+
+        p_y_given_x = self.output(self.x)
+        d_y = self.y - p_y_given_x
+        # W: gradient summed over rows minus an L2 shrink term; b: mean of d_y over rows
+        self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W
+        self.b += lr * numpy.mean(d_y, axis=0)
+        self.d_y = d_y  # kept so a layer feeding this one can read it in its backward pass
+        
+
+    # def train(self, lr=0.1, input=None, L2_reg=0.00):
+    #     self.forward(input)
+    #     self.backward(lr, L2_reg)
+
+    # def forward(self, input=None):
+    #     if input is not None:
+    #         self.x = input
+
+    #     p_y_given_x = self.output(self.x)
+    #     self.d_y = self.y - p_y_given_x
+        
+    # def backward(self, lr=0.1, L2_reg=0.00):
+    #     self.W += lr * numpy.dot(self.x.T, self.d_y) - lr * L2_reg * self.W
+    #     self.b += lr * numpy.mean(self.d_y, axis=0)
+
+    # Returns softmax(x.W + b) for the given x.
+    def output(self, x):
+        # return sigmoid(numpy.dot(x, self.W) + self.b)
+        return softmax(numpy.dot(x, self.W) + self.b)
+    # predict() simply forwards to output().
+    def predict(self, x):
+        return self.output(x)
+
+    # Negated mean over rows of the row-summed y*log(p) + (1-y)*log(1-p), with p the softmax output on self.x.
+    def negative_log_likelihood(self):
+        # sigmoid_activation = sigmoid(numpy.dot(self.x, self.W) + self.b)
+        sigmoid_activation = softmax(numpy.dot(self.x, self.W) + self.b)
+        # despite its name, sigmoid_activation holds the softmax output
+        cross_entropy = - numpy.mean(
+            numpy.sum(self.y * numpy.log(sigmoid_activation) +
+            (1 - self.y) * numpy.log(1 - sigmoid_activation),
+                      axis=1))
+
+        return cross_entropy
+
+
+def test_lr(learning_rate=0.1, n_epochs=500):
+    """Fit LogisticRegression on two synthetic 2-D clusters and print predictions for the training points."""
+    rng = numpy.random.RandomState(123)
+
+    # training data: N standard-normal points around (0, 0) and N around (20, 10)
+    d = 2
+    N = 10
+    x1 = rng.randn(N, d) + numpy.array([0, 0])
+    x2 = rng.randn(N, d) + numpy.array([20, 10])
+    y1 = [[1, 0] for i in xrange(N)]
+    y2 = [[0, 1] for i in xrange(N)]
+    # stack both clusters row-wise; astype(int) truncates the coordinates to integers
+    x = numpy.r_[x1.astype(int), x2.astype(int)]
+    y = numpy.r_[y1, y2]
+
+
+    # construct LogisticRegression
+    classifier = LogisticRegression(input=x, label=y, n_in=d, n_out=2)
+
+    # train
+    for epoch in xrange(n_epochs):
+        classifier.train(lr=learning_rate)
+        # cost = classifier.negative_log_likelihood()
+        # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost
+        learning_rate *= 0.995  # shrink the step size a little every epoch
+
+
+    # test: print the first cluster's predictions, a blank line, then the second cluster's
+    result = classifier.predict(x)
+    for i in xrange(N):
+        print result[i]
+    print
+    for i in xrange(N):
+        print result[N+i]
+
+
+
+if __name__ == "__main__":  # run the logistic-regression demo only when executed as a script
+    test_lr()
diff --git a/DeepLearning/python/MLP.py b/DeepLearning/python/MLP.py
new file mode 100644
index 00000000..e9ded0bf
--- /dev/null
+++ b/DeepLearning/python/MLP.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import numpy
+from HiddenLayer import HiddenLayer
+from LogisticRegression import LogisticRegression
+from utils import *
+
+
+class MLP(object):
+    """One tanh HiddenLayer followed by a LogisticRegression output layer."""
+
+    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
+        """Build both layers; rng defaults to numpy.random.RandomState(1234)."""
+        self.x = input
+        self.y = label
+
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
+
+        self.hidden_layer = HiddenLayer(input=self.x,
+                                        n_in=n_in,
+                                        n_out=n_hidden,
+                                        rng=rng,
+                                        activation=tanh)
+
+        # Hand the output layer the hidden activations themselves. The original passed the
+        # bound method self.hidden_layer.output (no call), so log_layer.x was not an array.
+        hidden_output = None if self.x is None else self.hidden_layer.output()
+        self.log_layer = LogisticRegression(input=hidden_output,
+                                            label=self.y,
+                                            n_in=n_hidden,
+                                            n_out=n_out)
+
+    def train(self):
+        """Run one full-batch update of both layers on (self.x, self.y)."""
+        # Feed self.x explicitly: predict() caches its argument on hidden_layer.x, and the
+        # original argument-less forward() would then have trained on that data instead.
+        layer_input = self.hidden_layer.forward(input=self.x)
+
+        # the output layer updates first and stores the d_y that the hidden layer's backward reads
+        self.log_layer.train(input=layer_input)
+        self.hidden_layer.backward(prev_layer=self.log_layer)
+
+    def predict(self, x):
+        """Return the output layer's softmax probabilities for x."""
+        # NOTE: this caches x on hidden_layer.x as a side effect of output(input=...)
+        hidden = self.hidden_layer.output(input=x)
+        return self.log_layer.predict(hidden)
+
+
+def test_mlp(n_epochs=5000):
+    """Train a 2-3-2 MLP on XOR for n_epochs full-batch steps and print its predictions."""
+    x = numpy.array([[0,  0],
+                     [0,  1],
+                     [1,  0],
+                     [1,  1]])
+    # one-hot labels: column 0 is 1 exactly when the two inputs differ
+    y = numpy.array([[0, 1],
+                     [1, 0],
+                     [1, 0],
+                     [0, 1]])
+
+
+    rng = numpy.random.RandomState(123)
+
+
+    # construct MLP
+    classifier = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2, rng=rng)
+
+    # train
+    for epoch in xrange(n_epochs):
+        classifier.train()
+
+
+    # test: predict on the same four training inputs
+    print classifier.predict(x)
+        
+
+if __name__ == "__main__":  # run the XOR MLP demo only when executed as a script
+    test_mlp()
diff --git a/DeepLearning/python/RBM.py b/DeepLearning/python/RBM.py
new file mode 100644
index 00000000..7a127d81
--- /dev/null
+++ b/DeepLearning/python/RBM.py
@@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import numpy
+from utils import *
+
+class RBM(object):  # restricted Boltzmann machine with binary units, trained by contrastive divergence
+    def __init__(self, input=None, n_visible=2, n_hidden=3, \
+        W=None, hbias=None, vbias=None, rng=None):
+        """Store the data and parameters; W ~ U(-1/n_visible, 1/n_visible) and zero biases unless given."""
+        self.n_visible = n_visible  # num of units in visible (input) layer
+        self.n_hidden = n_hidden    # num of units in hidden layer
+
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
+
+
+        if W is None:
+            a = 1. / n_visible
+            initial_W = numpy.array(rng.uniform(  # initialize W uniformly
+                low=-a,
+                high=a,
+                size=(n_visible, n_hidden)))
+
+            W = initial_W
+
+        if hbias is None:
+            hbias = numpy.zeros(n_hidden)  # initialize h bias 0
+
+        if vbias is None:
+            vbias = numpy.zeros(n_visible)  # initialize v bias 0
+
+
+        self.rng = rng
+        self.input = input
+        self.W = W
+        self.hbias = hbias
+        self.vbias = vbias
+
+    # One CD-k parameter update on self.input (replaced first when input is not None).
+    def contrastive_divergence(self, lr=0.1, k=1, input=None):
+        if input is not None:
+            self.input = input
+        
+        ''' CD-k '''
+        ph_mean, ph_sample = self.sample_h_given_v(self.input)
+
+        chain_start = ph_sample
+        # run k Gibbs steps; NOTE: with k < 1 the loop never runs and nv_samples/nh_means stay undefined
+        for step in xrange(k):
+            if step == 0:
+                nv_means, nv_samples,\
+                nh_means, nh_samples = self.gibbs_hvh(chain_start)
+            else:
+                nv_means, nv_samples,\
+                nh_means, nh_samples = self.gibbs_hvh(nh_samples)
+
+        # chain_end = nv_samples
+
+        # positive term: data with ph_mean; negative term: the chain's last nv_samples with nh_means
+        self.W += lr * (numpy.dot(self.input.T, ph_mean)
+                        - numpy.dot(nv_samples.T, nh_means))
+        self.vbias += lr * numpy.mean(self.input - nv_samples, axis=0)
+        self.hbias += lr * numpy.mean(ph_mean - nh_means, axis=0)
+
+        # cost = self.get_reconstruction_cross_entropy()
+        # return cost
+
+    # Returns [hidden probabilities, binary hidden sample] for visible values v0_sample.
+    def sample_h_given_v(self, v0_sample):
+        h1_mean = self.propup(v0_sample)
+        h1_sample = self.rng.binomial(size=h1_mean.shape,   # discrete: binomial
+                                       n=1,
+                                       p=h1_mean)
+
+        return [h1_mean, h1_sample]
+
+    # Returns [visible probabilities, binary visible sample] for hidden values h0_sample.
+    def sample_v_given_h(self, h0_sample):
+        v1_mean = self.propdown(h0_sample)
+        v1_sample = self.rng.binomial(size=v1_mean.shape,   # discrete: binomial
+                                            n=1,
+                                            p=v1_mean)
+        
+        return [v1_mean, v1_sample]
+    # Hidden activation probabilities: sigmoid(v.W + hbias).
+    def propup(self, v):
+        pre_sigmoid_activation = numpy.dot(v, self.W) + self.hbias
+        return sigmoid(pre_sigmoid_activation)
+    # Visible activation probabilities: sigmoid(h.W^T + vbias).
+    def propdown(self, h):
+        pre_sigmoid_activation = numpy.dot(h, self.W.T) + self.vbias
+        return sigmoid(pre_sigmoid_activation)
+
+    # One Gibbs step hidden -> visible -> hidden; returns means and samples of both halves.
+    def gibbs_hvh(self, h0_sample):
+        v1_mean, v1_sample = self.sample_v_given_h(h0_sample)
+        h1_mean, h1_sample = self.sample_h_given_v(v1_sample)
+
+        return [v1_mean, v1_sample,
+                h1_mean, h1_sample]
+    
+    # Cross-entropy between self.input and its deterministic (mean-field) reconstruction, averaged over rows.
+    def get_reconstruction_cross_entropy(self):
+        pre_sigmoid_activation_h = numpy.dot(self.input, self.W) + self.hbias
+        sigmoid_activation_h = sigmoid(pre_sigmoid_activation_h)
+        
+        pre_sigmoid_activation_v = numpy.dot(sigmoid_activation_h, self.W.T) + self.vbias
+        sigmoid_activation_v = sigmoid(pre_sigmoid_activation_v)
+
+        cross_entropy =  - numpy.mean(
+            numpy.sum(self.input * numpy.log(sigmoid_activation_v) +
+            (1 - self.input) * numpy.log(1 - sigmoid_activation_v),
+                      axis=1))
+        
+        return cross_entropy
+    # Deterministic reconstruction of v: up through the hidden probabilities and back down (no sampling).
+    def reconstruct(self, v):
+        h = sigmoid(numpy.dot(v, self.W) + self.hbias)
+        reconstructed_v = sigmoid(numpy.dot(h, self.W.T) + self.vbias)
+        return reconstructed_v
+
+
+
+
+
+def test_rbm(learning_rate=0.1, k=1, training_epochs=1000):  # demo: fit a 6-visible/2-hidden RBM, print two reconstructions
+    data = numpy.array([[1,1,1,0,0,0],
+                        [1,0,1,0,0,0],
+                        [1,1,1,0,0,0],
+                        [0,0,1,1,1,0],
+                        [0,0,1,1,0,0],
+                        [0,0,1,1,1,0]])
+    # six binary training rows, one column per visible unit
+
+    rng = numpy.random.RandomState(123)
+
+    # construct RBM
+    rbm = RBM(input=data, n_visible=6, n_hidden=2, rng=rng)
+
+    # train
+    for epoch in xrange(training_epochs):
+        rbm.contrastive_divergence(lr=learning_rate, k=k)
+        # cost = rbm.get_reconstruction_cross_entropy()
+        # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost
+
+
+    # test: reconstruct two rows that do not appear in the training data
+    v = numpy.array([[1, 1, 0, 0, 0, 0],
+                     [0, 0, 0, 1, 1, 0]])
+
+    print rbm.reconstruct(v)
+
+
+
+if __name__ == "__main__":  # run the RBM demo only when executed as a script
+    test_rbm()
diff --git a/DeepLearning/python/SdA.py b/DeepLearning/python/SdA.py
new file mode 100644
index 00000000..5f8de37b
--- /dev/null
+++ b/DeepLearning/python/SdA.py
@@ -0,0 +1,160 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import numpy
+from HiddenLayer import HiddenLayer
+from LogisticRegression import LogisticRegression
+from dA import dA
+from utils import *
+
+
+class SdA(object):
+    """Stacked denoising autoencoder.
+
+    Sigmoid HiddenLayers are pretrained through weight-sharing dA layers; a
+    LogisticRegression layer on top is trained by finetune().
+    """
+
+    def __init__(self, input=None, label=None,\
+                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
+                 rng=None):
+        """Build one HiddenLayer/dA pair per entry of hidden_layer_sizes and the output layer.
+
+        Each dA receives its HiddenLayer's W and b objects, so in-place pretraining
+        updates are visible to the HiddenLayer as well.
+        """
+        self.x = input
+        self.y = label
+
+        self.sigmoid_layers = []
+        self.dA_layers = []
+        self.n_layers = len(hidden_layer_sizes)  # = len(self.dA_layers) once built
+
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
+
+        assert self.n_layers > 0
+
+        for i in xrange(self.n_layers):
+            # layer 0 reads the raw input; deeper layers read a binary sample of the previous layer
+            if i == 0:
+                input_size = n_ins
+                layer_input = self.x
+            else:
+                input_size = hidden_layer_sizes[i - 1]
+                layer_input = self.sigmoid_layers[-1].sample_h_given_v()
+
+            sigmoid_layer = HiddenLayer(input=layer_input,
+                                        n_in=input_size,
+                                        n_out=hidden_layer_sizes[i],
+                                        rng=rng,
+                                        activation=sigmoid)
+            self.sigmoid_layers.append(sigmoid_layer)
+
+            # the dA shares this layer's weight matrix and hidden bias
+            dA_layer = dA(input=layer_input,
+                          n_visible=input_size,
+                          n_hidden=hidden_layer_sizes[i],
+                          W=sigmoid_layer.W,
+                          hbias=sigmoid_layer.b)
+            self.dA_layers.append(dA_layer)
+
+        # output layer: logistic regression on a sample of the top hidden layer
+        self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(),
+                                            label=self.y,
+                                            n_in=hidden_layer_sizes[-1],
+                                            n_out=n_outs)
+
+        # finetune cost: the negative log likelihood of the logistic regression layer,
+        # evaluated once here with the initial (all-zero) output weights
+        self.finetune_cost = self.log_layer.negative_log_likelihood()
+
+    def pretrain(self, lr=0.1, corruption_level=0.3, epochs=100):
+        """Train each dA in turn for `epochs` steps, feeding layer i a sample from layer i-1."""
+        for i in xrange(self.n_layers):
+            if i == 0:
+                layer_input = self.x
+            else:
+                layer_input = self.sigmoid_layers[i-1].sample_h_given_v(layer_input)
+
+            da = self.dA_layers[i]
+            for epoch in xrange(epochs):
+                da.train(lr=lr, corruption_level=corruption_level, input=layer_input)
+
+    def finetune(self, lr=0.1, epochs=100):
+        """Train only the logistic output layer on sampled top-level features; lr decays by 0.95 per epoch."""
+        # Re-propagate self.x through every layer. The original called sample_h_given_v() with no
+        # argument on the last layer, reusing an input cached in __init__ (before pretraining) or by predict().
+        layer_input = self.x
+        for sigmoid_layer in self.sigmoid_layers:
+            layer_input = sigmoid_layer.sample_h_given_v(layer_input)
+
+        for epoch in xrange(epochs):
+            self.log_layer.train(lr=lr, input=layer_input)
+            # self.finetune_cost = self.log_layer.negative_log_likelihood()
+            # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost
+            lr *= 0.95
+
+    def predict(self, x):
+        """Propagate x through every sigmoid layer (mean activations, no sampling) and the output layer."""
+        layer_input = x
+
+        for i in xrange(self.n_layers):
+            sigmoid_layer = self.sigmoid_layers[i]
+            layer_input = sigmoid_layer.output(input=layer_input)
+
+        return self.log_layer.predict(layer_input)
+
+
+
+
+def test_SdA(pretrain_lr=0.1, pretraining_epochs=1000, corruption_level=0.3, \
+             finetune_lr=0.1, finetune_epochs=200):
+    x = numpy.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0]])
+    # one-hot labels: [1, 0] for the rows active in the left ten columns, [0, 1] for the right ten
+    y = numpy.array([[1, 0],
+                     [1, 0],
+                     [1, 0],
+                     [1, 0],
+                     [1, 0],
+                     [0, 1],
+                     [0, 1],
+                     [0, 1],
+                     [0, 1],
+                     [0, 1]])
+
+
+    rng = numpy.random.RandomState(123)
+
+    # construct SdA
+    sda = SdA(input=x, label=y, \
+              n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, rng=rng)
+
+    # pre-training
+    sda.pretrain(lr=pretrain_lr, corruption_level=corruption_level, epochs=pretraining_epochs)
+
+    # fine-tuning
+    sda.finetune(lr=finetune_lr, epochs=finetune_epochs)
+
+
+    # test: four new rows (x is rebound here), two per half of the columns
+    x = numpy.array([[1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1]])
+
+    print sda.predict(x)
+    
+    
+
+if __name__ == "__main__":  # run the stacked-autoencoder demo only when executed as a script
+    test_SdA()
diff --git a/DeepLearning/python/dA.py b/DeepLearning/python/dA.py
new file mode 100644
index 00000000..edbf6c76
--- /dev/null
+++ b/DeepLearning/python/dA.py
@@ -0,0 +1,132 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import numpy
+from utils import *
+
+
+class dA(object):  # denoising autoencoder with sigmoid units and tied weights (decoder uses W transposed)
+    def __init__(self, input=None, n_visible=2, n_hidden=3, \
+        W=None, hbias=None, vbias=None, rng=None):
+        """Store the data and parameters; W ~ U(-1/n_visible, 1/n_visible) and zero biases unless given."""
+        self.n_visible = n_visible  # num of units in visible (input) layer
+        self.n_hidden = n_hidden    # num of units in hidden layer
+
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
+            
+        if W is None:
+            a = 1. / n_visible
+            W = numpy.array(rng.uniform(  # initialize W uniformly
+                low=-a,
+                high=a,
+                size=(n_visible, n_hidden)))
+
+        if hbias is None:
+            hbias = numpy.zeros(n_hidden)  # initialize h bias 0
+
+        if vbias is None:
+            vbias = numpy.zeros(n_visible)  # initialize v bias 0
+
+        self.rng = rng
+        self.x = input
+        self.W = W
+        self.W_prime = self.W.T  # transpose of W, used by the decoder; train() updates W in place
+        self.hbias = hbias
+        self.vbias = vbias
+
+    # Returns input with each entry independently zeroed with probability corruption_level.
+    def get_corrupted_input(self, input, corruption_level):
+        assert corruption_level < 1
+
+        return self.rng.binomial(size=input.shape,
+                                       n=1,
+                                       p=1-corruption_level) * input
+
+    # Encode
+    def get_hidden_values(self, input):
+        return sigmoid(numpy.dot(input, self.W) + self.hbias)
+
+    # Decode
+    def get_reconstructed_input(self, hidden):
+        return sigmoid(numpy.dot(hidden, self.W_prime) + self.vbias)
+
+    # One update step: corrupt self.x, encode, decode, and move the parameters toward reconstructing the clean x.
+    def train(self, lr=0.1, corruption_level=0.3, input=None):
+        if input is not None:
+            self.x = input
+
+        x = self.x
+        tilde_x = self.get_corrupted_input(x, corruption_level)
+        y = self.get_hidden_values(tilde_x)
+        z = self.get_reconstructed_input(y)
+        # L_h2: reconstruction error at the visible units; L_h1: that error sent back through W and y*(1-y)
+        L_h2 = x - z
+        L_h1 = numpy.dot(L_h2, self.W) * y * (1 - y)
+
+        L_vbias = L_h2
+        L_hbias = L_h1
+        L_W =  numpy.dot(tilde_x.T, L_h1) + numpy.dot(L_h2.T, y)
+
+        # in-place updates (+=): arrays handed in by the caller (W, hbias) are modified as well
+        self.W += lr * L_W
+        self.hbias += lr * numpy.mean(L_hbias, axis=0)
+        self.vbias += lr * numpy.mean(L_vbias, axis=0)
+
+
+    # Row-averaged cross-entropy between self.x and its reconstruction; draws a new corruption mask on each call.
+    def negative_log_likelihood(self, corruption_level=0.3):
+        tilde_x = self.get_corrupted_input(self.x, corruption_level)
+        y = self.get_hidden_values(tilde_x)
+        z = self.get_reconstructed_input(y)
+
+        cross_entropy = - numpy.mean(
+            numpy.sum(self.x * numpy.log(z) +
+            (1 - self.x) * numpy.log(1 - z),
+                      axis=1))
+
+        return cross_entropy
+
+    # Encode then decode x without applying any corruption.
+    def reconstruct(self, x):
+        y = self.get_hidden_values(x)
+        z = self.get_reconstructed_input(y)
+        return z
+
+
+
+def test_dA(learning_rate=0.1, corruption_level=0.3, training_epochs=50):  # demo: train a 20-5 dA, reconstruct two rows
+    data = numpy.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                        [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                        [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                        [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                        [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1],
+                        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1],
+                        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
+                        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0]])
+    # ten binary rows of 20 columns: five mostly active on the left half, five on the right half
+    rng = numpy.random.RandomState(123)
+
+    # construct dA
+    da = dA(input=data, n_visible=20, n_hidden=5, rng=rng)
+
+    # train
+    for epoch in xrange(training_epochs):
+        da.train(lr=learning_rate, corruption_level=corruption_level)
+        # cost = da.negative_log_likelihood(corruption_level=corruption_level)
+        # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost
+        # learning_rate *= 0.95
+
+
+    # test: reconstruct two sparse rows that are not part of the training data
+    x = numpy.array([[1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
+                     [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0]])
+
+    print da.reconstruct(x)
+
+
+
+if __name__ == "__main__":  # run the denoising-autoencoder demo only when executed as a script
+    test_dA()
diff --git a/DeepLearning/python/utils.py b/DeepLearning/python/utils.py
new file mode 100644
index 00000000..7aca40dd
--- /dev/null
+++ b/DeepLearning/python/utils.py
@@ -0,0 +1,40 @@
+
+import numpy
+numpy.seterr(all='ignore')  # import-time side effect: numpy floating-point errors (overflow, divide, invalid, ...) are ignored
+
+
+def sigmoid(x):  # logistic function 1 / (1 + exp(-x)), evaluated element-wise through numpy.exp
+    return 1. / (1 + numpy.exp(-x))
+
+
+def dsigmoid(x):  # NOTE: x must already be a sigmoid OUTPUT s; returns s * (1 - s), the derivative written in terms of s
+    return x * (1. - x)
+
+def tanh(x):  # thin wrapper over numpy.tanh, so layers can compare and store it as their activation
+    return numpy.tanh(x)
+
+def dtanh(x):  # NOTE: x must already be a tanh OUTPUT t; returns 1 - t^2, the derivative written in terms of t
+    return 1. - x * x
+
+def softmax(x):
+    """Softmax over the last axis; a 1-D x is one distribution, a 2-D x is one per row."""
+    # Shift by each row's own max. The original shifted by the global max, so a row far
+    # below it underflowed to all zeros and came back as 0/0 = nan.
+    e = numpy.exp(x - numpy.max(x, axis=-1, keepdims=True))
+    return e / numpy.sum(e, axis=-1, keepdims=True)
+
+
+def ReLU(x):  # rectifier: keeps entries where x > 0 and multiplies all others by False (i.e. 0)
+    return x * (x > 0)
+
+def dReLU(x):  # 1.0 where x > 0, else 0.0; gives the same result on a ReLU input or a ReLU output
+    return 1. * (x > 0)
+
+
+# # probability density for the Gaussian dist
+# def gaussian(x, mean=0.0, scale=1.0):
+#     s = 2 * numpy.power(scale, 2)
+#     e = numpy.exp( - numpy.power((x - mean), 2) / s )
+
+#     return e / numpy.square(numpy.pi * s)
+
diff --git a/do_closing.py b/do_closing.py
new file mode 100644
index 00000000..320f1f72
--- /dev/null
+++ b/do_closing.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from contextlib import contextmanager
+
+@contextmanager
+def closing(fname):
+    """Open fname for reading, yield the file object, and always close it afterwards."""
+    handle = open(fname, 'r')
+    try:
+        yield handle
+    finally:
+        # reached on normal exit and when the with-body raises
+        handle.close()
+
+with closing('test.txt') as f:  # demo: the relative path means test.txt must exist in the current working directory
+    print(f.read())
diff --git a/do_suppress.py b/do_suppress.py
new file mode 100644
index 00000000..3a750c4b
--- /dev/null
+++ b/do_suppress.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import os
+
+from contextlib import suppress
+
+for name in ('tempfile.1', 'tempfile.2', 'tempfile.3'):
+    # suppress per file: inside one shared block the first missing file skipped the remaining removals
+    with suppress(FileNotFoundError):
+        os.remove(name)
diff --git a/do_with.py b/do_with.py
new file mode 100644
index 00000000..073399ed
--- /dev/null
+++ b/do_with.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from contextlib import contextmanager
+
+@contextmanager
+def log(name):  # context manager: prints a "[name] start..." line, runs the with-body, then prints "[name] end."
+    print('[%s] start...' % name)
+    yield  # the with-body runs here; NOTE(review): no try/finally, so the end line is skipped if the body raises
+    print('[%s] end.' % name)
+
+with log('DEBUG'):  # prints "[DEBUG] start...", then the two lines below, then "[DEBUG] end."
+    print('Hello, world!')
+    print('Hello, Python!')