diff --git a/C/Addition.c b/C/Addition.c new file mode 100644 index 00000000..06f23b88 --- /dev/null +++ b/C/Addition.c @@ -0,0 +1,8 @@ +#include +void main() +{ int a,b,c; +printf("Enter two Numbers : "); +scanf("%d %d",&a,&b); +c=a+b; +printf("The Sum is %d",c); +} diff --git a/C/Address of 1-D array.c b/C/Address of 1-D array.c new file mode 100644 index 00000000..bad40b3f --- /dev/null +++ b/C/Address of 1-D array.c @@ -0,0 +1,17 @@ +#include +int main() +{ + int a[100],i,n,*add; + printf("enter the size"); + scanf("%d",&n); + printf("enter the no"); + for(i=0;i +#include + +// Converts initValue between temperature scales. initScale: 1 = Celsius, 2 = Kelvin, 3 = Fahrenheit; finalScale picks the first (1) or second (2) target listed under each case. Any other combination returns initValue unchanged. +double convertTemp(double initValue, int initScale, int finalScale){ + double finalValue = initValue; /* defined fallback for unsupported scale pairs */ + switch(initScale){ + // Celsius + case 1: + // Celsius to Kelvin + if(finalScale == 1){ + finalValue = initValue + 273.15; + } + // Celsius to Fahrenheit + else if(finalScale == 2){ + finalValue = (initValue * 9 / 5) + 32; + } + break; + case 2: + // Kelvin to Celsius + if(finalScale == 1){ + finalValue = initValue - 273.15; + } + // Kelvin to Fahrenheit + else if(finalScale == 2){ + finalValue = ((initValue - 273.15) * 9 / 5) + 32; + } + break; + case 3: + // Fahrenheit to Celsius + if(finalScale == 1){ + finalValue = (initValue - 32) * 5 / 9; + } + // Fahrenheit to Kelvin + else if(finalScale == 2){ + finalValue = ((initValue - 32) * 5 / 9) + 273.15; /* decimal point: "273,15" was parsed as the comma operator and added only 273 */ + } + break; + + } + return finalValue; +} + +int main(){ + int option; + double initialValue, finalValue; + while(1){ + // main menu + printf("\n0 - Exit\n"); + printf("1 - Convert from Celsius to Kelvin\n"); + printf("2 - Convert from Celsius to Fahrenheit\n"); + printf("3 - Convert from Kelvin to Fahrenheit\n"); + printf("4 - Convert from Kelvin to Celsius\n"); + printf("5 - Convert from Fahrenheit to Celsius\n"); + printf("6 - Convert from Fahrenheit to Kelvin\n"); + + printf("Select a number: "); + scanf("%d",&option); + if(!option){ + printf("Ending program\n"); + return 0; + } + + printf("Please enter 
the initial value: "); + scanf("%lf",&initialValue); + + switch(option){ + case 1: + finalValue = convertTemp(initialValue,1,1); + printf("Valor em Kelvin: %.2lf",finalValue); + break; + case 2: + finalValue = convertTemp(initialValue,1,2); + printf("Valor em Fahrenheit: %.2lf",finalValue); + break; + case 3: /* menu option 3 is Kelvin to Fahrenheit, i.e. convertTemp(…,2,2) */ + finalValue = convertTemp(initialValue,2,2); + printf("Valor em Fahrenheit: %.2lf",finalValue); + break; + case 4: /* menu option 4 is Kelvin to Celsius, i.e. convertTemp(…,2,1) */ + finalValue = convertTemp(initialValue,2,1); + printf("Valor em Celsius: %.2lf",finalValue); + break; + case 5: + finalValue = convertTemp(initialValue,3,1); + printf("Valor em Celsius: %.2lf",finalValue); + break; + case 6: /* Fahrenheit to Kelvin is target 2 of scale 3; (3,1) returned Celsius */ + finalValue = convertTemp(initialValue,3,2); + printf("Valor em Kelvin: %.2lf",finalValue); + break; + + } + printf("\n"); + } + + return 0; +} \ No newline at end of file diff --git a/C/Anagram-Program-in-C b/C/Anagram-Program-in-C new file mode 100644 index 00000000..68510848 --- /dev/null +++ b/C/Anagram-Program-in-C @@ -0,0 +1,71 @@ +#include +#include +#include + +int checkAnagram(char *str1, char *str2); + +int main() +{ + char str1[100], str2[100]; + + printf("Function : whether two given strings are anagram :"); + printf("\nExample : pears and spare, stone and tones :"); + + printf(" Input the first String : "); + fgets(str1, sizeof str1, stdin); + printf(" Input the second String : "); + fgets(str2, sizeof str2, stdin); + + if(checkAnagram(str1, str2) == 1) + { + str1[strlen(str1)-1] = '\0'; + str2[strlen(str2)-1] = '\0'; + printf(" %s and %s are Anagram.\n\n",str1,str2); + } + else + { + str1[strlen(str1)-1] = '\0'; + str2[strlen(str2)-1] = '\0'; + printf(" %s and %s are not Anagram.\n\n",str1,str2); + } + return 0; +} + + +//Function to check whether two passed strings are anagram or not + +int checkAnagram(char *str1, char *str2) +{ + int str1ChrCtr[256] = {0}, str2ChrCtr[256] = {0}; + int ctr; + + /* check the length of equality of Two Strings */ + + if(strlen(str1) != strlen(str2)) + { + return 0; + } + + 
//count frequency of characters in str1 + + for(ctr = 0; str1[ctr] != '\0'; ctr++) + { + str1ChrCtr[(unsigned char)str1[ctr]]++; /* plain char may be negative; index the 256-entry table as 0..255 */ + } + + //count frequency of characters in str2 + + for(ctr = 0; str2[ctr] != '\0'; ctr++) + { + str2ChrCtr[(unsigned char)str2[ctr]]++; /* same cast as above keeps the index in bounds */ + } + + //compare character counts of both strings + + for(ctr = 0; ctr < 256; ctr++) + { + if(str1ChrCtr[ctr] != str2ChrCtr[ctr]) + return 0; + } + return 1; +} diff --git a/C/SwapByRefandCopy.c b/C/SwapByRefandCopy.c new file mode 100644 index 00000000..f888a2ca --- /dev/null +++ b/C/SwapByRefandCopy.c @@ -0,0 +1,43 @@ +#include + +// Swap_ref creates a temporary variable temp and dereferences the address that was sent to it (this is done to get the actual +// value of the int in the memory space). The dereferencing allows us to change the actual values. +void swap_ref(int* a, int* b){ + int temp = *a; + + *a = *b; + *b = temp; +} + +// Swap however does not use addresses and references and only alters the ints within its own scope +void swap(int a, int b) +{ + int temp = a; + a = b; + b = temp; +} + +int main(void) { + //Declare 2 int variables that are going to be swapped by reference + int a = 1; + int b = 2; + + printf("%d\n", a); + printf("%d\n", b); + + //Pass the Address of the Values to the Swap function + swap_ref(&a, &b); + + printf("%d\n", a); + printf("%d\n", b); + + int c = 5; + int d = 10; + printf("%d\n", c); + printf("%d\n", d); + + swap(c, d); + + printf("%d\n", c); + printf("%d\n", d); +} diff --git a/C/SwapIntegers.c b/C/SwapIntegers.c new file mode 100644 index 00000000..b57df938 --- /dev/null +++ b/C/SwapIntegers.c @@ -0,0 +1,15 @@ +#include +int main() +{ + int num1=5, num2=10; + + printf("The numbers are: %d and %d", num1,num2); /* %d takes the int values, not their addresses */ + + num1=num1+num2; //num1=15 and num2=10 + num2=num1-num2; //num1=15 and num2=5 + num1=num1-num2; //num1=10 and num2=5 + + printf("The numbers have been swapped with new positions: %d and %d", num1,num2); /* values, not addresses */ + + return 0; +} \ No newline at end of file diff --git 
a/C/SwapIntegersWithout3rdVariable(Arithmatic).c b/C/SwapIntegersWithout3rdVariable(Arithmatic).c new file mode 100644 index 00000000..b57df938 --- /dev/null +++ b/C/SwapIntegersWithout3rdVariable(Arithmatic).c @@ -0,0 +1,15 @@ +#include +int main() +{ + int num1=5, num2=10; + + printf("The numbers are: %d and %d", num1,num2); /* %d takes the int values, not their addresses */ + + num1=num1+num2; //num1=15 and num2=10 + num2=num1-num2; //num1=15 and num2=5 + num1=num1-num2; //num1=10 and num2=5 + + printf("The numbers have been swapped with new positions: %d and %d", num1,num2); /* values, not addresses */ + + return 0; +} \ No newline at end of file diff --git a/C/SwapValueUsingThirdVariable.c b/C/SwapValueUsingThirdVariable.c new file mode 100644 index 00000000..9da8e755 --- /dev/null +++ b/C/SwapValueUsingThirdVariable.c @@ -0,0 +1,15 @@ +// Swap two integers using third variable + + +#include +int main() +{ + int a,b,c; + printf("Enter two no :\n"); + scanf("%d%d",&a,&b); + c=a; + a=b; + b=c; + printf("After swapping value of a = %d\n b =%d",a,b); + return 0; +} diff --git a/C/SwapValueWithoutUsingThirdVariable.c b/C/SwapValueWithoutUsingThirdVariable.c new file mode 100644 index 00000000..d78454fa --- /dev/null +++ b/C/SwapValueWithoutUsingThirdVariable.c @@ -0,0 +1,15 @@ +// Swap two integers without using third variable + + +#include +int main() +{ + int a, b; + printf("Enter two no :\n"); + scanf("%d%d",&a,&b); + a = a^b; + b = a^b; + a= a^b; + printf("After swapping value of a and b : %d,%d",a,b); + return 0; +} diff --git a/C/Swapping(without using extra variable).cpp b/C/Swapping(without using extra variable).cpp new file mode 100644 index 00000000..cf03d3c5 --- /dev/null +++ b/C/Swapping(without using extra variable).cpp @@ -0,0 +1,13 @@ +#include +#include +void main() +{ + int a,b; + printf("Enter a and b\n"); + scanf("%d%d",&a,&b); + a=a+b; + b=a-b; + a=a-b; + printf("After swapping a and b are %d %d",a,b); + getch(); +} diff --git a/C/Temperature.c b/C/Temperature.c new file mode 100644 index 00000000..a85a7bb8 --- 
/dev/null +++ b/C/Temperature.c @@ -0,0 +1,8 @@ +#include +void main() +{ float a,c,f; +printf("Enter the Temperature in Celcius : "); +scanf("%f",&c); +f=c*(9.0f/5.0f)+32; /* 9/5 is integer division and evaluates to 1 */ +printf("Temperature in Fahernheit is %f",f); +} diff --git a/C/TemperatureSwitch.c b/C/TemperatureSwitch.c new file mode 100644 index 00000000..d997d6bb --- /dev/null +++ b/C/TemperatureSwitch.c @@ -0,0 +1,39 @@ +#include + +int main() +{ + // The resultant temperatures can be in decimals as well, so we use double + double c, f, result; + // We use an integer type data to run the switch statement + int choice; + printf("Select your choice: \n"); + printf("1. Celcius to Fahrenheit\n"); + printf("2. Fahrenheit to Celcius\n"); + + printf("Enter your choice: "); + scanf("%d", &choice); + + // We compute the temperatures for both the cases here respectively + switch(choice) + { + case 1: + printf("Enter the temperature in Celcius: "); + scanf("%lf", &c); + result = (9.0 / 5.0) * c + 32; /* floating-point ratio; 9 / 5 truncates to 1 */ + break; + case 2: + printf("Enter the temperature in Fahrenheit: "); + scanf("%lf", &f); + result = (5.0 / 9.0) * (f - 32); /* floating-point ratio; 5 / 9 truncates to 0 */ + break; + + // This case gets activated when the user inputs anything other than 1 or 2 + default: + printf("Invalid case!\n"); + return 1; /* nothing was computed, so do not fall through and print an uninitialised result */ + } + + // Printing out the result according to the computation + printf("The resultant temperature is: %lf", result); + return 0; +} diff --git a/C/TernaryOperator.c b/C/TernaryOperator.c new file mode 100644 index 00000000..5eac1939 --- /dev/null +++ b/C/TernaryOperator.c @@ -0,0 +1,13 @@ +//Largest number among 3 numbers using ternary operator + +#include +int main() +{ + float a,b,c,large; + printf("Enter any 3 numbers\n"); + scanf("%f%f%f",&a,&b,&c); + large = a>b? 
(a>c?a:c): (b>c?b:c); + printf("The larger no is :%f\n", large); + return 0; + +} diff --git a/C/Trif.c b/C/Trif.c new file mode 100644 index 00000000..242172fa --- /dev/null +++ b/C/Trif.c @@ -0,0 +1,10 @@ +#include +#include +void main() +{ double a,b; +printf("Enter the degree : "); +scanf("%lf",&a); +a=(a*3.14159265358979323846)/180; /* full-precision pi; 3.14 makes sin(90 degrees) come out below 1 */ +b=sin(a); +printf("Sine is %lf",b); +} diff --git a/C/UppercaseToLowercase.c b/C/UppercaseToLowercase.c new file mode 100644 index 00000000..aae975aa --- /dev/null +++ b/C/UppercaseToLowercase.c @@ -0,0 +1,14 @@ +// Uppercase character to lowercase character + + +#include +int main() +{ + char a,u; + printf("Enter Uppercase letter :\n"); + scanf("%c", &a); + u = (a >= 'A' && a <= 'Z') ? a + 32 : a; /* shift only real uppercase letters; anything else is echoed unchanged */ + printf("Lowercase is : %c", u); + return 0; + +} diff --git a/C/VowelorConsonant.c b/C/VowelorConsonant.c new file mode 100644 index 00000000..89e3be23 --- /dev/null +++ b/C/VowelorConsonant.c @@ -0,0 +1,27 @@ +// Program to input a character and check whether it is vowel or consonant using switch case +#include +int main(){ + char ch; + printf("Enter a character\n"); + scanf("%c", &ch); + switch(ch) + { + case 'a': + case 'e': + case 'i': + case 'o': + case 'u': + case 'A': + case 'E': + case 'I': + case 'O': + case 'U': + printf("Entered character is a vowel"); + break; + default: + printf("Entered character is a consonant"); + + } + + return 0; +} diff --git a/C/alphabetTriangle.cpp b/C/alphabetTriangle.cpp new file mode 100644 index 00000000..cd6ae215 --- /dev/null +++ b/C/alphabetTriangle.cpp @@ -0,0 +1,28 @@ +#include +#include + +/* + A + ABA + ABCBA + ABCDCBA + ABCDEDCBA + +*/ +int main(){ + int ch=65; + int i,j,k,m; + for(i=1;i<=5;i++) + { + for(j=5;j>=i;j--) + printf(" "); + for(k=1;k<=i;k++) + printf("%c",ch++); + ch--; + for(m=1;m +int main() +{ + int a,b; /*declaration of variables*/ + printf("Enter Two Numbers\n"); + scanf("%d %d",&a,&b); /*Taking input of Two Variables*/ + a=a+b; /*swapping */ + b=a-b; /* the two */ + a=a-b; /*numbers*/ + printf("the number after 
swapping are %d %d\n",a,b);/*printing*/ + return 0; +} \ No newline at end of file diff --git a/C/transposeOfMatrix.c b/C/transposeOfMatrix.c new file mode 100644 index 00000000..1d9d0c6c --- /dev/null +++ b/C/transposeOfMatrix.c @@ -0,0 +1,44 @@ +//C program to input a matrix of order MxN (each at most 10) and find its transpose + +#include +#include +int main() +{ + static int array[10][10]; + int i, j, row, col; + + printf("Enter the order of the matrix \n"); + if (scanf("%d %d", &row, &col) != 2 || row < 1 || row > 10 || col < 1 || col > 10) /* the storage is a fixed 10x10 array */ + { + printf("Order must be between 1x1 and 10x10\n"); + return 1; + } + printf("Enter the coefficients of the matrix\n"); + for (i = 0; i < row; ++i) + { + for (j = 0; j < col; ++j) + { + scanf("%d", &array[i][j]); + } + } + printf("The given matrix is \n"); + for (i = 0; i < row; ++i) + { + for (j = 0; j < col; ++j) + { + printf(" %d", array[i][j]); + } + printf("\n"); + } + printf("Transpose of matrix is \n"); + for (j = 0; j < col; ++j) + { + for (i = 0; i < row; ++i) + { + printf(" %d", array[i][j]); + } + printf("\n"); + } + + return 0; +} diff --git a/Contributors.md b/Contributors.md index 272a6a58..964e40dd 100644 --- a/Contributors.md +++ b/Contributors.md @@ -117,5 +117,10 @@ Name: [Muhammad Iqbal R](https://github.com/miqbalrr)
Place: Indonesia
About: BACKEND Developer
+Name: [Yash Agarwal](https://github.com/yashagarwaldev)
+Place: India
+About: Developer
+ + diff --git a/DeepLearning/c/DBN.c b/DeepLearning/c/DBN.c new file mode 100644 index 00000000..eee1e1ca --- /dev/null +++ b/DeepLearning/c/DBN.c @@ -0,0 +1,596 @@ +#include +#include +#include +#include "HiddenLayer.h" +#include "RBM.h" +#include "LogisticRegression.h" +#include "DBN.h" +#include "utils.h" + +void test_dbn(void); + + +double uniform(double min, double max) { + return rand() / (RAND_MAX + 1.0) * (max - min) + min; +} + +int binomial(int n, double p) { + if(p < 0 || p > 1) return 0; + + int i; + int c = 0; + double r; + + for(i=0; iN = N; + this->n_ins = n_ins; + this->hidden_layer_sizes = hidden_layer_sizes; + this->n_outs = n_outs; + this->n_layers = n_layers; + + this->sigmoid_layers = (HiddenLayer *)malloc(sizeof(HiddenLayer) * n_layers); + this->rbm_layers = (RBM *)malloc(sizeof(RBM) * n_layers); + + // construct multi-layer + for(i=0; isigmoid_layers[i]), \ + N, input_size, hidden_layer_sizes[i], NULL, NULL); + + // construct rbm_layer + RBM__construct(&(this->rbm_layers[i]), N, input_size, hidden_layer_sizes[i], \ + this->sigmoid_layers[i].W, this->sigmoid_layers[i].b, NULL); + + } + + // layer for output using LogisticRegression + LogisticRegression__construct(&(this->log_layer), \ + N, hidden_layer_sizes[n_layers-1], n_outs); + +} + +void DBN__destruct(DBN* this) { + int i; + for(i=0; in_layers; i++) { + HiddenLayer__destruct(&(this->sigmoid_layers[i])); + RBM__destruct(&(this->rbm_layers[i])); + } + free(this->sigmoid_layers); + free(this->rbm_layers); +} + +void DBN_pretrain(DBN* this, int *input, double lr, int k, int epochs) { + int i, j, l, m, n, epoch; + + int *layer_input; + int prev_layer_input_size; + int *prev_layer_input; + + int *train_X = (int *)malloc(sizeof(int) * this->n_ins); + + for(i=0; in_layers; i++) { // layer-wise + + for(epoch=0; epochN; n++) { // input x1...xN + // initial input + for(m=0; mn_ins; m++) train_X[m] = input[n * this->n_ins + m]; + + // layer input + for(l=0; l<=i; l++) { + if(l == 0) { + layer_input 
= (int *)malloc(sizeof(int) * this->n_ins); + for(j=0; jn_ins; j++) layer_input[j] = train_X[j]; + } else { + if(l == 1) prev_layer_input_size = this->n_ins; + else prev_layer_input_size = this->hidden_layer_sizes[l-2]; + + prev_layer_input = (int *)malloc(sizeof(int) * prev_layer_input_size); + for(j=0; jhidden_layer_sizes[l-1]); + + HiddenLayer_sample_h_given_v(&(this->sigmoid_layers[l-1]), \ + prev_layer_input, layer_input); + free(prev_layer_input); + } + } + + RBM_contrastive_divergence(&(this->rbm_layers[i]), layer_input, lr, k); + } + + } + } + + free(train_X); + free(layer_input); +} + +void DBN_finetune(DBN* this, int *input, int *label, double lr, int epochs) { + int i, j, m, n, epoch; + + int *layer_input; + // int prev_layer_input_size; + int *prev_layer_input; + + int *train_X = (int *)malloc(sizeof(int) * this->n_ins); + int *train_Y = (int *)malloc(sizeof(int) * this->n_outs); + + for(epoch=0; epochN; n++) { // input x1...xN + // initial input + for(m=0; mn_ins; m++) train_X[m] = input[n * this->n_ins + m]; + for(m=0; mn_outs; m++) train_Y[m] = label[n * this->n_outs + m]; + + // layer input + for(i=0; in_layers; i++) { + if(i == 0) { + prev_layer_input = (int *)malloc(sizeof(int) * this->n_ins); + for(j=0; jn_ins; j++) prev_layer_input[j] = train_X[j]; + } else { + prev_layer_input = (int *)malloc(sizeof(int) * this->hidden_layer_sizes[i-1]); + for(j=0; jhidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j]; + free(layer_input); + } + + + layer_input = (int *)malloc(sizeof(int) * this->hidden_layer_sizes[i]); + HiddenLayer_sample_h_given_v(&(this->sigmoid_layers[i]), \ + prev_layer_input, layer_input); + free(prev_layer_input); + } + + LogisticRegression_train(&(this->log_layer), layer_input, train_Y, lr); + } + // lr *= 0.95; + } + + free(layer_input); + free(train_X); + free(train_Y); +} + +void DBN_predict(DBN* this, int *x, double *y) { + int i, j, k; + double *layer_input; + // int prev_layer_input_size; + double *prev_layer_input; 
+ + double linear_output; + + prev_layer_input = (double *)malloc(sizeof(double) * this->n_ins); + for(j=0; jn_ins; j++) prev_layer_input[j] = x[j]; + + // layer activation + for(i=0; in_layers; i++) { + layer_input = (double *)malloc(sizeof(double) * this->sigmoid_layers[i].n_out); + + for(k=0; ksigmoid_layers[i].n_out; k++) { + linear_output = 0.0; + + for(j=0; jsigmoid_layers[i].n_in; j++) { + linear_output += this->sigmoid_layers[i].W[k][j] * prev_layer_input[j]; + } + linear_output += this->sigmoid_layers[i].b[k]; + layer_input[k] = sigmoid(linear_output); + } + free(prev_layer_input); + + if(i < this->n_layers-1) { + prev_layer_input = (double *)malloc(sizeof(double) * this->sigmoid_layers[i].n_out); + for(j=0; jsigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j]; + free(layer_input); + } + } + + for(i=0; ilog_layer.n_out; i++) { + y[i] = 0; + for(j=0; jlog_layer.n_in; j++) { + y[i] += this->log_layer.W[i][j] * layer_input[j]; + } + y[i] += this->log_layer.b[i]; + } + + LogisticRegression_softmax(&(this->log_layer), y); + + free(layer_input); +} + + + +// HiddenLayer: when W or b is NULL the layer allocates its own storage (W drawn from uniform(-1/n_in, 1/n_in), b zeroed); otherwise it stores the caller's pointers. +void HiddenLayer__construct(HiddenLayer* this, int N, int n_in, int n_out, \ + double **W, double *b) { + int i, j; + double a = 1.0 / n_in; + + this->N = N; + this->n_in = n_in; + this->n_out = n_out; + + if(W == NULL) { + this->W = (double **)malloc(sizeof(double*) * n_out); + this->W[0] = (double *)malloc(sizeof(double) * n_in * n_out); + for(i=0; iW[i] = this->W[0] + i * n_in; + + for(i=0; iW[i][j] = uniform(-a, a); + } + } + } else { + this->W = W; + } + + if(b == NULL) { + this->b = (double *)calloc(n_out, sizeof(double)); /* zero-initialised: the biases are read (and updated in place as the RBM's hbias) before anything assigns them */ + } else { + this->b = b; + } +} + +void HiddenLayer__destruct(HiddenLayer* this) { + free(this->W[0]); + free(this->W); + free(this->b); +} + +double HiddenLayer_output(HiddenLayer* this, int *input, double *w, double b) { + int j; + double linear_output = 0.0; + for(j=0; jn_in; j++) { + linear_output += w[j] * input[j]; + } + linear_output += b; + 
return sigmoid(linear_output); +} + +void HiddenLayer_sample_h_given_v(HiddenLayer* this, int *input, int *sample) { + int i; + for(i=0; in_out; i++) { + sample[i] = binomial(1, HiddenLayer_output(this, input, this->W[i], this->b[i])); + } +} + + +// RBM +void RBM__construct(RBM* this, int N, int n_visible, int n_hidden, \ + double **W, double *hbias, double *vbias) { + int i, j; + double a = 1.0 / n_visible; + + this->N = N; + this->n_visible = n_visible; + this->n_hidden = n_hidden; + + if(W == NULL) { + this->W = (double **)malloc(sizeof(double*) * n_hidden); + this->W[0] = (double *)malloc(sizeof(double) * n_visible * n_hidden); + for(i=0; iW[i] = this->W[0] + i * n_visible; + + for(i=0; iW[i][j] = uniform(-a, a); + } + } + } else { + this->W = W; + } + + if(hbias == NULL) { + this->hbias = (double *)malloc(sizeof(double) * n_hidden); + for(i=0; ihbias[i] = 0; + } else { + this->hbias = hbias; + } + + if(vbias == NULL) { + this->vbias = (double *)malloc(sizeof(double) * n_visible); + for(i=0; ivbias[i] = 0; + } else { + this->vbias = vbias; + } +} + +void RBM__destruct(RBM* this) { + // free(this->W[0]); + // free(this->W); + // free(this->hbias); + free(this->vbias); +} + +void RBM_contrastive_divergence(RBM* this, int *input, double lr, int k) { + int i, j, step; + + double *ph_mean = (double *)malloc(sizeof(double) * this->n_hidden); + int *ph_sample = (int *)malloc(sizeof(int) * this->n_hidden); + double *nv_means = (double *)malloc(sizeof(double) * this->n_visible); + int *nv_samples = (int *)malloc(sizeof(int) * this->n_visible); + double *nh_means = (double *)malloc(sizeof(double) * this->n_hidden); + int *nh_samples = (int *)malloc(sizeof(int) * this->n_hidden); + + /* CD-k */ + RBM_sample_h_given_v(this, input, ph_mean, ph_sample); + + for(step=0; stepn_hidden; i++) { + for(j=0; jn_visible; j++) { + // this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + this->W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * 
nv_samples[j]) / this->N; + } + this->hbias[i] += lr * (ph_sample[i] - nh_means[i]) / this->N; + } + + for(i=0; in_visible; i++) { + this->vbias[i] += lr * (input[i] - nv_samples[i]) / this->N; + } + + + free(ph_mean); + free(ph_sample); + free(nv_means); + free(nv_samples); + free(nh_means); + free(nh_samples); +} + + +void RBM_sample_h_given_v(RBM* this, int *v0_sample, double *mean, int *sample) { + int i; + for(i=0; in_hidden; i++) { + mean[i] = RBM_propup(this, v0_sample, this->W[i], this->hbias[i]); + sample[i] = binomial(1, mean[i]); + } +} + +void RBM_sample_v_given_h(RBM* this, int *h0_sample, double *mean, int *sample) { + int i; + for(i=0; in_visible; i++) { + mean[i] = RBM_propdown(this, h0_sample, i, this->vbias[i]); + sample[i] = binomial(1, mean[i]); + } +} + +double RBM_propup(RBM* this, int *v, double *w, double b) { + int j; + double pre_sigmoid_activation = 0.0; + for(j=0; jn_visible; j++) { + pre_sigmoid_activation += w[j] * v[j]; + } + pre_sigmoid_activation += b; + return sigmoid(pre_sigmoid_activation); +} + +double RBM_propdown(RBM* this, int *h, int i, double b) { + int j; + double pre_sigmoid_activation = 0.0; + + for(j=0; jn_hidden; j++) { + pre_sigmoid_activation += this->W[j][i] * h[j]; + } + pre_sigmoid_activation += b; + return sigmoid(pre_sigmoid_activation); +} + +void RBM_gibbs_hvh(RBM* this, int *h0_sample, double *nv_means, int *nv_samples, \ + double *nh_means, int *nh_samples) { + RBM_sample_v_given_h(this, h0_sample, nv_means, nv_samples); + RBM_sample_h_given_v(this, nv_samples, nh_means, nh_samples); +} + +void RBM_reconstruct(RBM* this, int *v, double *reconstructed_v) { + int i, j; + double *h = (double *)malloc(sizeof(double) * this->n_hidden); + double pre_sigmoid_activation; + + for(i=0; in_hidden; i++) { + h[i] = RBM_propup(this, v, this->W[i], this->hbias[i]); + } + + for(i=0; in_visible; i++) { + pre_sigmoid_activation = 0.0; + for(j=0; jn_hidden; j++) { + pre_sigmoid_activation += this->W[j][i] * h[j]; + } + 
pre_sigmoid_activation += this->vbias[i]; + + reconstructed_v[i] = sigmoid(pre_sigmoid_activation); + } + + free(h); +} + + +// LogisticRegression +void LogisticRegression__construct(LogisticRegression *this, int N, int n_in, int n_out) { + int i, j; + this->N = N; + this->n_in = n_in; + this->n_out = n_out; + + this->W = (double **)malloc(sizeof(double*) * n_out); + this->W[0] = (double *)malloc(sizeof(double) * n_in * n_out); + for(i=0; iW[i] = this->W[0] + i * n_in; + this->b = (double *)malloc(sizeof(double) * n_out); + + for(i=0; iW[i][j] = 0; + } + this->b[i] = 0; + } +} + +void LogisticRegression__destruct(LogisticRegression *this) { + free(this->W[0]); + free(this->W); + free(this->b); +} + +void LogisticRegression_train(LogisticRegression *this, int *x, int *y, double lr) { + int i,j; + double *p_y_given_x = (double *)malloc(sizeof(double) * this->n_out); + double *dy = (double *)malloc(sizeof(double) * this->n_out); + + for(i=0; in_out; i++) { + p_y_given_x[i] = 0; + for(j=0; jn_in; j++) { + p_y_given_x[i] += this->W[i][j] * x[j]; + } + p_y_given_x[i] += this->b[i]; + } + LogisticRegression_softmax(this, p_y_given_x); + + for(i=0; in_out; i++) { + dy[i] = y[i] - p_y_given_x[i]; + + for(j=0; jn_in; j++) { + this->W[i][j] += lr * dy[i] * x[j] / this->N; + } + + this->b[i] += lr * dy[i] / this->N; + } + + free(p_y_given_x); + free(dy); +} + +void LogisticRegression_softmax(LogisticRegression *this, double *x) { + int i; + double max = 0.0; + double sum = 0.0; + + for(i=0; in_out; i++) if(max < x[i]) max = x[i]; + for(i=0; in_out; i++) { + x[i] = exp(x[i] - max); + sum += x[i]; + } + + for(i=0; in_out; i++) x[i] /= sum; +} + +void LogisticRegression_predict(LogisticRegression *this, int *x, double *y) { + int i,j; + + for(i=0; in_out; i++) { + y[i] = 0; + for(j=0; jn_in; j++) { + y[i] += this->W[i][j] * x[j]; + } + y[i] += this->b[i]; + } + + LogisticRegression_softmax(this, y); +} + + +void test_dbn(void) { + srand(0); + + int i, j; + + double pretrain_lr = 
0.1; + int pretraining_epochs = 1000; + int k = 1; + double finetune_lr = 0.1; + int finetune_epochs = 500; + + int train_N = 6; + int test_N = 4; + int n_ins = 6; + int n_outs = 2; + int hidden_layer_sizes[] = {3, 3}; + int n_layers = sizeof(hidden_layer_sizes) / sizeof(hidden_layer_sizes[0]); + + + // training data + int train_X[6][6] = { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 1, 0, 0}, + {0, 0, 1, 1, 1, 0} + }; + + int train_Y[6][2] = { + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1} + }; + + // construct DBN + DBN dbn; + DBN__construct(&dbn, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers); + + // pretrain + DBN_pretrain(&dbn, *train_X, pretrain_lr, k, pretraining_epochs); + + // finetune + DBN_finetune(&dbn, *train_X, *train_Y, finetune_lr, finetune_epochs); + + // test data + int test_X[4][6] = { + {1, 1, 0, 0, 0, 0}, + {1, 1, 1, 1, 0, 0}, + {0, 0, 0, 1, 1, 0}, + {0, 0, 1, 1, 1, 0} + }; + + double test_Y[4][2]; + + // test + for(i=0; i +#include +#include +#include "LogisticRegression.h" + +void test_lr(void); + + +void LogisticRegression__construct(LogisticRegression *this, int N, int n_in, int n_out) { + int i, j; + this->N = N; + this->n_in = n_in; + this->n_out = n_out; + + this->W = (double **)malloc(sizeof(double*) * n_out); + this->W[0] = (double *)malloc(sizeof(double) * n_in * n_out); + for(i=0; iW[i] = this->W[0] + i * n_in; + this->b = (double *)malloc(sizeof(double) * n_out); + + for(i=0; iW[i][j] = 0; + } + this->b[i] = 0; + } +} + +void LogisticRegression__destruct(LogisticRegression *this) { + free(this->W[0]); + free(this->W); + free(this->b); +} + +void LogisticRegression_train(LogisticRegression *this, int *x, int *y, double lr) { + int i,j; + double *p_y_given_x = (double *)malloc(sizeof(double) * this->n_out); + double *dy = (double *)malloc(sizeof(double) * this->n_out); + + for(i=0; in_out; i++) { + p_y_given_x[i] = 0; + for(j=0; jn_in; j++) { + p_y_given_x[i] 
+= this->W[i][j] * x[j]; + } + p_y_given_x[i] += this->b[i]; + } + LogisticRegression_softmax(this, p_y_given_x); + + for(i=0; in_out; i++) { + dy[i] = y[i] - p_y_given_x[i]; + + for(j=0; jn_in; j++) { + this->W[i][j] += lr * dy[i] * x[j] / this->N; + } + + this->b[i] += lr * dy[i] / this->N; + } + + free(p_y_given_x); + free(dy); +} + +void LogisticRegression_softmax(LogisticRegression *this, double *x) { + int i; + double max = 0.0; + double sum = 0.0; + + for(i=0; in_out; i++) if(max < x[i]) max = x[i]; + for(i=0; in_out; i++) { + x[i] = exp(x[i] - max); + sum += x[i]; + } + + for(i=0; in_out; i++) x[i] /= sum; +} + +void LogisticRegression_predict(LogisticRegression *this, int *x, double *y) { + int i,j; + + for(i=0; in_out; i++) { + y[i] = 0; + for(j=0; jn_in; j++) { + y[i] += this->W[i][j] * x[j]; + } + y[i] += this->b[i]; + } + + LogisticRegression_softmax(this, y); +} + + + + +void test_lr(void) { + int i, j, epoch; + + double learning_rate = 0.1; + int n_epochs = 500; + + int train_N = 6; + int test_N = 2; + int n_in = 6; + int n_out = 2; + + + // training data + int train_X[6][6] = { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 1, 0, 0}, + {0, 0, 1, 1, 1, 0} + }; + + int train_Y[6][2] = { + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1} + }; + + + // construct LogisticRegression + LogisticRegression classifier; + LogisticRegression__construct(&classifier, train_N, n_in, n_out); + + + // train + for(epoch=0; epoch +#include +#include +#include "RBM.h" +#include "utils.h" + + +void test_rbm(void); + + +double uniform(double min, double max) { + return rand() / (RAND_MAX + 1.0) * (max - min) + min; +} + +int binomial(int n, double p) { + if(p < 0 || p > 1) return 0; + + int i; + int c = 0; + double r; + + for(i=0; iN = N; + this->n_visible = n_visible; + this->n_hidden = n_hidden; + + if(W == NULL) { + this->W = (double **)malloc(sizeof(double*) * n_hidden); + this->W[0] = (double 
*)malloc(sizeof(double) * n_visible * n_hidden); + for(i=0; iW[i] = this->W[0] + i * n_visible; + + for(i=0; iW[i][j] = uniform(-a, a); + } + } + } else { + this->W = W; + } + + if(hbias == NULL) { + this->hbias = (double *)malloc(sizeof(double) * n_hidden); + for(i=0; ihbias[i] = 0; + } else { + this->hbias = hbias; + } + + if(vbias == NULL) { + this->vbias = (double *)malloc(sizeof(double) * n_visible); + for(i=0; ivbias[i] = 0; + } else { + this->vbias = vbias; + } +} + +void RBM__destruct(RBM* this) { + free(this->W[0]); + free(this->W); + free(this->hbias); + free(this->vbias); +} + +void RBM_contrastive_divergence(RBM* this, int *input, double lr, int k) { + int i, j, step; + + double *ph_mean = (double *)malloc(sizeof(double) * this->n_hidden); + int *ph_sample = (int *)malloc(sizeof(int) * this->n_hidden); + double *nv_means = (double *)malloc(sizeof(double) * this->n_visible); + int *nv_samples = (int *)malloc(sizeof(int) * this->n_visible); + double *nh_means = (double *)malloc(sizeof(double) * this->n_hidden); + int *nh_samples = (int *)malloc(sizeof(int) * this->n_hidden); + + /* CD-k */ + RBM_sample_h_given_v(this, input, ph_mean, ph_sample); + + for(step=0; stepn_hidden; i++) { + for(j=0; jn_visible; j++) { + // this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + this->W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + } + this->hbias[i] += lr * (ph_sample[i] - nh_means[i]) / this->N; + } + + for(i=0; in_visible; i++) { + this->vbias[i] += lr * (input[i] - nv_samples[i]) / this->N; + } + + + free(ph_mean); + free(ph_sample); + free(nv_means); + free(nv_samples); + free(nh_means); + free(nh_samples); +} + + +void RBM_sample_h_given_v(RBM* this, int *v0_sample, double *mean, int *sample) { + int i; + for(i=0; in_hidden; i++) { + mean[i] = RBM_propup(this, v0_sample, this->W[i], this->hbias[i]); + sample[i] = binomial(1, mean[i]); + } +} + +void RBM_sample_v_given_h(RBM* this, int 
*h0_sample, double *mean, int *sample) { + int i; + for(i=0; in_visible; i++) { + mean[i] = RBM_propdown(this, h0_sample, i, this->vbias[i]); + sample[i] = binomial(1, mean[i]); + } +} + +double RBM_propup(RBM* this, int *v, double *w, double b) { + int j; + double pre_sigmoid_activation = 0.0; + for(j=0; jn_visible; j++) { + pre_sigmoid_activation += w[j] * v[j]; + } + pre_sigmoid_activation += b; + return sigmoid(pre_sigmoid_activation); +} + +double RBM_propdown(RBM* this, int *h, int i, double b) { + int j; + double pre_sigmoid_activation = 0.0; + + for(j=0; jn_hidden; j++) { + pre_sigmoid_activation += this->W[j][i] * h[j]; + } + pre_sigmoid_activation += b; + return sigmoid(pre_sigmoid_activation); +} + +void RBM_gibbs_hvh(RBM* this, int *h0_sample, double *nv_means, int *nv_samples, \ + double *nh_means, int *nh_samples) { + RBM_sample_v_given_h(this, h0_sample, nv_means, nv_samples); + RBM_sample_h_given_v(this, nv_samples, nh_means, nh_samples); +} + +void RBM_reconstruct(RBM* this, int *v, double *reconstructed_v) { + int i, j; + double *h = (double *)malloc(sizeof(double) * this->n_hidden); + double pre_sigmoid_activation; + + for(i=0; in_hidden; i++) { + h[i] = RBM_propup(this, v, this->W[i], this->hbias[i]); + } + + for(i=0; in_visible; i++) { + pre_sigmoid_activation = 0.0; + for(j=0; jn_hidden; j++) { + pre_sigmoid_activation += this->W[j][i] * h[j]; + } + pre_sigmoid_activation += this->vbias[i]; + + reconstructed_v[i] = sigmoid(pre_sigmoid_activation); + } + + free(h); +} + + + + +void test_rbm(void) { + srand(0); + + int i, j, epoch; + + double learning_rate = 0.1; + int training_epochs = 1000; + int k = 1; + + int train_N = 6; + int test_N = 2; + int n_visible = 6; + int n_hidden = 3; + + // training data + int train_X[6][6] = { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 0, 1, 0}, + {0, 0, 1, 1, 1, 0} + }; + + // construct RBM + RBM rbm; + RBM__construct(&rbm, train_N, n_visible, n_hidden, 
NULL, NULL, NULL); + + // train + for(epoch=0; epoch +#include +#include +#include "HiddenLayer.h" +#include "dA.h" +#include "LogisticRegression.h" +#include "SdA.h" +#include "utils.h" + +void test_sda(void); + + +double uniform(double min, double max) { + return rand() / (RAND_MAX + 1.0) * (max - min) + min; +} + +int binomial(int n, double p) { + if(p < 0 || p > 1) return 0; + + int i; + int c = 0; + double r; + + for(i=0; iN = N; + this->n_ins = n_ins; + this->hidden_layer_sizes = hidden_layer_sizes; + this->n_outs = n_outs; + this->n_layers = n_layers; + + this->sigmoid_layers = (HiddenLayer *)malloc(sizeof(HiddenLayer) * n_layers); + this->dA_layers = (dA *)malloc(sizeof(dA) * n_layers); + + // construct multi-layer + for(i=0; isigmoid_layers[i]), \ + N, input_size, hidden_layer_sizes[i], NULL, NULL); + + // construct dA_layer + dA__construct(&(this->dA_layers[i]), N, input_size, hidden_layer_sizes[i], \ + this->sigmoid_layers[i].W, this->sigmoid_layers[i].b, NULL); + + } + + // layer for output using LogisticRegression + LogisticRegression__construct(&(this->log_layer), \ + N, hidden_layer_sizes[n_layers-1], n_outs); +} + +void SdA__destruct(SdA* this) { + int i; + for(i=0; in_layers; i++) { + HiddenLayer__destruct(&(this->sigmoid_layers[i])); + dA__destruct(&(this->dA_layers[i])); + } + free(this->sigmoid_layers); + free(this->dA_layers); +} + +void SdA_pretrain(SdA* this, int *input, double lr, double corruption_level, int epochs) { + int i, j, l, m, n, epoch; + + int *layer_input; + int prev_layer_input_size; + int *prev_layer_input; + + int *train_X = (int *)malloc(sizeof(int) * this->n_ins); + + for(i=0; in_layers; i++) { // layer-wise + + for(epoch=0; epochN; n++) { // input x1...xN + // initial input + for(m=0; mn_ins; m++) train_X[m] = input[n * this->n_ins + m]; + + // layer input + for(l=0; l<=i; l++) { + if(l == 0) { + layer_input = (int *)malloc(sizeof(int) * this->n_ins); + for(j=0; jn_ins; j++) layer_input[j] = train_X[j]; + } else { + if(l == 
1) prev_layer_input_size = this->n_ins; + else prev_layer_input_size = this->hidden_layer_sizes[l-2]; + + prev_layer_input = (int *)malloc(sizeof(int) * prev_layer_input_size); + for(j=0; jhidden_layer_sizes[l-1]); + + HiddenLayer_sample_h_given_v(&(this->sigmoid_layers[l-1]), \ + prev_layer_input, layer_input); + free(prev_layer_input); + } + } + + dA_train(&(this->dA_layers[i]), layer_input, lr, corruption_level); + } + + } + } + + free(train_X); + free(layer_input); +} + +void SdA_finetune(SdA* this, int *input, int *label, double lr, int epochs) { + int i, j, m, n, epoch; + + int *layer_input; + int prev_layer_input_size; + int *prev_layer_input; + + int *train_X = (int *)malloc(sizeof(int) * this->n_ins); + int *train_Y = (int *)malloc(sizeof(int) * this->n_outs); + + for(epoch=0; epochN; n++) { // input x1...xN + // initial input + for(m=0; mn_ins; m++) train_X[m] = input[n * this->n_ins + m]; + for(m=0; mn_outs; m++) train_Y[m] = label[n * this->n_outs + m]; + + // layer input + for(i=0; in_layers; i++) { + if(i == 0) { + prev_layer_input = (int *)malloc(sizeof(int) * this->n_ins); + for(j=0; jn_ins; j++) prev_layer_input[j] = train_X[j]; + } else { + prev_layer_input = (int *)malloc(sizeof(int) * this->hidden_layer_sizes[i-1]); + for(j=0; jhidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j]; + free(layer_input); + } + + + layer_input = (int *)malloc(sizeof(int) * this->hidden_layer_sizes[i]); + HiddenLayer_sample_h_given_v(&(this->sigmoid_layers[i]), \ + prev_layer_input, layer_input); + free(prev_layer_input); + } + + LogisticRegression_train(&(this->log_layer), layer_input, train_Y, lr); + } + // lr *= 0.95; + } + + free(layer_input); + free(train_X); + free(train_Y); +} + +void SdA_predict(SdA* this, int *x, double *y) { + int i, j, k; + double *layer_input; + int prev_layer_input_size; + double *prev_layer_input; + + double linear_output; + + prev_layer_input = (double *)malloc(sizeof(double) * this->n_ins); + for(j=0; jn_ins; j++) 
prev_layer_input[j] = x[j]; + + // layer activation + for(i=0; in_layers; i++) { + layer_input = (double *)malloc(sizeof(double) * this->sigmoid_layers[i].n_out); + + for(k=0; ksigmoid_layers[i].n_out; k++) { + linear_output = 0.0; + + for(j=0; jsigmoid_layers[i].n_in; j++) { + linear_output += this->sigmoid_layers[i].W[k][j] * prev_layer_input[j]; + } + linear_output += this->sigmoid_layers[i].b[k]; + layer_input[k] = sigmoid(linear_output); + } + free(prev_layer_input); + + if(i < this->n_layers-1) { + prev_layer_input = (double *)malloc(sizeof(double) * this->sigmoid_layers[i].n_out); + for(j=0; jsigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j]; + free(layer_input); + } + } + + for(i=0; ilog_layer.n_out; i++) { + y[i] = 0; + for(j=0; jlog_layer.n_in; j++) { + y[i] += this->log_layer.W[i][j] * layer_input[j]; + } + y[i] += this->log_layer.b[i]; + } + + LogisticRegression_softmax(&(this->log_layer), y); + + free(layer_input); +} + + +// HiddenLayer +void HiddenLayer__construct(HiddenLayer* this, int N, int n_in, int n_out, \ + double **W, double *b) { + int i, j; + double a = 1.0 / n_in; + + this->N = N; + this->n_in = n_in; + this->n_out = n_out; + + if(W == NULL) { + this->W = (double **)malloc(sizeof(double*) * n_out); + this->W[0] = (double *)malloc(sizeof(double) * n_in * n_out); + for(i=0; iW[i] = this->W[0] + i * n_in; + + for(i=0; iW[i][j] = uniform(-a, a); + } + } + } else { + this->W = W; + } + + if(b == NULL) { + this->b = (double *)malloc(sizeof(double) * n_out); + } else { + this->b = b; + } +} + +void HiddenLayer__destruct(HiddenLayer* this) { + free(this->W[0]); + free(this->W); + free(this->b); +} + +double HiddenLayer_output(HiddenLayer* this, int *input, double *w, double b) { + int j; + double linear_output = 0.0; + for(j=0; jn_in; j++) { + linear_output += w[j] * input[j]; + } + linear_output += b; + return sigmoid(linear_output); +} + +void HiddenLayer_sample_h_given_v(HiddenLayer* this, int *input, int *sample) { + int i; + 
for(i=0; in_out; i++) { + sample[i] = binomial(1, HiddenLayer_output(this, input, this->W[i], this->b[i])); + } +} + + +// dA +void dA__construct(dA* this, int N, int n_visible, int n_hidden, \ + double **W, double *hbias, double *vbias) { + int i, j; + double a = 1.0 / n_visible; + + this->N = N; + this->n_visible = n_visible; + this->n_hidden = n_hidden; + + if(W == NULL) { + this->W = (double **)malloc(sizeof(double*) * n_hidden); + this->W[0] = (double *)malloc(sizeof(double) * n_visible * n_hidden); + for(i=0; iW[i] = this->W[0] + i * n_visible; + + for(i=0; iW[i][j] = uniform(-a, a); + } + } + } else { + this->W = W; + } + + if(hbias == NULL) { + this->hbias = (double *)malloc(sizeof(double) * n_hidden); + for(i=0; ihbias[i] = 0; + } else { + this->hbias = hbias; + } + + if(vbias == NULL) { + this->vbias = (double *)malloc(sizeof(double) * n_visible); + for(i=0; ivbias[i] = 0; + } else { + this->vbias = vbias; + } +} + +void dA__destruct(dA* this) { + // free(this->W[0]); + // free(this->W); + // free(this->hbias); + free(this->vbias); +} + +void dA_get_corrupted_input(dA* this, int *x, int *tilde_x, double p) { + int i; + for(i=0; in_visible; i++) { + if(x[i] == 0) { + tilde_x[i] = 0; + } else { + tilde_x[i] = binomial(1, p); + } + } +} + +// Encode +void dA_get_hidden_values(dA* this, int *x, double *y) { + int i,j; + for(i=0; in_hidden; i++) { + y[i] = 0; + for(j=0; jn_visible; j++) { + y[i] += this->W[i][j] * x[j]; + } + y[i] += this->hbias[i]; + y[i] = sigmoid(y[i]); + } +} + +// Decode +void dA_get_reconstructed_input(dA* this, double *y, double *z) { + int i, j; + for(i=0; in_visible; i++) { + z[i] = 0; + for(j=0; jn_hidden; j++) { + z[i] += this->W[j][i] * y[j]; + } + z[i] += this->vbias[i]; + z[i] = sigmoid(z[i]); + } +} + + +void dA_train(dA* this, int *x, double lr, double corruption_level) { + int i, j; + + int *tilde_x = (int *)malloc(sizeof(int) * this->n_visible); + double *y = (double *)malloc(sizeof(double) * this->n_hidden); + double *z = 
(double *)malloc(sizeof(double) * this->n_visible); + + double *L_vbias = (double *)malloc(sizeof(double) * this->n_visible); + double *L_hbias = (double *)malloc(sizeof(double) * this->n_hidden); + + double p = 1 - corruption_level; + + dA_get_corrupted_input(this, x, tilde_x, p); + dA_get_hidden_values(this, tilde_x, y); + dA_get_reconstructed_input(this, y, z); + + // vbias + for(i=0; in_visible; i++) { + L_vbias[i] = x[i] - z[i]; + this->vbias[i] += lr * L_vbias[i] / this->N; + } + + // hbias + for(i=0; in_hidden; i++) { + L_hbias[i] = 0; + for(j=0; jn_visible; j++) { + L_hbias[i] += this->W[i][j] * L_vbias[j]; + } + L_hbias[i] *= y[i] * (1 - y[i]); + + this->hbias[i] += lr * L_hbias[i] / this->N; + } + + // W + for(i=0; in_hidden; i++) { + for(j=0; jn_visible; j++) { + this->W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / this->N; + } + } + + free(L_hbias); + free(L_vbias); + free(z); + free(y); + free(tilde_x); +} + +void dA_reconstruct(dA* this, int *x, double *z) { + int i; + double *y = (double *)malloc(sizeof(double) * this->n_hidden); + + dA_get_hidden_values(this, x, y); + dA_get_reconstructed_input(this, y, z); + + free(y); +} + + +// LogisticRegression +void LogisticRegression__construct(LogisticRegression *this, int N, int n_in, int n_out) { + int i, j; + this->N = N; + this->n_in = n_in; + this->n_out = n_out; + + this->W = (double **)malloc(sizeof(double*) * n_out); + this->W[0] = (double *)malloc(sizeof(double) * n_in * n_out); + for(i=0; iW[i] = this->W[0] + i * n_in; + this->b = (double *)malloc(sizeof(double) * n_out); + + for(i=0; iW[i][j] = 0; + } + this->b[i] = 0; + } +} + +void LogisticRegression__destruct(LogisticRegression *this) { + free(this->W[0]); + free(this->W); + free(this->b); +} + +void LogisticRegression_train(LogisticRegression *this, int *x, int *y, double lr) { + int i,j; + double *p_y_given_x = (double *)malloc(sizeof(double) * this->n_out); + double *dy = (double *)malloc(sizeof(double) * this->n_out); + + 
for(i=0; in_out; i++) { + p_y_given_x[i] = 0; + for(j=0; jn_in; j++) { + p_y_given_x[i] += this->W[i][j] * x[j]; + } + p_y_given_x[i] += this->b[i]; + } + LogisticRegression_softmax(this, p_y_given_x); + + for(i=0; in_out; i++) { + dy[i] = y[i] - p_y_given_x[i]; + + for(j=0; jn_in; j++) { + this->W[i][j] += lr * dy[i] * x[j] / this->N; + } + + this->b[i] += lr * dy[i] / this->N; + } + + free(p_y_given_x); + free(dy); +} + +void LogisticRegression_softmax(LogisticRegression *this, double *x) { + int i; + double max = 0.0; + double sum = 0.0; + + for(i=0; in_out; i++) if(max < x[i]) max = x[i]; + for(i=0; in_out; i++) { + x[i] = exp(x[i] - max); + sum += x[i]; + } + + for(i=0; in_out; i++) x[i] /= sum; +} + +void LogisticRegression_predict(LogisticRegression *this, int *x, double *y) { + int i,j; + + for(i=0; in_out; i++) { + y[i] = 0; + for(j=0; jn_in; j++) { + y[i] += this->W[i][j] * x[j]; + } + y[i] += this->b[i]; + } + + LogisticRegression_softmax(this, y); +} + + +void test_sda(void) { + srand(0); + + int i, j; + + double pretrain_lr = 0.1; + double corruption_level = 0.3; + int pretraining_epochs = 1000; + double finetune_lr = 0.1; + int finetune_epochs = 500; + + int train_N = 10; + int test_N = 4; + int n_ins = 28; + int n_outs = 2; + int hidden_layer_sizes[] = {15, 15}; + int n_layers = sizeof(hidden_layer_sizes) / sizeof(hidden_layer_sizes[0]); + + // training data + int train_X[10][28] = { + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1} + }; + + int train_Y[10][2] = { + {1, 0}, + {1, 0}, + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1}, + {0, 1}, + {0, 1} + }; + + // construct SdA + SdA sda; + SdA__construct(&sda, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers); + + // pretrain + SdA_pretrain(&sda, *train_X, pretrain_lr, corruption_level, pretraining_epochs); + + // finetune + SdA_finetune(&sda, *train_X, *train_Y, finetune_lr, finetune_epochs); + + // test data + int test_X[4][28] = { + {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1} + }; + + double test_Y[4][28]; + + + // test + for(i=0; i +#include +#include +#include "dA.h" +#include "utils.h" + + +void test_dbn(void); + + +double uniform(double min, double max) { + return rand() / (RAND_MAX + 1.0) * (max - min) + min; +} + +int binomial(int n, double p) { + if(p < 0 || p > 1) return 0; + + int i; + int c = 0; + double r; + + for(i=0; iN = N; + this->n_visible = n_visible; + this->n_hidden = n_hidden; + + if(W == NULL) { + this->W = (double **)malloc(sizeof(double*) * n_hidden); + this->W[0] = (double *)malloc(sizeof(double) * n_visible * n_hidden); + for(i=0; iW[i] = this->W[0] + i * n_visible; + + for(i=0; iW[i][j] = uniform(-a, a); + } + } + } else { + this->W = W; + } + + if(hbias == NULL) { + this->hbias = (double *)malloc(sizeof(double) * n_hidden); + for(i=0; ihbias[i] = 0; + } else { + this->hbias = hbias; + } + + if(vbias == 
NULL) { + this->vbias = (double *)malloc(sizeof(double) * n_visible); + for(i=0; ivbias[i] = 0; + } else { + this->vbias = vbias; + } +} + +void dA__destruct(dA* this) { + free(this->W[0]); + free(this->W); + free(this->hbias); + free(this->vbias); +} + +void dA_get_corrupted_input(dA* this, int *x, int *tilde_x, double p) { + int i; + for(i=0; in_visible; i++) { + if(x[i] == 0) { + tilde_x[i] = 0; + } else { + tilde_x[i] = binomial(1, p); + } + } +} + +// Encode +void dA_get_hidden_values(dA* this, int *x, double *y) { + int i,j; + for(i=0; in_hidden; i++) { + y[i] = 0; + for(j=0; jn_visible; j++) { + y[i] += this->W[i][j] * x[j]; + } + y[i] += this->hbias[i]; + y[i] = sigmoid(y[i]); + } +} + +// Decode +void dA_get_reconstructed_input(dA* this, double *y, double *z) { + int i, j; + for(i=0; in_visible; i++) { + z[i] = 0; + for(j=0; jn_hidden; j++) { + z[i] += this->W[j][i] * y[j]; + } + z[i] += this->vbias[i]; + z[i] = sigmoid(z[i]); + } +} + + +void dA_train(dA* this, int *x, double lr, double corruption_level) { + int i, j; + + int *tilde_x = (int *)malloc(sizeof(int) * this->n_visible); + double *y = (double *)malloc(sizeof(double) * this->n_hidden); + double *z = (double *)malloc(sizeof(double) * this->n_visible); + + double *L_vbias = (double *)malloc(sizeof(double) * this->n_visible); + double *L_hbias = (double *)malloc(sizeof(double) * this->n_hidden); + + double p = 1 - corruption_level; + + dA_get_corrupted_input(this, x, tilde_x, p); + dA_get_hidden_values(this, tilde_x, y); + dA_get_reconstructed_input(this, y, z); + + // vbias + for(i=0; in_visible; i++) { + L_vbias[i] = x[i] - z[i]; + this->vbias[i] += lr * L_vbias[i] / this->N; + } + + // hbias + for(i=0; in_hidden; i++) { + L_hbias[i] = 0; + for(j=0; jn_visible; j++) { + L_hbias[i] += this->W[i][j] * L_vbias[j]; + } + L_hbias[i] *= y[i] * (1 - y[i]); + + this->hbias[i] += lr * L_hbias[i] / this->N; + } + + // W + for(i=0; in_hidden; i++) { + for(j=0; jn_visible; j++) { + this->W[i][j] += lr * 
(L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / this->N; + } + } + + free(L_hbias); + free(L_vbias); + free(z); + free(y); + free(tilde_x); +} + +void dA_reconstruct(dA* this, int *x, double *z) { + int i; + double *y = (double *)malloc(sizeof(double) * this->n_hidden); + + dA_get_hidden_values(this, x, y); + dA_get_reconstructed_input(this, y, z); + + free(y); +} + +void test_dbn(void) { + srand(0); + int i, j, epoch; + + double learning_rate = 0.1; + double corruption_level = 0.3; + int training_epochs = 100; + + int train_N = 10; + int test_N = 2; + int n_visible = 20; + int n_hidden = 5; + + // training data + int train_X[10][20] = { + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0} + }; + + // construct dA + dA da; + dA__construct(&da, train_N, n_visible, n_hidden, NULL, NULL, NULL); + + // train + for(epoch=0; epoch +#include +#include "utils.h" +#include "HiddenLayer.h" +#include "RBM.h" +#include "LogisticRegression.h" +#include "DBN.h" +using namespace std; +using namespace utils; + + +// DBN +DBN::DBN(int size, int n_i, int *hls, int n_o, int n_l) { + int input_size; + + N = size; + n_ins = n_i; + hidden_layer_sizes = hls; + n_outs = n_o; + n_layers = n_l; + + sigmoid_layers = new HiddenLayer*[n_layers]; + rbm_layers = new RBM*[n_layers]; + + // construct multi-layer + for(int i=0; iW, sigmoid_layers[i]->b, NULL); + } + + // layer for output using LogisticRegression + log_layer 
= new LogisticRegression(N, hidden_layer_sizes[n_layers-1], n_outs); +} + +DBN::~DBN() { + delete log_layer; + + for(int i=0; isample_h_given_v(prev_layer_input, layer_input); + delete[] prev_layer_input; + } + } + + rbm_layers[i]->contrastive_divergence(layer_input, lr, k); + } + + } + } + + delete[] train_X; + delete[] layer_input; +} + +void DBN::finetune(int *input, int *label, double lr, int epochs) { + int *layer_input; + // int prev_layer_input_size; + int *prev_layer_input; + + int *train_X = new int[n_ins]; + int *train_Y = new int[n_outs]; + + for(int epoch=0; epochsample_h_given_v(prev_layer_input, layer_input); + delete[] prev_layer_input; + } + + log_layer->train(layer_input, train_Y, lr); + } + // lr *= 0.95; + } + + delete[] layer_input; + delete[] train_X; + delete[] train_Y; +} + +void DBN::predict(int *x, double *y) { + double *layer_input; + // int prev_layer_input_size; + double *prev_layer_input; + + double linear_output; + + prev_layer_input = new double[n_ins]; + for(int j=0; jn_out]; + + for(int k=0; kn_out; k++) { + linear_output = 0.0; + + for(int j=0; jn_in; j++) { + linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j]; + } + linear_output += sigmoid_layers[i]->b[k]; + layer_input[k] = sigmoid(linear_output); + } + delete[] prev_layer_input; + + if(i < n_layers-1) { + prev_layer_input = new double[sigmoid_layers[i]->n_out]; + for(int j=0; jn_out; j++) prev_layer_input[j] = layer_input[j]; + delete[] layer_input; + } + } + + for(int i=0; in_out; i++) { + y[i] = 0; + for(int j=0; jn_in; j++) { + y[i] += log_layer->W[i][j] * layer_input[j]; + } + y[i] += log_layer->b[i]; + } + + log_layer->softmax(y); + + + delete[] layer_input; +} + + +// HiddenLayer +HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp) { + N = size; + n_in = in; + n_out = out; + + if(w == NULL) { + W = new double*[n_out]; + for(int i=0; i +#include +#include "HiddenLayer.h" +#include "utils.h" +using namespace std; +using namespace 
utils; + + +HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp) { + N = size; + n_in = in; + n_out = out; + + if(w == NULL) { + W = new double*[n_out]; + for(int i=0; i +#include +#include +#include "LogisticRegression.h" +using namespace std; + + +LogisticRegression::LogisticRegression(int size, int in, int out) { + N = size; + n_in = in; + n_out = out; + + // initialize W, b + W = new double*[n_out]; + for(int i=0; i +#include +#include "utils.h" +#include "RBM.h" +using namespace std; +using namespace utils; + + +RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb) { + N = size; + n_visible = n_v; + n_hidden = n_h; + + if(w == NULL) { + W = new double*[n_hidden]; + for(int i=0; i +#include +#include "utils.h" + +#include "HiddenLayer.h" +#include "dA.h" +#include "LogisticRegression.h" +#include "SdA.h" +using namespace std; +using namespace utils; + + +// SdA +SdA::SdA(int size, int n_i, int *hls, int n_o, int n_l) { + int input_size; + + N = size; + n_ins = n_i; + hidden_layer_sizes = hls; + n_outs = n_o; + n_layers = n_l; + + sigmoid_layers = new HiddenLayer*[n_layers]; + dA_layers = new dA*[n_layers]; + + // construct multi-layer + for(int i=0; iW, sigmoid_layers[i]->b, NULL); + } + + // layer for output using LogisticRegression + log_layer = new LogisticRegression(N, hidden_layer_sizes[n_layers-1], n_outs); +} + +SdA::~SdA() { + delete log_layer; + + for(int i=0; isample_h_given_v(prev_layer_input, layer_input); + delete[] prev_layer_input; + } + } + + dA_layers[i]->train(layer_input, lr, corruption_level); + + } + } + } + + delete[] train_X; + delete[] layer_input; +} + +void SdA::finetune(int *input, int *label, double lr, int epochs) { + int *layer_input; + int prev_layer_input_size; + int *prev_layer_input; + + int *train_X = new int[n_ins]; + int *train_Y = new int[n_outs]; + + for(int epoch=0; epochsample_h_given_v(prev_layer_input, layer_input); + delete[] prev_layer_input; + } + + 
log_layer->train(layer_input, train_Y, lr); + } + // lr *= 0.95; + } + + delete[] layer_input; + delete[] train_X; + delete[] train_Y; +} + +void SdA::predict(int *x, double *y) { + double *layer_input; + int prev_layer_input_size; + double *prev_layer_input; + + double linear_output; + + prev_layer_input = new double[n_ins]; + for(int j=0; jn_out]; + + for(int k=0; kn_out; k++) { + linear_output = 0.0; + + for(int j=0; jn_in; j++) { + linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j]; + } + linear_output += sigmoid_layers[i]->b[k]; + layer_input[k] = sigmoid(linear_output); + } + delete[] prev_layer_input; + + if(i < n_layers-1) { + prev_layer_input = new double[sigmoid_layers[i]->n_out]; + for(int j=0; jn_out; j++) prev_layer_input[j] = layer_input[j]; + delete[] layer_input; + } + } + + for(int i=0; in_out; i++) { + y[i] = 0; + for(int j=0; jn_in; j++) { + y[i] += log_layer->W[i][j] * layer_input[j]; + } + y[i] += log_layer->b[i]; + } + + log_layer->softmax(y); + + + delete[] layer_input; +} + + +// HiddenLayer +HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp) { + N = size; + n_in = in; + n_out = out; + + if(w == NULL) { + W = new double*[n_out]; + for(int i=0; i +#include +#include "utils.h" + +#include "dA.h" +using namespace std; +using namespace utils; + + +dA::dA(int size, int n_v, int n_h, double **w, double *hb, double *vb) { + N = size; + n_visible = n_v; + n_hidden = n_h; + + if(w == NULL) { + W = new double*[n_hidden]; + for(int i=0; i +#include +using namespace std; + + +namespace utils { + + double uniform(double min, double max) { + return rand() / (RAND_MAX + 1.0) * (max - min) + min; + } + + int binomial(int n, double p) { + if(p < 0 || p > 1) return 0; + + int c = 0; + double r; + + for(int i=0; i 1 { return 0 } + + c := 0 + var r float64 + + for i := 0; i < n; i++ { + r = rand.Float64() + if r < p { c++ } + } + + return c +} + +func Sigmoid(x float64) float64 { + return 1.0 / (1.0 + math.Exp(-x)) +} 
diff --git a/DeepLearning/java/.gitkeep b/DeepLearning/java/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/DeepLearning/java/src/DeepLearning/DBN.java b/DeepLearning/java/src/DeepLearning/DBN.java new file mode 100644 index 00000000..e070faf7 --- /dev/null +++ b/DeepLearning/java/src/DeepLearning/DBN.java @@ -0,0 +1,222 @@ +package DeepLearning; + +import java.util.Random; +import static DeepLearning.utils.*; + +public class DBN { + public int N; + public int n_ins; + public int[] hidden_layer_sizes; + public int n_outs; + public int n_layers; + public HiddenLayerDiscrete[] sigmoid_layers; + public RBM[] rbm_layers; + public LogisticRegressionDiscrete log_layer; + public Random rng; + + + public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) { + int input_size; + + this.N = N; + this.n_ins = n_ins; + this.hidden_layer_sizes = hidden_layer_sizes; + this.n_outs = n_outs; + this.n_layers = n_layers; + + this.sigmoid_layers = new HiddenLayerDiscrete[n_layers]; + this.rbm_layers = new RBM[n_layers]; + + if(rng == null) this.rng = new Random(1234); + else this.rng = rng; + + // construct multi-layer + for(int i=0; i dropout_masks; + List layer_inputs; + double[] layer_input; + double[] layer_output = new double[0]; + + for(int epoch=0; epoch(n_layers); + layer_inputs = new ArrayList<>(n_layers+1); // +1 for logistic layer + + // forward hiddenLayers + for(int i=0; i=0; i--) { + + if(i == n_layers-1) { + prev_W = logisticLayer.W; + } else { + prev_dy = dy.clone(); + prev_W = hiddenLayers[i+1].W; + } + + if(dropout) { + for(int j=0; j activation; + public DoubleFunction dactivation; + + public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng, String activation) { + this.N = N; + this.n_in = n_in; + this.n_out = n_out; + + if (rng == null) this.rng = new Random(1234); + else this.rng = rng; + + if (W == null) { + this.W = new double[n_out][n_in]; + double a = 1.0 / this.n_in; + + for(int 
i=0; i sigmoid(x); + this.dactivation = (double x) -> dsigmoid(x); + + } else if (activation == "tanh") { + this.activation = (double x) -> tanh(x); + this.dactivation = (double x) -> dtanh(x); + } else if (activation == "ReLU") { + this.activation = (double x) -> ReLU(x); + this.dactivation = (double x) -> dReLU(x); + } else { + throw new IllegalArgumentException("activation function not supported"); + } + + } + + public double output(double[] input, double[] w, double b) { + double linear_output = 0.0; + for(int j=0; j 1) return 0; + + int c = 0; + double r; + + for(int i=0; i 0) { + return x; + } else { + return 0.; + } + } + + public static double dReLU(double x) { + if(x > 0) { + return 1.; + } else { + return 0.; + } + } +} diff --git a/DeepLearning/python/CDBN.py b/DeepLearning/python/CDBN.py new file mode 100644 index 00000000..dbf6648c --- /dev/null +++ b/DeepLearning/python/CDBN.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from RBM import RBM +from CRBM import CRBM +from DBN import DBN +from utils import * + + +class CDBN(DBN): + def __init__(self, input=None, label=None,\ + n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ + rng=None): + + self.x = input + self.y = label + + self.sigmoid_layers = [] + self.rbm_layers = [] + self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) + + if rng is None: + rng = numpy.random.RandomState(1234) + + + assert self.n_layers > 0 + + + # construct multi-layer + for i in xrange(self.n_layers): + # layer_size + if i == 0: + input_size = n_ins + else: + input_size = hidden_layer_sizes[i - 1] + + # layer_input + if i == 0: + layer_input = self.x + else: + layer_input = self.sigmoid_layers[-1].sample_h_given_v() + + # construct sigmoid_layer + sigmoid_layer = HiddenLayer(input=layer_input, + n_in=input_size, + n_out=hidden_layer_sizes[i], + rng=rng, + activation=sigmoid) + 
self.sigmoid_layers.append(sigmoid_layer) + + # construct rbm_layer + if i == 0: + rbm_layer = CRBM(input=layer_input, # continuous-valued inputs + n_visible=input_size, + n_hidden=hidden_layer_sizes[i], + W=sigmoid_layer.W, # W, b are shared + hbias=sigmoid_layer.b) + else: + rbm_layer = RBM(input=layer_input, + n_visible=input_size, + n_hidden=hidden_layer_sizes[i], + W=sigmoid_layer.W, # W, b are shared + hbias=sigmoid_layer.b) + + self.rbm_layers.append(rbm_layer) + + + # layer for output using Logistic Regression + self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(), + label=self.y, + n_in=hidden_layer_sizes[-1], + n_out=n_outs) + + # finetune cost: the negative log likelihood of the logistic regression layer + self.finetune_cost = self.log_layer.negative_log_likelihood() + + + +def test_cdbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \ + finetune_lr=0.1, finetune_epochs=200): + + x = numpy.array([[0.4, 0.5, 0.5, 0., 0., 0.], + [0.5, 0.3, 0.5, 0., 0., 0.], + [0.4, 0.5, 0.5, 0., 0., 0.], + [0., 0., 0.5, 0.3, 0.5, 0.], + [0., 0., 0.5, 0.4, 0.5, 0.], + [0., 0., 0.5, 0.5, 0.5, 0.]]) + + y = numpy.array([[1, 0], + [1, 0], + [1, 0], + [0, 1], + [0, 1], + [0, 1]]) + + + rng = numpy.random.RandomState(123) + + # construct DBN + dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, rng=rng) + + # pre-training (TrainUnsupervisedDBN) + dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs) + + # fine-tuning (DBNSupervisedFineTuning) + dbn.finetune(lr=finetune_lr, epochs=finetune_epochs) + + + # test + x = numpy.array([[0.5, 0.5, 0., 0., 0., 0.], + [0., 0., 0., 0.5, 0.5, 0.], + [0.5, 0.5, 0.5, 0.5, 0.5, 0.]]) + + + print dbn.predict(x) + + + + +if __name__ == "__main__": + test_cdbn() diff --git a/DeepLearning/python/CRBM.py b/DeepLearning/python/CRBM.py new file mode 100644 index 00000000..e8700470 --- /dev/null +++ b/DeepLearning/python/CRBM.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- + +import sys +import 
numpy +from RBM import RBM +from utils import * + + +class CRBM(RBM): + def propdown(self, h): + pre_activation = numpy.dot(h, self.W.T) + self.vbias + return pre_activation + + + + def sample_v_given_h(self, h0_sample): + a_h = self.propdown(h0_sample) + en = numpy.exp(-a_h) + ep = numpy.exp(a_h) + + v1_mean = 1 / (1 - en) - 1 / a_h + U = numpy.array(self.rng.uniform( + low=0, + high=1, + size=v1_mean.shape)) + + v1_sample = numpy.log((1 - U * (1 - ep))) / a_h + + return [v1_mean, v1_sample] + + + +def test_crbm(learning_rate=0.1, k=1, training_epochs=1000): + data = numpy.array([[0.4, 0.5, 0.5, 0., 0., 0.], + [0.5, 0.3, 0.5, 0., 0., 0.], + [0.4, 0.5, 0.5, 0., 0., 0.], + [0., 0., 0.5, 0.3, 0.5, 0.], + [0., 0., 0.5, 0.4, 0.5, 0.], + [0., 0., 0.5, 0.5, 0.5, 0.]]) + + + rng = numpy.random.RandomState(123) + + # construct CRBM + rbm = CRBM(input=data, n_visible=6, n_hidden=5, rng=rng) + + # train + for epoch in xrange(training_epochs): + rbm.contrastive_divergence(lr=learning_rate, k=k) + # cost = rbm.get_reconstruction_cross_entropy() + # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost + + + # test + v = numpy.array([[0.5, 0.5, 0., 0., 0., 0.], + [0., 0., 0., 0.5, 0.5, 0.]]) + + print rbm.reconstruct(v) + + +if __name__ == "__main__": + test_crbm() diff --git a/DeepLearning/python/DBN.py b/DeepLearning/python/DBN.py new file mode 100644 index 00000000..b1b351bb --- /dev/null +++ b/DeepLearning/python/DBN.py @@ -0,0 +1,154 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from RBM import RBM +from utils import * + + +class DBN(object): + def __init__(self, input=None, label=None,\ + n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ + rng=None): + + self.x = input + self.y = label + + self.sigmoid_layers = [] + self.rbm_layers = [] + self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) + + if rng is None: + rng = numpy.random.RandomState(1234) + + + 
assert self.n_layers > 0 + + + # construct multi-layer + for i in xrange(self.n_layers): + # layer_size + if i == 0: + input_size = n_ins + else: + input_size = hidden_layer_sizes[i - 1] + + # layer_input + if i == 0: + layer_input = self.x + else: + layer_input = self.sigmoid_layers[-1].sample_h_given_v() + + # construct sigmoid_layer + sigmoid_layer = HiddenLayer(input=layer_input, + n_in=input_size, + n_out=hidden_layer_sizes[i], + rng=rng, + activation=sigmoid) + self.sigmoid_layers.append(sigmoid_layer) + + + # construct rbm_layer + rbm_layer = RBM(input=layer_input, + n_visible=input_size, + n_hidden=hidden_layer_sizes[i], + W=sigmoid_layer.W, # W, b are shared + hbias=sigmoid_layer.b) + self.rbm_layers.append(rbm_layer) + + + # layer for output using Logistic Regression + self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(), + label=self.y, + n_in=hidden_layer_sizes[-1], + n_out=n_outs) + + # finetune cost: the negative log likelihood of the logistic regression layer + self.finetune_cost = self.log_layer.negative_log_likelihood() + + + + def pretrain(self, lr=0.1, k=1, epochs=100): + # pre-train layer-wise + for i in xrange(self.n_layers): + if i == 0: + layer_input = self.x + else: + layer_input = self.sigmoid_layers[i-1].sample_h_given_v(layer_input) + rbm = self.rbm_layers[i] + + for epoch in xrange(epochs): + rbm.contrastive_divergence(lr=lr, k=k, input=layer_input) + # cost = rbm.get_reconstruction_cross_entropy() + # print >> sys.stderr, \ + # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost + + + def finetune(self, lr=0.1, epochs=100): + layer_input = self.sigmoid_layers[-1].sample_h_given_v() + + # train log_layer + epoch = 0 + done_looping = False + while (epoch < epochs) and (not done_looping): + self.log_layer.train(lr=lr, input=layer_input) + # self.finetune_cost = self.log_layer.negative_log_likelihood() + # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost + + lr *= 0.95 + 
epoch += 1 + + + def predict(self, x): + layer_input = x + + for i in xrange(self.n_layers): + sigmoid_layer = self.sigmoid_layers[i] + layer_input = sigmoid_layer.output(input=layer_input) + + out = self.log_layer.predict(layer_input) + return out + + + +def test_dbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \ + finetune_lr=0.1, finetune_epochs=200): + + x = numpy.array([[1,1,1,0,0,0], + [1,0,1,0,0,0], + [1,1,1,0,0,0], + [0,0,1,1,1,0], + [0,0,1,1,0,0], + [0,0,1,1,1,0]]) + y = numpy.array([[1, 0], + [1, 0], + [1, 0], + [0, 1], + [0, 1], + [0, 1]]) + + rng = numpy.random.RandomState(123) + + # construct DBN + dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, rng=rng) + + # pre-training (TrainUnsupervisedDBN) + dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs) + + # fine-tuning (DBNSupervisedFineTuning) + dbn.finetune(lr=finetune_lr, epochs=finetune_epochs) + + + # test + x = numpy.array([[1, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 0], + [1, 1, 1, 1, 1, 0]]) + + print dbn.predict(x) + + + +if __name__ == "__main__": + test_dbn() diff --git a/DeepLearning/python/Dropout.py b/DeepLearning/python/Dropout.py new file mode 100644 index 00000000..ba991169 --- /dev/null +++ b/DeepLearning/python/Dropout.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from utils import * + + +class Dropout(object): + def __init__(self, input, label,\ + n_in, hidden_layer_sizes, n_out,\ + rng=None, activation=ReLU): + + self.x = input + self.y = label + + self.hidden_layers = [] + self.n_layers = len(hidden_layer_sizes) + + if rng is None: + rng = numpy.random.RandomState(1234) + + assert self.n_layers > 0 + + + # construct multi-layer + for i in xrange(self.n_layers): + + # layer_size + if i == 0: + input_size = n_in + else: + input_size = hidden_layer_sizes[i-1] + + # layer_input + if i == 0: + layer_input = self.x + + else: + layer_input = 
self.hidden_layers[-1].output() + + # construct hidden_layer + hidden_layer = HiddenLayer(input=layer_input, + n_in=input_size, + n_out=hidden_layer_sizes[i], + rng=rng, + activation=activation) + + self.hidden_layers.append(hidden_layer) + + + # layer for ouput using Logistic Regression (softmax) + self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(), + label=self.y, + n_in=hidden_layer_sizes[-1], + n_out=n_out) + + + def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None): + + for epoch in xrange(epochs): + dropout_masks = [] # create different masks in each training epoch + + # forward hidden_layers + for i in xrange(self.n_layers): + if i == 0: + layer_input = self.x + + layer_input = self.hidden_layers[i].forward(input=layer_input) + + if dropout == True: + mask = self.hidden_layers[i].dropout(input=layer_input, p=p_dropout, rng=rng) + layer_input *= mask + + dropout_masks.append(mask) + + + # forward & backward log_layer + self.log_layer.train(input=layer_input) + + + # backward hidden_layers + for i in reversed(xrange(0, self.n_layers)): + if i == self.n_layers-1: + prev_layer = self.log_layer + else: + prev_layer = self.hidden_layers[i+1] + + if dropout == True: + self.hidden_layers[i].backward(prev_layer=prev_layer, dropout=True, mask=dropout_masks[i]) + else: + self.hidden_layers[i].backward(prev_layer=prev_layer) + + + + def predict(self, x, dropout=True, p_dropout=0.5): + layer_input = x + + for i in xrange(self.n_layers): + if dropout == True: + self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W + + layer_input = self.hidden_layers[i].output(input=layer_input) + + return self.log_layer.predict(layer_input) + + + +def test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5): + + x = numpy.array([[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + + y = numpy.array([[0, 1], + [1, 0], + [1, 0], + [0, 1]]) + + rng = numpy.random.RandomState(123) + + + # construct Dropout MLP + classifier = Dropout(input=x, label=y, \ 
+ n_in=2, hidden_layer_sizes=[10, 10], n_out=2, \ + rng=rng, activation=ReLU) + + + # train XOR + classifier.train(epochs=n_epochs, dropout=dropout, \ + p_dropout=p_dropout, rng=rng) + + + # test + print classifier.predict(x) + + + +if __name__ == "__main__": + test_dropout() diff --git a/DeepLearning/python/HiddenLayer.py b/DeepLearning/python/HiddenLayer.py new file mode 100644 index 00000000..a97bc616 --- /dev/null +++ b/DeepLearning/python/HiddenLayer.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from utils import * + + +class HiddenLayer(object): + def __init__(self, input, n_in, n_out,\ + W=None, b=None, rng=None, activation=tanh): + + if rng is None: + rng = numpy.random.RandomState(1234) + + if W is None: + a = 1. / n_in + W = numpy.array(rng.uniform( # initialize W uniformly + low=-a, + high=a, + size=(n_in, n_out))) + + if b is None: + b = numpy.zeros(n_out) # initialize bias 0 + + self.rng = rng + self.x = input + + self.W = W + self.b = b + + if activation == tanh: + self.dactivation = dtanh + + elif activation == sigmoid: + self.dactivation = dsigmoid + + elif activation == ReLU: + self.dactivation = dReLU + + else: + raise ValueError('activation function not supported.') + + + self.activation = activation + + + + def output(self, input=None): + if input is not None: + self.x = input + + linear_output = numpy.dot(self.x, self.W) + self.b + return self.activation(linear_output) + + + def forward(self, input=None): + return self.output(input=input) + + + def backward(self, prev_layer, lr=0.1, input=None, dropout=False, mask=None): + if input is not None: + self.x = input + + d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) + + if dropout == True: + d_y *= mask + + self.W += lr * numpy.dot(self.x.T, d_y) + self.b += lr * numpy.mean(d_y, axis=0) + self.d_y = d_y + + + def dropout(self, input, p, rng=None): + if rng is None: + rng = numpy.random.RandomState(123) + + mask = 
rng.binomial(size=input.shape, + n=1, + p=1-p) # p is the prob of dropping + + return mask + + + def sample_h_given_v(self, input=None): + if input is not None: + self.x = input + + v_mean = self.output() + h_sample = self.rng.binomial(size=v_mean.shape, + n=1, + p=v_mean) + return h_sample + + diff --git a/DeepLearning/python/LogisticRegression.py b/DeepLearning/python/LogisticRegression.py new file mode 100644 index 00000000..708a1b3e --- /dev/null +++ b/DeepLearning/python/LogisticRegression.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from utils import * + + +class LogisticRegression(object): + def __init__(self, input, label, n_in, n_out): + self.x = input + self.y = label + + self.W = numpy.zeros((n_in, n_out)) # initialize W 0 + self.b = numpy.zeros(n_out) # initialize bias 0 + + + def train(self, lr=0.1, input=None, L2_reg=0.00): + if input is not None: + self.x = input + + p_y_given_x = self.output(self.x) + d_y = self.y - p_y_given_x + + self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W + self.b += lr * numpy.mean(d_y, axis=0) + self.d_y = d_y + + + # def train(self, lr=0.1, input=None, L2_reg=0.00): + # self.forward(input) + # self.backward(lr, L2_reg) + + # def forward(self, input=None): + # if input is not None: + # self.x = input + + # p_y_given_x = self.output(self.x) + # self.d_y = self.y - p_y_given_x + + # def backward(self, lr=0.1, L2_reg=0.00): + # self.W += lr * numpy.dot(self.x.T, self.d_y) - lr * L2_reg * self.W + # self.b += lr * numpy.mean(self.d_y, axis=0) + + + def output(self, x): + # return sigmoid(numpy.dot(x, self.W) + self.b) + return softmax(numpy.dot(x, self.W) + self.b) + + def predict(self, x): + return self.output(x) + + + def negative_log_likelihood(self): + # sigmoid_activation = sigmoid(numpy.dot(self.x, self.W) + self.b) + sigmoid_activation = softmax(numpy.dot(self.x, self.W) + self.b) + + cross_entropy = - numpy.mean( + numpy.sum(self.y * numpy.log(sigmoid_activation) + + (1 - 
self.y) * numpy.log(1 - sigmoid_activation), + axis=1)) + + return cross_entropy + + +def test_lr(learning_rate=0.1, n_epochs=500): + + rng = numpy.random.RandomState(123) + + # training data + d = 2 + N = 10 + x1 = rng.randn(N, d) + numpy.array([0, 0]) + x2 = rng.randn(N, d) + numpy.array([20, 10]) + y1 = [[1, 0] for i in xrange(N)] + y2 = [[0, 1] for i in xrange(N)] + + x = numpy.r_[x1.astype(int), x2.astype(int)] + y = numpy.r_[y1, y2] + + + # construct LogisticRegression + classifier = LogisticRegression(input=x, label=y, n_in=d, n_out=2) + + # train + for epoch in xrange(n_epochs): + classifier.train(lr=learning_rate) + # cost = classifier.negative_log_likelihood() + # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost + learning_rate *= 0.995 + + + # test + result = classifier.predict(x) + for i in xrange(N): + print result[i] + print + for i in xrange(N): + print result[N+i] + + + +if __name__ == "__main__": + test_lr() diff --git a/DeepLearning/python/MLP.py b/DeepLearning/python/MLP.py new file mode 100644 index 00000000..e9ded0bf --- /dev/null +++ b/DeepLearning/python/MLP.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from utils import * + + +class MLP(object): + def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): + + self.x = input + self.y = label + + if rng is None: + rng = numpy.random.RandomState(1234) + + # construct hidden_layer + self.hidden_layer = HiddenLayer(input=self.x, + n_in=n_in, + n_out=n_hidden, + rng=rng, + activation=tanh) + + # construct log_layer + self.log_layer = LogisticRegression(input=self.hidden_layer.output, + label=self.y, + n_in=n_hidden, + n_out=n_out) + + def train(self): + # forward hidden_layer + layer_input = self.hidden_layer.forward() + + # forward & backward log_layer + # self.log_layer.forward(input=layer_input) + self.log_layer.train(input=layer_input) + + # backward 
hidden_layer + self.hidden_layer.backward(prev_layer=self.log_layer) + + # backward log_layer + # self.log_layer.backward() + + + def predict(self, x): + x = self.hidden_layer.output(input=x) + return self.log_layer.predict(x) + + +def test_mlp(n_epochs=5000): + + x = numpy.array([[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + + y = numpy.array([[0, 1], + [1, 0], + [1, 0], + [0, 1]]) + + + rng = numpy.random.RandomState(123) + + + # construct MLP + classifier = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2, rng=rng) + + # train + for epoch in xrange(n_epochs): + classifier.train() + + + # test + print classifier.predict(x) + + +if __name__ == "__main__": + test_mlp() diff --git a/DeepLearning/python/RBM.py b/DeepLearning/python/RBM.py new file mode 100644 index 00000000..7a127d81 --- /dev/null +++ b/DeepLearning/python/RBM.py @@ -0,0 +1,157 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from utils import * + +class RBM(object): + def __init__(self, input=None, n_visible=2, n_hidden=3, \ + W=None, hbias=None, vbias=None, rng=None): + + self.n_visible = n_visible # num of units in visible (input) layer + self.n_hidden = n_hidden # num of units in hidden layer + + if rng is None: + rng = numpy.random.RandomState(1234) + + + if W is None: + a = 1. 
/ n_visible + initial_W = numpy.array(rng.uniform( # initialize W uniformly + low=-a, + high=a, + size=(n_visible, n_hidden))) + + W = initial_W + + if hbias is None: + hbias = numpy.zeros(n_hidden) # initialize h bias 0 + + if vbias is None: + vbias = numpy.zeros(n_visible) # initialize v bias 0 + + + self.rng = rng + self.input = input + self.W = W + self.hbias = hbias + self.vbias = vbias + + + def contrastive_divergence(self, lr=0.1, k=1, input=None): + if input is not None: + self.input = input + + ''' CD-k ''' + ph_mean, ph_sample = self.sample_h_given_v(self.input) + + chain_start = ph_sample + + for step in xrange(k): + if step == 0: + nv_means, nv_samples,\ + nh_means, nh_samples = self.gibbs_hvh(chain_start) + else: + nv_means, nv_samples,\ + nh_means, nh_samples = self.gibbs_hvh(nh_samples) + + # chain_end = nv_samples + + + self.W += lr * (numpy.dot(self.input.T, ph_mean) + - numpy.dot(nv_samples.T, nh_means)) + self.vbias += lr * numpy.mean(self.input - nv_samples, axis=0) + self.hbias += lr * numpy.mean(ph_mean - nh_means, axis=0) + + # cost = self.get_reconstruction_cross_entropy() + # return cost + + + def sample_h_given_v(self, v0_sample): + h1_mean = self.propup(v0_sample) + h1_sample = self.rng.binomial(size=h1_mean.shape, # discrete: binomial + n=1, + p=h1_mean) + + return [h1_mean, h1_sample] + + + def sample_v_given_h(self, h0_sample): + v1_mean = self.propdown(h0_sample) + v1_sample = self.rng.binomial(size=v1_mean.shape, # discrete: binomial + n=1, + p=v1_mean) + + return [v1_mean, v1_sample] + + def propup(self, v): + pre_sigmoid_activation = numpy.dot(v, self.W) + self.hbias + return sigmoid(pre_sigmoid_activation) + + def propdown(self, h): + pre_sigmoid_activation = numpy.dot(h, self.W.T) + self.vbias + return sigmoid(pre_sigmoid_activation) + + + def gibbs_hvh(self, h0_sample): + v1_mean, v1_sample = self.sample_v_given_h(h0_sample) + h1_mean, h1_sample = self.sample_h_given_v(v1_sample) + + return [v1_mean, v1_sample, + h1_mean, 
h1_sample] + + + def get_reconstruction_cross_entropy(self): + pre_sigmoid_activation_h = numpy.dot(self.input, self.W) + self.hbias + sigmoid_activation_h = sigmoid(pre_sigmoid_activation_h) + + pre_sigmoid_activation_v = numpy.dot(sigmoid_activation_h, self.W.T) + self.vbias + sigmoid_activation_v = sigmoid(pre_sigmoid_activation_v) + + cross_entropy = - numpy.mean( + numpy.sum(self.input * numpy.log(sigmoid_activation_v) + + (1 - self.input) * numpy.log(1 - sigmoid_activation_v), + axis=1)) + + return cross_entropy + + def reconstruct(self, v): + h = sigmoid(numpy.dot(v, self.W) + self.hbias) + reconstructed_v = sigmoid(numpy.dot(h, self.W.T) + self.vbias) + return reconstructed_v + + + + + +def test_rbm(learning_rate=0.1, k=1, training_epochs=1000): + data = numpy.array([[1,1,1,0,0,0], + [1,0,1,0,0,0], + [1,1,1,0,0,0], + [0,0,1,1,1,0], + [0,0,1,1,0,0], + [0,0,1,1,1,0]]) + + + rng = numpy.random.RandomState(123) + + # construct RBM + rbm = RBM(input=data, n_visible=6, n_hidden=2, rng=rng) + + # train + for epoch in xrange(training_epochs): + rbm.contrastive_divergence(lr=learning_rate, k=k) + # cost = rbm.get_reconstruction_cross_entropy() + # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost + + + # test + v = numpy.array([[1, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 0]]) + + print rbm.reconstruct(v) + + + +if __name__ == "__main__": + test_rbm() diff --git a/DeepLearning/python/SdA.py b/DeepLearning/python/SdA.py new file mode 100644 index 00000000..5f8de37b --- /dev/null +++ b/DeepLearning/python/SdA.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from dA import dA +from utils import * + + +class SdA(object): + def __init__(self, input=None, label=None,\ + n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ + rng=None): + + self.x = input + self.y = label + + self.sigmoid_layers = [] + self.dA_layers = [] + self.n_layers = 
len(hidden_layer_sizes) # = len(self.rbm_layers) + + if rng is None: + rng = numpy.random.RandomState(1234) + + + assert self.n_layers > 0 + + + # construct multi-layer + for i in xrange(self.n_layers): + # layer_size + if i == 0: + input_size = n_ins + else: + input_size = hidden_layer_sizes[i - 1] + + # layer_input + if i == 0: + layer_input = self.x + else: + layer_input = self.sigmoid_layers[-1].sample_h_given_v() + + # construct sigmoid_layer + sigmoid_layer = HiddenLayer(input=layer_input, + n_in=input_size, + n_out=hidden_layer_sizes[i], + rng=rng, + activation=sigmoid) + self.sigmoid_layers.append(sigmoid_layer) + + + # construct dA_layers + dA_layer = dA(input=layer_input, + n_visible=input_size, + n_hidden=hidden_layer_sizes[i], + W=sigmoid_layer.W, + hbias=sigmoid_layer.b) + self.dA_layers.append(dA_layer) + + + # layer for output using Logistic Regression + self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(), + label=self.y, + n_in=hidden_layer_sizes[-1], + n_out=n_outs) + + # finetune cost: the negative log likelihood of the logistic regression layer + self.finetune_cost = self.log_layer.negative_log_likelihood() + + + def pretrain(self, lr=0.1, corruption_level=0.3, epochs=100): + for i in xrange(self.n_layers): + if i == 0: + layer_input = self.x + else: + layer_input = self.sigmoid_layers[i-1].sample_h_given_v(layer_input) + + da = self.dA_layers[i] + + for epoch in xrange(epochs): + da.train(lr=lr, corruption_level=corruption_level, input=layer_input) + + def finetune(self, lr=0.1, epochs=100): + layer_input = self.sigmoid_layers[-1].sample_h_given_v() + + # train log_layer + epoch = 0 + + while epoch < epochs: + self.log_layer.train(lr=lr, input=layer_input) + # self.finetune_cost = self.log_layer.negative_log_likelihood() + # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost + + lr *= 0.95 + epoch += 1 + + + def predict(self, x): + layer_input = x + + for i in xrange(self.n_layers): + 
sigmoid_layer = self.sigmoid_layers[i] + layer_input = sigmoid_layer.output(input=layer_input) + + return self.log_layer.predict(layer_input) + + + + +def test_SdA(pretrain_lr=0.1, pretraining_epochs=1000, corruption_level=0.3, \ + finetune_lr=0.1, finetune_epochs=200): + x = numpy.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0]]) + + y = numpy.array([[1, 0], + [1, 0], + [1, 0], + [1, 0], + [1, 0], + [0, 1], + [0, 1], + [0, 1], + [0, 1], + [0, 1]]) + + + rng = numpy.random.RandomState(123) + + # construct SdA + sda = SdA(input=x, label=y, \ + n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, rng=rng) + + # pre-training + sda.pretrain(lr=pretrain_lr, corruption_level=corruption_level, epochs=pretraining_epochs) + + # fine-tuning + sda.finetune(lr=finetune_lr, epochs=finetune_epochs) + + + # test + x = numpy.array([[1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1]]) + + print sda.predict(x) + + + +if __name__ == "__main__": + test_SdA() diff --git a/DeepLearning/python/dA.py b/DeepLearning/python/dA.py new file mode 100644 index 00000000..edbf6c76 --- /dev/null +++ b/DeepLearning/python/dA.py @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from utils import * + 
+ +class dA(object): + def __init__(self, input=None, n_visible=2, n_hidden=3, \ + W=None, hbias=None, vbias=None, rng=None): + + self.n_visible = n_visible # num of units in visible (input) layer + self.n_hidden = n_hidden # num of units in hidden layer + + if rng is None: + rng = numpy.random.RandomState(1234) + + if W is None: + a = 1. / n_visible + W = numpy.array(rng.uniform( # initialize W uniformly + low=-a, + high=a, + size=(n_visible, n_hidden))) + + if hbias is None: + hbias = numpy.zeros(n_hidden) # initialize h bias 0 + + if vbias is None: + vbias = numpy.zeros(n_visible) # initialize v bias 0 + + self.rng = rng + self.x = input + self.W = W + self.W_prime = self.W.T + self.hbias = hbias + self.vbias = vbias + + + def get_corrupted_input(self, input, corruption_level): + assert corruption_level < 1 + + return self.rng.binomial(size=input.shape, + n=1, + p=1-corruption_level) * input + + # Encode + def get_hidden_values(self, input): + return sigmoid(numpy.dot(input, self.W) + self.hbias) + + # Decode + def get_reconstructed_input(self, hidden): + return sigmoid(numpy.dot(hidden, self.W_prime) + self.vbias) + + + def train(self, lr=0.1, corruption_level=0.3, input=None): + if input is not None: + self.x = input + + x = self.x + tilde_x = self.get_corrupted_input(x, corruption_level) + y = self.get_hidden_values(tilde_x) + z = self.get_reconstructed_input(y) + + L_h2 = x - z + L_h1 = numpy.dot(L_h2, self.W) * y * (1 - y) + + L_vbias = L_h2 + L_hbias = L_h1 + L_W = numpy.dot(tilde_x.T, L_h1) + numpy.dot(L_h2.T, y) + + + self.W += lr * L_W + self.hbias += lr * numpy.mean(L_hbias, axis=0) + self.vbias += lr * numpy.mean(L_vbias, axis=0) + + + + def negative_log_likelihood(self, corruption_level=0.3): + tilde_x = self.get_corrupted_input(self.x, corruption_level) + y = self.get_hidden_values(tilde_x) + z = self.get_reconstructed_input(y) + + cross_entropy = - numpy.mean( + numpy.sum(self.x * numpy.log(z) + + (1 - self.x) * numpy.log(1 - z), + axis=1)) + + 
return cross_entropy + + + def reconstruct(self, x): + y = self.get_hidden_values(x) + z = self.get_reconstructed_input(y) + return z + + + +def test_dA(learning_rate=0.1, corruption_level=0.3, training_epochs=50): + data = numpy.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0]]) + + rng = numpy.random.RandomState(123) + + # construct dA + da = dA(input=data, n_visible=20, n_hidden=5, rng=rng) + + # train + for epoch in xrange(training_epochs): + da.train(lr=learning_rate, corruption_level=corruption_level) + # cost = da.negative_log_likelihood(corruption_level=corruption_level) + # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost + # learning_rate *= 0.95 + + + # test + x = numpy.array([[1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1], + [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0]]) + + print da.reconstruct(x) + + + +if __name__ == "__main__": + test_dA() diff --git a/DeepLearning/python/utils.py b/DeepLearning/python/utils.py new file mode 100644 index 00000000..7aca40dd --- /dev/null +++ b/DeepLearning/python/utils.py @@ -0,0 +1,40 @@ + +import numpy +numpy.seterr(all='ignore') + + +def sigmoid(x): + return 1. / (1 + numpy.exp(-x)) + + +def dsigmoid(x): + return x * (1. - x) + +def tanh(x): + return numpy.tanh(x) + +def dtanh(x): + return 1. 
- x * x + +def softmax(x): + e = numpy.exp(x - numpy.max(x)) # prevent overflow + if e.ndim == 1: + return e / numpy.sum(e, axis=0) + else: + return e / numpy.array([numpy.sum(e, axis=1)]).T # ndim = 2 + + +def ReLU(x): + return x * (x > 0) + +def dReLU(x): + return 1. * (x > 0) + + +# # probability density for the Gaussian dist +# def gaussian(x, mean=0.0, scale=1.0): +# s = 2 * numpy.power(scale, 2) +# e = numpy.exp( - numpy.power((x - mean), 2) / s ) + +# return e / numpy.square(numpy.pi * s) + diff --git a/do_closing.py b/do_closing.py new file mode 100644 index 00000000..320f1f72 --- /dev/null +++ b/do_closing.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from contextlib import contextmanager + +@contextmanager +def closing(fname): + f = None + try: + f = open(fname, 'r') + yield f + finally: + if f: + f.close() + +with closing('test.txt') as f: + print(f.read()) diff --git a/do_suppress.py b/do_suppress.py new file mode 100644 index 00000000..3a750c4b --- /dev/null +++ b/do_suppress.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os + +from contextlib import suppress + +with suppress(FileNotFoundError): + os.remove('tempfile.1') + os.remove('tempfile.2') + os.remove('tempfile.3') diff --git a/do_with.py b/do_with.py new file mode 100644 index 00000000..073399ed --- /dev/null +++ b/do_with.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from contextlib import contextmanager + +@contextmanager +def log(name): + print('[%s] start...' % name) + yield + print('[%s] end.' % name) + +with log('DEBUG'): + print('Hello, world!') + print('Hello, Python!')