Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions diffprivlib/models/forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
"""
Random Forest Classifier with Differential Privacy
"""

from sklearn.ensemble import RandomForestRegressor
from diffprivlib.mechanisms import Laplace
from diffprivlib.utils import warn_unused_args

from collections import namedtuple
import warnings

Expand Down Expand Up @@ -612,6 +617,39 @@ def apply(self, X):

return out

class DifferentiallyPrivateRandomForestRegressor(RandomForestRegressor):
def __init__(self, n_estimators=100, epsilon=1.0, bounds=None, random_state=None, **kwargs):
    """Configure the underlying forest and record the privacy settings.

    Parameters
    ----------
    n_estimators : int, default=100
        Forwarded to the parent ``RandomForestRegressor`` constructor.
    epsilon : float, default=1.0
        Privacy parameter; read later by ``_dp_mean``.
    bounds : tuple, optional
        ``(lower, upper)`` pair read by ``_dp_mean``; stored unchanged.
    random_state : optional
        Stored on the instance and handed to the Laplace mechanism that
        ``_dp_mean`` builds.
    **kwargs
        Any further keyword arguments, passed straight through to the
        parent constructor.
    """
    # random_state is a named parameter, so it can never arrive through
    # **kwargs; it is assigned explicitly after the parent has initialised.
    parent_options = dict(kwargs, n_estimators=n_estimators)
    super().__init__(**parent_options)
    self.random_state = random_state
    self.bounds = bounds
    self.epsilon = epsilon

def _dp_mean(self, array):
    """Return a differentially private estimate of the mean of ``array``.

    The values are clipped to the bounds, averaged, and perturbed with
    Laplace noise of sensitivity ``(upper - lower) / len(array)``.

    NOTE(review): a reviewer observed that nothing calls this helper yet.

    Parameters
    ----------
    array : array-like
        Non-empty collection of numeric values.

    Returns
    -------
    float
        The noisy mean produced by the Laplace mechanism.

    Raises
    ------
    ValueError
        If ``array`` is empty.
    """
    values = np.asarray(array)
    n_samples = len(values)
    if n_samples == 0:
        raise ValueError("Cannot compute a differentially private mean of an empty array.")

    # Compare with None explicitly: truth-testing array-valued bounds raises,
    # and a legitimate bounds object must never be mistaken for "missing".
    if self.bounds is not None:
        bounds = self.bounds
    else:
        # Local import keeps this block self-contained within the package.
        from diffprivlib.utils import PrivacyLeakWarning

        # Bounds taken from the data are not private, so tell the caller.
        warnings.warn(
            "Bounds have not been specified and will be calculated on the data provided. "
            "This will result in additional privacy leakage. To ensure differential privacy "
            "and no additional privacy leakage, specify bounds when constructing the model.",
            PrivacyLeakWarning,
        )
        bounds = (np.min(values), np.max(values))

    # Tolerate bounds given in reverse order (the original took the absolute
    # value of the difference for the same reason).
    lower, upper = bounds[0], bounds[1]
    if lower > upper:
        lower, upper = upper, lower

    # Sensitivity: the most the mean can move when one record changes, which
    # only holds if every value lies inside [lower, upper] -- hence the clip.
    sensitivity = float(upper - lower) / n_samples
    clipped_mean = float(np.mean(np.clip(values, lower, upper)))

    laplace_mechanism = Laplace(
        epsilon=self.epsilon,
        sensitivity=sensitivity,
        random_state=self.random_state,
    )

    # The mechanism's public entry point is randomise(value); it returns the
    # value with Laplace noise already added.
    return laplace_mechanism.randomise(clipped_mean)

def fit(self, X, y, sample_weight=None):
    """Fit the forest on the training data and return the fitted estimator.

    NOTE(review): training is delegated entirely to the parent ``fit``;
    this override adds no noise or other privacy step of its own.

    Parameters
    ----------
    X : array-like
        Training inputs, passed unchanged to the parent ``fit``.
    y : array-like
        Training targets, passed unchanged to the parent ``fit``.
    sample_weight : array-like, optional
        Per-sample weights, passed unchanged to the parent ``fit``.

    Returns
    -------
    self
        Whatever the parent ``fit`` returns (the fitted estimator under the
        scikit-learn convention), so calls can be chained.
    """
    # Return the parent's result instead of discarding it; estimators are
    # expected to return themselves from fit().
    return super().fit(X, y, sample_weight=sample_weight)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here you are fitting with the parent RandomForestRegressor, which means no differential privacy will be applied during training (i.e. the main point). Please go back and look at how they created the RandomForestClassifier via custom trees. Their trees randomly partition the domain (not using the Gini or entropy coefficient like in the vanilla RandomForestRegressor). Then, once the random trees are created, you can use your _dp_mean method on the leaves.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please check now


def predict(self, X):
    """Return the parent forest's predictions for ``X``.

    The call is passed straight through to the parent ``predict``; nothing
    is added to or removed from its result here.
    """
    return super().predict(X)

# Example usage:
# dp_rf_regressor = DifferentiallyPrivateRandomForestRegressor(n_estimators=100, epsilon=1.0)
# dp_rf_regressor.fit(X_train, y_train)
# predictions = dp_rf_regressor.predict(X_test)


class _Node:
"""Base storage structure for the nodes in a _FittingTree object."""
Expand Down