Skip to content

Commit

Permalink
sparse estimator support fidv2 (#208)
Browse files Browse the repository at this point in the history
* fid_v2

* support fid v2

* remove run.sh

* clint

* make test.sh exec

* use_fid_v2
  • Loading branch information
codemonkey-ll authored Aug 5, 2020
1 parent da3a332 commit d2a65f5
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 32 deletions.
1 change: 1 addition & 0 deletions example/sparse_model/follower.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def model_fn(model, features, labels, mode):
if args.fid_version == 1:
slots = [512, 1023]
else:
model.set_use_fid_v2(True)
slots = [512, 1023, 32767]
hash_size = 101
embed_size = 16
Expand Down
3 changes: 3 additions & 0 deletions example/sparse_model/leader.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ def model_fn(model, features, labels, mode):
hash_size = 101
embed_size = 16

if args.fid_version == 2:
model.set_use_fid_v2(True)

for slot_id in slots:
fs = model.add_feature_slot(slot_id, hash_size)
fc = model.add_feature_column(fs)
Expand Down
24 changes: 0 additions & 24 deletions example/sparse_model/run.sh

This file was deleted.

47 changes: 47 additions & 0 deletions example/sparse_model/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
# Integration test for the sparse-estimator example.
# Runs the leader/follower training pair twice:
#   round 1 — fid v1 (the default slot-id layout)
#   round 2 — fid v2 (--fid_version=2, wider slot-id space)
# Force CPU so the test does not grab GPUs on shared machines.
export CUDA_VISIBLE_DEVICES=""

# --- Round 1: fid v1 ---
python make_data.py --fid_version=1
# Follower runs in the background; leader runs in the foreground.
python follower.py --local-addr=localhost:50010 \
    --peer-addr=localhost:50011 \
    --worker-rank=0 \
    --data-path=data/follower/ \
    --checkpoint-path=model/follower \
    --save-checkpoint-steps=100 \
    --export-path=model/follower/saved_model \
    --sparse-estimator=True &

python leader.py --local-addr=localhost:50011 \
    --peer-addr=localhost:50010 \
    --worker-rank=0 \
    --data-path=data/leader/ \
    --checkpoint-path=model/leader \
    --save-checkpoint-steps=100 \
    --export-path=model/leader/saved_model \
    --sparse-estimator=True

# Wait for the backgrounded follower before touching its files.
wait

# --- Round 2: fid v2 ---
rm -rf data model
python make_data.py --fid_version=2
python follower.py --local-addr=localhost:50010 \
    --peer-addr=localhost:50011 \
    --worker-rank=0 \
    --data-path=data/follower/ \
    --checkpoint-path=model/follower \
    --save-checkpoint-steps=100 \
    --export-path=model/follower/saved_model \
    --sparse-estimator=True \
    --fid_version=2 &

python leader.py --local-addr=localhost:50011 \
    --peer-addr=localhost:50010 \
    --worker-rank=0 \
    --data-path=data/leader/ \
    --checkpoint-path=model/leader \
    --save-checkpoint-steps=100 \
    --export-path=model/leader/saved_model \
    --sparse-estimator=True \
    --fid_version=2

# BUG FIX: the original deleted data/model BEFORE `wait`, so the
# backgrounded follower could still be running while its data and
# checkpoint directories were removed. Wait first, then clean up.
wait
rm -rf data model
2 changes: 2 additions & 0 deletions fedlearner/trainer/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def __init__(self, config, devices=(None,)):
self._config = config
self._devices = devices
self._num_shards = len(devices)
self._use_fid_v2 = config['use_fid_v2']

self._weights = []
with tf.variable_scope("lagrange_embedding_pooling/%s"%config['name']):
Expand Down Expand Up @@ -98,6 +99,7 @@ def _lookup_one_shard(self, features, shard_id):
output = operator.lagrange_lite_ops.lagrange_embedding_pooling(
output_size=self._config['output_size'],
weight_sizes=self._config['weight_sizes'],
use_fid_v2=self._use_fid_v2,
num_shards=self._num_shards,
batch_size=batch_size,
instance_ids=instance_ids,
Expand Down
3 changes: 0 additions & 3 deletions fedlearner/trainer/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from __future__ import print_function

import tensorflow.compat.v1 as tf
from . import utils


class FeatureSlice(object):
Expand Down Expand Up @@ -89,8 +88,6 @@ def __init__(self,
bias_optimizer=None,
vec_initializer=None,
vec_optimizer=None):
assert 0 <= slot_id < utils.MAX_SLOTS, \
"Invalid slot id %d"%slot_id
assert dtype is None, "Only support float32 for now"
self._slot_id = slot_id
self._hash_table_size = int(hash_table_size)
Expand Down
4 changes: 2 additions & 2 deletions fedlearner/trainer/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def _multidevice_preprocess_fids(fids, config, num_shards):
ret = lagrange_lite_ops.lagrange_multi_device_preprocess_fid(
num_weights=config['num_groups'],
num_shards=num_shards,
use_fid_v2=False,
use_fid_v2=config['use_fid_v2'],
total_weights=num_shards*config['num_groups'],
instance_ids=fids.indices,
fids=fids.values,
Expand Down Expand Up @@ -97,7 +97,7 @@ def _get_control_input_by_name(name):
values = lagrange_lite_ops.lagrange_embedding_unpooling(
num_weights=num_weights,
weight_sizes=op.get_attr('weight_sizes'),
use_fid_v2=False,
use_fid_v2=op.get_attr('use_fid_v2'),
output_grad=grad,
instance_ids=op.inputs[1],
fids=op.inputs[2],
Expand Down
18 changes: 16 additions & 2 deletions fedlearner/trainer/sparse_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,18 @@ def __init__(self, role, bridge, example_ids, exporting=False,
self._slot_ids = []
self._feature_slots = {}
self._feature_column_v1s = {}
self._use_fid_v2 = False
self._num_embedding_groups = 3

def add_feature_slot(self, *args, **kwargs):
"""Create a FeatureSlot, validate its slot id, and register it.

All positional/keyword arguments are forwarded to
feature.FeatureSlot. The valid slot-id range depends on whether
fid v2 is enabled (set via set_use_fid_v2), so fid-v2 mode must be
chosen before slots are added. Returns the new FeatureSlot.
"""
# The model is frozen after finalization; no new slots may be added.
assert not self._frozen, "Cannot modify model after finalization"
fs = feature.FeatureSlot(*args, **kwargs)
# fid v2 widens the slot-id space, so the bound differs per version.
if self._use_fid_v2:
assert 0 <= fs.slot_id < utils.MAX_SLOTS_v2, \
"Invalid slot id %d"%fs.slot_id
else:
assert 0 <= fs.slot_id < utils.MAX_SLOTS, \
"Invalid slot id %d"%fs.slot_id
# Track the slot both in insertion order and by id for later lookup.
self._slot_ids.append(fs.slot_id)
self._feature_slots[fs.slot_id] = fs
return fs
Expand All @@ -72,6 +79,9 @@ def add_feature_column(self, *args, **kwargs):
self._feature_column_v1s[slot_id] = fc
return fc

def set_use_fid_v2(self, use_fid_v2):
# Opt this model into the fid v2 slot-id layout. Must be called
# before add_feature_slot, since that method reads _use_fid_v2 to
# pick the slot-id range check (MAX_SLOTS vs MAX_SLOTS_v2).
self._use_fid_v2 = use_fid_v2

def get_bias(self):
return self._bias_tensor

Expand All @@ -92,13 +102,15 @@ def _get_bias_slot_configs(self):
if not slot_list:
return None

bias_config = utils._compute_slot_config(slot_list, 1)
bias_config = utils._compute_slot_config(slot_list, 1,
self._use_fid_v2)
bias_config['name'] = 'bias'
bias_config['slot_list'] = slot_list
bias_config['initializers'] = [fs_map[i]._bias_initializer
for i in bias_config['weight_group_keys']]
bias_config['optimizers'] = [fs_map[i]._bias_optimizer
for i in bias_config['weight_group_keys']]
bias_config['use_fid_v2'] = self._use_fid_v2
return bias_config

def _get_vec_slot_configs(self):
Expand All @@ -120,13 +132,15 @@ def _get_vec_slot_configs(self):
return None

vec_config = utils._compute_slot_config(slot_list,
self._num_embedding_groups)
self._num_embedding_groups,
self._use_fid_v2)
vec_config['name'] = 'vec'
vec_config['slot_list'] = slot_list
vec_config['initializers'] = [fs_map[i]._vec_initializer
for i in vec_config['weight_group_keys']]
vec_config['optimizers'] = [fs_map[i]._vec_optimizer
for i in vec_config['weight_group_keys']]
vec_config['use_fid_v2'] = self._use_fid_v2
return vec_config

def get_feature_columns(self):
Expand Down
3 changes: 2 additions & 1 deletion integration_tests.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/bash

bash example/tree_model/test.sh
bash example/tree_model/test.sh
bash example/sparse_model/test.sh

0 comments on commit d2a65f5

Please sign in to comment.