We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
I have my own pretrained Pegasus model and now want to fine-tune it with BigBird. This is my variable-mapping function:
import re
import collections


def get_assignment_map_from_checkpoint(tvars, init_checkpoint):
    """Map Pegasus checkpoint variables onto the matching graph variables.

    Every variable name stored in ``init_checkpoint`` is prefixed with
    ``pegasus/`` and rewritten with a fixed, ordered list of substring
    substitutions. Checkpoint variables whose rewritten name is not among
    ``tvars`` are skipped, so the result only covers names that exist on
    both sides.

    NOTE(review): ``tf`` is not imported by this snippet; it has to be in
    scope already (presumably ``import tensorflow.compat.v1 as tf`` --
    confirm against the surrounding script).

    Args:
        tvars: Iterable of graph variables exposing a ``.name`` attribute,
            e.g. ``'pegasus/encoder/LayerNorm/beta:0'``.
        init_checkpoint: Checkpoint path handed to
            ``tf.train.list_variables``.

    Returns:
        A tuple ``(assignment_map, initialized_variable_names)``.
        ``assignment_map`` is an ``OrderedDict`` from checkpoint variable
        name to the graph variable it should initialise.
        ``initialized_variable_names`` is a dict from ``'<graph name>:0'``
        to ``1`` for every graph variable that received a mapping.
    """
    # Substitutions run in this exact order: a longer pattern must be
    # handled before the shorter pattern it contains ('ffn/dense_1' before
    # 'ffn', 'memory_attention/output' before 'memory_attention').
    renames = (
        ('embeddings/weights', 'embeddings/word_embeddings'),
        ('self/output', 'output'),
        ('ffn/dense_1', 'output/dense'),
        ('ffn', 'intermediate'),
        ('memory_attention/output', 'attention/encdec_output'),
        ('memory_attention', 'attention/encdec'),
    )
    # Strips the trailing ':<digits>' from a variable name.
    output_suffix = re.compile(r'^(.*):\d+$')

    name_to_variable = collections.OrderedDict()
    for var in tvars:
        name = var.name
        match = output_suffix.match(name)
        if match is not None:
            name = match.group(1)
        name_to_variable[name] = var

    assignment_map = collections.OrderedDict()
    initialized_variable_names = {}
    for entry in tf.train.list_variables(init_checkpoint):
        # Only the name (first element) of each checkpoint entry is needed.
        checkpoint_name = entry[0]
        graph_name = 'pegasus/' + checkpoint_name
        for old, new in renames:
            graph_name = graph_name.replace(old, new)
        if graph_name not in name_to_variable:
            # No counterpart in the current graph: leave it unmapped.
            continue
        assignment_map[checkpoint_name] = name_to_variable[graph_name]
        initialized_variable_names[graph_name + ':0'] = 1
    return (assignment_map, initialized_variable_names)
Output:
OrderedDict([('decoder/LayerNorm/beta', <tf.Variable 'pegasus/decoder/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_0/attention/self/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_0/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_0/attention/self/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_0/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_0/attention/self/key/kernel', <tf.Variable 'pegasus/decoder/layer_0/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_0/attention/self/output/dense/kernel', <tf.Variable 'pegasus/decoder/layer_0/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_0/attention/self/query/kernel', <tf.Variable 'pegasus/decoder/layer_0/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_0/attention/self/value/kernel', <tf.Variable 'pegasus/decoder/layer_0/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_0/ffn/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_0/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_0/ffn/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_0/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_0/ffn/dense/bias', <tf.Variable 'pegasus/decoder/layer_0/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>), ('decoder/layer_0/ffn/dense/kernel', <tf.Variable 'pegasus/decoder/layer_0/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>), ('decoder/layer_0/ffn/dense_1/bias', <tf.Variable 'pegasus/decoder/layer_0/output/dense/bias:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_0/ffn/dense_1/kernel', <tf.Variable 'pegasus/decoder/layer_0/output/dense/kernel:0' shape=(3072, 512) 
dtype=float32_ref>), ('decoder/layer_0/memory_attention/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_0/attention/encdec/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_0/memory_attention/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_0/attention/encdec/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_0/memory_attention/key/kernel', <tf.Variable 'pegasus/decoder/layer_0/attention/encdec/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_0/memory_attention/output/dense/kernel', <tf.Variable 'pegasus/decoder/layer_0/attention/encdec_output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_0/memory_attention/query/kernel', <tf.Variable 'pegasus/decoder/layer_0/attention/encdec/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_0/memory_attention/value/kernel', <tf.Variable 'pegasus/decoder/layer_0/attention/encdec/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_1/attention/self/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_1/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_1/attention/self/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_1/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_1/attention/self/key/kernel', <tf.Variable 'pegasus/decoder/layer_1/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_1/attention/self/output/dense/kernel', <tf.Variable 'pegasus/decoder/layer_1/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_1/attention/self/query/kernel', <tf.Variable 'pegasus/decoder/layer_1/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_1/attention/self/value/kernel', <tf.Variable 'pegasus/decoder/layer_1/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_1/ffn/LayerNorm/beta', <tf.Variable 
'pegasus/decoder/layer_1/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_1/ffn/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_1/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_1/ffn/dense/bias', <tf.Variable 'pegasus/decoder/layer_1/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>), ('decoder/layer_1/ffn/dense/kernel', <tf.Variable 'pegasus/decoder/layer_1/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>), ('decoder/layer_1/ffn/dense_1/bias', <tf.Variable 'pegasus/decoder/layer_1/output/dense/bias:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_1/ffn/dense_1/kernel', <tf.Variable 'pegasus/decoder/layer_1/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>), ('decoder/layer_1/memory_attention/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_1/attention/encdec/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_1/memory_attention/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_1/attention/encdec/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_1/memory_attention/key/kernel', <tf.Variable 'pegasus/decoder/layer_1/attention/encdec/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_1/memory_attention/output/dense/kernel', <tf.Variable 'pegasus/decoder/layer_1/attention/encdec_output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_1/memory_attention/query/kernel', <tf.Variable 'pegasus/decoder/layer_1/attention/encdec/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_1/memory_attention/value/kernel', <tf.Variable 'pegasus/decoder/layer_1/attention/encdec/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_2/attention/self/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_2/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_2/attention/self/LayerNorm/gamma', <tf.Variable 
'pegasus/decoder/layer_2/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_2/attention/self/key/kernel', <tf.Variable 'pegasus/decoder/layer_2/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_2/attention/self/output/dense/kernel', <tf.Variable 'pegasus/decoder/layer_2/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_2/attention/self/query/kernel', <tf.Variable 'pegasus/decoder/layer_2/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_2/attention/self/value/kernel', <tf.Variable 'pegasus/decoder/layer_2/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_2/ffn/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_2/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_2/ffn/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_2/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_2/ffn/dense/bias', <tf.Variable 'pegasus/decoder/layer_2/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>), ('decoder/layer_2/ffn/dense/kernel', <tf.Variable 'pegasus/decoder/layer_2/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>), ('decoder/layer_2/ffn/dense_1/bias', <tf.Variable 'pegasus/decoder/layer_2/output/dense/bias:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_2/ffn/dense_1/kernel', <tf.Variable 'pegasus/decoder/layer_2/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>), ('decoder/layer_2/memory_attention/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_2/attention/encdec/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_2/memory_attention/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_2/attention/encdec/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_2/memory_attention/key/kernel', <tf.Variable 'pegasus/decoder/layer_2/attention/encdec/key/kernel:0' shape=(512, 
512) dtype=float32_ref>), ('decoder/layer_2/memory_attention/output/dense/kernel', <tf.Variable 'pegasus/decoder/layer_2/attention/encdec_output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_2/memory_attention/query/kernel', <tf.Variable 'pegasus/decoder/layer_2/attention/encdec/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_2/memory_attention/value/kernel', <tf.Variable 'pegasus/decoder/layer_2/attention/encdec/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_3/attention/self/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_3/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_3/attention/self/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_3/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_3/attention/self/key/kernel', <tf.Variable 'pegasus/decoder/layer_3/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_3/attention/self/output/dense/kernel', <tf.Variable 'pegasus/decoder/layer_3/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_3/attention/self/query/kernel', <tf.Variable 'pegasus/decoder/layer_3/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_3/attention/self/value/kernel', <tf.Variable 'pegasus/decoder/layer_3/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_3/ffn/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_3/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_3/ffn/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_3/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_3/ffn/dense/bias', <tf.Variable 'pegasus/decoder/layer_3/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>), ('decoder/layer_3/ffn/dense/kernel', <tf.Variable 'pegasus/decoder/layer_3/intermediate/dense/kernel:0' shape=(512, 3072) 
dtype=float32_ref>), ('decoder/layer_3/ffn/dense_1/bias', <tf.Variable 'pegasus/decoder/layer_3/output/dense/bias:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_3/ffn/dense_1/kernel', <tf.Variable 'pegasus/decoder/layer_3/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>), ('decoder/layer_3/memory_attention/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_3/attention/encdec/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_3/memory_attention/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_3/attention/encdec/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_3/memory_attention/key/kernel', <tf.Variable 'pegasus/decoder/layer_3/attention/encdec/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_3/memory_attention/output/dense/kernel', <tf.Variable 'pegasus/decoder/layer_3/attention/encdec_output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_3/memory_attention/query/kernel', <tf.Variable 'pegasus/decoder/layer_3/attention/encdec/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_3/memory_attention/value/kernel', <tf.Variable 'pegasus/decoder/layer_3/attention/encdec/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_4/attention/self/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_4/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_4/attention/self/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_4/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_4/attention/self/key/kernel', <tf.Variable 'pegasus/decoder/layer_4/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_4/attention/self/output/dense/kernel', <tf.Variable 'pegasus/decoder/layer_4/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_4/attention/self/query/kernel', <tf.Variable 'pegasus/decoder/layer_4/attention/self/query/kernel:0' 
shape=(512, 512) dtype=float32_ref>), ('decoder/layer_4/attention/self/value/kernel', <tf.Variable 'pegasus/decoder/layer_4/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_4/ffn/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_4/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_4/ffn/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_4/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_4/ffn/dense/bias', <tf.Variable 'pegasus/decoder/layer_4/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>), ('decoder/layer_4/ffn/dense/kernel', <tf.Variable 'pegasus/decoder/layer_4/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>), ('decoder/layer_4/ffn/dense_1/bias', <tf.Variable 'pegasus/decoder/layer_4/output/dense/bias:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_4/ffn/dense_1/kernel', <tf.Variable 'pegasus/decoder/layer_4/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>), ('decoder/layer_4/memory_attention/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_4/attention/encdec/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_4/memory_attention/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_4/attention/encdec/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_4/memory_attention/key/kernel', <tf.Variable 'pegasus/decoder/layer_4/attention/encdec/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_4/memory_attention/output/dense/kernel', <tf.Variable 'pegasus/decoder/layer_4/attention/encdec_output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_4/memory_attention/query/kernel', <tf.Variable 'pegasus/decoder/layer_4/attention/encdec/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_4/memory_attention/value/kernel', <tf.Variable 'pegasus/decoder/layer_4/attention/encdec/value/kernel:0' shape=(512, 512) dtype=float32_ref>), 
('decoder/layer_5/attention/self/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_5/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_5/attention/self/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_5/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_5/attention/self/key/kernel', <tf.Variable 'pegasus/decoder/layer_5/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_5/attention/self/output/dense/kernel', <tf.Variable 'pegasus/decoder/layer_5/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_5/attention/self/query/kernel', <tf.Variable 'pegasus/decoder/layer_5/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_5/attention/self/value/kernel', <tf.Variable 'pegasus/decoder/layer_5/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_5/ffn/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_5/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_5/ffn/LayerNorm/gamma', <tf.Variable 'pegasus/decoder/layer_5/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_5/ffn/dense/bias', <tf.Variable 'pegasus/decoder/layer_5/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>), ('decoder/layer_5/ffn/dense/kernel', <tf.Variable 'pegasus/decoder/layer_5/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>), ('decoder/layer_5/ffn/dense_1/bias', <tf.Variable 'pegasus/decoder/layer_5/output/dense/bias:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_5/ffn/dense_1/kernel', <tf.Variable 'pegasus/decoder/layer_5/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>), ('decoder/layer_5/memory_attention/LayerNorm/beta', <tf.Variable 'pegasus/decoder/layer_5/attention/encdec/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_5/memory_attention/LayerNorm/gamma', <tf.Variable 
'pegasus/decoder/layer_5/attention/encdec/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('decoder/layer_5/memory_attention/key/kernel', <tf.Variable 'pegasus/decoder/layer_5/attention/encdec/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_5/memory_attention/output/dense/kernel', <tf.Variable 'pegasus/decoder/layer_5/attention/encdec_output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_5/memory_attention/query/kernel', <tf.Variable 'pegasus/decoder/layer_5/attention/encdec/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('decoder/layer_5/memory_attention/value/kernel', <tf.Variable 'pegasus/decoder/layer_5/attention/encdec/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('embeddings/weights', <tf.Variable 'pegasus/embeddings/word_embeddings:0' shape=(32128, 512) dtype=float32_ref>), ('encoder/LayerNorm/beta', <tf.Variable 'pegasus/encoder/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/LayerNorm/gamma', <tf.Variable 'pegasus/encoder/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_0/attention/self/LayerNorm/beta', <tf.Variable 'pegasus/encoder/layer_0/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_0/attention/self/LayerNorm/gamma', <tf.Variable 'pegasus/encoder/layer_0/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_0/attention/self/key/kernel', <tf.Variable 'pegasus/encoder/layer_0/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_0/attention/self/output/dense/kernel', <tf.Variable 'pegasus/encoder/layer_0/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_0/attention/self/query/kernel', <tf.Variable 'pegasus/encoder/layer_0/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_0/attention/self/value/kernel', <tf.Variable 'pegasus/encoder/layer_0/attention/self/value/kernel:0' shape=(512, 512) 
dtype=float32_ref>), ('encoder/layer_0/ffn/LayerNorm/beta', <tf.Variable 'pegasus/encoder/layer_0/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_0/ffn/LayerNorm/gamma', <tf.Variable 'pegasus/encoder/layer_0/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_0/ffn/dense/bias', <tf.Variable 'pegasus/encoder/layer_0/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>), ('encoder/layer_0/ffn/dense/kernel', <tf.Variable 'pegasus/encoder/layer_0/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>), ('encoder/layer_0/ffn/dense_1/bias', <tf.Variable 'pegasus/encoder/layer_0/output/dense/bias:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_0/ffn/dense_1/kernel', <tf.Variable 'pegasus/encoder/layer_0/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>), ('encoder/layer_1/attention/self/LayerNorm/beta', <tf.Variable 'pegasus/encoder/layer_1/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_1/attention/self/LayerNorm/gamma', <tf.Variable 'pegasus/encoder/layer_1/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_1/attention/self/key/kernel', <tf.Variable 'pegasus/encoder/layer_1/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_1/attention/self/output/dense/kernel', <tf.Variable 'pegasus/encoder/layer_1/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_1/attention/self/query/kernel', <tf.Variable 'pegasus/encoder/layer_1/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_1/attention/self/value/kernel', <tf.Variable 'pegasus/encoder/layer_1/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_1/ffn/LayerNorm/beta', <tf.Variable 'pegasus/encoder/layer_1/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_1/ffn/LayerNorm/gamma', <tf.Variable 
'pegasus/encoder/layer_1/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_1/ffn/dense/bias', <tf.Variable 'pegasus/encoder/layer_1/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>), ('encoder/layer_1/ffn/dense/kernel', <tf.Variable 'pegasus/encoder/layer_1/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>), ('encoder/layer_1/ffn/dense_1/bias', <tf.Variable 'pegasus/encoder/layer_1/output/dense/bias:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_1/ffn/dense_1/kernel', <tf.Variable 'pegasus/encoder/layer_1/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>), ('encoder/layer_2/attention/self/LayerNorm/beta', <tf.Variable 'pegasus/encoder/layer_2/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_2/attention/self/LayerNorm/gamma', <tf.Variable 'pegasus/encoder/layer_2/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_2/attention/self/key/kernel', <tf.Variable 'pegasus/encoder/layer_2/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_2/attention/self/output/dense/kernel', <tf.Variable 'pegasus/encoder/layer_2/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_2/attention/self/query/kernel', <tf.Variable 'pegasus/encoder/layer_2/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_2/attention/self/value/kernel', <tf.Variable 'pegasus/encoder/layer_2/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_2/ffn/LayerNorm/beta', <tf.Variable 'pegasus/encoder/layer_2/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_2/ffn/LayerNorm/gamma', <tf.Variable 'pegasus/encoder/layer_2/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_2/ffn/dense/bias', <tf.Variable 'pegasus/encoder/layer_2/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>), 
('encoder/layer_2/ffn/dense/kernel', <tf.Variable 'pegasus/encoder/layer_2/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>), ('encoder/layer_2/ffn/dense_1/bias', <tf.Variable 'pegasus/encoder/layer_2/output/dense/bias:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_2/ffn/dense_1/kernel', <tf.Variable 'pegasus/encoder/layer_2/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>), ('encoder/layer_3/attention/self/LayerNorm/beta', <tf.Variable 'pegasus/encoder/layer_3/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_3/attention/self/LayerNorm/gamma', <tf.Variable 'pegasus/encoder/layer_3/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_3/attention/self/key/kernel', <tf.Variable 'pegasus/encoder/layer_3/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_3/attention/self/output/dense/kernel', <tf.Variable 'pegasus/encoder/layer_3/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_3/attention/self/query/kernel', <tf.Variable 'pegasus/encoder/layer_3/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_3/attention/self/value/kernel', <tf.Variable 'pegasus/encoder/layer_3/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_3/ffn/LayerNorm/beta', <tf.Variable 'pegasus/encoder/layer_3/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_3/ffn/LayerNorm/gamma', <tf.Variable 'pegasus/encoder/layer_3/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_3/ffn/dense/bias', <tf.Variable 'pegasus/encoder/layer_3/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>), ('encoder/layer_3/ffn/dense/kernel', <tf.Variable 'pegasus/encoder/layer_3/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>), ('encoder/layer_3/ffn/dense_1/bias', <tf.Variable 
'pegasus/encoder/layer_3/output/dense/bias:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_3/ffn/dense_1/kernel', <tf.Variable 'pegasus/encoder/layer_3/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>), ('encoder/layer_4/attention/self/LayerNorm/beta', <tf.Variable 'pegasus/encoder/layer_4/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_4/attention/self/LayerNorm/gamma', <tf.Variable 'pegasus/encoder/layer_4/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_4/attention/self/key/kernel', <tf.Variable 'pegasus/encoder/layer_4/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_4/attention/self/output/dense/kernel', <tf.Variable 'pegasus/encoder/layer_4/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_4/attention/self/query/kernel', <tf.Variable 'pegasus/encoder/layer_4/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_4/attention/self/value/kernel', <tf.Variable 'pegasus/encoder/layer_4/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_4/ffn/LayerNorm/beta', <tf.Variable 'pegasus/encoder/layer_4/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_4/ffn/LayerNorm/gamma', <tf.Variable 'pegasus/encoder/layer_4/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_4/ffn/dense/bias', <tf.Variable 'pegasus/encoder/layer_4/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>), ('encoder/layer_4/ffn/dense/kernel', <tf.Variable 'pegasus/encoder/layer_4/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>), ('encoder/layer_4/ffn/dense_1/bias', <tf.Variable 'pegasus/encoder/layer_4/output/dense/bias:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_4/ffn/dense_1/kernel', <tf.Variable 'pegasus/encoder/layer_4/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>), 
('encoder/layer_5/attention/self/LayerNorm/beta', <tf.Variable 'pegasus/encoder/layer_5/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_5/attention/self/LayerNorm/gamma', <tf.Variable 'pegasus/encoder/layer_5/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_5/attention/self/key/kernel', <tf.Variable 'pegasus/encoder/layer_5/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_5/attention/self/output/dense/kernel', <tf.Variable 'pegasus/encoder/layer_5/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_5/attention/self/query/kernel', <tf.Variable 'pegasus/encoder/layer_5/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_5/attention/self/value/kernel', <tf.Variable 'pegasus/encoder/layer_5/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>), ('encoder/layer_5/ffn/LayerNorm/beta', <tf.Variable 'pegasus/encoder/layer_5/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_5/ffn/LayerNorm/gamma', <tf.Variable 'pegasus/encoder/layer_5/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_5/ffn/dense/bias', <tf.Variable 'pegasus/encoder/layer_5/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>), ('encoder/layer_5/ffn/dense/kernel', <tf.Variable 'pegasus/encoder/layer_5/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>), ('encoder/layer_5/ffn/dense_1/bias', <tf.Variable 'pegasus/encoder/layer_5/output/dense/bias:0' shape=(512,) dtype=float32_ref>), ('encoder/layer_5/ffn/dense_1/kernel', <tf.Variable 'pegasus/encoder/layer_5/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>)])
My Pegasus config, copy-pasted from https://github.com/google-research/bigbird/blob/master/bigbird/summarization/pegasus_large.sh:
# Hyperparameters for the Pegasus-style BigBird model being fine-tuned.
# NOTE(review): 'scope' presumably yields the 'pegasus/' prefix seen on the
# graph variable names -- confirm against the model code.
bert_config = {
    # transformer basic configs
    'attention_probs_dropout_prob': 0.1,
    'hidden_act': 'relu',
    'hidden_dropout_prob': 0.1,
    'hidden_size': 512,
    'initializer_range': 0.02,
    'intermediate_size': 3072,
    'max_position_embeddings': 4096,
    'max_encoder_length': 2048,
    'max_decoder_length': 512,
    'num_attention_heads': 8,
    'num_hidden_layers': 6,
    'type_vocab_size': 2,
    'scope': 'pegasus',
    'use_bias': False,
    'rescale_embedding': True,
    'vocab_model_file': None,
    # sparse mask configs
    'attention_type': 'block_sparse',
    'norm_type': 'prenorm',
    'block_size': 64,
    'num_rand_blocks': 3,
    'vocab_size': 32128,
    'beam_size': 1,
    'alpha': 0.0,
    'couple_encoder_decoder': False,
    'num_warmup_steps': 10000,
    'learning_rate': 0.1,
    'label_smoothing': 0.1,
    'optimizer': 'Adafactor',
    # NOTE(review): confirm this matches the hardware actually used.
    'use_tpu': True,
}
I am not sure this mapping is correct, and fine-tuning is really slow, so any guidance on variable mapping would be really helpful.
The text was updated successfully, but these errors were encountered:
@manzilz
Sorry, something went wrong.
No branches or pull requests
I have my own pretrained Pegasus model and now want to fine-tune it with BigBird. This is my variable-mapping function:
Output:
My Pegasus config, copy-pasted from https://github.com/google-research/bigbird/blob/master/bigbird/summarization/pegasus_large.sh:
I am not sure this mapping is correct, and fine-tuning is really slow, so any guidance on variable mapping would be really helpful.
The text was updated successfully, but these errors were encountered: