{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":306980630,"defaultBranch":"main","name":"x-transformers","ownerLogin":"lucidrains","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2020-10-24T22:13:25.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/108653?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1719716672.0","currentOid":""},"activityList":{"items":[{"before":"907861187a1fa55fa2708e841845add0e78b67a8","after":"59cee27386678e7779f35c9593825cd8ec843e99","ref":"refs/heads/main","pushedAt":"2024-06-30T03:04:29.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"set a default value for the transformer output softclamp value, and allow for boolean toggle","shortMessageHtmlLink":"set a default value for the transformer output softclamp value, and a…"}},{"before":"af345a3b3ad297d3b22fd6d97025765f6971dc90","after":"907861187a1fa55fa2708e841845add0e78b67a8","ref":"refs/heads/main","pushedAt":"2024-06-30T02:35:02.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"softclamping of attention logits needs to happen before masking","shortMessageHtmlLink":"softclamping of attention logits needs to happen before masking"}},{"before":"2e74ed07f7a5c5a0fc12c00ec8f0a5a346d5d91a","after":"af345a3b3ad297d3b22fd6d97025765f6971dc90","ref":"refs/heads/main","pushedAt":"2024-06-28T12:19:37.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"cleanup gamma unit offset in norms","shortMessageHtmlLink":"cleanup gamma unit offset in norms"}},{"before":"4f0fc676b6cadfc143f835f9537c3d9d246adb39","after":"2e74ed07f7a5c5a0fc12c00ec8f0a5a346d5d91a","ref":"refs/heads/main","pushedAt":"2024-06-27T17:58:24.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"this should always be on to save beginners from weight decay issues","shortMessageHtmlLink":"this should always be on to save beginners from weight decay issues"}},{"before":"044a62f10e3981d3bd600c0bbdef547b82463767","after":"4f0fc676b6cadfc143f835f9537c3d9d246adb39","ref":"refs/heads/main","pushedAt":"2024-06-27T17:51:23.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"this should always be on to save beginners from weight decay issues","shortMessageHtmlLink":"this should always be on to save beginners from weight decay issues"}},{"before":"cc3b663d7b6880cdccf40e4d4d5facd5a9b884a5","after":"044a62f10e3981d3bd600c0bbdef547b82463767","ref":"refs/heads/main","pushedAt":"2024-06-27T15:52:20.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"account for unit offset in layerscale too, also fix init of gamma","shortMessageHtmlLink":"account for unit offset in layerscale too, also fix init of gamma"}},{"before":"f933207ab46733a6ad3f88f1a1cc717c29484e74","after":"cc3b663d7b6880cdccf40e4d4d5facd5a9b884a5","ref":"refs/heads/main","pushedAt":"2024-06-27T15:33:57.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"add the cool trick that makes norm gammas weight decayable, from @OhadRubin . also throw in another optional soft clamp on the final logits, used in gemma2","shortMessageHtmlLink":"add the cool trick that makes norm gammas weight decayable, from @Oha…"}},{"before":"7e73791de9b7db0a41e0dac5ff4201aad5d90ee8","after":"f933207ab46733a6ad3f88f1a1cc717c29484e74","ref":"refs/heads/main","pushedAt":"2024-06-27T15:31:47.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"add the cool trick that makes norm gammas weight decayable, from @OhadRubin . also throw in another optional soft clamp on the final logits, used in gemma2","shortMessageHtmlLink":"add the cool trick that makes norm gammas weight decayable, from @Oha…"}},{"before":"94269b2e5de938c11a5922b8c76af5e2645cd0bb","after":"7e73791de9b7db0a41e0dac5ff4201aad5d90ee8","ref":"refs/heads/main","pushedAt":"2024-06-23T03:20:51.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"allow for `layers_execute_order` to be overridden on forward","shortMessageHtmlLink":"allow for layers_execute_order to be overridden on forward"}},{"before":"33ea37a6016123e8345557fa1d3ec54adcb91cb6","after":"94269b2e5de938c11a5922b8c76af5e2645cd0bb","ref":"refs/heads/main","pushedAt":"2024-06-23T03:20:18.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"allow for `laers_execute_order` to be overridden on forward","shortMessageHtmlLink":"allow for laers_execute_order to be overridden on forward"}},{"before":"830db47c4571036ce21dfbcb758411a6fceac28e","after":"33ea37a6016123e8345557fa1d3ec54adcb91cb6","ref":"refs/heads/main","pushedAt":"2024-06-19T23:45:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"rename to adaptive layerscale","shortMessageHtmlLink":"rename to adaptive layerscale"}},{"before":"2effe6629e3f47356d0f0e07ad037fa8a58e0681","after":"830db47c4571036ce21dfbcb758411a6fceac28e","ref":"refs/heads/main","pushedAt":"2024-06-19T16:56:30.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"complete the ada-ln zero conditioning used in DiT","shortMessageHtmlLink":"complete the ada-ln zero conditioning used in DiT"}},{"before":"41a3285a74fe1d6fec1cf7e8f79d04b6a9d8237c","after":"2effe6629e3f47356d0f0e07ad037fa8a58e0681","ref":"refs/heads/main","pushedAt":"2024-06-19T16:54:37.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"complete the ada-ln zero conditioning used in DiT","shortMessageHtmlLink":"complete the ada-ln zero conditioning used in DiT"}},{"before":"cff3b869b8d42bd356d05ebf81869420728924f4","after":"41a3285a74fe1d6fec1cf7e8f79d04b6a9d8237c","ref":"refs/heads/main","pushedAt":"2024-06-17T23:09:36.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"mlp for condition should only be instantiated if using adaptive ln or rmsnorm","shortMessageHtmlLink":"mlp for condition should only be instantiated if using adaptive ln or…"}},{"before":"7cb0ec939b3de0d09a00b2e3558c7d1631ffdc8a","after":"cff3b869b8d42bd356d05ebf81869420728924f4","ref":"refs/heads/main","pushedAt":"2024-06-17T13:59:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"allow for single condition or across sequence, also allow for a small MLP like a lot of classic unets","shortMessageHtmlLink":"allow for single condition or across sequence, also allow for a small…"}},{"before":"5f051514ec4a4641c620eeb0bd68b6e73b7eff04","after":"7cb0ec939b3de0d09a00b2e3558c7d1631ffdc8a","ref":"refs/heads/main","pushedAt":"2024-06-16T18:08:15.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"adaptive rmsnorm too","shortMessageHtmlLink":"adaptive rmsnorm too"}},{"before":"45f19909d1ce7dd4752329e0893cc790a12559e2","after":"5f051514ec4a4641c620eeb0bd68b6e73b7eff04","ref":"refs/heads/main","pushedAt":"2024-06-16T18:02:10.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"add adaptive layernorm for starters, to be used in mesh gpt","shortMessageHtmlLink":"add adaptive layernorm for starters, to be used in mesh gpt"}},{"before":"5a3151ea6f5b599dff39a9e20c40e87aec01d30d","after":"45f19909d1ce7dd4752329e0893cc790a12559e2","ref":"refs/heads/main","pushedAt":"2024-06-08T18:11:57.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"add ability to turn off all normalization for each layer","shortMessageHtmlLink":"add ability to turn off all normalization for each layer"}},{"before":"d5f968017746aac91e83638fc18773e3315fdc0a","after":"5a3151ea6f5b599dff39a9e20c40e87aec01d30d","ref":"refs/heads/main","pushedAt":"2024-06-08T17:31:24.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"add ability to turn off all normalization for each layer","shortMessageHtmlLink":"add ability to turn off all normalization for each layer"}},{"before":"569cf4ea0655b6d120aa1bc937f1cd09f397a878","after":"d5f968017746aac91e83638fc18773e3315fdc0a","ref":"refs/heads/main","pushedAt":"2024-06-08T17:14:12.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"add ability to turn off all normalization for each layer","shortMessageHtmlLink":"add ability to turn off all normalization for each layer"}},{"before":"0c6266ee44ea99a4449cd9201ba55924a6a7eae7","after":"569cf4ea0655b6d120aa1bc937f1cd09f397a878","ref":"refs/heads/main","pushedAt":"2024-06-08T15:58:04.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"add ability to turn off all normalization for each layer","shortMessageHtmlLink":"add ability to turn off all normalization for each layer"}},{"before":"0d00fca4076a5dd9d3197d1e2470bb2920393be2","after":"0c6266ee44ea99a4449cd9201ba55924a6a7eae7","ref":"refs/heads/main","pushedAt":"2024-06-04T17:44:46.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"fix talking heads in CoPE","shortMessageHtmlLink":"fix talking heads in CoPE"}},{"before":"046611ebd267c86f7336da8368fc66d4a406c17f","after":"0d00fca4076a5dd9d3197d1e2470bb2920393be2","ref":"refs/heads/main","pushedAt":"2024-06-04T14:22:19.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"optional talking heads to cope","shortMessageHtmlLink":"optional talking heads to cope"}},{"before":"7448c7af950d499617868331812432c89a27366c","after":"046611ebd267c86f7336da8368fc66d4a406c17f","ref":"refs/heads/main","pushedAt":"2024-06-04T03:42:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"default CoPE to what they did in the paper","shortMessageHtmlLink":"default CoPE to what they did in the paper"}},{"before":"306c1d92f4f7555434a81d1842785c26da5bf457","after":"7448c7af950d499617868331812432c89a27366c","ref":"refs/heads/main","pushedAt":"2024-06-04T00:40:58.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"some improvisation with cope","shortMessageHtmlLink":"some improvisation with cope"}},{"before":"8d3e1543002b932d686a43fa77287855530d5c65","after":"306c1d92f4f7555434a81d1842785c26da5bf457","ref":"refs/heads/main","pushedAt":"2024-06-04T00:20:30.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"add support for CoPE","shortMessageHtmlLink":"add support for CoPE"}},{"before":"541cf1d607f21ccdc37aab5648ec24646f1b439e","after":"8d3e1543002b932d686a43fa77287855530d5c65","ref":"refs/heads/main","pushedAt":"2024-06-04T00:07:04.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"add support for CoPE","shortMessageHtmlLink":"add support for CoPE"}},{"before":"2b3697ec170df67da4499027412f2b25269da409","after":null,"ref":"refs/heads/head-diversity","pushedAt":"2024-06-01T01:00:15.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"}},{"before":"536cf04fffc261c732ef502a74eabfbf95c8b95e","after":null,"ref":"refs/heads/migrate-rotary-to-less-confusing-pairing","pushedAt":"2024-06-01T01:00:14.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"}},{"before":"1ec66e9851c4231fbe82ecc6bf23030fa836e681","after":"541cf1d607f21ccdc37aab5648ec24646f1b439e","ref":"refs/heads/main","pushedAt":"2024-05-31T23:59:04.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"store attention type on intermediate","shortMessageHtmlLink":"store attention type on intermediate"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEcs8irgA","startCursor":null,"endCursor":null}},"title":"Activity · lucidrains/x-transformers"}