|
| 1 | +/* |
| 2 | +Copyright (C) 2014 insane coder (http://insanecoding.blogspot.com/, http://chacha20.insanecoding.org/) |
| 3 | +
|
| 4 | +Permission to use, copy, modify, and distribute this software for any |
| 5 | +purpose with or without fee is hereby granted, provided that the above |
| 6 | +copyright notice and this permission notice appear in all copies. |
| 7 | +
|
| 8 | +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| 9 | +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 10 | +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
| 11 | +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 12 | +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| 13 | +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
| 14 | +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 15 | +
|
| 16 | +This implementation is intended to be simple; many optimizations can be performed. |
| 17 | +*/ |
| 18 | + |
| 19 | +#include <string.h> |
| 20 | +#include "chacha20_simple.h" |
| 21 | + |
| 22 | +void chacha20_setup(chacha20_ctx *ctx, const uint8_t *key, size_t length, uint8_t nonce[8]) |
| 23 | +{ |
| 24 | + const char *constants = (length == 32) ? "expand 32-byte k" : "expand 16-byte k"; |
| 25 | + |
| 26 | + ctx->schedule[0] = LE(constants + 0); |
| 27 | + ctx->schedule[1] = LE(constants + 4); |
| 28 | + ctx->schedule[2] = LE(constants + 8); |
| 29 | + ctx->schedule[3] = LE(constants + 12); |
| 30 | + ctx->schedule[4] = LE(key + 0); |
| 31 | + ctx->schedule[5] = LE(key + 4); |
| 32 | + ctx->schedule[6] = LE(key + 8); |
| 33 | + ctx->schedule[7] = LE(key + 12); |
| 34 | + ctx->schedule[8] = LE(key + 16 % length); |
| 35 | + ctx->schedule[9] = LE(key + 20 % length); |
| 36 | + ctx->schedule[10] = LE(key + 24 % length); |
| 37 | + ctx->schedule[11] = LE(key + 28 % length); |
| 38 | + //Surprise! This is really a block cipher in CTR mode |
| 39 | + ctx->schedule[12] = 0; //Counter |
| 40 | + ctx->schedule[13] = 0; //Counter |
| 41 | + ctx->schedule[14] = LE(nonce+0); |
| 42 | + ctx->schedule[15] = LE(nonce+4); |
| 43 | + |
| 44 | + ctx->available = 0; |
| 45 | +} |
| 46 | + |
| 47 | +void chacha20_counter_set(chacha20_ctx *ctx, uint64_t counter) |
| 48 | +{ |
| 49 | + ctx->schedule[12] = counter & UINT32_C(0xFFFFFFFF); |
| 50 | + ctx->schedule[13] = counter >> 32; |
| 51 | + ctx->available = 0; |
| 52 | +} |
| 53 | + |
| 54 | +#define QUARTERROUND(x, a, b, c, d) \ |
| 55 | + x[a] += x[b]; x[d] = ROTL32(x[d] ^ x[a], 16); \ |
| 56 | + x[c] += x[d]; x[b] = ROTL32(x[b] ^ x[c], 12); \ |
| 57 | + x[a] += x[b]; x[d] = ROTL32(x[d] ^ x[a], 8); \ |
| 58 | + x[c] += x[d]; x[b] = ROTL32(x[b] ^ x[c], 7); |
| 59 | + |
| 60 | +void chacha20_block(chacha20_ctx *ctx, uint32_t output[16]) |
| 61 | +{ |
| 62 | + uint32_t *const nonce = ctx->schedule+12; //12 is where the 128 bit counter is |
| 63 | + int i = 10; |
| 64 | + |
| 65 | + memcpy(output, ctx->schedule, sizeof(ctx->schedule)); |
| 66 | + |
| 67 | + while (i--) |
| 68 | + { |
| 69 | + QUARTERROUND(output, 0, 4, 8, 12) |
| 70 | + QUARTERROUND(output, 1, 5, 9, 13) |
| 71 | + QUARTERROUND(output, 2, 6, 10, 14) |
| 72 | + QUARTERROUND(output, 3, 7, 11, 15) |
| 73 | + QUARTERROUND(output, 0, 5, 10, 15) |
| 74 | + QUARTERROUND(output, 1, 6, 11, 12) |
| 75 | + QUARTERROUND(output, 2, 7, 8, 13) |
| 76 | + QUARTERROUND(output, 3, 4, 9, 14) |
| 77 | + } |
| 78 | + for (i = 0; i < 16; ++i) |
| 79 | + { |
| 80 | + uint32_t result = output[i] + ctx->schedule[i]; |
| 81 | + FROMLE((uint8_t *)(output+i), result); |
| 82 | + } |
| 83 | + |
| 84 | + /* |
| 85 | + Official specs calls for performing a 64 bit increment here, and limit usage to 2^64 blocks. |
| 86 | + However, recommendations for CTR mode in various papers recommend including the nonce component for a 128 bit increment. |
| 87 | + This implementation will remain compatible with the official up to 2^64 blocks, and past that point, the official is not intended to be used. |
| 88 | + This implementation with this change also allows this algorithm to become compatible for a Fortuna-like construct. |
| 89 | + */ |
| 90 | + if (!++nonce[0] && !++nonce[1] && !++nonce[2]) { ++nonce[3]; } |
| 91 | +} |
| 92 | + |
/*
XORs length bytes of keystream with the input and stores the result in the output.

keystream - first keystream byte to use.
in, out   - pointers to the caller's read/write cursors; both cursors are advanced
            past the bytes processed so the caller can continue from there.
length    - number of bytes to process. 0 is a no-op: nothing is read or written
            and the cursors are left untouched.
*/
static inline void chacha20_xor(uint8_t *keystream, const uint8_t **in, uint8_t **out, size_t length)
{
  const uint8_t *const end_keystream = keystream + length;

  //Test before the first byte so that an empty range touches no memory
  while (keystream < end_keystream)
  {
    *(*out)++ = *(*in)++ ^ *keystream++;
  }
}
| 98 | + |
| 99 | +void chacha20_encrypt(chacha20_ctx *ctx, const uint8_t *in, uint8_t *out, size_t length) |
| 100 | +{ |
| 101 | + if (length) |
| 102 | + { |
| 103 | + uint8_t *const k = (uint8_t *)ctx->keystream; |
| 104 | + |
| 105 | + //First, use any buffered keystream from previous calls |
| 106 | + if (ctx->available) |
| 107 | + { |
| 108 | + size_t amount = MIN(length, ctx->available); |
| 109 | + chacha20_xor(k + (sizeof(ctx->keystream)-ctx->available), &in, &out, amount); |
| 110 | + ctx->available -= amount; |
| 111 | + length -= amount; |
| 112 | + } |
| 113 | + |
| 114 | + //Then, handle new blocks |
| 115 | + while (length) |
| 116 | + { |
| 117 | + size_t amount = MIN(length, sizeof(ctx->keystream)); |
| 118 | + chacha20_block(ctx, ctx->keystream); |
| 119 | + chacha20_xor(k, &in, &out, amount); |
| 120 | + length -= amount; |
| 121 | + ctx->available = sizeof(ctx->keystream) - amount; |
| 122 | + } |
| 123 | + } |
| 124 | +} |
| 125 | + |
| 126 | +void chacha20_decrypt(chacha20_ctx *ctx, const uint8_t *in, uint8_t *out, size_t length) |
| 127 | +{ |
| 128 | + chacha20_encrypt(ctx, in, out, length); |
| 129 | +} |
0 commit comments