diff --git a/Makefile b/Makefile index b962c2e..57a3b6b 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,11 @@ -source = parse.cpp parserstate.cpp parser.cpp templates.cpp terminal.cpp termemu.cpp parseraction.cpp terminalfunctions.cpp swrite.cpp terminalframebuffer.cpp terminaldispatcher.cpp terminaluserinput.cpp terminaldisplay.cpp network.cpp ntester.cpp -objects = parserstate.o parser.o templates.o terminal.o parseraction.o terminalfunctions.o swrite.o terminalframebuffer.o terminaldispatcher.o terminaluserinput.o terminaldisplay.o network.o +source = parse.cpp parserstate.cpp parser.cpp templates.cpp terminal.cpp termemu.cpp parseraction.cpp terminalfunctions.cpp swrite.cpp terminalframebuffer.cpp terminaldispatcher.cpp terminaluserinput.cpp terminaldisplay.cpp network.cpp ntester.cpp ocb.cpp base64.cpp encrypt.cpp decrypt.cpp crypto.cpp +objects = parserstate.o parser.o templates.o terminal.o parseraction.o terminalfunctions.o swrite.o terminalframebuffer.o terminaldispatcher.o terminaluserinput.o terminaldisplay.o network.o ocb.o base64.o crypto.o repos = templates.rpo -executables = parse termemu ntester +executables = parse termemu ntester encrypt decrypt CXX = g++ CXXFLAGS = -g --std=c++0x -pedantic -Werror -Wall -Wextra -Weffc++ -fno-implicit-templates -fno-default-inline -pipe -D_FILE_OFFSET_BITS=64 -D_XOPEN_SOURCE=500 -D_GNU_SOURCE -LIBS = -lutil +LIBS = -lutil -lssl all: $(executables) @@ -18,6 +18,12 @@ termemu: termemu.o $(objects) parse # serialize link steps because of -frepo ntester: ntester.o $(objects) termemu # serialize link steps because of -frepo $(CXX) $(CXXFLAGS) -o $@ ntester.o $(objects) $(LIBS) +encrypt: encrypt.o $(objects) ntester # serialize link steps because of -frepo + $(CXX) $(CXXFLAGS) -o $@ encrypt.o $(objects) $(LIBS) + +decrypt: decrypt.o $(objects) encrypt # serialize link steps because of -frepo + $(CXX) $(CXXFLAGS) -o $@ decrypt.o $(objects) $(LIBS) + templates.o: templates.cpp $(CXX) $(CXXFLAGS) -frepo -c -o $@ $< diff 
--git a/ae.hpp b/ae.hpp new file mode 100644 index 0000000..fb5c511 --- /dev/null +++ b/ae.hpp @@ -0,0 +1,182 @@ +/* --------------------------------------------------------------------------- + * + * AEAD API 0.12 - 13 July 2011 + * + * This file gives an interface appropriate for many authenticated + * encryption with associated data (AEAD) implementations. It does not try + * to accommodate all possible options or limitations that an implementation + * might have -- you should consult the documentation of your chosen + * implementation to find things like RFC 5116 constants, alignment + * requirements, whether the incremental interface is supported, etc. + * + * This file is in the public domain. It is provided "as is", without + * warranty of any kind. Use at your own risk. + * + * Comments are welcome: Ted Krovetz . + * + * ------------------------------------------------------------------------ */ + +#ifndef _AE_H_ +#define _AE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* -------------------------------------------------------------------------- + * + * Constants + * + * ----------------------------------------------------------------------- */ + +/* Return status codes: Negative return values indicate an error occurred. + * For full explanations of error values, consult the implementation's + * documentation. */ +#define AE_SUCCESS ( 0) /* Indicates successful completion of call */ +#define AE_INVALID (-1) /* Indicates bad tag during decryption */ +#define AE_NOT_SUPPORTED (-2) /* Indicates unsupported option requested */ + +/* Flags: When data can be processed "incrementally", these flags are used + * to indicate whether the submitted data is the last or not. 
*/ +#define AE_FINALIZE (1) /* This is the last of the data */ +#define AE_PENDING (0) /* More data is coming */ + +/* -------------------------------------------------------------------------- + * + * AEAD opaque structure definition + * + * ----------------------------------------------------------------------- */ + +typedef struct _ae_ctx ae_ctx; + +/* -------------------------------------------------------------------------- + * + * Data Structure Routines + * + * ----------------------------------------------------------------------- */ + +ae_ctx* ae_allocate (void *misc); /* Allocate ae_ctx, set optional ptr */ +void ae_free (ae_ctx *ctx); /* Deallocate ae_ctx struct */ +int ae_clear (ae_ctx *ctx); /* Undo initialization */ +int ae_ctx_sizeof(void); /* Return sizeof(ae_ctx) */ +/* ae_allocate() allocates an ae_ctx structure, but does not initialize it. + * ae_free() deallocates an ae_ctx structure, but does not zeroize it. + * ae_clear() zeroes sensitive values associated with an ae_ctx structure + * and deallocates any auxiliary structures allocated during ae_init(). + * ae_ctx_sizeof() returns sizeof(ae_ctx), to aid in any static allocations. + */ + +/* -------------------------------------------------------------------------- + * + * AEAD Routines + * + * ----------------------------------------------------------------------- */ + +int ae_init(ae_ctx *ctx, + const void *key, + int key_len, + int nonce_len, + int tag_len); +/* -------------------------------------------------------------------------- + * + * Initialize an ae_ctx context structure. + * + * Parameters: + * ctx - Pointer to an ae_ctx structure to be initialized + * key - Pointer to user-supplied key + * key_len - Length of key supplied, in bytes + * nonce_len - Length of nonces to be used for this key, in bytes + * tag_len - Length of tags to be produced for this key, in bytes + * + * Returns: + * AE_SUCCESS - Success. Ctx ready for use. 
+ * AE_NOT_SUPPORTED - An unsupported length was supplied. Ctx is untouched. + * Otherwise - Error. Check implementation documentation for codes. + * + * ----------------------------------------------------------------------- */ + +int ae_encrypt(ae_ctx *ctx, + const void *nonce, + const void *pt, + int pt_len, + const void *ad, + int ad_len, + void *ct, + void *tag, + int final); +/* -------------------------------------------------------------------------- + * + * Encrypt plaintext; provide for authentication of ciphertext/associated data. + * + * Parameters: + * ctx - Pointer to an ae_ctx structure initialized by ae_init. + * nonce - Pointer to a nonce_len (defined in ae_init) byte nonce. + * pt - Pointer to plaintext bytes to be encrypted. + * pt_len - number of bytes pointed to by pt. + * ad - Pointer to associated data. + * ad_len - number of bytes pointed to by ad. + * ct - Pointer to buffer to receive ciphertext encryption. + * tag - Pointer to receive authentication tag; or NULL + * if tag is to be bundled into the ciphertext. + * final - Non-zero if this call completes the plaintext being encrypted. + * + * If nonce!=NULL then a message is being initiated. If final!=0 + * then a message is being finalized. If final==0 or nonce==NULL + * then the incremental interface is being used. If nonce!=NULL and + * ad_len<0, then use same ad as last message. + * + * Returns: + * non-negative - Number of bytes written to ct. + * AE_NOT_SUPPORTED - Usage mode unsupported (eg, incremental and/or sticky). + * Otherwise - Error. Check implementation documentation for codes. 
+ * + * ----------------------------------------------------------------------- */ + +int ae_decrypt(ae_ctx *ctx, + const void *nonce, + const void *ct, + int ct_len, + const void *ad, + int ad_len, + void *pt, + const void *tag, + int final); +/* -------------------------------------------------------------------------- + * + * Decrypt ciphertext; provide authenticity of plaintext and associated data. + * + * Parameters: + * ctx - Pointer to an ae_ctx structure initialized by ae_init. + * nonce - Pointer to a nonce_len (defined in ae_init) byte nonce. + * ct - Pointer to ciphertext bytes to be decrypted. + * ct_len - number of bytes pointed to by ct. + * ad - Pointer to associated data. + * ad_len - number of bytes pointed to by ad. + * pt - Pointer to buffer to receive plaintext decryption. + * tag - Pointer to tag_len (defined in ae_init) bytes; or NULL + * if tag is bundled into the ciphertext. Non-NULL tag is only + * read when final is non-zero. + * final - Non-zero if this call completes the ciphertext being decrypted. + * + * If nonce!=NULL then "ct" points to the start of a ciphertext. If final!=0 + * then "ct" points to the final piece of ciphertext. If final==0 or nonce== + * NULL then the incremental interface is being used. If nonce!=NULL and + * ad_len<0, then use same ad as last message. + * + * Returns: + * non-negative - Number of bytes written to pt. + * AE_INVALID - Authentication failure. + * AE_NOT_SUPPORTED - Usage mode unsupported (eg, incremental and/or sticky). + * Otherwise - Error. Check implementation documentation for codes. + * + * NOTE !!! NOTE !!! -- The ciphertext should be assumed possibly inauthentic + * until it has been completely written and it is + * verified that this routine did not return AE_INVALID. 
+ * + * ----------------------------------------------------------------------- */ + +#ifdef __cplusplus +} /* closing brace for extern "C" */ +#endif + +#endif /* _AE_H_ */ diff --git a/base64.cpp b/base64.cpp new file mode 100644 index 0000000..028e2f4 --- /dev/null +++ b/base64.cpp @@ -0,0 +1,577 @@ +/* Taken from GNU coreutils */ + +/* base64.c -- Encode binary data using printable characters. + Copyright (C) 1999-2001, 2004-2006, 2009-2011 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Written by Simon Josefsson. Partially adapted from GNU MailUtils + * (mailbox/filter_trans.c, as of 2004-11-28). Improved by review + * from Paul Eggert, Bruno Haible, and Stepan Kasal. + * + * See also RFC 3548 . + * + * Be careful with error checking. Here is how you would typically + * use these functions: + * + * bool ok = base64_decode_alloc (in, inlen, &out, &outlen); + * if (!ok) + * FAIL: input was not valid base64 + * if (out == NULL) + * FAIL: memory allocation error + * OK: data in OUT/OUTLEN + * + * size_t outlen = base64_encode_alloc (in, inlen, &out); + * if (out == NULL && outlen == 0 && inlen != 0) + * FAIL: input too long + * if (out == NULL) + * FAIL: memory allocation error + * OK: data in OUT/OUTLEN. + * + */ + +// #include + +/* Get prototype. */ +#include "base64.h" + +/* Get malloc. 
/* Turn a plain 'char' into 'unsigned char' by implicit conversion,
   which is valid C89 and needs no cast.  */
static inline unsigned char
to_uchar (char ch)
{
  return ch;
}

/* Base64-encode the INLEN bytes at IN into OUT, which has room for
   OUTLEN bytes.  Output is produced until either the input or the
   output space runs out; a short OUT therefore receives a truncated
   encoding.  A terminating NUL is stored only if at least one byte of
   OUT is still free once encoding stops.  */
void
base64_encode (const char *restrict in, size_t inlen,
               char *restrict out, size_t outlen)
{
  static const char alphabet[65] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  size_t written = 0;

  for (size_t pos = 0; pos < inlen && written < outlen; pos += 3)
    {
      /* Number of input bytes available for this group (1, 2 or >= 3).
         Missing bytes contribute zero bits and become '=' padding.  */
      const size_t avail = inlen - pos;
      const unsigned char b0 = to_uchar (in[pos]);
      const unsigned char b1 = avail > 1 ? to_uchar (in[pos + 1]) : 0;
      const unsigned char b2 = avail > 2 ? to_uchar (in[pos + 2]) : 0;
      char quad[4];

      quad[0] = alphabet[(b0 >> 2) & 0x3f];
      quad[1] = alphabet[((b0 << 4) + (b1 >> 4)) & 0x3f];
      quad[2] = avail > 1 ? alphabet[((b1 << 2) + (b2 >> 6)) & 0x3f] : '=';
      quad[3] = avail > 2 ? alphabet[b2 & 0x3f] : '=';

      /* Emit as much of the quadruple as still fits.  */
      for (int k = 0; k < 4 && written < outlen; k++)
        out[written++] = quad[k];
    }

  if (written < outlen)
    out[written] = '\0';
}

/* Encode the INLEN bytes at IN into a freshly malloc'ed, NUL-terminated
   buffer stored in *OUT; the caller frees it.  Returns the encoded
   length without the NUL.  If the length computation overflows, *OUT
   is NULL and 0 is returned.  If malloc fails, *OUT is NULL and the
   size that was requested (encoded length + 1) is returned.  */
size_t
base64_encode_alloc (const char *in, size_t inlen, char **out)
{
  /* Encoded length plus the terminating NUL; this is the same value as
     1 + BASE64_LENGTH (inlen).  */
  const size_t bufsize = 1 + ((inlen + 2) / 3) * 4;

  /* Without overflow the encoding is never shorter than the input, so
     a buffer size below INLEN means the arithmetic above wrapped.  */
  if (inlen > bufsize)
    {
      *out = NULL;
      return 0;
    }

  char *buf = (char *) malloc (bufsize);
  *out = buf;
  if (buf == NULL)
    return bufsize;

  base64_encode (in, inlen, buf, bufsize);

  return bufsize - 1;
}
42 \ + : (_) == 'r' ? 43 \ + : (_) == 's' ? 44 \ + : (_) == 't' ? 45 \ + : (_) == 'u' ? 46 \ + : (_) == 'v' ? 47 \ + : (_) == 'w' ? 48 \ + : (_) == 'x' ? 49 \ + : (_) == 'y' ? 50 \ + : (_) == 'z' ? 51 \ + : (_) == '0' ? 52 \ + : (_) == '1' ? 53 \ + : (_) == '2' ? 54 \ + : (_) == '3' ? 55 \ + : (_) == '4' ? 56 \ + : (_) == '5' ? 57 \ + : (_) == '6' ? 58 \ + : (_) == '7' ? 59 \ + : (_) == '8' ? 60 \ + : (_) == '9' ? 61 \ + : (_) == '+' ? 62 \ + : (_) == '/' ? 63 \ + : -1) + +static const signed char b64[0x100] = { + B64 (0), B64 (1), B64 (2), B64 (3), + B64 (4), B64 (5), B64 (6), B64 (7), + B64 (8), B64 (9), B64 (10), B64 (11), + B64 (12), B64 (13), B64 (14), B64 (15), + B64 (16), B64 (17), B64 (18), B64 (19), + B64 (20), B64 (21), B64 (22), B64 (23), + B64 (24), B64 (25), B64 (26), B64 (27), + B64 (28), B64 (29), B64 (30), B64 (31), + B64 (32), B64 (33), B64 (34), B64 (35), + B64 (36), B64 (37), B64 (38), B64 (39), + B64 (40), B64 (41), B64 (42), B64 (43), + B64 (44), B64 (45), B64 (46), B64 (47), + B64 (48), B64 (49), B64 (50), B64 (51), + B64 (52), B64 (53), B64 (54), B64 (55), + B64 (56), B64 (57), B64 (58), B64 (59), + B64 (60), B64 (61), B64 (62), B64 (63), + B64 (64), B64 (65), B64 (66), B64 (67), + B64 (68), B64 (69), B64 (70), B64 (71), + B64 (72), B64 (73), B64 (74), B64 (75), + B64 (76), B64 (77), B64 (78), B64 (79), + B64 (80), B64 (81), B64 (82), B64 (83), + B64 (84), B64 (85), B64 (86), B64 (87), + B64 (88), B64 (89), B64 (90), B64 (91), + B64 (92), B64 (93), B64 (94), B64 (95), + B64 (96), B64 (97), B64 (98), B64 (99), + B64 (100), B64 (101), B64 (102), B64 (103), + B64 (104), B64 (105), B64 (106), B64 (107), + B64 (108), B64 (109), B64 (110), B64 (111), + B64 (112), B64 (113), B64 (114), B64 (115), + B64 (116), B64 (117), B64 (118), B64 (119), + B64 (120), B64 (121), B64 (122), B64 (123), + B64 (124), B64 (125), B64 (126), B64 (127), + B64 (128), B64 (129), B64 (130), B64 (131), + B64 (132), B64 (133), B64 (134), B64 (135), + B64 (136), B64 (137), B64 
(138), B64 (139), + B64 (140), B64 (141), B64 (142), B64 (143), + B64 (144), B64 (145), B64 (146), B64 (147), + B64 (148), B64 (149), B64 (150), B64 (151), + B64 (152), B64 (153), B64 (154), B64 (155), + B64 (156), B64 (157), B64 (158), B64 (159), + B64 (160), B64 (161), B64 (162), B64 (163), + B64 (164), B64 (165), B64 (166), B64 (167), + B64 (168), B64 (169), B64 (170), B64 (171), + B64 (172), B64 (173), B64 (174), B64 (175), + B64 (176), B64 (177), B64 (178), B64 (179), + B64 (180), B64 (181), B64 (182), B64 (183), + B64 (184), B64 (185), B64 (186), B64 (187), + B64 (188), B64 (189), B64 (190), B64 (191), + B64 (192), B64 (193), B64 (194), B64 (195), + B64 (196), B64 (197), B64 (198), B64 (199), + B64 (200), B64 (201), B64 (202), B64 (203), + B64 (204), B64 (205), B64 (206), B64 (207), + B64 (208), B64 (209), B64 (210), B64 (211), + B64 (212), B64 (213), B64 (214), B64 (215), + B64 (216), B64 (217), B64 (218), B64 (219), + B64 (220), B64 (221), B64 (222), B64 (223), + B64 (224), B64 (225), B64 (226), B64 (227), + B64 (228), B64 (229), B64 (230), B64 (231), + B64 (232), B64 (233), B64 (234), B64 (235), + B64 (236), B64 (237), B64 (238), B64 (239), + B64 (240), B64 (241), B64 (242), B64 (243), + B64 (244), B64 (245), B64 (246), B64 (247), + B64 (248), B64 (249), B64 (250), B64 (251), + B64 (252), B64 (253), B64 (254), B64 (255) +}; + +#if UCHAR_MAX == 255 +# define uchar_in_range(c) true +#else +# define uchar_in_range(c) ((c) <= 255) +#endif + +/* Return true if CH is a character from the Base64 alphabet, and + false otherwise. Note that '=' is padding and not considered to be + part of the alphabet. */ +bool +isbase64 (char ch) +{ + return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar (ch)]; +} + +/* Initialize decode-context buffer, CTX. */ +void +base64_decode_ctx_init (struct base64_decode_context *ctx) +{ + ctx->i = 0; +} + +/* If CTX->i is 0 or 4, there are four or more bytes in [*IN..IN_END), and + none of those four is a newline, then return *IN. 
Otherwise, copy up to + 4 - CTX->i non-newline bytes from that range into CTX->buf, starting at + index CTX->i and setting CTX->i to reflect the number of bytes copied, + and return CTX->buf. In either case, advance *IN to point to the byte + after the last one processed, and set *N_NON_NEWLINE to the number of + verified non-newline bytes accessible through the returned pointer. */ +static inline char * +get_4 (struct base64_decode_context *ctx, + char const *restrict *in, char const *restrict in_end, + size_t *n_non_newline) +{ + if (ctx->i == 4) + ctx->i = 0; + + if (ctx->i == 0) + { + char const *t = *in; + if (4 <= in_end - *in && memchr (t, '\n', 4) == NULL) + { + /* This is the common case: no newline. */ + *in += 4; + *n_non_newline = 4; + return (char *) t; + } + } + + { + /* Copy non-newline bytes into BUF. */ + char const *p = *in; + while (p < in_end) + { + char c = *p++; + if (c != '\n') + { + ctx->buf[ctx->i++] = c; + if (ctx->i == 4) + break; + } + } + + *in = p; + *n_non_newline = ctx->i; + return ctx->buf; + } +} + +#define return_false \ + do \ + { \ + *outp = out; \ + return false; \ + } \ + while (false) + +/* Decode up to four bytes of base64-encoded data, IN, of length INLEN + into the output buffer, *OUT, of size *OUTLEN bytes. Return true if + decoding is successful, false otherwise. If *OUTLEN is too small, + as many bytes as possible are written to *OUT. On return, advance + *OUT to point to the byte after the last one written, and decrement + *OUTLEN to reflect the number of bytes remaining in *OUT. 
/* Decode one base64 group starting at IN, where INLEN is the number of
   input characters available (it may exceed four; only in[0..3] are
   examined).  Decoded bytes are stored through *OUTP while *OUTLEFT is
   non-zero; each stored byte decrements *OUTLEFT.  When *OUTLEFT is
   zero the bytes are validated but dropped.

   Returns true when a full group of four characters was accepted.
   Returns false when the group is short or malformed.  Bytes decoded
   before the problem was detected may already have been written: on
   the early failures (fewer than two characters, or an invalid first
   or second character) *OUTP is left untouched, on every later failure
   the return_false macro first stores the advanced pointer in *OUTP.

   '=' padding is accepted only when INLEN is exactly 4, i.e. when the
   group is the last thing in the input.  */
static inline bool
decode_4 (char const *restrict in, size_t inlen,
          char *restrict *outp, size_t *outleft)
{
  char *out = *outp;
  if (inlen < 2)
    return false;

  if (!isbase64 (in[0]) || !isbase64 (in[1]))
    return false;

  /* First output byte: six bits of in[0], top two bits of in[1].  */
  if (*outleft)
    {
      *out++ = ((b64[to_uchar (in[0])] << 2)
                | (b64[to_uchar (in[1])] >> 4));
      --*outleft;
    }

  /* Only two characters: the byte above was emitted, but the group is
     incomplete.  */
  if (inlen == 2)
    return_false;

  if (in[2] == '=')
    {
      /* "xx==" form: must be exactly the final four characters.  */
      if (inlen != 4)
        return_false;

      if (in[3] != '=')
        return_false;
    }
  else
    {
      if (!isbase64 (in[2]))
        return_false;

      /* Second output byte: low four bits of in[1], top four of in[2].  */
      if (*outleft)
        {
          *out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0)
                    | (b64[to_uchar (in[2])] >> 2));
          --*outleft;
        }

      if (inlen == 3)
        return_false;

      if (in[3] == '=')
        {
          /* "xxx=" form: must be exactly the final four characters.  */
          if (inlen != 4)
            return_false;
        }
      else
        {
          if (!isbase64 (in[3]))
            return_false;

          /* Third output byte: low two bits of in[2], all of in[3].  */
          if (*outleft)
            {
              *out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0)
                        | b64[to_uchar (in[3])]);
              --*outleft;
            }
        }
    }

  *outp = out;
  return true;
}
/* Decode the INLEN base64 characters at IN into OUT, which has room
   for *OUTLEN bytes.  On return *OUTLEN holds the number of bytes
   written.  Returns true when all input was consumed as valid base64,
   false otherwise.

   With a non-NULL CTX, newlines in the input are skipped and a group
   of four characters that is split across calls is carried over in
   CTX; calling with INLEN == 0 flushes whatever CTX still holds.
   With CTX == NULL newlines are not skipped and the input is decoded
   as one unit.  */
bool
base64_decode_ctx (struct base64_decode_context *ctx,
                   const char *restrict in, size_t inlen,
                   char *restrict out, size_t *outlen)
{
  size_t outleft = *outlen;
  bool ignore_newlines = ctx != NULL;
  bool flush_ctx = false;
  unsigned int ctx_i = 0;

  if (ignore_newlines)
    {
      ctx_i = ctx->i;
      /* An empty input with a context means "flush the context".  */
      flush_ctx = inlen == 0;
    }


  while (true)
    {
      size_t outleft_save = outleft;
      /* Fast loop: decode straight from the input for as long as whole
         groups are accepted.  It is entered only when CTX held no
         partial group on entry.
         NOTE(review): ctx_i is sampled once before the loop and never
         refreshed; confirm this is intended.  */
      if (ctx_i == 0 && !flush_ctx)
        {
          while (true)
            {
              /* Save a copy of outleft, in case we need to re-parse this
                 block of four bytes.  */
              outleft_save = outleft;
              if (!decode_4 (in, inlen, &out, &outleft))
                break;

              in += 4;
              inlen -= 4;
            }
        }

      /* All input consumed and nothing to flush: done.  */
      if (inlen == 0 && !flush_ctx)
        break;

      /* Handle the common case of 72-byte wrapped lines.
         This also handles any other multiple-of-4-byte wrapping.  */
      if (inlen && *in == '\n' && ignore_newlines)
        {
          ++in;
          --inlen;
          continue;
        }

      /* Restore OUT and OUTLEFT.  The failed decode_4 call above may
         have written part of a group; rewind so that the group is
         decoded again from scratch below.  */
      out -= outleft_save - outleft;
      outleft = outleft_save;

      {
        char const *in_end = in + inlen;
        char const *non_nl;

        /* From here until it is recomputed at the bottom of this
           block, INLEN is the length of the group at NON_NL rather
           than the length of the remaining input.  */
        if (ignore_newlines)
          non_nl = get_4 (ctx, &in, in_end, &inlen);
        else
          non_nl = in;  /* Might have nl in this case. */

        /* If the input is empty or consists solely of newlines (0 non-newlines),
           then we're done.  Likewise if there are fewer than 4 bytes when not
           flushing context and not treating newlines as garbage.  */
        if (inlen == 0 || (inlen < 4 && !flush_ctx && ignore_newlines))
          {
            inlen = 0;
            break;
          }
        /* A failure here leaves INLEN non-zero, so false is returned.  */
        if (!decode_4 (non_nl, inlen, &out, &outleft))
          break;

        inlen = in_end - in;
      }
    }

  /* Convert "space left" into "bytes written".  */
  *outlen -= outleft;

  return inlen == 0;
}
/* Allocate an output buffer in *OUT and decode the INLEN base64
   characters at IN into it, using CTX as for base64_decode_ctx (CTX
   may be NULL).  The caller frees *OUT.

   Returns true on successful decoding, with the decoded size stored in
   *OUTLEN (OUTLEN may be NULL if the caller does not need it).

   Also returns true when memory allocation fails; in that case *OUT is
   NULL and *OUTLEN, if non-NULL, holds the size of the block that was
   requested.  Callers tell the two cases apart by checking *OUT.

   Returns false if the input was invalid; *OUT is then NULL and
   *OUTLEN is left untouched.  */
bool
base64_decode_alloc_ctx (struct base64_decode_context *ctx,
                         const char *in, size_t inlen, char **out,
                         size_t *outlen)
{
  /* This may allocate a few bytes too many, depending on input, but
     it's not worth the extra CPU time to compute the exact size.
     Up to three characters carried over in CTX can combine with the
     new input into one additional group of three bytes, hence the
     "+ 3" (with "+ 2" such a call would silently truncate the output).
     Dividing before multiplying avoids the possibility of overflow.  */
  size_t needlen = 3 * (inlen / 4) + 3;

  *out = (char *) malloc (needlen);
  if (!*out)
    {
      /* Report the size we failed to obtain, as documented above.  */
      if (outlen)
        *outlen = needlen;
      return true;
    }

  if (!base64_decode_ctx (ctx, in, inlen, *out, &needlen))
    {
      free (*out);
      *out = NULL;
      return false;
    }

  if (outlen)
    *outlen = needlen;

  return true;
}
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef BASE64_H +# define BASE64_H + +/* Get size_t. */ +# include + +/* Get bool. */ +# include + +/* This uses that the expression (n+(k-1))/k means the smallest + integer >= n/k, i.e., the ceiling of n/k. */ +# define BASE64_LENGTH(inlen) ((((inlen) + 2) / 3) * 4) + +struct base64_decode_context +{ + unsigned int i; + char buf[4]; +}; + +extern bool isbase64 (char ch); + +extern void base64_encode (const char *restrict in, size_t inlen, + char *restrict out, size_t outlen); + +extern size_t base64_encode_alloc (const char *in, size_t inlen, char **out); + +extern void base64_decode_ctx_init (struct base64_decode_context *ctx); + +extern bool base64_decode_ctx (struct base64_decode_context *ctx, + const char *restrict in, size_t inlen, + char *restrict out, size_t *outlen); + +extern bool base64_decode_alloc_ctx (struct base64_decode_context *ctx, + const char *in, size_t inlen, + char **out, size_t *outlen); + +#define base64_decode(in, inlen, out, outlen) \ + base64_decode_ctx (NULL, in, inlen, out, outlen) + +#define base64_decode_alloc(in, inlen, out, outlen) \ + base64_decode_alloc_ctx (NULL, in, inlen, out, outlen) + +#endif /* BASE64_H */ diff --git a/crypto.cpp b/crypto.cpp new file mode 100644 index 0000000..6431328 --- /dev/null +++ b/crypto.cpp @@ -0,0 +1,211 @@ +#include +#include + +#include "crypto.hpp" +#include "base64.h" + +using namespace std; + +const char rdev[] = "/dev/urandom"; + +static void * sse_alloc( int len ) +{ + void *ptr = NULL; + + if( (0 != posix_memalign( (void **)&ptr, 16, len )) || (ptr == NULL) ) { + throw std::bad_alloc(); + } + + return ptr; +} + +Base64Key::Base64Key( string printable_key ) +{ + if ( printable_key.length() != 22 ) { + throw CryptoException( "Key must be 22 letters long." 
); + } + + string base64 = printable_key + "=="; + + size_t len = 16; + if ( !base64_decode( base64.data(), 24, (char *)&key[ 0 ], &len ) ) { + throw CryptoException( "Key must be well-formed base64." ); + } + + if ( len != 16 ) { + throw CryptoException( "Key must represent 16 octets." ); + } + + /* to catch changes after the first 128 bits */ + if ( printable_key != this->printable_key() ) { + throw CryptoException( "Base64 key was not encoded 128-bit key." ); + } +} + +Base64Key::Base64Key() +{ + FILE *devrandom = fopen( rdev, "r" ); + if ( devrandom == NULL ) { + throw CryptoException( string( rdev ) + ": " + strerror( errno ) ); + } + + if ( 1 != fread( key, 16, 1, devrandom ) ) { + throw CryptoException( "Could not read from " + string( rdev ) ); + } + + if ( 0 != fclose( devrandom ) ) { + throw CryptoException( string( rdev ) + ": " + strerror( errno ) ); + } +} + +string Base64Key::printable_key( void ) +{ + char base64[ 25 ]; + + base64_encode( (char *)key, 16, base64, 25 ); + + if ( (base64[ 24 ] != 0) + || (base64[ 23 ] != '=') + || (base64[ 22 ] != '=') ) { + throw CryptoException( "Unexpected output from base64_encode." ); + } + + base64[ 22 ] = 0; + return string( base64 ); +} + +Session::Session( Base64Key s_key ) + : key( s_key ), ctx( NULL ) +{ + ctx = ae_allocate( NULL ); + if ( ctx == NULL ) { + throw CryptoException( "Could not allocate AES-OCB context." ); + } + + if ( AE_SUCCESS != ae_init( ctx, key.data(), 16, 12, 16 ) ) { + throw CryptoException( "Could not initialize AES-OCB context." ); + } +} + +Session::~Session() +{ + if ( ae_clear( ctx ) != AE_SUCCESS ) { + throw CryptoException( "Could not clear AES-OCB context." 
); + } + + ae_free( ctx ); +} + +Nonce::Nonce( uint64_t val ) +{ + uint64_t val_net = htobe64( val ); + + memset( bytes, 0, 4 ); + memcpy( bytes + 4, &val_net, 8 ); +} + +uint64_t Nonce::val( void ) +{ + uint64_t ret; + memcpy( &ret, bytes + 4, 8 ); + return be64toh( ret ); +} + +Nonce::Nonce( char *s_bytes, size_t len ) +{ + if ( len != 8 ) { + throw CryptoException( "Nonce representation must be 8 octets long." ); + } + + memset( bytes, 0, 4 ); + memcpy( bytes + 4, s_bytes, 8 ); +} + +Message::Message( char *nonce_bytes, size_t nonce_len, + char *text_bytes, size_t text_len ) + : nonce( nonce_bytes, nonce_len ), + text( (char *)text_bytes, text_len ) +{} + +Message::Message( Nonce s_nonce, string s_text ) + : nonce( s_nonce ), + text( s_text ) +{} + +string Session::encrypt( Message plaintext ) +{ + const size_t pt_len = plaintext.text.size(); + const int ciphertext_len = pt_len + 16; + + char *ciphertext = (char *)sse_alloc( ciphertext_len ); + char *pt = (char *)sse_alloc( pt_len ); + + memcpy( pt, plaintext.text.data(), plaintext.text.size() ); + + if ( (uint64_t( plaintext.nonce.data() ) & 0xf) != 0 ) { + throw CryptoException( "Bad alignment." ); + } + + if ( ciphertext_len != ae_encrypt( ctx, /* ctx */ + plaintext.nonce.data(), /* nonce */ + pt, /* pt */ + pt_len, /* pt_len */ + NULL, /* ad */ + 0, /* ad_len */ + ciphertext, /* ct */ + NULL, /* tag */ + AE_FINALIZE ) ) { /* final */ + free( pt ); + free( ciphertext ); + throw CryptoException( "ae_encrypt() returned error." ); + } + + string text( (char *)ciphertext, ciphertext_len ); + free( pt ); + free( ciphertext ); + + return plaintext.nonce.cpp_str() + text; +} + +Message Session::decrypt( string ciphertext ) +{ + if ( ciphertext.size() < 24 ) { + throw CryptoException( "Ciphertext must contain nonce and tag." 
); + } + + char *str = (char *)ciphertext.data(); + + int body_len = ciphertext.size() - 8; + int pt_len = body_len - 16; + + if ( pt_len <= 0 ) { /* super-assertion that does not equal AE_INVALID */ + fprintf( stderr, "BUG.\n" ); + exit( 1 ); + } + + Nonce __attribute__((__aligned__ (16))) nonce( str, 8 ); + char *body = (char *)sse_alloc( body_len ); + memcpy( body, str + 8, body_len ); + + char *plaintext = (char *)sse_alloc( pt_len ); + + if ( pt_len != ae_decrypt( ctx, /* ctx */ + nonce.data(), /* nonce */ + body, /* ct */ + body_len, /* ct_len */ + NULL, /* ad */ + 0, /* ad_len */ + plaintext, /* pt */ + NULL, /* tag */ + AE_FINALIZE ) ) { /* final */ + free( plaintext ); + free( body ); + throw CryptoException( "ae_decrypt() returned error." ); + } + + Message ret( nonce, string( plaintext, pt_len ) ); + free( plaintext ); + free( body ); + + return ret; +} diff --git a/crypto.hpp b/crypto.hpp new file mode 100644 index 0000000..e3be49f --- /dev/null +++ b/crypto.hpp @@ -0,0 +1,65 @@ +#ifndef CRYPTO_HPP +#define CRYPTO_HPP + +#include "ae.hpp" +#include + +using namespace std; + +class CryptoException { +public: + string text; + CryptoException( string s_text ) : text( s_text ) {}; +}; + +class Base64Key { +private: + unsigned char key[ 16 ]; + +public: + Base64Key(); /* random key */ + Base64Key( string printable_key ); + string printable_key( void ); + unsigned char *data( void ) { return key; } +}; + +class Nonce { +private: + char bytes[ 12 ]; + +public: + Nonce( uint64_t val ); + Nonce( char *s_bytes, size_t len ); + + string cpp_str( void ) { return string( (char *)( bytes + 4 ), 8 ); } + char *data( void ) { return bytes; } + uint64_t val( void ); +}; + +class Message { +public: + Nonce nonce; + string text; + + Message( char *nonce_bytes, size_t nonce_len, + char *text_bytes, size_t text_len ); + Message( Nonce s_nonce, string s_text ); +}; + +class Session { +private: + Base64Key key; + ae_ctx *ctx; + +public: + Session( Base64Key s_key ); + 
~Session(); + + string encrypt( Message plaintext ); + Message decrypt( string ciphertext ); + + Session( const Session & ); + Session & operator=( const Session & ); +}; + +#endif diff --git a/decrypt.cpp b/decrypt.cpp new file mode 100644 index 0000000..5f2d13b --- /dev/null +++ b/decrypt.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "crypto.hpp" + +int main( int argc, char *argv[] ) +{ + if ( argc != 2 ) { + fprintf( stderr, "Usage: %s KEY\n", argv[ 0 ] ); + return 1; + } + + try { + Base64Key key( argv[ 1 ] ); + Session session( key ); + + /* Read input */ + char *input = NULL; + int total_size = 0; + + while ( 1 ) { + unsigned char buf[ 16384 ]; + ssize_t bytes_read = read( STDIN_FILENO, buf, 16384 ); + if ( bytes_read == 0 ) { /* EOF */ + break; + } else if ( bytes_read < 0 ) { + perror( "read" ); + exit( 1 ); + } else { + input = (char *)realloc( input, total_size + bytes_read ); + assert( input ); + memcpy( input + total_size, buf, bytes_read ); + total_size += bytes_read; + } + } + + string ciphertext( input, total_size ); + free( input ); + + /* Decrypt message */ + + Message message = session.decrypt( ciphertext ); + + fprintf( stderr, "Nonce = %ld\n", + message.nonce.val() ); + cout << message.text; + } catch ( CryptoException e ) { + cerr << e.text << endl; + exit( 1 ); + } + + return 0; +} diff --git a/encrypt.cpp b/encrypt.cpp new file mode 100644 index 0000000..b6d6d95 --- /dev/null +++ b/encrypt.cpp @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "crypto.hpp" + +long int myatoi( char *str ) +{ + char *end; + + errno = 0; + long int ret = strtol( str, &end, 10 ); + + if ( ( errno != 0 ) + || ( end != str + strlen( str ) ) ) { + throw CryptoException( "Bad integer." 
); + } + + return ret; +} + +int main( int argc, char *argv[] ) +{ + if ( argc != 2 ) { + fprintf( stderr, "Usage: %s NONCE\n", argv[ 0 ] ); + return 1; + } + + try { + Base64Key key; + Session session( key ); + Nonce nonce( myatoi( argv[ 1 ] ) ); + + /* Read input */ + char *input = NULL; + int total_size = 0; + + while ( 1 ) { + unsigned char buf[ 16384 ]; + ssize_t bytes_read = read( STDIN_FILENO, buf, 16384 ); + if ( bytes_read == 0 ) { /* EOF */ + break; + } else if ( bytes_read < 0 ) { + perror( "read" ); + exit( 1 ); + } else { + input = (char *)realloc( input, total_size + bytes_read ); + assert( input ); + memcpy( input + total_size, buf, bytes_read ); + total_size += bytes_read; + } + } + + string plaintext( input, total_size ); + free( input ); + + /* Encrypt message */ + + string ciphertext = session.encrypt( Message( nonce, plaintext ) ); + + cerr << "Key: " << key.printable_key() << endl; + + cout << ciphertext; + } catch ( CryptoException e ) { + cerr << e.text << endl; + exit( 1 ); + } + + return 0; +} diff --git a/grant.htm b/grant.htm new file mode 100644 index 0000000..707d968 --- /dev/null +++ b/grant.htm @@ -0,0 +1,38 @@ +OCB - An Authenticated-Encryption Scheme - GPL Patent Grant - Rogaway + + +

OCB: + Patent Grant for GNU GPL

+ +Whereas I, Phillip Rogaway (hereinafter "Inventor") have sought +patent protection for certain technology +(hereinafter "Patented Technology"), +and Inventor wishes to aid the Free Software Foundation in achieving its goals, +and Inventor wishes to increase public awareness of Patented Technology, +Inventor hereby grants a fully paid-up, nonexclusive, +royalty-free license to +practice any patents claiming priority to the +patent applications below ("the Patents") +if practiced by +software distributed +under the terms of any version of +the GNU General Public License as published by the Free Software Foundation, +59 Temple Place, Suite 330, Boston, MA 02111. +Inventor reserves all other rights, including without limitation +licensing for software not distributed under the GNU General Public License. + +

The patents:

+ + +
    +
  • +09/918,615 - +Method and Apparatus for Facilitating Efficient Authenticated Encryption. + +
  • +09/948,084 - +Method and Apparatus for Realizing a Parallelizable Variable-Input-Length +Pseudorandom Function. +
+ + diff --git a/ocb.cpp b/ocb.cpp new file mode 100644 index 0000000..3ac86cf --- /dev/null +++ b/ocb.cpp @@ -0,0 +1,1226 @@ +/*------------------------------------------------------------------------ +/ OCB Version 3 Reference Code (Optimized C) Last modified 13-JUL-2011 +/------------------------------------------------------------------------- +/ Copyright (c) 2011 Ted Krovetz. +/ +/ Permission to use, copy, modify, and/or distribute this software for any +/ purpose with or without fee is hereby granted, provided that the above +/ copyright notice and this permission notice appear in all copies. +/ +/ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +/ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +/ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +/ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +/ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +/ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +/ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +/ +/ Phillip Rogaway holds patents relevant to OCB. See the following for +/ his patent grant: http://www.cs.ucdavis.edu/~rogaway/ocb/grant.htm +/ +/ Comments are welcome: Ted Krovetz - Dedicated to Laurel K +/------------------------------------------------------------------------- */ + +/* ----------------------------------------------------------------------- */ +/* Usage notes */ +/* ----------------------------------------------------------------------- */ + +/* - When AE_PENDING is passed as the 'final' parameter of any function, +/ the length parameters must be a multiple of (BPI*16). +/ - When available, SSE or AltiVec registers are used to manipulate data. +/ So, when on machines with these facilities, all pointers passed to +/ any function should be 16-byte aligned. 
+/ - Plaintext and ciphertext pointers may be equal (ie, plaintext gets +/ encrypted in-place), but no other pair of pointers may be equal. +/ - This code assumes all x86 processors have SSE2 and SSSE3 instructions +/ when compiling under MSVC. If untrue, alter the #define. +/ - This code is tested for C99 and recent versions of GCC and MSVC. */ + +/* ----------------------------------------------------------------------- */ +/* User configuration options */ +/* ----------------------------------------------------------------------- */ + +/* Set the AES key length to use and length of authentication tag to produce. +/ Setting either to 0 requires the value be set at runtime via ae_init(). +/ Some optimizations occur for each when set to a fixed value. */ +#define OCB_KEY_LEN 16 /* 0, 16, 24 or 32. 0 means set in ae_init */ +#define OCB_TAG_LEN 16 /* 0 to 16. 0 means set in ae_init */ + +/* This implementation has built-in support for multiple AES APIs. Set any +/ one of the following to non-zero to specify which to use. */ +#define USE_OPENSSL_AES 1 /* http://openssl.org */ +#define USE_REFERENCE_AES 0 /* Internet search: rijndael-alg-fst.c */ +#define USE_AES_NI 0 /* Uses compiler's intrinsics */ + +/* During encryption and decryption, various "L values" are required. +/ The L values can be precomputed during initialization (requiring extra +/ space in ae_ctx), generated as needed (slightly slowing encryption and +/ decryption), or some combination of the two. L_TABLE_SZ specifies how many +/ L values to precompute. L_TABLE_SZ must be at least 3. L_TABLE_SZ*16 bytes +/ are used for L values in ae_ctx. Plaintext and ciphertexts shorter than +/ 2^L_TABLE_SZ blocks need no L values calculated dynamically. */ +#define L_TABLE_SZ 16 + +/* Set L_TABLE_SZ_IS_ENOUGH non-zero iff you know that all plaintexts +/ will be shorter than 2^(L_TABLE_SZ+4) bytes in length. This results +/ in better performance. 
*/ +#define L_TABLE_SZ_IS_ENOUGH 1 + +/* ----------------------------------------------------------------------- */ +/* Includes and compiler specific definitions */ +/* ----------------------------------------------------------------------- */ + +#include "ae.hpp" +#include +#include + +/* Define standard sized integers */ +#if defined(_MSC_VER) && (_MSC_VER < 1600) + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; + typedef __int64 int64_t; +#else + #include +#endif + +/* Compiler-specific intrinsics and fixes: bswap64, ntz */ +#if _MSC_VER + #define inline __inline /* MSVC doesn't recognize "inline" in C */ + #define restrict __restrict /* MSVC doesn't recognize "restrict" in C */ + #define __SSE2__ (_M_IX86 || _M_AMD64 || _M_X64) /* Assume SSE2 */ + #define __SSSE3__ (_M_IX86 || _M_AMD64 || _M_X64) /* Assume SSSE3 */ + #include + #pragma intrinsic(_byteswap_uint64, _BitScanForward, memcpy) + #define bswap64(x) _byteswap_uint64(x) + static inline unsigned ntz(unsigned x) {_BitScanForward(&x,x);return x;} +#elif __GNUC__ + #define inline __inline__ /* No "inline" in GCC ansi C mode */ + #define restrict __restrict__ /* No "restrict" in GCC ansi C mode */ + #define bswap64(x) __builtin_bswap64(x) /* Assuming GCC 4.3+ */ + #define ntz(x) __builtin_ctz((unsigned)(x)) /* Assuming GCC 3.4+ */ +#else /* Assume some C99 features: stdint.h, inline, restrict */ + #define bswap32(x) \ + ((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >> 8) | \ + (((x) & 0x0000ff00u) << 8) | (((x) & 0x000000ffu) << 24)) + + static inline uint64_t bswap64(uint64_t x) { + union { uint64_t u64; uint32_t u32[2]; } in, out; + in.u64 = x; + out.u32[0] = bswap32(in.u32[1]); + out.u32[1] = bswap32(in.u32[0]); + return out.u64; + } + + #if (L_TABLE_SZ <= 9) && (L_TABLE_SZ_IS_ENOUGH) /* < 2^13 byte texts */ + static inline unsigned ntz(unsigned x) { + static const unsigned char tz_table[] = {0, + 
2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,6,2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,7, + 2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,6,2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,8, + 2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,6,2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,7, + 2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,6,2,3,2,4,2,3,2,5,2,3,2,4,2,3,2}; + return tz_table[x/4]; + } + #else /* From http://supertech.csail.mit.edu/papers/debruijn.pdf */ + static inline unsigned ntz(unsigned x) { + static const unsigned char tz_table[32] = + { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; + return tz_table[((uint32_t)((x & -x) * 0x077CB531u)) >> 27]; + } + #endif +#endif + +/* ----------------------------------------------------------------------- */ +/* Define blocks and operations -- Patch if incorrect on your compiler. */ +/* ----------------------------------------------------------------------- */ + +#if __SSE2__ + #include /* SSE instructions and _mm_malloc */ + #include /* SSE2 instructions */ + typedef __m128i block; + #define xor_block(x,y) _mm_xor_si128(x,y) + #define zero_block() _mm_setzero_si128() + #define unequal_blocks(x,y) \ + (_mm_movemask_epi8(_mm_cmpeq_epi8(x,y)) != 0xffff) + #if __SSSE3__ || USE_AES_NI + #include /* SSSE3 instructions */ + #define swap_if_le(b) \ + _mm_shuffle_epi8(b,_mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15)) + #else + static inline block swap_if_le(block b) { + block a = _mm_shuffle_epi32 (b, _MM_SHUFFLE(0,1,2,3)); + a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(2,3,0,1)); + a = _mm_shufflelo_epi16(a, _MM_SHUFFLE(2,3,0,1)); + return _mm_xor_si128(_mm_srli_epi16(a,8), _mm_slli_epi16(a,8)); + } + #endif + static inline block gen_offset(uint64_t KtopStr[3], unsigned bot) { + block hi = _mm_load_si128((__m128i *)(KtopStr+0)); /* hi = B A */ + block lo = _mm_loadu_si128((__m128i *)(KtopStr+1)); /* lo = C B */ + __m128i lshift = _mm_cvtsi32_si128(bot); + __m128i rshift = _mm_cvtsi32_si128(64-bot); + lo = 
_mm_xor_si128(_mm_sll_epi64(hi,lshift),_mm_srl_epi64(lo,rshift)); + #if __SSSE3__ || USE_AES_NI + return _mm_shuffle_epi8(lo,_mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); + #else + return swap_if_le(_mm_shuffle_epi32(lo, _MM_SHUFFLE(1,0,3,2))); + #endif + } + static inline block double_block(block bl) { + const __m128i mask = _mm_set_epi32(135,1,1,1); + __m128i tmp = _mm_srai_epi32(bl, 31); + tmp = _mm_and_si128(tmp, mask); + tmp = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2,1,0,3)); + bl = _mm_slli_epi32(bl, 1); + return _mm_xor_si128(bl,tmp); + } +#elif __ALTIVEC__ + #include + typedef vector unsigned block; + #define xor_block(x,y) vec_xor(x,y) + #define zero_block() vec_splat_u32(0) + #define unequal_blocks(x,y) vec_any_ne(x,y) + #define swap_if_le(b) (b) + #if __PPC64__ + block gen_offset(uint64_t KtopStr[3], unsigned bot) { + union {uint64_t u64[2]; block bl;} rval; + rval.u64[0] = (KtopStr[0] << bot) | (KtopStr[1] >> (64-bot)); + rval.u64[1] = (KtopStr[1] << bot) | (KtopStr[2] >> (64-bot)); + return rval.bl; + } + #else + /* Special handling: Shifts are mod 32, and no 64-bit types */ + block gen_offset(uint64_t KtopStr[3], unsigned bot) { + const vector unsigned k32 = {32,32,32,32}; + vector unsigned hi = *(vector unsigned *)(KtopStr+0); + vector unsigned lo = *(vector unsigned *)(KtopStr+2); + vector unsigned bot_vec; + if (bot < 32) { + lo = vec_sld(hi,lo,4); + } else { + vector unsigned t = vec_sld(hi,lo,4); + lo = vec_sld(hi,lo,8); + hi = t; + bot = bot - 32; + } + if (bot == 0) return hi; + *(unsigned *)&bot_vec = bot; + vector unsigned lshift = vec_splat(bot_vec,0); + vector unsigned rshift = vec_sub(k32,lshift); + hi = vec_sl(hi,lshift); + lo = vec_sr(lo,rshift); + return vec_xor(hi,lo); + } + #endif + static inline block double_block(block b) { + const vector unsigned char mask = {135,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; + const vector unsigned char perm = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0}; + const vector unsigned char shift7 = vec_splat_u8(7); + 
const vector unsigned char shift1 = vec_splat_u8(1); + vector unsigned char c = (vector unsigned char)b; + vector unsigned char t = vec_sra(c,shift7); + t = vec_and(t,mask); + t = vec_perm(t,t,perm); + c = vec_sl(c,shift1); + return (block)vec_xor(c,t); + } +#elif __ARM_NEON__ + #include + typedef int8x16_t block; /* Yay! Endian-neutral reads! */ + #define xor_block(x,y) veorq_s8(x,y) + #define zero_block() vdupq_n_s8(0) + static inline int unequal_blocks(block a, block b) { + int64x2_t t=veorq_s64((int64x2_t)a,(int64x2_t)b); + return (vgetq_lane_s64(t,0)|vgetq_lane_s64(t,1))!=0; + } + #define swap_if_le(b) (b) /* Using endian-neutral int8x16_t */ + /* KtopStr is reg correct by 64 bits, return mem correct */ + block gen_offset(uint64_t KtopStr[3], unsigned bot) { + const union { unsigned x; unsigned char endian; } little = { 1 }; + const int64x2_t k64 = {-64,-64}; + uint64x2_t hi = *(uint64x2_t *)(KtopStr+0); /* hi = A B */ + uint64x2_t lo = *(uint64x2_t *)(KtopStr+1); /* lo = B C */ + int64x2_t ls = vdupq_n_s64(bot); + int64x2_t rs = vqaddq_s64(k64,ls); + block rval = (block)veorq_u64(vshlq_u64(hi,ls),vshlq_u64(lo,rs)); + if (little.endian) + rval = vrev64q_s8(rval); + return rval; + } + static inline block double_block(block b) + { + const block mask = {135,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; + block tmp = vshrq_n_s8(b,7); + tmp = vandq_s8(tmp, mask); + tmp = vextq_s8(tmp, tmp, 1); /* Rotate high byte to end */ + b = vshlq_n_s8(b,1); + return veorq_s8(tmp,b); + } +#else + typedef struct { uint64_t l,r; } block; + static inline block xor_block(block x, block y) { + x.l^=y.l; x.r^=y.r; return x; + } + static inline block zero_block(void) { const block t = {0,0}; return t; } + #define unequal_blocks(x, y) ((((x).l^(y).l)|((x).r^(y).r)) != 0) + static inline block swap_if_le(block b) { + const union { unsigned x; unsigned char endian; } little = { 1 }; + if (little.endian) { + block r; + r.l = bswap64(b.l); + r.r = bswap64(b.r); + return r; + } else + return b; + } + + 
/* KtopStr is reg correct by 64 bits, return mem correct */ + block gen_offset(uint64_t KtopStr[3], unsigned bot) { + block rval; + if (bot != 0) { + rval.l = (KtopStr[0] << bot) | (KtopStr[1] >> (64-bot)); + rval.r = (KtopStr[1] << bot) | (KtopStr[2] >> (64-bot)); + } else { + rval.l = KtopStr[0]; + rval.r = KtopStr[1]; + } + return swap_if_le(rval); + } + + #if __GNUC__ && __arm__ + static inline block double_block(block b) { + __asm__ ("adds %1,%1,%1\n\t" + "adcs %H1,%H1,%H1\n\t" + "adcs %0,%0,%0\n\t" + "adcs %H0,%H0,%H0\n\t" + "eorcs %1,%1,#135" + : "+r"(b.l), "+r"(b.r) : : "cc"); + return b; + } + #else + static inline block double_block(block b) { + uint64_t t = (uint64_t)((int64_t)b.l >> 63); + b.l = (b.l + b.l) ^ (b.r >> 63); + b.r = (b.r + b.r) ^ (t & 135); + return b; + } + #endif + +#endif + +/* ----------------------------------------------------------------------- */ +/* AES - Code uses OpenSSL API. Other implementations get mapped to it. */ +/* ----------------------------------------------------------------------- */ + +/*---------------*/ +#if USE_OPENSSL_AES +/*---------------*/ + +#include /* http://openssl.org/ */ + +/* How to ECB encrypt an array of blocks, in place */ +static inline void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) { + while (nblks) { + --nblks; + AES_encrypt((unsigned char *)(blks+nblks), (unsigned char *)(blks+nblks), key); + } +} + +static inline void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) { + while (nblks) { + --nblks; + AES_decrypt((unsigned char *)(blks+nblks), (unsigned char *)(blks+nblks), key); + } +} + +#define BPI 4 /* Number of blocks in buffer per ECB call */ + +/*-------------------*/ +#elif USE_REFERENCE_AES +/*-------------------*/ + +#include "rijndael-alg-fst.h" /* Barreto's Public-Domain Code */ +#if (OCB_KEY_LEN == 0) + typedef struct { uint32_t rd_key[60]; int rounds; } AES_KEY; + #define ROUNDS(ctx) ((ctx)->rounds) + #define AES_set_encrypt_key(x, y, z) \ + do 
{rijndaelKeySetupEnc((z)->rd_key, x, y); (z)->rounds = y/32+6;} while (0) + #define AES_set_decrypt_key(x, y, z) \ + do {rijndaelKeySetupDec((z)->rd_key, x, y); (z)->rounds = y/32+6;} while (0) +#else + typedef struct { uint32_t rd_key[OCB_KEY_LEN+28]; } AES_KEY; + #define ROUNDS(ctx) (6+OCB_KEY_LEN/4) + #define AES_set_encrypt_key(x, y, z) rijndaelKeySetupEnc((z)->rd_key, x, y) + #define AES_set_decrypt_key(x, y, z) rijndaelKeySetupDec((z)->rd_key, x, y) +#endif +#define AES_encrypt(x,y,z) rijndaelEncrypt((z)->rd_key, ROUNDS(z), x, y) +#define AES_decrypt(x,y,z) rijndaelDecrypt((z)->rd_key, ROUNDS(z), x, y) + +static void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) { + while (nblks) { + --nblks; + AES_encrypt((unsigned char *)(blks+nblks), (unsigned char *)(blks+nblks), key); + } +} + + void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) { + while (nblks) { + --nblks; + AES_decrypt((unsigned char *)(blks+nblks), (unsigned char *)(blks+nblks), key); + } +} + +#define BPI 4 /* Number of blocks in buffer per ECB call */ + +/*----------*/ +#elif USE_AES_NI +/*----------*/ + +#include + +#if (OCB_KEY_LEN == 0) + typedef struct { __m128i rd_key[15]; int rounds; } AES_KEY; + #define ROUNDS(ctx) ((ctx)->rounds) +#else + typedef struct { __m128i rd_key[7+OCB_KEY_LEN/4]; } AES_KEY; + #define ROUNDS(ctx) (6+OCB_KEY_LEN/4) +#endif + +#define EXPAND_ASSIST(v1,v2,v3,v4,shuff_const,aes_const) \ + v2 = _mm_aeskeygenassist_si128(v4,aes_const); \ + v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), \ + _mm_castsi128_ps(v1), 16)); \ + v1 = _mm_xor_si128(v1,v3); \ + v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), \ + _mm_castsi128_ps(v1), 140)); \ + v1 = _mm_xor_si128(v1,v3); \ + v2 = _mm_shuffle_epi32(v2,shuff_const); \ + v1 = _mm_xor_si128(v1,v2) + +#define EXPAND192_STEP(idx,aes_const) \ + EXPAND_ASSIST(x0,x1,x2,x3,85,aes_const); \ + x3 = _mm_xor_si128(x3,_mm_slli_si128 (x3, 4)); \ + x3 = 
_mm_xor_si128(x3,_mm_shuffle_epi32(x0, 255)); \ + kp[idx] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(tmp), \ + _mm_castsi128_ps(x0), 68)); \ + kp[idx+1] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(x0), \ + _mm_castsi128_ps(x3), 78)); \ + EXPAND_ASSIST(x0,x1,x2,x3,85,(aes_const*2)); \ + x3 = _mm_xor_si128(x3,_mm_slli_si128 (x3, 4)); \ + x3 = _mm_xor_si128(x3,_mm_shuffle_epi32(x0, 255)); \ + kp[idx+2] = x0; tmp = x3 + +void AES_128_Key_Expansion(const unsigned char *userkey, void *key) +{ + __m128i x0,x1,x2; + __m128i *kp = (__m128i *)key; + kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey); + x2 = _mm_setzero_si128(); + EXPAND_ASSIST(x0,x1,x2,x0,255,1); kp[1] = x0; + EXPAND_ASSIST(x0,x1,x2,x0,255,2); kp[2] = x0; + EXPAND_ASSIST(x0,x1,x2,x0,255,4); kp[3] = x0; + EXPAND_ASSIST(x0,x1,x2,x0,255,8); kp[4] = x0; + EXPAND_ASSIST(x0,x1,x2,x0,255,16); kp[5] = x0; + EXPAND_ASSIST(x0,x1,x2,x0,255,32); kp[6] = x0; + EXPAND_ASSIST(x0,x1,x2,x0,255,64); kp[7] = x0; + EXPAND_ASSIST(x0,x1,x2,x0,255,128); kp[8] = x0; + EXPAND_ASSIST(x0,x1,x2,x0,255,27); kp[9] = x0; + EXPAND_ASSIST(x0,x1,x2,x0,255,54); kp[10] = x0; +} + +void AES_192_Key_Expansion(const unsigned char *userkey, void *key) +{ + __m128i x0,x1,x2,x3,tmp,*kp = (__m128i *)key; + kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey); + tmp = x3 = _mm_loadu_si128((__m128i*)(userkey+16)); + x2 = _mm_setzero_si128(); + EXPAND192_STEP(1,1); + EXPAND192_STEP(4,4); + EXPAND192_STEP(7,16); + EXPAND192_STEP(10,64); +} + +void AES_256_Key_Expansion(const unsigned char *userkey, void *key) +{ + __m128i x0,x1,x2,x3,*kp = (__m128i *)key; + kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey ); + kp[1] = x3 = _mm_loadu_si128((__m128i*)(userkey+16)); + x2 = _mm_setzero_si128(); + EXPAND_ASSIST(x0,x1,x2,x3,255,1); kp[2] = x0; + EXPAND_ASSIST(x3,x1,x2,x0,170,1); kp[3] = x3; + EXPAND_ASSIST(x0,x1,x2,x3,255,2); kp[4] = x0; + EXPAND_ASSIST(x3,x1,x2,x0,170,2); kp[5] = x3; + EXPAND_ASSIST(x0,x1,x2,x3,255,4); kp[6] = x0; + 
EXPAND_ASSIST(x3,x1,x2,x0,170,4); kp[7] = x3; + EXPAND_ASSIST(x0,x1,x2,x3,255,8); kp[8] = x0; + EXPAND_ASSIST(x3,x1,x2,x0,170,8); kp[9] = x3; + EXPAND_ASSIST(x0,x1,x2,x3,255,16); kp[10] = x0; + EXPAND_ASSIST(x3,x1,x2,x0,170,16); kp[11] = x3; + EXPAND_ASSIST(x0,x1,x2,x3,255,32); kp[12] = x0; + EXPAND_ASSIST(x3,x1,x2,x0,170,32); kp[13] = x3; + EXPAND_ASSIST(x0,x1,x2,x3,255,64); kp[14] = x0; +} + +int AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) +{ + if (bits == 128) { + AES_128_Key_Expansion (userKey,key); + } else if (bits == 192) { + AES_192_Key_Expansion (userKey,key); + } else if (bits == 256) { + AES_256_Key_Expansion (userKey,key); + } + #if (OCB_KEY_LEN == 0) + key->rounds = 6+bits/32; + #endif + return 0; +} + + void AES_set_decrypt_key_fast(AES_KEY *dkey, const AES_KEY *ekey) +{ + int j = 0; + int i = ROUNDS(ekey); + #if (OCB_KEY_LEN == 0) + dkey->rounds = i; + #endif + dkey->rd_key[i--] = ekey->rd_key[j++]; + while (i) + dkey->rd_key[i--] = _mm_aesimc_si128(ekey->rd_key[j++]); + dkey->rd_key[i] = ekey->rd_key[j]; +} + +int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) +{ + AES_KEY temp_key; + AES_set_encrypt_key(userKey,bits,&temp_key); + AES_set_decrypt_key_fast(key, &temp_key); + return 0; +} + +static inline void AES_encrypt(const unsigned char *in, + unsigned char *out, const AES_KEY *key) +{ + int j,rnds=ROUNDS(key); + const __m128i *sched = ((__m128i *)(key->rd_key)); + __m128i tmp = _mm_load_si128 ((__m128i*)in); + tmp = _mm_xor_si128 (tmp,sched[0]); + for (j=1; jrd_key)); + __m128i tmp = _mm_load_si128 ((__m128i*)in); + tmp = _mm_xor_si128 (tmp,sched[0]); + for (j=1; jrd_key)); + for (i=0; ird_key)); + for (i=0; iL[_tz]) +#else +static block getL(const ae_ctx *ctx, unsigned tz) +{ + if (tz < L_TABLE_SZ) + return ctx->L[tz]; + else { + unsigned i; + /* Bring L[MAX] into registers, make it register correct */ + block rval = swap_if_le(ctx->L[L_TABLE_SZ-1]); + rval = 
double_block(rval); + for (i=L_TABLE_SZ; i < tz; i++) + rval = double_block(rval); + return swap_if_le(rval); /* To memory correct */ + } +} +#endif + +/* ----------------------------------------------------------------------- */ +/* Public functions */ +/* ----------------------------------------------------------------------- */ + +/* 32-bit SSE2 and Altivec systems need to be forced to allocate memory + on 16-byte alignments. (I believe all major 64-bit systems do already.) */ + +ae_ctx* ae_allocate(void *misc) +{ + void *p; + (void) misc; /* misc unused in this implementation */ + #if (__SSE2__ && !_M_X64 && !_M_AMD64 && !__amd64__) + p = _mm_malloc(sizeof(ae_ctx),16); + #elif (__ALTIVEC__ && !__PPC64__) + if (posix_memalign(&p,16,sizeof(ae_ctx)) != 0) p = NULL; + #else + p = malloc(sizeof(ae_ctx)); + #endif + return (ae_ctx *)p; +} + +void ae_free(ae_ctx *ctx) +{ + #if (__SSE2__ && !_M_X64 && !_M_AMD64 && !__amd64__) + _mm_free(ctx); + #else + free(ctx); + #endif +} + +/* ----------------------------------------------------------------------- */ + +int ae_clear (ae_ctx *ctx) /* Zero ae_ctx and undo initialization */ +{ + memset(ctx, 0, sizeof(ae_ctx)); + return AE_SUCCESS; +} + +int ae_ctx_sizeof(void) { return (int) sizeof(ae_ctx); } + +/* ----------------------------------------------------------------------- */ + +int ae_init(ae_ctx *ctx, const void *key, int key_len, int nonce_len, int tag_len) +{ + unsigned i; + block tmp_blk; + + if (nonce_len != 12) + return AE_NOT_SUPPORTED; + + /* Initialize encryption & decryption keys */ + #if (OCB_KEY_LEN > 0) + key_len = OCB_KEY_LEN; + #endif + AES_set_encrypt_key((unsigned char *)key, key_len*8, &ctx->encrypt_key); + #if USE_AES_NI + AES_set_decrypt_key_fast(&ctx->decrypt_key,&ctx->encrypt_key); + #else + AES_set_decrypt_key((unsigned char *)key, (int)(key_len*8), &ctx->decrypt_key); + #endif + + /* Zero things that need zeroing */ + ctx->cached_Top = ctx->ad_checksum = zero_block(); + ctx->ad_blocks_processed = 
0; + + /* Compute key-dependent values */ + AES_encrypt((unsigned char *)&ctx->cached_Top, + (unsigned char *)&ctx->Lstar, &ctx->encrypt_key); + tmp_blk = swap_if_le(ctx->Lstar); + tmp_blk = double_block(tmp_blk); + ctx->Ldollar = swap_if_le(tmp_blk); + tmp_blk = double_block(tmp_blk); + ctx->L[0] = swap_if_le(tmp_blk); + for (i = 1; i < L_TABLE_SZ; i++) { + tmp_blk = double_block(tmp_blk); + ctx->L[i] = swap_if_le(tmp_blk); + } + + #if (OCB_TAG_LEN == 0) + ctx->tag_len = tag_len; + #else + (void) tag_len; /* Suppress var not used error */ + #endif + + return AE_SUCCESS; +} + +/* ----------------------------------------------------------------------- */ + +static block gen_offset_from_nonce(ae_ctx *ctx, const void *nonce) +{ + const union { unsigned x; unsigned char endian; } little = { 1 }; + union { uint32_t u32[4]; uint8_t u8[16]; block bl; } tmp; + unsigned idx; + + /* Replace cached nonce Top if needed */ + tmp.u32[0] = (little.endian?0x01000000:0x00000001); + tmp.u32[1] = ((uint32_t *)nonce)[0]; + tmp.u32[2] = ((uint32_t *)nonce)[1]; + tmp.u32[3] = ((uint32_t *)nonce)[2]; + idx = (unsigned)(tmp.u8[15] & 0x3f); /* Get low 6 bits of nonce */ + tmp.u8[15] = tmp.u8[15] & 0xc0; /* Zero low 6 bits of nonce */ + if ( unequal_blocks(tmp.bl,ctx->cached_Top) ) { /* Cached? 
*/ + ctx->cached_Top = tmp.bl; /* Update cache, KtopStr */ + AES_encrypt(tmp.u8, (unsigned char *)&ctx->KtopStr, &ctx->encrypt_key); + if (little.endian) { /* Make Register Correct */ + ctx->KtopStr[0] = bswap64(ctx->KtopStr[0]); + ctx->KtopStr[1] = bswap64(ctx->KtopStr[1]); + } + ctx->KtopStr[2] = ctx->KtopStr[0] ^ + (ctx->KtopStr[0] << 8) ^ (ctx->KtopStr[1] >> 56); + } + return gen_offset(ctx->KtopStr, idx); +} + + void process_ad(ae_ctx *ctx, const void *ad, int ad_len, int final) +{ + union { uint32_t u32[4]; uint8_t u8[16]; block bl; } tmp; + block ad_offset, ad_checksum; + const block * adp = (block *)ad; + unsigned i,k,tz,remaining; + + ad_offset = ctx->ad_offset; + ad_checksum = ctx->ad_checksum; + i = ad_len/(BPI*16); + if (i) { + unsigned ad_block_num = ctx->ad_blocks_processed; + do { + block ta[BPI], oa[BPI]; + ad_block_num += BPI; + tz = ntz(ad_block_num); + oa[0] = xor_block(ad_offset, ctx->L[0]); + ta[0] = xor_block(oa[0], adp[0]); + oa[1] = xor_block(oa[0], ctx->L[1]); + ta[1] = xor_block(oa[1], adp[1]); + oa[2] = xor_block(ad_offset, ctx->L[1]); + ta[2] = xor_block(oa[2], adp[2]); + #if BPI == 4 + ad_offset = xor_block(oa[2], getL(ctx, tz)); + ta[3] = xor_block(ad_offset, adp[3]); + #elif BPI == 8 + oa[3] = xor_block(oa[2], ctx->L[2]); + ta[3] = xor_block(oa[3], adp[3]); + oa[4] = xor_block(oa[1], ctx->L[2]); + ta[4] = xor_block(oa[4], adp[4]); + oa[5] = xor_block(oa[0], ctx->L[2]); + ta[5] = xor_block(oa[5], adp[5]); + oa[6] = xor_block(ad_offset, ctx->L[2]); + ta[6] = xor_block(oa[6], adp[6]); + ad_offset = xor_block(oa[6], getL(ctx, tz)); + ta[7] = xor_block(ad_offset, adp[7]); + #endif + AES_ecb_encrypt_blks(ta,BPI,&ctx->encrypt_key); + ad_checksum = xor_block(ad_checksum, ta[0]); + ad_checksum = xor_block(ad_checksum, ta[1]); + ad_checksum = xor_block(ad_checksum, ta[2]); + ad_checksum = xor_block(ad_checksum, ta[3]); + #if (BPI == 8) + ad_checksum = xor_block(ad_checksum, ta[4]); + ad_checksum = xor_block(ad_checksum, ta[5]); + ad_checksum = 
xor_block(ad_checksum, ta[6]); + ad_checksum = xor_block(ad_checksum, ta[7]); + #endif + adp += BPI; + } while (--i); + ctx->ad_blocks_processed = ad_block_num; + ctx->ad_offset = ad_offset; + ctx->ad_checksum = ad_checksum; + } + + if (final) { + block ta[BPI]; + + /* Process remaining associated data, compute its tag contribution */ + remaining = ((unsigned)ad_len) % (BPI*16); + if (remaining) { + k=0; + #if (BPI == 8) + if (remaining >= 64) { + tmp.bl = xor_block(ad_offset, ctx->L[0]); + ta[0] = xor_block(tmp.bl, adp[0]); + tmp.bl = xor_block(tmp.bl, ctx->L[1]); + ta[1] = xor_block(tmp.bl, adp[1]); + ad_offset = xor_block(ad_offset, ctx->L[1]); + ta[2] = xor_block(ad_offset, adp[2]); + ad_offset = xor_block(ad_offset, ctx->L[2]); + ta[3] = xor_block(ad_offset, adp[3]); + remaining -= 64; + k=4; + } + #endif + if (remaining >= 32) { + ad_offset = xor_block(ad_offset, ctx->L[0]); + ta[k] = xor_block(ad_offset, adp[k]); + ad_offset = xor_block(ad_offset, getL(ctx, ntz(k+2))); + ta[k+1] = xor_block(ad_offset, adp[k+1]); + remaining -= 32; + k+=2; + } + if (remaining >= 16) { + ad_offset = xor_block(ad_offset, ctx->L[0]); + ta[k] = xor_block(ad_offset, adp[k]); + remaining = remaining - 16; + ++k; + } + if (remaining) { + ad_offset = xor_block(ad_offset,ctx->Lstar); + tmp.bl = zero_block(); + memcpy(tmp.u8, adp+k, remaining); + tmp.u8[remaining] = (unsigned char)0x80u; + ta[k] = xor_block(ad_offset, tmp.bl); + ++k; + } + AES_ecb_encrypt_blks(ta,k,&ctx->encrypt_key); + switch (k) { + #if (BPI == 8) + case 8: ad_checksum = xor_block(ad_checksum, ta[7]); + case 7: ad_checksum = xor_block(ad_checksum, ta[6]); + case 6: ad_checksum = xor_block(ad_checksum, ta[5]); + case 5: ad_checksum = xor_block(ad_checksum, ta[4]); + #endif + case 4: ad_checksum = xor_block(ad_checksum, ta[3]); + case 3: ad_checksum = xor_block(ad_checksum, ta[2]); + case 2: ad_checksum = xor_block(ad_checksum, ta[1]); + case 1: ad_checksum = xor_block(ad_checksum, ta[0]); + } + ctx->ad_checksum = 
ad_checksum; + } + } +} + +/* ----------------------------------------------------------------------- */ +/* ae_encrypt: encrypt pt_len bytes of pt into ct and, on the final call, emit the tag. A non-NULL nonce starts a new message and resets the per-message state kept in ctx. Associated data is handed to process_ad when ad_len > 0. Whole groups of BPI 16-byte blocks are processed on every call; the trailing partial group and the tag are handled only when final is nonzero. A non-NULL tag receives the tag, otherwise the tag is written at ct + pt_len. Returns pt_len, increased by the tag length when the tag was appended to ct. NOTE(review): pt and ct are accessed through block pointers, so they presumably need block alignment - confirm against the block typedef. */ +int ae_encrypt(ae_ctx * ctx, + const void * nonce, + const void *pt, + int pt_len, + const void *ad, + int ad_len, + void *ct, + void *tag, + int final) +{ + union { uint32_t u32[4]; uint8_t u8[16]; block bl; } tmp; + block offset, checksum; + unsigned i, k; + block * ctp = (block *)ct; + const block * ptp = (block *)pt; + + /* Non-null nonce means start of new message, init per-message values */ + if (nonce) { + ctx->offset = gen_offset_from_nonce(ctx, nonce); + ctx->ad_offset = ctx->checksum = zero_block(); + ctx->ad_blocks_processed = ctx->blocks_processed = 0; + if (ad_len >= 0) /* a negative ad_len keeps the ad_checksum already stored in ctx */ + ctx->ad_checksum = zero_block(); + } + + /* Process associated data */ + if (ad_len > 0) + process_ad(ctx, ad, ad_len, final); + + /* Encrypt plaintext data BPI blocks at a time */ + offset = ctx->offset; + checksum = ctx->checksum; + i = pt_len/(BPI*16); + if (i) { + block oa[BPI]; + unsigned block_num = ctx->blocks_processed; + oa[BPI-1] = offset; /* last slot carries the running offset from one group to the next */ + do { + block ta[BPI]; + block_num += BPI; + oa[0] = xor_block(oa[BPI-1], ctx->L[0]); + ta[0] = xor_block(oa[0], ptp[0]); + checksum = xor_block(checksum, ptp[0]); + oa[1] = xor_block(oa[0], ctx->L[1]); + ta[1] = xor_block(oa[1], ptp[1]); + checksum = xor_block(checksum, ptp[1]); + oa[2] = xor_block(oa[1], ctx->L[0]); + ta[2] = xor_block(oa[2], ptp[2]); + checksum = xor_block(checksum, ptp[2]); + #if BPI == 4 + oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num))); + ta[3] = xor_block(oa[3], ptp[3]); + checksum = xor_block(checksum, ptp[3]); + #elif BPI == 8 + oa[3] = xor_block(oa[2], ctx->L[2]); + ta[3] = xor_block(oa[3], ptp[3]); + checksum = xor_block(checksum, ptp[3]); + oa[4] = xor_block(oa[1], ctx->L[2]); + ta[4] = xor_block(oa[4], ptp[4]); + checksum = xor_block(checksum, ptp[4]); + oa[5] = xor_block(oa[0], ctx->L[2]); + ta[5] = xor_block(oa[5], ptp[5]); + checksum = xor_block(checksum, ptp[5]); + oa[6] = xor_block(oa[7], 
ctx->L[2]); /* oa[7] still holds the offset that ended the previous group */ + ta[6] = xor_block(oa[6], ptp[6]); + checksum = xor_block(checksum, ptp[6]); + oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num))); + ta[7] = xor_block(oa[7], ptp[7]); + checksum = xor_block(checksum, ptp[7]); + #endif + AES_ecb_encrypt_blks(ta,BPI,&ctx->encrypt_key); + ctp[0] = xor_block(ta[0], oa[0]); + ctp[1] = xor_block(ta[1], oa[1]); + ctp[2] = xor_block(ta[2], oa[2]); + ctp[3] = xor_block(ta[3], oa[3]); + #if (BPI == 8) + ctp[4] = xor_block(ta[4], oa[4]); + ctp[5] = xor_block(ta[5], oa[5]); + ctp[6] = xor_block(ta[6], oa[6]); + ctp[7] = xor_block(ta[7], oa[7]); + #endif + ptp += BPI; + ctp += BPI; + } while (--i); + ctx->offset = offset = oa[BPI-1]; + ctx->blocks_processed = block_num; + ctx->checksum = checksum; + } + + if (final) { + block ta[BPI+1], oa[BPI]; /* ta[] has one extra slot for the tag block */ + + /* Process remaining plaintext and compute its tag contribution */ + unsigned remaining = ((unsigned)pt_len) % (BPI*16); + k = 0; /* How many blocks in ta[] need ECBing */ + if (remaining) { + #if (BPI == 8) + if (remaining >= 64) { + oa[0] = xor_block(offset, ctx->L[0]); + ta[0] = xor_block(oa[0], ptp[0]); + checksum = xor_block(checksum, ptp[0]); + oa[1] = xor_block(oa[0], ctx->L[1]); + ta[1] = xor_block(oa[1], ptp[1]); + checksum = xor_block(checksum, ptp[1]); + oa[2] = xor_block(oa[1], ctx->L[0]); + ta[2] = xor_block(oa[2], ptp[2]); + checksum = xor_block(checksum, ptp[2]); + offset = oa[3] = xor_block(oa[2], ctx->L[2]); + ta[3] = xor_block(offset, ptp[3]); + checksum = xor_block(checksum, ptp[3]); + remaining -= 64; + k = 4; + } + #endif + if (remaining >= 32) { + oa[k] = xor_block(offset, ctx->L[0]); + ta[k] = xor_block(oa[k], ptp[k]); + checksum = xor_block(checksum, ptp[k]); + offset = oa[k+1] = xor_block(oa[k], ctx->L[1]); + ta[k+1] = xor_block(offset, ptp[k+1]); + checksum = xor_block(checksum, ptp[k+1]); + remaining -= 32; + k+=2; + } + if (remaining >= 16) { + offset = oa[k] = xor_block(offset, ctx->L[0]); + ta[k] = xor_block(offset, ptp[k]); + checksum = 
xor_block(checksum, ptp[k]); + remaining -= 16; + ++k; + } + if (remaining) { + tmp.bl = zero_block(); + memcpy(tmp.u8, ptp+k, remaining); + tmp.u8[remaining] = (unsigned char)0x80u; /* partial block is padded with one 0x80 byte, the rest stays zero */ + checksum = xor_block(checksum, tmp.bl); + ta[k] = offset = xor_block(offset,ctx->Lstar); /* the ECB output of this offset is the pad for the partial block */ + ++k; + } + } + offset = xor_block(offset, ctx->Ldollar); /* Part of tag gen */ + ta[k] = xor_block(offset, checksum); /* Part of tag gen */ + AES_ecb_encrypt_blks(ta,k+1,&ctx->encrypt_key); + offset = xor_block(ta[k], ctx->ad_checksum); /* Part of tag gen; offset now holds the tag */ + if (remaining) { + --k; + tmp.bl = xor_block(tmp.bl, ta[k]); + memcpy(ctp+k, tmp.u8, remaining); /* only the bytes of the partial block are written out */ + } + switch (k) { /* no breaks: falls through so blocks k-1 down to 0 are all written */ + #if (BPI == 8) + case 7: ctp[6] = xor_block(ta[6], oa[6]); + case 6: ctp[5] = xor_block(ta[5], oa[5]); + case 5: ctp[4] = xor_block(ta[4], oa[4]); + case 4: ctp[3] = xor_block(ta[3], oa[3]); + #endif + case 3: ctp[2] = xor_block(ta[2], oa[2]); + case 2: ctp[1] = xor_block(ta[1], oa[1]); + case 1: ctp[0] = xor_block(ta[0], oa[0]); + } + + /* Store the tag in the caller's tag buffer when one is given, + * otherwise append it directly after the ciphertext and count it in the return value */ + if (tag) { + #if (OCB_TAG_LEN == 16) + *(block *)tag = offset; + #elif (OCB_TAG_LEN > 0) + memcpy((char *)tag, &offset, OCB_TAG_LEN); + #else + memcpy((char *)tag, &offset, ctx->tag_len); + #endif + } else { + #if (OCB_TAG_LEN > 0) + memcpy((char *)ct + pt_len, &offset, OCB_TAG_LEN); + pt_len += OCB_TAG_LEN; + #else + memcpy((char *)ct + pt_len, &offset, ctx->tag_len); + pt_len += ctx->tag_len; + #endif + } + } + return (int) pt_len; +} + +/* ----------------------------------------------------------------------- */ +/* ae_decrypt: decrypt ct_len bytes of ct into pt and, on the final call, verify the tag. When final is nonzero and tag is NULL the tag is read from the end of ct and ct_len is reduced by the tag length first. A non-NULL nonce starts a new message and resets the per-message state kept in ctx. Associated data is handed to process_ad when ad_len > 0. Returns the (possibly reduced) ct_len, or AE_INVALID when the tag comparison on the final call fails. NOTE(review): pt has already been written by the time AE_INVALID is returned. */ +int ae_decrypt(ae_ctx *ctx, + const void *nonce, + const void *ct, + int ct_len, + const void *ad, + int ad_len, + void *pt, + const void *tag, + int final) +{ + union { uint32_t u32[4]; uint8_t u8[16]; block bl; } tmp; + block offset, checksum; + unsigned i, k; + block *ctp = (block *)ct; + block *ptp = (block *)pt; + + /* Tag is bundled at the end of ct: exclude its length from ct_len */ + if ((final) && (!tag)) + #if (OCB_TAG_LEN > 0) + ct_len -= 
OCB_TAG_LEN; + #else + ct_len -= ctx->tag_len; + #endif + /* NOTE(review): ct_len is not checked for having gone negative here (input shorter than the tag); the unsigned conversions below would then misbehave */ + /* Non-null nonce means start of new message, init per-message values */ + if (nonce) { + ctx->offset = gen_offset_from_nonce(ctx, nonce); + ctx->ad_offset = ctx->checksum = zero_block(); + ctx->ad_blocks_processed = ctx->blocks_processed = 0; + if (ad_len >= 0) /* a negative ad_len keeps the ad_checksum already stored in ctx */ + ctx->ad_checksum = zero_block(); + } + + /* Process associated data */ + if (ad_len > 0) + process_ad(ctx, ad, ad_len, final); + + /* Decrypt ciphertext data BPI blocks at a time */ + offset = ctx->offset; + checksum = ctx->checksum; + i = ct_len/(BPI*16); + if (i) { + block oa[BPI]; + unsigned block_num = ctx->blocks_processed; + oa[BPI-1] = offset; /* last slot carries the running offset from one group to the next */ + do { + block ta[BPI]; + block_num += BPI; + oa[0] = xor_block(oa[BPI-1], ctx->L[0]); + ta[0] = xor_block(oa[0], ctp[0]); + oa[1] = xor_block(oa[0], ctx->L[1]); + ta[1] = xor_block(oa[1], ctp[1]); + oa[2] = xor_block(oa[1], ctx->L[0]); + ta[2] = xor_block(oa[2], ctp[2]); + #if BPI == 4 + oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num))); + ta[3] = xor_block(oa[3], ctp[3]); + #elif BPI == 8 + oa[3] = xor_block(oa[2], ctx->L[2]); + ta[3] = xor_block(oa[3], ctp[3]); + oa[4] = xor_block(oa[1], ctx->L[2]); + ta[4] = xor_block(oa[4], ctp[4]); + oa[5] = xor_block(oa[0], ctx->L[2]); + ta[5] = xor_block(oa[5], ctp[5]); + oa[6] = xor_block(oa[7], ctx->L[2]); /* oa[7] still holds the offset that ended the previous group */ + ta[6] = xor_block(oa[6], ctp[6]); + oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num))); + ta[7] = xor_block(oa[7], ctp[7]); + #endif + AES_ecb_decrypt_blks(ta,BPI,&ctx->decrypt_key); + ptp[0] = xor_block(ta[0], oa[0]); + checksum = xor_block(checksum, ptp[0]); + ptp[1] = xor_block(ta[1], oa[1]); + checksum = xor_block(checksum, ptp[1]); + ptp[2] = xor_block(ta[2], oa[2]); + checksum = xor_block(checksum, ptp[2]); + ptp[3] = xor_block(ta[3], oa[3]); + checksum = xor_block(checksum, ptp[3]); + #if (BPI == 8) + ptp[4] = xor_block(ta[4], oa[4]); + checksum = xor_block(checksum, ptp[4]); + ptp[5] = xor_block(ta[5], oa[5]); + checksum = xor_block(checksum, 
ptp[5]); + ptp[6] = xor_block(ta[6], oa[6]); + checksum = xor_block(checksum, ptp[6]); + ptp[7] = xor_block(ta[7], oa[7]); + checksum = xor_block(checksum, ptp[7]); + #endif + ptp += BPI; + ctp += BPI; + } while (--i); + ctx->offset = offset = oa[BPI-1]; + ctx->blocks_processed = block_num; + ctx->checksum = checksum; + } + + if (final) { + block ta[BPI+1], oa[BPI]; + + /* Process remaining ciphertext and compute the tag contribution of the recovered plaintext */ + unsigned remaining = ((unsigned)ct_len) % (BPI*16); + k = 0; /* How many blocks in ta[] need ECBing */ + if (remaining) { + #if (BPI == 8) + if (remaining >= 64) { + oa[0] = xor_block(offset, ctx->L[0]); + ta[0] = xor_block(oa[0], ctp[0]); + oa[1] = xor_block(oa[0], ctx->L[1]); + ta[1] = xor_block(oa[1], ctp[1]); + oa[2] = xor_block(oa[1], ctx->L[0]); + ta[2] = xor_block(oa[2], ctp[2]); + offset = oa[3] = xor_block(oa[2], ctx->L[2]); + ta[3] = xor_block(offset, ctp[3]); + remaining -= 64; + k = 4; + } + #endif + if (remaining >= 32) { + oa[k] = xor_block(offset, ctx->L[0]); + ta[k] = xor_block(oa[k], ctp[k]); + offset = oa[k+1] = xor_block(oa[k], ctx->L[1]); + ta[k+1] = xor_block(offset, ctp[k+1]); + remaining -= 32; + k+=2; + } + if (remaining >= 16) { + offset = oa[k] = xor_block(offset, ctx->L[0]); + ta[k] = xor_block(offset, ctp[k]); + remaining -= 16; + ++k; + } + if (remaining) { + block pad; + offset = xor_block(offset,ctx->Lstar); + AES_encrypt((unsigned char *)&offset, tmp.u8, &ctx->encrypt_key); /* the pad is made with the encrypt key, also when decrypting */ + pad = tmp.bl; + memcpy(tmp.u8,ctp+k,remaining); + tmp.bl = xor_block(tmp.bl, pad); /* bytes past 'remaining' still held the pad, so they XOR to zero */ + tmp.u8[remaining] = (unsigned char)0x80u; + memcpy(ptp+k, tmp.u8, remaining); + checksum = xor_block(checksum, tmp.bl); + } + } + AES_ecb_decrypt_blks(ta,k,&ctx->decrypt_key); + switch (k) { /* no breaks: falls through so blocks k-1 down to 0 are all written */ + #if (BPI == 8) + case 7: ptp[6] = xor_block(ta[6], oa[6]); + checksum = xor_block(checksum, ptp[6]); + case 6: ptp[5] = xor_block(ta[5], oa[5]); + checksum = xor_block(checksum, ptp[5]); + case 5: ptp[4] = xor_block(ta[4], oa[4]); + checksum = 
xor_block(checksum, ptp[4]); + case 4: ptp[3] = xor_block(ta[3], oa[3]); + checksum = xor_block(checksum, ptp[3]); + #endif + case 3: ptp[2] = xor_block(ta[2], oa[2]); + checksum = xor_block(checksum, ptp[2]); + case 2: ptp[1] = xor_block(ta[1], oa[1]); + checksum = xor_block(checksum, ptp[1]); + case 1: ptp[0] = xor_block(ta[0], oa[0]); + checksum = xor_block(checksum, ptp[0]); + } + + /* Calculate expected tag */ + offset = xor_block(offset, ctx->Ldollar); + tmp.bl = xor_block(offset, checksum); + AES_encrypt(tmp.u8, tmp.u8, &ctx->encrypt_key); + tmp.bl = xor_block(tmp.bl, ctx->ad_checksum); /* Full tag */ + /* NOTE(review): the memcmp paths below are not guaranteed to run in constant time; consider a constant-time comparison for the tag */ + /* Compare with proposed tag, change ct_len if invalid */ + if ((OCB_TAG_LEN == 16) && tag) { + if (unequal_blocks(tmp.bl, *(block *)tag)) + ct_len = AE_INVALID; + } else { + #if (OCB_TAG_LEN > 0) + int len = OCB_TAG_LEN; + #else + int len = ctx->tag_len; + #endif + if (tag) { + if (memcmp(tag,tmp.u8,len) != 0) + ct_len = AE_INVALID; + } else { + if (memcmp((char *)ct + ct_len,tmp.u8,len) != 0) + ct_len = AE_INVALID; + } + } + } + return ct_len; + } +/* Name of the AES backend picked by the USE_* build macros; infoString is not defined when none of them is set */ +#if USE_AES_NI +char infoString[] = "OCB (AES-NI)"; +#elif USE_REFERENCE_AES +char infoString[] = "OCB (Reference AES)"; +#elif USE_OPENSSL_AES +char infoString[] = "OCB (OpenSSL AES)"; +#endif