Drop use of custom integer names

Just use the C99 integer names directly, and list the ones the code requires in a comment in config.h for reference when porting.
2018-04-17 21:57:52 -04:00 · 2018-04-17 21:57:52 -04:00 · ff83f9259e
parent 274b080e12
commit ff83f9259e
7 changed files with 381 additions and 344 deletions
--- a/config.h
+++ b/config.h
@ -65,17 +65,20 @@
 /* Integer definitions needed by crypto */
 #include <stdint.h>
-
+/* If your compiler lacks a stdint.h, such as when compiling with a
-#define U8C(v)  (UINT8_C(v))
+ * plain ANSI C compiler, you'll need to replace this include with the
-#define U16C(v) (UINT16_C(v))
+ * appropriate typedefs for the following types:
-#define U32C(v) (UINT32_C(v))
+ *
-
+ *   uint8_t
-typedef uint8_t u8;
+ *   uint32_t
-typedef uint16_t u16;
+ *   uint64_t
-typedef uint32_t u32;
+ *   int32_t
-typedef uint64_t u64;
+ *   int64_t
-
+ *
-typedef int32_t s32;
+ * You will also need to define these macros:
-typedef int64_t limb;
+ *
+ *   UINT8_C
+ *   UINT32_C
+ */
 #endif /* CONFIG_H */
--- a/src/chacha.c
+++ b/src/chacha.c
@ -6,15 +6,14 @@ Public domain.
 #include "chacha.h"
-#define U8V(v)  ((u8)(v)  & U8C(0xFF))
+#define U8V(v)  ((uint8_t)(v)  & UINT8_C(0xFF))
-#define U16V(v) ((u16)(v) & U16C(0xFFFF))
+#define U32V(v) ((uint32_t)(v) & UINT32_C(0xFFFFFFFF))
-#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
 #define U8TO32_LITTLE(p) \
-  (((u32)((p)[0])      ) | \
+  (((uint32_t)((p)[0])      ) | \
-   ((u32)((p)[1]) <<  8) | \
+   ((uint32_t)((p)[1]) <<  8) | \
-   ((u32)((p)[2]) << 16) | \
+   ((uint32_t)((p)[2]) << 16) | \
-   ((u32)((p)[3]) << 24))
+   ((uint32_t)((p)[3]) << 24))
 #define U32TO8_LITTLE(p, v) \
  do { \
@ -35,9 +34,10 @@ Public domain.
  x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
  x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
-static void salsa20_wordtobyte(u8 output[64],const u32 input[16])
+static void
+salsa20_wordtobyte(uint8_t output[64], const uint32_t input[16])
 {
-  u32 x[16];
+  uint32_t x[16];
  int i;
  for (i = 0;i < 16;++i) x[i] = input[i];
@ -58,7 +58,8 @@ static void salsa20_wordtobyte(u8 output[64],const u32 input[16])
 static const char sigma[16] = "expand 32-byte k";
 static const char tau[16] = "expand 16-byte k";
-void chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
+void
+chacha_keysetup(chacha_ctx *x, const uint8_t *k, uint32_t kbits)
 {
  const char *constants;
@ -82,7 +83,8 @@ void chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
  x->input[3] = U8TO32_LITTLE(constants + 12);
 }
-void chacha_ivsetup(chacha_ctx *x,const u8 *iv)
+void
+chacha_ivsetup(chacha_ctx *x, const uint8_t *iv)
 {
  x->input[12] = 0;
  x->input[13] = 0;
@ -90,10 +92,11 @@ void chacha_ivsetup(chacha_ctx *x,const u8 *iv)
  x->input[15] = U8TO32_LITTLE(iv + 4);
 }
-void chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
+void
+chacha_encrypt(chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes)
 {
-  u8 output[64];
+  uint8_t output[64];
-  u32 i;
+  uint32_t i;
  if (!bytes) return;
  for (;;) {
--- a/src/chacha.h
+++ b/src/chacha.h
@ -6,11 +6,11 @@
 #define CHACHA_BLOCKLENGTH 64
 typedef struct {
-    u32 input[16];
+    uint32_t input[16];
 } chacha_ctx;
-void chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits);
+void chacha_keysetup(chacha_ctx *, const uint8_t *k, uint32_t kbits);
-void chacha_ivsetup(chacha_ctx *x,const u8 *iv);
+void chacha_ivsetup(chacha_ctx *, const uint8_t *iv);
-void chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes);
+void chacha_encrypt(chacha_ctx *, const uint8_t *m, uint8_t *c, uint32_t bytes);
 #endif /* CHACHA_H */
--- a/src/curve25519-donna.c
+++ b/src/curve25519-donna.c
@ -57,7 +57,9 @@
 * i.e. the limbs are 26, 25, 26, 25, ... bits wide. */
 /* Sum two numbers: output += in */
-static void fsum(limb *output, const limb *in) {
+static void
+fsum(int64_t *output, const int64_t *in)
+{
  unsigned i;
  for (i = 0; i < 10; i += 2) {
    output[0+i] = output[0+i] + in[0+i];
@ -67,7 +69,9 @@ static void fsum(limb *output, const limb *in) {
 /* Find the difference of two numbers: output = in - output
 * (note the order of the arguments!). */
-static void fdifference(limb *output, const limb *in) {
+static void
+fdifference(int64_t *output, const int64_t *in)
+{
  unsigned i;
  for (i = 0; i < 10; ++i) {
    output[i] = in[i] - output[i];
@ -75,7 +79,9 @@ static void fdifference(limb *output, const limb *in) {
 }
 /* Multiply a number by a scalar: output = in * scalar */
-static void fscalar_product(limb *output, const limb *in, const limb scalar) {
+static void
+fscalar_product(int64_t *output, const int64_t *in, const int64_t scalar)
+{
  unsigned i;
  for (i = 0; i < 10; ++i) {
    output[i] = in[i] * scalar;
@ -88,114 +94,118 @@ static void fscalar_product(limb *output, const limb *in, const limb scalar) {
 * form, the output is not.
 *
 * output[x] <= 14 * the largest product of the input limbs. */
-static void fproduct(limb *output, const limb *in2, const limb *in) {
+static void
-  output[0] =       ((limb) ((s32) in2[0])) * ((s32) in[0]);
+fproduct(int64_t *output, const int64_t *in2, const int64_t *in)
-  output[1] =       ((limb) ((s32) in2[0])) * ((s32) in[1]) +
+{
-                    ((limb) ((s32) in2[1])) * ((s32) in[0]);
+  output[0] =       ((int64_t) ((int32_t) in2[0])) * ((int32_t) in[0]);
-  output[2] =  2 *  ((limb) ((s32) in2[1])) * ((s32) in[1]) +
+  output[1] =       ((int64_t) ((int32_t) in2[0])) * ((int32_t) in[1]) +
-                    ((limb) ((s32) in2[0])) * ((s32) in[2]) +
+                    ((int64_t) ((int32_t) in2[1])) * ((int32_t) in[0]);
-                    ((limb) ((s32) in2[2])) * ((s32) in[0]);
+  output[2] =  2 *  ((int64_t) ((int32_t) in2[1])) * ((int32_t) in[1]) +
-  output[3] =       ((limb) ((s32) in2[1])) * ((s32) in[2]) +
+                    ((int64_t) ((int32_t) in2[0])) * ((int32_t) in[2]) +
-                    ((limb) ((s32) in2[2])) * ((s32) in[1]) +
+                    ((int64_t) ((int32_t) in2[2])) * ((int32_t) in[0]);
-                    ((limb) ((s32) in2[0])) * ((s32) in[3]) +
+  output[3] =       ((int64_t) ((int32_t) in2[1])) * ((int32_t) in[2]) +
-                    ((limb) ((s32) in2[3])) * ((s32) in[0]);
+                    ((int64_t) ((int32_t) in2[2])) * ((int32_t) in[1]) +
-  output[4] =       ((limb) ((s32) in2[2])) * ((s32) in[2]) +
+                    ((int64_t) ((int32_t) in2[0])) * ((int32_t) in[3]) +
-               2 * (((limb) ((s32) in2[1])) * ((s32) in[3]) +
+                    ((int64_t) ((int32_t) in2[3])) * ((int32_t) in[0]);
-                    ((limb) ((s32) in2[3])) * ((s32) in[1])) +
+  output[4] =       ((int64_t) ((int32_t) in2[2])) * ((int32_t) in[2]) +
-                    ((limb) ((s32) in2[0])) * ((s32) in[4]) +
+               2 * (((int64_t) ((int32_t) in2[1])) * ((int32_t) in[3]) +
-                    ((limb) ((s32) in2[4])) * ((s32) in[0]);
+                    ((int64_t) ((int32_t) in2[3])) * ((int32_t) in[1])) +
-  output[5] =       ((limb) ((s32) in2[2])) * ((s32) in[3]) +
+                    ((int64_t) ((int32_t) in2[0])) * ((int32_t) in[4]) +
-                    ((limb) ((s32) in2[3])) * ((s32) in[2]) +
+                    ((int64_t) ((int32_t) in2[4])) * ((int32_t) in[0]);
-                    ((limb) ((s32) in2[1])) * ((s32) in[4]) +
+  output[5] =       ((int64_t) ((int32_t) in2[2])) * ((int32_t) in[3]) +
-                    ((limb) ((s32) in2[4])) * ((s32) in[1]) +
+                    ((int64_t) ((int32_t) in2[3])) * ((int32_t) in[2]) +
-                    ((limb) ((s32) in2[0])) * ((s32) in[5]) +
+                    ((int64_t) ((int32_t) in2[1])) * ((int32_t) in[4]) +
-                    ((limb) ((s32) in2[5])) * ((s32) in[0]);
+                    ((int64_t) ((int32_t) in2[4])) * ((int32_t) in[1]) +
-  output[6] =  2 * (((limb) ((s32) in2[3])) * ((s32) in[3]) +
+                    ((int64_t) ((int32_t) in2[0])) * ((int32_t) in[5]) +
-                    ((limb) ((s32) in2[1])) * ((s32) in[5]) +
+                    ((int64_t) ((int32_t) in2[5])) * ((int32_t) in[0]);
-                    ((limb) ((s32) in2[5])) * ((s32) in[1])) +
+  output[6] =  2 * (((int64_t) ((int32_t) in2[3])) * ((int32_t) in[3]) +
-                    ((limb) ((s32) in2[2])) * ((s32) in[4]) +
+                    ((int64_t) ((int32_t) in2[1])) * ((int32_t) in[5]) +
-                    ((limb) ((s32) in2[4])) * ((s32) in[2]) +
+                    ((int64_t) ((int32_t) in2[5])) * ((int32_t) in[1])) +
-                    ((limb) ((s32) in2[0])) * ((s32) in[6]) +
+                    ((int64_t) ((int32_t) in2[2])) * ((int32_t) in[4]) +
-                    ((limb) ((s32) in2[6])) * ((s32) in[0]);
+                    ((int64_t) ((int32_t) in2[4])) * ((int32_t) in[2]) +
-  output[7] =       ((limb) ((s32) in2[3])) * ((s32) in[4]) +
+                    ((int64_t) ((int32_t) in2[0])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in2[4])) * ((s32) in[3]) +
+                    ((int64_t) ((int32_t) in2[6])) * ((int32_t) in[0]);
-                    ((limb) ((s32) in2[2])) * ((s32) in[5]) +
+  output[7] =       ((int64_t) ((int32_t) in2[3])) * ((int32_t) in[4]) +
-                    ((limb) ((s32) in2[5])) * ((s32) in[2]) +
+                    ((int64_t) ((int32_t) in2[4])) * ((int32_t) in[3]) +
-                    ((limb) ((s32) in2[1])) * ((s32) in[6]) +
+                    ((int64_t) ((int32_t) in2[2])) * ((int32_t) in[5]) +
-                    ((limb) ((s32) in2[6])) * ((s32) in[1]) +
+                    ((int64_t) ((int32_t) in2[5])) * ((int32_t) in[2]) +
-                    ((limb) ((s32) in2[0])) * ((s32) in[7]) +
+                    ((int64_t) ((int32_t) in2[1])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in2[7])) * ((s32) in[0]);
+                    ((int64_t) ((int32_t) in2[6])) * ((int32_t) in[1]) +
-  output[8] =       ((limb) ((s32) in2[4])) * ((s32) in[4]) +
+                    ((int64_t) ((int32_t) in2[0])) * ((int32_t) in[7]) +
-               2 * (((limb) ((s32) in2[3])) * ((s32) in[5]) +
+                    ((int64_t) ((int32_t) in2[7])) * ((int32_t) in[0]);
-                    ((limb) ((s32) in2[5])) * ((s32) in[3]) +
+  output[8] =       ((int64_t) ((int32_t) in2[4])) * ((int32_t) in[4]) +
-                    ((limb) ((s32) in2[1])) * ((s32) in[7]) +
+               2 * (((int64_t) ((int32_t) in2[3])) * ((int32_t) in[5]) +
-                    ((limb) ((s32) in2[7])) * ((s32) in[1])) +
+                    ((int64_t) ((int32_t) in2[5])) * ((int32_t) in[3]) +
-                    ((limb) ((s32) in2[2])) * ((s32) in[6]) +
+                    ((int64_t) ((int32_t) in2[1])) * ((int32_t) in[7]) +
-                    ((limb) ((s32) in2[6])) * ((s32) in[2]) +
+                    ((int64_t) ((int32_t) in2[7])) * ((int32_t) in[1])) +
-                    ((limb) ((s32) in2[0])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in2[2])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in2[8])) * ((s32) in[0]);
+                    ((int64_t) ((int32_t) in2[6])) * ((int32_t) in[2]) +
-  output[9] =       ((limb) ((s32) in2[4])) * ((s32) in[5]) +
+                    ((int64_t) ((int32_t) in2[0])) * ((int32_t) in[8]) +
-                    ((limb) ((s32) in2[5])) * ((s32) in[4]) +
+                    ((int64_t) ((int32_t) in2[8])) * ((int32_t) in[0]);
-                    ((limb) ((s32) in2[3])) * ((s32) in[6]) +
+  output[9] =       ((int64_t) ((int32_t) in2[4])) * ((int32_t) in[5]) +
-                    ((limb) ((s32) in2[6])) * ((s32) in[3]) +
+                    ((int64_t) ((int32_t) in2[5])) * ((int32_t) in[4]) +
-                    ((limb) ((s32) in2[2])) * ((s32) in[7]) +
+                    ((int64_t) ((int32_t) in2[3])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in2[7])) * ((s32) in[2]) +
+                    ((int64_t) ((int32_t) in2[6])) * ((int32_t) in[3]) +
-                    ((limb) ((s32) in2[1])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in2[2])) * ((int32_t) in[7]) +
-                    ((limb) ((s32) in2[8])) * ((s32) in[1]) +
+                    ((int64_t) ((int32_t) in2[7])) * ((int32_t) in[2]) +
-                    ((limb) ((s32) in2[0])) * ((s32) in[9]) +
+                    ((int64_t) ((int32_t) in2[1])) * ((int32_t) in[8]) +
-                    ((limb) ((s32) in2[9])) * ((s32) in[0]);
+                    ((int64_t) ((int32_t) in2[8])) * ((int32_t) in[1]) +
-  output[10] = 2 * (((limb) ((s32) in2[5])) * ((s32) in[5]) +
+                    ((int64_t) ((int32_t) in2[0])) * ((int32_t) in[9]) +
-                    ((limb) ((s32) in2[3])) * ((s32) in[7]) +
+                    ((int64_t) ((int32_t) in2[9])) * ((int32_t) in[0]);
-                    ((limb) ((s32) in2[7])) * ((s32) in[3]) +
+  output[10] = 2 * (((int64_t) ((int32_t) in2[5])) * ((int32_t) in[5]) +
-                    ((limb) ((s32) in2[1])) * ((s32) in[9]) +
+                    ((int64_t) ((int32_t) in2[3])) * ((int32_t) in[7]) +
-                    ((limb) ((s32) in2[9])) * ((s32) in[1])) +
+                    ((int64_t) ((int32_t) in2[7])) * ((int32_t) in[3]) +
-                    ((limb) ((s32) in2[4])) * ((s32) in[6]) +
+                    ((int64_t) ((int32_t) in2[1])) * ((int32_t) in[9]) +
-                    ((limb) ((s32) in2[6])) * ((s32) in[4]) +
+                    ((int64_t) ((int32_t) in2[9])) * ((int32_t) in[1])) +
-                    ((limb) ((s32) in2[2])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in2[4])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in2[8])) * ((s32) in[2]);
+                    ((int64_t) ((int32_t) in2[6])) * ((int32_t) in[4]) +
-  output[11] =      ((limb) ((s32) in2[5])) * ((s32) in[6]) +
+                    ((int64_t) ((int32_t) in2[2])) * ((int32_t) in[8]) +
-                    ((limb) ((s32) in2[6])) * ((s32) in[5]) +
+                    ((int64_t) ((int32_t) in2[8])) * ((int32_t) in[2]);
-                    ((limb) ((s32) in2[4])) * ((s32) in[7]) +
+  output[11] =      ((int64_t) ((int32_t) in2[5])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in2[7])) * ((s32) in[4]) +
+                    ((int64_t) ((int32_t) in2[6])) * ((int32_t) in[5]) +
-                    ((limb) ((s32) in2[3])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in2[4])) * ((int32_t) in[7]) +
-                    ((limb) ((s32) in2[8])) * ((s32) in[3]) +
+                    ((int64_t) ((int32_t) in2[7])) * ((int32_t) in[4]) +
-                    ((limb) ((s32) in2[2])) * ((s32) in[9]) +
+                    ((int64_t) ((int32_t) in2[3])) * ((int32_t) in[8]) +
-                    ((limb) ((s32) in2[9])) * ((s32) in[2]);
+                    ((int64_t) ((int32_t) in2[8])) * ((int32_t) in[3]) +
-  output[12] =      ((limb) ((s32) in2[6])) * ((s32) in[6]) +
+                    ((int64_t) ((int32_t) in2[2])) * ((int32_t) in[9]) +
-               2 * (((limb) ((s32) in2[5])) * ((s32) in[7]) +
+                    ((int64_t) ((int32_t) in2[9])) * ((int32_t) in[2]);
-                    ((limb) ((s32) in2[7])) * ((s32) in[5]) +
+  output[12] =      ((int64_t) ((int32_t) in2[6])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in2[3])) * ((s32) in[9]) +
+               2 * (((int64_t) ((int32_t) in2[5])) * ((int32_t) in[7]) +
-                    ((limb) ((s32) in2[9])) * ((s32) in[3])) +
+                    ((int64_t) ((int32_t) in2[7])) * ((int32_t) in[5]) +
-                    ((limb) ((s32) in2[4])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in2[3])) * ((int32_t) in[9]) +
-                    ((limb) ((s32) in2[8])) * ((s32) in[4]);
+                    ((int64_t) ((int32_t) in2[9])) * ((int32_t) in[3])) +
-  output[13] =      ((limb) ((s32) in2[6])) * ((s32) in[7]) +
+                    ((int64_t) ((int32_t) in2[4])) * ((int32_t) in[8]) +
-                    ((limb) ((s32) in2[7])) * ((s32) in[6]) +
+                    ((int64_t) ((int32_t) in2[8])) * ((int32_t) in[4]);
-                    ((limb) ((s32) in2[5])) * ((s32) in[8]) +
+  output[13] =      ((int64_t) ((int32_t) in2[6])) * ((int32_t) in[7]) +
-                    ((limb) ((s32) in2[8])) * ((s32) in[5]) +
+                    ((int64_t) ((int32_t) in2[7])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in2[4])) * ((s32) in[9]) +
+                    ((int64_t) ((int32_t) in2[5])) * ((int32_t) in[8]) +
-                    ((limb) ((s32) in2[9])) * ((s32) in[4]);
+                    ((int64_t) ((int32_t) in2[8])) * ((int32_t) in[5]) +
-  output[14] = 2 * (((limb) ((s32) in2[7])) * ((s32) in[7]) +
+                    ((int64_t) ((int32_t) in2[4])) * ((int32_t) in[9]) +
-                    ((limb) ((s32) in2[5])) * ((s32) in[9]) +
+                    ((int64_t) ((int32_t) in2[9])) * ((int32_t) in[4]);
-                    ((limb) ((s32) in2[9])) * ((s32) in[5])) +
+  output[14] = 2 * (((int64_t) ((int32_t) in2[7])) * ((int32_t) in[7]) +
-                    ((limb) ((s32) in2[6])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in2[5])) * ((int32_t) in[9]) +
-                    ((limb) ((s32) in2[8])) * ((s32) in[6]);
+                    ((int64_t) ((int32_t) in2[9])) * ((int32_t) in[5])) +
-  output[15] =      ((limb) ((s32) in2[7])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in2[6])) * ((int32_t) in[8]) +
-                    ((limb) ((s32) in2[8])) * ((s32) in[7]) +
+                    ((int64_t) ((int32_t) in2[8])) * ((int32_t) in[6]);
-                    ((limb) ((s32) in2[6])) * ((s32) in[9]) +
+  output[15] =      ((int64_t) ((int32_t) in2[7])) * ((int32_t) in[8]) +
-                    ((limb) ((s32) in2[9])) * ((s32) in[6]);
+                    ((int64_t) ((int32_t) in2[8])) * ((int32_t) in[7]) +
-  output[16] =      ((limb) ((s32) in2[8])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in2[6])) * ((int32_t) in[9]) +
-               2 * (((limb) ((s32) in2[7])) * ((s32) in[9]) +
+                    ((int64_t) ((int32_t) in2[9])) * ((int32_t) in[6]);
-                    ((limb) ((s32) in2[9])) * ((s32) in[7]));
+  output[16] =      ((int64_t) ((int32_t) in2[8])) * ((int32_t) in[8]) +
-  output[17] =      ((limb) ((s32) in2[8])) * ((s32) in[9]) +
+               2 * (((int64_t) ((int32_t) in2[7])) * ((int32_t) in[9]) +
-                    ((limb) ((s32) in2[9])) * ((s32) in[8]);
+                    ((int64_t) ((int32_t) in2[9])) * ((int32_t) in[7]));
-  output[18] = 2 *  ((limb) ((s32) in2[9])) * ((s32) in[9]);
+  output[17] =      ((int64_t) ((int32_t) in2[8])) * ((int32_t) in[9]) +
+                    ((int64_t) ((int32_t) in2[9])) * ((int32_t) in[8]);
+  output[18] = 2 *  ((int64_t) ((int32_t) in2[9])) * ((int32_t) in[9]);
 }
 /* Reduce a long form to a short form by taking the input mod 2^255 - 19.
 *
 * On entry: |output[i]| < 14*2^54
 * On exit: |output[0..8]| < 280*2^54 */
-static void freduce_degree(limb *output) {
+static void
+freduce_degree(int64_t *output)
+{
  /* Each of these shifts and adds ends up multiplying the value by 19.
   *
   * For output[0..8], the absolute entry value is < 14*2^54 and we add, at
@ -236,8 +246,8 @@ static void freduce_degree(limb *output) {
 /* return v / 2^26, using only shifts and adds.
 *
 * On entry: v can take any value. */
-static limb
+static int64_t
-div_by_2_26(const limb v)
+div_by_2_26(const int64_t v)
 {
  /* High word of v; no shift needed. */
  const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32);
@ -252,8 +262,8 @@ div_by_2_26(const limb v)
 /* return v / (2^25), using only shifts and adds.
 *
 * On entry: v can take any value. */
-static limb
+static int64_t
-div_by_2_25(const limb v)
+div_by_2_25(const int64_t v)
 {
  /* High word of v; no shift needed*/
  const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32);
@ -268,13 +278,15 @@ div_by_2_25(const limb v)
 /* Reduce all coefficients of the short form input so that |x| < 2^26.
 *
 * On entry: |output[i]| < 280*2^54 */
-static void freduce_coefficients(limb *output) {
+static void
+freduce_coefficients(int64_t *output)
+{
  unsigned i;
  output[10] = 0;
  for (i = 0; i < 10; i += 2) {
-    limb over = div_by_2_26(output[i]);
+    int64_t over = div_by_2_26(output[i]);
    /* The entry condition (that |output[i]| < 280*2^54) means that over is, at
     * most, 280*2^28 in the first iteration of this loop. This is added to the
     * next limb and we can approximate the resulting bound of that limb by
@ -302,7 +314,7 @@ static void freduce_coefficients(limb *output) {
  /* Now output[1..9] are reduced, and |output[0]| < 2^26 + 19*281*2^29
   * So |over| will be no more than 2^16. */
  {
-    limb over = div_by_2_26(output[0]);
+    int64_t over = div_by_2_26(output[0]);
    output[0] -= over << 26;
    output[1] += over;
  }
@ -318,14 +330,15 @@ static void freduce_coefficients(limb *output) {
 * output must be distinct to both inputs. The output is reduced degree
 * (indeed, one need only provide storage for 10 limbs) and |output[i]| < 2^26. */
 static void
-fmul(limb *output, const limb *in, const limb *in2) {
+fmul(int64_t *output, const int64_t *in, const int64_t *in2)
-  limb t[19];
+{
+  int64_t t[19];
  fproduct(t, in, in2);
  /* |t[i]| < 14*2^54 */
  freduce_degree(t);
  freduce_coefficients(t);
  /* |t[i]| < 2^26 */
-  memcpy(output, t, sizeof(limb) * 10);
+  memcpy(output, t, sizeof(int64_t) * 10);
 }
 /* Square a number: output = in**2
@ -334,62 +347,64 @@ fmul(limb *output, const limb *in, const limb *in2) {
 * form, the output is not.
 *
 * output[x] <= 14 * the largest product of the input limbs. */
-static void fsquare_inner(limb *output, const limb *in) {
+static void
-  output[0] =       ((limb) ((s32) in[0])) * ((s32) in[0]);
+fsquare_inner(int64_t *output, const int64_t *in)
-  output[1] =  2 *  ((limb) ((s32) in[0])) * ((s32) in[1]);
+{
-  output[2] =  2 * (((limb) ((s32) in[1])) * ((s32) in[1]) +
+  output[0] =       ((int64_t) ((int32_t) in[0])) * ((int32_t) in[0]);
-                    ((limb) ((s32) in[0])) * ((s32) in[2]));
+  output[1] =  2 *  ((int64_t) ((int32_t) in[0])) * ((int32_t) in[1]);
-  output[3] =  2 * (((limb) ((s32) in[1])) * ((s32) in[2]) +
+  output[2] =  2 * (((int64_t) ((int32_t) in[1])) * ((int32_t) in[1]) +
-                    ((limb) ((s32) in[0])) * ((s32) in[3]));
+                    ((int64_t) ((int32_t) in[0])) * ((int32_t) in[2]));
-  output[4] =       ((limb) ((s32) in[2])) * ((s32) in[2]) +
+  output[3] =  2 * (((int64_t) ((int32_t) in[1])) * ((int32_t) in[2]) +
-               4 *  ((limb) ((s32) in[1])) * ((s32) in[3]) +
+                    ((int64_t) ((int32_t) in[0])) * ((int32_t) in[3]));
-               2 *  ((limb) ((s32) in[0])) * ((s32) in[4]);
+  output[4] =       ((int64_t) ((int32_t) in[2])) * ((int32_t) in[2]) +
-  output[5] =  2 * (((limb) ((s32) in[2])) * ((s32) in[3]) +
+               4 *  ((int64_t) ((int32_t) in[1])) * ((int32_t) in[3]) +
-                    ((limb) ((s32) in[1])) * ((s32) in[4]) +
+               2 *  ((int64_t) ((int32_t) in[0])) * ((int32_t) in[4]);
-                    ((limb) ((s32) in[0])) * ((s32) in[5]));
+  output[5] =  2 * (((int64_t) ((int32_t) in[2])) * ((int32_t) in[3]) +
-  output[6] =  2 * (((limb) ((s32) in[3])) * ((s32) in[3]) +
+                    ((int64_t) ((int32_t) in[1])) * ((int32_t) in[4]) +
-                    ((limb) ((s32) in[2])) * ((s32) in[4]) +
+                    ((int64_t) ((int32_t) in[0])) * ((int32_t) in[5]));
-                    ((limb) ((s32) in[0])) * ((s32) in[6]) +
+  output[6] =  2 * (((int64_t) ((int32_t) in[3])) * ((int32_t) in[3]) +
-               2 *  ((limb) ((s32) in[1])) * ((s32) in[5]));
+                    ((int64_t) ((int32_t) in[2])) * ((int32_t) in[4]) +
-  output[7] =  2 * (((limb) ((s32) in[3])) * ((s32) in[4]) +
+                    ((int64_t) ((int32_t) in[0])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in[2])) * ((s32) in[5]) +
+               2 *  ((int64_t) ((int32_t) in[1])) * ((int32_t) in[5]));
-                    ((limb) ((s32) in[1])) * ((s32) in[6]) +
+  output[7] =  2 * (((int64_t) ((int32_t) in[3])) * ((int32_t) in[4]) +
-                    ((limb) ((s32) in[0])) * ((s32) in[7]));
+                    ((int64_t) ((int32_t) in[2])) * ((int32_t) in[5]) +
-  output[8] =       ((limb) ((s32) in[4])) * ((s32) in[4]) +
+                    ((int64_t) ((int32_t) in[1])) * ((int32_t) in[6]) +
-               2 * (((limb) ((s32) in[2])) * ((s32) in[6]) +
+                    ((int64_t) ((int32_t) in[0])) * ((int32_t) in[7]));
-                    ((limb) ((s32) in[0])) * ((s32) in[8]) +
+  output[8] =       ((int64_t) ((int32_t) in[4])) * ((int32_t) in[4]) +
-               2 * (((limb) ((s32) in[1])) * ((s32) in[7]) +
+               2 * (((int64_t) ((int32_t) in[2])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in[3])) * ((s32) in[5])));
+                    ((int64_t) ((int32_t) in[0])) * ((int32_t) in[8]) +
-  output[9] =  2 * (((limb) ((s32) in[4])) * ((s32) in[5]) +
+               2 * (((int64_t) ((int32_t) in[1])) * ((int32_t) in[7]) +
-                    ((limb) ((s32) in[3])) * ((s32) in[6]) +
+                    ((int64_t) ((int32_t) in[3])) * ((int32_t) in[5])));
-                    ((limb) ((s32) in[2])) * ((s32) in[7]) +
+  output[9] =  2 * (((int64_t) ((int32_t) in[4])) * ((int32_t) in[5]) +
-                    ((limb) ((s32) in[1])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in[3])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in[0])) * ((s32) in[9]));
+                    ((int64_t) ((int32_t) in[2])) * ((int32_t) in[7]) +
-  output[10] = 2 * (((limb) ((s32) in[5])) * ((s32) in[5]) +
+                    ((int64_t) ((int32_t) in[1])) * ((int32_t) in[8]) +
-                    ((limb) ((s32) in[4])) * ((s32) in[6]) +
+                    ((int64_t) ((int32_t) in[0])) * ((int32_t) in[9]));
-                    ((limb) ((s32) in[2])) * ((s32) in[8]) +
+  output[10] = 2 * (((int64_t) ((int32_t) in[5])) * ((int32_t) in[5]) +
-               2 * (((limb) ((s32) in[3])) * ((s32) in[7]) +
+                    ((int64_t) ((int32_t) in[4])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in[1])) * ((s32) in[9])));
+                    ((int64_t) ((int32_t) in[2])) * ((int32_t) in[8]) +
-  output[11] = 2 * (((limb) ((s32) in[5])) * ((s32) in[6]) +
+               2 * (((int64_t) ((int32_t) in[3])) * ((int32_t) in[7]) +
-                    ((limb) ((s32) in[4])) * ((s32) in[7]) +
+                    ((int64_t) ((int32_t) in[1])) * ((int32_t) in[9])));
-                    ((limb) ((s32) in[3])) * ((s32) in[8]) +
+  output[11] = 2 * (((int64_t) ((int32_t) in[5])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in[2])) * ((s32) in[9]));
+                    ((int64_t) ((int32_t) in[4])) * ((int32_t) in[7]) +
-  output[12] =      ((limb) ((s32) in[6])) * ((s32) in[6]) +
+                    ((int64_t) ((int32_t) in[3])) * ((int32_t) in[8]) +
-               2 * (((limb) ((s32) in[4])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in[2])) * ((int32_t) in[9]));
-               2 * (((limb) ((s32) in[5])) * ((s32) in[7]) +
+  output[12] =      ((int64_t) ((int32_t) in[6])) * ((int32_t) in[6]) +
-                    ((limb) ((s32) in[3])) * ((s32) in[9])));
+               2 * (((int64_t) ((int32_t) in[4])) * ((int32_t) in[8]) +
-  output[13] = 2 * (((limb) ((s32) in[6])) * ((s32) in[7]) +
+               2 * (((int64_t) ((int32_t) in[5])) * ((int32_t) in[7]) +
-                    ((limb) ((s32) in[5])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in[3])) * ((int32_t) in[9])));
-                    ((limb) ((s32) in[4])) * ((s32) in[9]));
+  output[13] = 2 * (((int64_t) ((int32_t) in[6])) * ((int32_t) in[7]) +
-  output[14] = 2 * (((limb) ((s32) in[7])) * ((s32) in[7]) +
+                    ((int64_t) ((int32_t) in[5])) * ((int32_t) in[8]) +
-                    ((limb) ((s32) in[6])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in[4])) * ((int32_t) in[9]));
-               2 *  ((limb) ((s32) in[5])) * ((s32) in[9]));
+  output[14] = 2 * (((int64_t) ((int32_t) in[7])) * ((int32_t) in[7]) +
-  output[15] = 2 * (((limb) ((s32) in[7])) * ((s32) in[8]) +
+                    ((int64_t) ((int32_t) in[6])) * ((int32_t) in[8]) +
-                    ((limb) ((s32) in[6])) * ((s32) in[9]));
+               2 *  ((int64_t) ((int32_t) in[5])) * ((int32_t) in[9]));
-  output[16] =      ((limb) ((s32) in[8])) * ((s32) in[8]) +
+  output[15] = 2 * (((int64_t) ((int32_t) in[7])) * ((int32_t) in[8]) +
-               4 *  ((limb) ((s32) in[7])) * ((s32) in[9]);
+                    ((int64_t) ((int32_t) in[6])) * ((int32_t) in[9]));
-  output[17] = 2 *  ((limb) ((s32) in[8])) * ((s32) in[9]);
+  output[16] =      ((int64_t) ((int32_t) in[8])) * ((int32_t) in[8]) +
-  output[18] = 2 *  ((limb) ((s32) in[9])) * ((s32) in[9]);
+               4 *  ((int64_t) ((int32_t) in[7])) * ((int32_t) in[9]);
+  output[17] = 2 *  ((int64_t) ((int32_t) in[8])) * ((int32_t) in[9]);
+  output[18] = 2 *  ((int64_t) ((int32_t) in[9])) * ((int32_t) in[9]);
 }
 /* fsquare sets output = in^2.
@ -400,8 +415,9 @@ static void fsquare_inner(limb *output, const limb *in) {
 * On exit: The |output| argument is in reduced coefficients form (indeed, one
 * need only provide storage for 10 limbs) and |out[i]| < 2^26. */
 static void
-fsquare(limb *output, const limb *in) {
+fsquare(int64_t *output, const int64_t *in)
-  limb t[19];
+{
+  int64_t t[19];
  fsquare_inner(t, in);
  /* |t[i]| < 14*2^54 because the largest product of two limbs will be <
   * 2^(27+27) and fsquare_inner adds together, at most, 14 of those
@ -409,17 +425,18 @@ fsquare(limb *output, const limb *in) {
  freduce_degree(t);
  freduce_coefficients(t);
  /* |t[i]| < 2^26 */
-  memcpy(output, t, sizeof(limb) * 10);
+  memcpy(output, t, sizeof(int64_t) * 10);
 }
 /* Take a little-endian, 32-byte number and expand it into polynomial form */
 static void
-fexpand(limb *output, const u8 *input) {
+fexpand(int64_t *output, const uint8_t *input)
+{
 #define F(n,start,shift,mask) \
-  output[n] = ((((limb) input[start + 0]) | \
+  output[n] = ((((int64_t) input[start + 0]) | \
-                ((limb) input[start + 1]) << 8 | \
+                ((int64_t) input[start + 1]) << 8 | \
-                ((limb) input[start + 2]) << 16 | \
+                ((int64_t) input[start + 2]) << 16 | \
-                ((limb) input[start + 3]) << 24) >> shift) & mask;
+                ((int64_t) input[start + 3]) << 24) >> shift) & mask;
  F(0, 0, 0, 0x3ffffff);
  F(1, 3, 2, 0x1ffffff);
  F(2, 6, 3, 0x3ffffff);
@ -438,7 +455,9 @@ fexpand(limb *output, const u8 *input) {
 #endif
 /* s32_eq returns 0xffffffff iff a == b and zero otherwise. */
-static s32 s32_eq(s32 a, s32 b) {
+static int32_t
+s32_eq(int32_t a, int32_t b)
+{
  a = ~(a ^ b);
  a &= a << 16;
  a &= a << 8;
@ -450,7 +469,9 @@ static s32 s32_eq(s32 a, s32 b) {
 /* s32_gte returns 0xffffffff if a >= b and zero otherwise, where a and b are
 * both non-negative. */
-static s32 s32_gte(s32 a, s32 b) {
+static int32_t
+s32_gte(int32_t a, int32_t b)
+{
  a -= b;
  /* a >= 0 iff a >= b. */
  return ~(a >> 31);
@ -461,13 +482,14 @@ static s32 s32_gte(s32 a, s32 b) {
 *
 * On entry: |input_limbs[i]| < 2^26 */
 static void
-fcontract(u8 *output, limb *input_limbs) {
+fcontract(uint8_t *output, int64_t *input_limbs)
+{
  int i;
  int j;
-  s32 input[10];
+  int32_t input[10];
-  s32 mask;
+  int32_t mask;
-  /* |input_limbs[i]| < 2^26, so it's valid to convert to an s32. */
+  /* |input_limbs[i]| < 2^26, so it's valid to convert to an int32_t. */
  for (i = 0; i < 10; i++) {
    input[i] = input_limbs[i];
  }
@ -477,13 +499,13 @@ fcontract(u8 *output, limb *input_limbs) {
      if ((i & 1) == 1) {
        /* This calculation is a time-invariant way to make input[i]
         * non-negative by borrowing from the next-larger limb. */
-        const s32 mask = input[i] >> 31;
+        const int32_t mask = input[i] >> 31;
-        const s32 carry = -((input[i] & mask) >> 25);
+        const int32_t carry = -((input[i] & mask) >> 25);
        input[i] = input[i] + (carry << 25);
        input[i+1] = input[i+1] - carry;
      } else {
-        const s32 mask = input[i] >> 31;
+        const int32_t mask = input[i] >> 31;
-        const s32 carry = -((input[i] & mask) >> 26);
+        const int32_t carry = -((input[i] & mask) >> 26);
        input[i] = input[i] + (carry << 26);
        input[i+1] = input[i+1] - carry;
      }
@ -492,8 +514,8 @@ fcontract(u8 *output, limb *input_limbs) {
    /* There's no greater limb for input[9] to borrow from, but we can multiply
     * by 19 and borrow from input[0], which is valid mod 2^255-19. */
    {
-      const s32 mask = input[9] >> 31;
+      const int32_t mask = input[9] >> 31;
-      const s32 carry = -((input[9] & mask) >> 25);
+      const int32_t carry = -((input[9] & mask) >> 25);
      input[9] = input[9] + (carry << 25);
      input[0] = input[0] - (carry * 19);
    }
@ -516,8 +538,8 @@ fcontract(u8 *output, limb *input_limbs) {
     through input[9] were all zero.  In that case, input[1] is now 2^25 - 1,
     and this last borrow-propagation step will leave input[1] non-negative. */
  {
-    const s32 mask = input[0] >> 31;
+    const int32_t mask = input[0] >> 31;
-    const s32 carry = -((input[0] & mask) >> 26);
+    const int32_t carry = -((input[0] & mask) >> 26);
    input[0] = input[0] + (carry << 26);
    input[1] = input[1] - carry;
  }
@ -527,18 +549,18 @@ fcontract(u8 *output, limb *input_limbs) {
  for (j = 0; j < 2; j++) {
    for (i = 0; i < 9; i++) {
      if ((i & 1) == 1) {
-        const s32 carry = input[i] >> 25;
+        const int32_t carry = input[i] >> 25;
        input[i] &= 0x1ffffff;
        input[i+1] += carry;
      } else {
-        const s32 carry = input[i] >> 26;
+        const int32_t carry = input[i] >> 26;
        input[i] &= 0x3ffffff;
        input[i+1] += carry;
      }
    }
    {
-      const s32 carry = input[9] >> 25;
+      const int32_t carry = input[9] >> 25;
      input[9] &= 0x1ffffff;
      input[0] += 19*carry;
    }
@ -614,21 +636,23 @@ fcontract(u8 *output, limb *input_limbs) {
 *
 * On entry and exit, the absolute value of the limbs of all inputs and outputs
 * are < 2^26. */
-static void fmonty(limb *x2, limb *z2,  /* output 2Q */
+static void
-                   limb *x3, limb *z3,  /* output Q + Q' */
+fmonty(int64_t *x2, int64_t *z2,  /* output 2Q */
-                   limb *x, limb *z,    /* input Q */
+       int64_t *x3, int64_t *z3,  /* output Q + Q' */
-                   limb *xprime, limb *zprime,  /* input Q' */
+       int64_t *x, int64_t *z,    /* input Q */
-                   const limb *qmqp /* input Q - Q' */) {
+       int64_t *xprime, int64_t *zprime,  /* input Q' */
-  limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19],
+       const int64_t *qmqp /* input Q - Q' */)
 {
  int64_t origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19],
        zzprime[19], zzzprime[19], xxxprime[19];
-  memcpy(origx, x, 10 * sizeof(limb));
+  memcpy(origx, x, 10 * sizeof(int64_t));
  fsum(x, z);
  /* |x[i]| < 2^27 */
  fdifference(z, origx);  /* does x - z */
  /* |z[i]| < 2^27 */
-  memcpy(origxprime, xprime, sizeof(limb) * 10);
+  memcpy(origxprime, xprime, sizeof(int64_t) * 10);
  fsum(xprime, zprime);
  /* |xprime[i]| < 2^27 */
  fdifference(zprime, origxprime);
@ -645,7 +669,7 @@ static void fmonty(limb *x2, limb *z2,  /* output 2Q */
  freduce_degree(zzprime);
  freduce_coefficients(zzprime);
  /* |zzprime[i]| < 2^26 */
-  memcpy(origxprime, xxprime, sizeof(limb) * 10);
+  memcpy(origxprime, xxprime, sizeof(int64_t) * 10);
  fsum(xxprime, zzprime);
  /* |xxprime[i]| < 2^27 */
  fdifference(zzprime, origxprime);
@ -659,8 +683,8 @@ static void fmonty(limb *x2, limb *z2,  /* output 2Q */
  freduce_degree(zzprime);
  freduce_coefficients(zzprime);
  /* |zzprime[i]| < 2^26 */
-  memcpy(x3, xxxprime, sizeof(limb) * 10);
+  memcpy(x3, xxxprime, sizeof(int64_t) * 10);
-  memcpy(z3, zzprime, sizeof(limb) * 10);
+  memcpy(z3, zzprime, sizeof(int64_t) * 10);
  fsquare(xx, x);
  /* |xx[i]| < 2^26 */
@ -673,7 +697,7 @@ static void fmonty(limb *x2, limb *z2,  /* output 2Q */
  /* |x2[i]| < 2^26 */
  fdifference(zz, xx);  /* does zz = xx - zz */
  /* |zz[i]| < 2^27 */
-  memset(zzz + 10, 0, sizeof(limb) * 9);
+  memset(zzz + 10, 0, sizeof(int64_t) * 9);
  fscalar_product(zzz, zz, 121665);
  /* |zzz[i]| < 2^(27+17) */
  /* No need to call freduce_degree here:
@ -699,14 +723,15 @@ static void fmonty(limb *x2, limb *z2,  /* output 2Q */
 * and all values in a[0..9],b[0..9] must have magnitude less than
 * INT32_MAX. */
 static void
-swap_conditional(limb a[19], limb b[19], limb iswap) {
+swap_conditional(int64_t a[19], int64_t b[19], int64_t iswap)
 {
  unsigned i;
-  const s32 swap = (s32) -iswap;
+  const int32_t swap = (int32_t) -iswap;
  for (i = 0; i < 10; ++i) {
-    const s32 x = swap & ( ((s32)a[i]) ^ ((s32)b[i]) );
+    const int32_t x = swap & ( ((int32_t)a[i]) ^ ((int32_t)b[i]) );
-    a[i] = ((s32)a[i]) ^ x;
+    a[i] = ((int32_t)a[i]) ^ x;
-    b[i] = ((s32)b[i]) ^ x;
+    b[i] = ((int32_t)b[i]) ^ x;
  }
 }
@ -716,20 +741,21 @@ swap_conditional(limb a[19], limb b[19], limb iswap) {
 *   n: a little endian, 32-byte number
 *   q: a point of the curve (short form) */
 static void
-cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q) {
+cmult(int64_t *resultx, int64_t *resultz, const uint8_t *n, const int64_t *q)
-  limb a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0};
+{
-  limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
+  int64_t a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0};
-  limb e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1};
+  int64_t *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
-  limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
+  int64_t e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1};
  int64_t *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
  unsigned i, j;
-  memcpy(nqpqx, q, sizeof(limb) * 10);
+  memcpy(nqpqx, q, sizeof(int64_t) * 10);
  for (i = 0; i < 32; ++i) {
-    u8 byte = n[31 - i];
+    uint8_t byte = n[31 - i];
    for (j = 0; j < 8; ++j) {
-      const limb bit = byte >> 7;
+      const int64_t bit = byte >> 7;
      swap_conditional(nqx, nqpqx, bit);
      swap_conditional(nqz, nqpqz, bit);
@ -758,22 +784,23 @@ cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q) {
    }
  }
-  memcpy(resultx, nqx, sizeof(limb) * 10);
+  memcpy(resultx, nqx, sizeof(int64_t) * 10);
-  memcpy(resultz, nqz, sizeof(limb) * 10);
+  memcpy(resultz, nqz, sizeof(int64_t) * 10);
 }
 static void
-crecip(limb *out, const limb *z) {
+crecip(int64_t *out, const int64_t *z)
-  limb z2[10];
+{
-  limb z9[10];
+  int64_t z2[10];
-  limb z11[10];
+  int64_t z9[10];
-  limb z2_5_0[10];
+  int64_t z11[10];
-  limb z2_10_0[10];
+  int64_t z2_5_0[10];
-  limb z2_20_0[10];
+  int64_t z2_10_0[10];
-  limb z2_50_0[10];
+  int64_t z2_20_0[10];
-  limb z2_100_0[10];
+  int64_t z2_50_0[10];
-  limb t0[10];
+  int64_t z2_100_0[10];
-  limb t1[10];
+  int64_t t0[10];
  int64_t t1[10];
  int i;
  /* 2 */ fsquare(z2,z);
@ -830,8 +857,12 @@ crecip(limb *out, const limb *z) {
 }
 int
-curve25519_donna(u8 *mypublic, const u8 *secret, const u8 *basepoint) {
+curve25519_donna(
-  limb bp[10], x[10], z[11], zmone[10];
+        uint8_t *mypublic,
        const uint8_t *secret,
        const uint8_t *basepoint)
 {
  int64_t bp[10], x[10], z[11], zmone[10];
  uint8_t e[32];
  int i;
--- a/src/enchive.c
+++ b/src/enchive.c
@ -10,7 +10,7 @@
 #include "chacha.h"
 #include "optparse.h"
-int curve25519_donna(u8 *p, const u8 *s, const u8 *b);
+int curve25519_donna(uint8_t *p, const uint8_t *s, const uint8_t *b);
 /* Global options. */
 static char *global_pubkey = 0;
@ -174,12 +174,12 @@ joinstr(int n, ...)
 /**
 * Read the protection key from a key agent identified by its IV.
 */
-static int agent_read(u8 *key, const u8 *id);
+static int agent_read(uint8_t *key, const uint8_t *id);
 /**
 * Serve the protection key on a key agent identified by its IV.
 */
-static int agent_run(const u8 *key, const u8 *id);
+static int agent_run(const uint8_t *key, const uint8_t *id);
 #if ENCHIVE_OPTION_AGENT
 #include <poll.h>
@ -192,7 +192,7 @@ static int agent_run(const u8 *key, const u8 *id);
 * Fill ADDR with a unix domain socket name for the agent.
 */
 static int
-agent_addr(struct sockaddr_un *addr, const u8 *iv)
+agent_addr(struct sockaddr_un *addr, const uint8_t *iv)
 {
    char *dir = getenv("XDG_RUNTIME_DIR");
    if (!dir) {
@ -213,7 +213,7 @@ agent_addr(struct sockaddr_un *addr, const u8 *iv)
 }
 static int
-agent_read(u8 *key, const u8 *iv)
+agent_read(uint8_t *key, const uint8_t *iv)
 {
    int success;
    struct sockaddr_un addr;
@ -232,7 +232,7 @@ agent_read(u8 *key, const u8 *iv)
 }
 static int
-agent_run(const u8 *key, const u8 *iv)
+agent_run(const uint8_t *key, const uint8_t *iv)
 {
    struct pollfd pfd = {-1, POLLIN, 0};
    struct sockaddr_un addr;
@ -302,7 +302,7 @@ agent_run(const u8 *key, const u8 *iv)
 #else
 static int
-agent_read(u8 *key, const u8 *id)
+agent_read(uint8_t *key, const uint8_t *id)
 {
    (void)key;
    (void)id;
@ -310,7 +310,7 @@ agent_read(u8 *key, const u8 *id)
 }
 static int
-agent_run(const u8 *key, const u8 *id)
+agent_run(const uint8_t *key, const uint8_t *id)
 {
    (void)key;
    (void)id;
@ -632,10 +632,10 @@ secure_creat(const char *file)
 * All message data will go into the resulting context.
 */
 static void
-hmac_init(SHA256_CTX *ctx, const u8 *key)
+hmac_init(SHA256_CTX *ctx, const uint8_t *key)
 {
    int i;
-    u8 pad[SHA256_BLOCK_SIZE];
+    uint8_t pad[SHA256_BLOCK_SIZE];
    sha256_init(ctx);
    for (i = 0; i < SHA256_BLOCK_SIZE; i++)
        pad[i] = key[i] ^ 0x36U;
@ -647,10 +647,10 @@ hmac_init(SHA256_CTX *ctx, const u8 *key)
 * The key must be the same as used for initialization.
 */
 static void
-hmac_final(SHA256_CTX *ctx, const u8 *key, u8 *hash)
+hmac_final(SHA256_CTX *ctx, const uint8_t *key, uint8_t *hash)
 {
    int i;
-    u8 pad[SHA256_BLOCK_SIZE];
+    uint8_t pad[SHA256_BLOCK_SIZE];
    sha256_final(ctx, hash);
    sha256_init(ctx);
    for (i = 0; i < SHA256_BLOCK_SIZE; i++)
@ -665,15 +665,15 @@ hmac_final(SHA256_CTX *ctx, const u8 *key, u8 *hash)
 * Optionally provide an 8-byte salt.
 */
 static void
-key_derive(const char *passphrase, u8 *buf, int iexp, const u8 *salt)
+key_derive(const char *passphrase, uint8_t *buf, int iexp, const uint8_t *salt)
 {
-    u8 salt32[SHA256_BLOCK_SIZE] = {0};
+    uint8_t salt32[SHA256_BLOCK_SIZE] = {0};
    SHA256_CTX ctx[1];
    unsigned long i;
    unsigned long memlen = 1UL << iexp;
    unsigned long mask = memlen - 1;
    unsigned long iterations = 1UL << (iexp - 5);
-    u8 *memory, *memptr, *p;
+    uint8_t *memory, *memptr, *p;
    memory = malloc(memlen + SHA256_BLOCK_SIZE);
    if (!memory)
@ -682,7 +682,7 @@ key_derive(const char *passphrase, u8 *buf, int iexp, const u8 *salt)
    if (salt)
        memcpy(salt32, salt, 8);
    hmac_init(ctx, salt32);
-    sha256_update(ctx, (u8 *)passphrase, strlen(passphrase));
+    sha256_update(ctx, (uint8_t *)passphrase, strlen(passphrase));
    hmac_final(ctx, salt32, memory);
    for (p = memory + SHA256_BLOCK_SIZE;
@ -748,7 +748,7 @@ secure_entropy(void *buf, size_t len)
 * Generate a brand new Curve25519 secret key from system entropy.
 */
 static void
-generate_secret(u8 *s)
+generate_secret(uint8_t *s)
 {
    secure_entropy(s, 32);
    s[0] &= 248;
@ -760,9 +760,9 @@ generate_secret(u8 *s)
 * Generate a Curve25519 public key from a secret key.
 */
 static void
-compute_public(u8 *p, const u8 *s)
+compute_public(uint8_t *p, const uint8_t *s)
 {
-    static const u8 b[32] = {9};
+    static const uint8_t b[32] = {9};
    curve25519_donna(p, s, b);
 }
@ -770,7 +770,7 @@ compute_public(u8 *p, const u8 *s)
 * Compute a shared secret from our secret key and their public key.
 */
 static void
-compute_shared(u8 *sh, const u8 *s, const u8 *p)
+compute_shared(uint8_t *sh, const uint8_t *s, const uint8_t *p)
 {
    curve25519_donna(sh, s, p);
 }
@ -779,10 +779,10 @@ compute_shared(u8 *sh, const u8 *s, const u8 *p)
 * Encrypt from file to file using key/iv, aborting on any error.
 */
 static void
-symmetric_encrypt(FILE *in, FILE *out, const u8 *key, const u8 *iv)
+symmetric_encrypt(FILE *in, FILE *out, const uint8_t *key, const uint8_t *iv)
 {
-    static u8 buffer[2][CHACHA_BLOCKLENGTH * 1024];
+    static uint8_t buffer[2][CHACHA_BLOCKLENGTH * 1024];
-    u8 mac[SHA256_BLOCK_SIZE];
+    uint8_t mac[SHA256_BLOCK_SIZE];
    SHA256_CTX hmac[1];
    chacha_ctx ctx[1];
@ -798,7 +798,7 @@ symmetric_encrypt(FILE *in, FILE *out, const u8 *key, const u8 *iv)
            break;
        }
        sha256_update(hmac, buffer[0], z);
-        chacha_encrypt_bytes(ctx, buffer[0], buffer[1], z);
+        chacha_encrypt(ctx, buffer[0], buffer[1], z);
        if (!fwrite(buffer[1], z, 1, out))
            fatal("error writing ciphertext file");
        if (z < sizeof(buffer[0]))
@ -817,10 +817,10 @@ symmetric_encrypt(FILE *in, FILE *out, const u8 *key, const u8 *iv)
 * Decrypt from file to file using key/iv, aborting on any error.
 */
 static void
-symmetric_decrypt(FILE *in, FILE *out, const u8 *key, const u8 *iv)
+symmetric_decrypt(FILE *in, FILE *out, const uint8_t *key, const uint8_t *iv)
 {
-    static u8 buffer[2][CHACHA_BLOCKLENGTH * 1024 + SHA256_BLOCK_SIZE];
+    static uint8_t buffer[2][CHACHA_BLOCKLENGTH * 1024 + SHA256_BLOCK_SIZE];
-    u8 mac[SHA256_BLOCK_SIZE];
+    uint8_t mac[SHA256_BLOCK_SIZE];
    SHA256_CTX hmac[1];
    chacha_ctx ctx[1];
@ -837,14 +837,14 @@ symmetric_decrypt(FILE *in, FILE *out, const u8 *key, const u8 *iv)
    }
    for (;;) {
-        u8 *p = buffer[0] + SHA256_BLOCK_SIZE;
+        uint8_t *p = buffer[0] + SHA256_BLOCK_SIZE;
        size_t z = fread(p, 1, sizeof(buffer[0]) - SHA256_BLOCK_SIZE, in);
        if (!z) {
            if (ferror(in))
                fatal("error reading ciphertext file");
            break;
        }
-        chacha_encrypt_bytes(ctx, buffer[0], buffer[1], z);
+        chacha_encrypt(ctx, buffer[0], buffer[1], z);
        sha256_update(hmac, buffer[1], z);
        if (!fwrite(buffer[1], z, 1, out))
            fatal("error writing plaintext file");
@ -886,7 +886,7 @@ default_secfile(void)
 * Dump the public key to a file, aborting on error.
 */
 static void
-write_pubkey(char *file, u8 *key)
+write_pubkey(char *file, uint8_t *key)
 {
    FILE *f = fopen(file, "wb");
    if (!f)
@ -911,19 +911,19 @@ write_pubkey(char *file, u8 *key)
 * Write the secret key to a file, encrypting it if necessary.
 */
 static void
-write_seckey(char *file, const u8 *seckey, int iexp)
+write_seckey(char *file, const uint8_t *seckey, int iexp)
 {
    FILE *secfile;
    chacha_ctx cha[1];
    SHA256_CTX sha[1];
-    u8 buf[8 + 1 + 3 + 20 + 32] = {0}; /* entire file contents */
+    uint8_t buf[8 + 1 + 3 + 20 + 32] = {0}; /* entire file contents */
-    u8 protect[32];
+    uint8_t protect[32];
-    u8 *buf_iv           = buf + SECFILE_IV;
+    uint8_t *buf_iv           = buf + SECFILE_IV;
-    u8 *buf_iterations   = buf + SECFILE_ITERATIONS;
+    uint8_t *buf_iterations   = buf + SECFILE_ITERATIONS;
-    u8 *buf_version      = buf + SECFILE_VERSION;
+    uint8_t *buf_version      = buf + SECFILE_VERSION;
-    u8 *buf_protect_hash = buf + SECFILE_PROTECT_HASH;
+    uint8_t *buf_protect_hash = buf + SECFILE_PROTECT_HASH;
-    u8 *buf_seckey       = buf + SECFILE_SECKEY;
+    uint8_t *buf_seckey       = buf + SECFILE_SECKEY;
    buf_version[0] = ENCHIVE_FORMAT_VERSION;
@ -957,7 +957,7 @@ write_seckey(char *file, const u8 *seckey, int iexp)
        /* Encrypt using key derived from passphrase. */
        chacha_keysetup(cha, protect, 256);
        chacha_ivsetup(cha, buf_iv);
-        chacha_encrypt_bytes(cha, seckey, buf_seckey, 32);
+        chacha_encrypt(cha, seckey, buf_seckey, 32);
    } else {
        /* Copy key to output buffer. */
        memcpy(buf_seckey, seckey, 32);
@ -978,7 +978,7 @@ write_seckey(char *file, const u8 *seckey, int iexp)
 * Load the public key from the file.
 */
 static void
-load_pubkey(const char *file, u8 *key)
+load_pubkey(const char *file, uint8_t *key)
 {
    FILE *f = fopen(file, "rb");
    if (!f)
@ -1000,22 +1000,22 @@ load_pubkey(const char *file, u8 *key)
 * necessary.
 */
 static void
-load_seckey(const char *file, u8 *seckey)
+load_seckey(const char *file, uint8_t *seckey)
 {
    FILE *secfile;
    chacha_ctx cha[1];
    SHA256_CTX sha[1];
-    u8 buf[8 + 4 + 20 + 32];            /* entire key file contents */
+    uint8_t buf[8 + 4 + 20 + 32];            /* entire key file contents */
-    u8 protect[32];                     /* protection key */
+    uint8_t protect[32];                     /* protection key */
-    u8 protect_hash[SHA256_BLOCK_SIZE]; /* hash of protection key */
+    uint8_t protect_hash[SHA256_BLOCK_SIZE]; /* hash of protection key */
    int iexp;
    int version;
-    u8 *buf_iv           = buf + SECFILE_IV;
+    uint8_t *buf_iv           = buf + SECFILE_IV;
-    u8 *buf_iterations   = buf + SECFILE_ITERATIONS;
+    uint8_t *buf_iterations   = buf + SECFILE_ITERATIONS;
-    u8 *buf_version      = buf + SECFILE_VERSION;
+    uint8_t *buf_version      = buf + SECFILE_VERSION;
-    u8 *buf_protect_hash = buf + SECFILE_PROTECT_HASH;
+    uint8_t *buf_protect_hash = buf + SECFILE_PROTECT_HASH;
-    u8 *buf_seckey       = buf + SECFILE_SECKEY;
+    uint8_t *buf_seckey       = buf + SECFILE_SECKEY;
    /* Read the entire file into buf. */
    secfile = fopen(file, "rb");
@ -1064,7 +1064,7 @@ load_seckey(const char *file, u8 *seckey)
        /* Decrypt the key into the output. */
        chacha_keysetup(cha, protect, 256);
        chacha_ivsetup(cha, buf_iv);
-        chacha_encrypt_bytes(cha, buf_seckey, seckey, 32);
+        chacha_encrypt(cha, buf_seckey, seckey, 32);
    } else {
        /* Key is unencrypted, copy into output. */
        memcpy(seckey, buf_seckey, 32);
@ -1089,10 +1089,10 @@ file_exists(char *filename)
 * Print a nice fingerprint of a key.
 */
 static void
-print_fingerprint(const u8 *key)
+print_fingerprint(const uint8_t *key)
 {
    int i;
-    u8 hash[32];
+    uint8_t hash[32];
    SHA256_CTX sha[1];
    sha256_init(sha);
@ -1157,8 +1157,8 @@ command_keygen(struct optparse *options)
    char *secfile = dupstr(global_seckey);
    int pubfile_exists;
    int secfile_exists;
-    u8 public[32];
+    uint8_t public[32];
-    u8 secret[32];
+    uint8_t secret[32];
    int clobber = 0;
    int derive = 0;
    int edit = 0;
@ -1275,7 +1275,7 @@ command_fingerprint(struct optparse *options)
    };
    char *pubfile = dupstr(global_pubkey);
-    u8 public[32];
+    uint8_t public[32];
    int option;
    while ((option = optparse_long(options, fingerprint, 0)) != -1) {
@ -1311,11 +1311,11 @@ command_archive(struct optparse *options)
    int delete = 0;
    /* Workspace */
-    u8 public[32];
+    uint8_t public[32];
-    u8 esecret[32];
+    uint8_t esecret[32];
-    u8 epublic[32];
+    uint8_t epublic[32];
-    u8 shared[32];
+    uint8_t shared[32];
-    u8 iv[SHA256_BLOCK_SIZE];
+    uint8_t iv[SHA256_BLOCK_SIZE];
    SHA256_CTX sha[1];
    int option;
@ -1401,11 +1401,11 @@ command_extract(struct optparse *options)
    /* Workspace */
    SHA256_CTX sha[1];
-    u8 secret[32];
+    uint8_t secret[32];
-    u8 epublic[32];
+    uint8_t epublic[32];
-    u8 shared[32];
+    uint8_t shared[32];
-    u8 iv[8];
+    uint8_t iv[8];
-    u8 check_iv[SHA256_BLOCK_SIZE];
+    uint8_t check_iv[SHA256_BLOCK_SIZE];
    int option;
    while ((option = optparse_long(options, extract, 0)) != -1) {
--- a/src/sha256.c
+++ b/src/sha256.c
@ -29,7 +29,7 @@
 #define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10))
 /**************************** VARIABLES *****************************/
-static const u32 k[64] = {
+static const uint32_t k[64] = {
 	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
 	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
 	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
@ -41,9 +41,9 @@ static const u32 k[64] = {
 };
 /*********************** FUNCTION DEFINITIONS ***********************/
-void sha256_transform(SHA256_CTX *ctx, const u8 data[])
+void sha256_transform(SHA256_CTX *ctx, const uint8_t data[])
 {
-	u32 a, b, c, d, e, f, g, h, i, j, t1, t2, m[64];
+	uint32_t a, b, c, d, e, f, g, h, i, j, t1, t2, m[64];
 	for (i = 0, j = 0; i < 16; ++i, j += 4)
 		m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]);
@ -96,9 +96,9 @@ void sha256_init(SHA256_CTX *ctx)
 	ctx->state[7] = 0x5be0cd19;
 }
-void sha256_update(SHA256_CTX *ctx, const u8 data[], size_t len)
+void sha256_update(SHA256_CTX *ctx, const uint8_t data[], size_t len)
 {
-	u32 i;
+	uint32_t i;
 	for (i = 0; i < len; ++i) {
 		ctx->data[ctx->datalen] = data[i];
@ -111,9 +111,9 @@ void sha256_update(SHA256_CTX *ctx, const u8 data[], size_t len)
 	}
 }
-void sha256_final(SHA256_CTX *ctx, u8 hash[])
+void sha256_final(SHA256_CTX *ctx, uint8_t hash[])
 {
-	u32 i;
+	uint32_t i;
 	i = ctx->datalen;
--- a/src/sha256.h
+++ b/src/sha256.h
@ -15,15 +15,15 @@
 #define SHA256_BLOCK_SIZE 32
 typedef struct {
-	u8 data[64];
+	uint8_t data[64];
-	u32 datalen;
+	uint32_t datalen;
-	u64 bitlen;
+	uint64_t bitlen;
-	u32 state[8];
+	uint32_t state[8];
 } SHA256_CTX;
 void sha256_init(SHA256_CTX *ctx);
-void sha256_update(SHA256_CTX *ctx, const u8 data[], size_t len);
+void sha256_update(SHA256_CTX *ctx, const uint8_t data[], size_t len);
-void sha256_final(SHA256_CTX *ctx, u8 hash[]);
+void sha256_final(SHA256_CTX *ctx, uint8_t hash[]);
 #endif /* SHA256_H */