kernel-release-clang 5.5.13-2 (x86_64;znver1) 2020-11095
-9999

Status rejected
Submitter nobodydead [@T] gmail.com
Platform rolling
Repository main
URL https://abf.openmandriva.org/build_lists/751266
Packages
kernel-release-clang-5.5.13-2.x86_64.source
kernel-release-clang-desktop-clang-5.5.13-2.x86_64.binary
kernel-release-clang-5.5.13-2.znver1.source
kernel-release-clang-desktop-clang-5.5.13-2.znver1.binary
Build Date 2020-03-31 23:20:09 +0000 UTC
Last Updated 2020-04-05 00:17:54.075492234 +0000 UTC
$ git diff --patch-with-stat --summary 22e2c1b117e4c16ee6dfe272b65b61fe9cf90763..3ed5999eb8e55d11ae8796996a1a568cfaa1e6b6

 common.config             |    10 +-
 kernel-release-clang.spec |    26 +-
 net-WireGuard.patch       | 50679 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 50703 insertions(+), 12 deletions(-)
 create mode 100644 net-WireGuard.patch

diff --git a/common.config b/common.config
index 8eaaac4..81fbe47 100644
--- a/common.config
+++ b/common.config
@@ -373,6 +373,8 @@ CONFIG_NET_IPVTI=m
 CONFIG_NET_UDP_TUNNEL=m
 CONFIG_NET_FOU=m
 CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_WIREGUARD=m
+# CONFIG_WIREGUARD_DEBUG is not set
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_ESP_OFFLOAD=m
@@ -1429,7 +1431,7 @@ CONFIG_IDE_GD_ATAPI=y
 CONFIG_BLK_DEV_IDECS=m
 CONFIG_BLK_DEV_DELKIN=m
 CONFIG_BLK_DEV_IDECD=m
-CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y
+# CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS is not set
 CONFIG_BLK_DEV_IDETAPE=m
 CONFIG_BLK_DEV_IDEACPI=y
 CONFIG_IDE_TASK_IOCTL=y
@@ -4348,8 +4350,8 @@ CONFIG_CHASH=m
 # CONFIG_CHASH_STATS is not set
 # CONFIG_CHASH_SELFTEST is not set
 CONFIG_DRM_NOUVEAU=m
-CONFIG_NOUVEAU_DEBUG=5
-CONFIG_NOUVEAU_DEBUG_DEFAULT=3
+CONFIG_NOUVEAU_DEBUG=3
+CONFIG_NOUVEAU_DEBUG_DEFAULT=1
 # CONFIG_NOUVEAU_DEBUG_MMU is not set
 CONFIG_DRM_NOUVEAU_BACKLIGHT=y
 CONFIG_DRM_I915=m
@@ -4534,7 +4536,7 @@ CONFIG_SOUND=m
 CONFIG_SOUND_OSS_CORE=y
 # CONFIG_SOUND_OSS_CORE_PRECLAIM is not set
 CONFIG_SND=m
-CONFIG_SND_DEBUG_VERBOSE=y
+# CONFIG_SND_DEBUG_VERBOSE is not set
 CONFIG_SND_TIMER=m
 CONFIG_SND_PCM=m
 CONFIG_SND_PCM_ELD=y
diff --git a/kernel-release-clang.spec b/kernel-release-clang.spec
index 5545609..f509932 100644
--- a/kernel-release-clang.spec
+++ b/kernel-release-clang.spec
@@ -32,7 +32,7 @@
 %define rpmrel		0.rc%{relc}.1
 %define tar_ver   	%{kernelversion}.%{patchlevel}-rc%{relc}
 %else
-%define rpmrel		1
+%define rpmrel		2
 %define tar_ver		%{kernelversion}.%{patchlevel}
 %endif
 %define buildrpmrel	%{rpmrel}%{rpmtag}
@@ -425,6 +425,15 @@ Patch904:	drm-i915-Cast-remain-to-unsigned-long-in-eb_relocate_vma.patch
 Patch905:	drm-i915-perf-Reverse-a-ternary-to-make-sparse-happy.patch
 %endif
 
+
+# WireGuard VPN
+# from https://git.zx2c4.com/wireguard-linux-compat/
+# unpack tarball, currently v0.0.20200318
+# create patch with kernel-tree-scripts/create-patch.sh
+# NOTE! Don't rename the patch, as upstream WireGuard version check relies on the name
+# TTL 5.6
+Patch1000:	net-WireGuard.patch
+
 %define common_desc_kernel The kernel package contains the Linux kernel (vmlinuz), the core of your \
 OpenMandriva Lx operating system. The kernel handles the basic functions \
 of the operating system: memory allocation, process allocation, device \
@@ -451,6 +460,7 @@ input and output, etc.
 %define kconflicts3 dkms-nvidia-current < 325.15-1
 %define kconflicts4 dkms-nvidia-long-lived < 319.49-1
 %define kconflicts5 dkms-nvidia304 < 304.108-1
+%define kconflicts6 fuse-exfat < 1.3.0-6
 # nvidia173 does not support this kernel
 
 Autoreqprov:	no
@@ -555,7 +565,7 @@ Requires(pre):	%requires3 %requires4			\
 Requires:	%requires5				\
 Obsoletes:	%kobsoletes1 %kobsoletes2 %kobsoletes3	\
 Conflicts:	%kconflicts1 %kconflicts2 %kconflicts3	\
-Conflicts:	%kconflicts4 %kconflicts5		\
+Conflicts:	%kconflicts4 %kconflicts5 %kconflicts6	\
 Conflicts:	%{kname}-%{1}-latest <= %{kversion}-%{rpmrel}	\
 Obsoletes:	%{kname}-%{1}-latest <= %{kversion}-%{rpmrel}	\
 Provides:	installonlypkg(kernel)			\
@@ -1363,18 +1373,18 @@ cat > $kernel_files-post <<EOF
 # kernels installed
 cd /boot > /dev/null
 
-for i in $(ls vmlinuz-[0-9]*| sed 's/.*vmlinuz-//g')
+for v in $(ls vmlinuz-[0-9]*| sed 's/.*vmlinuz-//g');
 do
-	if [[ vmlinuz-$i =~ vmlinuz-%{kversion}-$kernel_flavour-%{buildrpmrel} ]]; then
+	if [[ vmlinuz-$v =~ vmlinuz-%{kversion}-$kernel_flavour-%{buildrpmrel} ]]; then
 		# we just create this
 		continue
 	fi
-	if [[ -e "initrd-$i.img" ]]; then
+	if [ -e "initrd-$v.img" ]; then
 		## if exist ignore
 		continue
 	fi
-	/sbin/depmod -a "$i"
-	[ -x /sbin/dracut ] && /sbin/dracut -f --kver "$i"
+	/sbin/depmod -a "$v"
+	[ -x /sbin/dracut ] && /sbin/dracut -f --kver "$v"
 done
 
 ## cleanup some werid symlinks we never used anyway
@@ -1420,7 +1430,7 @@ if [ -x /usr/sbin/dkms_autoinstaller ] && [ -d /usr/src/linux-%{kversion}-$kerne
     /usr/sbin/dkms_autoinstaller start %{kversion}-$kernel_flavour-%{buildrpmrel}
 fi
 
-if [ -x %{_sbindir}/dkms -a -e %{_unitdir}/dkms.service ] && [ -d /usr/src/linux-%{kversion}-$kernel_flavour-%{buildrpmrel} ]; then
+if [ -x %{_sbindir}/dkms ] && [ -e %{_unitdir}/dkms.service ] && [ -d /usr/src/linux-%{kversion}-$kernel_flavour-%{buildrpmrel} ]; then
     /bin/systemctl --quiet restart dkms.service
     /bin/systemctl --quiet try-restart fedora-loadmodules.service
     %{_sbindir}/dkms autoinstall --verbose --kernelver %{kversion}-$kernel_flavour-%{buildrpmrel}
diff --git a/net-WireGuard.patch b/net-WireGuard.patch
new file mode 100644
index 0000000..244659f
--- /dev/null
+++ b/net-WireGuard.patch
@@ -0,0 +1,50679 @@
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/allowedips.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,382 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "allowedips.h"
++#include "peer.h"
++
++static void swap_endian(u8 *dst, const u8 *src, u8 bits)
++{
++	if (bits == 32) { /* 32-bit key: one big-endian word converted to CPU order */
++		*(u32 *)dst = be32_to_cpu(*(const __be32 *)src);
++	} else if (bits == 128) { /* 128-bit key: two big-endian 64-bit halves, converted separately */
++		((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]);
++		((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]);
++	} /* any other width: dst is left untouched */
++}
++
++static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
++				 u8 cidr, u8 bits)
++{
++	node->cidr = cidr;
++	node->bit_at_a = cidr / 8U; /* byte index later used by CHOOSE_NODE */
++#ifdef __LITTLE_ENDIAN
++	node->bit_at_a ^= (bits / 8U - 1U) % 8U; /* XOR with 3 (bits == 32) or 7 (bits == 128) */
++#endif
++	node->bit_at_b = 7U - (cidr % 8U); /* right-shift CHOOSE_NODE applies to that byte */
++	node->bitlen = bits;
++	memcpy(node->bits, src, bits / 8U); /* keep a copy of the key, bits / 8 bytes long */
++}
++#define CHOOSE_NODE(parent, key) \
++	parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1] /* child slot 0 or 1, picked by one bit of key */
++
++static void node_free_rcu(struct rcu_head *rcu)
++{
++	kfree(container_of(rcu, struct allowedips_node, rcu)); /* call_rcu() callback: free the node embedding this rcu_head */
++}
++
++static void push_rcu(struct allowedips_node **stack,
++		     struct allowedips_node __rcu *p, unsigned int *len)
++{
++	if (rcu_access_pointer(p)) { /* NULL children are never pushed */
++		WARN_ON(IS_ENABLED(DEBUG) && *len >= 128); /* both callers use 128-entry stacks; checked in DEBUG builds only */
++		stack[(*len)++] = rcu_dereference_raw(p);
++	}
++}
++
++static void root_free_rcu(struct rcu_head *rcu)
++{
++	struct allowedips_node *node, *stack[128] = {
++		container_of(rcu, struct allowedips_node, rcu) }; /* stack[0] = node embedding this rcu_head */
++	unsigned int len = 1;
++
++	while (len > 0 && (node = stack[--len])) { /* pop nodes until the explicit stack is empty */
++		push_rcu(stack, node->bit[0], &len); /* children are queued before their parent is freed */
++		push_rcu(stack, node->bit[1], &len);
++		kfree(node);
++	}
++}
++
++static void root_remove_peer_lists(struct allowedips_node *root)
++{
++	struct allowedips_node *node, *stack[128] = { root }; /* explicit stack, no recursion */
++	unsigned int len = 1;
++
++	while (len > 0 && (node = stack[--len])) { /* visit every node reachable from root */
++		push_rcu(stack, node->bit[0], &len);
++		push_rcu(stack, node->bit[1], &len);
++		if (rcu_access_pointer(node->peer)) /* only nodes that carry a peer are unlinked */
++			list_del(&node->peer_list);
++	}
++}
++
++static void walk_remove_by_peer(struct allowedips_node __rcu **top,
++				struct wg_peer *peer, struct mutex *lock)
++{
++#define REF(p) rcu_access_pointer(p)
++#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
++#define PUSH(p) ({                                                             \
++		WARN_ON(IS_ENABLED(DEBUG) && len >= 128);                      \
++		stack[len++] = p;                                              \
++	})
++
++	struct allowedips_node __rcu **stack[128], **nptr; /* stack of pointer slots, so a slot can be rewritten */
++	struct allowedips_node *node, *prev; /* prev = node handled in the previous iteration */
++	unsigned int len;
++
++	if (unlikely(!peer || !REF(*top))) /* nothing to do for a NULL peer or an empty trie */
++		return;
++
++	for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
++		nptr = stack[len - 1]; /* peek, the slot is popped only once the node is finished */
++		node = DEREF(nptr);
++		if (!node) {
++			--len;
++			continue;
++		}
++		if (!prev || REF(prev->bit[0]) == node ||
++		    REF(prev->bit[1]) == node) { /* first node, or reached from its parent: descend */
++			if (REF(node->bit[0]))
++				PUSH(&node->bit[0]);
++			else if (REF(node->bit[1]))
++				PUSH(&node->bit[1]);
++		} else if (REF(node->bit[0]) == prev) { /* came back up from bit[0]: bit[1] is next, if present */
++			if (REF(node->bit[1]))
++				PUSH(&node->bit[1]);
++		} else { /* no subtree left to enter: handle this node, then pop it */
++			if (rcu_dereference_protected(node->peer,
++				lockdep_is_held(lock)) == peer) {
++				RCU_INIT_POINTER(node->peer, NULL);
++				list_del_init(&node->peer_list);
++				if (!node->bit[0] || !node->bit[1]) { /* fewer than two children: splice the node out */
++					rcu_assign_pointer(*nptr, DEREF(
++					       &node->bit[!REF(node->bit[0])])); /* the slot now points at the remaining child (or NULL) */
++					call_rcu(&node->rcu, node_free_rcu);
++					node = DEREF(nptr); /* so the next iteration's prev is the replacement */
++				}
++			}
++			--len;
++		}
++	}
++
++#undef REF
++#undef DEREF
++#undef PUSH
++}
++
++static unsigned int fls128(u64 a, u64 b)
++{
++	return a ? fls64(a) + 64U : fls64(b); /* a is treated as the upper 64 bits, b as the lower 64 bits */
++}
++
++static u8 common_bits(const struct allowedips_node *node, const u8 *key,
++		      u8 bits)
++{
++	if (bits == 32) /* width minus the highest differing bit of (node bits XOR key) */
++		return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key);
++	else if (bits == 128) /* same idea, with the XOR done on two 64-bit halves */
++		return 128U - fls128(
++			*(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0],
++			*(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]);
++	return 0; /* any other width reports no common bits */
++}
++
++static bool prefix_matches(const struct allowedips_node *node, const u8 *key,
++			   u8 bits)
++{
++	/* This could be much faster if it actually just compared the common
++	 * bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and
++	 * the rest, but it turns out that common_bits is already super fast on
++	 * modern processors, even taking into account the unfortunate bswap.
++	 * So, we just inline it like this instead.
++	 */
++	return common_bits(node, key, bits) >= node->cidr; /* true when at least node->cidr leading bits agree */
++}
++
++static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
++					 const u8 *key)
++{
++	struct allowedips_node *node = trie, *found = NULL; /* found = last matching node that has a peer */
++
++	while (node && prefix_matches(node, key, bits)) {
++		if (rcu_access_pointer(node->peer)) /* nodes without a peer are walked through but never returned */
++			found = node;
++		if (node->cidr == bits) /* full-length prefix: nothing more specific can exist */
++			break;
++		node = rcu_dereference_bh(CHOOSE_NODE(node, key));
++	}
++	return found; /* NULL when no matching node carries a peer */
++}
++
++/* Returns a strong reference to the peer of the node find_node() selects, or NULL */
++static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits,
++			      const void *be_ip)
++{
++	/* Aligned so it can be passed to fls/fls64 */
++	u8 ip[16] __aligned(__alignof(u64));
++	struct allowedips_node *node;
++	struct wg_peer *peer = NULL;
++
++	swap_endian(ip, be_ip, bits); /* the trie is keyed on CPU-order values, be_ip is big-endian */
++
++	rcu_read_lock_bh();
++retry:
++	node = find_node(rcu_dereference_bh(root), bits, ip);
++	if (node) {
++		peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer));
++		if (!peer) /* NOTE(review): presumably the peer is being torn down; helper is defined elsewhere */
++			goto retry;
++	}
++	rcu_read_unlock_bh();
++	return peer;
++}
++
++static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
++			   u8 cidr, u8 bits, struct allowedips_node **rnode,
++			   struct mutex *lock)
++{
++	struct allowedips_node *node = rcu_dereference_protected(trie,
++						lockdep_is_held(lock));
++	struct allowedips_node *parent = NULL; /* deepest node accepted so far */
++	bool exact = false;
++
++	while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) { /* only nodes no more specific than cidr */
++		parent = node;
++		if (parent->cidr == cidr) { /* a node for exactly this prefix already exists */
++			exact = true;
++			break;
++		}
++		node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
++						 lockdep_is_held(lock));
++	}
++	*rnode = parent; /* NULL when not even the root qualified */
++	return exact; /* true: *rnode is the exact node; false: *rnode is the would-be parent */
++}
++
++static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
++	       u8 cidr, struct wg_peer *peer, struct mutex *lock)
++{
++	struct allowedips_node *node, *parent, *down, *newnode;
++
++	if (unlikely(cidr > bits || !peer)) /* prefix longer than the key width, or no peer given */
++		return -EINVAL;
++
++	if (!rcu_access_pointer(*trie)) { /* empty trie: the new node becomes the root */
++		node = kzalloc(sizeof(*node), GFP_KERNEL);
++		if (unlikely(!node))
++			return -ENOMEM;
++		RCU_INIT_POINTER(node->peer, peer);
++		list_add_tail(&node->peer_list, &peer->allowedips_list);
++		copy_and_assign_cidr(node, key, cidr, bits);
++		rcu_assign_pointer(*trie, node); /* publish only after the node is fully set up */
++		return 0;
++	}
++	if (node_placement(*trie, key, cidr, bits, &node, lock)) { /* exact prefix already present: re-point it at peer */
++		rcu_assign_pointer(node->peer, peer);
++		list_move_tail(&node->peer_list, &peer->allowedips_list);
++		return 0;
++	}
++
++	newnode = kzalloc(sizeof(*newnode), GFP_KERNEL); /* from here on, node is the would-be parent or NULL */
++	if (unlikely(!newnode))
++		return -ENOMEM;
++	RCU_INIT_POINTER(newnode->peer, peer);
++	list_add_tail(&newnode->peer_list, &peer->allowedips_list);
++	copy_and_assign_cidr(newnode, key, cidr, bits);
++
++	if (!node) { /* no usable parent: the existing root is what newnode must be merged with */
++		down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
++	} else {
++		down = rcu_dereference_protected(CHOOSE_NODE(node, key),
++						 lockdep_is_held(lock));
++		if (!down) { /* the parent's slot for this key is free: hang newnode there */
++			rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
++			return 0;
++		}
++	}
++	cidr = min(cidr, common_bits(down, key, bits)); /* prefix length shared by the new key and down */
++	parent = node;
++
++	if (newnode->cidr == cidr) { /* the new prefix covers down: newnode goes between parent and down */
++		rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
++		if (!parent)
++			rcu_assign_pointer(*trie, newnode);
++		else
++			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
++					   newnode);
++	} else { /* the two diverge earlier: insert a peer-less node for the shared prefix above both */
++		node = kzalloc(sizeof(*node), GFP_KERNEL);
++		if (unlikely(!node)) {
++			list_del(&newnode->peer_list); /* undo the list insertion done above */
++			kfree(newnode);
++			return -ENOMEM;
++		}
++		INIT_LIST_HEAD(&node->peer_list);
++		copy_and_assign_cidr(node, newnode->bits, cidr, bits);
++
++		rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
++		rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
++		if (!parent)
++			rcu_assign_pointer(*trie, node);
++		else
++			rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
++					   node);
++	}
++	return 0;
++}
++
++void wg_allowedips_init(struct allowedips *table)
++{
++	table->root4 = table->root6 = NULL; /* both tries start out empty */
++	table->seq = 1; /* counter incremented by every insert/remove/free in this file */
++}
++
++void wg_allowedips_free(struct allowedips *table, struct mutex *lock)
++{
++	struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6; /* keep the old roots before clearing */
++
++	++table->seq;
++	RCU_INIT_POINTER(table->root4, NULL); /* detach both tries from the table first */
++	RCU_INIT_POINTER(table->root6, NULL);
++	if (rcu_access_pointer(old4)) {
++		struct allowedips_node *node = rcu_dereference_protected(old4,
++							lockdep_is_held(lock));
++
++		root_remove_peer_lists(node); /* unlink nodes from peer lists now ... */
++		call_rcu(&node->rcu, root_free_rcu); /* ... and free the whole trie via an RCU callback */
++	}
++	if (rcu_access_pointer(old6)) { /* same two steps for the second trie */
++		struct allowedips_node *node = rcu_dereference_protected(old6,
++							lockdep_is_held(lock));
++
++		root_remove_peer_lists(node);
++		call_rcu(&node->rcu, root_free_rcu);
++	}
++}
++
++int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
++			    u8 cidr, struct wg_peer *peer, struct mutex *lock)
++{
++	/* Aligned so it can be passed to fls */
++	u8 key[4] __aligned(__alignof(u32));
++
++	++table->seq; /* bumped even if add() fails below */
++	swap_endian(key, (const u8 *)ip, 32); /* key = address converted from big-endian to CPU order */
++	return add(&table->root4, 32, key, cidr, peer, lock); /* 0, -EINVAL or -ENOMEM from add() */
++}
++
++int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
++			    u8 cidr, struct wg_peer *peer, struct mutex *lock)
++{
++	/* Aligned so it can be passed to fls64 */
++	u8 key[16] __aligned(__alignof(u64));
++
++	++table->seq; /* bumped even if add() fails below */
++	swap_endian(key, (const u8 *)ip, 128); /* key = address converted from big-endian to CPU order */
++	return add(&table->root6, 128, key, cidr, peer, lock); /* 0, -EINVAL or -ENOMEM from add() */
++}
++
++void wg_allowedips_remove_by_peer(struct allowedips *table,
++				  struct wg_peer *peer, struct mutex *lock)
++{
++	++table->seq;
++	walk_remove_by_peer(&table->root4, peer, lock); /* clear peer from the root4 trie ... */
++	walk_remove_by_peer(&table->root6, peer, lock); /* ... and from the root6 trie */
++}
++
++int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
++{
++	const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U); /* bytes needed to hold cidr bits */
++	swap_endian(ip, node->bits, node->bitlen); /* same byte swap that was applied on insert */
++	memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes); /* zero every byte past the prefix */
++	if (node->cidr) /* cidr == 0 would index ip[-1] below */
++		ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U); /* clear non-prefix bits of the last prefix byte */
++
++	*cidr = node->cidr;
++	return node->bitlen == 32 ? AF_INET : AF_INET6; /* address family derived from the key width */
++}
++
++/* Returns a strong reference to a peer, looked up by the packet's destination address */
++struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
++					 struct sk_buff *skb)
++{
++	if (skb->protocol == htons(ETH_P_IP)) /* IPv4: 32-bit key in root4 */
++		return lookup(table->root4, 32, &ip_hdr(skb)->daddr);
++	else if (skb->protocol == htons(ETH_P_IPV6)) /* IPv6: 128-bit key in root6 */
++		return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr);
++	return NULL; /* any other protocol has no peer */
++}
++
++/* Returns a strong reference to a peer, looked up by the packet's source address */
++struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
++					 struct sk_buff *skb)
++{
++	if (skb->protocol == htons(ETH_P_IP)) /* IPv4: 32-bit key in root4 */
++		return lookup(table->root4, 32, &ip_hdr(skb)->saddr);
++	else if (skb->protocol == htons(ETH_P_IPV6)) /* IPv6: 128-bit key in root6 */
++		return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr);
++	return NULL; /* any other protocol has no peer */
++}
++
++#include "selftest/allowedips.c"
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/dst_cache/dst_cache.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,175 @@
++/*
++ * net/core/dst_cache.c - dst entry cache
++ *
++ * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ */
++
++#include <linux/kernel.h>
++#include <linux/percpu.h>
++#include <net/dst_cache.h>
++#include <net/route.h>
++#if IS_ENABLED(CONFIG_IPV6)
++#include <net/ip6_fib.h>
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 50)
++static inline u32 rt6_get_cookie(const struct rt6_info *rt)
++{
++	if ((unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from)) /* DST_NOCACHE route with a 'from' entry: use that entry */
++		rt = (struct rt6_info *)(rt->dst.from);
++
++	return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; /* 0 when the route has no rt6i_node */
++}
++#endif
++#endif
++#include <uapi/linux/in.h>
++
++struct dst_cache_pcpu {
++	unsigned long refresh_ts; /* jiffies value, compared against dst_cache->reset_ts on lookup */
++	struct dst_entry *dst; /* cached entry; the cache holds a reference while non-NULL */
++	u32 cookie; /* handed to dst->ops->check() when the entry is marked obsolete */
++	union { /* source address stored alongside the entry, IPv4 or IPv6 */
++		struct in_addr in_saddr;
++		struct in6_addr in6_saddr;
++	};
++};
++
++static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
++				      struct dst_entry *dst, u32 cookie)
++{
++	dst_release(dst_cache->dst); /* drop the reference on the entry being replaced */
++	if (dst) /* dst == NULL simply empties the slot */
++		dst_hold(dst);
++
++	dst_cache->cookie = cookie;
++	dst_cache->dst = dst;
++}
++
++static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
++					       struct dst_cache_pcpu *idst)
++{
++	struct dst_entry *dst;
++
++	dst = idst->dst;
++	if (!dst) /* empty slot */
++		goto fail;
++
++	/* the cache already holds a dst reference; it can't go away */
++	dst_hold(dst); /* extra reference that is returned to the caller */
++
++	if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
++		     (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) { /* not refreshed since reset, or obsolete and rejected */
++		dst_cache_per_cpu_dst_set(idst, NULL, 0); /* drops the cache's own reference */
++		dst_release(dst); /* drops the reference taken just above */
++		goto fail;
++	}
++	return dst;
++
++fail:
++	idst->refresh_ts = jiffies; /* record the miss time for the next comparison with reset_ts */
++	return NULL;
++}
++
++struct dst_entry *dst_cache_get(struct dst_cache *dst_cache)
++{
++	if (!dst_cache->cache) /* per-CPU storage was never allocated */
++		return NULL;
++
++	return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache)); /* held entry from this CPU's slot, or NULL */
++}
++
++struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
++{
++	struct dst_cache_pcpu *idst;
++	struct dst_entry *dst;
++
++	if (!dst_cache->cache) /* per-CPU storage was never allocated */
++		return NULL;
++
++	idst = this_cpu_ptr(dst_cache->cache);
++	dst = dst_cache_per_cpu_get(dst_cache, idst);
++	if (!dst) /* miss: *saddr is left unmodified */
++		return NULL;
++
++	*saddr = idst->in_saddr.s_addr; /* source address stored by dst_cache_set_ip4() */
++	return container_of(dst, struct rtable, dst); /* the cached dst is embedded in a struct rtable */
++}
++
++void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
++		       __be32 saddr)
++{
++	struct dst_cache_pcpu *idst;
++
++	if (!dst_cache->cache) /* per-CPU storage was never allocated: nothing to update */
++		return;
++
++	idst = this_cpu_ptr(dst_cache->cache);
++	dst_cache_per_cpu_dst_set(idst, dst, 0); /* the IPv4 path stores cookie 0 */
++	idst->in_saddr.s_addr = saddr;
++}
++
++#if IS_ENABLED(CONFIG_IPV6)
++void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
++		       const struct in6_addr *addr)
++{
++	struct dst_cache_pcpu *idst;
++
++	if (!dst_cache->cache) /* per-CPU storage was never allocated: nothing to update */
++		return;
++
++	idst = this_cpu_ptr(dst_cache->cache);
++	dst_cache_per_cpu_dst_set(idst, dst, /* reuse idst so dst and saddr land in the same slot */
++				  rt6_get_cookie((struct rt6_info *)dst));
++	idst->in6_saddr = *addr;
++}
++
++struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
++				    struct in6_addr *saddr)
++{
++	struct dst_cache_pcpu *idst;
++	struct dst_entry *dst;
++
++	if (!dst_cache->cache) /* per-CPU storage was never allocated */
++		return NULL;
++
++	idst = this_cpu_ptr(dst_cache->cache);
++	dst = dst_cache_per_cpu_get(dst_cache, idst);
++	if (!dst) /* miss: *saddr is left unmodified */
++		return NULL;
++
++	*saddr = idst->in6_saddr; /* source address stored by dst_cache_set_ip6() */
++	return dst;
++}
++#endif
++
++int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
++{
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
++	BUG_ON(gfp & GFP_ATOMIC); /* this branch ignores gfp, so GFP_ATOMIC bits are rejected outright */
++	dst_cache->cache = alloc_percpu(struct dst_cache_pcpu);
++#else
++	dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
++					    gfp | __GFP_ZERO);
++#endif
++	if (!dst_cache->cache) /* allocation failed */
++		return -ENOMEM;
++
++	dst_cache_reset(dst_cache); /* defined outside this file */
++	return 0;
++}
++
++void dst_cache_destroy(struct dst_cache *dst_cache)
++{
++	int i;
++
++	if (!dst_cache->cache) /* never allocated: nothing to release */
++		return;
++
++	for_each_possible_cpu(i) /* drop the reference held in every CPU's slot */
++		dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
++
++	free_percpu(dst_cache->cache); /* the pointer itself is not reset to NULL here */
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/memneq/memneq.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,170 @@
++/*
++ * Constant-time equality testing of memory regions.
++ *
++ * Authors:
++ *
++ *   James Yonan <james@openvpn.net>
++ *   Daniel Borkmann <dborkman@redhat.com>
++ *
++ * This file is provided under a dual BSD/GPLv2 license.  When using or
++ * redistributing this file, you may do so under either license.
++ *
++ * GPL LICENSE SUMMARY
++ *
++ * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of version 2 of the GNU General Public License as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ * The full GNU General Public License is included in this distribution
++ * in the file called LICENSE.GPL.
++ *
++ * BSD LICENSE
++ *
++ * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ *   * Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *   * Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in
++ *     the documentation and/or other materials provided with the
++ *     distribution.
++ *   * Neither the name of OpenVPN Technologies nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <crypto/algapi.h>
++
++/* Make the optimizer believe the variable can be manipulated arbitrarily. */
++#define COMPILER_OPTIMIZER_HIDE_VAR(var) asm("" : "=r" (var) : "0" (var)) /* empty asm, var is both input and output */
++
++#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ
++
++/* Generic path for arbitrary size; returns 0 iff the two regions are equal */
++static inline unsigned long
++__crypto_memneq_generic(const void *a, const void *b, size_t size)
++{
++	unsigned long neq = 0; /* accumulates the XOR of every compared unit */
++
++#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
++	while (size >= sizeof(unsigned long)) { /* word-at-a-time while a full word remains */
++		neq |= *(unsigned long *)a ^ *(unsigned long *)b;
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		a += sizeof(unsigned long);
++		b += sizeof(unsigned long);
++		size -= sizeof(unsigned long);
++	}
++#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
++	while (size > 0) { /* remaining bytes (or all of them without the word path) */
++		neq |= *(unsigned char *)a ^ *(unsigned char *)b;
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		a += 1;
++		b += 1;
++		size -= 1;
++	}
++	return neq; /* no early exit: every byte is always read */
++}
++
++/* Loop-free fast-path for frequently used 16-byte size; returns 0 iff equal */
++static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
++{
++	unsigned long neq = 0; /* accumulates the XOR of every compared unit */
++
++#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++	if (sizeof(unsigned long) == 8) { /* two 8-byte loads */
++		neq |= *(unsigned long *)(a)   ^ *(unsigned long *)(b);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++	} else if (sizeof(unsigned int) == 4) { /* four 4-byte loads */
++		neq |= *(unsigned int *)(a)    ^ *(unsigned int *)(b);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned int *)(a+4)  ^ *(unsigned int *)(b+4);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned int *)(a+8)  ^ *(unsigned int *)(b+8);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++	} else
++#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
++	{ /* byte-wise path: sixteen single-byte loads, fully unrolled */
++		neq |= *(unsigned char *)(a)    ^ *(unsigned char *)(b);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+1)  ^ *(unsigned char *)(b+1);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+2)  ^ *(unsigned char *)(b+2);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+3)  ^ *(unsigned char *)(b+3);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+4)  ^ *(unsigned char *)(b+4);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+5)  ^ *(unsigned char *)(b+5);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+6)  ^ *(unsigned char *)(b+6);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+7)  ^ *(unsigned char *)(b+7);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+8)  ^ *(unsigned char *)(b+8);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+9)  ^ *(unsigned char *)(b+9);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++		neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15);
++		COMPILER_OPTIMIZER_HIDE_VAR(neq);
++	}
++
++	return neq;
++}
++
++/* Compare two areas of memory without leaking timing information,
++ * and with special optimizations for common sizes.  Users should
++ * not call this function directly, but should instead use
++ * crypto_memneq defined in crypto/algapi.h.
++ */
++noinline unsigned long __crypto_memneq(const void *a, const void *b,
++				       size_t size)
++{
++	switch (size) { /* dispatch on length only, never on the data */
++	case 16: /* unrolled fast path */
++		return __crypto_memneq_16(a, b);
++	default: /* every other length, including 0 */
++		return __crypto_memneq_generic(a, b, size);
++	}
++}
++
++#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/siphash/siphash.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,539 @@
++/* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This file is provided under a dual BSD/GPLv2 license.
++ *
++ * SipHash: a fast short-input PRF
++ * https://131002.net/siphash/
++ *
++ * This implementation is specifically for SipHash2-4 for a secure PRF
++ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
++ * hashtables.
++ */
++
++#include <linux/siphash.h>
++#include <asm/unaligned.h>
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
++#ifdef __LITTLE_ENDIAN
++#define bytemask_from_count(cnt)	(~(~0ul << (cnt)*8)) /* low cnt*8 bits set */
++#else
++#define bytemask_from_count(cnt)	(~(~0ul >> (cnt)*8)) /* high cnt*8 bits set */
++#endif
++#endif
++
++#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
++#include <linux/dcache.h>
++#include <asm/word-at-a-time.h>
++#endif
++
++#define SIPROUND \
++	do { \
++	v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
++	v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
++	v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
++	v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
++	} while (0) /* one add/rotate/xor round over the caller's v0..v3 locals */
++
++#define PREAMBLE(len) \
++	u64 v0 = 0x736f6d6570736575ULL; \
++	u64 v1 = 0x646f72616e646f6dULL; \
++	u64 v2 = 0x6c7967656e657261ULL; \
++	u64 v3 = 0x7465646279746573ULL; \
++	u64 b = ((u64)(len)) << 56; \
++	v3 ^= key->key[1]; \
++	v2 ^= key->key[0]; \
++	v1 ^= key->key[1]; \
++	v0 ^= key->key[0]; /* declares v0..v3 and b (len in the top byte); needs a 'key' in scope */
++
++#define POSTAMBLE \
++	v3 ^= b; \
++	SIPROUND; \
++	SIPROUND; \
++	v0 ^= b; \
++	v2 ^= 0xff; \
++	SIPROUND; \
++	SIPROUND; \
++	SIPROUND; \
++	SIPROUND; \
++	return (v0 ^ v1) ^ (v2 ^ v3); /* absorbs b with 2 rounds, runs 4 more, returns from the enclosing function */
++
++u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
++{
++	const u8 *end = data + len - (len % sizeof(u64)); /* end of the last full 8-byte word */
++	const u8 left = len & (sizeof(u64) - 1); /* trailing bytes, 0..7 */
++	u64 m;
++	PREAMBLE(len)
++	for (; data != end; data += sizeof(u64)) { /* absorb every full 8-byte word */
++		m = le64_to_cpup(data);
++		v3 ^= m;
++		SIPROUND;
++		SIPROUND;
++		v0 ^= m;
++	}
++#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
++	if (left) /* one masked word load for the tail */
++		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
++						  bytemask_from_count(left)));
++#else
++	switch (left) { /* cases without a break fall through on purpose; data == end here */
++	case 7: b |= ((u64)end[6]) << 48;
++	case 6: b |= ((u64)end[5]) << 40;
++	case 5: b |= ((u64)end[4]) << 32;
++	case 4: b |= le32_to_cpup(data); break;
++	case 3: b |= ((u64)end[2]) << 16;
++	case 2: b |= le16_to_cpup(data); break;
++	case 1: b |= end[0];
++	}
++#endif
++	POSTAMBLE
++}
++
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
++{
++	const u8 *end = data + len - (len % sizeof(u64)); /* end of the last full 8-byte word */
++	const u8 left = len & (sizeof(u64) - 1); /* trailing bytes, 0..7 */
++	u64 m;
++	PREAMBLE(len)
++	for (; data != end; data += sizeof(u64)) { /* absorb every full 8-byte word */
++		m = get_unaligned_le64(data); /* unaligned-safe load, unlike __siphash_aligned */
++		v3 ^= m;
++		SIPROUND;
++		SIPROUND;
++		v0 ^= m;
++	}
++#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
++	if (left) /* one masked word load for the tail */
++		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
++						  bytemask_from_count(left)));
++#else
++	switch (left) { /* cases without a break fall through on purpose */
++	case 7: b |= ((u64)end[6]) << 48;
++	case 6: b |= ((u64)end[5]) << 40;
++	case 5: b |= ((u64)end[4]) << 32;
++	case 4: b |= get_unaligned_le32(end); break;
++	case 3: b |= ((u64)end[2]) << 16;
++	case 2: b |= get_unaligned_le16(end); break;
++	case 1: b |= end[0];
++	}
++#endif
++	POSTAMBLE
++}
++#endif
++
++/**
++ * siphash_1u64 - compute 64-bit siphash PRF value of a u64
++ * @first: first u64
++ * @key: the siphash key
++ */
++u64 siphash_1u64(const u64 first, const siphash_key_t *key)
++{
++	PREAMBLE(8)
++	v3 ^= first;
++	SIPROUND;
++	SIPROUND;
++	v0 ^= first;
++	POSTAMBLE
++}
++
++/**
++ * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64
++ * @first: first u64
++ * @second: second u64
++ * @key: the siphash key
++ */
++u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
++{
++	PREAMBLE(16)
++	v3 ^= first;
++	SIPROUND;
++	SIPROUND;
++	v0 ^= first;
++	v3 ^= second;
++	SIPROUND;
++	SIPROUND;
++	v0 ^= second;
++	POSTAMBLE
++}
++
++/**
++ * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64
++ * @first: first u64
++ * @second: second u64
++ * @third: third u64
++ * @key: the siphash key
++ */
++u64 siphash_3u64(const u64 first, const u64 second, const u64 third,
++		 const siphash_key_t *key)
++{
++	PREAMBLE(24)
++	v3 ^= first;
++	SIPROUND;
++	SIPROUND;
++	v0 ^= first;
++	v3 ^= second;
++	SIPROUND;
++	SIPROUND;
++	v0 ^= second;
++	v3 ^= third;
++	SIPROUND;
++	SIPROUND;
++	v0 ^= third;
++	POSTAMBLE
++}
++
++/**
++ * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64
++ * @first: first u64
++ * @second: second u64
++ * @third: third u64
++ * @forth: fourth u64
++ * @key: the siphash key
++ */
++u64 siphash_4u64(const u64 first, const u64 second, const u64 third,
++		 const u64 forth, const siphash_key_t *key)
++{
++	PREAMBLE(32)
++	v3 ^= first;
++	SIPROUND;
++	SIPROUND;
++	v0 ^= first;
++	v3 ^= second;
++	SIPROUND;
++	SIPROUND;
++	v0 ^= second;
++	v3 ^= third;
++	SIPROUND;
++	SIPROUND;
++	v0 ^= third;
++	v3 ^= forth;
++	SIPROUND;
++	SIPROUND;
++	v0 ^= forth;
++	POSTAMBLE
++}
++
++u64 siphash_1u32(const u32 first, const siphash_key_t *key)
++{
++	PREAMBLE(4)
++	b |= first;
++	POSTAMBLE
++}
++
++u64 siphash_3u32(const u32 first, const u32 second, const u32 third,
++		 const siphash_key_t *key)
++{
++	u64 combined = (u64)second << 32 | first;
++	PREAMBLE(12)
++	v3 ^= combined;
++	SIPROUND;
++	SIPROUND;
++	v0 ^= combined;
++	b |= third;
++	POSTAMBLE
++}
++
++#if BITS_PER_LONG == 64
++/* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for
++ * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3.
++ */
++
++#define HSIPROUND SIPROUND
++#define HPREAMBLE(len) PREAMBLE(len)
++#define HPOSTAMBLE \
++	v3 ^= b; \
++	HSIPROUND; \
++	v0 ^= b; \
++	v2 ^= 0xff; \
++	HSIPROUND; \
++	HSIPROUND; \
++	HSIPROUND; \
++	return (v0 ^ v1) ^ (v2 ^ v3);
++
++u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
++{
++	const u8 *end = data + len - (len % sizeof(u64));
++	const u8 left = len & (sizeof(u64) - 1);
++	u64 m;
++	HPREAMBLE(len)
++	for (; data != end; data += sizeof(u64)) {
++		m = le64_to_cpup(data);
++		v3 ^= m;
++		HSIPROUND;
++		v0 ^= m;
++	}
++#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
++	if (left)
++		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
++						  bytemask_from_count(left)));
++#else
++	switch (left) {
++	case 7: b |= ((u64)end[6]) << 48;
++	case 6: b |= ((u64)end[5]) << 40;
++	case 5: b |= ((u64)end[4]) << 32;
++	case 4: b |= le32_to_cpup(data); break;
++	case 3: b |= ((u64)end[2]) << 16;
++	case 2: b |= le16_to_cpup(data); break;
++	case 1: b |= end[0];
++	}
++#endif
++	HPOSTAMBLE
++}
++
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++u32 __hsiphash_unaligned(const void *data, size_t len,
++			 const hsiphash_key_t *key)
++{
++	const u8 *end = data + len - (len % sizeof(u64));
++	const u8 left = len & (sizeof(u64) - 1);
++	u64 m;
++	HPREAMBLE(len)
++	for (; data != end; data += sizeof(u64)) {
++		m = get_unaligned_le64(data);
++		v3 ^= m;
++		HSIPROUND;
++		v0 ^= m;
++	}
++#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
++	if (left)
++		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
++						  bytemask_from_count(left)));
++#else
++	switch (left) {
++	case 7: b |= ((u64)end[6]) << 48;
++	case 6: b |= ((u64)end[5]) << 40;
++	case 5: b |= ((u64)end[4]) << 32;
++	case 4: b |= get_unaligned_le32(end); break;
++	case 3: b |= ((u64)end[2]) << 16;
++	case 2: b |= get_unaligned_le16(end); break;
++	case 1: b |= end[0];
++	}
++#endif
++	HPOSTAMBLE
++}
++#endif
++
++/**
++ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
++ * @first: first u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
++{
++	HPREAMBLE(4)
++	b |= first;
++	HPOSTAMBLE
++}
++
++/**
++ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
++ * @first: first u32
++ * @second: second u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
++{
++	u64 combined = (u64)second << 32 | first;
++	HPREAMBLE(8)
++	v3 ^= combined;
++	HSIPROUND;
++	v0 ^= combined;
++	HPOSTAMBLE
++}
++
++/**
++ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
++ * @first: first u32
++ * @second: second u32
++ * @third: third u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
++		  const hsiphash_key_t *key)
++{
++	u64 combined = (u64)second << 32 | first;
++	HPREAMBLE(12)
++	v3 ^= combined;
++	HSIPROUND;
++	v0 ^= combined;
++	b |= third;
++	HPOSTAMBLE
++}
++
++/**
++ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
++ * @first: first u32
++ * @second: second u32
++ * @third: third u32
++ * @forth: fourth u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
++		  const u32 forth, const hsiphash_key_t *key)
++{
++	u64 combined = (u64)second << 32 | first;
++	HPREAMBLE(16)
++	v3 ^= combined;
++	HSIPROUND;
++	v0 ^= combined;
++	combined = (u64)forth << 32 | third;
++	v3 ^= combined;
++	HSIPROUND;
++	v0 ^= combined;
++	HPOSTAMBLE
++}
++#else
++#define HSIPROUND \
++	do { \
++	v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
++	v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
++	v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
++	v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
++	} while (0)
++
++#define HPREAMBLE(len) \
++	u32 v0 = 0; \
++	u32 v1 = 0; \
++	u32 v2 = 0x6c796765U; \
++	u32 v3 = 0x74656462U; \
++	u32 b = ((u32)(len)) << 24; \
++	v3 ^= key->key[1]; \
++	v2 ^= key->key[0]; \
++	v1 ^= key->key[1]; \
++	v0 ^= key->key[0];
++
++#define HPOSTAMBLE \
++	v3 ^= b; \
++	HSIPROUND; \
++	v0 ^= b; \
++	v2 ^= 0xff; \
++	HSIPROUND; \
++	HSIPROUND; \
++	HSIPROUND; \
++	return v1 ^ v3;
++
++u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
++{
++	const u8 *end = data + len - (len % sizeof(u32));
++	const u8 left = len & (sizeof(u32) - 1);
++	u32 m;
++	HPREAMBLE(len)
++	for (; data != end; data += sizeof(u32)) {
++		m = le32_to_cpup(data);
++		v3 ^= m;
++		HSIPROUND;
++		v0 ^= m;
++	}
++	switch (left) {
++	case 3: b |= ((u32)end[2]) << 16;
++	case 2: b |= le16_to_cpup(data); break;
++	case 1: b |= end[0];
++	}
++	HPOSTAMBLE
++}
++
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++u32 __hsiphash_unaligned(const void *data, size_t len,
++			 const hsiphash_key_t *key)
++{
++	const u8 *end = data + len - (len % sizeof(u32));
++	const u8 left = len & (sizeof(u32) - 1);
++	u32 m;
++	HPREAMBLE(len)
++	for (; data != end; data += sizeof(u32)) {
++		m = get_unaligned_le32(data);
++		v3 ^= m;
++		HSIPROUND;
++		v0 ^= m;
++	}
++	switch (left) {
++	case 3: b |= ((u32)end[2]) << 16;
++	case 2: b |= get_unaligned_le16(end); break;
++	case 1: b |= end[0];
++	}
++	HPOSTAMBLE
++}
++#endif
++
++/**
++ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
++ * @first: first u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
++{
++	HPREAMBLE(4)
++	v3 ^= first;
++	HSIPROUND;
++	v0 ^= first;
++	HPOSTAMBLE
++}
++
++/**
++ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
++ * @first: first u32
++ * @second: second u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
++{
++	HPREAMBLE(8)
++	v3 ^= first;
++	HSIPROUND;
++	v0 ^= first;
++	v3 ^= second;
++	HSIPROUND;
++	v0 ^= second;
++	HPOSTAMBLE
++}
++
++/**
++ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
++ * @first: first u32
++ * @second: second u32
++ * @third: third u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
++		  const hsiphash_key_t *key)
++{
++	HPREAMBLE(12)
++	v3 ^= first;
++	HSIPROUND;
++	v0 ^= first;
++	v3 ^= second;
++	HSIPROUND;
++	v0 ^= second;
++	v3 ^= third;
++	HSIPROUND;
++	v0 ^= third;
++	HPOSTAMBLE
++}
++
++/**
++ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
++ * @first: first u32
++ * @second: second u32
++ * @third: third u32
++ * @forth: fourth u32
++ * @key: the hsiphash key
++ */
++u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
++		  const u32 forth, const hsiphash_key_t *key)
++{
++	HPREAMBLE(16)
++	v3 ^= first;
++	HSIPROUND;
++	v0 ^= first;
++	v3 ^= second;
++	HSIPROUND;
++	v0 ^= second;
++	v3 ^= third;
++	HSIPROUND;
++	v0 ^= third;
++	v3 ^= forth;
++	HSIPROUND;
++	v0 ^= forth;
++	HPOSTAMBLE
++}
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/udp_tunnel/udp_tunnel.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,387 @@
++#include <linux/module.h>
++#include <linux/errno.h>
++#include <linux/socket.h>
++#include <linux/udp.h>
++#include <linux/types.h>
++#include <linux/kernel.h>
++#include <net/net_namespace.h>
++#include <net/inet_common.h>
++#include <net/udp.h>
++#include <net/udp_tunnel.h>
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
++#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
++#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk)))
++#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr)
++#endif
++
++/* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. */
++static udp_tunnel_encap_rcv_t encap_rcv = NULL;
++static void __compat_sk_data_ready(struct sock *sk
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
++			      ,int unused_vulnerable_length_param
++#endif
++			      )
++{
++	struct sk_buff *skb;
++	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
++		skb_orphan(skb);
++		sk_mem_reclaim(sk);
++		encap_rcv(sk, skb);
++	}
++}
++
++int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
++		     struct socket **sockp)
++{
++	int err;
++	struct socket *sock = NULL;
++	struct sockaddr_in udp_addr;
++
++	err = __sock_create(net, AF_INET, SOCK_DGRAM, 0, &sock, 1);
++	if (err < 0)
++		goto error;
++
++	udp_addr.sin_family = AF_INET;
++	udp_addr.sin_addr = cfg->local_ip;
++	udp_addr.sin_port = cfg->local_udp_port;
++	err = kernel_bind(sock, (struct sockaddr *)&udp_addr,
++			  sizeof(udp_addr));
++	if (err < 0)
++		goto error;
++
++	if (cfg->peer_udp_port) {
++		udp_addr.sin_family = AF_INET;
++		udp_addr.sin_addr = cfg->peer_ip;
++		udp_addr.sin_port = cfg->peer_udp_port;
++		err = kernel_connect(sock, (struct sockaddr *)&udp_addr,
++				     sizeof(udp_addr), 0);
++		if (err < 0)
++			goto error;
++	}
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
++	sock->sk->sk_no_check = !cfg->use_udp_checksums;
++#else
++	sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
++#endif
++
++	*sockp = sock;
++	return 0;
++
++error:
++	if (sock) {
++		kernel_sock_shutdown(sock, SHUT_RDWR);
++		sock_release(sock);
++	}
++	*sockp = NULL;
++	return err;
++}
++
++void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
++			   struct udp_tunnel_sock_cfg *cfg)
++{
++	inet_sk(sock->sk)->mc_loop = 0;
++	encap_rcv = cfg->encap_rcv;
++	rcu_assign_sk_user_data(sock->sk, cfg->sk_user_data);
++	/* We force the cast in this awful way, due to various Android kernels
++	 * backporting things stupidly. */
++	*(void **)&sock->sk->sk_data_ready = (void *)__compat_sk_data_ready;
++}
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
++static inline __sum16 udp_v4_check(int len, __be32 saddr,
++				   __be32 daddr, __wsum base)
++{
++	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);
++}
++
++static void udp_set_csum(bool nocheck, struct sk_buff *skb,
++		  __be32 saddr, __be32 daddr, int len)
++{
++	struct udphdr *uh = udp_hdr(skb);
++
++	if (nocheck)
++		uh->check = 0;
++	else if (skb_is_gso(skb))
++		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
++	else if (skb_dst(skb) && skb_dst(skb)->dev &&
++		 (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
++
++		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
++
++		skb->ip_summed = CHECKSUM_PARTIAL;
++		skb->csum_start = skb_transport_header(skb) - skb->head;
++		skb->csum_offset = offsetof(struct udphdr, check);
++		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
++	} else {
++		__wsum csum;
++
++		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
++
++		uh->check = 0;
++		csum = skb_checksum(skb, 0, len, 0);
++		uh->check = udp_v4_check(len, saddr, daddr, csum);
++		if (uh->check == 0)
++			uh->check = CSUM_MANGLED_0;
++
++		skb->ip_summed = CHECKSUM_UNNECESSARY;
++	}
++}
++
++#endif
++
++static void __compat_fake_destructor(struct sk_buff *skb)
++{
++}
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
++static void __compat_iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
++		  __be32 src, __be32 dst, __u8 proto,
++		  __u8 tos, __u8 ttl, __be16 df, bool xnet)
++{
++	struct iphdr *iph;
++	struct pcpu_tstats *tstats = this_cpu_ptr(skb->dev->tstats);
++
++	skb_scrub_packet(skb, xnet);
++
++	skb->rxhash = 0;
++	skb_dst_set(skb, &rt->dst);
++	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
++
++	/* Push down and install the IP header. */
++	skb_push(skb, sizeof(struct iphdr));
++	skb_reset_network_header(skb);
++
++	iph = ip_hdr(skb);
++
++	iph->version	=	4;
++	iph->ihl	=	sizeof(struct iphdr) >> 2;
++	iph->frag_off	=	df;
++	iph->protocol	=	proto;
++	iph->tos	=	tos;
++	iph->daddr	=	dst;
++	iph->saddr	=	src;
++	iph->ttl	=	ttl;
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 53)
++	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
++#else
++	__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
++#endif
++
++	iptunnel_xmit(skb, skb->dev);
++	u64_stats_update_begin(&tstats->syncp);
++	tstats->tx_bytes -= 8;
++	u64_stats_update_end(&tstats->syncp);
++}
++#define iptunnel_xmit __compat_iptunnel_xmit
++#endif
++
++void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
++			 __be32 src, __be32 dst, __u8 tos, __u8 ttl,
++			 __be16 df, __be16 src_port, __be16 dst_port,
++			 bool xnet, bool nocheck)
++{
++	struct udphdr *uh;
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
++	struct net_device *dev = skb->dev;
++	int ret;
++#endif
++
++	__skb_push(skb, sizeof(*uh));
++	skb_reset_transport_header(skb);
++	uh = udp_hdr(skb);
++
++	uh->dest = dst_port;
++	uh->source = src_port;
++	uh->len = htons(skb->len);
++
++	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
++
++	udp_set_csum(nocheck, skb, src, dst, skb->len);
++
++	if (!skb->sk)
++		skb->sk = sk;
++	if (!skb->destructor)
++		skb->destructor = __compat_fake_destructor;
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
++	ret =
++#endif
++	     iptunnel_xmit(
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
++			   sk,
++#endif
++			   rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
++	if (ret)
++		iptunnel_xmit_stats(ret - 8, &dev->stats, dev->tstats);
++#endif
++}
++
++void udp_tunnel_sock_release(struct socket *sock)
++{
++	rcu_assign_sk_user_data(sock->sk, NULL);
++	kernel_sock_shutdown(sock, SHUT_RDWR);
++	sock_release(sock);
++}
++
++#if IS_ENABLED(CONFIG_IPV6)
++#include <linux/module.h>
++#include <linux/errno.h>
++#include <linux/socket.h>
++#include <linux/udp.h>
++#include <linux/types.h>
++#include <linux/kernel.h>
++#include <linux/in6.h>
++#include <net/udp.h>
++#include <net/udp_tunnel.h>
++#include <net/net_namespace.h>
++#include <net/netns/generic.h>
++#include <net/ip6_tunnel.h>
++#include <net/ip6_checksum.h>
++
++int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
++		     struct socket **sockp)
++{
++	struct sockaddr_in6 udp6_addr = {}; /* sin6_flowinfo/sin6_scope_id = 0 */
++	int err;
++	struct socket *sock = NULL;
++	/* Create a UDP/IPv6 socket in @net, bind it and optionally connect it. */
++	err = __sock_create(net, AF_INET6, SOCK_DGRAM, 0, &sock, 1);
++	if (err < 0)
++		goto error;
++
++	if (cfg->ipv6_v6only) {
++		int val = 1;
++
++		err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
++					(char *) &val, sizeof(val));
++		if (err < 0)
++			goto error;
++	}
++	/* Bind to the local address and port given in @cfg. */
++	udp6_addr.sin6_family = AF_INET6;
++	memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
++	       sizeof(udp6_addr.sin6_addr));
++	udp6_addr.sin6_port = cfg->local_udp_port;
++	err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
++			  sizeof(udp6_addr));
++	if (err < 0)
++		goto error;
++
++	if (cfg->peer_udp_port) { /* optional: connect to the configured peer */
++		udp6_addr.sin6_family = AF_INET6;
++		memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
++		       sizeof(udp6_addr.sin6_addr));
++		udp6_addr.sin6_port = cfg->peer_udp_port;
++		err = kernel_connect(sock,
++				     (struct sockaddr *)&udp6_addr,
++				     sizeof(udp6_addr), 0);
++	}
++	if (err < 0) /* no-op when no connect was attempted: err is >= 0 here */
++		goto error;
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
++	sock->sk->sk_no_check = !cfg->use_udp_checksums;
++#else
++	udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
++	udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
++#endif
++
++	*sockp = sock;
++	return 0; /* success: *sockp holds the new socket */
++
++error:
++	if (sock) { /* socket was created: shut it down and release it */
++		kernel_sock_shutdown(sock, SHUT_RDWR);
++		sock_release(sock);
++	}
++	*sockp = NULL;
++	return err; /* negative error code; *sockp is NULL */
++}
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
++static inline __sum16 udp_v6_check(int len,
++		const struct in6_addr *saddr,
++		const struct in6_addr *daddr,
++		__wsum base)
++{
++	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base);
++}
++static void udp6_set_csum(bool nocheck, struct sk_buff *skb,
++		   const struct in6_addr *saddr,
++		   const struct in6_addr *daddr, int len)
++{
++	struct udphdr *uh = udp_hdr(skb);
++
++	if (nocheck)
++		uh->check = 0;
++	else if (skb_is_gso(skb))
++		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
++	else if (skb_dst(skb) && skb_dst(skb)->dev &&
++		 (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
++
++		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
++
++		skb->ip_summed = CHECKSUM_PARTIAL;
++		skb->csum_start = skb_transport_header(skb) - skb->head;
++		skb->csum_offset = offsetof(struct udphdr, check);
++		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
++	} else {
++		__wsum csum;
++
++		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
++
++		uh->check = 0;
++		csum = skb_checksum(skb, 0, len, 0);
++		uh->check = udp_v6_check(len, saddr, daddr, csum);
++		if (uh->check == 0)
++			uh->check = CSUM_MANGLED_0;
++
++		skb->ip_summed = CHECKSUM_UNNECESSARY;
++	}
++}
++#endif
++
++int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
++			 struct sk_buff *skb,
++			 struct net_device *dev, struct in6_addr *saddr,
++			 struct in6_addr *daddr,
++			 __u8 prio, __u8 ttl, __be32 label,
++			 __be16 src_port, __be16 dst_port, bool nocheck)
++{
++	struct udphdr *uh;
++	struct ipv6hdr *ip6h;
++
++	__skb_push(skb, sizeof(*uh));
++	skb_reset_transport_header(skb);
++	uh = udp_hdr(skb);
++
++	uh->dest = dst_port;
++	uh->source = src_port;
++
++	uh->len = htons(skb->len);
++
++	skb_dst_set(skb, dst);
++
++	udp6_set_csum(nocheck, skb, saddr, daddr, skb->len);
++
++	__skb_push(skb, sizeof(*ip6h));
++	skb_reset_network_header(skb);
++	ip6h		  = ipv6_hdr(skb);
++	ip6_flow_hdr(ip6h, prio, label);
++	ip6h->payload_len = htons(skb->len);
++	ip6h->nexthdr     = IPPROTO_UDP;
++	ip6h->hop_limit   = ttl;
++	ip6h->daddr	  = *daddr;
++	ip6h->saddr	  = *saddr;
++
++	if (!skb->sk)
++		skb->sk = sk;
++	if (!skb->destructor)
++		skb->destructor = __compat_fake_destructor;
++
++	ip6tunnel_xmit(skb, dev);
++	return 0;
++}
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/cookie.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,236 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "cookie.h"
++#include "peer.h"
++#include "device.h"
++#include "messages.h"
++#include "ratelimiter.h"
++#include "timers.h"
++
++#include <zinc/blake2s.h>
++#include <zinc/chacha20poly1305.h>
++
++#include <net/ipv6.h>
++#include <crypto/algapi.h>
++
++void wg_cookie_checker_init(struct cookie_checker *checker,
++			    struct wg_device *wg)
++{
++	init_rwsem(&checker->secret_lock);
++	checker->secret_birthdate = ktime_get_coarse_boottime_ns();
++	get_random_bytes(checker->secret, NOISE_HASH_LEN);
++	checker->device = wg;
++}
++
++enum { COOKIE_KEY_LABEL_LEN = 8 };
++static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----";
++static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--";
++
++static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN],
++			   const u8 pubkey[NOISE_PUBLIC_KEY_LEN],
++			   const u8 label[COOKIE_KEY_LABEL_LEN])
++{
++	struct blake2s_state blake;
++
++	blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN);
++	blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN);
++	blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN);
++	blake2s_final(&blake, key);
++}
++
++/* Must hold peer->handshake.static_identity->lock */
++void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker)
++{
++	if (likely(checker->device->static_identity.has_identity)) {
++		precompute_key(checker->cookie_encryption_key,
++			       checker->device->static_identity.static_public,
++			       cookie_key_label);
++		precompute_key(checker->message_mac1_key,
++			       checker->device->static_identity.static_public,
++			       mac1_key_label);
++	} else {
++		memset(checker->cookie_encryption_key, 0,
++		       NOISE_SYMMETRIC_KEY_LEN);
++		memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN);
++	}
++}
++
++void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer)
++{
++	precompute_key(peer->latest_cookie.cookie_decryption_key,
++		       peer->handshake.remote_static, cookie_key_label);
++	precompute_key(peer->latest_cookie.message_mac1_key,
++		       peer->handshake.remote_static, mac1_key_label);
++}
++
++void wg_cookie_init(struct cookie *cookie)
++{
++	memset(cookie, 0, sizeof(*cookie));
++	init_rwsem(&cookie->lock);
++}
++
++static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len,
++			 const u8 key[NOISE_SYMMETRIC_KEY_LEN])
++{
++	len = len - sizeof(struct message_macs) +
++	      offsetof(struct message_macs, mac1);
++	blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN);
++}
++
++static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len,
++			 const u8 cookie[COOKIE_LEN])
++{
++	len = len - sizeof(struct message_macs) +
++	      offsetof(struct message_macs, mac2);
++	blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN);
++}
++
++static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb,
++			struct cookie_checker *checker)
++{
++	struct blake2s_state state;
++	/* Re-key once the secret has expired; the expiry test itself runs unlocked. */
++	if (wg_birthdate_has_expired(checker->secret_birthdate,
++				     COOKIE_SECRET_MAX_AGE)) {
++		down_write(&checker->secret_lock);
++		checker->secret_birthdate = ktime_get_coarse_boottime_ns();
++		get_random_bytes(checker->secret, NOISE_HASH_LEN);
++		up_write(&checker->secret_lock);
++	}
++	/* cookie = BLAKE2s keyed with secret over (source IP, UDP source port). */
++	down_read(&checker->secret_lock); /* keeps the re-key writer out while hashing */
++
++	blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN);
++	if (skb->protocol == htons(ETH_P_IP))
++		blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr,
++			       sizeof(struct in_addr));
++	else if (skb->protocol == htons(ETH_P_IPV6))
++		blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr,
++			       sizeof(struct in6_addr));
++	blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16));
++	blake2s_final(&state, cookie); /* writes the result into @cookie */
++
++	up_read(&checker->secret_lock);
++}
++
++enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
++						struct sk_buff *skb,
++						bool check_cookie)
++{
++	struct message_macs *macs = (struct message_macs *)
++		(skb->data + skb->len - sizeof(*macs));
++	enum cookie_mac_state ret;
++	u8 computed_mac[COOKIE_LEN];
++	u8 cookie[COOKIE_LEN];
++
++	ret = INVALID_MAC;
++	compute_mac1(computed_mac, skb->data, skb->len,
++		     checker->message_mac1_key);
++	if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN))
++		goto out;
++
++	ret = VALID_MAC_BUT_NO_COOKIE;
++
++	if (!check_cookie)
++		goto out;
++
++	make_cookie(cookie, skb, checker);
++
++	compute_mac2(computed_mac, skb->data, skb->len, cookie);
++	if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN))
++		goto out;
++
++	ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED;
++	if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev)))
++		goto out;
++
++	ret = VALID_MAC_WITH_COOKIE;
++
++out:
++	return ret;
++}
++
++void wg_cookie_add_mac_to_packet(void *message, size_t len,
++				 struct wg_peer *peer)
++{
++	struct message_macs *macs = (struct message_macs *)
++		((u8 *)message + len - sizeof(*macs));
++
++	down_write(&peer->latest_cookie.lock);
++	compute_mac1(macs->mac1, message, len,
++		     peer->latest_cookie.message_mac1_key);
++	memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN);
++	peer->latest_cookie.have_sent_mac1 = true;
++	up_write(&peer->latest_cookie.lock);
++
++	down_read(&peer->latest_cookie.lock);
++	if (peer->latest_cookie.is_valid &&
++	    !wg_birthdate_has_expired(peer->latest_cookie.birthdate,
++				COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY))
++		compute_mac2(macs->mac2, message, len,
++			     peer->latest_cookie.cookie);
++	else
++		memset(macs->mac2, 0, COOKIE_LEN);
++	up_read(&peer->latest_cookie.lock);
++}
++
++void wg_cookie_message_create(struct message_handshake_cookie *dst,
++			      struct sk_buff *skb, __le32 index,
++			      struct cookie_checker *checker)
++{
++	struct message_macs *macs = (struct message_macs *)
++		((u8 *)skb->data + skb->len - sizeof(*macs));
++	u8 cookie[COOKIE_LEN];
++
++	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE);
++	dst->receiver_index = index;
++	get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN);
++
++	make_cookie(cookie, skb, checker);
++	xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN,
++				  macs->mac1, COOKIE_LEN, dst->nonce,
++				  checker->cookie_encryption_key);
++}
++
++void wg_cookie_message_consume(struct message_handshake_cookie *src,
++			       struct wg_device *wg)
++{
++	struct wg_peer *peer = NULL;
++	u8 cookie[COOKIE_LEN];
++	bool ret;
++
++	if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable,
++						INDEX_HASHTABLE_HANDSHAKE |
++						INDEX_HASHTABLE_KEYPAIR,
++						src->receiver_index, &peer)))
++		return;
++
++	down_read(&peer->latest_cookie.lock);
++	if (unlikely(!peer->latest_cookie.have_sent_mac1)) {
++		up_read(&peer->latest_cookie.lock);
++		goto out;
++	}
++	ret = xchacha20poly1305_decrypt(
++		cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie),
++		peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce,
++		peer->latest_cookie.cookie_decryption_key);
++	up_read(&peer->latest_cookie.lock);
++
++	if (ret) {
++		down_write(&peer->latest_cookie.lock);
++		memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN);
++		peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns();
++		peer->latest_cookie.is_valid = true;
++		peer->latest_cookie.have_sent_mac1 = false;
++		up_write(&peer->latest_cookie.lock);
++	} else {
++		net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n",
++				    wg->dev->name);
++	}
++
++out:
++	wg_peer_put(peer);
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/blake2s/blake2s.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,276 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This is an implementation of the BLAKE2s hash and PRF functions.
++ *
++ * Information: https://blake2.net/
++ *
++ */
++
++#include <zinc/blake2s.h>
++#include "../selftest/run.h"
++
++#include <linux/types.h>
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/bug.h>
++#include <asm/unaligned.h>
++
++static const u32 blake2s_iv[8] = {
++	0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
++	0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
++};
++
++static const u8 blake2s_sigma[10][16] = {
++	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
++	{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
++	{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
++	{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
++	{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
++	{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
++	{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
++	{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
++	{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
++	{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
++};
++
++static inline void blake2s_set_lastblock(struct blake2s_state *state)
++{
++	state->f[0] = -1;
++}
++
++static inline void blake2s_increment_counter(struct blake2s_state *state,
++					     const u32 inc)
++{
++	state->t[0] += inc;
++	state->t[1] += (state->t[0] < inc);
++}
++
++static inline void blake2s_init_param(struct blake2s_state *state,
++				      const u32 param)
++{
++	int i;
++
++	memset(state, 0, sizeof(*state));
++	for (i = 0; i < 8; ++i)
++		state->h[i] = blake2s_iv[i];
++	state->h[0] ^= param;
++}
++
++void blake2s_init(struct blake2s_state *state, const size_t outlen)
++{
++	WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE));
++	blake2s_init_param(state, 0x01010000 | outlen);
++	state->outlen = outlen;
++}
++EXPORT_SYMBOL(blake2s_init);
++
++void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
++		      const void *key, const size_t keylen)
++{
++	u8 block[BLAKE2S_BLOCK_SIZE] = { 0 };
++
++	WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE ||
++		!key || !keylen || keylen > BLAKE2S_KEY_SIZE));
++	blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen);
++	state->outlen = outlen;
++	memcpy(block, key, keylen);
++	blake2s_update(state, block, BLAKE2S_BLOCK_SIZE);
++	memzero_explicit(block, BLAKE2S_BLOCK_SIZE);
++}
++EXPORT_SYMBOL(blake2s_init_key);
++
++#if defined(CONFIG_ZINC_ARCH_X86_64)
++#include "blake2s-x86_64-glue.c"
++#else
++static bool *const blake2s_nobs[] __initconst = { };
++static void __init blake2s_fpu_init(void)
++{
++}
++static inline bool blake2s_compress_arch(struct blake2s_state *state,
++					 const u8 *block, size_t nblocks,
++					 const u32 inc)
++{
++	return false;
++}
++#endif
++
++/*
++ * Compress @nblocks consecutive blocks of BLAKE2S_BLOCK_SIZE bytes, starting
++ * at @block, into the chaining value @state->h. @inc is handed to
++ * blake2s_increment_counter() once per block. blake2s_compress_arch() is
++ * tried first; the portable loop below only runs when it returns false.
++ * When DEBUG is enabled, warns if several blocks are passed with an @inc
++ * other than BLAKE2S_BLOCK_SIZE.
++ */
++static inline void blake2s_compress(struct blake2s_state *state,
++				    const u8 *block, size_t nblocks,
++				    const u32 inc)
++{
++	u32 m[16];	/* message block as CPU-endian words */
++	u32 v[16];	/* working vector */
++	int i;
++
++	WARN_ON(IS_ENABLED(DEBUG) &&
++		(nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE));
++
++	if (blake2s_compress_arch(state, block, nblocks, inc))
++		return;
++
++	while (nblocks > 0) {
++		blake2s_increment_counter(state, inc);
++		memcpy(m, block, BLAKE2S_BLOCK_SIZE);
++		le32_to_cpu_array(m, ARRAY_SIZE(m));
++		/* v[0..7] = h; v[8..15] = IV, mixed with counter t and flags f. */
++		memcpy(v, state->h, 32);
++		v[ 8] = blake2s_iv[0];
++		v[ 9] = blake2s_iv[1];
++		v[10] = blake2s_iv[2];
++		v[11] = blake2s_iv[3];
++		v[12] = blake2s_iv[4] ^ state->t[0];
++		v[13] = blake2s_iv[5] ^ state->t[1];
++		v[14] = blake2s_iv[6] ^ state->f[0];
++		v[15] = blake2s_iv[7] ^ state->f[1];
++
++/* Mixing step: message words for round r are selected via blake2s_sigma. */
++#define G(r, i, a, b, c, d) do { \
++	a += b + m[blake2s_sigma[r][2 * i + 0]]; \
++	d = ror32(d ^ a, 16); \
++	c += d; \
++	b = ror32(b ^ c, 12); \
++	a += b + m[blake2s_sigma[r][2 * i + 1]]; \
++	d = ror32(d ^ a, 8); \
++	c += d; \
++	b = ror32(b ^ c, 7); \
++} while (0)
++
++/* One round: four column mixes followed by four diagonal mixes. */
++#define ROUND(r) do { \
++	G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
++	G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
++	G(r, 2, v[2], v[ 6], v[10], v[14]); \
++	G(r, 3, v[3], v[ 7], v[11], v[15]); \
++	G(r, 4, v[0], v[ 5], v[10], v[15]); \
++	G(r, 5, v[1], v[ 6], v[11], v[12]); \
++	G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
++	G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
++} while (0)
++		ROUND(0);
++		ROUND(1);
++		ROUND(2);
++		ROUND(3);
++		ROUND(4);
++		ROUND(5);
++		ROUND(6);
++		ROUND(7);
++		ROUND(8);
++		ROUND(9);
++
++#undef G
++#undef ROUND
++
++		/* Feed-forward: fold both halves of v back into h. */
++		for (i = 0; i < 8; ++i)
++			state->h[i] ^= v[i] ^ v[i + 8];
++
++		block += BLAKE2S_BLOCK_SIZE;
++		--nblocks;
++	}
++}
++
++/*
++ * Absorb @inlen bytes from @in. Input is staged in state->buf; a block is
++ * only compressed once more input is known to follow it, so the most recent
++ * (possibly full) block always stays buffered for blake2s_final().
++ */
++void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
++{
++	const size_t room = BLAKE2S_BLOCK_SIZE - state->buflen;
++
++	if (unlikely(inlen == 0))
++		return;
++
++	if (inlen > room) {
++		/* Top up the staged block and compress it. */
++		memcpy(&state->buf[state->buflen], in, room);
++		blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
++		state->buflen = 0;
++		in += room;
++		inlen -= room;
++	}
++
++	if (inlen > BLAKE2S_BLOCK_SIZE) {
++		/* Hash one less (full) block than strictly possible */
++		const size_t direct =
++			DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE) - 1;
++		const size_t consumed = BLAKE2S_BLOCK_SIZE * direct;
++
++		blake2s_compress(state, in, direct, BLAKE2S_BLOCK_SIZE);
++		in += consumed;
++		inlen -= consumed;
++	}
++
++	memcpy(&state->buf[state->buflen], in, inlen);
++	state->buflen += inlen;
++}
++EXPORT_SYMBOL(blake2s_update);
++
++/*
++ * Finish the hash: mark the last block, zero-pad the staged bytes, compress
++ * them with the staged length as the counter increment, and copy
++ * state->outlen bytes of the little-endian chaining value to @out. The whole
++ * state is wiped before returning. When DEBUG is enabled, warns on NULL @out.
++ */
++void blake2s_final(struct blake2s_state *state, u8 *out)
++{
++	u8 *tail;
++
++	WARN_ON(IS_ENABLED(DEBUG) && !out);
++	blake2s_set_lastblock(state);
++
++	/* Padding */
++	tail = state->buf + state->buflen;
++	memset(tail, 0, BLAKE2S_BLOCK_SIZE - state->buflen);
++
++	blake2s_compress(state, state->buf, 1, state->buflen);
++	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
++	memcpy(out, state->h, state->outlen);
++	memzero_explicit(state, sizeof(*state));
++}
++EXPORT_SYMBOL(blake2s_final);
++
++/*
++ * HMAC construction over BLAKE2s. Writes the first @outlen bytes of the
++ * BLAKE2S_HASH_SIZE-byte result to @out. Keys longer than BLAKE2S_BLOCK_SIZE
++ * are hashed down first; shorter keys are zero-padded to a full block. The
++ * padded key and the intermediate digest are wiped before returning.
++ */
++void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen,
++		  const size_t inlen, const size_t keylen)
++{
++	struct blake2s_state hash;
++	u8 pad_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 };
++	u8 digest[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32));
++	size_t pos;
++
++	if (keylen <= BLAKE2S_BLOCK_SIZE) {
++		memcpy(pad_key, key, keylen);
++	} else {
++		blake2s_init(&hash, BLAKE2S_HASH_SIZE);
++		blake2s_update(&hash, key, keylen);
++		blake2s_final(&hash, pad_key);
++	}
++
++	/* Inner hash over (key ^ 0x36..) || in. */
++	for (pos = 0; pos < sizeof(pad_key); ++pos)
++		pad_key[pos] ^= 0x36;
++
++	blake2s_init(&hash, BLAKE2S_HASH_SIZE);
++	blake2s_update(&hash, pad_key, sizeof(pad_key));
++	blake2s_update(&hash, in, inlen);
++	blake2s_final(&hash, digest);
++
++	/* Undo the 0x36 mask and apply 0x5c in a single pass. */
++	for (pos = 0; pos < sizeof(pad_key); ++pos)
++		pad_key[pos] ^= 0x5c ^ 0x36;
++
++	/* Outer hash over (key ^ 0x5c..) || inner digest. */
++	blake2s_init(&hash, BLAKE2S_HASH_SIZE);
++	blake2s_update(&hash, pad_key, sizeof(pad_key));
++	blake2s_update(&hash, digest, sizeof(digest));
++	blake2s_final(&hash, digest);
++
++	memcpy(out, digest, outlen);
++	memzero_explicit(pad_key, sizeof(pad_key));
++	memzero_explicit(digest, sizeof(digest));
++}
++EXPORT_SYMBOL(blake2s_hmac);
++
++#include "../selftest/blake2s.c"
++
++/* When true, blake2s_fpu_init() is skipped at init time. */
++static bool nosimd __initdata = false;
++
++/*
++ * Init entry point. It is exported as blake2s_mod_init() unless
++ * COMPAT_ZINC_IS_A_MODULE is defined, in which case it is the static
++ * module_init() hook. Runs feature detection (unless nosimd is set) and then
++ * the self-test; returns -ENOTRECOVERABLE if selftest_run() reports failure,
++ * 0 otherwise.
++ */
++#ifndef COMPAT_ZINC_IS_A_MODULE
++int __init blake2s_mod_init(void)
++#else
++static int __init mod_init(void)
++#endif
++{
++	if (!nosimd)
++		blake2s_fpu_init();
++	if (!selftest_run("blake2s", blake2s_selftest, blake2s_nobs,
++			  ARRAY_SIZE(blake2s_nobs)))
++		return -ENOTRECOVERABLE;
++	return 0;
++}
++
++#ifdef COMPAT_ZINC_IS_A_MODULE
++/* Nothing to tear down on unload. */
++static void __exit mod_exit(void)
++{
++}
++
++module_param(nosimd, bool, 0);
++module_init(mod_init);
++module_exit(mod_exit);
++MODULE_LICENSE("GPL v2");
++MODULE_DESCRIPTION("BLAKE2s hash function");
++MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/blake2s/blake2s-x86_64-glue.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,72 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include <linux/simd.h>
++#include <asm/cpufeature.h>
++#include <asm/processor.h>
++#include <asm/fpu/api.h>
++
++asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
++				       const u8 *block, const size_t nblocks,
++				       const u32 inc);
++asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
++					const u8 *block, const size_t nblocks,
++					const u32 inc);
++
++static bool blake2s_use_ssse3 __ro_after_init;
++static bool blake2s_use_avx512 __ro_after_init;
++static bool *const blake2s_nobs[] __initconst = { &blake2s_use_ssse3,
++						  &blake2s_use_avx512 };
++
++/*
++ * Record which SIMD implementations may be used. SSSE3 follows the boot CPU
++ * feature bit. AVX-512 additionally requires AVX, AVX2, AVX512F and AVX512VL
++ * plus a successful cpu_has_xfeatures() check for the SSE, YMM and AVX512
++ * masks, and is left disabled when COMPAT_CANNOT_USE_AVX512 is defined.
++ */
++static void __init blake2s_fpu_init(void)
++{
++	blake2s_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3);
++#ifndef COMPAT_CANNOT_USE_AVX512
++	blake2s_use_avx512 =
++		boot_cpu_has(X86_FEATURE_AVX) &&
++		boot_cpu_has(X86_FEATURE_AVX2) &&
++		boot_cpu_has(X86_FEATURE_AVX512F) &&
++		boot_cpu_has(X86_FEATURE_AVX512VL) &&
++		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
++				  XFEATURE_MASK_AVX512, NULL);
++#endif
++}
++
++/*
++ * Try to compress @nblocks blocks with the SSSE3 or AVX-512 assembly.
++ * Returns true when the blocks were handled here, false when the caller must
++ * use the generic code. Work is split into batches of at most one page of
++ * blocks, with simd_relax() between batches.
++ */
++static inline bool blake2s_compress_arch(struct blake2s_state *state,
++					 const u8 *block, size_t nblocks,
++					 const u32 inc)
++{
++	const size_t blocks_per_page = PAGE_SIZE / BLAKE2S_BLOCK_SIZE;
++	simd_context_t simd_context;
++	bool handled;
++
++	/* SIMD disables preemption, so relax after processing each page. */
++	BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
++
++	simd_get(&simd_context);
++
++	handled = IS_ENABLED(CONFIG_AS_SSSE3) && blake2s_use_ssse3 &&
++		  simd_use(&simd_context);
++
++	while (handled) {
++		const size_t batch = min_t(size_t, nblocks, blocks_per_page);
++
++		if (IS_ENABLED(CONFIG_AS_AVX512) && blake2s_use_avx512)
++			blake2s_compress_avx512(state, block, batch, inc);
++		else
++			blake2s_compress_ssse3(state, block, batch, inc);
++
++		nblocks -= batch;
++		if (nblocks == 0)
++			break;
++		block += batch * BLAKE2S_BLOCK_SIZE;
++		simd_relax(&simd_context);
++	}
++
++	simd_put(&simd_context);
++	return handled;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/chacha20/chacha20-arm-glue.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,98 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include <asm/hwcap.h>
++#include <asm/neon.h>
++#if defined(CONFIG_ZINC_ARCH_ARM)
++#include <asm/system_info.h>
++#include <asm/cputype.h>
++#endif
++
++asmlinkage void chacha20_arm(u8 *out, const u8 *in, const size_t len,
++			     const u32 key[8], const u32 counter[4]);
++asmlinkage void hchacha20_arm(const u32 state[16], u32 out[8]);
++asmlinkage void chacha20_neon(u8 *out, const u8 *in, const size_t len,
++			      const u32 key[8], const u32 counter[4]);
++
++static bool chacha20_use_neon __ro_after_init;
++static bool *const chacha20_nobs[] __initconst = { &chacha20_use_neon };
++/*
++ * Decide whether the NEON implementation may be used. On arm64 this follows
++ * the ASIMD CPU feature. On 32-bit ARM it follows HWCAP_NEON, except on
++ * Cortex-A7 and Cortex-A5 where the flag is deliberately left unset.
++ */
++static void __init chacha20_fpu_init(void)
++{
++#if defined(CONFIG_ZINC_ARCH_ARM64)
++	chacha20_use_neon = cpu_have_named_feature(ASIMD);
++#elif defined(CONFIG_ZINC_ARCH_ARM)
++	switch (read_cpuid_part()) {
++	case ARM_CPU_PART_CORTEX_A7:
++	case ARM_CPU_PART_CORTEX_A5:
++		/* The Cortex-A7 and Cortex-A5 do not perform well with the NEON
++		 * implementation but do incredibly with the scalar one and use
++		 * less power.
++		 */
++		break;
++	default:
++		chacha20_use_neon = elf_hwcap & HWCAP_NEON;
++	}
++#endif
++}
++
++/*
++ * ARM/arm64 ChaCha20. While NEON is usable and at least three blocks remain,
++ * data is processed with chacha20_neon() in chunks of at most one page, with
++ * simd_relax() between chunks. Otherwise everything left is handled by one
++ * call to the scalar chacha20_arm(). The block counter is advanced by the
++ * number of 64-byte blocks consumed. Always returns true.
++ */
++static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
++				 const u8 *src, size_t len,
++				 simd_context_t *simd_context)
++{
++	size_t chunk;
++
++	/* SIMD disables preemption, so relax after processing each page. */
++	BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE ||
++		     PAGE_SIZE % CHACHA20_BLOCK_SIZE);
++
++	for (;;) {
++		if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !chacha20_use_neon ||
++		    len < CHACHA20_BLOCK_SIZE * 3 || !simd_use(simd_context)) {
++			/* Scalar path finishes whatever is left. */
++			chacha20_arm(dst, src, len, ctx->key, ctx->counter);
++			ctx->counter[0] += (len + 63) / 64;
++			break;
++		}
++
++		chunk = min_t(size_t, len, PAGE_SIZE);
++		chacha20_neon(dst, src, chunk, ctx->key, ctx->counter);
++		ctx->counter[0] += (chunk + 63) / 64;
++		len -= chunk;
++		if (len == 0)
++			break;
++		dst += chunk;
++		src += chunk;
++		simd_relax(simd_context);
++	}
++
++	return true;
++}
++
++/*
++ * HChaCha20 via the ARM assembly. Only used when CONFIG_ZINC_ARCH_ARM is
++ * enabled: builds the 16-word input (four constants, eight key words, four
++ * nonce words, all read little-endian), calls hchacha20_arm() and returns
++ * true. Returns false otherwise so the caller uses the generic code.
++ */
++static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
++				  const u8 nonce[HCHACHA20_NONCE_SIZE],
++				  const u8 key[HCHACHA20_KEY_SIZE],
++				  simd_context_t *simd_context)
++{
++	u32 x[16];
++	unsigned int w;
++
++	if (!IS_ENABLED(CONFIG_ZINC_ARCH_ARM))
++		return false;
++
++	x[0] = CHACHA20_CONSTANT_EXPA;
++	x[1] = CHACHA20_CONSTANT_ND_3;
++	x[2] = CHACHA20_CONSTANT_2_BY;
++	x[3] = CHACHA20_CONSTANT_TE_K;
++	for (w = 0; w < 8; ++w)
++		x[4 + w] = get_unaligned_le32(key + 4 * w);
++	for (w = 0; w < 4; ++w)
++		x[12 + w] = get_unaligned_le32(nonce + 4 * w);
++
++	hchacha20_arm(x, derived_key);
++	return true;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/chacha20/chacha20.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,193 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * Implementation of the ChaCha20 stream cipher.
++ *
++ * Information: https://cr.yp.to/chacha.html
++ */
++
++#include <zinc/chacha20.h>
++#include "../selftest/run.h"
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/vmalloc.h>
++#include <crypto/algapi.h> // For crypto_xor_cpy.
++
++/*
++ * Pull in the glue for the configured architecture; otherwise provide stubs
++ * with the same names so the generic code below is always used.
++ */
++#if defined(CONFIG_ZINC_ARCH_X86_64)
++#include "chacha20-x86_64-glue.c"
++#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64)
++#include "chacha20-arm-glue.c"
++#elif defined(CONFIG_ZINC_ARCH_MIPS)
++#include "chacha20-mips-glue.c"
++#else
++/* Empty list: handed to selftest_run() by the module init function. */
++static bool *const chacha20_nobs[] __initconst = { };
++/* Nothing to detect without an arch-specific implementation. */
++static void __init chacha20_fpu_init(void)
++{
++}
++/* Always returns false, so chacha20() falls back to chacha20_generic(). */
++static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
++				 const u8 *src, size_t len,
++				 simd_context_t *simd_context)
++{
++	return false;
++}
++/* Always returns false, so hchacha20() falls back to hchacha20_generic(). */
++static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
++				  const u8 nonce[HCHACHA20_NONCE_SIZE],
++				  const u8 key[HCHACHA20_KEY_SIZE],
++				  simd_context_t *simd_context)
++{
++	return false;
++}
++#endif
++
++/*
++ * Quarter round on words a, b, c, d of array x: four add/xor/rotate-left
++ * steps with rotation amounts 16, 12, 8 and 7. Written as a comma expression
++ * so the round macros below can chain it.
++ */
++#define QUARTER_ROUND(x, a, b, c, d) ( \
++	x[a] += x[b], \
++	x[d] = rol32((x[d] ^ x[a]), 16), \
++	x[c] += x[d], \
++	x[b] = rol32((x[b] ^ x[c]), 12), \
++	x[a] += x[b], \
++	x[d] = rol32((x[d] ^ x[a]), 8), \
++	x[c] += x[d], \
++	x[b] = rol32((x[b] ^ x[c]), 7) \
++)
++
++/* Index of row i, column j when the 16 words are viewed as a 4x4 matrix. */
++#define C(i, j) (i * 4 + j)
++
++/* One column round followed by one diagonal round. */
++#define DOUBLE_ROUND(x) ( \
++	/* Column Round */ \
++	QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \
++	QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \
++	QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \
++	QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \
++	/* Diagonal Round */ \
++	QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \
++	QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \
++	QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \
++	QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \
++)
++
++/* Ten double rounds, i.e. the full twenty rounds. */
++#define TWENTY_ROUNDS(x) ( \
++	DOUBLE_ROUND(x), \
++	DOUBLE_ROUND(x), \
++	DOUBLE_ROUND(x), \
++	DOUBLE_ROUND(x), \
++	DOUBLE_ROUND(x), \
++	DOUBLE_ROUND(x), \
++	DOUBLE_ROUND(x), \
++	DOUBLE_ROUND(x), \
++	DOUBLE_ROUND(x), \
++	DOUBLE_ROUND(x) \
++)
++
++/*
++ * Produce one keystream block: run twenty rounds on a copy of ctx->state,
++ * add the original state back word by word, store the result little-endian
++ * in @stream, then advance the block counter by one.
++ */
++static void chacha20_block_generic(struct chacha20_ctx *ctx, __le32 *stream)
++{
++	u32 work[CHACHA20_BLOCK_WORDS];
++	unsigned int w;
++
++	for (w = 0; w < ARRAY_SIZE(work); w++)
++		work[w] = ctx->state[w];
++
++	TWENTY_ROUNDS(work);
++
++	for (w = 0; w < ARRAY_SIZE(work); w++)
++		stream[w] = cpu_to_le32(work[w] + ctx->state[w]);
++
++	++ctx->counter[0];
++}
++
++/*
++ * Portable ChaCha20: XOR @len bytes of @in with keystream into @out, one
++ * block at a time. A trailing partial block consumes a whole keystream
++ * block, of which only the needed prefix is used.
++ */
++static void chacha20_generic(struct chacha20_ctx *ctx, u8 *out, const u8 *in,
++			     u32 len)
++{
++	__le32 keystream[CHACHA20_BLOCK_WORDS];
++
++	while (len) {
++		const u32 step = min_t(u32, len, CHACHA20_BLOCK_SIZE);
++
++		chacha20_block_generic(ctx, keystream);
++		crypto_xor_cpy(out, in, (u8 *)keystream, step);
++		len -= step;
++		out += step;
++		in += step;
++	}
++}
++
++/*
++ * Encrypt or decrypt @len bytes from @src into @dst. The architecture
++ * implementation is tried first; the portable one runs only when it
++ * declines.
++ */
++void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len,
++	      simd_context_t *simd_context)
++{
++	if (chacha20_arch(ctx, dst, src, len, simd_context))
++		return;
++
++	chacha20_generic(ctx, dst, src, len);
++}
++EXPORT_SYMBOL(chacha20);
++
++/*
++ * Portable HChaCha20: build the 16-word input (four constants, eight key
++ * words, four nonce words, all read little-endian), run twenty rounds, and
++ * output words 0..3 followed by words 12..15 as the derived key.
++ */
++static void hchacha20_generic(u32 derived_key[CHACHA20_KEY_WORDS],
++			      const u8 nonce[HCHACHA20_NONCE_SIZE],
++			      const u8 key[HCHACHA20_KEY_SIZE])
++{
++	u32 x[16];
++	unsigned int w;
++
++	x[0] = CHACHA20_CONSTANT_EXPA;
++	x[1] = CHACHA20_CONSTANT_ND_3;
++	x[2] = CHACHA20_CONSTANT_2_BY;
++	x[3] = CHACHA20_CONSTANT_TE_K;
++	for (w = 0; w < 8; ++w)
++		x[4 + w] = get_unaligned_le32(key + 4 * w);
++	for (w = 0; w < 4; ++w)
++		x[12 + w] = get_unaligned_le32(nonce + 4 * w);
++
++	TWENTY_ROUNDS(x);
++
++	memcpy(&derived_key[0], &x[0], 4 * sizeof(u32));
++	memcpy(&derived_key[4], &x[12], 4 * sizeof(u32));
++}
++
++/*
++ * Derive a key from @key and @nonce with HChaCha20. The architecture
++ * implementation is tried first; the portable one runs only when it
++ * declines. Derived key should be 32-bit aligned.
++ */
++void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS],
++	       const u8 nonce[HCHACHA20_NONCE_SIZE],
++	       const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context)
++{
++	if (hchacha20_arch(derived_key, nonce, key, simd_context))
++		return;
++
++	hchacha20_generic(derived_key, nonce, key);
++}
++EXPORT_SYMBOL(hchacha20);
++
++#include "../selftest/chacha20.c"
++
++/* When true, chacha20_fpu_init() is skipped at init time. */
++static bool nosimd __initdata = false;
++
++/*
++ * Init entry point. It is exported as chacha20_mod_init() unless
++ * COMPAT_ZINC_IS_A_MODULE is defined, in which case it is the static
++ * module_init() hook. Runs feature detection (unless nosimd is set) and then
++ * the self-test; returns -ENOTRECOVERABLE if selftest_run() reports failure,
++ * 0 otherwise.
++ */
++#ifndef COMPAT_ZINC_IS_A_MODULE
++int __init chacha20_mod_init(void)
++#else
++static int __init mod_init(void)
++#endif
++{
++	if (!nosimd)
++		chacha20_fpu_init();
++	if (!selftest_run("chacha20", chacha20_selftest, chacha20_nobs,
++			  ARRAY_SIZE(chacha20_nobs)))
++		return -ENOTRECOVERABLE;
++	return 0;
++}
++
++#ifdef COMPAT_ZINC_IS_A_MODULE
++/* Nothing to tear down on unload. */
++static void __exit mod_exit(void)
++{
++}
++
++module_param(nosimd, bool, 0);
++module_init(mod_init);
++module_exit(mod_exit);
++MODULE_LICENSE("GPL v2");
++MODULE_DESCRIPTION("ChaCha20 stream cipher");
++MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/chacha20/chacha20-mips-glue.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,27 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++/* Assembly implementation; takes the full 16-word state rather than key/counter. */
++asmlinkage void chacha20_mips(u32 state[16], u8 *out, const u8 *in,
++			      const size_t len);
++/* Empty list: there are no optional implementations to toggle on MIPS. */
++static bool *const chacha20_nobs[] __initconst = { };
++/* Nothing to detect: the MIPS routine is used unconditionally. */
++static void __init chacha20_fpu_init(void)
++{
++}
++
++/*
++ * MIPS ChaCha20: hand the whole request to chacha20_mips() and report it as
++ * handled. @simd_context is unused. Unlike the other glue files, the counter
++ * is not advanced here.
++ * NOTE(review): presumably chacha20_mips() updates the counter inside
++ * ctx->state itself - confirm against the assembly.
++ */
++static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
++				 const u8 *src, size_t len,
++				 simd_context_t *simd_context)
++{
++	chacha20_mips(ctx->state, dst, src, len);
++	return true;
++}
++
++/*
++ * No MIPS HChaCha20 implementation: always returns false so hchacha20()
++ * uses the generic code.
++ */
++static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
++				  const u8 nonce[HCHACHA20_NONCE_SIZE],
++				  const u8 key[HCHACHA20_KEY_SIZE],
++				  simd_context_t *simd_context)
++{
++	return false;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/chacha20/chacha20-x86_64-glue.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,105 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include <asm/fpu/api.h>
++#include <asm/cpufeature.h>
++#include <asm/processor.h>
++#include <asm/intel-family.h>
++
++asmlinkage void hchacha20_ssse3(u32 *derived_key, const u8 *nonce,
++				const u8 *key);
++asmlinkage void chacha20_ssse3(u8 *out, const u8 *in, const size_t len,
++			       const u32 key[8], const u32 counter[4]);
++asmlinkage void chacha20_avx2(u8 *out, const u8 *in, const size_t len,
++			      const u32 key[8], const u32 counter[4]);
++asmlinkage void chacha20_avx512(u8 *out, const u8 *in, const size_t len,
++				const u32 key[8], const u32 counter[4]);
++asmlinkage void chacha20_avx512vl(u8 *out, const u8 *in, const size_t len,
++				  const u32 key[8], const u32 counter[4]);
++
++static bool chacha20_use_ssse3 __ro_after_init;
++static bool chacha20_use_avx2 __ro_after_init;
++static bool chacha20_use_avx512 __ro_after_init;
++static bool chacha20_use_avx512vl __ro_after_init;
++static bool *const chacha20_nobs[] __initconst = {
++	&chacha20_use_ssse3, &chacha20_use_avx2, &chacha20_use_avx512,
++	&chacha20_use_avx512vl };
++
++/*
++ * Record which x86_64 implementations may be used, based on boot CPU feature
++ * bits and cpu_has_xfeatures() checks:
++ *  - SSSE3:     X86_FEATURE_SSSE3
++ *  - AVX2:      AVX and AVX2, with the SSE and YMM xfeature masks
++ *  - AVX-512:   AVX, AVX2 and AVX512F, with the SSE, YMM and AVX512 masks,
++ *               and a CPU model other than INTEL_FAM6_SKYLAKE_X
++ *  - AVX-512VL: as AVX-512 plus AVX512VL, without the model exclusion
++ * Both AVX-512 flags stay unset when COMPAT_CANNOT_USE_AVX512 is defined.
++ */
++static void __init chacha20_fpu_init(void)
++{
++	chacha20_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3);
++	chacha20_use_avx2 =
++		boot_cpu_has(X86_FEATURE_AVX) &&
++		boot_cpu_has(X86_FEATURE_AVX2) &&
++		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
++#ifndef COMPAT_CANNOT_USE_AVX512
++	chacha20_use_avx512 =
++		boot_cpu_has(X86_FEATURE_AVX) &&
++		boot_cpu_has(X86_FEATURE_AVX2) &&
++		boot_cpu_has(X86_FEATURE_AVX512F) &&
++		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
++				  XFEATURE_MASK_AVX512, NULL) &&
++		/* Skylake downclocks unacceptably much when using zmm. */
++		boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X;
++	chacha20_use_avx512vl =
++		boot_cpu_has(X86_FEATURE_AVX) &&
++		boot_cpu_has(X86_FEATURE_AVX2) &&
++		boot_cpu_has(X86_FEATURE_AVX512F) &&
++		boot_cpu_has(X86_FEATURE_AVX512VL) &&
++		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
++				  XFEATURE_MASK_AVX512, NULL);
++#endif
++}
++
++/*
++ * x86_64 ChaCha20. Returns false (caller uses the generic code) when SSSE3
++ * is unavailable, when @len is at most one block, or when SIMD cannot be
++ * used in the current context. Otherwise processes the data in chunks of at
++ * most one page, picking the widest implementation allowed for the bytes
++ * still remaining, advances the block counter, and returns true.
++ */
++static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
++				 const u8 *src, size_t len,
++				 simd_context_t *simd_context)
++{
++	const bool have_zmm = IS_ENABLED(CONFIG_AS_AVX512) &&
++			      chacha20_use_avx512;
++	const bool have_vl = IS_ENABLED(CONFIG_AS_AVX512) &&
++			     chacha20_use_avx512vl;
++	const bool have_avx2 = IS_ENABLED(CONFIG_AS_AVX2) && chacha20_use_avx2;
++	size_t chunk;
++
++	/* SIMD disables preemption, so relax after processing each page. */
++	BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE ||
++		     PAGE_SIZE % CHACHA20_BLOCK_SIZE);
++
++	if (!IS_ENABLED(CONFIG_AS_SSSE3) || !chacha20_use_ssse3)
++		return false;
++	if (len <= CHACHA20_BLOCK_SIZE || !simd_use(simd_context))
++		return false;
++
++	for (;;) {
++		chunk = min_t(size_t, len, PAGE_SIZE);
++
++		if (have_zmm && len >= CHACHA20_BLOCK_SIZE * 8)
++			chacha20_avx512(dst, src, chunk, ctx->key, ctx->counter);
++		else if (have_vl && len >= CHACHA20_BLOCK_SIZE * 4)
++			chacha20_avx512vl(dst, src, chunk, ctx->key, ctx->counter);
++		else if (have_avx2 && len >= CHACHA20_BLOCK_SIZE * 4)
++			chacha20_avx2(dst, src, chunk, ctx->key, ctx->counter);
++		else
++			chacha20_ssse3(dst, src, chunk, ctx->key, ctx->counter);
++
++		ctx->counter[0] += (chunk + 63) / 64;
++		len -= chunk;
++		if (len == 0)
++			break;
++		dst += chunk;
++		src += chunk;
++		simd_relax(simd_context);
++	}
++
++	return true;
++}
++
++/*
++ * x86_64 HChaCha20: uses the SSSE3 assembly when it is built in, the CPU
++ * supports it and SIMD is usable in this context; returns false otherwise so
++ * the caller uses the generic code.
++ */
++static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
++				  const u8 nonce[HCHACHA20_NONCE_SIZE],
++				  const u8 key[HCHACHA20_KEY_SIZE],
++				  simd_context_t *simd_context)
++{
++	if (!IS_ENABLED(CONFIG_AS_SSSE3) || !chacha20_use_ssse3 ||
++	    !simd_use(simd_context))
++		return false;
++
++	hchacha20_ssse3(derived_key, nonce, key);
++	return true;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/chacha20poly1305.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,404 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This is an implementation of the ChaCha20Poly1305 AEAD construction.
++ *
++ * Information: https://tools.ietf.org/html/rfc8439
++ */
++
++#include <zinc/chacha20poly1305.h>
++#include <zinc/chacha20.h>
++#include <zinc/poly1305.h>
++#include "selftest/run.h"
++
++#include <asm/unaligned.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <crypto/scatterwalk.h> // For blkcipher_walk.
++
++static const u8 pad0[CHACHA20_BLOCK_SIZE] = { 0 };
++
++/*
++ * Core ChaCha20-Poly1305 encryption over contiguous buffers. The Poly1305
++ * key is the first POLY1305_KEY_SIZE bytes of keystream. The MAC covers the
++ * additional data, the ciphertext (each zero-padded to a multiple of 16
++ * bytes) and both lengths as little-endian 64-bit values. The tag is written
++ * to @dst directly after the @src_len ciphertext bytes. The cipher state and
++ * scratch buffer are wiped before returning.
++ */
++static inline void
++__chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
++			   const u8 *ad, const size_t ad_len, const u64 nonce,
++			   const u8 key[CHACHA20POLY1305_KEY_SIZE],
++			   simd_context_t *simd_context)
++{
++	const size_t ad_pad = (0x10 - ad_len) & 0xf;
++	const size_t ct_pad = (0x10 - src_len) & 0xf;
++	struct poly1305_ctx mac;
++	struct chacha20_ctx cipher;
++	union {
++		u8 block0[POLY1305_KEY_SIZE];
++		__le64 lens[2];
++	} scratch = { { 0 } };
++
++	/* Derive the one-time MAC key from the start of the keystream. */
++	chacha20_init(&cipher, key, nonce);
++	chacha20(&cipher, scratch.block0, scratch.block0,
++		 sizeof(scratch.block0), simd_context);
++	poly1305_init(&mac, scratch.block0);
++
++	poly1305_update(&mac, ad, ad_len, simd_context);
++	poly1305_update(&mac, pad0, ad_pad, simd_context);
++
++	chacha20(&cipher, dst, src, src_len, simd_context);
++
++	poly1305_update(&mac, dst, src_len, simd_context);
++	poly1305_update(&mac, pad0, ct_pad, simd_context);
++
++	scratch.lens[0] = cpu_to_le64(ad_len);
++	scratch.lens[1] = cpu_to_le64(src_len);
++	poly1305_update(&mac, (u8 *)scratch.lens, sizeof(scratch.lens),
++			simd_context);
++
++	poly1305_final(&mac, dst + src_len, simd_context);
++
++	memzero_explicit(&cipher, sizeof(cipher));
++	memzero_explicit(&scratch, sizeof(scratch));
++}
++
++/*
++ * Encrypt @src_len bytes from @src into @dst and append the authentication
++ * tag. Wraps __chacha20poly1305_encrypt() in a locally acquired SIMD
++ * context.
++ */
++void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
++			      const u8 *ad, const size_t ad_len,
++			      const u64 nonce,
++			      const u8 key[CHACHA20POLY1305_KEY_SIZE])
++{
++	simd_context_t simd;
++
++	simd_get(&simd);
++	__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key,
++				   &simd);
++	simd_put(&simd);
++}
++EXPORT_SYMBOL(chacha20poly1305_encrypt);
++
++/*
++ * Encrypt @src_len bytes held in scatterlist @src in place and store the
++ * Poly1305 tag directly after them. Returns false (with a warning) when
++ * @src_len exceeds INT_MAX, true otherwise.
++ *
++ * The scatterlist is walked one mapped segment at a time. Because a segment
++ * may end in the middle of a ChaCha20 block, one block of keystream is kept
++ * in b.chacha20_stream and "partial" records how many of its bytes have been
++ * consumed already.
++ */
++bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src,
++					 const size_t src_len,
++					 const u8 *ad, const size_t ad_len,
++					 const u64 nonce,
++					 const u8 key[CHACHA20POLY1305_KEY_SIZE],
++					 simd_context_t *simd_context)
++{
++	struct poly1305_ctx poly1305_state;
++	struct chacha20_ctx chacha20_state;
++	struct sg_mapping_iter miter;
++	size_t partial = 0;
++	ssize_t sl;
++	union {
++		u8 chacha20_stream[CHACHA20_BLOCK_SIZE];
++		u8 block0[POLY1305_KEY_SIZE];
++		u8 mac[POLY1305_MAC_SIZE];
++		__le64 lens[2];
++	} b __aligned(16) = { { 0 } };
++
++	if (WARN_ON(src_len > INT_MAX))
++		return false;
++
++	/* Derive the one-time MAC key from the start of the keystream. */
++	chacha20_init(&chacha20_state, key, nonce);
++	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
++		 simd_context);
++	poly1305_init(&poly1305_state, b.block0);
++
++	poly1305_update(&poly1305_state, ad, ad_len, simd_context);
++	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
++			simd_context);
++
++	sg_miter_start(&miter, src, sg_nents(src), SG_MITER_TO_SG | SG_MITER_ATOMIC);
++	/* sl counts the bytes still to encrypt; it goes negative by the
++	 * number of bytes left over in the last mapped segment.
++	 */
++	for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) {
++		u8 *addr = miter.addr;
++		size_t length = min_t(size_t, sl, miter.length);
++
++		/* Use up keystream left over from the previous segment. */
++		if (unlikely(partial)) {
++			size_t l = min(length, CHACHA20_BLOCK_SIZE - partial);
++
++			crypto_xor(addr, b.chacha20_stream + partial, l);
++			partial = (partial + l) & (CHACHA20_BLOCK_SIZE - 1);
++
++			addr += l;
++			length -= l;
++		}
++
++		/* Bulk part: whole blocks only, unless this is the final
++		 * piece of data.
++		 */
++		if (likely(length >= CHACHA20_BLOCK_SIZE || length == sl)) {
++			size_t l = length;
++
++			if (unlikely(length < sl))
++				l &= ~(CHACHA20_BLOCK_SIZE - 1);
++			chacha20(&chacha20_state, addr, addr, l, simd_context);
++			addr += l;
++			length -= l;
++		}
++
++		/* Segment ends mid-block: generate one keystream block and
++		 * remember how much of it was used.
++		 */
++		if (unlikely(length > 0)) {
++			chacha20(&chacha20_state, b.chacha20_stream, pad0,
++				 CHACHA20_BLOCK_SIZE, simd_context);
++			crypto_xor(addr, b.chacha20_stream, length);
++			partial = length;
++		}
++
++		/* Authenticate the ciphertext just produced in this segment. */
++		poly1305_update(&poly1305_state, miter.addr,
++				min_t(size_t, sl, miter.length), simd_context);
++
++		simd_relax(simd_context);
++	}
++
++	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf,
++			simd_context);
++
++	b.lens[0] = cpu_to_le64(ad_len);
++	b.lens[1] = cpu_to_le64(src_len);
++	poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
++			simd_context);
++
++	/* Fast path: the last mapped segment has room for the whole tag
++	 * right after the data, so write it there directly.
++	 */
++	if (likely(sl <= -POLY1305_MAC_SIZE))
++		poly1305_final(&poly1305_state, miter.addr + miter.length + sl,
++			       simd_context);
++
++	sg_miter_stop(&miter);
++
++	/* Otherwise compute the tag locally and copy it into the scatterlist
++	 * at offset src_len.
++	 */
++	if (unlikely(sl > -POLY1305_MAC_SIZE)) {
++		poly1305_final(&poly1305_state, b.mac, simd_context);
++		scatterwalk_map_and_copy(b.mac, src, src_len, sizeof(b.mac), 1);
++	}
++
++	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
++	memzero_explicit(&b, sizeof(b));
++	return true;
++}
++EXPORT_SYMBOL(chacha20poly1305_encrypt_sg_inplace);
++
++/*
++ * Core ChaCha20-Poly1305 decryption over contiguous buffers. @src holds the
++ * ciphertext followed by a POLY1305_MAC_SIZE-byte tag. The tag is recomputed
++ * over the additional data, the ciphertext and both lengths, and compared
++ * with crypto_memneq(); only on a match is the plaintext written to @dst.
++ * Returns true on a match, false on a mismatch or when @src_len is shorter
++ * than the tag. The cipher state and scratch buffer are wiped on exit.
++ */
++static inline bool
++__chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
++			   const u8 *ad, const size_t ad_len, const u64 nonce,
++			   const u8 key[CHACHA20POLY1305_KEY_SIZE],
++			   simd_context_t *simd_context)
++{
++	struct poly1305_ctx mac;
++	struct chacha20_ctx cipher;
++	size_t ct_len;
++	bool authentic;
++	union {
++		u8 block0[POLY1305_KEY_SIZE];
++		u8 mac[POLY1305_MAC_SIZE];
++		__le64 lens[2];
++	} scratch = { { 0 } };
++
++	if (unlikely(src_len < POLY1305_MAC_SIZE))
++		return false;
++	ct_len = src_len - POLY1305_MAC_SIZE;
++
++	/* Derive the one-time MAC key from the start of the keystream. */
++	chacha20_init(&cipher, key, nonce);
++	chacha20(&cipher, scratch.block0, scratch.block0,
++		 sizeof(scratch.block0), simd_context);
++	poly1305_init(&mac, scratch.block0);
++
++	poly1305_update(&mac, ad, ad_len, simd_context);
++	poly1305_update(&mac, pad0, (0x10 - ad_len) & 0xf, simd_context);
++
++	poly1305_update(&mac, src, ct_len, simd_context);
++	poly1305_update(&mac, pad0, (0x10 - ct_len) & 0xf, simd_context);
++
++	scratch.lens[0] = cpu_to_le64(ad_len);
++	scratch.lens[1] = cpu_to_le64(ct_len);
++	poly1305_update(&mac, (u8 *)scratch.lens, sizeof(scratch.lens),
++			simd_context);
++
++	poly1305_final(&mac, scratch.mac, simd_context);
++
++	authentic = !crypto_memneq(scratch.mac, src + ct_len,
++				   POLY1305_MAC_SIZE);
++	if (likely(authentic))
++		chacha20(&cipher, dst, src, ct_len, simd_context);
++
++	memzero_explicit(&cipher, sizeof(cipher));
++	memzero_explicit(&scratch, sizeof(scratch));
++
++	return authentic;
++}
++
++/*
++ * chacha20poly1305_decrypt - authenticate and decrypt a contiguous buffer
++ * @dst:     receives the plaintext (@src_len - POLY1305_MAC_SIZE bytes)
++ * @src:     ciphertext immediately followed by the POLY1305_MAC_SIZE-byte tag
++ * @src_len: length of @src including the tag
++ * @ad:      additional authenticated data
++ * @ad_len:  length of @ad
++ * @nonce:   64-bit nonce
++ * @key:     CHACHA20POLY1305_KEY_SIZE-byte key
++ *
++ * Wraps __chacha20poly1305_decrypt() in a locally acquired SIMD context.
++ * Returns true when the tag verified and @dst was written, false otherwise.
++ */
++bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
++			      const u8 *ad, const size_t ad_len,
++			      const u64 nonce,
++			      const u8 key[CHACHA20POLY1305_KEY_SIZE])
++{
++	simd_context_t simd_context;
++	/* Holds the bool result; it used to share the simd_context_t type. */
++	bool ret;
++
++	simd_get(&simd_context);
++	ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce,
++					 key, &simd_context);
++	simd_put(&simd_context);
++	return ret;
++}
++EXPORT_SYMBOL(chacha20poly1305_decrypt);
++
++/*
++ * Authenticate and decrypt, in place, the data held in scatterlist @src.
++ * @src_len covers the ciphertext plus the trailing POLY1305_MAC_SIZE-byte
++ * tag. Returns true when the tag matches; returns false on a mismatch, when
++ * @src_len is shorter than the tag, or (with a warning) when it exceeds
++ * INT_MAX.
++ *
++ * Unlike the contiguous variant, the data is decrypted while the MAC is
++ * still being accumulated, so the buffer has already been modified when
++ * false is returned for a tag mismatch.
++ *
++ * Because a segment may end in the middle of a ChaCha20 block, one block of
++ * keystream is kept in b.chacha20_stream and "partial" records how many of
++ * its bytes have been consumed already.
++ */
++bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src,
++					 size_t src_len,
++					 const u8 *ad, const size_t ad_len,
++					 const u64 nonce,
++					 const u8 key[CHACHA20POLY1305_KEY_SIZE],
++					 simd_context_t *simd_context)
++{
++	struct poly1305_ctx poly1305_state;
++	struct chacha20_ctx chacha20_state;
++	struct sg_mapping_iter miter;
++	size_t partial = 0;
++	ssize_t sl;
++	union {
++		u8 chacha20_stream[CHACHA20_BLOCK_SIZE];
++		u8 block0[POLY1305_KEY_SIZE];
++		struct {
++			u8 read_mac[POLY1305_MAC_SIZE];
++			u8 computed_mac[POLY1305_MAC_SIZE];
++		};
++		__le64 lens[2];
++	} b __aligned(16) = { { 0 } };
++	bool ret = false;
++
++	if (unlikely(src_len < POLY1305_MAC_SIZE || WARN_ON(src_len > INT_MAX)))
++		return ret;
++	/* From here on src_len is the ciphertext length without the tag. */
++	src_len -= POLY1305_MAC_SIZE;
++
++	/* Derive the one-time MAC key from the start of the keystream. */
++	chacha20_init(&chacha20_state, key, nonce);
++	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
++		 simd_context);
++	poly1305_init(&poly1305_state, b.block0);
++
++	poly1305_update(&poly1305_state, ad, ad_len, simd_context);
++	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
++			simd_context);
++
++	sg_miter_start(&miter, src, sg_nents(src), SG_MITER_TO_SG | SG_MITER_ATOMIC);
++	/* sl counts the bytes still to decrypt; it goes negative by the
++	 * number of bytes left over in the last mapped segment.
++	 */
++	for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) {
++		u8 *addr = miter.addr;
++		size_t length = min_t(size_t, sl, miter.length);
++
++		/* Authenticate the ciphertext before it is overwritten. */
++		poly1305_update(&poly1305_state, addr, length, simd_context);
++
++		/* Use up keystream left over from the previous segment. */
++		if (unlikely(partial)) {
++			size_t l = min(length, CHACHA20_BLOCK_SIZE - partial);
++
++			crypto_xor(addr, b.chacha20_stream + partial, l);
++			partial = (partial + l) & (CHACHA20_BLOCK_SIZE - 1);
++
++			addr += l;
++			length -= l;
++		}
++
++		/* Bulk part: whole blocks only, unless this is the final
++		 * piece of data.
++		 */
++		if (likely(length >= CHACHA20_BLOCK_SIZE || length == sl)) {
++			size_t l = length;
++
++			if (unlikely(length < sl))
++				l &= ~(CHACHA20_BLOCK_SIZE - 1);
++			chacha20(&chacha20_state, addr, addr, l, simd_context);
++			addr += l;
++			length -= l;
++		}
++
++		/* Segment ends mid-block: generate one keystream block and
++		 * remember how much of it was used.
++		 */
++		if (unlikely(length > 0)) {
++			chacha20(&chacha20_state, b.chacha20_stream, pad0,
++				 CHACHA20_BLOCK_SIZE, simd_context);
++			crypto_xor(addr, b.chacha20_stream, length);
++			partial = length;
++		}
++
++		simd_relax(simd_context);
++	}
++
++	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf,
++			simd_context);
++
++	b.lens[0] = cpu_to_le64(ad_len);
++	b.lens[1] = cpu_to_le64(src_len);
++	poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
++			simd_context);
++
++	/* Fast path: the whole tag sits in the last mapped segment right
++	 * after the data, so compare against it in place.
++	 */
++	if (likely(sl <= -POLY1305_MAC_SIZE)) {
++		poly1305_final(&poly1305_state, b.computed_mac, simd_context);
++		ret = !crypto_memneq(b.computed_mac,
++				     miter.addr + miter.length + sl,
++				     POLY1305_MAC_SIZE);
++	}
++
++	sg_miter_stop(&miter);
++
++	/* Otherwise copy the tag out of the scatterlist at offset src_len
++	 * and compare against that copy.
++	 */
++	if (unlikely(sl > -POLY1305_MAC_SIZE)) {
++		poly1305_final(&poly1305_state, b.computed_mac, simd_context);
++		scatterwalk_map_and_copy(b.read_mac, src, src_len,
++					 sizeof(b.read_mac), 0);
++		ret = !crypto_memneq(b.read_mac, b.computed_mac,
++				     POLY1305_MAC_SIZE);
++
++	}
++
++	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
++	memzero_explicit(&b, sizeof(b));
++	return ret;
++}
++EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace);
++
++/*
++ * XChaCha20-Poly1305 encryption. A subkey is derived with hchacha20() from
++ * @key and @nonce, converted to little-endian byte order and used as the
++ * ChaCha20-Poly1305 key; the 64-bit value at nonce + 16 becomes the inner
++ * nonce. The subkey is wiped before the SIMD context is released.
++ */
++void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
++			       const u8 *ad, const size_t ad_len,
++			       const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
++			       const u8 key[CHACHA20POLY1305_KEY_SIZE])
++{
++	u32 subkey[CHACHA20_KEY_WORDS] __aligned(16);
++	simd_context_t simd;
++
++	simd_get(&simd);
++
++	hchacha20(subkey, nonce, key, &simd);
++	cpu_to_le32_array(subkey, ARRAY_SIZE(subkey));
++
++	__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
++				   get_unaligned_le64(nonce + 16),
++				   (u8 *)subkey, &simd);
++
++	memzero_explicit(subkey, CHACHA20POLY1305_KEY_SIZE);
++	simd_put(&simd);
++}
++EXPORT_SYMBOL(xchacha20poly1305_encrypt);
++
++/*
++ * XChaCha20-Poly1305 decryption. A subkey is derived with hchacha20() from
++ * @key and @nonce, converted to little-endian byte order and used as the
++ * ChaCha20-Poly1305 key; the 64-bit value at nonce + 16 becomes the inner
++ * nonce. Returns the result of __chacha20poly1305_decrypt(). The subkey is
++ * wiped before the SIMD context is released.
++ */
++bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
++			       const u8 *ad, const size_t ad_len,
++			       const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
++			       const u8 key[CHACHA20POLY1305_KEY_SIZE])
++{
++	u32 subkey[CHACHA20_KEY_WORDS] __aligned(16);
++	simd_context_t simd;
++	bool authentic;
++
++	simd_get(&simd);
++
++	hchacha20(subkey, nonce, key, &simd);
++	cpu_to_le32_array(subkey, ARRAY_SIZE(subkey));
++
++	authentic = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
++					       get_unaligned_le64(nonce + 16),
++					       (u8 *)subkey, &simd);
++
++	memzero_explicit(subkey, CHACHA20POLY1305_KEY_SIZE);
++	simd_put(&simd);
++	return authentic;
++}
++EXPORT_SYMBOL(xchacha20poly1305_decrypt);
++
++#include "selftest/chacha20poly1305.c"
++/* Init hook: runs the ChaCha20Poly1305 self-test; static "mod_init" when built as a stand-alone module. */
++#ifndef COMPAT_ZINC_IS_A_MODULE
++int __init chacha20poly1305_mod_init(void)
++#else
++static int __init mod_init(void)
++#endif
++{
++	if (!selftest_run("chacha20poly1305", chacha20poly1305_selftest,
++			  NULL, 0)) /* no feature flags are passed for this primitive */
++		return -ENOTRECOVERABLE; /* selftest_run() returned false */
++	return 0;
++}
++/* Stand-alone module build only: empty exit hook, init/exit registration and module metadata. */
++#ifdef COMPAT_ZINC_IS_A_MODULE
++static void __exit mod_exit(void)
++{
++}
++
++module_init(mod_init);
++module_exit(mod_exit);
++MODULE_LICENSE("GPL v2");
++MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction");
++MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/curve25519/curve25519-arm-glue.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,43 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include <linux/simd.h>
++#include <asm/hwcap.h>
++#include <asm/neon.h>
++/* NEON Curve25519 routine; only declared here, the definition lives outside this file. */
++asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE],
++				const u8 secret[CURVE25519_KEY_SIZE],
++				const u8 basepoint[CURVE25519_KEY_SIZE]);
++/* NEON availability flag, set at init from elf_hwcap; curve25519_nobs lists it for selftest_run(). */
++static bool curve25519_use_neon __ro_after_init;
++static bool *const curve25519_nobs[] __initconst = { &curve25519_use_neon };
++static void __init curve25519_fpu_init(void)
++{
++	curve25519_use_neon = elf_hwcap & HWCAP_NEON; /* non-zero HWCAP_NEON bit converts to true */
++}
++/* Try the NEON implementation. Returns true if it wrote mypublic, false if the caller must fall back. */
++static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
++				   const u8 secret[CURVE25519_KEY_SIZE],
++				   const u8 basepoint[CURVE25519_KEY_SIZE])
++{
++	simd_context_t simd_context;
++	bool used_arch = false;
++
++	simd_get(&simd_context);
++	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
++	    !IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) && curve25519_use_neon && /* little-endian builds only */
++	    simd_use(&simd_context)) { /* checked last, after the compile-time and flag tests */
++		curve25519_neon(mypublic, secret, basepoint);
++		used_arch = true;
++	}
++	simd_put(&simd_context); /* released on both paths */
++	return used_arch;
++}
++/* No arch-specific base-point path in this glue: always returns false. */
++static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
++					const u8 secret[CURVE25519_KEY_SIZE])
++{
++	return false;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/curve25519/curve25519.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,113 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This is an implementation of the Curve25519 ECDH algorithm, using either
++ * a 32-bit implementation or a 64-bit implementation with 128-bit integers,
++ * depending on what is supported by the target compiler.
++ *
++ * Information: https://cr.yp.to/ecdh.html
++ */
++
++#include <zinc/curve25519.h>
++#include "../selftest/run.h"
++
++#include <asm/unaligned.h>
++#include <linux/version.h>
++#include <linux/string.h>
++#include <linux/random.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <crypto/algapi.h> // For crypto_memneq.
++/* Pull in the arch glue when configured; otherwise define stubs that always return false. */
++#if defined(CONFIG_ZINC_ARCH_X86_64)
++#include "curve25519-x86_64-glue.c"
++#elif defined(CONFIG_ZINC_ARCH_ARM)
++#include "curve25519-arm-glue.c"
++#else
++static bool *const curve25519_nobs[] __initconst = { }; /* no feature flags without arch glue */
++static void __init curve25519_fpu_init(void)
++{
++}
++static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
++				   const u8 secret[CURVE25519_KEY_SIZE],
++				   const u8 basepoint[CURVE25519_KEY_SIZE])
++{
++	return false; /* callers then use curve25519_generic() */
++}
++static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
++					const u8 secret[CURVE25519_KEY_SIZE])
++{
++	return false; /* curve25519_generate_public() then goes through curve25519() */
++}
++#endif
++
++#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
++#include "curve25519-hacl64.c"
++#else
++#include "curve25519-fiat32.c"
++#endif
++/* Curve25519 scalar multiplication into mypublic; returns false when the result is all zero bytes. */
++static const u8 null_point[CURVE25519_KEY_SIZE] = { 0 };
++
++bool curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
++		const u8 secret[CURVE25519_KEY_SIZE],
++		const u8 basepoint[CURVE25519_KEY_SIZE])
++{
++	if (!curve25519_arch(mypublic, secret, basepoint)) /* arch path first, generic code otherwise */
++		curve25519_generic(mypublic, secret, basepoint);
++	return crypto_memneq(mypublic, null_point, CURVE25519_KEY_SIZE);
++}
++EXPORT_SYMBOL(curve25519);
++/* Compute pub from secret using base point {9, 0, ...}; false for an all-zero secret or all-zero result. */
++bool curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE],
++				const u8 secret[CURVE25519_KEY_SIZE])
++{
++	static const u8 basepoint[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
++
++	if (unlikely(!crypto_memneq(secret, null_point, CURVE25519_KEY_SIZE)))
++		return false; /* all-zero secret is rejected up front */
++
++	if (curve25519_base_arch(pub, secret)) /* arch-specific base-point path, if the glue has one */
++		return crypto_memneq(pub, null_point, CURVE25519_KEY_SIZE);
++	return curve25519(pub, secret, basepoint);
++}
++EXPORT_SYMBOL(curve25519_generate_public);
++/* Fill secret with CURVE25519_KEY_SIZE bytes from get_random_bytes_wait(), then clamp it. */
++void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE])
++{
++	get_random_bytes_wait(secret, CURVE25519_KEY_SIZE);
++	curve25519_clamp_secret(secret);
++}
++EXPORT_SYMBOL(curve25519_generate_secret);
++
++#include "../selftest/curve25519.c"
++/* When set, feature detection is skipped at init; exposed as a module parameter in module builds. */
++static bool nosimd __initdata = false;
++/* Init hook: optional feature detection, then the Curve25519 self-test. */
++#ifndef COMPAT_ZINC_IS_A_MODULE
++int __init curve25519_mod_init(void)
++#else
++static int __init mod_init(void)
++#endif
++{
++	if (!nosimd)
++		curve25519_fpu_init();
++	if (!selftest_run("curve25519", curve25519_selftest, curve25519_nobs,
++			  ARRAY_SIZE(curve25519_nobs))) /* feature flags are handed to the test runner */
++		return -ENOTRECOVERABLE; /* selftest_run() returned false */
++	return 0;
++}
++/* Stand-alone module build only: empty exit hook, nosimd parameter, registration and metadata. */
++#ifdef COMPAT_ZINC_IS_A_MODULE
++static void __exit mod_exit(void)
++{
++}
++
++module_param(nosimd, bool, 0);
++module_init(mod_init);
++module_exit(mod_exit);
++MODULE_LICENSE("GPL v2");
++MODULE_DESCRIPTION("Curve25519 scalar multiplication");
++MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/curve25519/curve25519-fiat32.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,860 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2016 The fiat-crypto Authors.
++ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This is a machine-generated formally verified implementation of Curve25519
++ * ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
++ * machine generated, it has been tweaked to be suitable for use in the kernel.
++ * It is optimized for 32-bit machines and machines that cannot work efficiently
++ * with 128-bit integer types.
++ */
++
++/* fe means field element. Here the field is \Z/(2^255-19). An element t,
++ * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
++ * t[3]+2^102 t[4]+...+2^230 t[9], i.e. limbs alternate between 26 and 25 bits.
++ * fe limbs are bounded by 1.125*2^26, 1.125*2^25, 1.125*2^26, 1.125*2^25, etc.
++ * Multiplication and carrying produce fe from fe_loose.
++ */
++typedef struct fe { u32 v[10]; } fe;
++
++/* fe_loose limbs are bounded by 3.375*2^26, 3.375*2^25, 3.375*2^26, 3.375*2^25, etc.
++ * Addition and subtraction produce fe_loose from (fe, fe); the layout matches fe.
++ */
++typedef struct fe_loose { u32 v[10]; } fe_loose;
++/* Unpack 32 little-endian bytes from s into ten limbs of alternating 26 and 25 bits. */
++static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s)
++{
++	/* Ignores top bit of s (h[9] keeps only 25 of the 26 bits left in a7). */
++	u32 a0 = get_unaligned_le32(s);
++	u32 a1 = get_unaligned_le32(s+4);
++	u32 a2 = get_unaligned_le32(s+8);
++	u32 a3 = get_unaligned_le32(s+12);
++	u32 a4 = get_unaligned_le32(s+16);
++	u32 a5 = get_unaligned_le32(s+20);
++	u32 a6 = get_unaligned_le32(s+24);
++	u32 a7 = get_unaligned_le32(s+28);
++	h[0] = a0&((1<<26)-1);                    /* 26 used, 32-26 left.   26 */
++	h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 =  6+19 = 25 */
++	h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
++	h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) +  6 = 19+ 6 = 25 */
++	h[4] = (a3>> 6);                          /* (32- 6)              = 26 */
++	h[5] = a4&((1<<25)-1);                    /*                        25 */
++	h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 =  7+19 = 26 */
++	h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */
++	h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) +  6 = 20+ 6 = 26 */
++	h[9] = (a7>> 6)&((1<<25)-1); /*                                     25 */
++}
++/* Typed wrapper: unpack s into h via fe_frombytes_impl(). */
++static __always_inline void fe_frombytes(fe *h, const u8 *s)
++{
++	fe_frombytes_impl(h->v, s);
++}
++/* Stores the low 25 bits of a + b + c in *low and returns bit 25 of the sum as the carry. */
++static __always_inline u8 /*bool*/
++addcarryx_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
++{
++	/* This function extracts 25 bits of result and 1 bit of carry
++	 * (26 total), so a 32-bit intermediate is sufficient.
++	 */
++	u32 x = a + b + c;
++	*low = x & ((1 << 25) - 1);
++	return (x >> 25) & 1;
++}
++/* Stores the low 26 bits of a + b + c in *low and returns bit 26 of the sum as the carry. */
++static __always_inline u8 /*bool*/
++addcarryx_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
++{
++	/* This function extracts 26 bits of result and 1 bit of carry
++	 * (27 total), so a 32-bit intermediate is sufficient.
++	 */
++	u32 x = a + b + c;
++	*low = x & ((1 << 26) - 1);
++	return (x >> 26) & 1;
++}
++/* Stores the low 25 bits of a - b - c in *low and returns bit 31 of the 32-bit difference as the borrow. */
++static __always_inline u8 /*bool*/
++subborrow_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
++{
++	/* This function extracts 25 bits of result and 1 bit of borrow
++	 * (26 total), so a 32-bit intermediate is sufficient.
++	 */
++	u32 x = a - b - c;
++	*low = x & ((1 << 25) - 1);
++	return x >> 31; /* set when the unsigned subtraction wrapped around */
++}
++/* Stores the low 26 bits of a - b - c in *low and returns bit 31 of the 32-bit difference as the borrow. */
++static __always_inline u8 /*bool*/
++subborrow_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
++{
++	/* This function extracts 26 bits of result and 1 bit of borrow
++	 * (27 total), so a 32-bit intermediate is sufficient.
++	 */
++	u32 x = a - b - c;
++	*low = x & ((1 << 26) - 1);
++	return x >> 31; /* set when the unsigned subtraction wrapped around */
++}
++/* Branch-free select: returns nz when t is non-zero, z when t is zero. */
++static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz)
++{
++	t = -!!t; /* all set if nonzero, 0 if 0 */
++	return (t&nz) | ((~t)&z);
++}
++/* out = in1 - p if that does not borrow, else in1, where p = 2^255-19 is given limb by limb below. */
++static __always_inline void fe_freeze(u32 out[10], const u32 in1[10])
++{
++	{ const u32 x17 = in1[9];
++	{ const u32 x18 = in1[8];
++	{ const u32 x16 = in1[7];
++	{ const u32 x14 = in1[6];
++	{ const u32 x12 = in1[5];
++	{ const u32 x10 = in1[4];
++	{ const u32 x8 = in1[3];
++	{ const u32 x6 = in1[2];
++	{ const u32 x4 = in1[1];
++	{ const u32 x2 = in1[0];
++	{ u32 x20; u8/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20); /* limb 0 of p: 2^26 - 19 */
++	{ u32 x23; u8/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23);
++	{ u32 x26; u8/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26);
++	{ u32 x29; u8/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29);
++	{ u32 x32; u8/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32);
++	{ u32 x35; u8/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35);
++	{ u32 x38; u8/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38);
++	{ u32 x41; u8/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41);
++	{ u32 x44; u8/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44);
++	{ u32 x47; u8/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47);
++	{ u32 x49 = cmovznz32(x48, 0x0, 0xffffffff); /* all-ones mask iff the subtraction borrowed */
++	{ u32 x50 = (x49 & 0x3ffffed); /* masked limbs of p are added back below */
++	{ u32 x52; u8/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52);
++	{ u32 x54 = (x49 & 0x1ffffff);
++	{ u32 x56; u8/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56);
++	{ u32 x58 = (x49 & 0x3ffffff);
++	{ u32 x60; u8/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60);
++	{ u32 x62 = (x49 & 0x1ffffff);
++	{ u32 x64; u8/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64);
++	{ u32 x66 = (x49 & 0x3ffffff);
++	{ u32 x68; u8/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68);
++	{ u32 x70 = (x49 & 0x1ffffff);
++	{ u32 x72; u8/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72);
++	{ u32 x74 = (x49 & 0x3ffffff);
++	{ u32 x76; u8/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76);
++	{ u32 x78 = (x49 & 0x1ffffff);
++	{ u32 x80; u8/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80);
++	{ u32 x82 = (x49 & 0x3ffffff);
++	{ u32 x84; u8/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84);
++	{ u32 x86 = (x49 & 0x1ffffff);
++	{ u32 x88; addcarryx_u25(x85, x47, x86, &x88); /* final carry is discarded */
++	out[0] = x52;
++	out[1] = x56;
++	out[2] = x60;
++	out[3] = x64;
++	out[4] = x68;
++	out[5] = x72;
++	out[6] = x76;
++	out[7] = x80;
++	out[8] = x84;
++	out[9] = x88;
++	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
++}
++/* Freeze f with fe_freeze(), then pack its ten limbs into 32 little-endian bytes in s. */
++static __always_inline void fe_tobytes(u8 s[32], const fe *f)
++{
++	u32 h[10];
++	fe_freeze(h, f->v);
++	s[0] = h[0] >> 0;
++	s[1] = h[0] >> 8;
++	s[2] = h[0] >> 16;
++	s[3] = (h[0] >> 24) | (h[1] << 2); /* top 2 bits of limb 0 share a byte with limb 1 */
++	s[4] = h[1] >> 6;
++	s[5] = h[1] >> 14;
++	s[6] = (h[1] >> 22) | (h[2] << 3);
++	s[7] = h[2] >> 5;
++	s[8] = h[2] >> 13;
++	s[9] = (h[2] >> 21) | (h[3] << 5);
++	s[10] = h[3] >> 3;
++	s[11] = h[3] >> 11;
++	s[12] = (h[3] >> 19) | (h[4] << 6);
++	s[13] = h[4] >> 2;
++	s[14] = h[4] >> 10;
++	s[15] = h[4] >> 18;
++	s[16] = h[5] >> 0; /* limb 5 starts byte-aligned, at bit 128 */
++	s[17] = h[5] >> 8;
++	s[18] = h[5] >> 16;
++	s[19] = (h[5] >> 24) | (h[6] << 1);
++	s[20] = h[6] >> 7;
++	s[21] = h[6] >> 15;
++	s[22] = (h[6] >> 23) | (h[7] << 3);
++	s[23] = h[7] >> 5;
++	s[24] = h[7] >> 13;
++	s[25] = (h[7] >> 21) | (h[8] << 4);
++	s[26] = h[8] >> 4;
++	s[27] = h[8] >> 12;
++	s[28] = (h[8] >> 20) | (h[9] << 6);
++	s[29] = h[9] >> 2;
++	s[30] = h[9] >> 10;
++	s[31] = h[9] >> 18;
++}
++
++/* h = f (copied with memmove, so h and f may be the same object) */
++static __always_inline void fe_copy(fe *h, const fe *f)
++{
++	memmove(h, f, sizeof(u32) * 10);
++}
++/* h = f, copying a tight fe into an fe_loose; both are ten u32 limbs. */
++static __always_inline void fe_copy_lt(fe_loose *h, const fe *f)
++{
++	memmove(h, f, sizeof(u32) * 10);
++}
++
++/* h = 0 (all ten limbs cleared) */
++static __always_inline void fe_0(fe *h)
++{
++	memset(h, 0, sizeof(u32) * 10);
++}
++
++/* h = 1 (limb 0 set to 1, the other nine limbs cleared) */
++static __always_inline void fe_1(fe *h)
++{
++	memset(h, 0, sizeof(u32) * 10);
++	h->v[0] = 1;
++}
++/* Limb-wise out = in1 + in2 with no carry propagation; all inputs are read before out is written. */
++static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
++{
++	{ const u32 x20 = in1[9];
++	{ const u32 x21 = in1[8];
++	{ const u32 x19 = in1[7];
++	{ const u32 x17 = in1[6];
++	{ const u32 x15 = in1[5];
++	{ const u32 x13 = in1[4];
++	{ const u32 x11 = in1[3];
++	{ const u32 x9 = in1[2];
++	{ const u32 x7 = in1[1];
++	{ const u32 x5 = in1[0];
++	{ const u32 x38 = in2[9];
++	{ const u32 x39 = in2[8];
++	{ const u32 x37 = in2[7];
++	{ const u32 x35 = in2[6];
++	{ const u32 x33 = in2[5];
++	{ const u32 x31 = in2[4];
++	{ const u32 x29 = in2[3];
++	{ const u32 x27 = in2[2];
++	{ const u32 x25 = in2[1];
++	{ const u32 x23 = in2[0];
++	out[0] = (x5 + x23);
++	out[1] = (x7 + x25);
++	out[2] = (x9 + x27);
++	out[3] = (x11 + x29);
++	out[4] = (x13 + x31);
++	out[5] = (x15 + x33);
++	out[6] = (x17 + x35);
++	out[7] = (x19 + x37);
++	out[8] = (x21 + x39);
++	out[9] = (x20 + x38);
++	}}}}}}}}}}}}}}}}}}}}
++}
++
++/* h = f + g, a loose result since no carries are propagated.
++ * Can overlap h with f or g.
++ */
++static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
++{
++	fe_add_impl(h->v, f->v, g->v);
++}
++/* Limb-wise out = (2*p + in1) - in2 with no carry propagation; the constants are the limbs of 2*(2^255-19). */
++static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
++{
++	{ const u32 x20 = in1[9];
++	{ const u32 x21 = in1[8];
++	{ const u32 x19 = in1[7];
++	{ const u32 x17 = in1[6];
++	{ const u32 x15 = in1[5];
++	{ const u32 x13 = in1[4];
++	{ const u32 x11 = in1[3];
++	{ const u32 x9 = in1[2];
++	{ const u32 x7 = in1[1];
++	{ const u32 x5 = in1[0];
++	{ const u32 x38 = in2[9];
++	{ const u32 x39 = in2[8];
++	{ const u32 x37 = in2[7];
++	{ const u32 x35 = in2[6];
++	{ const u32 x33 = in2[5];
++	{ const u32 x31 = in2[4];
++	{ const u32 x29 = in2[3];
++	{ const u32 x27 = in2[2];
++	{ const u32 x25 = in2[1];
++	{ const u32 x23 = in2[0];
++	out[0] = ((0x7ffffda + x5) - x23); /* 2 * (2^26 - 19) */
++	out[1] = ((0x3fffffe + x7) - x25); /* 2 * (2^25 - 1) */
++	out[2] = ((0x7fffffe + x9) - x27); /* 2 * (2^26 - 1) */
++	out[3] = ((0x3fffffe + x11) - x29);
++	out[4] = ((0x7fffffe + x13) - x31);
++	out[5] = ((0x3fffffe + x15) - x33);
++	out[6] = ((0x7fffffe + x17) - x35);
++	out[7] = ((0x3fffffe + x19) - x37);
++	out[8] = ((0x7fffffe + x21) - x39);
++	out[9] = ((0x3fffffe + x20) - x38);
++	}}}}}}}}}}}}}}}}}}}}
++}
++
++/* h = f - g, a loose result: fe_sub_impl() adds a multiple of p first.
++ * Can overlap h with f or g.
++ */
++static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
++{
++	fe_sub_impl(h->v, f->v, g->v);
++}
++/* out = in1 * in2: 64-bit product columns, high columns folded in times 19, then a carry chain. */
++static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
++{
++	{ const u32 x20 = in1[9];
++	{ const u32 x21 = in1[8];
++	{ const u32 x19 = in1[7];
++	{ const u32 x17 = in1[6];
++	{ const u32 x15 = in1[5];
++	{ const u32 x13 = in1[4];
++	{ const u32 x11 = in1[3];
++	{ const u32 x9 = in1[2];
++	{ const u32 x7 = in1[1];
++	{ const u32 x5 = in1[0];
++	{ const u32 x38 = in2[9];
++	{ const u32 x39 = in2[8];
++	{ const u32 x37 = in2[7];
++	{ const u32 x35 = in2[6];
++	{ const u32 x33 = in2[5];
++	{ const u32 x31 = in2[4];
++	{ const u32 x29 = in2[3];
++	{ const u32 x27 = in2[2];
++	{ const u32 x25 = in2[1];
++	{ const u32 x23 = in2[0];
++	{ u64 x40 = ((u64)x23 * x5); /* x40..x58: product columns 0..18 */
++	{ u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
++	{ u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
++	{ u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
++	{ u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
++	{ u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
++	{ u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
++	{ u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
++	{ u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
++	{ u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
++	{ u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
++	{ u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
++	{ u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
++	{ u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
++	{ u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
++	{ u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
++	{ u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
++	{ u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
++	{ u64 x58 = ((u64)(0x2 * x38) * x20);
++	{ u64 x59 = (x48 + (x58 << 0x4)); /* x59..x61: x48 + 19*x58, built as 16 + 2 + 1 */
++	{ u64 x60 = (x59 + (x58 << 0x1));
++	{ u64 x61 = (x60 + x58);
++	{ u64 x62 = (x47 + (x57 << 0x4)); /* same 19-fold for each column k+10 into column k */
++	{ u64 x63 = (x62 + (x57 << 0x1));
++	{ u64 x64 = (x63 + x57);
++	{ u64 x65 = (x46 + (x56 << 0x4));
++	{ u64 x66 = (x65 + (x56 << 0x1));
++	{ u64 x67 = (x66 + x56);
++	{ u64 x68 = (x45 + (x55 << 0x4));
++	{ u64 x69 = (x68 + (x55 << 0x1));
++	{ u64 x70 = (x69 + x55);
++	{ u64 x71 = (x44 + (x54 << 0x4));
++	{ u64 x72 = (x71 + (x54 << 0x1));
++	{ u64 x73 = (x72 + x54);
++	{ u64 x74 = (x43 + (x53 << 0x4));
++	{ u64 x75 = (x74 + (x53 << 0x1));
++	{ u64 x76 = (x75 + x53);
++	{ u64 x77 = (x42 + (x52 << 0x4));
++	{ u64 x78 = (x77 + (x52 << 0x1));
++	{ u64 x79 = (x78 + x52);
++	{ u64 x80 = (x41 + (x51 << 0x4));
++	{ u64 x81 = (x80 + (x51 << 0x1));
++	{ u64 x82 = (x81 + x51);
++	{ u64 x83 = (x40 + (x50 << 0x4));
++	{ u64 x84 = (x83 + (x50 << 0x1));
++	{ u64 x85 = (x84 + x50);
++	{ u64 x86 = (x85 >> 0x1a); /* carry chain: limbs alternate 26 (0x1a) and 25 (0x19) bits */
++	{ u32 x87 = ((u32)x85 & 0x3ffffff);
++	{ u64 x88 = (x86 + x82);
++	{ u64 x89 = (x88 >> 0x19);
++	{ u32 x90 = ((u32)x88 & 0x1ffffff);
++	{ u64 x91 = (x89 + x79);
++	{ u64 x92 = (x91 >> 0x1a);
++	{ u32 x93 = ((u32)x91 & 0x3ffffff);
++	{ u64 x94 = (x92 + x76);
++	{ u64 x95 = (x94 >> 0x19);
++	{ u32 x96 = ((u32)x94 & 0x1ffffff);
++	{ u64 x97 = (x95 + x73);
++	{ u64 x98 = (x97 >> 0x1a);
++	{ u32 x99 = ((u32)x97 & 0x3ffffff);
++	{ u64 x100 = (x98 + x70);
++	{ u64 x101 = (x100 >> 0x19);
++	{ u32 x102 = ((u32)x100 & 0x1ffffff);
++	{ u64 x103 = (x101 + x67);
++	{ u64 x104 = (x103 >> 0x1a);
++	{ u32 x105 = ((u32)x103 & 0x3ffffff);
++	{ u64 x106 = (x104 + x64);
++	{ u64 x107 = (x106 >> 0x19);
++	{ u32 x108 = ((u32)x106 & 0x1ffffff);
++	{ u64 x109 = (x107 + x61);
++	{ u64 x110 = (x109 >> 0x1a);
++	{ u32 x111 = ((u32)x109 & 0x3ffffff);
++	{ u64 x112 = (x110 + x49);
++	{ u64 x113 = (x112 >> 0x19);
++	{ u32 x114 = ((u32)x112 & 0x1ffffff);
++	{ u64 x115 = (x87 + (0x13 * x113)); /* carry out of limb 9 re-enters limb 0 times 19 */
++	{ u32 x116 = (u32) (x115 >> 0x1a);
++	{ u32 x117 = ((u32)x115 & 0x3ffffff);
++	{ u32 x118 = (x116 + x90);
++	{ u32 x119 = (x118 >> 0x19);
++	{ u32 x120 = (x118 & 0x1ffffff);
++	out[0] = x117;
++	out[1] = x120;
++	out[2] = (x119 + x93); /* last carry is added without further propagation */
++	out[3] = x96;
++	out[4] = x99;
++	out[5] = x102;
++	out[6] = x105;
++	out[7] = x108;
++	out[8] = x111;
++	out[9] = x114;
++	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
++}
++/* h = f * g with both inputs tight (fe); typed wrapper around fe_mul_impl(). */
++static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g)
++{
++	fe_mul_impl(h->v, f->v, g->v);
++}
++/* h = f * g with f loose and g tight; typed wrapper around fe_mul_impl(). */
++static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
++{
++	fe_mul_impl(h->v, f->v, g->v);
++}
++/* h = f * g with both inputs loose; typed wrapper around fe_mul_impl(). */
++static __always_inline void
++fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
++{
++	fe_mul_impl(h->v, f->v, g->v);
++}
++/* out = in1^2: 64-bit product columns, high columns folded in times 19, then a carry chain. */
++static void fe_sqr_impl(u32 out[10], const u32 in1[10])
++{
++	{ const u32 x17 = in1[9];
++	{ const u32 x18 = in1[8];
++	{ const u32 x16 = in1[7];
++	{ const u32 x14 = in1[6];
++	{ const u32 x12 = in1[5];
++	{ const u32 x10 = in1[4];
++	{ const u32 x8 = in1[3];
++	{ const u32 x6 = in1[2];
++	{ const u32 x4 = in1[1];
++	{ const u32 x2 = in1[0];
++	{ u64 x19 = ((u64)x2 * x2); /* x19..x37: product columns 0..18 */
++	{ u64 x20 = ((u64)(0x2 * x2) * x4);
++	{ u64 x21 = (0x2 * (((u64)x4 * x4) + ((u64)x2 * x6)));
++	{ u64 x22 = (0x2 * (((u64)x4 * x6) + ((u64)x2 * x8)));
++	{ u64 x23 = ((((u64)x6 * x6) + ((u64)(0x4 * x4) * x8)) + ((u64)(0x2 * x2) * x10));
++	{ u64 x24 = (0x2 * ((((u64)x6 * x8) + ((u64)x4 * x10)) + ((u64)x2 * x12)));
++	{ u64 x25 = (0x2 * (((((u64)x8 * x8) + ((u64)x6 * x10)) + ((u64)x2 * x14)) + ((u64)(0x2 * x4) * x12)));
++	{ u64 x26 = (0x2 * (((((u64)x8 * x10) + ((u64)x6 * x12)) + ((u64)x4 * x14)) + ((u64)x2 * x16)));
++	{ u64 x27 = (((u64)x10 * x10) + (0x2 * ((((u64)x6 * x14) + ((u64)x2 * x18)) + (0x2 * (((u64)x4 * x16) + ((u64)x8 * x12))))));
++	{ u64 x28 = (0x2 * ((((((u64)x10 * x12) + ((u64)x8 * x14)) + ((u64)x6 * x16)) + ((u64)x4 * x18)) + ((u64)x2 * x17)));
++	{ u64 x29 = (0x2 * (((((u64)x12 * x12) + ((u64)x10 * x14)) + ((u64)x6 * x18)) + (0x2 * (((u64)x8 * x16) + ((u64)x4 * x17)))));
++	{ u64 x30 = (0x2 * (((((u64)x12 * x14) + ((u64)x10 * x16)) + ((u64)x8 * x18)) + ((u64)x6 * x17)));
++	{ u64 x31 = (((u64)x14 * x14) + (0x2 * (((u64)x10 * x18) + (0x2 * (((u64)x12 * x16) + ((u64)x8 * x17))))));
++	{ u64 x32 = (0x2 * ((((u64)x14 * x16) + ((u64)x12 * x18)) + ((u64)x10 * x17)));
++	{ u64 x33 = (0x2 * ((((u64)x16 * x16) + ((u64)x14 * x18)) + ((u64)(0x2 * x12) * x17)));
++	{ u64 x34 = (0x2 * (((u64)x16 * x18) + ((u64)x14 * x17)));
++	{ u64 x35 = (((u64)x18 * x18) + ((u64)(0x4 * x16) * x17));
++	{ u64 x36 = ((u64)(0x2 * x18) * x17);
++	{ u64 x37 = ((u64)(0x2 * x17) * x17);
++	{ u64 x38 = (x27 + (x37 << 0x4)); /* x38..x40: x27 + 19*x37, built as 16 + 2 + 1 */
++	{ u64 x39 = (x38 + (x37 << 0x1));
++	{ u64 x40 = (x39 + x37);
++	{ u64 x41 = (x26 + (x36 << 0x4)); /* same 19-fold for each column k+10 into column k */
++	{ u64 x42 = (x41 + (x36 << 0x1));
++	{ u64 x43 = (x42 + x36);
++	{ u64 x44 = (x25 + (x35 << 0x4));
++	{ u64 x45 = (x44 + (x35 << 0x1));
++	{ u64 x46 = (x45 + x35);
++	{ u64 x47 = (x24 + (x34 << 0x4));
++	{ u64 x48 = (x47 + (x34 << 0x1));
++	{ u64 x49 = (x48 + x34);
++	{ u64 x50 = (x23 + (x33 << 0x4));
++	{ u64 x51 = (x50 + (x33 << 0x1));
++	{ u64 x52 = (x51 + x33);
++	{ u64 x53 = (x22 + (x32 << 0x4));
++	{ u64 x54 = (x53 + (x32 << 0x1));
++	{ u64 x55 = (x54 + x32);
++	{ u64 x56 = (x21 + (x31 << 0x4));
++	{ u64 x57 = (x56 + (x31 << 0x1));
++	{ u64 x58 = (x57 + x31);
++	{ u64 x59 = (x20 + (x30 << 0x4));
++	{ u64 x60 = (x59 + (x30 << 0x1));
++	{ u64 x61 = (x60 + x30);
++	{ u64 x62 = (x19 + (x29 << 0x4));
++	{ u64 x63 = (x62 + (x29 << 0x1));
++	{ u64 x64 = (x63 + x29);
++	{ u64 x65 = (x64 >> 0x1a); /* carry chain: limbs alternate 26 (0x1a) and 25 (0x19) bits */
++	{ u32 x66 = ((u32)x64 & 0x3ffffff);
++	{ u64 x67 = (x65 + x61);
++	{ u64 x68 = (x67 >> 0x19);
++	{ u32 x69 = ((u32)x67 & 0x1ffffff);
++	{ u64 x70 = (x68 + x58);
++	{ u64 x71 = (x70 >> 0x1a);
++	{ u32 x72 = ((u32)x70 & 0x3ffffff);
++	{ u64 x73 = (x71 + x55);
++	{ u64 x74 = (x73 >> 0x19);
++	{ u32 x75 = ((u32)x73 & 0x1ffffff);
++	{ u64 x76 = (x74 + x52);
++	{ u64 x77 = (x76 >> 0x1a);
++	{ u32 x78 = ((u32)x76 & 0x3ffffff);
++	{ u64 x79 = (x77 + x49);
++	{ u64 x80 = (x79 >> 0x19);
++	{ u32 x81 = ((u32)x79 & 0x1ffffff);
++	{ u64 x82 = (x80 + x46);
++	{ u64 x83 = (x82 >> 0x1a);
++	{ u32 x84 = ((u32)x82 & 0x3ffffff);
++	{ u64 x85 = (x83 + x43);
++	{ u64 x86 = (x85 >> 0x19);
++	{ u32 x87 = ((u32)x85 & 0x1ffffff);
++	{ u64 x88 = (x86 + x40);
++	{ u64 x89 = (x88 >> 0x1a);
++	{ u32 x90 = ((u32)x88 & 0x3ffffff);
++	{ u64 x91 = (x89 + x28);
++	{ u64 x92 = (x91 >> 0x19);
++	{ u32 x93 = ((u32)x91 & 0x1ffffff);
++	{ u64 x94 = (x66 + (0x13 * x92)); /* carry out of limb 9 re-enters limb 0 times 19 */
++	{ u32 x95 = (u32) (x94 >> 0x1a);
++	{ u32 x96 = ((u32)x94 & 0x3ffffff);
++	{ u32 x97 = (x95 + x69);
++	{ u32 x98 = (x97 >> 0x19);
++	{ u32 x99 = (x97 & 0x1ffffff);
++	out[0] = x96;
++	out[1] = x99;
++	out[2] = (x98 + x72); /* last carry is added without further propagation */
++	out[3] = x75;
++	out[4] = x78;
++	out[5] = x81;
++	out[6] = x84;
++	out[7] = x87;
++	out[8] = x90;
++	out[9] = x93;
++	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
++}
++/* h = f^2 for a loose input; typed wrapper around fe_sqr_impl(). */
++static __always_inline void fe_sq_tl(fe *h, const fe_loose *f)
++{
++	fe_sqr_impl(h->v, f->v);
++}
++/* h = f^2 for a tight input; typed wrapper around fe_sqr_impl(). */
++static __always_inline void fe_sq_tt(fe *h, const fe *f)
++{
++	fe_sqr_impl(h->v, f->v);
++}
++/* out = z^(2^255 - 21) = z^(p - 2) by a fixed square-and-multiply chain; comments give the exponent of z. */
++static __always_inline void fe_loose_invert(fe *out, const fe_loose *z)
++{
++	fe t0;
++	fe t1;
++	fe t2;
++	fe t3;
++	int i;
++
++	fe_sq_tl(&t0, z);			/* t0: 2 */
++	fe_sq_tt(&t1, &t0);			/* t1: 4 */
++	for (i = 1; i < 2; ++i)
++		fe_sq_tt(&t1, &t1);		/* t1: 8 */
++	fe_mul_tlt(&t1, z, &t1);		/* t1: 9 */
++	fe_mul_ttt(&t0, &t0, &t1);		/* t0: 11 */
++	fe_sq_tt(&t2, &t0);			/* t2: 22 */
++	fe_mul_ttt(&t1, &t1, &t2);		/* t1: 31 = 2^5 - 1 */
++	fe_sq_tt(&t2, &t1);
++	for (i = 1; i < 5; ++i)
++		fe_sq_tt(&t2, &t2);		/* after loop, t2: 2^10 - 2^5 */
++	fe_mul_ttt(&t1, &t2, &t1);		/* t1: 2^10 - 1 */
++	fe_sq_tt(&t2, &t1);
++	for (i = 1; i < 10; ++i)
++		fe_sq_tt(&t2, &t2);		/* after loop, t2: 2^20 - 2^10 */
++	fe_mul_ttt(&t2, &t2, &t1);		/* t2: 2^20 - 1 */
++	fe_sq_tt(&t3, &t2);
++	for (i = 1; i < 20; ++i)
++		fe_sq_tt(&t3, &t3);		/* after loop, t3: 2^40 - 2^20 */
++	fe_mul_ttt(&t2, &t3, &t2);		/* t2: 2^40 - 1 */
++	fe_sq_tt(&t2, &t2);
++	for (i = 1; i < 10; ++i)
++		fe_sq_tt(&t2, &t2);		/* after loop, t2: 2^50 - 2^10 */
++	fe_mul_ttt(&t1, &t2, &t1);		/* t1: 2^50 - 1 */
++	fe_sq_tt(&t2, &t1);
++	for (i = 1; i < 50; ++i)
++		fe_sq_tt(&t2, &t2);		/* after loop, t2: 2^100 - 2^50 */
++	fe_mul_ttt(&t2, &t2, &t1);		/* t2: 2^100 - 1 */
++	fe_sq_tt(&t3, &t2);
++	for (i = 1; i < 100; ++i)
++		fe_sq_tt(&t3, &t3);		/* after loop, t3: 2^200 - 2^100 */
++	fe_mul_ttt(&t2, &t3, &t2);		/* t2: 2^200 - 1 */
++	fe_sq_tt(&t2, &t2);
++	for (i = 1; i < 50; ++i)
++		fe_sq_tt(&t2, &t2);		/* after loop, t2: 2^250 - 2^50 */
++	fe_mul_ttt(&t1, &t2, &t1);		/* t1: 2^250 - 1 */
++	fe_sq_tt(&t1, &t1);
++	for (i = 1; i < 5; ++i)
++		fe_sq_tt(&t1, &t1);		/* after loop, t1: 2^255 - 2^5 */
++	fe_mul_ttt(out, &t1, &t0);		/* out: 2^255 - 32 + 11 = 2^255 - 21 */
++}
++/* Inversion for a tight input: copy z into an fe_loose and reuse fe_loose_invert(). */
++static __always_inline void fe_invert(fe *out, const fe *z)
++{
++	fe_loose l;
++	fe_copy_lt(&l, z);
++	fe_loose_invert(out, &l);
++}
++
++/* Replace (f,g) with (g,f) if b == 1;
++ * replace (f,g) with (f,g) if b == 0.
++ * Done with a mask and XORs, without branching on b.
++ * Preconditions: b in {0,1}
++ */
++static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b)
++{
++	unsigned i;
++	b = 0 - b; /* 0 stays 0; 1 becomes an all-ones mask */
++	for (i = 0; i < 10; i++) {
++		u32 x = f->v[i] ^ g->v[i];
++		x &= b; /* x is the limb difference when swapping, 0 otherwise */
++		f->v[i] ^= x;
++		g->v[i] ^= x;
++	}
++}
++
++/* NOTE: based on fiat-crypto fe_mul, edited for in2 = {121666, 0, ..., 0}. */
++static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10])
++{
++	{ const u32 x20 = in1[9];
++	{ const u32 x21 = in1[8];
++	{ const u32 x19 = in1[7];
++	{ const u32 x17 = in1[6];
++	{ const u32 x15 = in1[5];
++	{ const u32 x13 = in1[4];
++	{ const u32 x11 = in1[3];
++	{ const u32 x9 = in1[2];
++	{ const u32 x7 = in1[1];
++	{ const u32 x5 = in1[0];
++	{ const u32 x38 = 0; /* limbs 1..9 of the constant operand are zero */
++	{ const u32 x39 = 0;
++	{ const u32 x37 = 0;
++	{ const u32 x35 = 0;
++	{ const u32 x33 = 0;
++	{ const u32 x31 = 0;
++	{ const u32 x29 = 0;
++	{ const u32 x27 = 0;
++	{ const u32 x25 = 0;
++	{ const u32 x23 = 121666; /* limb 0 of the constant operand */
++	{ u64 x40 = ((u64)x23 * x5); /* x40..x58: product columns 0..18 */
++	{ u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
++	{ u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
++	{ u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
++	{ u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
++	{ u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
++	{ u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
++	{ u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
++	{ u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
++	{ u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
++	{ u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
++	{ u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
++	{ u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
++	{ u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
++	{ u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
++	{ u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
++	{ u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
++	{ u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
++	{ u64 x58 = ((u64)(0x2 * x38) * x20);
++	{ u64 x59 = (x48 + (x58 << 0x4)); /* x59..x61: x48 + 19*x58, built as 16 + 2 + 1 */
++	{ u64 x60 = (x59 + (x58 << 0x1));
++	{ u64 x61 = (x60 + x58);
++	{ u64 x62 = (x47 + (x57 << 0x4)); /* same 19-fold for each column k+10 into column k */
++	{ u64 x63 = (x62 + (x57 << 0x1));
++	{ u64 x64 = (x63 + x57);
++	{ u64 x65 = (x46 + (x56 << 0x4));
++	{ u64 x66 = (x65 + (x56 << 0x1));
++	{ u64 x67 = (x66 + x56);
++	{ u64 x68 = (x45 + (x55 << 0x4));
++	{ u64 x69 = (x68 + (x55 << 0x1));
++	{ u64 x70 = (x69 + x55);
++	{ u64 x71 = (x44 + (x54 << 0x4));
++	{ u64 x72 = (x71 + (x54 << 0x1));
++	{ u64 x73 = (x72 + x54);
++	{ u64 x74 = (x43 + (x53 << 0x4));
++	{ u64 x75 = (x74 + (x53 << 0x1));
++	{ u64 x76 = (x75 + x53);
++	{ u64 x77 = (x42 + (x52 << 0x4));
++	{ u64 x78 = (x77 + (x52 << 0x1));
++	{ u64 x79 = (x78 + x52);
++	{ u64 x80 = (x41 + (x51 << 0x4));
++	{ u64 x81 = (x80 + (x51 << 0x1));
++	{ u64 x82 = (x81 + x51);
++	{ u64 x83 = (x40 + (x50 << 0x4));
++	{ u64 x84 = (x83 + (x50 << 0x1));
++	{ u64 x85 = (x84 + x50);
++	{ u64 x86 = (x85 >> 0x1a); /* carry chain: limbs alternate 26 (0x1a) and 25 (0x19) bits */
++	{ u32 x87 = ((u32)x85 & 0x3ffffff);
++	{ u64 x88 = (x86 + x82);
++	{ u64 x89 = (x88 >> 0x19);
++	{ u32 x90 = ((u32)x88 & 0x1ffffff);
++	{ u64 x91 = (x89 + x79);
++	{ u64 x92 = (x91 >> 0x1a);
++	{ u32 x93 = ((u32)x91 & 0x3ffffff);
++	{ u64 x94 = (x92 + x76);
++	{ u64 x95 = (x94 >> 0x19);
++	{ u32 x96 = ((u32)x94 & 0x1ffffff);
++	{ u64 x97 = (x95 + x73);
++	{ u64 x98 = (x97 >> 0x1a);
++	{ u32 x99 = ((u32)x97 & 0x3ffffff);
++	{ u64 x100 = (x98 + x70);
++	{ u64 x101 = (x100 >> 0x19);
++	{ u32 x102 = ((u32)x100 & 0x1ffffff);
++	{ u64 x103 = (x101 + x67);
++	{ u64 x104 = (x103 >> 0x1a);
++	{ u32 x105 = ((u32)x103 & 0x3ffffff);
++	{ u64 x106 = (x104 + x64);
++	{ u64 x107 = (x106 >> 0x19);
++	{ u32 x108 = ((u32)x106 & 0x1ffffff);
++	{ u64 x109 = (x107 + x61);
++	{ u64 x110 = (x109 >> 0x1a);
++	{ u32 x111 = ((u32)x109 & 0x3ffffff);
++	{ u64 x112 = (x110 + x49);
++	{ u64 x113 = (x112 >> 0x19);
++	{ u32 x114 = ((u32)x112 & 0x1ffffff);
++	{ u64 x115 = (x87 + (0x13 * x113)); /* carry out of limb 9 re-enters limb 0 times 19 */
++	{ u32 x116 = (u32) (x115 >> 0x1a);
++	{ u32 x117 = ((u32)x115 & 0x3ffffff);
++	{ u32 x118 = (x116 + x90);
++	{ u32 x119 = (x118 >> 0x19);
++	{ u32 x120 = (x118 & 0x1ffffff);
++	out[0] = x117;
++	out[1] = x120;
++	out[2] = (x119 + x93); /* last carry is added without further propagation */
++	out[3] = x96;
++	out[4] = x99;
++	out[5] = x102;
++	out[6] = x105;
++	out[7] = x108;
++	out[8] = x111;
++	out[9] = x114;
++	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
++}
++
++static __always_inline void fe_mul121666(fe *h, const fe_loose *f)
++{
++	fe_mul_121666_impl(h->v, f->v); /* thin wrapper: passes the limb arrays of h and f straight through */
++}
++
++static void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
++			       const u8 scalar[CURVE25519_KEY_SIZE],
++			       const u8 point[CURVE25519_KEY_SIZE])
++{
++	fe x1, x2, z2, x3, z3;
++	fe_loose x2l, z2l, x3l;
++	unsigned swap = 0; /* swap of (x2, z2) and (x3, z3) still pending from the previous bit */
++	int pos;
++	u8 e[32]; /* private, clamped copy of scalar */
++
++	memcpy(e, scalar, 32);
++	curve25519_clamp_secret(e);
++
++	/* The following implementation was transcribed to Coq and proven to
++	 * correspond to unary scalar multiplication in affine coordinates given
++	 * that x1 != 0 is the x coordinate of some point on the curve. It was
++	 * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
++	 * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
++	 * quantified over the underlying field, so it applies to Curve25519
++	 * itself and the quadratic twist of Curve25519. It was not proven in
++	 * Coq that prime-field arithmetic correctly simulates extension-field
++	 * arithmetic on prime-field values. The decoding of the byte array
++	 * representation of e was not considered.
++	 *
++	 * Specification of Montgomery curves in affine coordinates:
++	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
++	 *
++	 * Proof that these form a group that is isomorphic to a Weierstrass
++	 * curve:
++	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
++	 *
++	 * Coq transcription and correctness proof of the loop
++	 * (where scalarbits=255):
++	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
++	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
++	 * preconditions: 0 <= e < 2^255 (not necessarily e < order),
++	 * fe_invert(0) = 0
++	 */
++	fe_frombytes(&x1, point); /* x1 = x coordinate decoded from point */
++	fe_1(&x2); /* (x2, z2) starts as (1, 0) */
++	fe_0(&z2);
++	fe_copy(&x3, &x1); /* (x3, z3) starts as (x1, 1) */
++	fe_1(&z3);
++
++	for (pos = 254; pos >= 0; --pos) { /* scalar bits 254 down to 0 */
++		fe tmp0, tmp1;
++		fe_loose tmp0l, tmp1l;
++		/* loop invariant as of right before the test, for the case
++		 * where x1 != 0:
++		 *   pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
++		 *   is nonzero
++		 *   let r := e >> (pos+1) in the following equalities of
++		 *   projective points:
++		 *   to_xz (r*P)     === if swap then (x3, z3) else (x2, z2)
++		 *   to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
++		 *   x1 is the nonzero x coordinate of the nonzero
++		 *   point (r*P-(r+1)*P)
++		 */
++		unsigned b = 1 & (e[pos / 8] >> (pos & 7)); /* bit pos of e */
++		swap ^= b; /* swap only when this bit differs from the previous one */
++		fe_cswap(&x2, &x3, swap);
++		fe_cswap(&z2, &z3, swap);
++		swap = b;
++		/* Coq transcription of ladderstep formula (called from
++		 * transcribed loop):
++		 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
++		 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
++		 * x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
++		 * x1  = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
++		 */
++		fe_sub(&tmp0l, &x3, &z3);
++		fe_sub(&tmp1l, &x2, &z2);
++		fe_add(&x2l, &x2, &z2);
++		fe_add(&z2l, &x3, &z3);
++		fe_mul_tll(&z3, &tmp0l, &x2l);
++		fe_mul_tll(&z2, &z2l, &tmp1l);
++		fe_sq_tl(&tmp0, &tmp1l);
++		fe_sq_tl(&tmp1, &x2l);
++		fe_add(&x3l, &z3, &z2);
++		fe_sub(&z2l, &z3, &z2);
++		fe_mul_ttt(&x2, &tmp1, &tmp0);
++		fe_sub(&tmp1l, &tmp1, &tmp0);
++		fe_sq_tl(&z2, &z2l);
++		fe_mul121666(&z3, &tmp1l);
++		fe_sq_tl(&x3, &x3l);
++		fe_add(&tmp0l, &tmp0, &z3);
++		fe_mul_ttt(&z3, &x1, &z2);
++		fe_mul_tll(&z2, &tmp1l, &tmp0l);
++	}
++	/* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3)
++	 * else (x2, z2)
++	 */
++	fe_cswap(&x2, &x3, swap); /* apply the swap left pending by the last bit */
++	fe_cswap(&z2, &z3, swap);
++
++	fe_invert(&z2, &z2); /* z2 = 1 / z2 (precondition above: fe_invert(0) = 0) */
++	fe_mul_ttt(&x2, &x2, &z2); /* x2 = x2 / z2 */
++	fe_tobytes(out, &x2);
++
++	memzero_explicit(&x1, sizeof(x1)); /* wipe the function-scope working state */
++	memzero_explicit(&x2, sizeof(x2));
++	memzero_explicit(&z2, sizeof(z2));
++	memzero_explicit(&x3, sizeof(x3));
++	memzero_explicit(&z3, sizeof(z3));
++	memzero_explicit(&x2l, sizeof(x2l));
++	memzero_explicit(&z2l, sizeof(z2l));
++	memzero_explicit(&x3l, sizeof(x3l));
++	memzero_explicit(&e, sizeof(e)); /* NOTE(review): loop-local tmp0/tmp1/tmp0l/tmp1l are not wiped; confirm this is intended */
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/curve25519/curve25519-hacl64.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,779 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2016-2017 INRIA and Microsoft Corporation.
++ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This is a machine-generated formally verified implementation of Curve25519
++ * ECDH from: <https://github.com/mitls/hacl-star>. Though originally machine
++ * generated, it has been tweaked to be suitable for use in the kernel. It is
++ * optimized for 64-bit machines that can efficiently work with 128-bit
++ * integer types.
++ */
++
++typedef __uint128_t u128;
++
++static __always_inline u64 u64_eq_mask(u64 a, u64 b)
++{
++	/* All-ones when a == b, zero otherwise; computed without branching. */
++	u64 diff = a ^ b;
++	u64 neg_diff = ~diff + (u64)1U;
++	/* Bit 63 of (diff | -diff) is set exactly when diff is non-zero. */
++	u64 nonzero = (diff | neg_diff) >> (u32)63U;
++	return nonzero - (u64)1U;
++}
++
++static __always_inline u64 u64_gte_mask(u64 a, u64 b)
++{
++	/*
++	 * Branch-free comparison mask: the result is all-ones when a >= b
++	 * (unsigned) and zero when a < b.
++	 */
++	u64 xor_ab = a ^ b;
++	u64 diff_xor_b = (a - b) ^ b;
++	u64 borrow_src = a ^ (xor_ab | diff_xor_b);
++	/* Bit 63 of borrow_src is the borrow out of a - b. */
++	u64 borrow = borrow_src >> (u32)63U;
++	return borrow - (u64)1U;
++}
++
++static __always_inline void modulo_carry_top(u64 *b)
++{
++	/* Fold the bits of limb 4 above bit 50 into limb 0, scaled by 19,
++	 * and keep only the low 51 bits in limb 4.
++	 */
++	u64 top = b[4];
++	b[0] += 19 * (top >> 51);
++	b[4] = top & 0x7ffffffffffffLLU;
++}
++
++static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input)
++{
++	/*
++	 * Narrow a five-limb value from u128 limbs to u64 limbs; only the
++	 * low 64 bits of each limb are kept, limbs are copied in order.
++	 */
++	u128 wide;
++
++	wide = input[0];
++	output[0] = (u64)wide;
++
++	wide = input[1];
++	output[1] = (u64)wide;
++
++	wide = input[2];
++	output[2] = (u64)wide;
++
++	wide = input[3];
++	output[3] = (u64)wide;
++
++	wide = input[4];
++	output[4] = (u64)wide;
++}
++
++static __always_inline void
++fproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s)
++{
++	u32 limb;
++
++	/* output[k] += input[k] * s, each product taken at 128-bit width. */
++	for (limb = 0; limb < 5; ++limb)
++		output[limb] += (u128)input[limb] * s;
++}
++
++static __always_inline void fproduct_carry_wide_(u128 *tmp)
++{	/* One upward carry pass over wide limbs 0..3; limb 4 only receives a carry. */
++	{ /* limb 0: keep the low 51 bits, add the rest to limb 1 */
++		u32 ctr = 0;
++		u128 tctr = tmp[ctr];
++		u128 tctrp1 = tmp[ctr + 1];
++		u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
++		u128 c = ((tctr) >> (51));
++		tmp[ctr] = ((u128)(r0));
++		tmp[ctr + 1] = ((tctrp1) + (c));
++	}
++	{ /* limb 1: keep the low 51 bits, add the rest to limb 2 */
++		u32 ctr = 1;
++		u128 tctr = tmp[ctr];
++		u128 tctrp1 = tmp[ctr + 1];
++		u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
++		u128 c = ((tctr) >> (51));
++		tmp[ctr] = ((u128)(r0));
++		tmp[ctr + 1] = ((tctrp1) + (c));
++	}
++
++	{ /* limb 2: keep the low 51 bits, add the rest to limb 3 */
++		u32 ctr = 2;
++		u128 tctr = tmp[ctr];
++		u128 tctrp1 = tmp[ctr + 1];
++		u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
++		u128 c = ((tctr) >> (51));
++		tmp[ctr] = ((u128)(r0));
++		tmp[ctr + 1] = ((tctrp1) + (c));
++	}
++	{ /* limb 3: keep the low 51 bits, add the rest to limb 4 (left unmasked) */
++		u32 ctr = 3;
++		u128 tctr = tmp[ctr];
++		u128 tctrp1 = tmp[ctr + 1];
++		u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
++		u128 c = ((tctr) >> (51));
++		tmp[ctr] = ((u128)(r0));
++		tmp[ctr + 1] = ((tctrp1) + (c));
++	}
++}
++
++static __always_inline void fmul_shift_reduce(u64 *output)
++{	/* Move every limb up one position; the old limb 4 re-enters as limb 0 times 19. */
++	u64 tmp = output[4]; /* saved before it is overwritten below */
++	u64 b0;
++	{ /* output[4] = output[3] */
++		u32 ctr = 5 - 0 - 1;
++		u64 z = output[ctr - 1];
++		output[ctr] = z;
++	}
++	{ /* output[3] = output[2] */
++		u32 ctr = 5 - 1 - 1;
++		u64 z = output[ctr - 1];
++		output[ctr] = z;
++	}
++	{ /* output[2] = output[1] */
++		u32 ctr = 5 - 2 - 1;
++		u64 z = output[ctr - 1];
++		output[ctr] = z;
++	}
++	{ /* output[1] = output[0] */
++		u32 ctr = 5 - 3 - 1;
++		u64 z = output[ctr - 1];
++		output[ctr] = z;
++	}
++	output[0] = tmp;
++	b0 = output[0];
++	output[0] = 19 * b0; /* net effect: output[0] = 19 * (old output[4]) */
++}
++
++static __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input,
++						   u64 *input21)
++{	/* Accumulates input * input21 into output, one limb of input21 at a time; input is modified. */
++	u32 i;
++	u64 input2i; /* used for the last limb only; the scoped blocks below declare their own input2i */
++	{ /* limb 0 of input21, then shift/scale input for the next limb */
++		u64 input2i = input21[0];
++		fproduct_sum_scalar_multiplication_(output, input, input2i);
++		fmul_shift_reduce(input);
++	}
++	{ /* limb 1 */
++		u64 input2i = input21[1];
++		fproduct_sum_scalar_multiplication_(output, input, input2i);
++		fmul_shift_reduce(input);
++	}
++	{ /* limb 2 */
++		u64 input2i = input21[2];
++		fproduct_sum_scalar_multiplication_(output, input, input2i);
++		fmul_shift_reduce(input);
++	}
++	{ /* limb 3 */
++		u64 input2i = input21[3];
++		fproduct_sum_scalar_multiplication_(output, input, input2i);
++		fmul_shift_reduce(input);
++	}
++	i = 4; /* limb 4: no shift afterwards, input is not needed again */
++	input2i = input21[i];
++	fproduct_sum_scalar_multiplication_(output, input, input2i);
++}
++
++static __always_inline void fmul_fmul(u64 *output, u64 *input, u64 *input21)
++{
++	/*
++	 * Field multiplication on five 51-bit limbs: output = input * input21.
++	 *
++	 * input is copied to a scratch array because the shift-reduce helper
++	 * rewrites its first operand, and output is written only after both
++	 * operands have been consumed, so output may alias either of them.
++	 */
++	u64 scratch[5] = { input[0], input[1], input[2], input[3], input[4] };
++	u128 wide[5] = { 0 };
++	u128 top;
++	u64 low;
++
++	/* Limb products accumulated into 128-bit limbs, then carried. */
++	fmul_mul_shift_reduce_(wide, scratch, input21);
++	fproduct_carry_wide_(wide);
++
++	/* Bits of limb 4 above bit 50 wrap around into limb 0, scaled by 19
++	 * (the limbs represent a value modulo 2^255 - 19). */
++	top = wide[4];
++	wide[0] = wide[0] + (u128)19 * (u64)(top >> 51);
++	wide[4] = top & (u128)0x7ffffffffffffLLU;
++	fproduct_copy_from_wide_(output, wide);
++
++	/* Final carry step from limb 0 into limb 1. */
++	low = output[0];
++	output[1] = output[1] + (low >> 51);
++	output[0] = low & 0x7ffffffffffffLLU;
++}
++
++static __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output)
++{	/* Writes the five unreduced wide limbs of output squared into tmp; output itself is only read. */
++	u64 r0 = output[0];
++	u64 r1 = output[1];
++	u64 r2 = output[2];
++	u64 r3 = output[3];
++	u64 r4 = output[4];
++	u64 d0 = r0 * 2;
++	u64 d1 = r1 * 2;
++	u64 d2 = r2 * 2 * 19; /* 38 * r2 */
++	u64 d419 = r4 * 19;
++	u64 d4 = d419 * 2; /* 38 * r4 */
++	u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) +
++		   (((u128)(d2) * (r3))));
++	u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) +
++		   (((u128)(r3 * 19) * (r3))));
++	u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) +
++		   (((u128)(d4) * (r3))));
++	u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) +
++		   (((u128)(r4) * (d419))));
++	u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) +
++		   (((u128)(r2) * (r2))));
++	tmp[0] = s0; /* r0^2 + 38*r1*r4 + 38*r2*r3 */
++	tmp[1] = s1; /* 2*r0*r1 + 38*r2*r4 + 19*r3^2 */
++	tmp[2] = s2; /* 2*r0*r2 + r1^2 + 38*r3*r4 */
++	tmp[3] = s3; /* 2*r0*r3 + 2*r1*r2 + 19*r4^2 */
++	tmp[4] = s4; /* 2*r0*r4 + 2*r1*r3 + r2^2 */
++}
++
++static __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output)
++{
++	/*
++	 * Square the five-limb value held in output, in place.
++	 *
++	 * tmp supplies five u128 limbs of scratch space for the wide
++	 * product; whatever it held on entry is overwritten.  The reduced
++	 * result is written back to output.
++	 */
++	u128 top;
++	u64 low;
++
++	fsquare_fsquare__(tmp, output);
++	fproduct_carry_wide_(tmp);
++
++	/* Bits of limb 4 above bit 50 wrap around into limb 0, scaled by 19. */
++	top = tmp[4];
++	tmp[0] = tmp[0] + (u128)19 * (u64)(top >> 51);
++	tmp[4] = top & (u128)0x7ffffffffffffLLU;
++	fproduct_copy_from_wide_(output, tmp);
++
++	/* Final carry step from limb 0 into limb 1. */
++	low = output[0];
++	output[1] = output[1] + (low >> 51);
++	output[0] = low & 0x7ffffffffffffLLU;
++}
++
++static __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp,
++						   u32 count1)
++{
++	u32 done = 0;
++	do {	/* squares output in place; runs once even when count1 is 0 */
++		fsquare_fsquare_(tmp, output);
++	} while (++done < count1);
++}
++
++static __always_inline void fsquare_fsquare_times(u64 *output, u64 *input,
++						  u32 count1)
++{
++	u128 t[5]; /* wide scratch; the squaring helper fills it before reading it */
++	memcpy(output, input, 5 * sizeof(*input)); /* work on a copy so input is left untouched */
++	fsquare_fsquare_times_(output, t, count1); /* repeated squaring of output, in place */
++}
++
++static __always_inline void fsquare_fsquare_times_inplace(u64 *output,
++							  u32 count1)
++{
++	u128 t[5]; /* wide scratch; the squaring helper fills it before reading it */
++	fsquare_fsquare_times_(output, t, count1); /* repeated squaring of output, in place */
++}
++
++static __always_inline void crecip_crecip(u64 *out, u64 *z)
++{	/* out = z^(2^255 - 21) via a fixed square-and-multiply chain; exponents are tracked in the comments below. */
++	u64 buf[20] = { 0 }; /* four 5-limb temporaries */
++	u64 *a0 = buf;
++	u64 *t00 = buf + 5;
++	u64 *b0 = buf + 10;
++	u64 *t01;
++	u64 *b1;
++	u64 *c0;
++	u64 *a;
++	u64 *t0;
++	u64 *b;
++	u64 *c;
++	fsquare_fsquare_times(a0, z, 1); /* a0 = z^2 */
++	fsquare_fsquare_times(t00, a0, 2); /* t00 = z^8 */
++	fmul_fmul(b0, t00, z); /* b0 = z^9 */
++	fmul_fmul(a0, b0, a0); /* a0 = z^11 */
++	fsquare_fsquare_times(t00, a0, 1); /* t00 = z^22 */
++	fmul_fmul(b0, t00, b0); /* b0 = z^(2^5 - 1) */
++	fsquare_fsquare_times(t00, b0, 5); /* t00 = z^(2^10 - 2^5) */
++	t01 = buf + 5; /* same storage as t00 */
++	b1 = buf + 10; /* same storage as b0 */
++	c0 = buf + 15;
++	fmul_fmul(b1, t01, b1); /* b1 = z^(2^10 - 1) */
++	fsquare_fsquare_times(t01, b1, 10); /* t01 = z^(2^20 - 2^10) */
++	fmul_fmul(c0, t01, b1); /* c0 = z^(2^20 - 1) */
++	fsquare_fsquare_times(t01, c0, 20); /* t01 = z^(2^40 - 2^20) */
++	fmul_fmul(t01, t01, c0); /* t01 = z^(2^40 - 1) */
++	fsquare_fsquare_times_inplace(t01, 10); /* t01 = z^(2^50 - 2^10) */
++	fmul_fmul(b1, t01, b1); /* b1 = z^(2^50 - 1) */
++	fsquare_fsquare_times(t01, b1, 50); /* t01 = z^(2^100 - 2^50) */
++	a = buf; /* same storage as a0 (z^11) */
++	t0 = buf + 5; /* same storage as t01 */
++	b = buf + 10; /* same storage as b1 */
++	c = buf + 15; /* same storage as c0 */
++	fmul_fmul(c, t0, b); /* c = z^(2^100 - 1) */
++	fsquare_fsquare_times(t0, c, 100); /* t0 = z^(2^200 - 2^100) */
++	fmul_fmul(t0, t0, c); /* t0 = z^(2^200 - 1) */
++	fsquare_fsquare_times_inplace(t0, 50); /* t0 = z^(2^250 - 2^50) */
++	fmul_fmul(t0, t0, b); /* t0 = z^(2^250 - 1) */
++	fsquare_fsquare_times_inplace(t0, 5); /* t0 = z^(2^255 - 2^5) */
++	fmul_fmul(out, t0, a); /* out = z^(2^255 - 32 + 11) = z^(2^255 - 21) */
++}
++
++static __always_inline void fsum(u64 *a, u64 *b)
++{
++	u32 limb;
++
++	/* a += b limb by limb; no carry propagation or reduction here. */
++	for (limb = 0; limb < 5; ++limb)
++		a[limb] += b[limb];
++}
++
++static __always_inline void fdifference(u64 *a, u64 *b)
++{	/* a := (b + bias) - a, limb by limb; note the operand order: the result replaces a. */
++	u64 tmp[5] = { 0 };
++	u64 b0;
++	u64 b1;
++	u64 b2;
++	u64 b3;
++	u64 b4;
++	memcpy(tmp, b, 5 * sizeof(*b)); /* b itself is never modified */
++	b0 = tmp[0];
++	b1 = tmp[1];
++	b2 = tmp[2];
++	b3 = tmp[3];
++	b4 = tmp[4];
++	tmp[0] = b0 + 0x3fffffffffff68LLU; /* 8 * (2^51 - 19); presumably keeps the subtraction below from underflowing */
++	tmp[1] = b1 + 0x3ffffffffffff8LLU; /* 8 * (2^51 - 1) */
++	tmp[2] = b2 + 0x3ffffffffffff8LLU;
++	tmp[3] = b3 + 0x3ffffffffffff8LLU;
++	tmp[4] = b4 + 0x3ffffffffffff8LLU;
++	{ /* limb 0 */
++		u64 xi = a[0];
++		u64 yi = tmp[0];
++		a[0] = yi - xi;
++	}
++	{ /* limb 1 */
++		u64 xi = a[1];
++		u64 yi = tmp[1];
++		a[1] = yi - xi;
++	}
++	{ /* limb 2 */
++		u64 xi = a[2];
++		u64 yi = tmp[2];
++		a[2] = yi - xi;
++	}
++	{ /* limb 3 */
++		u64 xi = a[3];
++		u64 yi = tmp[3];
++		a[3] = yi - xi;
++	}
++	{ /* limb 4 */
++		u64 xi = a[4];
++		u64 yi = tmp[4];
++		a[4] = yi - xi;
++	}
++}
++
++static __always_inline void fscalar(u64 *output, u64 *b, u64 s)
++{	/* output = b * s: limb-wise wide products, one carry pass, then the top overflow folded into limb 0. */
++	u128 tmp[5];
++	u128 b4;
++	u128 b0;
++	u128 b4_;
++	u128 b0_;
++	{ /* limb 0 */
++		u64 xi = b[0];
++		tmp[0] = ((u128)(xi) * (s));
++	}
++	{ /* limb 1 */
++		u64 xi = b[1];
++		tmp[1] = ((u128)(xi) * (s));
++	}
++	{ /* limb 2 */
++		u64 xi = b[2];
++		tmp[2] = ((u128)(xi) * (s));
++	}
++	{ /* limb 3 */
++		u64 xi = b[3];
++		tmp[3] = ((u128)(xi) * (s));
++	}
++	{ /* limb 4 */
++		u64 xi = b[4];
++		tmp[4] = ((u128)(xi) * (s));
++	}
++	fproduct_carry_wide_(tmp);
++	b4 = tmp[4];
++	b0 = tmp[0];
++	b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); /* limb 4 keeps its low 51 bits */
++	b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); /* the rest wraps into limb 0, times 19 */
++	tmp[4] = b4_;
++	tmp[0] = b0_;
++	fproduct_copy_from_wide_(output, tmp);
++}
++
++static __always_inline void crecip(u64 *output, u64 *input)
++{
++	crecip_crecip(output, input); /* plain forwarder to the exponentiation chain */
++}
++
++static __always_inline void point_swap_conditional_step(u64 *a, u64 *b,
++							u64 swap1, u32 ctr)
++{
++	/* Masked exchange of limb ctr - 1: the bits set in swap1 are swapped
++	 * between a and b, all other bits stay where they are.
++	 */
++	u32 idx = ctr - 1;
++	u64 delta = swap1 & (a[idx] ^ b[idx]);
++
++	a[idx] ^= delta;
++	b[idx] ^= delta;
++}
++
++static __always_inline void point_swap_conditional5(u64 *a, u64 *b, u64 swap1)
++{
++	u32 ctr;
++
++	/* Masked swap of all five limbs, highest limb first. */
++	for (ctr = 5; ctr > 0; --ctr)
++		point_swap_conditional_step(a, b, swap1, ctr);
++}
++
++static __always_inline void point_swap_conditional(u64 *a, u64 *b, u64 iswap)
++{
++	u64 mask = (u64)0 - iswap;	/* iswap 1 -> all-ones, iswap 0 -> 0 */
++	point_swap_conditional5(&a[0], &b[0], mask);
++	point_swap_conditional5(&a[5], &b[5], mask);
++}
++
++static __always_inline void point_copy(u64 *output, u64 *input)
++{
++	memcpy(output, input, 5 * sizeof(*input)); /* limbs 0..4 (the x half where callers split a point as x, z) */
++	memcpy(output + 5, input + 5, 5 * sizeof(*input)); /* limbs 5..9 (the z half) */
++}
++
++static __always_inline void addanddouble_fmonty(u64 *pp, u64 *ppq, u64 *p,
++						u64 *pq, u64 *qmqp)
++{	/* Writes results to pp (x2, z2) and ppq (x3, z3); p and pq are used as scratch and modified. */
++	u64 *qx = qmqp;
++	u64 *x2 = pp;
++	u64 *z2 = pp + 5;
++	u64 *x3 = ppq;
++	u64 *z3 = ppq + 5;
++	u64 *x = p;
++	u64 *z = p + 5;
++	u64 *xprime = pq;
++	u64 *zprime = pq + 5;
++	u64 buf[40] = { 0 }; /* eight 5-limb temporaries */
++	u64 *origx = buf;
++	u64 *origxprime0 = buf + 5;
++	u64 *xxprime0;
++	u64 *zzprime0;
++	u64 *origxprime;
++	xxprime0 = buf + 25;
++	zzprime0 = buf + 30;
++	memcpy(origx, x, 5 * sizeof(*x));
++	fsum(x, z); /* x = x + z */
++	fdifference(z, origx); /* z = (old x) - z */
++	memcpy(origxprime0, xprime, 5 * sizeof(*xprime));
++	fsum(xprime, zprime); /* x' = x' + z' */
++	fdifference(zprime, origxprime0); /* z' = (old x') - z' */
++	fmul_fmul(xxprime0, xprime, z); /* (x' + z') * (x - z) */
++	fmul_fmul(zzprime0, x, zprime); /* (x + z) * (x' - z') */
++	origxprime = buf + 5; /* same storage as origxprime0 */
++	{
++		u64 *xx0;
++		u64 *zz0;
++		u64 *xxprime;
++		u64 *zzprime;
++		u64 *zzzprime;
++		xx0 = buf + 15;
++		zz0 = buf + 20;
++		xxprime = buf + 25; /* same storage as xxprime0 */
++		zzprime = buf + 30; /* same storage as zzprime0 */
++		zzzprime = buf + 35;
++		memcpy(origxprime, xxprime, 5 * sizeof(*xxprime));
++		fsum(xxprime, zzprime); /* xxprime = xxprime + zzprime */
++		fdifference(zzprime, origxprime); /* zzprime = (old xxprime) - zzprime */
++		fsquare_fsquare_times(x3, xxprime, 1); /* x3 = xxprime^2 */
++		fsquare_fsquare_times(zzzprime, zzprime, 1);
++		fmul_fmul(z3, zzzprime, qx); /* z3 = zzprime^2 * qx */
++		fsquare_fsquare_times(xx0, x, 1); /* xx = (x + z)^2 */
++		fsquare_fsquare_times(zz0, z, 1); /* zz = (x - z)^2 */
++		{
++			u64 *zzz;
++			u64 *xx;
++			u64 *zz;
++			u64 scalar;
++			zzz = buf + 10;
++			xx = buf + 15; /* same storage as xx0 */
++			zz = buf + 20; /* same storage as zz0 */
++			fmul_fmul(x2, xx, zz); /* x2 = xx * zz */
++			fdifference(zz, xx); /* zz = xx - zz */
++			scalar = 121665;
++			fscalar(zzz, zz, scalar); /* zzz = 121665 * zz */
++			fsum(zzz, xx); /* zzz = zzz + xx */
++			fmul_fmul(z2, zzz, zz); /* z2 = zzz * zz */
++		}
++	}
++}
++
++static __always_inline void
++ladder_smallloop_cmult_small_loop_step(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2,
++				       u64 *q, u8 byt)
++{
++	/* One ladder step, steered by the most significant bit of byt. */
++	u64 top_bit = (u64)(byt >> 7);
++
++	point_swap_conditional(nq, nqpq, top_bit);
++	addanddouble_fmonty(nq2, nqpq2, nq, nqpq, q);
++	point_swap_conditional(nq2, nqpq2, top_bit);
++}
++
++static __always_inline void
++ladder_smallloop_cmult_small_loop_double_step(u64 *nq, u64 *nqpq, u64 *nq2,
++					      u64 *nqpq2, u64 *q, u8 byt)
++{
++	/* Two ladder steps: bit 7 of byt, then bit 6, ping-ponging buffers. */
++	ladder_smallloop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt);
++	ladder_smallloop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q,
++					       (u8)(byt << 1));
++}
++
++static __always_inline void
++ladder_smallloop_cmult_small_loop(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2,
++				  u64 *q, u8 byt, u32 i)
++{
++	/* Runs i double steps; each one consumes the two most significant
++	 * bits of byt, which is then shifted left by two. */
++	for (; i != 0; --i, byt <<= 2)
++		ladder_smallloop_cmult_small_loop_double_step(nq, nqpq, nq2,
++							      nqpq2, q, byt);
++}
++
++static __always_inline void ladder_bigloop_cmult_big_loop(u8 *n1, u64 *nq,
++							  u64 *nqpq, u64 *nq2,
++							  u64 *nqpq2, u64 *q,
++							  u32 i)
++{
++	/* Walks the scalar bytes from index i - 1 down to index 0; every
++	 * byte drives four double steps, i.e. eight ladder steps. */
++	for (; i > 0; --i)
++		ladder_smallloop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q,
++						  n1[i - 1], 4);
++}
++
++static void ladder_cmult(u64 *result, u8 *n1, u64 *q)
++{
++	u64 point_buf[40] = { 0 };	/* four 10-limb (x, z) points */
++	u64 *nq = &point_buf[0];
++	u64 *nqpq = &point_buf[10];
++	u64 *nq2 = &point_buf[20];
++	u64 *nqpq2 = &point_buf[30];
++	nq[0] = 1;			/* nq starts as (x, z) = (1, 0) */
++	point_copy(nqpq, q);		/* nqpq starts as a copy of q */
++	ladder_bigloop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32);
++	point_copy(result, nq);		/* 32 bytes of n1 consumed; nq is the result */
++}
++
++static __always_inline void format_fexpand(u64 *output, const u8 *input)
++{
++	/*
++	 * Unpack a 32-byte little-endian value into five 51-bit limbs.
++	 *
++	 * Limb k holds bits 51 * k .. 51 * k + 50.  Each limb comes from an
++	 * unaligned 64-bit little-endian load followed by a right shift; the
++	 * last load starts at byte 24 so that it stays within the 32 input
++	 * bytes.  Bit 255 of the input is dropped by the final mask.
++	 */
++	const u64 limb_mask = 0x7ffffffffffffLLU;
++	u64 w0 = get_unaligned_le64(input);
++	u64 w1 = get_unaligned_le64(input + 6);
++	u64 w2 = get_unaligned_le64(input + 12);
++	u64 w3 = get_unaligned_le64(input + 19);
++	u64 w4 = get_unaligned_le64(input + 24);
++
++	output[0] = w0 & limb_mask;
++	output[1] = (w1 >> 3) & limb_mask;
++	output[2] = (w2 >> 6) & limb_mask;
++	output[3] = (w3 >> 1) & limb_mask;
++	output[4] = (w4 >> 12) & limb_mask;
++}
++
++static __always_inline void format_fcontract_first_carry_pass(u64 *input)
++{
++	/*
++	 * Single upward carry pass over the five limbs.
++	 *
++	 * Limbs 0..3 are reduced to 51 bits and their overflow is added to
++	 * the next limb; limb 4 only receives a carry and is left unmasked.
++	 */
++	const u64 limb_mask = 0x7ffffffffffffLLU;
++	u64 l0 = input[0];
++	u64 l1 = input[1] + (l0 >> 51);
++	u64 l2 = input[2] + (l1 >> 51);
++	u64 l3 = input[3] + (l2 >> 51);
++	u64 l4 = input[4] + (l3 >> 51);
++
++	input[0] = l0 & limb_mask;
++	input[1] = l1 & limb_mask;
++	input[2] = l2 & limb_mask;
++	input[3] = l3 & limb_mask;
++	input[4] = l4;
++}
++
++static __always_inline void format_fcontract_first_carry_full(u64 *input)
++{
++	format_fcontract_first_carry_pass(input); /* carry limbs 0..3 upward */
++	modulo_carry_top(input); /* wrap the overflow of limb 4 into limb 0 (times 19) */
++}
++
++static __always_inline void format_fcontract_second_carry_pass(u64 *input)
++{
++	/*
++	 * Second upward carry pass over the five limbs.
++	 *
++	 * Performs the same steps as format_fcontract_first_carry_pass():
++	 * limbs 0..3 are reduced to 51 bits, each passing its overflow on to
++	 * the next limb, and limb 4 absorbs the last carry without being
++	 * masked.
++	 */
++	u64 carry = 0;
++	u32 limb;
++
++	for (limb = 0; limb < 4; ++limb) {
++		u64 sum = input[limb] + carry;
++
++		input[limb] = sum & 0x7ffffffffffffLLU;
++		carry = sum >> 51;
++	}
++	input[4] += carry;
++}
++
++static __always_inline void format_fcontract_second_carry_full(u64 *input)
++{
++	u64 low;
++
++	/* Carry pass, then wrap the overflow of limb 4 into limb 0. */
++	format_fcontract_second_carry_pass(input);
++	modulo_carry_top(input);
++
++	/* The wrap-around may have pushed limb 0 past 51 bits: carry
++	 * that excess into limb 1.
++	 */
++	low = input[0];
++	input[1] += low >> 51;
++	input[0] = low & 0x7ffffffffffffLLU;
++}
++
++static __always_inline void format_fcontract_trim(u64 *input)
++{
++	/*
++	 * Branch-free conditional subtraction of p = 2^255 - 19.
++	 *
++	 * The mask is all-ones only if limb 0 >= 0x7ffffffffffed and limbs
++	 * 1..4 all equal 0x7ffffffffffff, which (assuming every limb already
++	 * fits in 51 bits) means the value is >= p; otherwise it is zero.
++	 */
++	const u64 p0 = 0x7ffffffffffedLLU;
++	const u64 pn = 0x7ffffffffffffLLU;
++	u64 ge_p = u64_gte_mask(input[0], p0);
++
++	ge_p &= u64_eq_mask(input[1], pn);
++	ge_p &= u64_eq_mask(input[2], pn);
++	ge_p &= u64_eq_mask(input[3], pn);
++	ge_p &= u64_eq_mask(input[4], pn);
++
++	input[0] -= p0 & ge_p;
++	input[1] -= pn & ge_p;
++	input[2] -= pn & ge_p;
++	input[3] -= pn & ge_p;
++	input[4] -= pn & ge_p;
++}
++
++static __always_inline void format_fcontract_store(u8 *output, u64 *input)
++{
++	/*
++	 * Pack five limbs into 32 little-endian bytes.  Limb k lands at bit
++	 * offset 51 * k, so each 64-bit output word is stitched together
++	 * from two neighbouring limbs.
++	 *
++	 * The limbs are assumed to be at most 51 bits wide; wider limbs
++	 * would be OR-ed over their neighbours.
++	 */
++	u64 l0 = input[0];
++	u64 l1 = input[1];
++	u64 l2 = input[2];
++	u64 l3 = input[3];
++	u64 l4 = input[4];
++	put_unaligned_le64(l0 | (l1 << 51), output);
++	put_unaligned_le64((l1 >> 13) | (l2 << 38), output + 8);
++	put_unaligned_le64((l2 >> 26) | (l3 << 25), output + 16);
++	put_unaligned_le64((l3 >> 39) | (l4 << 12), output + 24);
++}
++
++static __always_inline void format_fcontract(u8 *output, u64 *input)
++{
++	format_fcontract_first_carry_full(input); /* input is normalised in place by the steps below */
++	format_fcontract_second_carry_full(input);
++	format_fcontract_trim(input); /* conditional subtraction of 2^255 - 19 */
++	format_fcontract_store(output, input); /* 32 little-endian bytes written to output */
++}
++
++static __always_inline void format_scalar_of_point(u8 *scalar, u64 *point)
++{
++	/* Turn the (x, z) pair in point into x * z^-1 and serialise it. */
++	u64 buf[10] __aligned(32) = { 0 };
++	u64 *z_inv = &buf[0];
++	u64 *affine_x = &buf[5];
++
++	crecip(z_inv, &point[5]);
++	fmul_fmul(affine_x, &point[0], z_inv);
++	format_fcontract(scalar, affine_x);
++}
++
++static void curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE],
++			       const u8 secret[CURVE25519_KEY_SIZE],
++			       const u8 basepoint[CURVE25519_KEY_SIZE])
++{
++	u64 buf0[10] __aligned(32) = { 0 }; /* input point as (x, z), five limbs each */
++	u64 *x0 = buf0;
++	u64 *z = buf0 + 5;
++	u64 *q;
++	format_fexpand(x0, basepoint); /* x = basepoint unpacked into limbs */
++	z[0] = 1; /* z = 1 */
++	q = buf0;
++	{
++		u8 e[32] __aligned(32) = { 0 }; /* clamped private copy of secret */
++		u8 *scalar;
++		memcpy(e, secret, 32);
++		curve25519_clamp_secret(e);
++		scalar = e;
++		{
++			u64 buf[15] = { 0 }; /* ladder output; ladder_cmult fills the first 10 limbs */
++			u64 *nq = buf;
++			u64 *x = nq;
++			x[0] = 1;
++			ladder_cmult(nq, scalar, q);
++			format_scalar_of_point(mypublic, nq); /* mypublic = x * z^-1, serialised */
++			memzero_explicit(buf, sizeof(buf)); /* wipe in the reverse order of declaration */
++		}
++		memzero_explicit(e, sizeof(e));
++	}
++	memzero_explicit(buf0, sizeof(buf0));
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,1369 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
++ */
++
++static __always_inline u64 eq_mask(u64 a, u64 b)
++{
++	/* ~0 if a == b, else 0, computed without a branch. */
++	u64 delta = a ^ b;
++	u64 neg_delta = ~delta + (u64)1U;
++	u64 is_nonzero = (delta | neg_delta) >> (u32)63U;
++	return is_nonzero - (u64)1U;
++}
++
++static __always_inline u64 gte_mask(u64 a, u64 b)
++{
++	/*
++	 * ~0 if a >= b (unsigned comparison), else 0, computed without a
++	 * branch.
++	 */
++	u64 mixed = (a ^ b) | ((a - b) ^ b);
++	/* Bit 63 of a ^ mixed is set exactly when a - b borrows. */
++	u64 borrow = (a ^ mixed) >> (u32)63U;
++
++	return borrow - (u64)1U;
++}
++
++/* Computes out = f1 + f2, where f1 and out are four 64-bit words (least
++ * significant word first) and f2 is a single word; returns the final carry */
++static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2)
++{
++	u64 carry_r;
++
++	asm volatile(
++		/* Zero r8-r11 and carry_r; the last xor also clears CF */
++		"  xor %%r8, %%r8;"
++		"  xor %%r9, %%r9;"
++		"  xor %%r10, %%r10;"
++		"  xor %%r11, %%r11;"
++		"  xor %1, %1;"
++
++		/* Addition chain: word 0 gets f2, words 1..3 only get the carry */
++		"  addq 0(%3), %0;"
++		"  movq %0, 0(%2);"
++		"  adcxq 8(%3), %%r8;"
++		"  movq %%r8, 8(%2);"
++		"  adcxq 16(%3), %%r9;"
++		"  movq %%r9, 16(%2);"
++		"  adcxq 24(%3), %%r10;"
++		"  movq %%r10, 24(%2);"
++
++		/* Return the carry bit in a register: carry_r = 0 + 0 + CF */
++		"  adcx %%r11, %1;"
++	: "+&r" (f2), "=&r" (carry_r) /* %0 = f2 (overwritten with word 0 of the sum), %1 = carry_r */
++	: "r" (out), "r" (f1) /* %2 = out, %3 = f1 */
++	: "%r8", "%r9", "%r10", "%r11", "memory", "cc"
++	);
++
++	return carry_r;
++}
++
++/* Computes the field addition of two field elements: out <- f1 + f2 */
++static inline void fadd(u64 *out, const u64 *f1, const u64 *f2)
++{
++	asm volatile(
++		/* Compute the raw addition of f1 + f2 into r8..r11 */
++		"  movq 0(%0), %%r8;"
++		"  addq 0(%2), %%r8;"
++		"  movq 8(%0), %%r9;"
++		"  adcxq 8(%2), %%r9;"
++		"  movq 16(%0), %%r10;"
++		"  adcxq 16(%2), %%r10;"
++		"  movq 24(%0), %%r11;"
++		"  adcxq 24(%2), %%r11;"
++
++		/* Wrap the result back into the field */
++
++		/* Step 1: Compute carry*38 (the f2 pointer in %0 is no longer needed and is reused to hold 38) */
++		"  mov $0, %%rax;"
++		"  mov $38, %0;"
++		"  cmovc %0, %%rax;"
++
++		/* Step 2: Add carry*38 to the original sum */
++		"  xor %%rcx, %%rcx;"
++		"  add %%rax, %%r8;"
++		"  adcx %%rcx, %%r9;"
++		"  movq %%r9, 8(%1);"
++		"  adcx %%rcx, %%r10;"
++		"  movq %%r10, 16(%1);"
++		"  adcx %%rcx, %%r11;"
++		"  movq %%r11, 24(%1);"
++
++		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
++		"  mov $0, %%rax;"
++		"  cmovc %0, %%rax;"
++		"  add %%rax, %%r8;"
++		"  movq %%r8, 0(%1);"
++	: "+&r" (f2) /* %0 = f2, later scratch */
++	: "r" (out), "r" (f1) /* %1 = out, %2 = f1 */
++	: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
++	);
++}
++
++/* Computes the field subtraction of two field elements: out <- f1 - f2 */
++static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
++{
++	asm volatile(
++		/* Compute the raw subtraction of f1-f2 into r8..r11 */
++		"  movq 0(%1), %%r8;"
++		"  subq 0(%2), %%r8;"
++		"  movq 8(%1), %%r9;"
++		"  sbbq 8(%2), %%r9;"
++		"  movq 16(%1), %%r10;"
++		"  sbbq 16(%2), %%r10;"
++		"  movq 24(%1), %%r11;"
++		"  sbbq 24(%2), %%r11;"
++
++		/* Wrap the result back into the field */
++
++		/* Step 1: Compute carry*38 (carry here is the borrow left in CF) */
++		"  mov $0, %%rax;"
++		"  mov $38, %%rcx;"
++		"  cmovc %%rcx, %%rax;"
++
++		/* Step 2: Subtract carry*38 from the original difference */
++		"  sub %%rax, %%r8;"
++		"  sbb $0, %%r9;"
++		"  sbb $0, %%r10;"
++		"  sbb $0, %%r11;"
++
++		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
++		"  mov $0, %%rax;"
++		"  cmovc %%rcx, %%rax;"
++		"  sub %%rax, %%r8;"
++
++		/* Store the result */
++		"  movq %%r8, 0(%0);"
++		"  movq %%r9, 8(%0);"
++		"  movq %%r10, 16(%0);"
++		"  movq %%r11, 24(%0);"
++	:
++	: "r" (out), "r" (f1), "r" (f2) /* %0 = out, %1 = f1, %2 = f2 */
++	: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
++	);
++}
++
++/* Computes a field multiplication: out <- f1 * f2
++ * Uses the 8-element buffer tmp for intermediate results */
++static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
++{
++	asm volatile(
++		/* Compute the raw multiplication: tmp <- src1 * src2 (src1 = f1 in %1, src2 = f2 in %3) */
++
++		/* Compute src1[0] * src2 */
++		"  movq 0(%1), %%rdx;"
++		"  mulxq 0(%3), %%r8, %%r9;"       "  xor %%r10, %%r10;"     "  movq %%r8, 0(%0);"
++		"  mulxq 8(%3), %%r10, %%r11;"     "  adox %%r9, %%r10;"     "  movq %%r10, 8(%0);"
++		"  mulxq 16(%3), %%rbx, %%r13;"    "  adox %%r11, %%rbx;"
++		"  mulxq 24(%3), %%r14, %%rdx;"    "  adox %%r13, %%r14;"    "  mov $0, %%rax;"
++		                                   "  adox %%rdx, %%rax;"
++		/* Compute src1[1] * src2 */
++		"  movq 8(%1), %%rdx;"
++		"  mulxq 0(%3), %%r8, %%r9;"       "  xor %%r10, %%r10;"     "  adcxq 8(%0), %%r8;"    "  movq %%r8, 8(%0);"
++		"  mulxq 8(%3), %%r10, %%r11;"     "  adox %%r9, %%r10;"     "  adcx %%rbx, %%r10;"    "  movq %%r10, 16(%0);"
++		"  mulxq 16(%3), %%rbx, %%r13;"    "  adox %%r11, %%rbx;"    "  adcx %%r14, %%rbx;"    "  mov $0, %%r8;"
++		"  mulxq 24(%3), %%r14, %%rdx;"    "  adox %%r13, %%r14;"    "  adcx %%rax, %%r14;"    "  mov $0, %%rax;"
++		                                   "  adox %%rdx, %%rax;"    "  adcx %%r8, %%rax;"
++		/* Compute src1[2] * src2 */
++		"  movq 16(%1), %%rdx;"
++		"  mulxq 0(%3), %%r8, %%r9;"       "  xor %%r10, %%r10;"    "  adcxq 16(%0), %%r8;"    "  movq %%r8, 16(%0);"
++		"  mulxq 8(%3), %%r10, %%r11;"     "  adox %%r9, %%r10;"     "  adcx %%rbx, %%r10;"    "  movq %%r10, 24(%0);"
++		"  mulxq 16(%3), %%rbx, %%r13;"    "  adox %%r11, %%rbx;"    "  adcx %%r14, %%rbx;"    "  mov $0, %%r8;"
++		"  mulxq 24(%3), %%r14, %%rdx;"    "  adox %%r13, %%r14;"    "  adcx %%rax, %%r14;"    "  mov $0, %%rax;"
++		                                   "  adox %%rdx, %%rax;"    "  adcx %%r8, %%rax;"
++		/* Compute src1[3] * src2 */
++		"  movq 24(%1), %%rdx;"
++		"  mulxq 0(%3), %%r8, %%r9;"       "  xor %%r10, %%r10;"    "  adcxq 24(%0), %%r8;"    "  movq %%r8, 24(%0);"
++		"  mulxq 8(%3), %%r10, %%r11;"     "  adox %%r9, %%r10;"     "  adcx %%rbx, %%r10;"    "  movq %%r10, 32(%0);"
++		"  mulxq 16(%3), %%rbx, %%r13;"    "  adox %%r11, %%rbx;"    "  adcx %%r14, %%rbx;"    "  movq %%rbx, 40(%0);"    "  mov $0, %%r8;"
++		"  mulxq 24(%3), %%r14, %%rdx;"    "  adox %%r13, %%r14;"    "  adcx %%rax, %%r14;"    "  movq %%r14, 48(%0);"    "  mov $0, %%rax;"
++		                                   "  adox %%rdx, %%rax;"    "  adcx %%r8, %%rax;"     "  movq %%rax, 56(%0);"
++		/* Line up pointers: from here on %1 = tmp (the 8-word product) and %0 = out */
++		"  mov %0, %1;"
++		"  mov %2, %0;"
++
++		/* Wrap the result back into the field */
++
++		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo (%3 is zeroed and used as the zero addend) */
++		"  mov $38, %%rdx;"
++		"  mulxq 32(%1), %%r8, %%r13;"
++		"  xor %3, %3;"
++		"  adoxq 0(%1), %%r8;"
++		"  mulxq 40(%1), %%r9, %%rbx;"
++		"  adcx %%r13, %%r9;"
++		"  adoxq 8(%1), %%r9;"
++		"  mulxq 48(%1), %%r10, %%r13;"
++		"  adcx %%rbx, %%r10;"
++		"  adoxq 16(%1), %%r10;"
++		"  mulxq 56(%1), %%r11, %%rax;"
++		"  adcx %%r13, %%r11;"
++		"  adoxq 24(%1), %%r11;"
++		"  adcx %3, %%rax;"
++		"  adox %3, %%rax;"
++		"  imul %%rdx, %%rax;"
++
++		/* Step 2: Fold the carry back into dst */
++		"  add %%rax, %%r8;"
++		"  adcx %3, %%r9;"
++		"  movq %%r9, 8(%0);"
++		"  adcx %3, %%r10;"
++		"  movq %%r10, 16(%0);"
++		"  adcx %3, %%r11;"
++		"  movq %%r11, 24(%0);"
++
++		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
++		"  mov $0, %%rax;"
++		"  cmovc %%rdx, %%rax;"
++		"  add %%rax, %%r8;"
++		"  movq %%r8, 0(%0);"
++	: "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) /* %0 = tmp, %1 = f1, %2 = out, %3 = f2; all four are overwritten or reused */
++	:
++	: "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
++	);
++}
++
++/* Computes two field multiplications on pairs of 4-limb elements (second element at byte offset 32):
++ * out[0] <- f1[0] * f2[0]
++ * out[1] <- f1[1] * f2[1]
++ * Uses the 16-element buffer tmp for the two raw 8-limb products; the asm repoints f1/tmp and zeroes f2 (all "+&r"). */
++static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
++{
++	asm volatile(
++		/* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] (8 limbs stored at 0..56(%0)) */
++
++		/* Compute src1[0] * src2 ("xor r10, r10" only clears CF/OF for the adcx/adox chains; "mov $0" keeps the flags) */
++		"  movq 0(%1), %%rdx;"
++		"  mulxq 0(%3), %%r8, %%r9;"       "  xor %%r10, %%r10;"     "  movq %%r8, 0(%0);"
++		"  mulxq 8(%3), %%r10, %%r11;"     "  adox %%r9, %%r10;"     "  movq %%r10, 8(%0);"
++		"  mulxq 16(%3), %%rbx, %%r13;"    "  adox %%r11, %%rbx;"
++		"  mulxq 24(%3), %%r14, %%rdx;"    "  adox %%r13, %%r14;"    "  mov $0, %%rax;"
++		                                   "  adox %%rdx, %%rax;"
++		/* Compute src1[1] * src2 */
++		"  movq 8(%1), %%rdx;"
++		"  mulxq 0(%3), %%r8, %%r9;"       "  xor %%r10, %%r10;"     "  adcxq 8(%0), %%r8;"    "  movq %%r8, 8(%0);"
++		"  mulxq 8(%3), %%r10, %%r11;"     "  adox %%r9, %%r10;"     "  adcx %%rbx, %%r10;"    "  movq %%r10, 16(%0);"
++		"  mulxq 16(%3), %%rbx, %%r13;"    "  adox %%r11, %%rbx;"    "  adcx %%r14, %%rbx;"    "  mov $0, %%r8;"
++		"  mulxq 24(%3), %%r14, %%rdx;"    "  adox %%r13, %%r14;"    "  adcx %%rax, %%r14;"    "  mov $0, %%rax;"
++		                                   "  adox %%rdx, %%rax;"    "  adcx %%r8, %%rax;"
++		/* Compute src1[2] * src2 */
++		"  movq 16(%1), %%rdx;"
++		"  mulxq 0(%3), %%r8, %%r9;"       "  xor %%r10, %%r10;"    "  adcxq 16(%0), %%r8;"    "  movq %%r8, 16(%0);"
++		"  mulxq 8(%3), %%r10, %%r11;"     "  adox %%r9, %%r10;"     "  adcx %%rbx, %%r10;"    "  movq %%r10, 24(%0);"
++		"  mulxq 16(%3), %%rbx, %%r13;"    "  adox %%r11, %%rbx;"    "  adcx %%r14, %%rbx;"    "  mov $0, %%r8;"
++		"  mulxq 24(%3), %%r14, %%rdx;"    "  adox %%r13, %%r14;"    "  adcx %%rax, %%r14;"    "  mov $0, %%rax;"
++		                                   "  adox %%rdx, %%rax;"    "  adcx %%r8, %%rax;"
++		/* Compute src1[3] * src2 */
++		"  movq 24(%1), %%rdx;"
++		"  mulxq 0(%3), %%r8, %%r9;"       "  xor %%r10, %%r10;"    "  adcxq 24(%0), %%r8;"    "  movq %%r8, 24(%0);"
++		"  mulxq 8(%3), %%r10, %%r11;"     "  adox %%r9, %%r10;"     "  adcx %%rbx, %%r10;"    "  movq %%r10, 32(%0);"
++		"  mulxq 16(%3), %%rbx, %%r13;"    "  adox %%r11, %%rbx;"    "  adcx %%r14, %%rbx;"    "  movq %%rbx, 40(%0);"    "  mov $0, %%r8;"
++		"  mulxq 24(%3), %%r14, %%rdx;"    "  adox %%r13, %%r14;"    "  adcx %%rax, %%r14;"    "  movq %%r14, 48(%0);"    "  mov $0, %%rax;"
++		                                   "  adox %%rdx, %%rax;"    "  adcx %%r8, %%rax;"     "  movq %%rax, 56(%0);"
++
++		/* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] (operands at 32..56, 8 limbs stored at 64..120(%0)) */
++
++		/* Compute src1[0] * src2 */
++		"  movq 32(%1), %%rdx;"
++		"  mulxq 32(%3), %%r8, %%r9;"       "  xor %%r10, %%r10;"     "  movq %%r8, 64(%0);"
++		"  mulxq 40(%3), %%r10, %%r11;"     "  adox %%r9, %%r10;"     "  movq %%r10, 72(%0);"
++		"  mulxq 48(%3), %%rbx, %%r13;"    "  adox %%r11, %%rbx;"
++		"  mulxq 56(%3), %%r14, %%rdx;"    "  adox %%r13, %%r14;"    "  mov $0, %%rax;"
++		                                   "  adox %%rdx, %%rax;"
++		/* Compute src1[1] * src2 */
++		"  movq 40(%1), %%rdx;"
++		"  mulxq 32(%3), %%r8, %%r9;"       "  xor %%r10, %%r10;"     "  adcxq 72(%0), %%r8;"    "  movq %%r8, 72(%0);"
++		"  mulxq 40(%3), %%r10, %%r11;"     "  adox %%r9, %%r10;"     "  adcx %%rbx, %%r10;"    "  movq %%r10, 80(%0);"
++		"  mulxq 48(%3), %%rbx, %%r13;"    "  adox %%r11, %%rbx;"    "  adcx %%r14, %%rbx;"    "  mov $0, %%r8;"
++		"  mulxq 56(%3), %%r14, %%rdx;"    "  adox %%r13, %%r14;"    "  adcx %%rax, %%r14;"    "  mov $0, %%rax;"
++		                                   "  adox %%rdx, %%rax;"    "  adcx %%r8, %%rax;"
++		/* Compute src1[2] * src2 */
++		"  movq 48(%1), %%rdx;"
++		"  mulxq 32(%3), %%r8, %%r9;"       "  xor %%r10, %%r10;"    "  adcxq 80(%0), %%r8;"    "  movq %%r8, 80(%0);"
++		"  mulxq 40(%3), %%r10, %%r11;"     "  adox %%r9, %%r10;"     "  adcx %%rbx, %%r10;"    "  movq %%r10, 88(%0);"
++		"  mulxq 48(%3), %%rbx, %%r13;"    "  adox %%r11, %%rbx;"    "  adcx %%r14, %%rbx;"    "  mov $0, %%r8;"
++		"  mulxq 56(%3), %%r14, %%rdx;"    "  adox %%r13, %%r14;"    "  adcx %%rax, %%r14;"    "  mov $0, %%rax;"
++		                                   "  adox %%rdx, %%rax;"    "  adcx %%r8, %%rax;"
++		/* Compute src1[3] * src2 */
++		"  movq 56(%1), %%rdx;"
++		"  mulxq 32(%3), %%r8, %%r9;"       "  xor %%r10, %%r10;"    "  adcxq 88(%0), %%r8;"    "  movq %%r8, 88(%0);"
++		"  mulxq 40(%3), %%r10, %%r11;"     "  adox %%r9, %%r10;"     "  adcx %%rbx, %%r10;"    "  movq %%r10, 96(%0);"
++		"  mulxq 48(%3), %%rbx, %%r13;"    "  adox %%r11, %%rbx;"    "  adcx %%r14, %%rbx;"    "  movq %%rbx, 104(%0);"    "  mov $0, %%r8;"
++		"  mulxq 56(%3), %%r14, %%rdx;"    "  adox %%r13, %%r14;"    "  adcx %%rax, %%r14;"    "  movq %%r14, 112(%0);"    "  mov $0, %%rax;"
++		                                   "  adox %%rdx, %%rax;"    "  adcx %%r8, %%rax;"     "  movq %%rax, 120(%0);"
++		/* Line up pointers: %1 <- tmp (now the source of the raw products), %0 <- out (the destination) */
++		"  mov %0, %1;"
++		"  mov %2, %0;"
++
++		/* Wrap the results back into the field */
++
++		/* Step 1 (first product, 0..56(%1)): Compute dst + carry == tmp_hi * 38 + tmp_lo, as 2^256 = 38 mod 2^255 - 19 */
++		"  mov $38, %%rdx;"
++		"  mulxq 32(%1), %%r8, %%r13;"
++		"  xor %3, %3;" /* %3 <- 0 (f2 is no longer needed) and CF/OF cleared for the two chains below */
++		"  adoxq 0(%1), %%r8;"
++		"  mulxq 40(%1), %%r9, %%rbx;"
++		"  adcx %%r13, %%r9;"
++		"  adoxq 8(%1), %%r9;"
++		"  mulxq 48(%1), %%r10, %%r13;"
++		"  adcx %%rbx, %%r10;"
++		"  adoxq 16(%1), %%r10;"
++		"  mulxq 56(%1), %%r11, %%rax;"
++		"  adcx %%r13, %%r11;"
++		"  adoxq 24(%1), %%r11;"
++		"  adcx %3, %%rax;"
++		"  adox %3, %%rax;"
++		"  imul %%rdx, %%rax;" /* overflow limb rax <- rax * 38 */
++
++		/* Step 2: Fold the carry back into dst */
++		"  add %%rax, %%r8;"
++		"  adcx %3, %%r9;"
++		"  movq %%r9, 8(%0);"
++		"  adcx %3, %%r10;"
++		"  movq %%r10, 16(%0);"
++		"  adcx %3, %%r11;"
++		"  movq %%r11, 24(%0);"
++
++		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
++		"  mov $0, %%rax;"
++		"  cmovc %%rdx, %%rax;" /* rax <- CF ? 38 : 0 */
++		"  add %%rax, %%r8;"
++		"  movq %%r8, 0(%0);"
++
++		/* Step 1 (second product, 64..120(%1)): Compute dst + carry == tmp_hi * 38 + tmp_lo */
++		"  mov $38, %%rdx;"
++		"  mulxq 96(%1), %%r8, %%r13;"
++		"  xor %3, %3;"
++		"  adoxq 64(%1), %%r8;"
++		"  mulxq 104(%1), %%r9, %%rbx;"
++		"  adcx %%r13, %%r9;"
++		"  adoxq 72(%1), %%r9;"
++		"  mulxq 112(%1), %%r10, %%r13;"
++		"  adcx %%rbx, %%r10;"
++		"  adoxq 80(%1), %%r10;"
++		"  mulxq 120(%1), %%r11, %%rax;"
++		"  adcx %%r13, %%r11;"
++		"  adoxq 88(%1), %%r11;"
++		"  adcx %3, %%rax;"
++		"  adox %3, %%rax;"
++		"  imul %%rdx, %%rax;"
++
++		/* Step 2: Fold the carry back into dst (second result goes to 32..56(%0)) */
++		"  add %%rax, %%r8;"
++		"  adcx %3, %%r9;"
++		"  movq %%r9, 40(%0);"
++		"  adcx %3, %%r10;"
++		"  movq %%r10, 48(%0);"
++		"  adcx %3, %%r11;"
++		"  movq %%r11, 56(%0);"
++
++		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
++		"  mov $0, %%rax;"
++		"  cmovc %%rdx, %%rax;"
++		"  add %%rax, %%r8;"
++		"  movq %%r8, 32(%0);"
++	: "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
++	:
++	: "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
++	);
++}
++
++/* Computes the field multiplication of four-element f1 with value in f2: out <- f1 * f2, with the overflow limb folded back in as a multiple of 38 */
++static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
++{
++	register u64 f2_r asm("rdx") = f2; /* mulx takes its implicit multiplicand from rdx */
++
++	asm volatile(
++		/* Compute the raw multiplication of f1*f2 */
++		"  mulxq 0(%2), %%r8, %%rcx;"      /* f1[0]*f2 */
++		"  mulxq 8(%2), %%r9, %%rbx;"      /* f1[1]*f2 */
++		"  add %%rcx, %%r9;"
++		"  mov $0, %%rcx;" /* rcx <- 0 via mov so the CF produced by the add above survives */
++		"  mulxq 16(%2), %%r10, %%r13;"    /* f1[2]*f2 */
++		"  adcx %%rbx, %%r10;"
++		"  mulxq 24(%2), %%r11, %%rax;"    /* f1[3]*f2 */
++		"  adcx %%r13, %%r11;"
++		"  adcx %%rcx, %%rax;"
++
++		/* Wrap the result back into the field */
++
++		/* Step 1: Compute carry*38 (rdx is reused for the constant; f2 is no longer needed) */
++		"  mov $38, %%rdx;"
++		"  imul %%rdx, %%rax;"
++
++		/* Step 2: Fold the carry back into dst (rcx is still zero, so each adcx only propagates CF) */
++		"  add %%rax, %%r8;"
++		"  adcx %%rcx, %%r9;"
++		"  movq %%r9, 8(%1);"
++		"  adcx %%rcx, %%r10;"
++		"  movq %%r10, 16(%1);"
++		"  adcx %%rcx, %%r11;"
++		"  movq %%r11, 24(%1);"
++
++		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
++		"  mov $0, %%rax;"
++		"  cmovc %%rdx, %%rax;" /* rax <- CF ? 38 : 0 */
++		"  add %%rax, %%r8;"
++		"  movq %%r8, 0(%1);"
++	: "+&r" (f2_r)
++	: "r" (out), "r" (f1)
++	: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc"
++	);
++}
++
++/* Swaps the eight limbs of p1 and p2 when bit is nonzero and rewrites both unchanged when bit is 0, without branching on bit (both buffers are stored to despite the const qualifiers) */
++static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2)
++{
++	asm volatile(
++		/* Invert the polarity of bit to match cmov expectations: adding 2^64 - 1 sets CF iff bit != 0, and mov/cmovc leave CF alone */
++		"  add $18446744073709551615, %0;"
++
++		/* cswap p1[0], p2[0] */
++		"  movq 0(%1), %%r8;"
++		"  movq 0(%2), %%r9;"
++		"  mov %%r8, %%r10;"
++		"  cmovc %%r9, %%r8;"
++		"  cmovc %%r10, %%r9;"
++		"  movq %%r8, 0(%1);"
++		"  movq %%r9, 0(%2);"
++
++		/* cswap p1[1], p2[1] */
++		"  movq 8(%1), %%r8;"
++		"  movq 8(%2), %%r9;"
++		"  mov %%r8, %%r10;"
++		"  cmovc %%r9, %%r8;"
++		"  cmovc %%r10, %%r9;"
++		"  movq %%r8, 8(%1);"
++		"  movq %%r9, 8(%2);"
++
++		/* cswap p1[2], p2[2] */
++		"  movq 16(%1), %%r8;"
++		"  movq 16(%2), %%r9;"
++		"  mov %%r8, %%r10;"
++		"  cmovc %%r9, %%r8;"
++		"  cmovc %%r10, %%r9;"
++		"  movq %%r8, 16(%1);"
++		"  movq %%r9, 16(%2);"
++
++		/* cswap p1[3], p2[3] */
++		"  movq 24(%1), %%r8;"
++		"  movq 24(%2), %%r9;"
++		"  mov %%r8, %%r10;"
++		"  cmovc %%r9, %%r8;"
++		"  cmovc %%r10, %%r9;"
++		"  movq %%r8, 24(%1);"
++		"  movq %%r9, 24(%2);"
++
++		/* cswap p1[4], p2[4] */
++		"  movq 32(%1), %%r8;"
++		"  movq 32(%2), %%r9;"
++		"  mov %%r8, %%r10;"
++		"  cmovc %%r9, %%r8;"
++		"  cmovc %%r10, %%r9;"
++		"  movq %%r8, 32(%1);"
++		"  movq %%r9, 32(%2);"
++
++		/* cswap p1[5], p2[5] */
++		"  movq 40(%1), %%r8;"
++		"  movq 40(%2), %%r9;"
++		"  mov %%r8, %%r10;"
++		"  cmovc %%r9, %%r8;"
++		"  cmovc %%r10, %%r9;"
++		"  movq %%r8, 40(%1);"
++		"  movq %%r9, 40(%2);"
++
++		/* cswap p1[6], p2[6] */
++		"  movq 48(%1), %%r8;"
++		"  movq 48(%2), %%r9;"
++		"  mov %%r8, %%r10;"
++		"  cmovc %%r9, %%r8;"
++		"  cmovc %%r10, %%r9;"
++		"  movq %%r8, 48(%1);"
++		"  movq %%r9, 48(%2);"
++
++		/* cswap p1[7], p2[7] */
++		"  movq 56(%1), %%r8;"
++		"  movq 56(%2), %%r9;"
++		"  mov %%r8, %%r10;"
++		"  cmovc %%r9, %%r8;"
++		"  cmovc %%r10, %%r9;"
++		"  movq %%r8, 56(%1);"
++		"  movq %%r9, 56(%2);"
++	: "+&r" (bit)
++	: "r" (p1), "r" (p2)
++	: "%r8", "%r9", "%r10", "memory", "cc"
++	);
++}
++
++/* Computes the square of a field element: out <- f * f
++ * Uses the 8-element buffer tmp for the raw 8-limb square; the asm then repoints f at tmp and tmp at out for the reduction */
++static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
++{
++	asm volatile(
++		/* Compute the raw multiplication: tmp <- f * f */
++
++		/* Step 1: Compute all partial products f[i]*f[j] with i != j, each pair once (the xor only clears CF/OF) */
++		"  movq 0(%1), %%rdx;"                                       /* f[0] */
++		"  mulxq 8(%1), %%r8, %%r14;"      "  xor %%r15, %%r15;"     /* f[1]*f[0] */
++		"  mulxq 16(%1), %%r9, %%r10;"     "  adcx %%r14, %%r9;"     /* f[2]*f[0] */
++		"  mulxq 24(%1), %%rax, %%rcx;"    "  adcx %%rax, %%r10;"    /* f[3]*f[0] */
++		"  movq 24(%1), %%rdx;"                                      /* f[3] */
++		"  mulxq 8(%1), %%r11, %%rbx;"     "  adcx %%rcx, %%r11;"    /* f[1]*f[3] */
++		"  mulxq 16(%1), %%rax, %%r13;"    "  adcx %%rax, %%rbx;"    /* f[2]*f[3] */
++		"  movq 8(%1), %%rdx;"             "  adcx %%r15, %%r13;"    /* f[1] */
++		"  mulxq 16(%1), %%rax, %%rcx;"    "  mov $0, %%r14;"        /* f[2]*f[1] */
++
++		/* Step 2: Compute two parallel carry chains: adox adds f[2]*f[1] (rax:rcx) into the sum, adcx doubles each limb */
++		"  xor %%r15, %%r15;"
++		"  adox %%rax, %%r10;"
++		"  adcx %%r8, %%r8;"
++		"  adox %%rcx, %%r11;"
++		"  adcx %%r9, %%r9;"
++		"  adox %%r15, %%rbx;"
++		"  adcx %%r10, %%r10;"
++		"  adox %%r15, %%r13;"
++		"  adcx %%r11, %%r11;"
++		"  adox %%r15, %%r14;"
++		"  adcx %%rbx, %%rbx;"
++		"  adcx %%r13, %%r13;"
++		"  adcx %%r14, %%r14;"
++
++		/* Step 3: Compute intermediate squares f[i]^2, add them to the doubled sum and store the 8 limbs at 0..56(%0) */
++		"  movq 0(%1), %%rdx;"     "  mulx %%rdx, %%rax, %%rcx;"    /* f[0]^2 */
++		                           "  movq %%rax, 0(%0);"
++		"  add %%rcx, %%r8;"       "  movq %%r8, 8(%0);"
++		"  movq 8(%1), %%rdx;"     "  mulx %%rdx, %%rax, %%rcx;"    /* f[1]^2 */
++		"  adcx %%rax, %%r9;"      "  movq %%r9, 16(%0);"
++		"  adcx %%rcx, %%r10;"     "  movq %%r10, 24(%0);"
++		"  movq 16(%1), %%rdx;"    "  mulx %%rdx, %%rax, %%rcx;"    /* f[2]^2 */
++		"  adcx %%rax, %%r11;"     "  movq %%r11, 32(%0);"
++		"  adcx %%rcx, %%rbx;"     "  movq %%rbx, 40(%0);"
++		"  movq 24(%1), %%rdx;"    "  mulx %%rdx, %%rax, %%rcx;"    /* f[3]^2 */
++		"  adcx %%rax, %%r13;"     "  movq %%r13, 48(%0);"
++		"  adcx %%rcx, %%r14;"     "  movq %%r14, 56(%0);"
++
++		/* Line up pointers: %1 <- tmp (now the source of the raw square), %0 <- out (the destination) */
++		"  mov %0, %1;"
++		"  mov %2, %0;"
++
++		/* Wrap the result back into the field */
++
++		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo, as 2^256 = 38 mod 2^255 - 19 */
++		"  mov $38, %%rdx;"
++		"  mulxq 32(%1), %%r8, %%r13;"
++		"  xor %%rcx, %%rcx;"
++		"  adoxq 0(%1), %%r8;"
++		"  mulxq 40(%1), %%r9, %%rbx;"
++		"  adcx %%r13, %%r9;"
++		"  adoxq 8(%1), %%r9;"
++		"  mulxq 48(%1), %%r10, %%r13;"
++		"  adcx %%rbx, %%r10;"
++		"  adoxq 16(%1), %%r10;"
++		"  mulxq 56(%1), %%r11, %%rax;"
++		"  adcx %%r13, %%r11;"
++		"  adoxq 24(%1), %%r11;"
++		"  adcx %%rcx, %%rax;"
++		"  adox %%rcx, %%rax;"
++		"  imul %%rdx, %%rax;" /* overflow limb rax <- rax * 38 */
++
++		/* Step 2: Fold the carry back into dst */
++		"  add %%rax, %%r8;"
++		"  adcx %%rcx, %%r9;"
++		"  movq %%r9, 8(%0);"
++		"  adcx %%rcx, %%r10;"
++		"  movq %%r10, 16(%0);"
++		"  adcx %%rcx, %%r11;"
++		"  movq %%r11, 24(%0);"
++
++		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
++		"  mov $0, %%rax;"
++		"  cmovc %%rdx, %%rax;" /* rax <- CF ? 38 : 0 */
++		"  add %%rax, %%r8;"
++		"  movq %%r8, 0(%0);"
++	: "+&r" (tmp), "+&r" (f), "+&r" (out)
++	:
++	: "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
++	);
++}
++
++/* Computes two field squarings on a pair of 4-limb elements (second element at byte offset 32, i.e. limbs f[4..7]):
++ * out[0] <- f[0] * f[0]
++ * out[1] <- f[1] * f[1]
++ * Uses the 16-element buffer tmp for intermediate results (raw 8-limb squares at byte offsets 0 and 64) */
++static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
++{
++	asm volatile(
++		/* Step 1: Compute all partial products of the first element (limbs f[0..3]); the xor only clears CF/OF */
++		"  movq 0(%1), %%rdx;"                                       /* f[0] */
++		"  mulxq 8(%1), %%r8, %%r14;"      "  xor %%r15, %%r15;"     /* f[1]*f[0] */
++		"  mulxq 16(%1), %%r9, %%r10;"     "  adcx %%r14, %%r9;"     /* f[2]*f[0] */
++		"  mulxq 24(%1), %%rax, %%rcx;"    "  adcx %%rax, %%r10;"    /* f[3]*f[0] */
++		"  movq 24(%1), %%rdx;"                                      /* f[3] */
++		"  mulxq 8(%1), %%r11, %%rbx;"     "  adcx %%rcx, %%r11;"    /* f[1]*f[3] */
++		"  mulxq 16(%1), %%rax, %%r13;"    "  adcx %%rax, %%rbx;"    /* f[2]*f[3] */
++		"  movq 8(%1), %%rdx;"             "  adcx %%r15, %%r13;"    /* f[1] */
++		"  mulxq 16(%1), %%rax, %%rcx;"    "  mov $0, %%r14;"        /* f[2]*f[1] */
++
++		/* Step 2: Compute two parallel carry chains: adox adds f[2]*f[1] (rax:rcx) into the sum, adcx doubles each limb */
++		"  xor %%r15, %%r15;"
++		"  adox %%rax, %%r10;"
++		"  adcx %%r8, %%r8;"
++		"  adox %%rcx, %%r11;"
++		"  adcx %%r9, %%r9;"
++		"  adox %%r15, %%rbx;"
++		"  adcx %%r10, %%r10;"
++		"  adox %%r15, %%r13;"
++		"  adcx %%r11, %%r11;"
++		"  adox %%r15, %%r14;"
++		"  adcx %%rbx, %%rbx;"
++		"  adcx %%r13, %%r13;"
++		"  adcx %%r14, %%r14;"
++
++		/* Step 3: Compute intermediate squares, add them to the doubled sum and store the 8 limbs at 0..56(%0) */
++		"  movq 0(%1), %%rdx;"     "  mulx %%rdx, %%rax, %%rcx;"    /* f[0]^2 */
++		                           "  movq %%rax, 0(%0);"
++		"  add %%rcx, %%r8;"       "  movq %%r8, 8(%0);"
++		"  movq 8(%1), %%rdx;"     "  mulx %%rdx, %%rax, %%rcx;"    /* f[1]^2 */
++		"  adcx %%rax, %%r9;"      "  movq %%r9, 16(%0);"
++		"  adcx %%rcx, %%r10;"     "  movq %%r10, 24(%0);"
++		"  movq 16(%1), %%rdx;"    "  mulx %%rdx, %%rax, %%rcx;"    /* f[2]^2 */
++		"  adcx %%rax, %%r11;"     "  movq %%r11, 32(%0);"
++		"  adcx %%rcx, %%rbx;"     "  movq %%rbx, 40(%0);"
++		"  movq 24(%1), %%rdx;"    "  mulx %%rdx, %%rax, %%rcx;"    /* f[3]^2 */
++		"  adcx %%rax, %%r13;"     "  movq %%r13, 48(%0);"
++		"  adcx %%rcx, %%r14;"     "  movq %%r14, 56(%0);"
++
++		/* Step 1: Compute all partial products of the second element (limbs f[4..7]) */
++		"  movq 32(%1), %%rdx;"                                       /* f[4] */
++		"  mulxq 40(%1), %%r8, %%r14;"      "  xor %%r15, %%r15;"     /* f[5]*f[4] */
++		"  mulxq 48(%1), %%r9, %%r10;"     "  adcx %%r14, %%r9;"     /* f[6]*f[4] */
++		"  mulxq 56(%1), %%rax, %%rcx;"    "  adcx %%rax, %%r10;"    /* f[7]*f[4] */
++		"  movq 56(%1), %%rdx;"                                      /* f[7] */
++		"  mulxq 40(%1), %%r11, %%rbx;"     "  adcx %%rcx, %%r11;"    /* f[5]*f[7] */
++		"  mulxq 48(%1), %%rax, %%r13;"    "  adcx %%rax, %%rbx;"    /* f[6]*f[7] */
++		"  movq 40(%1), %%rdx;"             "  adcx %%r15, %%r13;"    /* f[5] */
++		"  mulxq 48(%1), %%rax, %%rcx;"    "  mov $0, %%r14;"        /* f[6]*f[5] */
++
++		/* Step 2: Compute two parallel carry chains (same scheme as for the first element) */
++		"  xor %%r15, %%r15;"
++		"  adox %%rax, %%r10;"
++		"  adcx %%r8, %%r8;"
++		"  adox %%rcx, %%r11;"
++		"  adcx %%r9, %%r9;"
++		"  adox %%r15, %%rbx;"
++		"  adcx %%r10, %%r10;"
++		"  adox %%r15, %%r13;"
++		"  adcx %%r11, %%r11;"
++		"  adox %%r15, %%r14;"
++		"  adcx %%rbx, %%rbx;"
++		"  adcx %%r13, %%r13;"
++		"  adcx %%r14, %%r14;"
++
++		/* Step 3: Compute intermediate squares, add them to the doubled sum and store the 8 limbs at 64..120(%0) */
++		"  movq 32(%1), %%rdx;"     "  mulx %%rdx, %%rax, %%rcx;"    /* f[4]^2 */
++		                           "  movq %%rax, 64(%0);"
++		"  add %%rcx, %%r8;"       "  movq %%r8, 72(%0);"
++		"  movq 40(%1), %%rdx;"     "  mulx %%rdx, %%rax, %%rcx;"    /* f[5]^2 */
++		"  adcx %%rax, %%r9;"      "  movq %%r9, 80(%0);"
++		"  adcx %%rcx, %%r10;"     "  movq %%r10, 88(%0);"
++		"  movq 48(%1), %%rdx;"    "  mulx %%rdx, %%rax, %%rcx;"    /* f[6]^2 */
++		"  adcx %%rax, %%r11;"     "  movq %%r11, 96(%0);"
++		"  adcx %%rcx, %%rbx;"     "  movq %%rbx, 104(%0);"
++		"  movq 56(%1), %%rdx;"    "  mulx %%rdx, %%rax, %%rcx;"    /* f[7]^2 */
++		"  adcx %%rax, %%r13;"     "  movq %%r13, 112(%0);"
++		"  adcx %%rcx, %%r14;"     "  movq %%r14, 120(%0);"
++
++		/* Line up pointers: %1 <- tmp (now the source of the raw squares), %0 <- out (the destination) */
++		"  mov %0, %1;"
++		"  mov %2, %0;"
++
++		/* Step 1 (first square, 0..56(%1)): Compute dst + carry == tmp_hi * 38 + tmp_lo, as 2^256 = 38 mod 2^255 - 19 */
++		"  mov $38, %%rdx;"
++		"  mulxq 32(%1), %%r8, %%r13;"
++		"  xor %%rcx, %%rcx;"
++		"  adoxq 0(%1), %%r8;"
++		"  mulxq 40(%1), %%r9, %%rbx;"
++		"  adcx %%r13, %%r9;"
++		"  adoxq 8(%1), %%r9;"
++		"  mulxq 48(%1), %%r10, %%r13;"
++		"  adcx %%rbx, %%r10;"
++		"  adoxq 16(%1), %%r10;"
++		"  mulxq 56(%1), %%r11, %%rax;"
++		"  adcx %%r13, %%r11;"
++		"  adoxq 24(%1), %%r11;"
++		"  adcx %%rcx, %%rax;"
++		"  adox %%rcx, %%rax;"
++		"  imul %%rdx, %%rax;" /* overflow limb rax <- rax * 38 */
++
++		/* Step 2: Fold the carry back into dst */
++		"  add %%rax, %%r8;"
++		"  adcx %%rcx, %%r9;"
++		"  movq %%r9, 8(%0);"
++		"  adcx %%rcx, %%r10;"
++		"  movq %%r10, 16(%0);"
++		"  adcx %%rcx, %%r11;"
++		"  movq %%r11, 24(%0);"
++
++		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
++		"  mov $0, %%rax;"
++		"  cmovc %%rdx, %%rax;" /* rax <- CF ? 38 : 0 */
++		"  add %%rax, %%r8;"
++		"  movq %%r8, 0(%0);"
++
++		/* Step 1 (second square, 64..120(%1)): Compute dst + carry == tmp_hi * 38 + tmp_lo */
++		"  mov $38, %%rdx;"
++		"  mulxq 96(%1), %%r8, %%r13;"
++		"  xor %%rcx, %%rcx;"
++		"  adoxq 64(%1), %%r8;"
++		"  mulxq 104(%1), %%r9, %%rbx;"
++		"  adcx %%r13, %%r9;"
++		"  adoxq 72(%1), %%r9;"
++		"  mulxq 112(%1), %%r10, %%r13;"
++		"  adcx %%rbx, %%r10;"
++		"  adoxq 80(%1), %%r10;"
++		"  mulxq 120(%1), %%r11, %%rax;"
++		"  adcx %%r13, %%r11;"
++		"  adoxq 88(%1), %%r11;"
++		"  adcx %%rcx, %%rax;"
++		"  adox %%rcx, %%rax;"
++		"  imul %%rdx, %%rax;"
++
++		/* Step 2: Fold the carry back into dst (second result goes to 32..56(%0)) */
++		"  add %%rax, %%r8;"
++		"  adcx %%rcx, %%r9;"
++		"  movq %%r9, 40(%0);"
++		"  adcx %%rcx, %%r10;"
++		"  movq %%r10, 48(%0);"
++		"  adcx %%rcx, %%r11;"
++		"  movq %%r11, 56(%0);"
++
++		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
++		"  mov $0, %%rax;"
++		"  cmovc %%rdx, %%rax;"
++		"  add %%rax, %%r8;"
++		"  movq %%r8, 32(%0);"
++	: "+&r" (tmp), "+&r" (f), "+&r" (out)
++	:
++	: "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
++	);
++}
++
++/*
++ * One combined ladder step on the two points stored in p01_tmp1.
++ *
++ * p01_tmp1, in u64 limbs: [0..7] nq = (x2, z2), [8..15] nq_p1 = (x3, z3),
++ * [16..23] ab = (a, b), [24..31] dc = (d, c). q supplies x1 in its first
++ * four limbs; tmp2 is passed to every multiply/square helper as scratch.
++ * The arithmetic notes below assume fadd()/fsub() (defined elsewhere)
++ * compute "arg2 + arg3" and "arg2 - arg3" into arg1 -- TODO confirm.
++ */
++static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2)
++{
++	u64 *const nq = p01_tmp1;
++	u64 *const nq_p1 = p01_tmp1 + 8;
++	u64 *const ab = p01_tmp1 + 16;
++	u64 *const dc = p01_tmp1 + 24;
++	u64 *const x1 = q;
++	u64 *const x2 = nq, *const z2 = nq + 4;
++	u64 *const x3 = nq_p1, *const z3 = nq_p1 + 4;
++	u64 *const a = ab, *const b = ab + 4;
++	u64 *const d = dc, *const c = dc + 4;
++	u32 limb;
++
++	/* (a, b) <- (x2 + z2, x2 - z2) */
++	fadd(a, x2, z2);
++	fsub(b, x2, z2);
++
++	/* (d, c) <- (x3 - z3, x3 + z3), then (d, c) <- (d * a, c * b) */
++	fadd(c, x3, z3);
++	fsub(d, x3, z3);
++	fmul2(dc, dc, ab, tmp2);
++
++	/* (x3, z3) <- (d + c, d - c) */
++	fadd(x3, d, c);
++	fsub(z3, d, c);
++
++	/* (d, c) <- (a^2, b^2) and (x3, z3) <- (x3^2, z3^2) */
++	fsqr2(dc, ab, tmp2);
++	fsqr2(nq_p1, nq_p1, tmp2);
++
++	/* a <- c, then c <- d - c and b <- 121665 * c + d */
++	for (limb = 0; limb < 4; limb++)
++		a[limb] = c[limb];
++	fsub(c, d, c);
++	fmul_scalar(b, c, (u64)121665U);
++	fadd(b, b, d);
++
++	/* (x2, z2) <- (d * a, c * b) */
++	fmul2(nq, dc, ab, tmp2);
++
++	/* z3 <- z3 * x1 */
++	fmul(z3, z3, x1, tmp2);
++}
++
++static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2)
++{
++	/* nq = (x2, z2); tmp1 = (a, b, d, c), four limbs each; notes assume fadd/fsub add/subtract -- TODO confirm. */
++	u64 *const x2 = nq, *const z2 = nq + 4;
++	u64 *const ab = tmp1, *const dc = tmp1 + 8;
++	u64 *const a = ab, *const b = ab + 4;
++	u64 *const d = dc, *const c = dc + 4;
++	u32 limb;
++
++	/* (a, b) <- (x2 + z2, x2 - z2), then (d, c) <- (a^2, b^2) */
++	fadd(a, x2, z2);
++	fsub(b, x2, z2);
++	fsqr2(dc, ab, tmp2);
++
++	/* a <- c, then c <- d - c and b <- 121665 * c + d */
++	for (limb = 0; limb < 4; limb++)
++		a[limb] = c[limb];
++	fsub(c, d, c);
++	fmul_scalar(b, c, (u64)121665U);
++	fadd(b, b, d);
++	fmul2(nq, dc, ab, tmp2);	/* (x2, z2) <- (d * a, c * b) */
++}
++
++/*
++ * Runs the ladder over key, starting from the point in init1 (eight limbs,
++ * x then z), and copies the resulting eight limbs (x then z) to out.
++ *
++ * Only key bits 253..3 are read (bit pos lives in byte pos / 8 at position
++ * pos % 8): the first step is hard-wired to a bit value of 1 and the last
++ * three steps are plain doublings. out may be the same buffer as init1,
++ * since it is written only after the last use of init1. Both local
++ * buffers are wiped before returning.
++ */
++static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1)
++{
++	/*
++	 * state, in u64 limbs:
++	 *   [0..7]   nq        first working point, starts as (1, 0)
++	 *   [8..15]  nq_p1     second working point, starts as init1
++	 *   [16..31] scratch   temporaries of the point routines
++	 *   [32]     prev_bit  bit value used by the previous step
++	 * wide is passed down as the multiplication helpers' buffer.
++	 */
++	u64 wide[16U] = { 0U };
++	u64 state[33U] = { 0U };
++	u64 *const nq = state;
++	u64 *const nq_p1 = state + 8;
++	u64 *const scratch = state + 16;
++	u64 *const prev_bit = state + 32;
++	u32 limb;
++	u32 pos;
++
++	memcpy(nq_p1, init1, 8 * sizeof(init1[0U]));
++	nq[0U] = (u64)1U;
++	for (limb = 1; limb < 8; limb++)
++		nq[limb] = (u64)0U;
++
++	/* First step: fixed bit value 1, no key byte involved. */
++	cswap2((u64)1U, nq, nq_p1);
++	point_add_and_double(init1, state, wide);
++	*prev_bit = (u64)1U;
++
++	/* Key bits 253 down to 3: 251 steps. */
++	for (pos = 253; pos >= 3; pos--) {
++		const u64 bit = (u64)((key[pos / 8] >> (pos % 8)) & 1);
++
++		/* The swap flag is set only when bit differs from prev_bit. */
++		cswap2(*prev_bit ^ bit, nq, nq_p1);
++		point_add_and_double(init1, state, wide);
++		*prev_bit = bit;
++	}
++
++	/* Final conditional swap, keyed on the last bit processed. */
++	cswap2(*prev_bit, nq, nq_p1);
++
++	/* Three plain doublings take the place of key bits 2..0. */
++	point_double(nq, scratch, wide);
++	point_double(nq, scratch, wide);
++	point_double(nq, scratch, wide);
++
++	memcpy(out, nq, 8 * sizeof(nq[0U]));
++
++	/* Clear everything that was derived from key. */
++	memzero_explicit(wide, sizeof(wide));
++	memzero_explicit(state, sizeof(state));
++}
++
++static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1)
++{	/* o <- inp squared n1 times (callers pass n1 >= 1); tmp is the scratch buffer handed to fsqr(). */
++	u32 i;
++	fsqr(o, inp, tmp);	/* the first squaring is unconditional, so n1 == 0 still squares once */
++	for (i = (u32)1U; i < n1; i = i + (u32)1U)	/* counting from 1: "n1 - 1" would wrap to 2^32 - 1 for n1 == 0 */
++		fsqr(o, o, tmp);
++}
++
++/* o <- i^(2^255 - 21), i.e. i^(p - 2) for p = 2^255 - 19; tmp is scratch for fmul()/fsqr(). Exponents of i are noted per step. */
++static void finv(u64 *o, const u64 *i, u64 *tmp)
++{
++	u64 t1[16U] = { 0U };
++	u64 *a = t1;
++	u64 *b = t1 + (u32)4U;
++	u64 *c = t1 + (u32)8U;
++	u64 *t0 = t1 + (u32)12U;
++
++	fsquare_times(a, i, tmp, (u32)1U);		/* a  = 2 */
++	fsquare_times(t0, a, tmp, (u32)2U);		/* t0 = 8 */
++	fmul(b, t0, i, tmp);				/* b  = 9 */
++	fmul(a, b, a, tmp);				/* a  = 11 */
++	fsquare_times(t0, a, tmp, (u32)1U);		/* t0 = 22 */
++	fmul(b, t0, b, tmp);				/* b  = 2^5 - 1 */
++	fsquare_times(t0, b, tmp, (u32)5U);		/* t0 = 2^10 - 2^5 */
++	fmul(b, t0, b, tmp);				/* b  = 2^10 - 1 */
++	fsquare_times(t0, b, tmp, (u32)10U);		/* t0 = 2^20 - 2^10 */
++	fmul(c, t0, b, tmp);				/* c  = 2^20 - 1 */
++	fsquare_times(t0, c, tmp, (u32)20U);		/* t0 = 2^40 - 2^20 */
++	fmul(t0, t0, c, tmp);				/* t0 = 2^40 - 1 */
++	fsquare_times(t0, t0, tmp, (u32)10U);		/* t0 = 2^50 - 2^10 */
++	fmul(b, t0, b, tmp);				/* b  = 2^50 - 1 */
++	fsquare_times(t0, b, tmp, (u32)50U);		/* t0 = 2^100 - 2^50 */
++	fmul(c, t0, b, tmp);				/* c  = 2^100 - 1 */
++	fsquare_times(t0, c, tmp, (u32)100U);		/* t0 = 2^200 - 2^100 */
++	fmul(t0, t0, c, tmp);				/* t0 = 2^200 - 1 */
++	fsquare_times(t0, t0, tmp, (u32)50U);		/* t0 = 2^250 - 2^50 */
++	fmul(t0, t0, b, tmp);				/* t0 = 2^250 - 1 */
++	fsquare_times(t0, t0, tmp, (u32)5U);		/* t0 = 2^255 - 2^5 */
++	fmul(o, t0, a, tmp);				/* o  = 2^255 - 21 */
++
++	/* t1 holds powers of the input; wipe it the way montgomery_ladder() wipes its own buffers. */
++	memzero_explicit(t1, sizeof(t1));
++}
++
++/*
++ * Reduces the four-limb value in f and writes the result to the four limbs
++ * at b. f itself is modified along the way.
++ *
++ * Step 1: twice over, bit 255 is cleared and 19 times its value is added
++ * back with add_scalar(); the value add_scalar() returns is not used.
++ * Step 2: the constant 2^255 - 19 is subtracted under a mask built from
++ * gte_mask()/eq_mask() on the individual limbs. All four limbs of f are
++ * read into locals before the first store to b.
++ *
++ * NOTE(review): gte_mask()/eq_mask() are defined elsewhere; they are
++ * presumed to return all-ones when the comparison holds and zero
++ * otherwise -- confirm against their definitions.
++ */
++static void store_felem(u64 *b, u64 *f)
++{
++	/* 2^255 - 19 as limbs, least significant first: lo, mid, mid, hi. */
++	const u64 p_lo = (u64)0xffffffffffffffedU;
++	const u64 p_mid = (u64)0xffffffffffffffffU;
++	const u64 p_hi = (u64)0x7fffffffffffffffU;
++	u64 limb0, limb1, limb2, limb3;
++	u64 ge0, eq1, eq2, eq3;
++	u64 sub_p;
++	u32 pass;
++
++	/* Step 1: two identical fold passes over bit 255. */
++	for (pass = 0; pass < 2; pass++) {
++		const u64 top_bit = f[3U] >> (u32)63U;
++
++		f[3U] &= p_hi;
++		add_scalar(f, f, (u64)19U * top_bit);
++	}
++
++	/* Read every limb before b is written. */
++	limb0 = f[0U];
++	limb1 = f[1U];
++	limb2 = f[2U];
++	limb3 = f[3U];
++
++	/* Step 2: sub_p is the AND of the four per-limb comparison masks. */
++	ge0 = gte_mask(limb0, p_lo);
++	eq1 = eq_mask(limb1, p_mid);
++	eq2 = eq_mask(limb2, p_mid);
++	eq3 = eq_mask(limb3, p_hi);
++	sub_p = ((ge0 & eq1) & eq2) & eq3;
++
++	/* Each limb gives up its part of 2^255 - 19 where sub_p is set. */
++	b[0U] = limb0 - (sub_p & p_lo);
++	b[1U] = limb1 - (sub_p & p_mid);
++	b[2U] = limb2 - (sub_p & p_mid);
++	b[3U] = limb3 - (sub_p & p_hi);
++}
++
++static void encode_point(u8 *o, const u64 *i)
++{	/* Stores x * z^-1, reduced by store_felem(), as four u64 limbs at o; x = i[0..3], z = i[4..7]. */
++	u64 tmp[4U] = { 0U };
++	u64 tmp_w[16U] = { 0U };
++	finv(tmp, i + (u32)4U, tmp_w);		/* tmp <- z^-1 */
++	fmul(tmp, tmp, i, tmp_w);		/* tmp <- x * z^-1 */
++	store_felem((u64 *)o, tmp);
++	memzero_explicit(tmp, sizeof(tmp));	/* tmp and tmp_w held the result and its intermediates: wipe both */
++	memzero_explicit(tmp_w, sizeof(tmp_w));
++}
++
++/*
++ * out <- encode_point() of the montgomery_ladder() result for key priv and
++ * start point (x, 1), where x is the 32 bytes at pub read as four u64 limbs
++ * with bit 255 cleared.
++ *
++ * The limbs are loaded with memcpy(): pub is a byte pointer with no
++ * alignment guarantee visible here, and this code is x86-64 only, so the
++ * copy yields the same limb values as a native 64-bit load while avoiding
++ * the const-discarding pointer cast.
++ *
++ * init1 ends up holding the ladder output, so it is wiped before returning,
++ * in line with what montgomery_ladder() does for its own buffers.
++ */
++static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub)
++{
++	u64 init1[8U] = { 0U };
++	u64 *x = init1;
++	u64 *z = init1 + (u32)4U;
++
++	/* x <- pub with the top bit masked off */
++	memcpy(x, pub, (u32)4U * sizeof(x[0U]));
++	x[3U] &= (u64)0x7fffffffffffffffU;
++
++	/* z <- 1 */
++	z[0U] = (u64)1U;
++	z[1U] = (u64)0U;
++	z[2U] = (u64)0U;
++	z[3U] = (u64)0U;
++
++	montgomery_ladder(init1, priv, init1);
++	encode_point(out, init1);
++	memzero_explicit(init1, sizeof(init1));
++}
++
++/* The below constants were generated using this sage script:
++ *
++ * #!/usr/bin/env sage
++ * import sys
++ * from sage.all import *
++ * def limbs(n):
++ * 	n = int(n)
++ * 	l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64)
++ * 	return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l
++ * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0])
++ * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0]
++ * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s))
++ * print("static const u64 table_ladder[] = {")
++ * p = ec.lift_x(9)
++ * for i in range(252):
++ * 	l = (p[0] + p[2]) / (p[0] - p[2])
++ * 	print(("\t%s" + ("," if i != 251 else "")) % limbs(l))
++ * 	p = p * 2
++ * print("};")
++ *
++ */
++
++static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; /* x-coordinate of lift_x(9) - lift_x(1) per the sage script above, least significant 64-bit limb first */
++
++static const u64 table_ladder[] = {
++	0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL,
++	0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL,
++	0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL,
++	0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL,
++	0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL,
++	0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL,
++	0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL,
++	0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL,
++	0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL,
++	0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL,
++	0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL,
++	0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL,
++	0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL,
++	0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL,
++	0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL,
++	0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL,
++	0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL,
++	0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL,
++	0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL,
++	0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL,
++	0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL,
++	0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL,
++	0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL,
++	0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL,
++	0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL,
++	0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL,
++	0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL,
++	0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL,
++	0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL,
++	0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL,
++	0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL,
++	0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL,
++	0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL,
++	0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL,
++	0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL,
++	0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL,
++	0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL,
++	0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL,
++	0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL,
++	0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL,
++	0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL,
++	0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL,
++	0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL,
++	0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL,
++	0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL,
++	0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL,
++	0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL,
++	0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL,
++	0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL,
++	0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL,
++	0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL,
++	0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL,
++	0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL,
++	0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL,
++	0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL,
++	0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL,
++	0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL,
++	0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL,
++	0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL,
++	0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL,
++	0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL,
++	0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL,
++	0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL,
++	0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL,
++	0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL,
++	0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL,
++	0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL,
++	0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL,
++	0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL,
++	0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL,
++	0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL,
++	0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL,
++	0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL,
++	0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL,
++	0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL,
++	0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL,
++	0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL,
++	0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL,
++	0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL,
++	0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL,
++	0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL,
++	0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL,
++	0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL,
++	0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL,
++	0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL,
++	0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL,
++	0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL,
++	0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL,
++	0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL,
++	0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL,
++	0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL,
++	0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL,
++	0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL,
++	0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL,
++	0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL,
++	0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL,
++	0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL,
++	0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL,
++	0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL,
++	0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL,
++	0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL,
++	0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL,
++	0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL,
++	0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL,
++	0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL,
++	0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL,
++	0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL,
++	0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL,
++	0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL,
++	0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL,
++	0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL,
++	0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL,
++	0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL,
++	0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL,
++	0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL,
++	0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL,
++	0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL,
++	0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL,
++	0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL,
++	0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL,
++	0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL,
++	0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL,
++	0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL,
++	0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL,
++	0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL,
++	0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL,
++	0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL,
++	0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL,
++	0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL,
++	0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL,
++	0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL,
++	0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL,
++	0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL,
++	0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL,
++	0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL,
++	0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL,
++	0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL,
++	0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL,
++	0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL,
++	0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL,
++	0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL,
++	0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL,
++	0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL,
++	0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL,
++	0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL,
++	0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL,
++	0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL,
++	0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL,
++	0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL,
++	0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL,
++	0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL,
++	0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL,
++	0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL,
++	0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL,
++	0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL,
++	0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL,
++	0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL,
++	0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL,
++	0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL,
++	0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL,
++	0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL,
++	0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL,
++	0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL,
++	0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL,
++	0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL,
++	0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL,
++	0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL,
++	0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL,
++	0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL,
++	0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL,
++	0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL,
++	0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL,
++	0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL,
++	0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL,
++	0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL,
++	0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL,
++	0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL,
++	0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL,
++	0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL,
++	0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL,
++	0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL,
++	0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL,
++	0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL,
++	0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL,
++	0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL,
++	0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL,
++	0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL,
++	0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL,
++	0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL,
++	0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL,
++	0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL,
++	0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL,
++	0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL,
++	0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL,
++	0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL,
++	0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL,
++	0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL,
++	0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL,
++	0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL,
++	0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL,
++	0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL,
++	0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL,
++	0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL,
++	0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL,
++	0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL,
++	0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL,
++	0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL,
++	0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL,
++	0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL,
++	0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL,
++	0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL,
++	0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL,
++	0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL,
++	0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL,
++	0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL,
++	0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL,
++	0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL,
++	0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL,
++	0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL,
++	0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL,
++	0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL,
++	0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL,
++	0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL,
++	0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL,
++	0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL,
++	0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL,
++	0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL,
++	0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL,
++	0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL,
++	0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL,
++	0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL,
++	0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL,
++	0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL,
++	0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL,
++	0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL,
++	0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL,
++	0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL,
++	0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL,
++	0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL,
++	0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL,
++	0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL,
++	0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL,
++	0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL,
++	0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL,
++	0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL,
++	0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL,
++	0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL,
++	0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL,
++	0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL,
++	0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL,
++	0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL,
++	0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL
++};
++/* Base-point variant: computes out from priv via the table_ladder constants. */
++static void curve25519_ever64_base(u8 *out, const u8 *priv)
++{
++	u64 swap = 1;
++	int i, j, k;
++	u64 tmp[16 + 32 + 4]; /* 16 words of points, 32 of scratch, 4 for the key */
++	u64 *x1 = &tmp[0];
++	u64 *z1 = &tmp[4];
++	u64 *x2 = &tmp[8];
++	u64 *z2 = &tmp[12];
++	u64 *xz1 = &tmp[0]; /* x1 immediately followed by z1 */
++	u64 *xz2 = &tmp[8]; /* x2 immediately followed by z2 */
++	u64 *a = &tmp[0 + 16];
++	u64 *b = &tmp[4 + 16];
++	u64 *c = &tmp[8 + 16];
++	u64 *ab = &tmp[0 + 16]; /* a immediately followed by b */
++	u64 *abcd = &tmp[0 + 16]; /* same start as a */
++	u64 *ef = &tmp[16 + 16];
++	u64 *efgh = &tmp[16 + 16]; /* same start as ef */
++	u64 *key = &tmp[0 + 16 + 32]; /* local copy of the 32-byte scalar */
++
++	memcpy(key, priv, 32);
++	((u8 *)key)[0] &= 248; /* clear the three lowest bits of byte 0 */
++	((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; /* clear bit 7, set bit 6 */
++
++	x1[0] = 1, x1[1] = x1[2] = x1[3] = 0;
++	z1[0] = 1, z1[1] = z1[2] = z1[3] = 0;
++	z2[0] = 1, z2[1] = z2[2] = z2[3] = 0;
++	memcpy(x2, p_minus_s, sizeof(p_minus_s));
++
++	j = 3; /* low three bits were cleared above (little-endian layout) */
++	for (i = 0; i < 4; ++i) {
++		while (j < (const int[]){ 64, 64, 64, 63 }[i]) { /* last word stops at bit 62 */
++			u64 bit = (key[i] >> j) & 1;
++			k = (64 * i + j - 3); /* 0-based step number */
++			swap = swap ^ bit;
++			cswap2(swap, xz1, xz2);
++			swap = bit;
++			fsub(b, x1, z1);
++			fadd(a, x1, z1);
++			fmul(c, &table_ladder[4 * k], b, ef); /* 4 table words per step */
++			fsub(b, a, c);
++			fadd(a, a, c);
++			fsqr2(ab, ab, efgh);
++			fmul2(xz1, xz2, ab, efgh);
++			++j;
++		}
++		j = 0; /* later words are scanned from bit 0 */
++	}
++
++	point_double(xz1, abcd, efgh);
++	point_double(xz1, abcd, efgh);
++	point_double(xz1, abcd, efgh);
++	encode_point(out, xz1);
++
++	memzero_explicit(tmp, sizeof(tmp)); /* wipe the key copy and intermediates */
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64-glue.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,44 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include <asm/cpufeature.h>
++#include <asm/processor.h>
++
++#include "curve25519-x86_64.c"
++/* Set by curve25519_fpu_init(); gates the BMI2+ADX code path below. */
++static bool curve25519_use_bmi2_adx __ro_after_init;
++static bool *const curve25519_nobs[] __initconst = {
++	&curve25519_use_bmi2_adx };
++/* Requires CONFIG_AS_BMI2/CONFIG_AS_ADX plus both CPU feature bits at boot. */
++static void __init curve25519_fpu_init(void)
++{
++	curve25519_use_bmi2_adx = IS_ENABLED(CONFIG_AS_BMI2) &&
++				  IS_ENABLED(CONFIG_AS_ADX) &&
++				  boot_cpu_has(X86_FEATURE_BMI2) &&
++				  boot_cpu_has(X86_FEATURE_ADX);
++}
++/* Runs curve25519_ever64() when the BMI2+ADX path is enabled; false = not done. */
++static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
++				   const u8 secret[CURVE25519_KEY_SIZE],
++				   const u8 basepoint[CURVE25519_KEY_SIZE])
++{
++	if (IS_ENABLED(CONFIG_AS_ADX) && IS_ENABLED(CONFIG_AS_BMI2) &&
++	    curve25519_use_bmi2_adx) {
++		curve25519_ever64(mypublic, secret, basepoint);
++		return true;
++	}
++	return false; /* mypublic was not written */
++}
++/* Runs curve25519_ever64_base() when BMI2+ADX is enabled; false = not done. */
++static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
++					const u8 secret[CURVE25519_KEY_SIZE])
++{
++	if (IS_ENABLED(CONFIG_AS_ADX) && IS_ENABLED(CONFIG_AS_BMI2) &&
++	    curve25519_use_bmi2_adx) {
++		curve25519_ever64_base(pub, secret);
++		return true;
++	}
++	return false; /* pub was not written */
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/poly1305/poly1305-arm-glue.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,140 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include <asm/hwcap.h>
++#include <asm/neon.h>
++/* External asmlinkage routines; their definitions are not in this file. */
++asmlinkage void poly1305_init_arm(void *ctx, const u8 key[16]);
++asmlinkage void poly1305_blocks_arm(void *ctx, const u8 *inp, const size_t len,
++				    const u32 padbit);
++asmlinkage void poly1305_emit_arm(void *ctx, u8 mac[16], const u32 nonce[4]);
++asmlinkage void poly1305_blocks_neon(void *ctx, const u8 *inp, const size_t len,
++				     const u32 padbit);
++asmlinkage void poly1305_emit_neon(void *ctx, u8 mac[16], const u32 nonce[4]);
++/* Set by poly1305_fpu_init(); gates the *_neon routines in the hooks below. */
++static bool poly1305_use_neon __ro_after_init;
++static bool *const poly1305_nobs[] __initconst = { &poly1305_use_neon };
++/* Detect ASIMD (arm64) or NEON (arm); the flag is not assigned otherwise. */
++static void __init poly1305_fpu_init(void)
++{
++#if defined(CONFIG_ZINC_ARCH_ARM64)
++	poly1305_use_neon = cpu_have_named_feature(ASIMD);
++#elif defined(CONFIG_ZINC_ARCH_ARM)
++	poly1305_use_neon = elf_hwcap & HWCAP_NEON;
++#endif
++}
++/* Context layout as accessed by convert_to_base2_64(); one variant per arch. */
++#if defined(CONFIG_ZINC_ARCH_ARM64)
++struct poly1305_arch_internal {
++	union {
++		u32 h[5]; /* accumulator as base 2^26 limbs */
++		struct {
++			u64 h0, h1, h2; /* same bytes viewed as 64-bit words */
++		};
++	};
++	u64 is_base2_26; /* nonzero while h[] holds the base 2^26 form */
++	u64 r[2];
++};
++#elif defined(CONFIG_ZINC_ARCH_ARM)
++struct poly1305_arch_internal {
++	union {
++		u32 h[5]; /* accumulator as base 2^26 limbs */
++		struct {
++			u64 h0, h1; /* same bytes viewed as 64-bit words */
++			u32 h2;
++		} __packed;
++	};
++	u32 r[4];
++	u32 is_base2_26; /* nonzero while h[] holds the base 2^26 form */
++};
++#endif
++
++/* The NEON code uses base 2^26, while the scalar code uses base 2^64 on 64-bit
++ * and base 2^32 on 32-bit. If we hit the unfortunate situation of using NEON
++ * and then having to go back to scalar -- because the user is silly and has
++ * called the update function from two separate contexts -- then we need to
++ * convert back to the original base before proceeding. The below function is
++ * written for 64-bit integers, and so we have to swap words at the end on
++ * big-endian 32-bit. It is possible to reason that the initial reduction below
++ * is sufficient given the implementation invariants. However, for the avoidance
++ * of doubt and because this is not performance critical, we do the full
++ * reduction anyway.
++ */
++static void convert_to_base2_64(void *ctx)
++{
++	struct poly1305_arch_internal *state = ctx;
++	u32 cy; /* carry between limbs, later between words */
++
++	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !state->is_base2_26)
++		return; /* nothing to convert */
++	/* Propagate carries so that h[0..3] fit in 26 bits, then repack. */
++	cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
++	cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
++	cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
++	cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
++	state->h0 = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
++	state->h1 = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
++	state->h2 = state->h[4] >> 24;
++	if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
++		state->h0 = rol64(state->h0, 32);
++		state->h1 = rol64(state->h1, 32);
++	}
++#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
++	cy = (state->h2 >> 2) + (state->h2 & ~3ULL); /* equals 5 * (h2 >> 2) */
++	state->h2 &= 3; /* keep only the low two bits */
++	state->h0 += cy;
++	state->h1 += (cy = ULT(state->h0, cy)); /* carry out of h0 */
++	state->h2 += ULT(state->h1, cy); /* ULT(a, b): 1 if a < b (unsigned) */
++#undef ULT
++	state->is_base2_26 = 0; /* mark the conversion as done */
++}
++/* Always initialises ctx through poly1305_init_arm() and reports true. */
++static inline bool poly1305_init_arch(void *ctx,
++				      const u8 key[POLY1305_KEY_SIZE])
++{
++	poly1305_init_arm(ctx, key);
++	return true;
++}
++/* Absorb len bytes: NEON in page-sized pieces when usable, else scalar code. */
++static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
++					size_t len, const u32 padbit,
++					simd_context_t *simd_context)
++{
++	/* SIMD disables preemption, so relax after processing each page. */
++	BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
++		     PAGE_SIZE % POLY1305_BLOCK_SIZE);
++
++	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
++	    !simd_use(simd_context)) {
++		convert_to_base2_64(ctx); /* leave base 2^26 before scalar code */
++		poly1305_blocks_arm(ctx, inp, len, padbit);
++		return true;
++	}
++
++	for (;;) {
++		const size_t bytes = min_t(size_t, len, PAGE_SIZE); /* one page at most */
++
++		poly1305_blocks_neon(ctx, inp, bytes, padbit);
++		len -= bytes;
++		if (!len)
++			break;
++		inp += bytes;
++		simd_relax(simd_context);
++	}
++	return true;
++}
++/* Write the MAC with the NEON routine when usable, else with the scalar one. */
++static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
++				      const u32 nonce[4],
++				      simd_context_t *simd_context)
++{
++	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
++	    !simd_use(simd_context)) {
++		convert_to_base2_64(ctx); /* leave base 2^26 before scalar code */
++		poly1305_emit_arm(ctx, mac, nonce);
++	} else
++		poly1305_emit_neon(ctx, mac, nonce);
++	return true; /* handled on both paths */
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/poly1305/poly1305.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,168 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * Implementation of the Poly1305 message authenticator.
++ *
++ * Information: https://cr.yp.to/mac.html
++ */
++
++#include <zinc/poly1305.h>
++#include "../selftest/run.h"
++
++#include <asm/unaligned.h>
++#include <linux/kernel.h>
++#include <linux/string.h>
++#include <linux/module.h>
++#include <linux/init.h>
++/* Arch glue supplies the *_arch hooks; otherwise stubs report "not handled". */
++#if defined(CONFIG_ZINC_ARCH_X86_64)
++#include "poly1305-x86_64-glue.c"
++#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64)
++#include "poly1305-arm-glue.c"
++#elif defined(CONFIG_ZINC_ARCH_MIPS) || defined(CONFIG_ZINC_ARCH_MIPS64)
++#include "poly1305-mips-glue.c"
++#else /* no arch glue: every hook returns false */
++static inline bool poly1305_init_arch(void *ctx,
++				      const u8 key[POLY1305_KEY_SIZE])
++{
++	return false;
++}
++static inline bool poly1305_blocks_arch(void *ctx, const u8 *input,
++					size_t len, const u32 padbit,
++					simd_context_t *simd_context)
++{
++	return false;
++}
++static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
++				      const u32 nonce[4],
++				      simd_context_t *simd_context)
++{
++	return false;
++}
++static bool *const poly1305_nobs[] __initconst = { }; /* no flags to toggle */
++static void __init poly1305_fpu_init(void)
++{
++}
++#endif
++
++#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
++#include "poly1305-donna64.c"
++#else
++#include "poly1305-donna32.c"
++#endif
++/* Start a new MAC computation in ctx from the POLY1305_KEY_SIZE-byte key. */
++void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE])
++{
++	ctx->nonce[0] = get_unaligned_le32(&key[16]); /* key bytes 16..31 -> nonce */
++	ctx->nonce[1] = get_unaligned_le32(&key[20]);
++	ctx->nonce[2] = get_unaligned_le32(&key[24]);
++	ctx->nonce[3] = get_unaligned_le32(&key[28]);
++
++	if (!poly1305_init_arch(ctx->opaque, key)) /* false: arch did not handle it */
++		poly1305_init_generic(ctx->opaque, key);
++
++	ctx->num = 0; /* no partial block buffered yet */
++}
++EXPORT_SYMBOL(poly1305_init);
++/* Process len bytes via the arch hook, or the generic code if it declines. */
++static inline void poly1305_blocks(void *ctx, const u8 *input, const size_t len,
++				   const u32 padbit,
++				   simd_context_t *simd_context)
++{
++	if (!poly1305_blocks_arch(ctx, input, len, padbit, simd_context))
++		poly1305_blocks_generic(ctx, input, len, padbit);
++}
++/* Write the MAC via the arch hook, or the generic code if it declines. */
++static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_MAC_SIZE],
++				 const u32 nonce[4],
++				 simd_context_t *simd_context)
++{
++	if (!poly1305_emit_arch(ctx, mac, nonce, simd_context))
++		poly1305_emit_generic(ctx, mac, nonce);
++}
++/* Feed len bytes of input; a trailing partial block is buffered in ctx->data. */
++void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len,
++		     simd_context_t *simd_context)
++{
++	const size_t num = ctx->num; /* bytes already buffered */
++	size_t rem;
++
++	if (num) { /* complete the buffered partial block first */
++		rem = POLY1305_BLOCK_SIZE - num;
++		if (len < rem) { /* still short of a full block: just buffer */
++			memcpy(ctx->data + num, input, len);
++			ctx->num = num + len;
++			return;
++		}
++		memcpy(ctx->data + num, input, rem);
++		poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1,
++				simd_context);
++		input += rem;
++		len -= rem;
++	}
++
++	rem = len % POLY1305_BLOCK_SIZE; /* tail that does not fill a block */
++	len -= rem; /* len is now a multiple of the block size */
++
++	if (len >= POLY1305_BLOCK_SIZE) {
++		poly1305_blocks(ctx->opaque, input, len, 1, simd_context);
++		input += len;
++	}
++
++	if (rem)
++		memcpy(ctx->data, input, rem); /* keep the tail for the next call */
++
++	ctx->num = rem;
++}
++EXPORT_SYMBOL(poly1305_update);
++
++void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
++		    simd_context_t *simd_context)
++{
++	size_t num = ctx->num; /* bytes left in ctx->data by poly1305_update() */
++	/* Pad a buffered partial block: append 0x01, zero-fill, process with padbit 0. */
++	if (num) {
++		ctx->data[num++] = 1;
++		while (num < POLY1305_BLOCK_SIZE)
++			ctx->data[num++] = 0;
++		poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0,
++				simd_context);
++	}
++	/* Write the tag into mac, adding the nonce words saved by poly1305_init(). */
++	poly1305_emit(ctx->opaque, mac, ctx->nonce, simd_context);
++	/* Wipe the whole context, including the nonce words taken from the key. */
++	memzero_explicit(ctx, sizeof(*ctx));
++}
++EXPORT_SYMBOL(poly1305_final);
++
++#include "../selftest/poly1305.c"
++
++static bool nosimd __initdata = false; /* when true, poly1305_fpu_init() is skipped */
++/* Init entry point: optional feature detection, then the self-test. */
++#ifndef COMPAT_ZINC_IS_A_MODULE
++int __init poly1305_mod_init(void)
++#else
++static int __init mod_init(void)
++#endif
++{
++	if (!nosimd)
++		poly1305_fpu_init();
++	if (!selftest_run("poly1305", poly1305_selftest, poly1305_nobs,
++			  ARRAY_SIZE(poly1305_nobs)))
++		return -ENOTRECOVERABLE; /* selftest_run() returned false */
++	return 0;
++}
++
++#ifdef COMPAT_ZINC_IS_A_MODULE
++static void __exit mod_exit(void)
++{ /* empty body: nothing is torn down on unload */
++}
++/* Module boilerplate, only built when COMPAT_ZINC_IS_A_MODULE is defined. */
++module_param(nosimd, bool, 0); /* exposes nosimd as a module parameter (perm argument 0) */
++module_init(mod_init);
++module_exit(mod_exit);
++MODULE_LICENSE("GPL v2");
++MODULE_DESCRIPTION("Poly1305 one-time authenticator");
++MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/poly1305/poly1305-donna32.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,205 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This is based in part on Andrew Moon's poly1305-donna, which is in the
++ * public domain.
++ */
++
++struct poly1305_internal { /* generic state, 32-bit variant: five 26-bit limbs */
++	u32 h[5]; /* accumulator */
++	u32 r[5]; /* clamped first 16 key bytes */
++	u32 s[4]; /* 5 * r[1..4], precomputed by poly1305_init_generic() */
++};
++
++static void poly1305_init_generic(void *ctx, const u8 key[16])
++{
++	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
++	/* Split key[0..15] into five 26-bit limbs; the masks also apply the clamp below. */
++	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
++	st->r[0] = (get_unaligned_le32(&key[0])) & 0x3ffffff;
++	st->r[1] = (get_unaligned_le32(&key[3]) >> 2) & 0x3ffff03;
++	st->r[2] = (get_unaligned_le32(&key[6]) >> 4) & 0x3ffc0ff;
++	st->r[3] = (get_unaligned_le32(&key[9]) >> 6) & 0x3f03fff;
++	st->r[4] = (get_unaligned_le32(&key[12]) >> 8) & 0x00fffff;
++	/* Precompute the multiples used by poly1305_blocks_generic(). */
++	/* s = 5*r */
++	st->s[0] = st->r[1] * 5;
++	st->s[1] = st->r[2] * 5;
++	st->s[2] = st->r[3] * 5;
++	st->s[3] = st->r[4] * 5;
++
++	/* h = 0 */
++	st->h[0] = 0;
++	st->h[1] = 0;
++	st->h[2] = 0;
++	st->h[3] = 0;
++	st->h[4] = 0;
++}
++
++static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len,
++				    const u32 padbit)
++{
++	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
++	const u32 hibit = padbit << 24; /* bit 24 of limb 4 == bit 128 of the block */
++	u32 r0, r1, r2, r3, r4;
++	u32 s1, s2, s3, s4;
++	u32 h0, h1, h2, h3, h4;
++	u64 d0, d1, d2, d3, d4;
++	u32 c;
++	/* Work on local copies of the key limbs, the 5*r values and the accumulator. */
++	r0 = st->r[0];
++	r1 = st->r[1];
++	r2 = st->r[2];
++	r3 = st->r[3];
++	r4 = st->r[4];
++
++	s1 = st->s[0];
++	s2 = st->s[1];
++	s3 = st->s[2];
++	s4 = st->s[3];
++
++	h0 = st->h[0];
++	h1 = st->h[1];
++	h2 = st->h[2];
++	h3 = st->h[3];
++	h4 = st->h[4];
++	/* Only whole blocks are consumed; any trailing bytes are ignored here. */
++	while (len >= POLY1305_BLOCK_SIZE) {
++		/* h += m[i] */
++		h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
++		h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
++		h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
++		h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
++		h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
++		/* s1..s4 = 5*r1..r4: products wrapping past 2^130 re-enter times 5. */
++		/* h *= r */
++		d0 = ((u64)h0 * r0) + ((u64)h1 * s4) +
++		     ((u64)h2 * s3) + ((u64)h3 * s2) +
++		     ((u64)h4 * s1);
++		d1 = ((u64)h0 * r1) + ((u64)h1 * r0) +
++		     ((u64)h2 * s4) + ((u64)h3 * s3) +
++		     ((u64)h4 * s2);
++		d2 = ((u64)h0 * r2) + ((u64)h1 * r1) +
++		     ((u64)h2 * r0) + ((u64)h3 * s4) +
++		     ((u64)h4 * s3);
++		d3 = ((u64)h0 * r3) + ((u64)h1 * r2) +
++		     ((u64)h2 * r1) + ((u64)h3 * r0) +
++		     ((u64)h4 * s4);
++		d4 = ((u64)h0 * r4) + ((u64)h1 * r3) +
++		     ((u64)h2 * r2) + ((u64)h3 * r1) +
++		     ((u64)h4 * r0);
++		/* Carry chain d0 -> d4 in 26-bit steps; d4's carry wraps into h0 as c * 5. */
++		/* (partial) h %= p */
++		c = (u32)(d0 >> 26);
++		h0 = (u32)d0 & 0x3ffffff;
++		d1 += c;
++		c = (u32)(d1 >> 26);
++		h1 = (u32)d1 & 0x3ffffff;
++		d2 += c;
++		c = (u32)(d2 >> 26);
++		h2 = (u32)d2 & 0x3ffffff;
++		d3 += c;
++		c = (u32)(d3 >> 26);
++		h3 = (u32)d3 & 0x3ffffff;
++		d4 += c;
++		c = (u32)(d4 >> 26);
++		h4 = (u32)d4 & 0x3ffffff;
++		h0 += c * 5; /* 2^130 == 5 (mod 2^130 - 5) */
++		c = (h0 >> 26);
++		h0 = h0 & 0x3ffffff;
++		h1 += c; /* h1 may exceed 26 bits here; poly1305_emit_generic() carries it */
++
++		input += POLY1305_BLOCK_SIZE;
++		len -= POLY1305_BLOCK_SIZE;
++	}
++	/* Store the partially reduced accumulator back into the state. */
++	st->h[0] = h0;
++	st->h[1] = h1;
++	st->h[2] = h2;
++	st->h[3] = h3;
++	st->h[4] = h4;
++}
++
++static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
++{
++	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
++	u32 h0, h1, h2, h3, h4, c;
++	u32 g0, g1, g2, g3, g4;
++	u64 f;
++	u32 mask;
++
++	/* fully carry h */
++	h0 = st->h[0];
++	h1 = st->h[1];
++	h2 = st->h[2];
++	h3 = st->h[3];
++	h4 = st->h[4];
++	/* Carry h1 -> h2 -> h3 -> h4, then wrap h4's carry into h0 as c * 5. */
++	c = h1 >> 26;
++	h1 = h1 & 0x3ffffff;
++	h2 += c;
++	c = h2 >> 26;
++	h2 = h2 & 0x3ffffff;
++	h3 += c;
++	c = h3 >> 26;
++	h3 = h3 & 0x3ffffff;
++	h4 += c;
++	c = h4 >> 26;
++	h4 = h4 & 0x3ffffff;
++	h0 += c * 5;
++	c = h0 >> 26;
++	h0 = h0 & 0x3ffffff;
++	h1 += c;
++	/* g = h + 5 - 2^130, i.e. h - p with p = 2^130 - 5. */
++	/* compute h + -p */
++	g0 = h0 + 5;
++	c = g0 >> 26;
++	g0 &= 0x3ffffff;
++	g1 = h1 + c;
++	c = g1 >> 26;
++	g1 &= 0x3ffffff;
++	g2 = h2 + c;
++	c = g2 >> 26;
++	g2 &= 0x3ffffff;
++	g3 = h3 + c;
++	c = g3 >> 26;
++	g3 &= 0x3ffffff;
++	g4 = h4 + c - (1UL << 26);
++	/* Branch-free select: the sign bit of g4 tells whether the subtraction underflowed. */
++	/* select h if h < p, or h + -p if h >= p */
++	mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; /* all-ones if h >= p, else 0 */
++	g0 &= mask;
++	g1 &= mask;
++	g2 &= mask;
++	g3 &= mask;
++	g4 &= mask;
++	mask = ~mask;
++
++	h0 = (h0 & mask) | g0;
++	h1 = (h1 & mask) | g1;
++	h2 = (h2 & mask) | g2;
++	h3 = (h3 & mask) | g3;
++	h4 = (h4 & mask) | g4;
++	/* Repack the 26-bit limbs into four 32-bit words (bits at 2^128 and above are dropped). */
++	/* h = h % (2^128) */
++	h0 = ((h0) | (h1 << 26)) & 0xffffffff;
++	h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
++	h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
++	h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
++	/* 64-bit f carries between words; the carry out of the last word is discarded. */
++	/* mac = (h + nonce) % (2^128) */
++	f = (u64)h0 + nonce[0];
++	h0 = (u32)f;
++	f = (u64)h1 + nonce[1] + (f >> 32);
++	h1 = (u32)f;
++	f = (u64)h2 + nonce[2] + (f >> 32);
++	h2 = (u32)f;
++	f = (u64)h3 + nonce[3] + (f >> 32);
++	h3 = (u32)f;
++	/* Write the 16-byte tag as four little-endian words. */
++	put_unaligned_le32(h0, &mac[0]);
++	put_unaligned_le32(h1, &mac[4]);
++	put_unaligned_le32(h2, &mac[8]);
++	put_unaligned_le32(h3, &mac[12]);
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/poly1305/poly1305-donna64.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,182 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This is based in part on Andrew Moon's poly1305-donna, which is in the
++ * public domain.
++ */
++
++typedef __uint128_t u128; /* holds the limb products in poly1305_blocks_generic() */
++/* Generic state, 64-bit variant: limbs of 44, 44 and 42 bits. */
++struct poly1305_internal {
++	u64 r[3]; /* clamped first 16 key bytes */
++	u64 h[3]; /* accumulator */
++	u64 s[2]; /* 20 * r[1], 20 * r[2], precomputed by poly1305_init_generic() */
++};
++
++static void poly1305_init_generic(void *ctx, const u8 key[16])
++{
++	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
++	u64 t0, t1;
++	/* Load key[0..15] as two LE words; the limb masks below also apply the clamp. */
++	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
++	t0 = get_unaligned_le64(&key[0]);
++	t1 = get_unaligned_le64(&key[8]);
++
++	st->r[0] = t0 & 0xffc0fffffffULL;
++	st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffffULL;
++	st->r[2] = ((t1 >> 24)) & 0x00ffffffc0fULL;
++	/* 20 = 4 * 5: limbs wrap at 2^132 = 4 * 2^130 and 2^130 == 5 (mod 2^130 - 5). */
++	/* s = 20*r */
++	st->s[0] = st->r[1] * 20;
++	st->s[1] = st->r[2] * 20;
++
++	/* h = 0 */
++	st->h[0] = 0;
++	st->h[1] = 0;
++	st->h[2] = 0;
++}
++
++static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len,
++				    const u32 padbit)
++{
++	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
++	const u64 hibit = ((u64)padbit) << 40; /* bit 40 of limb 2 == bit 128 of the block */
++	u64 r0, r1, r2;
++	u64 s1, s2;
++	u64 h0, h1, h2;
++	u64 c;
++	u128 d0, d1, d2, d;
++	/* Work on local copies of the key limbs, the accumulator and the 20*r values. */
++	r0 = st->r[0];
++	r1 = st->r[1];
++	r2 = st->r[2];
++
++	h0 = st->h[0];
++	h1 = st->h[1];
++	h2 = st->h[2];
++
++	s1 = st->s[0];
++	s2 = st->s[1];
++	/* Only whole blocks are consumed; any trailing bytes are ignored here. */
++	while (len >= POLY1305_BLOCK_SIZE) {
++		u64 t0, t1;
++
++		/* h += m[i] */
++		t0 = get_unaligned_le64(&input[0]);
++		t1 = get_unaligned_le64(&input[8]);
++
++		h0 += t0 & 0xfffffffffffULL;
++		h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL;
++		h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit;
++		/* s1 = 20*r1, s2 = 20*r2: products wrapping past 2^132 re-enter times 20. */
++		/* h *= r */
++		d0 = (u128)h0 * r0;
++		d = (u128)h1 * s2;
++		d0 += d;
++		d = (u128)h2 * s1;
++		d0 += d;
++		d1 = (u128)h0 * r1;
++		d = (u128)h1 * r0;
++		d1 += d;
++		d = (u128)h2 * s2;
++		d1 += d;
++		d2 = (u128)h0 * r2;
++		d = (u128)h1 * r1;
++		d2 += d;
++		d = (u128)h2 * r0;
++		d2 += d;
++		/* Carry chain d0 -> d1 -> d2; d2's carry wraps into h0 as c * 5. */
++		/* (partial) h %= p */
++		c = (u64)(d0 >> 44);
++		h0 = (u64)d0 & 0xfffffffffffULL;
++		d1 += c;
++		c = (u64)(d1 >> 44);
++		h1 = (u64)d1 & 0xfffffffffffULL;
++		d2 += c;
++		c = (u64)(d2 >> 42); /* top limb is 42 bits wide */
++		h2 = (u64)d2 & 0x3ffffffffffULL;
++		h0 += c * 5; /* 2^130 == 5 (mod 2^130 - 5) */
++		c = h0 >> 44;
++		h0 = h0 & 0xfffffffffffULL;
++		h1 += c; /* h1 may exceed 44 bits here; poly1305_emit_generic() carries it */
++
++		input += POLY1305_BLOCK_SIZE;
++		len -= POLY1305_BLOCK_SIZE;
++	}
++	/* Store the partially reduced accumulator back into the state. */
++	st->h[0] = h0;
++	st->h[1] = h1;
++	st->h[2] = h2;
++}
++
++static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
++{
++	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
++	u64 h0, h1, h2, c;
++	u64 g0, g1, g2;
++	u64 t0, t1;
++
++	/* fully carry h */
++	h0 = st->h[0];
++	h1 = st->h[1];
++	h2 = st->h[2];
++	/* Two carry passes, each wrapping the overflow of h2 into h0 as c * 5. */
++	c = h1 >> 44;
++	h1 &= 0xfffffffffffULL;
++	h2 += c;
++	c = h2 >> 42;
++	h2 &= 0x3ffffffffffULL;
++	h0 += c * 5;
++	c = h0 >> 44;
++	h0 &= 0xfffffffffffULL;
++	h1 += c;
++	c = h1 >> 44;
++	h1 &= 0xfffffffffffULL;
++	h2 += c;
++	c = h2 >> 42;
++	h2 &= 0x3ffffffffffULL;
++	h0 += c * 5;
++	c = h0 >> 44;
++	h0 &= 0xfffffffffffULL;
++	h1 += c;
++	/* g = h + 5 - 2^130, i.e. h - p with p = 2^130 - 5. */
++	/* compute h + -p */
++	g0 = h0 + 5;
++	c  = g0 >> 44;
++	g0 &= 0xfffffffffffULL;
++	g1 = h1 + c;
++	c  = g1 >> 44;
++	g1 &= 0xfffffffffffULL;
++	g2 = h2 + c - (1ULL << 42);
++	/* Branch-free select: the sign bit of g2 tells whether the subtraction underflowed. */
++	/* select h if h < p, or h + -p if h >= p */
++	c = (g2 >> ((sizeof(u64) * 8) - 1)) - 1; /* all-ones if h >= p, else 0 */
++	g0 &= c;
++	g1 &= c;
++	g2 &= c;
++	c  = ~c;
++	h0 = (h0 & c) | g0;
++	h1 = (h1 & c) | g1;
++	h2 = (h2 & c) | g2;
++	/* The nonce words are regrouped into the same 44/44/42-bit limbs before adding. */
++	/* h = (h + nonce) */
++	t0 = ((u64)nonce[1] << 32) | nonce[0];
++	t1 = ((u64)nonce[3] << 32) | nonce[2];
++
++	h0 += t0 & 0xfffffffffffULL;
++	c = h0 >> 44;
++	h0 &= 0xfffffffffffULL;
++	h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL) + c;
++	c = h1 >> 44;
++	h1 &= 0xfffffffffffULL;
++	h2 += (((t1 >> 24)) & 0x3ffffffffffULL) + c;
++	h2 &= 0x3ffffffffffULL;
++	/* Repack the limbs into two 64-bit words; bits at 2^128 and above shift out. */
++	/* mac = h % (2^128) */
++	h0 = h0 | (h1 << 44);
++	h1 = (h1 >> 20) | (h2 << 24);
++	/* Write the 16-byte tag as two little-endian 64-bit words. */
++	put_unaligned_le64(h0, &mac[0]);
++	put_unaligned_le64(h1, &mac[8]);
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/poly1305/poly1305-mips-glue.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,37 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++asmlinkage void poly1305_init_mips(void *ctx, const u8 key[16]); /* prototypes only; the definitions are not in this file */
++asmlinkage void poly1305_blocks_mips(void *ctx, const u8 *inp, const size_t len,
++				     const u32 padbit);
++asmlinkage void poly1305_emit_mips(void *ctx, u8 mac[16], const u32 nonce[4]);
++
++static bool *const poly1305_nobs[] __initconst = { }; /* empty: this glue defines no feature flags */
++static void __init poly1305_fpu_init(void)
++{ /* empty body: this glue performs no feature detection */
++}
++
++static inline bool poly1305_init_arch(void *ctx,
++				      const u8 key[POLY1305_KEY_SIZE])
++{
++	poly1305_init_mips(ctx, key);
++	return true; /* always handled here, so the caller skips the generic fallback */
++}
++
++static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
++					size_t len, const u32 padbit,
++					simd_context_t *simd_context)
++{
++	poly1305_blocks_mips(ctx, inp, len, padbit); /* simd_context is not used on this path */
++	return true; /* always handled here, so the caller skips the generic fallback */
++}
++
++static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
++				      const u32 nonce[4],
++				      simd_context_t *simd_context)
++{
++	poly1305_emit_mips(ctx, mac, nonce); /* simd_context is not used on this path */
++	return true; /* always handled here, so the caller skips the generic fallback */
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/poly1305/poly1305-x86_64-glue.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,156 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include <asm/cpufeature.h>
++#include <asm/processor.h>
++#include <asm/intel-family.h>
++
++asmlinkage void poly1305_init_x86_64(void *ctx, /* prototypes only; the definitions are not in this file */
++				     const u8 key[POLY1305_KEY_SIZE]);
++asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, /* scalar block routine */
++				       const size_t len, const u32 padbit);
++asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_MAC_SIZE], /* scalar emit */
++				     const u32 nonce[4]);
++asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_MAC_SIZE], /* emit used while state is base 2^26 */
++				  const u32 nonce[4]);
++asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
++				    const u32 padbit);
++asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
++				     const u32 padbit);
++asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
++				       const size_t len, const u32 padbit);
++
++static bool poly1305_use_avx __ro_after_init; /* set once by poly1305_fpu_init() */
++static bool poly1305_use_avx2 __ro_after_init;
++static bool poly1305_use_avx512 __ro_after_init;
++static bool *const poly1305_nobs[] __initconst = { /* flag pointers handed to selftest_run() by the init function */
++	&poly1305_use_avx, &poly1305_use_avx2, &poly1305_use_avx512 };
++
++static void __init poly1305_fpu_init(void)
++{ /* Each flag needs the CPU feature bits plus the matching cpu_has_xfeatures() check. */
++	poly1305_use_avx =
++		boot_cpu_has(X86_FEATURE_AVX) &&
++		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
++	poly1305_use_avx2 =
++		boot_cpu_has(X86_FEATURE_AVX) &&
++		boot_cpu_has(X86_FEATURE_AVX2) &&
++		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
++#ifndef COMPAT_CANNOT_USE_AVX512
++	poly1305_use_avx512 =
++		boot_cpu_has(X86_FEATURE_AVX) &&
++		boot_cpu_has(X86_FEATURE_AVX2) &&
++		boot_cpu_has(X86_FEATURE_AVX512F) &&
++		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
++				  XFEATURE_MASK_AVX512, NULL) &&
++		/* Skylake downclocks unacceptably much when using zmm. */
++		boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X; /* NOTE(review): compares the model only, not vendor/family */
++#endif
++}
++
++static inline bool poly1305_init_arch(void *ctx,
++				      const u8 key[POLY1305_KEY_SIZE])
++{
++	poly1305_init_x86_64(ctx, key);
++	return true; /* always handled here, so the caller skips the generic fallback */
++}
++
++struct poly1305_arch_internal { /* C view of the opaque state shared with the assembly routines */
++	union {
++		struct {
++			u32 h[5]; /* accumulator as 26-bit limbs (base 2^26) */
++			u32 is_base2_26; /* non-zero while h[] is the valid view */
++		};
++		u64 hs[3]; /* same storage viewed in base 2^64; hs[2] overlaps h[4] and is_base2_26 */
++	};
++	u64 r[2]; /* not referenced from C in this file; presumably the key as used by the asm (TODO confirm) */
++	u64 pad;
++	struct { u32 r2, r1, r4, r3; } rn[9]; /* not referenced from C in this file; presumably asm-side tables (TODO confirm) */
++};
++
++/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
++ * the unfortunate situation of using AVX and then having to go back to scalar
++ * -- because the user is silly and has called the update function from two
++ * separate contexts -- then we need to convert back to the original base before
++ * proceeding. It is possible to reason that the initial reduction below is
++ * sufficient given the implementation invariants. However, for the avoidance of
++ * doubt and because this is not performance critical, we do the full reduction
++ * anyway.
++ */
++static void convert_to_base2_64(void *ctx)
++{
++	struct poly1305_arch_internal *state = ctx;
++	u32 cy;
++	/* Nothing to do when the accumulator is already held in base 2^64. */
++	if (!state->is_base2_26)
++		return;
++	/* Carry-propagate the 26-bit limbs, then repack them into hs[0..2] (which alias h[]). */
++	cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
++	cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
++	cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
++	cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
++	state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
++	state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
++	state->hs[2] = state->h[4] >> 24; /* bits 128 and up */
++#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
++	cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL); /* 5 * (hs[2] >> 2): fold bits >= 2^130 back in */
++	state->hs[2] &= 3;
++	state->hs[0] += cy;
++	state->hs[1] += (cy = ULT(state->hs[0], cy)); /* ULT(a, b) is 1 when a < b, i.e. the add wrapped */
++	state->hs[2] += ULT(state->hs[1], cy);
++#undef ULT
++	state->is_base2_26 = 0; /* this write also clears the upper half of hs[2], which shares its storage */
++}
++
++static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
++					size_t len, const u32 padbit,
++					simd_context_t *simd_context)
++{
++	struct poly1305_arch_internal *state = ctx;
++
++	/* SIMD disables preemption, so relax after processing each page. */
++	BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
++		     PAGE_SIZE % POLY1305_BLOCK_SIZE);
++	/* Scalar path: no AVX, short input with h not yet in base 2^26, or simd_use() returns false. */
++	if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx ||
++	    (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
++	    !simd_use(simd_context)) {
++		convert_to_base2_64(ctx);
++		poly1305_blocks_x86_64(ctx, inp, len, padbit);
++		return true;
++	}
++	/* SIMD path: feed at most PAGE_SIZE bytes per call, widest enabled variant first. */
++	for (;;) {
++		const size_t bytes = min_t(size_t, len, PAGE_SIZE);
++
++		if (IS_ENABLED(CONFIG_AS_AVX512) && poly1305_use_avx512)
++			poly1305_blocks_avx512(ctx, inp, bytes, padbit);
++		else if (IS_ENABLED(CONFIG_AS_AVX2) && poly1305_use_avx2)
++			poly1305_blocks_avx2(ctx, inp, bytes, padbit);
++		else
++			poly1305_blocks_avx(ctx, inp, bytes, padbit);
++		len -= bytes;
++		if (!len)
++			break;
++		inp += bytes;
++		simd_relax(simd_context);
++	}
++	/* Both paths report the input as handled, so the generic code is never used. */
++	return true;
++}
++
++static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
++				      const u32 nonce[4],
++				      simd_context_t *simd_context)
++{
++	struct poly1305_arch_internal *state = ctx;
++	/* The AVX emit is used only while h is in base 2^26 and simd_use() returns true. */
++	if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx ||
++	    !state->is_base2_26 || !simd_use(simd_context)) {
++		convert_to_base2_64(ctx); /* no-op if h is already in base 2^64 */
++		poly1305_emit_x86_64(ctx, mac, nonce);
++	} else
++		poly1305_emit_avx(ctx, mac, nonce);
++	return true; /* always handled here, so the caller skips the generic fallback */
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/selftest/blake2s.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,2090 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {
++	{ 0x69, 0x21, 0x7a, 0x30, 0x79, 0x90, 0x80, 0x94,
++	  0xe1, 0x11, 0x21, 0xd0, 0x42, 0x35, 0x4a, 0x7c,
++	  0x1f, 0x55, 0xb6, 0x48, 0x2c, 0xa1, 0xa5, 0x1e,
++	  0x1b, 0x25, 0x0d, 0xfd, 0x1e, 0xd0, 0xee, 0xf9 },
++	{ 0xe3, 0x4d, 0x74, 0xdb, 0xaf, 0x4f, 0xf4, 0xc6,
++	  0xab, 0xd8, 0x71, 0xcc, 0x22, 0x04, 0x51, 0xd2,
++	  0xea, 0x26, 0x48, 0x84, 0x6c, 0x77, 0x57, 0xfb,
++	  0xaa, 0xc8, 0x2f, 0xe5, 0x1a, 0xd6, 0x4b, 0xea },
++	{ 0xdd, 0xad, 0x9a, 0xb1, 0x5d, 0xac, 0x45, 0x49,
++	  0xba, 0x42, 0xf4, 0x9d, 0x26, 0x24, 0x96, 0xbe,
++	  0xf6, 0xc0, 0xba, 0xe1, 0xdd, 0x34, 0x2a, 0x88,
++	  0x08, 0xf8, 0xea, 0x26, 0x7c, 0x6e, 0x21, 0x0c },
++	{ 0xe8, 0xf9, 0x1c, 0x6e, 0xf2, 0x32, 0xa0, 0x41,
++	  0x45, 0x2a, 0xb0, 0xe1, 0x49, 0x07, 0x0c, 0xdd,
++	  0x7d, 0xd1, 0x76, 0x9e, 0x75, 0xb3, 0xa5, 0x92,
++	  0x1b, 0xe3, 0x78, 0x76, 0xc4, 0x5c, 0x99, 0x00 },
++	{ 0x0c, 0xc7, 0x0e, 0x00, 0x34, 0x8b, 0x86, 0xba,
++	  0x29, 0x44, 0xd0, 0xc3, 0x20, 0x38, 0xb2, 0x5c,
++	  0x55, 0x58, 0x4f, 0x90, 0xdf, 0x23, 0x04, 0xf5,
++	  0x5f, 0xa3, 0x32, 0xaf, 0x5f, 0xb0, 0x1e, 0x20 },
++	{ 0xec, 0x19, 0x64, 0x19, 0x10, 0x87, 0xa4, 0xfe,
++	  0x9d, 0xf1, 0xc7, 0x95, 0x34, 0x2a, 0x02, 0xff,
++	  0xc1, 0x91, 0xa5, 0xb2, 0x51, 0x76, 0x48, 0x56,
++	  0xae, 0x5b, 0x8b, 0x57, 0x69, 0xf0, 0xc6, 0xcd },
++	{ 0xe1, 0xfa, 0x51, 0x61, 0x8d, 0x7d, 0xf4, 0xeb,
++	  0x70, 0xcf, 0x0d, 0x5a, 0x9e, 0x90, 0x6f, 0x80,
++	  0x6e, 0x9d, 0x19, 0xf7, 0xf4, 0xf0, 0x1e, 0x3b,
++	  0x62, 0x12, 0x88, 0xe4, 0x12, 0x04, 0x05, 0xd6 },
++	{ 0x59, 0x80, 0x01, 0xfa, 0xfb, 0xe8, 0xf9, 0x4e,
++	  0xc6, 0x6d, 0xc8, 0x27, 0xd0, 0x12, 0xcf, 0xcb,
++	  0xba, 0x22, 0x28, 0x56, 0x9f, 0x44, 0x8e, 0x89,
++	  0xea, 0x22, 0x08, 0xc8, 0xbf, 0x76, 0x92, 0x93 },
++	{ 0xc7, 0xe8, 0x87, 0xb5, 0x46, 0x62, 0x36, 0x35,
++	  0xe9, 0x3e, 0x04, 0x95, 0x59, 0x8f, 0x17, 0x26,
++	  0x82, 0x19, 0x96, 0xc2, 0x37, 0x77, 0x05, 0xb9,
++	  0x3a, 0x1f, 0x63, 0x6f, 0x87, 0x2b, 0xfa, 0x2d },
++	{ 0xc3, 0x15, 0xa4, 0x37, 0xdd, 0x28, 0x06, 0x2a,
++	  0x77, 0x0d, 0x48, 0x19, 0x67, 0x13, 0x6b, 0x1b,
++	  0x5e, 0xb8, 0x8b, 0x21, 0xee, 0x53, 0xd0, 0x32,
++	  0x9c, 0x58, 0x97, 0x12, 0x6e, 0x9d, 0xb0, 0x2c },
++	{ 0xbb, 0x47, 0x3d, 0xed, 0xdc, 0x05, 0x5f, 0xea,
++	  0x62, 0x28, 0xf2, 0x07, 0xda, 0x57, 0x53, 0x47,
++	  0xbb, 0x00, 0x40, 0x4c, 0xd3, 0x49, 0xd3, 0x8c,
++	  0x18, 0x02, 0x63, 0x07, 0xa2, 0x24, 0xcb, 0xff },
++	{ 0x68, 0x7e, 0x18, 0x73, 0xa8, 0x27, 0x75, 0x91,
++	  0xbb, 0x33, 0xd9, 0xad, 0xf9, 0xa1, 0x39, 0x12,
++	  0xef, 0xef, 0xe5, 0x57, 0xca, 0xfc, 0x39, 0xa7,
++	  0x95, 0x26, 0x23, 0xe4, 0x72, 0x55, 0xf1, 0x6d },
++	{ 0x1a, 0xc7, 0xba, 0x75, 0x4d, 0x6e, 0x2f, 0x94,
++	  0xe0, 0xe8, 0x6c, 0x46, 0xbf, 0xb2, 0x62, 0xab,
++	  0xbb, 0x74, 0xf4, 0x50, 0xef, 0x45, 0x6d, 0x6b,
++	  0x4d, 0x97, 0xaa, 0x80, 0xce, 0x6d, 0xa7, 0x67 },
++	{ 0x01, 0x2c, 0x97, 0x80, 0x96, 0x14, 0x81, 0x6b,
++	  0x5d, 0x94, 0x94, 0x47, 0x7d, 0x4b, 0x68, 0x7d,
++	  0x15, 0xb9, 0x6e, 0xb6, 0x9c, 0x0e, 0x80, 0x74,
++	  0xa8, 0x51, 0x6f, 0x31, 0x22, 0x4b, 0x5c, 0x98 },
++	{ 0x91, 0xff, 0xd2, 0x6c, 0xfa, 0x4d, 0xa5, 0x13,
++	  0x4c, 0x7e, 0xa2, 0x62, 0xf7, 0x88, 0x9c, 0x32,
++	  0x9f, 0x61, 0xf6, 0xa6, 0x57, 0x22, 0x5c, 0xc2,
++	  0x12, 0xf4, 0x00, 0x56, 0xd9, 0x86, 0xb3, 0xf4 },
++	{ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21,
++	  0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67,
++	  0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04,
++	  0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d },
++	{ 0xef, 0xc0, 0x4c, 0xdc, 0x39, 0x1c, 0x7e, 0x91,
++	  0x19, 0xbd, 0x38, 0x66, 0x8a, 0x53, 0x4e, 0x65,
++	  0xfe, 0x31, 0x03, 0x6d, 0x6a, 0x62, 0x11, 0x2e,
++	  0x44, 0xeb, 0xeb, 0x11, 0xf9, 0xc5, 0x70, 0x80 },
++	{ 0x99, 0x2c, 0xf5, 0xc0, 0x53, 0x44, 0x2a, 0x5f,
++	  0xbc, 0x4f, 0xaf, 0x58, 0x3e, 0x04, 0xe5, 0x0b,
++	  0xb7, 0x0d, 0x2f, 0x39, 0xfb, 0xb6, 0xa5, 0x03,
++	  0xf8, 0x9e, 0x56, 0xa6, 0x3e, 0x18, 0x57, 0x8a },
++	{ 0x38, 0x64, 0x0e, 0x9f, 0x21, 0x98, 0x3e, 0x67,
++	  0xb5, 0x39, 0xca, 0xcc, 0xae, 0x5e, 0xcf, 0x61,
++	  0x5a, 0xe2, 0x76, 0x4f, 0x75, 0xa0, 0x9c, 0x9c,
++	  0x59, 0xb7, 0x64, 0x83, 0xc1, 0xfb, 0xc7, 0x35 },
++	{ 0x21, 0x3d, 0xd3, 0x4c, 0x7e, 0xfe, 0x4f, 0xb2,
++	  0x7a, 0x6b, 0x35, 0xf6, 0xb4, 0x00, 0x0d, 0x1f,
++	  0xe0, 0x32, 0x81, 0xaf, 0x3c, 0x72, 0x3e, 0x5c,
++	  0x9f, 0x94, 0x74, 0x7a, 0x5f, 0x31, 0xcd, 0x3b },
++	{ 0xec, 0x24, 0x6e, 0xee, 0xb9, 0xce, 0xd3, 0xf7,
++	  0xad, 0x33, 0xed, 0x28, 0x66, 0x0d, 0xd9, 0xbb,
++	  0x07, 0x32, 0x51, 0x3d, 0xb4, 0xe2, 0xfa, 0x27,
++	  0x8b, 0x60, 0xcd, 0xe3, 0x68, 0x2a, 0x4c, 0xcd },
++	{ 0xac, 0x9b, 0x61, 0xd4, 0x46, 0x64, 0x8c, 0x30,
++	  0x05, 0xd7, 0x89, 0x2b, 0xf3, 0xa8, 0x71, 0x9f,
++	  0x4c, 0x81, 0x81, 0xcf, 0xdc, 0xbc, 0x2b, 0x79,
++	  0xfe, 0xf1, 0x0a, 0x27, 0x9b, 0x91, 0x10, 0x95 },
++	{ 0x7b, 0xf8, 0xb2, 0x29, 0x59, 0xe3, 0x4e, 0x3a,
++	  0x43, 0xf7, 0x07, 0x92, 0x23, 0xe8, 0x3a, 0x97,
++	  0x54, 0x61, 0x7d, 0x39, 0x1e, 0x21, 0x3d, 0xfd,
++	  0x80, 0x8e, 0x41, 0xb9, 0xbe, 0xad, 0x4c, 0xe7 },
++	{ 0x68, 0xd4, 0xb5, 0xd4, 0xfa, 0x0e, 0x30, 0x2b,
++	  0x64, 0xcc, 0xc5, 0xaf, 0x79, 0x29, 0x13, 0xac,
++	  0x4c, 0x88, 0xec, 0x95, 0xc0, 0x7d, 0xdf, 0x40,
++	  0x69, 0x42, 0x56, 0xeb, 0x88, 0xce, 0x9f, 0x3d },
++	{ 0xb2, 0xc2, 0x42, 0x0f, 0x05, 0xf9, 0xab, 0xe3,
++	  0x63, 0x15, 0x91, 0x93, 0x36, 0xb3, 0x7e, 0x4e,
++	  0x0f, 0xa3, 0x3f, 0xf7, 0xe7, 0x6a, 0x49, 0x27,
++	  0x67, 0x00, 0x6f, 0xdb, 0x5d, 0x93, 0x54, 0x62 },
++	{ 0x13, 0x4f, 0x61, 0xbb, 0xd0, 0xbb, 0xb6, 0x9a,
++	  0xed, 0x53, 0x43, 0x90, 0x45, 0x51, 0xa3, 0xe6,
++	  0xc1, 0xaa, 0x7d, 0xcd, 0xd7, 0x7e, 0x90, 0x3e,
++	  0x70, 0x23, 0xeb, 0x7c, 0x60, 0x32, 0x0a, 0xa7 },
++	{ 0x46, 0x93, 0xf9, 0xbf, 0xf7, 0xd4, 0xf3, 0x98,
++	  0x6a, 0x7d, 0x17, 0x6e, 0x6e, 0x06, 0xf7, 0x2a,
++	  0xd1, 0x49, 0x0d, 0x80, 0x5c, 0x99, 0xe2, 0x53,
++	  0x47, 0xb8, 0xde, 0x77, 0xb4, 0xdb, 0x6d, 0x9b },
++	{ 0x85, 0x3e, 0x26, 0xf7, 0x41, 0x95, 0x3b, 0x0f,
++	  0xd5, 0xbd, 0xb4, 0x24, 0xe8, 0xab, 0x9e, 0x8b,
++	  0x37, 0x50, 0xea, 0xa8, 0xef, 0x61, 0xe4, 0x79,
++	  0x02, 0xc9, 0x1e, 0x55, 0x4e, 0x9c, 0x73, 0xb9 },
++	{ 0xf7, 0xde, 0x53, 0x63, 0x61, 0xab, 0xaa, 0x0e,
++	  0x15, 0x81, 0x56, 0xcf, 0x0e, 0xa4, 0xf6, 0x3a,
++	  0x99, 0xb5, 0xe4, 0x05, 0x4f, 0x8f, 0xa4, 0xc9,
++	  0xd4, 0x5f, 0x62, 0x85, 0xca, 0xd5, 0x56, 0x94 },
++	{ 0x4c, 0x23, 0x06, 0x08, 0x86, 0x0a, 0x99, 0xae,
++	  0x8d, 0x7b, 0xd5, 0xc2, 0xcc, 0x17, 0xfa, 0x52,
++	  0x09, 0x6b, 0x9a, 0x61, 0xbe, 0xdb, 0x17, 0xcb,
++	  0x76, 0x17, 0x86, 0x4a, 0xd2, 0x9c, 0xa7, 0xa6 },
++	{ 0xae, 0xb9, 0x20, 0xea, 0x87, 0x95, 0x2d, 0xad,
++	  0xb1, 0xfb, 0x75, 0x92, 0x91, 0xe3, 0x38, 0x81,
++	  0x39, 0xa8, 0x72, 0x86, 0x50, 0x01, 0x88, 0x6e,
++	  0xd8, 0x47, 0x52, 0xe9, 0x3c, 0x25, 0x0c, 0x2a },
++	{ 0xab, 0xa4, 0xad, 0x9b, 0x48, 0x0b, 0x9d, 0xf3,
++	  0xd0, 0x8c, 0xa5, 0xe8, 0x7b, 0x0c, 0x24, 0x40,
++	  0xd4, 0xe4, 0xea, 0x21, 0x22, 0x4c, 0x2e, 0xb4,
++	  0x2c, 0xba, 0xe4, 0x69, 0xd0, 0x89, 0xb9, 0x31 },
++	{ 0x05, 0x82, 0x56, 0x07, 0xd7, 0xfd, 0xf2, 0xd8,
++	  0x2e, 0xf4, 0xc3, 0xc8, 0xc2, 0xae, 0xa9, 0x61,
++	  0xad, 0x98, 0xd6, 0x0e, 0xdf, 0xf7, 0xd0, 0x18,
++	  0x98, 0x3e, 0x21, 0x20, 0x4c, 0x0d, 0x93, 0xd1 },
++	{ 0xa7, 0x42, 0xf8, 0xb6, 0xaf, 0x82, 0xd8, 0xa6,
++	  0xca, 0x23, 0x57, 0xc5, 0xf1, 0xcf, 0x91, 0xde,
++	  0xfb, 0xd0, 0x66, 0x26, 0x7d, 0x75, 0xc0, 0x48,
++	  0xb3, 0x52, 0x36, 0x65, 0x85, 0x02, 0x59, 0x62 },
++	{ 0x2b, 0xca, 0xc8, 0x95, 0x99, 0x00, 0x0b, 0x42,
++	  0xc9, 0x5a, 0xe2, 0x38, 0x35, 0xa7, 0x13, 0x70,
++	  0x4e, 0xd7, 0x97, 0x89, 0xc8, 0x4f, 0xef, 0x14,
++	  0x9a, 0x87, 0x4f, 0xf7, 0x33, 0xf0, 0x17, 0xa2 },
++	{ 0xac, 0x1e, 0xd0, 0x7d, 0x04, 0x8f, 0x10, 0x5a,
++	  0x9e, 0x5b, 0x7a, 0xb8, 0x5b, 0x09, 0xa4, 0x92,
++	  0xd5, 0xba, 0xff, 0x14, 0xb8, 0xbf, 0xb0, 0xe9,
++	  0xfd, 0x78, 0x94, 0x86, 0xee, 0xa2, 0xb9, 0x74 },
++	{ 0xe4, 0x8d, 0x0e, 0xcf, 0xaf, 0x49, 0x7d, 0x5b,
++	  0x27, 0xc2, 0x5d, 0x99, 0xe1, 0x56, 0xcb, 0x05,
++	  0x79, 0xd4, 0x40, 0xd6, 0xe3, 0x1f, 0xb6, 0x24,
++	  0x73, 0x69, 0x6d, 0xbf, 0x95, 0xe0, 0x10, 0xe4 },
++	{ 0x12, 0xa9, 0x1f, 0xad, 0xf8, 0xb2, 0x16, 0x44,
++	  0xfd, 0x0f, 0x93, 0x4f, 0x3c, 0x4a, 0x8f, 0x62,
++	  0xba, 0x86, 0x2f, 0xfd, 0x20, 0xe8, 0xe9, 0x61,
++	  0x15, 0x4c, 0x15, 0xc1, 0x38, 0x84, 0xed, 0x3d },
++	{ 0x7c, 0xbe, 0xe9, 0x6e, 0x13, 0x98, 0x97, 0xdc,
++	  0x98, 0xfb, 0xef, 0x3b, 0xe8, 0x1a, 0xd4, 0xd9,
++	  0x64, 0xd2, 0x35, 0xcb, 0x12, 0x14, 0x1f, 0xb6,
++	  0x67, 0x27, 0xe6, 0xe5, 0xdf, 0x73, 0xa8, 0x78 },
++	{ 0xeb, 0xf6, 0x6a, 0xbb, 0x59, 0x7a, 0xe5, 0x72,
++	  0xa7, 0x29, 0x7c, 0xb0, 0x87, 0x1e, 0x35, 0x5a,
++	  0xcc, 0xaf, 0xad, 0x83, 0x77, 0xb8, 0xe7, 0x8b,
++	  0xf1, 0x64, 0xce, 0x2a, 0x18, 0xde, 0x4b, 0xaf },
++	{ 0x71, 0xb9, 0x33, 0xb0, 0x7e, 0x4f, 0xf7, 0x81,
++	  0x8c, 0xe0, 0x59, 0xd0, 0x08, 0x82, 0x9e, 0x45,
++	  0x3c, 0x6f, 0xf0, 0x2e, 0xc0, 0xa7, 0xdb, 0x39,
++	  0x3f, 0xc2, 0xd8, 0x70, 0xf3, 0x7a, 0x72, 0x86 },
++	{ 0x7c, 0xf7, 0xc5, 0x13, 0x31, 0x22, 0x0b, 0x8d,
++	  0x3e, 0xba, 0xed, 0x9c, 0x29, 0x39, 0x8a, 0x16,
++	  0xd9, 0x81, 0x56, 0xe2, 0x61, 0x3c, 0xb0, 0x88,
++	  0xf2, 0xb0, 0xe0, 0x8a, 0x1b, 0xe4, 0xcf, 0x4f },
++	{ 0x3e, 0x41, 0xa1, 0x08, 0xe0, 0xf6, 0x4a, 0xd2,
++	  0x76, 0xb9, 0x79, 0xe1, 0xce, 0x06, 0x82, 0x79,
++	  0xe1, 0x6f, 0x7b, 0xc7, 0xe4, 0xaa, 0x1d, 0x21,
++	  0x1e, 0x17, 0xb8, 0x11, 0x61, 0xdf, 0x16, 0x02 },
++	{ 0x88, 0x65, 0x02, 0xa8, 0x2a, 0xb4, 0x7b, 0xa8,
++	  0xd8, 0x67, 0x10, 0xaa, 0x9d, 0xe3, 0xd4, 0x6e,
++	  0xa6, 0x5c, 0x47, 0xaf, 0x6e, 0xe8, 0xde, 0x45,
++	  0x0c, 0xce, 0xb8, 0xb1, 0x1b, 0x04, 0x5f, 0x50 },
++	{ 0xc0, 0x21, 0xbc, 0x5f, 0x09, 0x54, 0xfe, 0xe9,
++	  0x4f, 0x46, 0xea, 0x09, 0x48, 0x7e, 0x10, 0xa8,
++	  0x48, 0x40, 0xd0, 0x2f, 0x64, 0x81, 0x0b, 0xc0,
++	  0x8d, 0x9e, 0x55, 0x1f, 0x7d, 0x41, 0x68, 0x14 },
++	{ 0x20, 0x30, 0x51, 0x6e, 0x8a, 0x5f, 0xe1, 0x9a,
++	  0xe7, 0x9c, 0x33, 0x6f, 0xce, 0x26, 0x38, 0x2a,
++	  0x74, 0x9d, 0x3f, 0xd0, 0xec, 0x91, 0xe5, 0x37,
++	  0xd4, 0xbd, 0x23, 0x58, 0xc1, 0x2d, 0xfb, 0x22 },
++	{ 0x55, 0x66, 0x98, 0xda, 0xc8, 0x31, 0x7f, 0xd3,
++	  0x6d, 0xfb, 0xdf, 0x25, 0xa7, 0x9c, 0xb1, 0x12,
++	  0xd5, 0x42, 0x58, 0x60, 0x60, 0x5c, 0xba, 0xf5,
++	  0x07, 0xf2, 0x3b, 0xf7, 0xe9, 0xf4, 0x2a, 0xfe },
++	{ 0x2f, 0x86, 0x7b, 0xa6, 0x77, 0x73, 0xfd, 0xc3,
++	  0xe9, 0x2f, 0xce, 0xd9, 0x9a, 0x64, 0x09, 0xad,
++	  0x39, 0xd0, 0xb8, 0x80, 0xfd, 0xe8, 0xf1, 0x09,
++	  0xa8, 0x17, 0x30, 0xc4, 0x45, 0x1d, 0x01, 0x78 },
++	{ 0x17, 0x2e, 0xc2, 0x18, 0xf1, 0x19, 0xdf, 0xae,
++	  0x98, 0x89, 0x6d, 0xff, 0x29, 0xdd, 0x98, 0x76,
++	  0xc9, 0x4a, 0xf8, 0x74, 0x17, 0xf9, 0xae, 0x4c,
++	  0x70, 0x14, 0xbb, 0x4e, 0x4b, 0x96, 0xaf, 0xc7 },
++	{ 0x3f, 0x85, 0x81, 0x4a, 0x18, 0x19, 0x5f, 0x87,
++	  0x9a, 0xa9, 0x62, 0xf9, 0x5d, 0x26, 0xbd, 0x82,
++	  0xa2, 0x78, 0xf2, 0xb8, 0x23, 0x20, 0x21, 0x8f,
++	  0x6b, 0x3b, 0xd6, 0xf7, 0xf6, 0x67, 0xa6, 0xd9 },
++	{ 0x1b, 0x61, 0x8f, 0xba, 0xa5, 0x66, 0xb3, 0xd4,
++	  0x98, 0xc1, 0x2e, 0x98, 0x2c, 0x9e, 0xc5, 0x2e,
++	  0x4d, 0xa8, 0x5a, 0x8c, 0x54, 0xf3, 0x8f, 0x34,
++	  0xc0, 0x90, 0x39, 0x4f, 0x23, 0xc1, 0x84, 0xc1 },
++	{ 0x0c, 0x75, 0x8f, 0xb5, 0x69, 0x2f, 0xfd, 0x41,
++	  0xa3, 0x57, 0x5d, 0x0a, 0xf0, 0x0c, 0xc7, 0xfb,
++	  0xf2, 0xcb, 0xe5, 0x90, 0x5a, 0x58, 0x32, 0x3a,
++	  0x88, 0xae, 0x42, 0x44, 0xf6, 0xe4, 0xc9, 0x93 },
++	{ 0xa9, 0x31, 0x36, 0x0c, 0xad, 0x62, 0x8c, 0x7f,
++	  0x12, 0xa6, 0xc1, 0xc4, 0xb7, 0x53, 0xb0, 0xf4,
++	  0x06, 0x2a, 0xef, 0x3c, 0xe6, 0x5a, 0x1a, 0xe3,
++	  0xf1, 0x93, 0x69, 0xda, 0xdf, 0x3a, 0xe2, 0x3d },
++	{ 0xcb, 0xac, 0x7d, 0x77, 0x3b, 0x1e, 0x3b, 0x3c,
++	  0x66, 0x91, 0xd7, 0xab, 0xb7, 0xe9, 0xdf, 0x04,
++	  0x5c, 0x8b, 0xa1, 0x92, 0x68, 0xde, 0xd1, 0x53,
++	  0x20, 0x7f, 0x5e, 0x80, 0x43, 0x52, 0xec, 0x5d },
++	{ 0x23, 0xa1, 0x96, 0xd3, 0x80, 0x2e, 0xd3, 0xc1,
++	  0xb3, 0x84, 0x01, 0x9a, 0x82, 0x32, 0x58, 0x40,
++	  0xd3, 0x2f, 0x71, 0x95, 0x0c, 0x45, 0x80, 0xb0,
++	  0x34, 0x45, 0xe0, 0x89, 0x8e, 0x14, 0x05, 0x3c },
++	{ 0xf4, 0x49, 0x54, 0x70, 0xf2, 0x26, 0xc8, 0xc2,
++	  0x14, 0xbe, 0x08, 0xfd, 0xfa, 0xd4, 0xbc, 0x4a,
++	  0x2a, 0x9d, 0xbe, 0xa9, 0x13, 0x6a, 0x21, 0x0d,
++	  0xf0, 0xd4, 0xb6, 0x49, 0x29, 0xe6, 0xfc, 0x14 },
++	{ 0xe2, 0x90, 0xdd, 0x27, 0x0b, 0x46, 0x7f, 0x34,
++	  0xab, 0x1c, 0x00, 0x2d, 0x34, 0x0f, 0xa0, 0x16,
++	  0x25, 0x7f, 0xf1, 0x9e, 0x58, 0x33, 0xfd, 0xbb,
++	  0xf2, 0xcb, 0x40, 0x1c, 0x3b, 0x28, 0x17, 0xde },
++	{ 0x9f, 0xc7, 0xb5, 0xde, 0xd3, 0xc1, 0x50, 0x42,
++	  0xb2, 0xa6, 0x58, 0x2d, 0xc3, 0x9b, 0xe0, 0x16,
++	  0xd2, 0x4a, 0x68, 0x2d, 0x5e, 0x61, 0xad, 0x1e,
++	  0xff, 0x9c, 0x63, 0x30, 0x98, 0x48, 0xf7, 0x06 },
++	{ 0x8c, 0xca, 0x67, 0xa3, 0x6d, 0x17, 0xd5, 0xe6,
++	  0x34, 0x1c, 0xb5, 0x92, 0xfd, 0x7b, 0xef, 0x99,
++	  0x26, 0xc9, 0xe3, 0xaa, 0x10, 0x27, 0xea, 0x11,
++	  0xa7, 0xd8, 0xbd, 0x26, 0x0b, 0x57, 0x6e, 0x04 },
++	{ 0x40, 0x93, 0x92, 0xf5, 0x60, 0xf8, 0x68, 0x31,
++	  0xda, 0x43, 0x73, 0xee, 0x5e, 0x00, 0x74, 0x26,
++	  0x05, 0x95, 0xd7, 0xbc, 0x24, 0x18, 0x3b, 0x60,
++	  0xed, 0x70, 0x0d, 0x45, 0x83, 0xd3, 0xf6, 0xf0 },
++	{ 0x28, 0x02, 0x16, 0x5d, 0xe0, 0x90, 0x91, 0x55,
++	  0x46, 0xf3, 0x39, 0x8c, 0xd8, 0x49, 0x16, 0x4a,
++	  0x19, 0xf9, 0x2a, 0xdb, 0xc3, 0x61, 0xad, 0xc9,
++	  0x9b, 0x0f, 0x20, 0xc8, 0xea, 0x07, 0x10, 0x54 },
++	{ 0xad, 0x83, 0x91, 0x68, 0xd9, 0xf8, 0xa4, 0xbe,
++	  0x95, 0xba, 0x9e, 0xf9, 0xa6, 0x92, 0xf0, 0x72,
++	  0x56, 0xae, 0x43, 0xfe, 0x6f, 0x98, 0x64, 0xe2,
++	  0x90, 0x69, 0x1b, 0x02, 0x56, 0xce, 0x50, 0xa9 },
++	{ 0x75, 0xfd, 0xaa, 0x50, 0x38, 0xc2, 0x84, 0xb8,
++	  0x6d, 0x6e, 0x8a, 0xff, 0xe8, 0xb2, 0x80, 0x7e,
++	  0x46, 0x7b, 0x86, 0x60, 0x0e, 0x79, 0xaf, 0x36,
++	  0x89, 0xfb, 0xc0, 0x63, 0x28, 0xcb, 0xf8, 0x94 },
++	{ 0xe5, 0x7c, 0xb7, 0x94, 0x87, 0xdd, 0x57, 0x90,
++	  0x24, 0x32, 0xb2, 0x50, 0x73, 0x38, 0x13, 0xbd,
++	  0x96, 0xa8, 0x4e, 0xfc, 0xe5, 0x9f, 0x65, 0x0f,
++	  0xac, 0x26, 0xe6, 0x69, 0x6a, 0xef, 0xaf, 0xc3 },
++	{ 0x56, 0xf3, 0x4e, 0x8b, 0x96, 0x55, 0x7e, 0x90,
++	  0xc1, 0xf2, 0x4b, 0x52, 0xd0, 0xc8, 0x9d, 0x51,
++	  0x08, 0x6a, 0xcf, 0x1b, 0x00, 0xf6, 0x34, 0xcf,
++	  0x1d, 0xde, 0x92, 0x33, 0xb8, 0xea, 0xaa, 0x3e },
++	{ 0x1b, 0x53, 0xee, 0x94, 0xaa, 0xf3, 0x4e, 0x4b,
++	  0x15, 0x9d, 0x48, 0xde, 0x35, 0x2c, 0x7f, 0x06,
++	  0x61, 0xd0, 0xa4, 0x0e, 0xdf, 0xf9, 0x5a, 0x0b,
++	  0x16, 0x39, 0xb4, 0x09, 0x0e, 0x97, 0x44, 0x72 },
++	{ 0x05, 0x70, 0x5e, 0x2a, 0x81, 0x75, 0x7c, 0x14,
++	  0xbd, 0x38, 0x3e, 0xa9, 0x8d, 0xda, 0x54, 0x4e,
++	  0xb1, 0x0e, 0x6b, 0xc0, 0x7b, 0xae, 0x43, 0x5e,
++	  0x25, 0x18, 0xdb, 0xe1, 0x33, 0x52, 0x53, 0x75 },
++	{ 0xd8, 0xb2, 0x86, 0x6e, 0x8a, 0x30, 0x9d, 0xb5,
++	  0x3e, 0x52, 0x9e, 0xc3, 0x29, 0x11, 0xd8, 0x2f,
++	  0x5c, 0xa1, 0x6c, 0xff, 0x76, 0x21, 0x68, 0x91,
++	  0xa9, 0x67, 0x6a, 0xa3, 0x1a, 0xaa, 0x6c, 0x42 },
++	{ 0xf5, 0x04, 0x1c, 0x24, 0x12, 0x70, 0xeb, 0x04,
++	  0xc7, 0x1e, 0xc2, 0xc9, 0x5d, 0x4c, 0x38, 0xd8,
++	  0x03, 0xb1, 0x23, 0x7b, 0x0f, 0x29, 0xfd, 0x4d,
++	  0xb3, 0xeb, 0x39, 0x76, 0x69, 0xe8, 0x86, 0x99 },
++	{ 0x9a, 0x4c, 0xe0, 0x77, 0xc3, 0x49, 0x32, 0x2f,
++	  0x59, 0x5e, 0x0e, 0xe7, 0x9e, 0xd0, 0xda, 0x5f,
++	  0xab, 0x66, 0x75, 0x2c, 0xbf, 0xef, 0x8f, 0x87,
++	  0xd0, 0xe9, 0xd0, 0x72, 0x3c, 0x75, 0x30, 0xdd },
++	{ 0x65, 0x7b, 0x09, 0xf3, 0xd0, 0xf5, 0x2b, 0x5b,
++	  0x8f, 0x2f, 0x97, 0x16, 0x3a, 0x0e, 0xdf, 0x0c,
++	  0x04, 0xf0, 0x75, 0x40, 0x8a, 0x07, 0xbb, 0xeb,
++	  0x3a, 0x41, 0x01, 0xa8, 0x91, 0x99, 0x0d, 0x62 },
++	{ 0x1e, 0x3f, 0x7b, 0xd5, 0xa5, 0x8f, 0xa5, 0x33,
++	  0x34, 0x4a, 0xa8, 0xed, 0x3a, 0xc1, 0x22, 0xbb,
++	  0x9e, 0x70, 0xd4, 0xef, 0x50, 0xd0, 0x04, 0x53,
++	  0x08, 0x21, 0x94, 0x8f, 0x5f, 0xe6, 0x31, 0x5a },
++	{ 0x80, 0xdc, 0xcf, 0x3f, 0xd8, 0x3d, 0xfd, 0x0d,
++	  0x35, 0xaa, 0x28, 0x58, 0x59, 0x22, 0xab, 0x89,
++	  0xd5, 0x31, 0x39, 0x97, 0x67, 0x3e, 0xaf, 0x90,
++	  0x5c, 0xea, 0x9c, 0x0b, 0x22, 0x5c, 0x7b, 0x5f },
++	{ 0x8a, 0x0d, 0x0f, 0xbf, 0x63, 0x77, 0xd8, 0x3b,
++	  0xb0, 0x8b, 0x51, 0x4b, 0x4b, 0x1c, 0x43, 0xac,
++	  0xc9, 0x5d, 0x75, 0x17, 0x14, 0xf8, 0x92, 0x56,
++	  0x45, 0xcb, 0x6b, 0xc8, 0x56, 0xca, 0x15, 0x0a },
++	{ 0x9f, 0xa5, 0xb4, 0x87, 0x73, 0x8a, 0xd2, 0x84,
++	  0x4c, 0xc6, 0x34, 0x8a, 0x90, 0x19, 0x18, 0xf6,
++	  0x59, 0xa3, 0xb8, 0x9e, 0x9c, 0x0d, 0xfe, 0xea,
++	  0xd3, 0x0d, 0xd9, 0x4b, 0xcf, 0x42, 0xef, 0x8e },
++	{ 0x80, 0x83, 0x2c, 0x4a, 0x16, 0x77, 0xf5, 0xea,
++	  0x25, 0x60, 0xf6, 0x68, 0xe9, 0x35, 0x4d, 0xd3,
++	  0x69, 0x97, 0xf0, 0x37, 0x28, 0xcf, 0xa5, 0x5e,
++	  0x1b, 0x38, 0x33, 0x7c, 0x0c, 0x9e, 0xf8, 0x18 },
++	{ 0xab, 0x37, 0xdd, 0xb6, 0x83, 0x13, 0x7e, 0x74,
++	  0x08, 0x0d, 0x02, 0x6b, 0x59, 0x0b, 0x96, 0xae,
++	  0x9b, 0xb4, 0x47, 0x72, 0x2f, 0x30, 0x5a, 0x5a,
++	  0xc5, 0x70, 0xec, 0x1d, 0xf9, 0xb1, 0x74, 0x3c },
++	{ 0x3e, 0xe7, 0x35, 0xa6, 0x94, 0xc2, 0x55, 0x9b,
++	  0x69, 0x3a, 0xa6, 0x86, 0x29, 0x36, 0x1e, 0x15,
++	  0xd1, 0x22, 0x65, 0xad, 0x6a, 0x3d, 0xed, 0xf4,
++	  0x88, 0xb0, 0xb0, 0x0f, 0xac, 0x97, 0x54, 0xba },
++	{ 0xd6, 0xfc, 0xd2, 0x32, 0x19, 0xb6, 0x47, 0xe4,
++	  0xcb, 0xd5, 0xeb, 0x2d, 0x0a, 0xd0, 0x1e, 0xc8,
++	  0x83, 0x8a, 0x4b, 0x29, 0x01, 0xfc, 0x32, 0x5c,
++	  0xc3, 0x70, 0x19, 0x81, 0xca, 0x6c, 0x88, 0x8b },
++	{ 0x05, 0x20, 0xec, 0x2f, 0x5b, 0xf7, 0xa7, 0x55,
++	  0xda, 0xcb, 0x50, 0xc6, 0xbf, 0x23, 0x3e, 0x35,
++	  0x15, 0x43, 0x47, 0x63, 0xdb, 0x01, 0x39, 0xcc,
++	  0xd9, 0xfa, 0xef, 0xbb, 0x82, 0x07, 0x61, 0x2d },
++	{ 0xaf, 0xf3, 0xb7, 0x5f, 0x3f, 0x58, 0x12, 0x64,
++	  0xd7, 0x66, 0x16, 0x62, 0xb9, 0x2f, 0x5a, 0xd3,
++	  0x7c, 0x1d, 0x32, 0xbd, 0x45, 0xff, 0x81, 0xa4,
++	  0xed, 0x8a, 0xdc, 0x9e, 0xf3, 0x0d, 0xd9, 0x89 },
++	{ 0xd0, 0xdd, 0x65, 0x0b, 0xef, 0xd3, 0xba, 0x63,
++	  0xdc, 0x25, 0x10, 0x2c, 0x62, 0x7c, 0x92, 0x1b,
++	  0x9c, 0xbe, 0xb0, 0xb1, 0x30, 0x68, 0x69, 0x35,
++	  0xb5, 0xc9, 0x27, 0xcb, 0x7c, 0xcd, 0x5e, 0x3b },
++	{ 0xe1, 0x14, 0x98, 0x16, 0xb1, 0x0a, 0x85, 0x14,
++	  0xfb, 0x3e, 0x2c, 0xab, 0x2c, 0x08, 0xbe, 0xe9,
++	  0xf7, 0x3c, 0xe7, 0x62, 0x21, 0x70, 0x12, 0x46,
++	  0xa5, 0x89, 0xbb, 0xb6, 0x73, 0x02, 0xd8, 0xa9 },
++	{ 0x7d, 0xa3, 0xf4, 0x41, 0xde, 0x90, 0x54, 0x31,
++	  0x7e, 0x72, 0xb5, 0xdb, 0xf9, 0x79, 0xda, 0x01,
++	  0xe6, 0xbc, 0xee, 0xbb, 0x84, 0x78, 0xea, 0xe6,
++	  0xa2, 0x28, 0x49, 0xd9, 0x02, 0x92, 0x63, 0x5c },
++	{ 0x12, 0x30, 0xb1, 0xfc, 0x8a, 0x7d, 0x92, 0x15,
++	  0xed, 0xc2, 0xd4, 0xa2, 0xde, 0xcb, 0xdd, 0x0a,
++	  0x6e, 0x21, 0x6c, 0x92, 0x42, 0x78, 0xc9, 0x1f,
++	  0xc5, 0xd1, 0x0e, 0x7d, 0x60, 0x19, 0x2d, 0x94 },
++	{ 0x57, 0x50, 0xd7, 0x16, 0xb4, 0x80, 0x8f, 0x75,
++	  0x1f, 0xeb, 0xc3, 0x88, 0x06, 0xba, 0x17, 0x0b,
++	  0xf6, 0xd5, 0x19, 0x9a, 0x78, 0x16, 0xbe, 0x51,
++	  0x4e, 0x3f, 0x93, 0x2f, 0xbe, 0x0c, 0xb8, 0x71 },
++	{ 0x6f, 0xc5, 0x9b, 0x2f, 0x10, 0xfe, 0xba, 0x95,
++	  0x4a, 0xa6, 0x82, 0x0b, 0x3c, 0xa9, 0x87, 0xee,
++	  0x81, 0xd5, 0xcc, 0x1d, 0xa3, 0xc6, 0x3c, 0xe8,
++	  0x27, 0x30, 0x1c, 0x56, 0x9d, 0xfb, 0x39, 0xce },
++	{ 0xc7, 0xc3, 0xfe, 0x1e, 0xeb, 0xdc, 0x7b, 0x5a,
++	  0x93, 0x93, 0x26, 0xe8, 0xdd, 0xb8, 0x3e, 0x8b,
++	  0xf2, 0xb7, 0x80, 0xb6, 0x56, 0x78, 0xcb, 0x62,
++	  0xf2, 0x08, 0xb0, 0x40, 0xab, 0xdd, 0x35, 0xe2 },
++	{ 0x0c, 0x75, 0xc1, 0xa1, 0x5c, 0xf3, 0x4a, 0x31,
++	  0x4e, 0xe4, 0x78, 0xf4, 0xa5, 0xce, 0x0b, 0x8a,
++	  0x6b, 0x36, 0x52, 0x8e, 0xf7, 0xa8, 0x20, 0x69,
++	  0x6c, 0x3e, 0x42, 0x46, 0xc5, 0xa1, 0x58, 0x64 },
++	{ 0x21, 0x6d, 0xc1, 0x2a, 0x10, 0x85, 0x69, 0xa3,
++	  0xc7, 0xcd, 0xde, 0x4a, 0xed, 0x43, 0xa6, 0xc3,
++	  0x30, 0x13, 0x9d, 0xda, 0x3c, 0xcc, 0x4a, 0x10,
++	  0x89, 0x05, 0xdb, 0x38, 0x61, 0x89, 0x90, 0x50 },
++	{ 0xa5, 0x7b, 0xe6, 0xae, 0x67, 0x56, 0xf2, 0x8b,
++	  0x02, 0xf5, 0x9d, 0xad, 0xf7, 0xe0, 0xd7, 0xd8,
++	  0x80, 0x7f, 0x10, 0xfa, 0x15, 0xce, 0xd1, 0xad,
++	  0x35, 0x85, 0x52, 0x1a, 0x1d, 0x99, 0x5a, 0x89 },
++	{ 0x81, 0x6a, 0xef, 0x87, 0x59, 0x53, 0x71, 0x6c,
++	  0xd7, 0xa5, 0x81, 0xf7, 0x32, 0xf5, 0x3d, 0xd4,
++	  0x35, 0xda, 0xb6, 0x6d, 0x09, 0xc3, 0x61, 0xd2,
++	  0xd6, 0x59, 0x2d, 0xe1, 0x77, 0x55, 0xd8, 0xa8 },
++	{ 0x9a, 0x76, 0x89, 0x32, 0x26, 0x69, 0x3b, 0x6e,
++	  0xa9, 0x7e, 0x6a, 0x73, 0x8f, 0x9d, 0x10, 0xfb,
++	  0x3d, 0x0b, 0x43, 0xae, 0x0e, 0x8b, 0x7d, 0x81,
++	  0x23, 0xea, 0x76, 0xce, 0x97, 0x98, 0x9c, 0x7e },
++	{ 0x8d, 0xae, 0xdb, 0x9a, 0x27, 0x15, 0x29, 0xdb,
++	  0xb7, 0xdc, 0x3b, 0x60, 0x7f, 0xe5, 0xeb, 0x2d,
++	  0x32, 0x11, 0x77, 0x07, 0x58, 0xdd, 0x3b, 0x0a,
++	  0x35, 0x93, 0xd2, 0xd7, 0x95, 0x4e, 0x2d, 0x5b },
++	{ 0x16, 0xdb, 0xc0, 0xaa, 0x5d, 0xd2, 0xc7, 0x74,
++	  0xf5, 0x05, 0x10, 0x0f, 0x73, 0x37, 0x86, 0xd8,
++	  0xa1, 0x75, 0xfc, 0xbb, 0xb5, 0x9c, 0x43, 0xe1,
++	  0xfb, 0xff, 0x3e, 0x1e, 0xaf, 0x31, 0xcb, 0x4a },
++	{ 0x86, 0x06, 0xcb, 0x89, 0x9c, 0x6a, 0xea, 0xf5,
++	  0x1b, 0x9d, 0xb0, 0xfe, 0x49, 0x24, 0xa9, 0xfd,
++	  0x5d, 0xab, 0xc1, 0x9f, 0x88, 0x26, 0xf2, 0xbc,
++	  0x1c, 0x1d, 0x7d, 0xa1, 0x4d, 0x2c, 0x2c, 0x99 },
++	{ 0x84, 0x79, 0x73, 0x1a, 0xed, 0xa5, 0x7b, 0xd3,
++	  0x7e, 0xad, 0xb5, 0x1a, 0x50, 0x7e, 0x30, 0x7f,
++	  0x3b, 0xd9, 0x5e, 0x69, 0xdb, 0xca, 0x94, 0xf3,
++	  0xbc, 0x21, 0x72, 0x60, 0x66, 0xad, 0x6d, 0xfd },
++	{ 0x58, 0x47, 0x3a, 0x9e, 0xa8, 0x2e, 0xfa, 0x3f,
++	  0x3b, 0x3d, 0x8f, 0xc8, 0x3e, 0xd8, 0x86, 0x31,
++	  0x27, 0xb3, 0x3a, 0xe8, 0xde, 0xae, 0x63, 0x07,
++	  0x20, 0x1e, 0xdb, 0x6d, 0xde, 0x61, 0xde, 0x29 },
++	{ 0x9a, 0x92, 0x55, 0xd5, 0x3a, 0xf1, 0x16, 0xde,
++	  0x8b, 0xa2, 0x7c, 0xe3, 0x5b, 0x4c, 0x7e, 0x15,
++	  0x64, 0x06, 0x57, 0xa0, 0xfc, 0xb8, 0x88, 0xc7,
++	  0x0d, 0x95, 0x43, 0x1d, 0xac, 0xd8, 0xf8, 0x30 },
++	{ 0x9e, 0xb0, 0x5f, 0xfb, 0xa3, 0x9f, 0xd8, 0x59,
++	  0x6a, 0x45, 0x49, 0x3e, 0x18, 0xd2, 0x51, 0x0b,
++	  0xf3, 0xef, 0x06, 0x5c, 0x51, 0xd6, 0xe1, 0x3a,
++	  0xbe, 0x66, 0xaa, 0x57, 0xe0, 0x5c, 0xfd, 0xb7 },
++	{ 0x81, 0xdc, 0xc3, 0xa5, 0x05, 0xea, 0xce, 0x3f,
++	  0x87, 0x9d, 0x8f, 0x70, 0x27, 0x76, 0x77, 0x0f,
++	  0x9d, 0xf5, 0x0e, 0x52, 0x1d, 0x14, 0x28, 0xa8,
++	  0x5d, 0xaf, 0x04, 0xf9, 0xad, 0x21, 0x50, 0xe0 },
++	{ 0xe3, 0xe3, 0xc4, 0xaa, 0x3a, 0xcb, 0xbc, 0x85,
++	  0x33, 0x2a, 0xf9, 0xd5, 0x64, 0xbc, 0x24, 0x16,
++	  0x5e, 0x16, 0x87, 0xf6, 0xb1, 0xad, 0xcb, 0xfa,
++	  0xe7, 0x7a, 0x8f, 0x03, 0xc7, 0x2a, 0xc2, 0x8c },
++	{ 0x67, 0x46, 0xc8, 0x0b, 0x4e, 0xb5, 0x6a, 0xea,
++	  0x45, 0xe6, 0x4e, 0x72, 0x89, 0xbb, 0xa3, 0xed,
++	  0xbf, 0x45, 0xec, 0xf8, 0x20, 0x64, 0x81, 0xff,
++	  0x63, 0x02, 0x12, 0x29, 0x84, 0xcd, 0x52, 0x6a },
++	{ 0x2b, 0x62, 0x8e, 0x52, 0x76, 0x4d, 0x7d, 0x62,
++	  0xc0, 0x86, 0x8b, 0x21, 0x23, 0x57, 0xcd, 0xd1,
++	  0x2d, 0x91, 0x49, 0x82, 0x2f, 0x4e, 0x98, 0x45,
++	  0xd9, 0x18, 0xa0, 0x8d, 0x1a, 0xe9, 0x90, 0xc0 },
++	{ 0xe4, 0xbf, 0xe8, 0x0d, 0x58, 0xc9, 0x19, 0x94,
++	  0x61, 0x39, 0x09, 0xdc, 0x4b, 0x1a, 0x12, 0x49,
++	  0x68, 0x96, 0xc0, 0x04, 0xaf, 0x7b, 0x57, 0x01,
++	  0x48, 0x3d, 0xe4, 0x5d, 0x28, 0x23, 0xd7, 0x8e },
++	{ 0xeb, 0xb4, 0xba, 0x15, 0x0c, 0xef, 0x27, 0x34,
++	  0x34, 0x5b, 0x5d, 0x64, 0x1b, 0xbe, 0xd0, 0x3a,
++	  0x21, 0xea, 0xfa, 0xe9, 0x33, 0xc9, 0x9e, 0x00,
++	  0x92, 0x12, 0xef, 0x04, 0x57, 0x4a, 0x85, 0x30 },
++	{ 0x39, 0x66, 0xec, 0x73, 0xb1, 0x54, 0xac, 0xc6,
++	  0x97, 0xac, 0x5c, 0xf5, 0xb2, 0x4b, 0x40, 0xbd,
++	  0xb0, 0xdb, 0x9e, 0x39, 0x88, 0x36, 0xd7, 0x6d,
++	  0x4b, 0x88, 0x0e, 0x3b, 0x2a, 0xf1, 0xaa, 0x27 },
++	{ 0xef, 0x7e, 0x48, 0x31, 0xb3, 0xa8, 0x46, 0x36,
++	  0x51, 0x8d, 0x6e, 0x4b, 0xfc, 0xe6, 0x4a, 0x43,
++	  0xdb, 0x2a, 0x5d, 0xda, 0x9c, 0xca, 0x2b, 0x44,
++	  0xf3, 0x90, 0x33, 0xbd, 0xc4, 0x0d, 0x62, 0x43 },
++	{ 0x7a, 0xbf, 0x6a, 0xcf, 0x5c, 0x8e, 0x54, 0x9d,
++	  0xdb, 0xb1, 0x5a, 0xe8, 0xd8, 0xb3, 0x88, 0xc1,
++	  0xc1, 0x97, 0xe6, 0x98, 0x73, 0x7c, 0x97, 0x85,
++	  0x50, 0x1e, 0xd1, 0xf9, 0x49, 0x30, 0xb7, 0xd9 },
++	{ 0x88, 0x01, 0x8d, 0xed, 0x66, 0x81, 0x3f, 0x0c,
++	  0xa9, 0x5d, 0xef, 0x47, 0x4c, 0x63, 0x06, 0x92,
++	  0x01, 0x99, 0x67, 0xb9, 0xe3, 0x68, 0x88, 0xda,
++	  0xdd, 0x94, 0x12, 0x47, 0x19, 0xb6, 0x82, 0xf6 },
++	{ 0x39, 0x30, 0x87, 0x6b, 0x9f, 0xc7, 0x52, 0x90,
++	  0x36, 0xb0, 0x08, 0xb1, 0xb8, 0xbb, 0x99, 0x75,
++	  0x22, 0xa4, 0x41, 0x63, 0x5a, 0x0c, 0x25, 0xec,
++	  0x02, 0xfb, 0x6d, 0x90, 0x26, 0xe5, 0x5a, 0x97 },
++	{ 0x0a, 0x40, 0x49, 0xd5, 0x7e, 0x83, 0x3b, 0x56,
++	  0x95, 0xfa, 0xc9, 0x3d, 0xd1, 0xfb, 0xef, 0x31,
++	  0x66, 0xb4, 0x4b, 0x12, 0xad, 0x11, 0x24, 0x86,
++	  0x62, 0x38, 0x3a, 0xe0, 0x51, 0xe1, 0x58, 0x27 },
++	{ 0x81, 0xdc, 0xc0, 0x67, 0x8b, 0xb6, 0xa7, 0x65,
++	  0xe4, 0x8c, 0x32, 0x09, 0x65, 0x4f, 0xe9, 0x00,
++	  0x89, 0xce, 0x44, 0xff, 0x56, 0x18, 0x47, 0x7e,
++	  0x39, 0xab, 0x28, 0x64, 0x76, 0xdf, 0x05, 0x2b },
++	{ 0xe6, 0x9b, 0x3a, 0x36, 0xa4, 0x46, 0x19, 0x12,
++	  0xdc, 0x08, 0x34, 0x6b, 0x11, 0xdd, 0xcb, 0x9d,
++	  0xb7, 0x96, 0xf8, 0x85, 0xfd, 0x01, 0x93, 0x6e,
++	  0x66, 0x2f, 0xe2, 0x92, 0x97, 0xb0, 0x99, 0xa4 },
++	{ 0x5a, 0xc6, 0x50, 0x3b, 0x0d, 0x8d, 0xa6, 0x91,
++	  0x76, 0x46, 0xe6, 0xdc, 0xc8, 0x7e, 0xdc, 0x58,
++	  0xe9, 0x42, 0x45, 0x32, 0x4c, 0xc2, 0x04, 0xf4,
++	  0xdd, 0x4a, 0xf0, 0x15, 0x63, 0xac, 0xd4, 0x27 },
++	{ 0xdf, 0x6d, 0xda, 0x21, 0x35, 0x9a, 0x30, 0xbc,
++	  0x27, 0x17, 0x80, 0x97, 0x1c, 0x1a, 0xbd, 0x56,
++	  0xa6, 0xef, 0x16, 0x7e, 0x48, 0x08, 0x87, 0x88,
++	  0x8e, 0x73, 0xa8, 0x6d, 0x3b, 0xf6, 0x05, 0xe9 },
++	{ 0xe8, 0xe6, 0xe4, 0x70, 0x71, 0xe7, 0xb7, 0xdf,
++	  0x25, 0x80, 0xf2, 0x25, 0xcf, 0xbb, 0xed, 0xf8,
++	  0x4c, 0xe6, 0x77, 0x46, 0x62, 0x66, 0x28, 0xd3,
++	  0x30, 0x97, 0xe4, 0xb7, 0xdc, 0x57, 0x11, 0x07 },
++	{ 0x53, 0xe4, 0x0e, 0xad, 0x62, 0x05, 0x1e, 0x19,
++	  0xcb, 0x9b, 0xa8, 0x13, 0x3e, 0x3e, 0x5c, 0x1c,
++	  0xe0, 0x0d, 0xdc, 0xad, 0x8a, 0xcf, 0x34, 0x2a,
++	  0x22, 0x43, 0x60, 0xb0, 0xac, 0xc1, 0x47, 0x77 },
++	{ 0x9c, 0xcd, 0x53, 0xfe, 0x80, 0xbe, 0x78, 0x6a,
++	  0xa9, 0x84, 0x63, 0x84, 0x62, 0xfb, 0x28, 0xaf,
++	  0xdf, 0x12, 0x2b, 0x34, 0xd7, 0x8f, 0x46, 0x87,
++	  0xec, 0x63, 0x2b, 0xb1, 0x9d, 0xe2, 0x37, 0x1a },
++	{ 0xcb, 0xd4, 0x80, 0x52, 0xc4, 0x8d, 0x78, 0x84,
++	  0x66, 0xa3, 0xe8, 0x11, 0x8c, 0x56, 0xc9, 0x7f,
++	  0xe1, 0x46, 0xe5, 0x54, 0x6f, 0xaa, 0xf9, 0x3e,
++	  0x2b, 0xc3, 0xc4, 0x7e, 0x45, 0x93, 0x97, 0x53 },
++	{ 0x25, 0x68, 0x83, 0xb1, 0x4e, 0x2a, 0xf4, 0x4d,
++	  0xad, 0xb2, 0x8e, 0x1b, 0x34, 0xb2, 0xac, 0x0f,
++	  0x0f, 0x4c, 0x91, 0xc3, 0x4e, 0xc9, 0x16, 0x9e,
++	  0x29, 0x03, 0x61, 0x58, 0xac, 0xaa, 0x95, 0xb9 },
++	{ 0x44, 0x71, 0xb9, 0x1a, 0xb4, 0x2d, 0xb7, 0xc4,
++	  0xdd, 0x84, 0x90, 0xab, 0x95, 0xa2, 0xee, 0x8d,
++	  0x04, 0xe3, 0xef, 0x5c, 0x3d, 0x6f, 0xc7, 0x1a,
++	  0xc7, 0x4b, 0x2b, 0x26, 0x91, 0x4d, 0x16, 0x41 },
++	{ 0xa5, 0xeb, 0x08, 0x03, 0x8f, 0x8f, 0x11, 0x55,
++	  0xed, 0x86, 0xe6, 0x31, 0x90, 0x6f, 0xc1, 0x30,
++	  0x95, 0xf6, 0xbb, 0xa4, 0x1d, 0xe5, 0xd4, 0xe7,
++	  0x95, 0x75, 0x8e, 0xc8, 0xc8, 0xdf, 0x8a, 0xf1 },
++	{ 0xdc, 0x1d, 0xb6, 0x4e, 0xd8, 0xb4, 0x8a, 0x91,
++	  0x0e, 0x06, 0x0a, 0x6b, 0x86, 0x63, 0x74, 0xc5,
++	  0x78, 0x78, 0x4e, 0x9a, 0xc4, 0x9a, 0xb2, 0x77,
++	  0x40, 0x92, 0xac, 0x71, 0x50, 0x19, 0x34, 0xac },
++	{ 0x28, 0x54, 0x13, 0xb2, 0xf2, 0xee, 0x87, 0x3d,
++	  0x34, 0x31, 0x9e, 0xe0, 0xbb, 0xfb, 0xb9, 0x0f,
++	  0x32, 0xda, 0x43, 0x4c, 0xc8, 0x7e, 0x3d, 0xb5,
++	  0xed, 0x12, 0x1b, 0xb3, 0x98, 0xed, 0x96, 0x4b },
++	{ 0x02, 0x16, 0xe0, 0xf8, 0x1f, 0x75, 0x0f, 0x26,
++	  0xf1, 0x99, 0x8b, 0xc3, 0x93, 0x4e, 0x3e, 0x12,
++	  0x4c, 0x99, 0x45, 0xe6, 0x85, 0xa6, 0x0b, 0x25,
++	  0xe8, 0xfb, 0xd9, 0x62, 0x5a, 0xb6, 0xb5, 0x99 },
++	{ 0x38, 0xc4, 0x10, 0xf5, 0xb9, 0xd4, 0x07, 0x20,
++	  0x50, 0x75, 0x5b, 0x31, 0xdc, 0xa8, 0x9f, 0xd5,
++	  0x39, 0x5c, 0x67, 0x85, 0xee, 0xb3, 0xd7, 0x90,
++	  0xf3, 0x20, 0xff, 0x94, 0x1c, 0x5a, 0x93, 0xbf },
++	{ 0xf1, 0x84, 0x17, 0xb3, 0x9d, 0x61, 0x7a, 0xb1,
++	  0xc1, 0x8f, 0xdf, 0x91, 0xeb, 0xd0, 0xfc, 0x6d,
++	  0x55, 0x16, 0xbb, 0x34, 0xcf, 0x39, 0x36, 0x40,
++	  0x37, 0xbc, 0xe8, 0x1f, 0xa0, 0x4c, 0xec, 0xb1 },
++	{ 0x1f, 0xa8, 0x77, 0xde, 0x67, 0x25, 0x9d, 0x19,
++	  0x86, 0x3a, 0x2a, 0x34, 0xbc, 0xc6, 0x96, 0x2a,
++	  0x2b, 0x25, 0xfc, 0xbf, 0x5c, 0xbe, 0xcd, 0x7e,
++	  0xde, 0x8f, 0x1f, 0xa3, 0x66, 0x88, 0xa7, 0x96 },
++	{ 0x5b, 0xd1, 0x69, 0xe6, 0x7c, 0x82, 0xc2, 0xc2,
++	  0xe9, 0x8e, 0xf7, 0x00, 0x8b, 0xdf, 0x26, 0x1f,
++	  0x2d, 0xdf, 0x30, 0xb1, 0xc0, 0x0f, 0x9e, 0x7f,
++	  0x27, 0x5b, 0xb3, 0xe8, 0xa2, 0x8d, 0xc9, 0xa2 },
++	{ 0xc8, 0x0a, 0xbe, 0xeb, 0xb6, 0x69, 0xad, 0x5d,
++	  0xee, 0xb5, 0xf5, 0xec, 0x8e, 0xa6, 0xb7, 0xa0,
++	  0x5d, 0xdf, 0x7d, 0x31, 0xec, 0x4c, 0x0a, 0x2e,
++	  0xe2, 0x0b, 0x0b, 0x98, 0xca, 0xec, 0x67, 0x46 },
++	{ 0xe7, 0x6d, 0x3f, 0xbd, 0xa5, 0xba, 0x37, 0x4e,
++	  0x6b, 0xf8, 0xe5, 0x0f, 0xad, 0xc3, 0xbb, 0xb9,
++	  0xba, 0x5c, 0x20, 0x6e, 0xbd, 0xec, 0x89, 0xa3,
++	  0xa5, 0x4c, 0xf3, 0xdd, 0x84, 0xa0, 0x70, 0x16 },
++	{ 0x7b, 0xba, 0x9d, 0xc5, 0xb5, 0xdb, 0x20, 0x71,
++	  0xd1, 0x77, 0x52, 0xb1, 0x04, 0x4c, 0x1e, 0xce,
++	  0xd9, 0x6a, 0xaf, 0x2d, 0xd4, 0x6e, 0x9b, 0x43,
++	  0x37, 0x50, 0xe8, 0xea, 0x0d, 0xcc, 0x18, 0x70 },
++	{ 0xf2, 0x9b, 0x1b, 0x1a, 0xb9, 0xba, 0xb1, 0x63,
++	  0x01, 0x8e, 0xe3, 0xda, 0x15, 0x23, 0x2c, 0xca,
++	  0x78, 0xec, 0x52, 0xdb, 0xc3, 0x4e, 0xda, 0x5b,
++	  0x82, 0x2e, 0xc1, 0xd8, 0x0f, 0xc2, 0x1b, 0xd0 },
++	{ 0x9e, 0xe3, 0xe3, 0xe7, 0xe9, 0x00, 0xf1, 0xe1,
++	  0x1d, 0x30, 0x8c, 0x4b, 0x2b, 0x30, 0x76, 0xd2,
++	  0x72, 0xcf, 0x70, 0x12, 0x4f, 0x9f, 0x51, 0xe1,
++	  0xda, 0x60, 0xf3, 0x78, 0x46, 0xcd, 0xd2, 0xf4 },
++	{ 0x70, 0xea, 0x3b, 0x01, 0x76, 0x92, 0x7d, 0x90,
++	  0x96, 0xa1, 0x85, 0x08, 0xcd, 0x12, 0x3a, 0x29,
++	  0x03, 0x25, 0x92, 0x0a, 0x9d, 0x00, 0xa8, 0x9b,
++	  0x5d, 0xe0, 0x42, 0x73, 0xfb, 0xc7, 0x6b, 0x85 },
++	{ 0x67, 0xde, 0x25, 0xc0, 0x2a, 0x4a, 0xab, 0xa2,
++	  0x3b, 0xdc, 0x97, 0x3c, 0x8b, 0xb0, 0xb5, 0x79,
++	  0x6d, 0x47, 0xcc, 0x06, 0x59, 0xd4, 0x3d, 0xff,
++	  0x1f, 0x97, 0xde, 0x17, 0x49, 0x63, 0xb6, 0x8e },
++	{ 0xb2, 0x16, 0x8e, 0x4e, 0x0f, 0x18, 0xb0, 0xe6,
++	  0x41, 0x00, 0xb5, 0x17, 0xed, 0x95, 0x25, 0x7d,
++	  0x73, 0xf0, 0x62, 0x0d, 0xf8, 0x85, 0xc1, 0x3d,
++	  0x2e, 0xcf, 0x79, 0x36, 0x7b, 0x38, 0x4c, 0xee },
++	{ 0x2e, 0x7d, 0xec, 0x24, 0x28, 0x85, 0x3b, 0x2c,
++	  0x71, 0x76, 0x07, 0x45, 0x54, 0x1f, 0x7a, 0xfe,
++	  0x98, 0x25, 0xb5, 0xdd, 0x77, 0xdf, 0x06, 0x51,
++	  0x1d, 0x84, 0x41, 0xa9, 0x4b, 0xac, 0xc9, 0x27 },
++	{ 0xca, 0x9f, 0xfa, 0xc4, 0xc4, 0x3f, 0x0b, 0x48,
++	  0x46, 0x1d, 0xc5, 0xc2, 0x63, 0xbe, 0xa3, 0xf6,
++	  0xf0, 0x06, 0x11, 0xce, 0xac, 0xab, 0xf6, 0xf8,
++	  0x95, 0xba, 0x2b, 0x01, 0x01, 0xdb, 0xb6, 0x8d },
++	{ 0x74, 0x10, 0xd4, 0x2d, 0x8f, 0xd1, 0xd5, 0xe9,
++	  0xd2, 0xf5, 0x81, 0x5c, 0xb9, 0x34, 0x17, 0x99,
++	  0x88, 0x28, 0xef, 0x3c, 0x42, 0x30, 0xbf, 0xbd,
++	  0x41, 0x2d, 0xf0, 0xa4, 0xa7, 0xa2, 0x50, 0x7a },
++	{ 0x50, 0x10, 0xf6, 0x84, 0x51, 0x6d, 0xcc, 0xd0,
++	  0xb6, 0xee, 0x08, 0x52, 0xc2, 0x51, 0x2b, 0x4d,
++	  0xc0, 0x06, 0x6c, 0xf0, 0xd5, 0x6f, 0x35, 0x30,
++	  0x29, 0x78, 0xdb, 0x8a, 0xe3, 0x2c, 0x6a, 0x81 },
++	{ 0xac, 0xaa, 0xb5, 0x85, 0xf7, 0xb7, 0x9b, 0x71,
++	  0x99, 0x35, 0xce, 0xb8, 0x95, 0x23, 0xdd, 0xc5,
++	  0x48, 0x27, 0xf7, 0x5c, 0x56, 0x88, 0x38, 0x56,
++	  0x15, 0x4a, 0x56, 0xcd, 0xcd, 0x5e, 0xe9, 0x88 },
++	{ 0x66, 0x6d, 0xe5, 0xd1, 0x44, 0x0f, 0xee, 0x73,
++	  0x31, 0xaa, 0xf0, 0x12, 0x3a, 0x62, 0xef, 0x2d,
++	  0x8b, 0xa5, 0x74, 0x53, 0xa0, 0x76, 0x96, 0x35,
++	  0xac, 0x6c, 0xd0, 0x1e, 0x63, 0x3f, 0x77, 0x12 },
++	{ 0xa6, 0xf9, 0x86, 0x58, 0xf6, 0xea, 0xba, 0xf9,
++	  0x02, 0xd8, 0xb3, 0x87, 0x1a, 0x4b, 0x10, 0x1d,
++	  0x16, 0x19, 0x6e, 0x8a, 0x4b, 0x24, 0x1e, 0x15,
++	  0x58, 0xfe, 0x29, 0x96, 0x6e, 0x10, 0x3e, 0x8d },
++	{ 0x89, 0x15, 0x46, 0xa8, 0xb2, 0x9f, 0x30, 0x47,
++	  0xdd, 0xcf, 0xe5, 0xb0, 0x0e, 0x45, 0xfd, 0x55,
++	  0x75, 0x63, 0x73, 0x10, 0x5e, 0xa8, 0x63, 0x7d,
++	  0xfc, 0xff, 0x54, 0x7b, 0x6e, 0xa9, 0x53, 0x5f },
++	{ 0x18, 0xdf, 0xbc, 0x1a, 0xc5, 0xd2, 0x5b, 0x07,
++	  0x61, 0x13, 0x7d, 0xbd, 0x22, 0xc1, 0x7c, 0x82,
++	  0x9d, 0x0f, 0x0e, 0xf1, 0xd8, 0x23, 0x44, 0xe9,
++	  0xc8, 0x9c, 0x28, 0x66, 0x94, 0xda, 0x24, 0xe8 },
++	{ 0xb5, 0x4b, 0x9b, 0x67, 0xf8, 0xfe, 0xd5, 0x4b,
++	  0xbf, 0x5a, 0x26, 0x66, 0xdb, 0xdf, 0x4b, 0x23,
++	  0xcf, 0xf1, 0xd1, 0xb6, 0xf4, 0xaf, 0xc9, 0x85,
++	  0xb2, 0xe6, 0xd3, 0x30, 0x5a, 0x9f, 0xf8, 0x0f },
++	{ 0x7d, 0xb4, 0x42, 0xe1, 0x32, 0xba, 0x59, 0xbc,
++	  0x12, 0x89, 0xaa, 0x98, 0xb0, 0xd3, 0xe8, 0x06,
++	  0x00, 0x4f, 0x8e, 0xc1, 0x28, 0x11, 0xaf, 0x1e,
++	  0x2e, 0x33, 0xc6, 0x9b, 0xfd, 0xe7, 0x29, 0xe1 },
++	{ 0x25, 0x0f, 0x37, 0xcd, 0xc1, 0x5e, 0x81, 0x7d,
++	  0x2f, 0x16, 0x0d, 0x99, 0x56, 0xc7, 0x1f, 0xe3,
++	  0xeb, 0x5d, 0xb7, 0x45, 0x56, 0xe4, 0xad, 0xf9,
++	  0xa4, 0xff, 0xaf, 0xba, 0x74, 0x01, 0x03, 0x96 },
++	{ 0x4a, 0xb8, 0xa3, 0xdd, 0x1d, 0xdf, 0x8a, 0xd4,
++	  0x3d, 0xab, 0x13, 0xa2, 0x7f, 0x66, 0xa6, 0x54,
++	  0x4f, 0x29, 0x05, 0x97, 0xfa, 0x96, 0x04, 0x0e,
++	  0x0e, 0x1d, 0xb9, 0x26, 0x3a, 0xa4, 0x79, 0xf8 },
++	{ 0xee, 0x61, 0x72, 0x7a, 0x07, 0x66, 0xdf, 0x93,
++	  0x9c, 0xcd, 0xc8, 0x60, 0x33, 0x40, 0x44, 0xc7,
++	  0x9a, 0x3c, 0x9b, 0x15, 0x62, 0x00, 0xbc, 0x3a,
++	  0xa3, 0x29, 0x73, 0x48, 0x3d, 0x83, 0x41, 0xae },
++	{ 0x3f, 0x68, 0xc7, 0xec, 0x63, 0xac, 0x11, 0xeb,
++	  0xb9, 0x8f, 0x94, 0xb3, 0x39, 0xb0, 0x5c, 0x10,
++	  0x49, 0x84, 0xfd, 0xa5, 0x01, 0x03, 0x06, 0x01,
++	  0x44, 0xe5, 0xa2, 0xbf, 0xcc, 0xc9, 0xda, 0x95 },
++	{ 0x05, 0x6f, 0x29, 0x81, 0x6b, 0x8a, 0xf8, 0xf5,
++	  0x66, 0x82, 0xbc, 0x4d, 0x7c, 0xf0, 0x94, 0x11,
++	  0x1d, 0xa7, 0x73, 0x3e, 0x72, 0x6c, 0xd1, 0x3d,
++	  0x6b, 0x3e, 0x8e, 0xa0, 0x3e, 0x92, 0xa0, 0xd5 },
++	{ 0xf5, 0xec, 0x43, 0xa2, 0x8a, 0xcb, 0xef, 0xf1,
++	  0xf3, 0x31, 0x8a, 0x5b, 0xca, 0xc7, 0xc6, 0x6d,
++	  0xdb, 0x52, 0x30, 0xb7, 0x9d, 0xb2, 0xd1, 0x05,
++	  0xbc, 0xbe, 0x15, 0xf3, 0xc1, 0x14, 0x8d, 0x69 },
++	{ 0x2a, 0x69, 0x60, 0xad, 0x1d, 0x8d, 0xd5, 0x47,
++	  0x55, 0x5c, 0xfb, 0xd5, 0xe4, 0x60, 0x0f, 0x1e,
++	  0xaa, 0x1c, 0x8e, 0xda, 0x34, 0xde, 0x03, 0x74,
++	  0xec, 0x4a, 0x26, 0xea, 0xaa, 0xa3, 0x3b, 0x4e },
++	{ 0xdc, 0xc1, 0xea, 0x7b, 0xaa, 0xb9, 0x33, 0x84,
++	  0xf7, 0x6b, 0x79, 0x68, 0x66, 0x19, 0x97, 0x54,
++	  0x74, 0x2f, 0x7b, 0x96, 0xd6, 0xb4, 0xc1, 0x20,
++	  0x16, 0x5c, 0x04, 0xa6, 0xc4, 0xf5, 0xce, 0x10 },
++	{ 0x13, 0xd5, 0xdf, 0x17, 0x92, 0x21, 0x37, 0x9c,
++	  0x6a, 0x78, 0xc0, 0x7c, 0x79, 0x3f, 0xf5, 0x34,
++	  0x87, 0xca, 0xe6, 0xbf, 0x9f, 0xe8, 0x82, 0x54,
++	  0x1a, 0xb0, 0xe7, 0x35, 0xe3, 0xea, 0xda, 0x3b },
++	{ 0x8c, 0x59, 0xe4, 0x40, 0x76, 0x41, 0xa0, 0x1e,
++	  0x8f, 0xf9, 0x1f, 0x99, 0x80, 0xdc, 0x23, 0x6f,
++	  0x4e, 0xcd, 0x6f, 0xcf, 0x52, 0x58, 0x9a, 0x09,
++	  0x9a, 0x96, 0x16, 0x33, 0x96, 0x77, 0x14, 0xe1 },
++	{ 0x83, 0x3b, 0x1a, 0xc6, 0xa2, 0x51, 0xfd, 0x08,
++	  0xfd, 0x6d, 0x90, 0x8f, 0xea, 0x2a, 0x4e, 0xe1,
++	  0xe0, 0x40, 0xbc, 0xa9, 0x3f, 0xc1, 0xa3, 0x8e,
++	  0xc3, 0x82, 0x0e, 0x0c, 0x10, 0xbd, 0x82, 0xea },
++	{ 0xa2, 0x44, 0xf9, 0x27, 0xf3, 0xb4, 0x0b, 0x8f,
++	  0x6c, 0x39, 0x15, 0x70, 0xc7, 0x65, 0x41, 0x8f,
++	  0x2f, 0x6e, 0x70, 0x8e, 0xac, 0x90, 0x06, 0xc5,
++	  0x1a, 0x7f, 0xef, 0xf4, 0xaf, 0x3b, 0x2b, 0x9e },
++	{ 0x3d, 0x99, 0xed, 0x95, 0x50, 0xcf, 0x11, 0x96,
++	  0xe6, 0xc4, 0xd2, 0x0c, 0x25, 0x96, 0x20, 0xf8,
++	  0x58, 0xc3, 0xd7, 0x03, 0x37, 0x4c, 0x12, 0x8c,
++	  0xe7, 0xb5, 0x90, 0x31, 0x0c, 0x83, 0x04, 0x6d },
++	{ 0x2b, 0x35, 0xc4, 0x7d, 0x7b, 0x87, 0x76, 0x1f,
++	  0x0a, 0xe4, 0x3a, 0xc5, 0x6a, 0xc2, 0x7b, 0x9f,
++	  0x25, 0x83, 0x03, 0x67, 0xb5, 0x95, 0xbe, 0x8c,
++	  0x24, 0x0e, 0x94, 0x60, 0x0c, 0x6e, 0x33, 0x12 },
++	{ 0x5d, 0x11, 0xed, 0x37, 0xd2, 0x4d, 0xc7, 0x67,
++	  0x30, 0x5c, 0xb7, 0xe1, 0x46, 0x7d, 0x87, 0xc0,
++	  0x65, 0xac, 0x4b, 0xc8, 0xa4, 0x26, 0xde, 0x38,
++	  0x99, 0x1f, 0xf5, 0x9a, 0xa8, 0x73, 0x5d, 0x02 },
++	{ 0xb8, 0x36, 0x47, 0x8e, 0x1c, 0xa0, 0x64, 0x0d,
++	  0xce, 0x6f, 0xd9, 0x10, 0xa5, 0x09, 0x62, 0x72,
++	  0xc8, 0x33, 0x09, 0x90, 0xcd, 0x97, 0x86, 0x4a,
++	  0xc2, 0xbf, 0x14, 0xef, 0x6b, 0x23, 0x91, 0x4a },
++	{ 0x91, 0x00, 0xf9, 0x46, 0xd6, 0xcc, 0xde, 0x3a,
++	  0x59, 0x7f, 0x90, 0xd3, 0x9f, 0xc1, 0x21, 0x5b,
++	  0xad, 0xdc, 0x74, 0x13, 0x64, 0x3d, 0x85, 0xc2,
++	  0x1c, 0x3e, 0xee, 0x5d, 0x2d, 0xd3, 0x28, 0x94 },
++	{ 0xda, 0x70, 0xee, 0xdd, 0x23, 0xe6, 0x63, 0xaa,
++	  0x1a, 0x74, 0xb9, 0x76, 0x69, 0x35, 0xb4, 0x79,
++	  0x22, 0x2a, 0x72, 0xaf, 0xba, 0x5c, 0x79, 0x51,
++	  0x58, 0xda, 0xd4, 0x1a, 0x3b, 0xd7, 0x7e, 0x40 },
++	{ 0xf0, 0x67, 0xed, 0x6a, 0x0d, 0xbd, 0x43, 0xaa,
++	  0x0a, 0x92, 0x54, 0xe6, 0x9f, 0xd6, 0x6b, 0xdd,
++	  0x8a, 0xcb, 0x87, 0xde, 0x93, 0x6c, 0x25, 0x8c,
++	  0xfb, 0x02, 0x28, 0x5f, 0x2c, 0x11, 0xfa, 0x79 },
++	{ 0x71, 0x5c, 0x99, 0xc7, 0xd5, 0x75, 0x80, 0xcf,
++	  0x97, 0x53, 0xb4, 0xc1, 0xd7, 0x95, 0xe4, 0x5a,
++	  0x83, 0xfb, 0xb2, 0x28, 0xc0, 0xd3, 0x6f, 0xbe,
++	  0x20, 0xfa, 0xf3, 0x9b, 0xdd, 0x6d, 0x4e, 0x85 },
++	{ 0xe4, 0x57, 0xd6, 0xad, 0x1e, 0x67, 0xcb, 0x9b,
++	  0xbd, 0x17, 0xcb, 0xd6, 0x98, 0xfa, 0x6d, 0x7d,
++	  0xae, 0x0c, 0x9b, 0x7a, 0xd6, 0xcb, 0xd6, 0x53,
++	  0x96, 0x34, 0xe3, 0x2a, 0x71, 0x9c, 0x84, 0x92 },
++	{ 0xec, 0xe3, 0xea, 0x81, 0x03, 0xe0, 0x24, 0x83,
++	  0xc6, 0x4a, 0x70, 0xa4, 0xbd, 0xce, 0xe8, 0xce,
++	  0xb6, 0x27, 0x8f, 0x25, 0x33, 0xf3, 0xf4, 0x8d,
++	  0xbe, 0xed, 0xfb, 0xa9, 0x45, 0x31, 0xd4, 0xae },
++	{ 0x38, 0x8a, 0xa5, 0xd3, 0x66, 0x7a, 0x97, 0xc6,
++	  0x8d, 0x3d, 0x56, 0xf8, 0xf3, 0xee, 0x8d, 0x3d,
++	  0x36, 0x09, 0x1f, 0x17, 0xfe, 0x5d, 0x1b, 0x0d,
++	  0x5d, 0x84, 0xc9, 0x3b, 0x2f, 0xfe, 0x40, 0xbd },
++	{ 0x8b, 0x6b, 0x31, 0xb9, 0xad, 0x7c, 0x3d, 0x5c,
++	  0xd8, 0x4b, 0xf9, 0x89, 0x47, 0xb9, 0xcd, 0xb5,
++	  0x9d, 0xf8, 0xa2, 0x5f, 0xf7, 0x38, 0x10, 0x10,
++	  0x13, 0xbe, 0x4f, 0xd6, 0x5e, 0x1d, 0xd1, 0xa3 },
++	{ 0x06, 0x62, 0x91, 0xf6, 0xbb, 0xd2, 0x5f, 0x3c,
++	  0x85, 0x3d, 0xb7, 0xd8, 0xb9, 0x5c, 0x9a, 0x1c,
++	  0xfb, 0x9b, 0xf1, 0xc1, 0xc9, 0x9f, 0xb9, 0x5a,
++	  0x9b, 0x78, 0x69, 0xd9, 0x0f, 0x1c, 0x29, 0x03 },
++	{ 0xa7, 0x07, 0xef, 0xbc, 0xcd, 0xce, 0xed, 0x42,
++	  0x96, 0x7a, 0x66, 0xf5, 0x53, 0x9b, 0x93, 0xed,
++	  0x75, 0x60, 0xd4, 0x67, 0x30, 0x40, 0x16, 0xc4,
++	  0x78, 0x0d, 0x77, 0x55, 0xa5, 0x65, 0xd4, 0xc4 },
++	{ 0x38, 0xc5, 0x3d, 0xfb, 0x70, 0xbe, 0x7e, 0x79,
++	  0x2b, 0x07, 0xa6, 0xa3, 0x5b, 0x8a, 0x6a, 0x0a,
++	  0xba, 0x02, 0xc5, 0xc5, 0xf3, 0x8b, 0xaf, 0x5c,
++	  0x82, 0x3f, 0xdf, 0xd9, 0xe4, 0x2d, 0x65, 0x7e },
++	{ 0xf2, 0x91, 0x13, 0x86, 0x50, 0x1d, 0x9a, 0xb9,
++	  0xd7, 0x20, 0xcf, 0x8a, 0xd1, 0x05, 0x03, 0xd5,
++	  0x63, 0x4b, 0xf4, 0xb7, 0xd1, 0x2b, 0x56, 0xdf,
++	  0xb7, 0x4f, 0xec, 0xc6, 0xe4, 0x09, 0x3f, 0x68 },
++	{ 0xc6, 0xf2, 0xbd, 0xd5, 0x2b, 0x81, 0xe6, 0xe4,
++	  0xf6, 0x59, 0x5a, 0xbd, 0x4d, 0x7f, 0xb3, 0x1f,
++	  0x65, 0x11, 0x69, 0xd0, 0x0f, 0xf3, 0x26, 0x92,
++	  0x6b, 0x34, 0x94, 0x7b, 0x28, 0xa8, 0x39, 0x59 },
++	{ 0x29, 0x3d, 0x94, 0xb1, 0x8c, 0x98, 0xbb, 0x32,
++	  0x23, 0x36, 0x6b, 0x8c, 0xe7, 0x4c, 0x28, 0xfb,
++	  0xdf, 0x28, 0xe1, 0xf8, 0x4a, 0x33, 0x50, 0xb0,
++	  0xeb, 0x2d, 0x18, 0x04, 0xa5, 0x77, 0x57, 0x9b },
++	{ 0x2c, 0x2f, 0xa5, 0xc0, 0xb5, 0x15, 0x33, 0x16,
++	  0x5b, 0xc3, 0x75, 0xc2, 0x2e, 0x27, 0x81, 0x76,
++	  0x82, 0x70, 0xa3, 0x83, 0x98, 0x5d, 0x13, 0xbd,
++	  0x6b, 0x67, 0xb6, 0xfd, 0x67, 0xf8, 0x89, 0xeb },
++	{ 0xca, 0xa0, 0x9b, 0x82, 0xb7, 0x25, 0x62, 0xe4,
++	  0x3f, 0x4b, 0x22, 0x75, 0xc0, 0x91, 0x91, 0x8e,
++	  0x62, 0x4d, 0x91, 0x16, 0x61, 0xcc, 0x81, 0x1b,
++	  0xb5, 0xfa, 0xec, 0x51, 0xf6, 0x08, 0x8e, 0xf7 },
++	{ 0x24, 0x76, 0x1e, 0x45, 0xe6, 0x74, 0x39, 0x53,
++	  0x79, 0xfb, 0x17, 0x72, 0x9c, 0x78, 0xcb, 0x93,
++	  0x9e, 0x6f, 0x74, 0xc5, 0xdf, 0xfb, 0x9c, 0x96,
++	  0x1f, 0x49, 0x59, 0x82, 0xc3, 0xed, 0x1f, 0xe3 },
++	{ 0x55, 0xb7, 0x0a, 0x82, 0x13, 0x1e, 0xc9, 0x48,
++	  0x88, 0xd7, 0xab, 0x54, 0xa7, 0xc5, 0x15, 0x25,
++	  0x5c, 0x39, 0x38, 0xbb, 0x10, 0xbc, 0x78, 0x4d,
++	  0xc9, 0xb6, 0x7f, 0x07, 0x6e, 0x34, 0x1a, 0x73 },
++	{ 0x6a, 0xb9, 0x05, 0x7b, 0x97, 0x7e, 0xbc, 0x3c,
++	  0xa4, 0xd4, 0xce, 0x74, 0x50, 0x6c, 0x25, 0xcc,
++	  0xcd, 0xc5, 0x66, 0x49, 0x7c, 0x45, 0x0b, 0x54,
++	  0x15, 0xa3, 0x94, 0x86, 0xf8, 0x65, 0x7a, 0x03 },
++	{ 0x24, 0x06, 0x6d, 0xee, 0xe0, 0xec, 0xee, 0x15,
++	  0xa4, 0x5f, 0x0a, 0x32, 0x6d, 0x0f, 0x8d, 0xbc,
++	  0x79, 0x76, 0x1e, 0xbb, 0x93, 0xcf, 0x8c, 0x03,
++	  0x77, 0xaf, 0x44, 0x09, 0x78, 0xfc, 0xf9, 0x94 },
++	{ 0x20, 0x00, 0x0d, 0x3f, 0x66, 0xba, 0x76, 0x86,
++	  0x0d, 0x5a, 0x95, 0x06, 0x88, 0xb9, 0xaa, 0x0d,
++	  0x76, 0xcf, 0xea, 0x59, 0xb0, 0x05, 0xd8, 0x59,
++	  0x91, 0x4b, 0x1a, 0x46, 0x65, 0x3a, 0x93, 0x9b },
++	{ 0xb9, 0x2d, 0xaa, 0x79, 0x60, 0x3e, 0x3b, 0xdb,
++	  0xc3, 0xbf, 0xe0, 0xf4, 0x19, 0xe4, 0x09, 0xb2,
++	  0xea, 0x10, 0xdc, 0x43, 0x5b, 0xee, 0xfe, 0x29,
++	  0x59, 0xda, 0x16, 0x89, 0x5d, 0x5d, 0xca, 0x1c },
++	{ 0xe9, 0x47, 0x94, 0x87, 0x05, 0xb2, 0x06, 0xd5,
++	  0x72, 0xb0, 0xe8, 0xf6, 0x2f, 0x66, 0xa6, 0x55,
++	  0x1c, 0xbd, 0x6b, 0xc3, 0x05, 0xd2, 0x6c, 0xe7,
++	  0x53, 0x9a, 0x12, 0xf9, 0xaa, 0xdf, 0x75, 0x71 },
++	{ 0x3d, 0x67, 0xc1, 0xb3, 0xf9, 0xb2, 0x39, 0x10,
++	  0xe3, 0xd3, 0x5e, 0x6b, 0x0f, 0x2c, 0xcf, 0x44,
++	  0xa0, 0xb5, 0x40, 0xa4, 0x5c, 0x18, 0xba, 0x3c,
++	  0x36, 0x26, 0x4d, 0xd4, 0x8e, 0x96, 0xaf, 0x6a },
++	{ 0xc7, 0x55, 0x8b, 0xab, 0xda, 0x04, 0xbc, 0xcb,
++	  0x76, 0x4d, 0x0b, 0xbf, 0x33, 0x58, 0x42, 0x51,
++	  0x41, 0x90, 0x2d, 0x22, 0x39, 0x1d, 0x9f, 0x8c,
++	  0x59, 0x15, 0x9f, 0xec, 0x9e, 0x49, 0xb1, 0x51 },
++	{ 0x0b, 0x73, 0x2b, 0xb0, 0x35, 0x67, 0x5a, 0x50,
++	  0xff, 0x58, 0xf2, 0xc2, 0x42, 0xe4, 0x71, 0x0a,
++	  0xec, 0xe6, 0x46, 0x70, 0x07, 0x9c, 0x13, 0x04,
++	  0x4c, 0x79, 0xc9, 0xb7, 0x49, 0x1f, 0x70, 0x00 },
++	{ 0xd1, 0x20, 0xb5, 0xef, 0x6d, 0x57, 0xeb, 0xf0,
++	  0x6e, 0xaf, 0x96, 0xbc, 0x93, 0x3c, 0x96, 0x7b,
++	  0x16, 0xcb, 0xe6, 0xe2, 0xbf, 0x00, 0x74, 0x1c,
++	  0x30, 0xaa, 0x1c, 0x54, 0xba, 0x64, 0x80, 0x1f },
++	{ 0x58, 0xd2, 0x12, 0xad, 0x6f, 0x58, 0xae, 0xf0,
++	  0xf8, 0x01, 0x16, 0xb4, 0x41, 0xe5, 0x7f, 0x61,
++	  0x95, 0xbf, 0xef, 0x26, 0xb6, 0x14, 0x63, 0xed,
++	  0xec, 0x11, 0x83, 0xcd, 0xb0, 0x4f, 0xe7, 0x6d },
++	{ 0xb8, 0x83, 0x6f, 0x51, 0xd1, 0xe2, 0x9b, 0xdf,
++	  0xdb, 0xa3, 0x25, 0x56, 0x53, 0x60, 0x26, 0x8b,
++	  0x8f, 0xad, 0x62, 0x74, 0x73, 0xed, 0xec, 0xef,
++	  0x7e, 0xae, 0xfe, 0xe8, 0x37, 0xc7, 0x40, 0x03 },
++	{ 0xc5, 0x47, 0xa3, 0xc1, 0x24, 0xae, 0x56, 0x85,
++	  0xff, 0xa7, 0xb8, 0xed, 0xaf, 0x96, 0xec, 0x86,
++	  0xf8, 0xb2, 0xd0, 0xd5, 0x0c, 0xee, 0x8b, 0xe3,
++	  0xb1, 0xf0, 0xc7, 0x67, 0x63, 0x06, 0x9d, 0x9c },
++	{ 0x5d, 0x16, 0x8b, 0x76, 0x9a, 0x2f, 0x67, 0x85,
++	  0x3d, 0x62, 0x95, 0xf7, 0x56, 0x8b, 0xe4, 0x0b,
++	  0xb7, 0xa1, 0x6b, 0x8d, 0x65, 0xba, 0x87, 0x63,
++	  0x5d, 0x19, 0x78, 0xd2, 0xab, 0x11, 0xba, 0x2a },
++	{ 0xa2, 0xf6, 0x75, 0xdc, 0x73, 0x02, 0x63, 0x8c,
++	  0xb6, 0x02, 0x01, 0x06, 0x4c, 0xa5, 0x50, 0x77,
++	  0x71, 0x4d, 0x71, 0xfe, 0x09, 0x6a, 0x31, 0x5f,
++	  0x2f, 0xe7, 0x40, 0x12, 0x77, 0xca, 0xa5, 0xaf },
++	{ 0xc8, 0xaa, 0xb5, 0xcd, 0x01, 0x60, 0xae, 0x78,
++	  0xcd, 0x2e, 0x8a, 0xc5, 0xfb, 0x0e, 0x09, 0x3c,
++	  0xdb, 0x5c, 0x4b, 0x60, 0x52, 0xa0, 0xa9, 0x7b,
++	  0xb0, 0x42, 0x16, 0x82, 0x6f, 0xa7, 0xa4, 0x37 },
++	{ 0xff, 0x68, 0xca, 0x40, 0x35, 0xbf, 0xeb, 0x43,
++	  0xfb, 0xf1, 0x45, 0xfd, 0xdd, 0x5e, 0x43, 0xf1,
++	  0xce, 0xa5, 0x4f, 0x11, 0xf7, 0xbe, 0xe1, 0x30,
++	  0x58, 0xf0, 0x27, 0x32, 0x9a, 0x4a, 0x5f, 0xa4 },
++	{ 0x1d, 0x4e, 0x54, 0x87, 0xae, 0x3c, 0x74, 0x0f,
++	  0x2b, 0xa6, 0xe5, 0x41, 0xac, 0x91, 0xbc, 0x2b,
++	  0xfc, 0xd2, 0x99, 0x9c, 0x51, 0x8d, 0x80, 0x7b,
++	  0x42, 0x67, 0x48, 0x80, 0x3a, 0x35, 0x0f, 0xd4 },
++	{ 0x6d, 0x24, 0x4e, 0x1a, 0x06, 0xce, 0x4e, 0xf5,
++	  0x78, 0xdd, 0x0f, 0x63, 0xaf, 0xf0, 0x93, 0x67,
++	  0x06, 0x73, 0x51, 0x19, 0xca, 0x9c, 0x8d, 0x22,
++	  0xd8, 0x6c, 0x80, 0x14, 0x14, 0xab, 0x97, 0x41 },
++	{ 0xde, 0xcf, 0x73, 0x29, 0xdb, 0xcc, 0x82, 0x7b,
++	  0x8f, 0xc5, 0x24, 0xc9, 0x43, 0x1e, 0x89, 0x98,
++	  0x02, 0x9e, 0xce, 0x12, 0xce, 0x93, 0xb7, 0xb2,
++	  0xf3, 0xe7, 0x69, 0xa9, 0x41, 0xfb, 0x8c, 0xea },
++	{ 0x2f, 0xaf, 0xcc, 0x0f, 0x2e, 0x63, 0xcb, 0xd0,
++	  0x77, 0x55, 0xbe, 0x7b, 0x75, 0xec, 0xea, 0x0a,
++	  0xdf, 0xf9, 0xaa, 0x5e, 0xde, 0x2a, 0x52, 0xfd,
++	  0xab, 0x4d, 0xfd, 0x03, 0x74, 0xcd, 0x48, 0x3f },
++	{ 0xaa, 0x85, 0x01, 0x0d, 0xd4, 0x6a, 0x54, 0x6b,
++	  0x53, 0x5e, 0xf4, 0xcf, 0x5f, 0x07, 0xd6, 0x51,
++	  0x61, 0xe8, 0x98, 0x28, 0xf3, 0xa7, 0x7d, 0xb7,
++	  0xb9, 0xb5, 0x6f, 0x0d, 0xf5, 0x9a, 0xae, 0x45 },
++	{ 0x07, 0xe8, 0xe1, 0xee, 0x73, 0x2c, 0xb0, 0xd3,
++	  0x56, 0xc9, 0xc0, 0xd1, 0x06, 0x9c, 0x89, 0xd1,
++	  0x7a, 0xdf, 0x6a, 0x9a, 0x33, 0x4f, 0x74, 0x5e,
++	  0xc7, 0x86, 0x73, 0x32, 0x54, 0x8c, 0xa8, 0xe9 },
++	{ 0x0e, 0x01, 0xe8, 0x1c, 0xad, 0xa8, 0x16, 0x2b,
++	  0xfd, 0x5f, 0x8a, 0x8c, 0x81, 0x8a, 0x6c, 0x69,
++	  0xfe, 0xdf, 0x02, 0xce, 0xb5, 0x20, 0x85, 0x23,
++	  0xcb, 0xe5, 0x31, 0x3b, 0x89, 0xca, 0x10, 0x53 },
++	{ 0x6b, 0xb6, 0xc6, 0x47, 0x26, 0x55, 0x08, 0x43,
++	  0x99, 0x85, 0x2e, 0x00, 0x24, 0x9f, 0x8c, 0xb2,
++	  0x47, 0x89, 0x6d, 0x39, 0x2b, 0x02, 0xd7, 0x3b,
++	  0x7f, 0x0d, 0xd8, 0x18, 0xe1, 0xe2, 0x9b, 0x07 },
++	{ 0x42, 0xd4, 0x63, 0x6e, 0x20, 0x60, 0xf0, 0x8f,
++	  0x41, 0xc8, 0x82, 0xe7, 0x6b, 0x39, 0x6b, 0x11,
++	  0x2e, 0xf6, 0x27, 0xcc, 0x24, 0xc4, 0x3d, 0xd5,
++	  0xf8, 0x3a, 0x1d, 0x1a, 0x7e, 0xad, 0x71, 0x1a },
++	{ 0x48, 0x58, 0xc9, 0xa1, 0x88, 0xb0, 0x23, 0x4f,
++	  0xb9, 0xa8, 0xd4, 0x7d, 0x0b, 0x41, 0x33, 0x65,
++	  0x0a, 0x03, 0x0b, 0xd0, 0x61, 0x1b, 0x87, 0xc3,
++	  0x89, 0x2e, 0x94, 0x95, 0x1f, 0x8d, 0xf8, 0x52 },
++	{ 0x3f, 0xab, 0x3e, 0x36, 0x98, 0x8d, 0x44, 0x5a,
++	  0x51, 0xc8, 0x78, 0x3e, 0x53, 0x1b, 0xe3, 0xa0,
++	  0x2b, 0xe4, 0x0c, 0xd0, 0x47, 0x96, 0xcf, 0xb6,
++	  0x1d, 0x40, 0x34, 0x74, 0x42, 0xd3, 0xf7, 0x94 },
++	{ 0xeb, 0xab, 0xc4, 0x96, 0x36, 0xbd, 0x43, 0x3d,
++	  0x2e, 0xc8, 0xf0, 0xe5, 0x18, 0x73, 0x2e, 0xf8,
++	  0xfa, 0x21, 0xd4, 0xd0, 0x71, 0xcc, 0x3b, 0xc4,
++	  0x6c, 0xd7, 0x9f, 0xa3, 0x8a, 0x28, 0xb8, 0x10 },
++	{ 0xa1, 0xd0, 0x34, 0x35, 0x23, 0xb8, 0x93, 0xfc,
++	  0xa8, 0x4f, 0x47, 0xfe, 0xb4, 0xa6, 0x4d, 0x35,
++	  0x0a, 0x17, 0xd8, 0xee, 0xf5, 0x49, 0x7e, 0xce,
++	  0x69, 0x7d, 0x02, 0xd7, 0x91, 0x78, 0xb5, 0x91 },
++	{ 0x26, 0x2e, 0xbf, 0xd9, 0x13, 0x0b, 0x7d, 0x28,
++	  0x76, 0x0d, 0x08, 0xef, 0x8b, 0xfd, 0x3b, 0x86,
++	  0xcd, 0xd3, 0xb2, 0x11, 0x3d, 0x2c, 0xae, 0xf7,
++	  0xea, 0x95, 0x1a, 0x30, 0x3d, 0xfa, 0x38, 0x46 },
++	{ 0xf7, 0x61, 0x58, 0xed, 0xd5, 0x0a, 0x15, 0x4f,
++	  0xa7, 0x82, 0x03, 0xed, 0x23, 0x62, 0x93, 0x2f,
++	  0xcb, 0x82, 0x53, 0xaa, 0xe3, 0x78, 0x90, 0x3e,
++	  0xde, 0xd1, 0xe0, 0x3f, 0x70, 0x21, 0xa2, 0x57 },
++	{ 0x26, 0x17, 0x8e, 0x95, 0x0a, 0xc7, 0x22, 0xf6,
++	  0x7a, 0xe5, 0x6e, 0x57, 0x1b, 0x28, 0x4c, 0x02,
++	  0x07, 0x68, 0x4a, 0x63, 0x34, 0xa1, 0x77, 0x48,
++	  0xa9, 0x4d, 0x26, 0x0b, 0xc5, 0xf5, 0x52, 0x74 },
++	{ 0xc3, 0x78, 0xd1, 0xe4, 0x93, 0xb4, 0x0e, 0xf1,
++	  0x1f, 0xe6, 0xa1, 0x5d, 0x9c, 0x27, 0x37, 0xa3,
++	  0x78, 0x09, 0x63, 0x4c, 0x5a, 0xba, 0xd5, 0xb3,
++	  0x3d, 0x7e, 0x39, 0x3b, 0x4a, 0xe0, 0x5d, 0x03 },
++	{ 0x98, 0x4b, 0xd8, 0x37, 0x91, 0x01, 0xbe, 0x8f,
++	  0xd8, 0x06, 0x12, 0xd8, 0xea, 0x29, 0x59, 0xa7,
++	  0x86, 0x5e, 0xc9, 0x71, 0x85, 0x23, 0x55, 0x01,
++	  0x07, 0xae, 0x39, 0x38, 0xdf, 0x32, 0x01, 0x1b },
++	{ 0xc6, 0xf2, 0x5a, 0x81, 0x2a, 0x14, 0x48, 0x58,
++	  0xac, 0x5c, 0xed, 0x37, 0xa9, 0x3a, 0x9f, 0x47,
++	  0x59, 0xba, 0x0b, 0x1c, 0x0f, 0xdc, 0x43, 0x1d,
++	  0xce, 0x35, 0xf9, 0xec, 0x1f, 0x1f, 0x4a, 0x99 },
++	{ 0x92, 0x4c, 0x75, 0xc9, 0x44, 0x24, 0xff, 0x75,
++	  0xe7, 0x4b, 0x8b, 0x4e, 0x94, 0x35, 0x89, 0x58,
++	  0xb0, 0x27, 0xb1, 0x71, 0xdf, 0x5e, 0x57, 0x89,
++	  0x9a, 0xd0, 0xd4, 0xda, 0xc3, 0x73, 0x53, 0xb6 },
++	{ 0x0a, 0xf3, 0x58, 0x92, 0xa6, 0x3f, 0x45, 0x93,
++	  0x1f, 0x68, 0x46, 0xed, 0x19, 0x03, 0x61, 0xcd,
++	  0x07, 0x30, 0x89, 0xe0, 0x77, 0x16, 0x57, 0x14,
++	  0xb5, 0x0b, 0x81, 0xa2, 0xe3, 0xdd, 0x9b, 0xa1 },
++	{ 0xcc, 0x80, 0xce, 0xfb, 0x26, 0xc3, 0xb2, 0xb0,
++	  0xda, 0xef, 0x23, 0x3e, 0x60, 0x6d, 0x5f, 0xfc,
++	  0x80, 0xfa, 0x17, 0x42, 0x7d, 0x18, 0xe3, 0x04,
++	  0x89, 0x67, 0x3e, 0x06, 0xef, 0x4b, 0x87, 0xf7 },
++	{ 0xc2, 0xf8, 0xc8, 0x11, 0x74, 0x47, 0xf3, 0x97,
++	  0x8b, 0x08, 0x18, 0xdc, 0xf6, 0xf7, 0x01, 0x16,
++	  0xac, 0x56, 0xfd, 0x18, 0x4d, 0xd1, 0x27, 0x84,
++	  0x94, 0xe1, 0x03, 0xfc, 0x6d, 0x74, 0xa8, 0x87 },
++	{ 0xbd, 0xec, 0xf6, 0xbf, 0xc1, 0xba, 0x0d, 0xf6,
++	  0xe8, 0x62, 0xc8, 0x31, 0x99, 0x22, 0x07, 0x79,
++	  0x6a, 0xcc, 0x79, 0x79, 0x68, 0x35, 0x88, 0x28,
++	  0xc0, 0x6e, 0x7a, 0x51, 0xe0, 0x90, 0x09, 0x8f },
++	{ 0x24, 0xd1, 0xa2, 0x6e, 0x3d, 0xab, 0x02, 0xfe,
++	  0x45, 0x72, 0xd2, 0xaa, 0x7d, 0xbd, 0x3e, 0xc3,
++	  0x0f, 0x06, 0x93, 0xdb, 0x26, 0xf2, 0x73, 0xd0,
++	  0xab, 0x2c, 0xb0, 0xc1, 0x3b, 0x5e, 0x64, 0x51 },
++	{ 0xec, 0x56, 0xf5, 0x8b, 0x09, 0x29, 0x9a, 0x30,
++	  0x0b, 0x14, 0x05, 0x65, 0xd7, 0xd3, 0xe6, 0x87,
++	  0x82, 0xb6, 0xe2, 0xfb, 0xeb, 0x4b, 0x7e, 0xa9,
++	  0x7a, 0xc0, 0x57, 0x98, 0x90, 0x61, 0xdd, 0x3f },
++	{ 0x11, 0xa4, 0x37, 0xc1, 0xab, 0xa3, 0xc1, 0x19,
++	  0xdd, 0xfa, 0xb3, 0x1b, 0x3e, 0x8c, 0x84, 0x1d,
++	  0xee, 0xeb, 0x91, 0x3e, 0xf5, 0x7f, 0x7e, 0x48,
++	  0xf2, 0xc9, 0xcf, 0x5a, 0x28, 0xfa, 0x42, 0xbc },
++	{ 0x53, 0xc7, 0xe6, 0x11, 0x4b, 0x85, 0x0a, 0x2c,
++	  0xb4, 0x96, 0xc9, 0xb3, 0xc6, 0x9a, 0x62, 0x3e,
++	  0xae, 0xa2, 0xcb, 0x1d, 0x33, 0xdd, 0x81, 0x7e,
++	  0x47, 0x65, 0xed, 0xaa, 0x68, 0x23, 0xc2, 0x28 },
++	{ 0x15, 0x4c, 0x3e, 0x96, 0xfe, 0xe5, 0xdb, 0x14,
++	  0xf8, 0x77, 0x3e, 0x18, 0xaf, 0x14, 0x85, 0x79,
++	  0x13, 0x50, 0x9d, 0xa9, 0x99, 0xb4, 0x6c, 0xdd,
++	  0x3d, 0x4c, 0x16, 0x97, 0x60, 0xc8, 0x3a, 0xd2 },
++	{ 0x40, 0xb9, 0x91, 0x6f, 0x09, 0x3e, 0x02, 0x7a,
++	  0x87, 0x86, 0x64, 0x18, 0x18, 0x92, 0x06, 0x20,
++	  0x47, 0x2f, 0xbc, 0xf6, 0x8f, 0x70, 0x1d, 0x1b,
++	  0x68, 0x06, 0x32, 0xe6, 0x99, 0x6b, 0xde, 0xd3 },
++	{ 0x24, 0xc4, 0xcb, 0xba, 0x07, 0x11, 0x98, 0x31,
++	  0xa7, 0x26, 0xb0, 0x53, 0x05, 0xd9, 0x6d, 0xa0,
++	  0x2f, 0xf8, 0xb1, 0x48, 0xf0, 0xda, 0x44, 0x0f,
++	  0xe2, 0x33, 0xbc, 0xaa, 0x32, 0xc7, 0x2f, 0x6f },
++	{ 0x5d, 0x20, 0x15, 0x10, 0x25, 0x00, 0x20, 0xb7,
++	  0x83, 0x68, 0x96, 0x88, 0xab, 0xbf, 0x8e, 0xcf,
++	  0x25, 0x94, 0xa9, 0x6a, 0x08, 0xf2, 0xbf, 0xec,
++	  0x6c, 0xe0, 0x57, 0x44, 0x65, 0xdd, 0xed, 0x71 },
++	{ 0x04, 0x3b, 0x97, 0xe3, 0x36, 0xee, 0x6f, 0xdb,
++	  0xbe, 0x2b, 0x50, 0xf2, 0x2a, 0xf8, 0x32, 0x75,
++	  0xa4, 0x08, 0x48, 0x05, 0xd2, 0xd5, 0x64, 0x59,
++	  0x62, 0x45, 0x4b, 0x6c, 0x9b, 0x80, 0x53, 0xa0 },
++	{ 0x56, 0x48, 0x35, 0xcb, 0xae, 0xa7, 0x74, 0x94,
++	  0x85, 0x68, 0xbe, 0x36, 0xcf, 0x52, 0xfc, 0xdd,
++	  0x83, 0x93, 0x4e, 0xb0, 0xa2, 0x75, 0x12, 0xdb,
++	  0xe3, 0xe2, 0xdb, 0x47, 0xb9, 0xe6, 0x63, 0x5a },
++	{ 0xf2, 0x1c, 0x33, 0xf4, 0x7b, 0xde, 0x40, 0xa2,
++	  0xa1, 0x01, 0xc9, 0xcd, 0xe8, 0x02, 0x7a, 0xaf,
++	  0x61, 0xa3, 0x13, 0x7d, 0xe2, 0x42, 0x2b, 0x30,
++	  0x03, 0x5a, 0x04, 0xc2, 0x70, 0x89, 0x41, 0x83 },
++	{ 0x9d, 0xb0, 0xef, 0x74, 0xe6, 0x6c, 0xbb, 0x84,
++	  0x2e, 0xb0, 0xe0, 0x73, 0x43, 0xa0, 0x3c, 0x5c,
++	  0x56, 0x7e, 0x37, 0x2b, 0x3f, 0x23, 0xb9, 0x43,
++	  0xc7, 0x88, 0xa4, 0xf2, 0x50, 0xf6, 0x78, 0x91 },
++	{ 0xab, 0x8d, 0x08, 0x65, 0x5f, 0xf1, 0xd3, 0xfe,
++	  0x87, 0x58, 0xd5, 0x62, 0x23, 0x5f, 0xd2, 0x3e,
++	  0x7c, 0xf9, 0xdc, 0xaa, 0xd6, 0x58, 0x87, 0x2a,
++	  0x49, 0xe5, 0xd3, 0x18, 0x3b, 0x6c, 0xce, 0xbd },
++	{ 0x6f, 0x27, 0xf7, 0x7e, 0x7b, 0xcf, 0x46, 0xa1,
++	  0xe9, 0x63, 0xad, 0xe0, 0x30, 0x97, 0x33, 0x54,
++	  0x30, 0x31, 0xdc, 0xcd, 0xd4, 0x7c, 0xaa, 0xc1,
++	  0x74, 0xd7, 0xd2, 0x7c, 0xe8, 0x07, 0x7e, 0x8b },
++	{ 0xe3, 0xcd, 0x54, 0xda, 0x7e, 0x44, 0x4c, 0xaa,
++	  0x62, 0x07, 0x56, 0x95, 0x25, 0xa6, 0x70, 0xeb,
++	  0xae, 0x12, 0x78, 0xde, 0x4e, 0x3f, 0xe2, 0x68,
++	  0x4b, 0x3e, 0x33, 0xf5, 0xef, 0x90, 0xcc, 0x1b },
++	{ 0xb2, 0xc3, 0xe3, 0x3a, 0x51, 0xd2, 0x2c, 0x4c,
++	  0x08, 0xfc, 0x09, 0x89, 0xc8, 0x73, 0xc9, 0xcc,
++	  0x41, 0x50, 0x57, 0x9b, 0x1e, 0x61, 0x63, 0xfa,
++	  0x69, 0x4a, 0xd5, 0x1d, 0x53, 0xd7, 0x12, 0xdc },
++	{ 0xbe, 0x7f, 0xda, 0x98, 0x3e, 0x13, 0x18, 0x9b,
++	  0x4c, 0x77, 0xe0, 0xa8, 0x09, 0x20, 0xb6, 0xe0,
++	  0xe0, 0xea, 0x80, 0xc3, 0xb8, 0x4d, 0xbe, 0x7e,
++	  0x71, 0x17, 0xd2, 0x53, 0xf4, 0x81, 0x12, 0xf4 },
++	{ 0xb6, 0x00, 0x8c, 0x28, 0xfa, 0xe0, 0x8a, 0xa4,
++	  0x27, 0xe5, 0xbd, 0x3a, 0xad, 0x36, 0xf1, 0x00,
++	  0x21, 0xf1, 0x6c, 0x77, 0xcf, 0xea, 0xbe, 0xd0,
++	  0x7f, 0x97, 0xcc, 0x7d, 0xc1, 0xf1, 0x28, 0x4a },
++	{ 0x6e, 0x4e, 0x67, 0x60, 0xc5, 0x38, 0xf2, 0xe9,
++	  0x7b, 0x3a, 0xdb, 0xfb, 0xbc, 0xde, 0x57, 0xf8,
++	  0x96, 0x6b, 0x7e, 0xa8, 0xfc, 0xb5, 0xbf, 0x7e,
++	  0xfe, 0xc9, 0x13, 0xfd, 0x2a, 0x2b, 0x0c, 0x55 },
++	{ 0x4a, 0xe5, 0x1f, 0xd1, 0x83, 0x4a, 0xa5, 0xbd,
++	  0x9a, 0x6f, 0x7e, 0xc3, 0x9f, 0xc6, 0x63, 0x33,
++	  0x8d, 0xc5, 0xd2, 0xe2, 0x07, 0x61, 0x56, 0x6d,
++	  0x90, 0xcc, 0x68, 0xb1, 0xcb, 0x87, 0x5e, 0xd8 },
++	{ 0xb6, 0x73, 0xaa, 0xd7, 0x5a, 0xb1, 0xfd, 0xb5,
++	  0x40, 0x1a, 0xbf, 0xa1, 0xbf, 0x89, 0xf3, 0xad,
++	  0xd2, 0xeb, 0xc4, 0x68, 0xdf, 0x36, 0x24, 0xa4,
++	  0x78, 0xf4, 0xfe, 0x85, 0x9d, 0x8d, 0x55, 0xe2 },
++	{ 0x13, 0xc9, 0x47, 0x1a, 0x98, 0x55, 0x91, 0x35,
++	  0x39, 0x83, 0x66, 0x60, 0x39, 0x8d, 0xa0, 0xf3,
++	  0xf9, 0x9a, 0xda, 0x08, 0x47, 0x9c, 0x69, 0xd1,
++	  0xb7, 0xfc, 0xaa, 0x34, 0x61, 0xdd, 0x7e, 0x59 },
++	{ 0x2c, 0x11, 0xf4, 0xa7, 0xf9, 0x9a, 0x1d, 0x23,
++	  0xa5, 0x8b, 0xb6, 0x36, 0x35, 0x0f, 0xe8, 0x49,
++	  0xf2, 0x9c, 0xba, 0xc1, 0xb2, 0xa1, 0x11, 0x2d,
++	  0x9f, 0x1e, 0xd5, 0xbc, 0x5b, 0x31, 0x3c, 0xcd },
++	{ 0xc7, 0xd3, 0xc0, 0x70, 0x6b, 0x11, 0xae, 0x74,
++	  0x1c, 0x05, 0xa1, 0xef, 0x15, 0x0d, 0xd6, 0x5b,
++	  0x54, 0x94, 0xd6, 0xd5, 0x4c, 0x9a, 0x86, 0xe2,
++	  0x61, 0x78, 0x54, 0xe6, 0xae, 0xee, 0xbb, 0xd9 },
++	{ 0x19, 0x4e, 0x10, 0xc9, 0x38, 0x93, 0xaf, 0xa0,
++	  0x64, 0xc3, 0xac, 0x04, 0xc0, 0xdd, 0x80, 0x8d,
++	  0x79, 0x1c, 0x3d, 0x4b, 0x75, 0x56, 0xe8, 0x9d,
++	  0x8d, 0x9c, 0xb2, 0x25, 0xc4, 0xb3, 0x33, 0x39 },
++	{ 0x6f, 0xc4, 0x98, 0x8b, 0x8f, 0x78, 0x54, 0x6b,
++	  0x16, 0x88, 0x99, 0x18, 0x45, 0x90, 0x8f, 0x13,
++	  0x4b, 0x6a, 0x48, 0x2e, 0x69, 0x94, 0xb3, 0xd4,
++	  0x83, 0x17, 0xbf, 0x08, 0xdb, 0x29, 0x21, 0x85 },
++	{ 0x56, 0x65, 0xbe, 0xb8, 0xb0, 0x95, 0x55, 0x25,
++	  0x81, 0x3b, 0x59, 0x81, 0xcd, 0x14, 0x2e, 0xd4,
++	  0xd0, 0x3f, 0xba, 0x38, 0xa6, 0xf3, 0xe5, 0xad,
++	  0x26, 0x8e, 0x0c, 0xc2, 0x70, 0xd1, 0xcd, 0x11 },
++	{ 0xb8, 0x83, 0xd6, 0x8f, 0x5f, 0xe5, 0x19, 0x36,
++	  0x43, 0x1b, 0xa4, 0x25, 0x67, 0x38, 0x05, 0x3b,
++	  0x1d, 0x04, 0x26, 0xd4, 0xcb, 0x64, 0xb1, 0x6e,
++	  0x83, 0xba, 0xdc, 0x5e, 0x9f, 0xbe, 0x3b, 0x81 },
++	{ 0x53, 0xe7, 0xb2, 0x7e, 0xa5, 0x9c, 0x2f, 0x6d,
++	  0xbb, 0x50, 0x76, 0x9e, 0x43, 0x55, 0x4d, 0xf3,
++	  0x5a, 0xf8, 0x9f, 0x48, 0x22, 0xd0, 0x46, 0x6b,
++	  0x00, 0x7d, 0xd6, 0xf6, 0xde, 0xaf, 0xff, 0x02 },
++	{ 0x1f, 0x1a, 0x02, 0x29, 0xd4, 0x64, 0x0f, 0x01,
++	  0x90, 0x15, 0x88, 0xd9, 0xde, 0xc2, 0x2d, 0x13,
++	  0xfc, 0x3e, 0xb3, 0x4a, 0x61, 0xb3, 0x29, 0x38,
++	  0xef, 0xbf, 0x53, 0x34, 0xb2, 0x80, 0x0a, 0xfa },
++	{ 0xc2, 0xb4, 0x05, 0xaf, 0xa0, 0xfa, 0x66, 0x68,
++	  0x85, 0x2a, 0xee, 0x4d, 0x88, 0x04, 0x08, 0x53,
++	  0xfa, 0xb8, 0x00, 0xe7, 0x2b, 0x57, 0x58, 0x14,
++	  0x18, 0xe5, 0x50, 0x6f, 0x21, 0x4c, 0x7d, 0x1f },
++	{ 0xc0, 0x8a, 0xa1, 0xc2, 0x86, 0xd7, 0x09, 0xfd,
++	  0xc7, 0x47, 0x37, 0x44, 0x97, 0x71, 0x88, 0xc8,
++	  0x95, 0xba, 0x01, 0x10, 0x14, 0x24, 0x7e, 0x4e,
++	  0xfa, 0x8d, 0x07, 0xe7, 0x8f, 0xec, 0x69, 0x5c },
++	{ 0xf0, 0x3f, 0x57, 0x89, 0xd3, 0x33, 0x6b, 0x80,
++	  0xd0, 0x02, 0xd5, 0x9f, 0xdf, 0x91, 0x8b, 0xdb,
++	  0x77, 0x5b, 0x00, 0x95, 0x6e, 0xd5, 0x52, 0x8e,
++	  0x86, 0xaa, 0x99, 0x4a, 0xcb, 0x38, 0xfe, 0x2d }
++};
++
++static const u8 blake2s_keyed_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {
++	{ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b,
++	  0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b,
++	  0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a,
++	  0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49 },
++	{ 0x40, 0xd1, 0x5f, 0xee, 0x7c, 0x32, 0x88, 0x30,
++	  0x16, 0x6a, 0xc3, 0xf9, 0x18, 0x65, 0x0f, 0x80,
++	  0x7e, 0x7e, 0x01, 0xe1, 0x77, 0x25, 0x8c, 0xdc,
++	  0x0a, 0x39, 0xb1, 0x1f, 0x59, 0x80, 0x66, 0xf1 },
++	{ 0x6b, 0xb7, 0x13, 0x00, 0x64, 0x4c, 0xd3, 0x99,
++	  0x1b, 0x26, 0xcc, 0xd4, 0xd2, 0x74, 0xac, 0xd1,
++	  0xad, 0xea, 0xb8, 0xb1, 0xd7, 0x91, 0x45, 0x46,
++	  0xc1, 0x19, 0x8b, 0xbe, 0x9f, 0xc9, 0xd8, 0x03 },
++	{ 0x1d, 0x22, 0x0d, 0xbe, 0x2e, 0xe1, 0x34, 0x66,
++	  0x1f, 0xdf, 0x6d, 0x9e, 0x74, 0xb4, 0x17, 0x04,
++	  0x71, 0x05, 0x56, 0xf2, 0xf6, 0xe5, 0xa0, 0x91,
++	  0xb2, 0x27, 0x69, 0x74, 0x45, 0xdb, 0xea, 0x6b },
++	{ 0xf6, 0xc3, 0xfb, 0xad, 0xb4, 0xcc, 0x68, 0x7a,
++	  0x00, 0x64, 0xa5, 0xbe, 0x6e, 0x79, 0x1b, 0xec,
++	  0x63, 0xb8, 0x68, 0xad, 0x62, 0xfb, 0xa6, 0x1b,
++	  0x37, 0x57, 0xef, 0x9c, 0xa5, 0x2e, 0x05, 0xb2 },
++	{ 0x49, 0xc1, 0xf2, 0x11, 0x88, 0xdf, 0xd7, 0x69,
++	  0xae, 0xa0, 0xe9, 0x11, 0xdd, 0x6b, 0x41, 0xf1,
++	  0x4d, 0xab, 0x10, 0x9d, 0x2b, 0x85, 0x97, 0x7a,
++	  0xa3, 0x08, 0x8b, 0x5c, 0x70, 0x7e, 0x85, 0x98 },
++	{ 0xfd, 0xd8, 0x99, 0x3d, 0xcd, 0x43, 0xf6, 0x96,
++	  0xd4, 0x4f, 0x3c, 0xea, 0x0f, 0xf3, 0x53, 0x45,
++	  0x23, 0x4e, 0xc8, 0xee, 0x08, 0x3e, 0xb3, 0xca,
++	  0xda, 0x01, 0x7c, 0x7f, 0x78, 0xc1, 0x71, 0x43 },
++	{ 0xe6, 0xc8, 0x12, 0x56, 0x37, 0x43, 0x8d, 0x09,
++	  0x05, 0xb7, 0x49, 0xf4, 0x65, 0x60, 0xac, 0x89,
++	  0xfd, 0x47, 0x1c, 0xf8, 0x69, 0x2e, 0x28, 0xfa,
++	  0xb9, 0x82, 0xf7, 0x3f, 0x01, 0x9b, 0x83, 0xa9 },
++	{ 0x19, 0xfc, 0x8c, 0xa6, 0x97, 0x9d, 0x60, 0xe6,
++	  0xed, 0xd3, 0xb4, 0x54, 0x1e, 0x2f, 0x96, 0x7c,
++	  0xed, 0x74, 0x0d, 0xf6, 0xec, 0x1e, 0xae, 0xbb,
++	  0xfe, 0x81, 0x38, 0x32, 0xe9, 0x6b, 0x29, 0x74 },
++	{ 0xa6, 0xad, 0x77, 0x7c, 0xe8, 0x81, 0xb5, 0x2b,
++	  0xb5, 0xa4, 0x42, 0x1a, 0xb6, 0xcd, 0xd2, 0xdf,
++	  0xba, 0x13, 0xe9, 0x63, 0x65, 0x2d, 0x4d, 0x6d,
++	  0x12, 0x2a, 0xee, 0x46, 0x54, 0x8c, 0x14, 0xa7 },
++	{ 0xf5, 0xc4, 0xb2, 0xba, 0x1a, 0x00, 0x78, 0x1b,
++	  0x13, 0xab, 0xa0, 0x42, 0x52, 0x42, 0xc6, 0x9c,
++	  0xb1, 0x55, 0x2f, 0x3f, 0x71, 0xa9, 0xa3, 0xbb,
++	  0x22, 0xb4, 0xa6, 0xb4, 0x27, 0x7b, 0x46, 0xdd },
++	{ 0xe3, 0x3c, 0x4c, 0x9b, 0xd0, 0xcc, 0x7e, 0x45,
++	  0xc8, 0x0e, 0x65, 0xc7, 0x7f, 0xa5, 0x99, 0x7f,
++	  0xec, 0x70, 0x02, 0x73, 0x85, 0x41, 0x50, 0x9e,
++	  0x68, 0xa9, 0x42, 0x38, 0x91, 0xe8, 0x22, 0xa3 },
++	{ 0xfb, 0xa1, 0x61, 0x69, 0xb2, 0xc3, 0xee, 0x10,
++	  0x5b, 0xe6, 0xe1, 0xe6, 0x50, 0xe5, 0xcb, 0xf4,
++	  0x07, 0x46, 0xb6, 0x75, 0x3d, 0x03, 0x6a, 0xb5,
++	  0x51, 0x79, 0x01, 0x4a, 0xd7, 0xef, 0x66, 0x51 },
++	{ 0xf5, 0xc4, 0xbe, 0xc6, 0xd6, 0x2f, 0xc6, 0x08,
++	  0xbf, 0x41, 0xcc, 0x11, 0x5f, 0x16, 0xd6, 0x1c,
++	  0x7e, 0xfd, 0x3f, 0xf6, 0xc6, 0x56, 0x92, 0xbb,
++	  0xe0, 0xaf, 0xff, 0xb1, 0xfe, 0xde, 0x74, 0x75 },
++	{ 0xa4, 0x86, 0x2e, 0x76, 0xdb, 0x84, 0x7f, 0x05,
++	  0xba, 0x17, 0xed, 0xe5, 0xda, 0x4e, 0x7f, 0x91,
++	  0xb5, 0x92, 0x5c, 0xf1, 0xad, 0x4b, 0xa1, 0x27,
++	  0x32, 0xc3, 0x99, 0x57, 0x42, 0xa5, 0xcd, 0x6e },
++	{ 0x65, 0xf4, 0xb8, 0x60, 0xcd, 0x15, 0xb3, 0x8e,
++	  0xf8, 0x14, 0xa1, 0xa8, 0x04, 0x31, 0x4a, 0x55,
++	  0xbe, 0x95, 0x3c, 0xaa, 0x65, 0xfd, 0x75, 0x8a,
++	  0xd9, 0x89, 0xff, 0x34, 0xa4, 0x1c, 0x1e, 0xea },
++	{ 0x19, 0xba, 0x23, 0x4f, 0x0a, 0x4f, 0x38, 0x63,
++	  0x7d, 0x18, 0x39, 0xf9, 0xd9, 0xf7, 0x6a, 0xd9,
++	  0x1c, 0x85, 0x22, 0x30, 0x71, 0x43, 0xc9, 0x7d,
++	  0x5f, 0x93, 0xf6, 0x92, 0x74, 0xce, 0xc9, 0xa7 },
++	{ 0x1a, 0x67, 0x18, 0x6c, 0xa4, 0xa5, 0xcb, 0x8e,
++	  0x65, 0xfc, 0xa0, 0xe2, 0xec, 0xbc, 0x5d, 0xdc,
++	  0x14, 0xae, 0x38, 0x1b, 0xb8, 0xbf, 0xfe, 0xb9,
++	  0xe0, 0xa1, 0x03, 0x44, 0x9e, 0x3e, 0xf0, 0x3c },
++	{ 0xaf, 0xbe, 0xa3, 0x17, 0xb5, 0xa2, 0xe8, 0x9c,
++	  0x0b, 0xd9, 0x0c, 0xcf, 0x5d, 0x7f, 0xd0, 0xed,
++	  0x57, 0xfe, 0x58, 0x5e, 0x4b, 0xe3, 0x27, 0x1b,
++	  0x0a, 0x6b, 0xf0, 0xf5, 0x78, 0x6b, 0x0f, 0x26 },
++	{ 0xf1, 0xb0, 0x15, 0x58, 0xce, 0x54, 0x12, 0x62,
++	  0xf5, 0xec, 0x34, 0x29, 0x9d, 0x6f, 0xb4, 0x09,
++	  0x00, 0x09, 0xe3, 0x43, 0x4b, 0xe2, 0xf4, 0x91,
++	  0x05, 0xcf, 0x46, 0xaf, 0x4d, 0x2d, 0x41, 0x24 },
++	{ 0x13, 0xa0, 0xa0, 0xc8, 0x63, 0x35, 0x63, 0x5e,
++	  0xaa, 0x74, 0xca, 0x2d, 0x5d, 0x48, 0x8c, 0x79,
++	  0x7b, 0xbb, 0x4f, 0x47, 0xdc, 0x07, 0x10, 0x50,
++	  0x15, 0xed, 0x6a, 0x1f, 0x33, 0x09, 0xef, 0xce },
++	{ 0x15, 0x80, 0xaf, 0xee, 0xbe, 0xbb, 0x34, 0x6f,
++	  0x94, 0xd5, 0x9f, 0xe6, 0x2d, 0xa0, 0xb7, 0x92,
++	  0x37, 0xea, 0xd7, 0xb1, 0x49, 0x1f, 0x56, 0x67,
++	  0xa9, 0x0e, 0x45, 0xed, 0xf6, 0xca, 0x8b, 0x03 },
++	{ 0x20, 0xbe, 0x1a, 0x87, 0x5b, 0x38, 0xc5, 0x73,
++	  0xdd, 0x7f, 0xaa, 0xa0, 0xde, 0x48, 0x9d, 0x65,
++	  0x5c, 0x11, 0xef, 0xb6, 0xa5, 0x52, 0x69, 0x8e,
++	  0x07, 0xa2, 0xd3, 0x31, 0xb5, 0xf6, 0x55, 0xc3 },
++	{ 0xbe, 0x1f, 0xe3, 0xc4, 0xc0, 0x40, 0x18, 0xc5,
++	  0x4c, 0x4a, 0x0f, 0x6b, 0x9a, 0x2e, 0xd3, 0xc5,
++	  0x3a, 0xbe, 0x3a, 0x9f, 0x76, 0xb4, 0xd2, 0x6d,
++	  0xe5, 0x6f, 0xc9, 0xae, 0x95, 0x05, 0x9a, 0x99 },
++	{ 0xe3, 0xe3, 0xac, 0xe5, 0x37, 0xeb, 0x3e, 0xdd,
++	  0x84, 0x63, 0xd9, 0xad, 0x35, 0x82, 0xe1, 0x3c,
++	  0xf8, 0x65, 0x33, 0xff, 0xde, 0x43, 0xd6, 0x68,
++	  0xdd, 0x2e, 0x93, 0xbb, 0xdb, 0xd7, 0x19, 0x5a },
++	{ 0x11, 0x0c, 0x50, 0xc0, 0xbf, 0x2c, 0x6e, 0x7a,
++	  0xeb, 0x7e, 0x43, 0x5d, 0x92, 0xd1, 0x32, 0xab,
++	  0x66, 0x55, 0x16, 0x8e, 0x78, 0xa2, 0xde, 0xcd,
++	  0xec, 0x33, 0x30, 0x77, 0x76, 0x84, 0xd9, 0xc1 },
++	{ 0xe9, 0xba, 0x8f, 0x50, 0x5c, 0x9c, 0x80, 0xc0,
++	  0x86, 0x66, 0xa7, 0x01, 0xf3, 0x36, 0x7e, 0x6c,
++	  0xc6, 0x65, 0xf3, 0x4b, 0x22, 0xe7, 0x3c, 0x3c,
++	  0x04, 0x17, 0xeb, 0x1c, 0x22, 0x06, 0x08, 0x2f },
++	{ 0x26, 0xcd, 0x66, 0xfc, 0xa0, 0x23, 0x79, 0xc7,
++	  0x6d, 0xf1, 0x23, 0x17, 0x05, 0x2b, 0xca, 0xfd,
++	  0x6c, 0xd8, 0xc3, 0xa7, 0xb8, 0x90, 0xd8, 0x05,
++	  0xf3, 0x6c, 0x49, 0x98, 0x97, 0x82, 0x43, 0x3a },
++	{ 0x21, 0x3f, 0x35, 0x96, 0xd6, 0xe3, 0xa5, 0xd0,
++	  0xe9, 0x93, 0x2c, 0xd2, 0x15, 0x91, 0x46, 0x01,
++	  0x5e, 0x2a, 0xbc, 0x94, 0x9f, 0x47, 0x29, 0xee,
++	  0x26, 0x32, 0xfe, 0x1e, 0xdb, 0x78, 0xd3, 0x37 },
++	{ 0x10, 0x15, 0xd7, 0x01, 0x08, 0xe0, 0x3b, 0xe1,
++	  0xc7, 0x02, 0xfe, 0x97, 0x25, 0x36, 0x07, 0xd1,
++	  0x4a, 0xee, 0x59, 0x1f, 0x24, 0x13, 0xea, 0x67,
++	  0x87, 0x42, 0x7b, 0x64, 0x59, 0xff, 0x21, 0x9a },
++	{ 0x3c, 0xa9, 0x89, 0xde, 0x10, 0xcf, 0xe6, 0x09,
++	  0x90, 0x94, 0x72, 0xc8, 0xd3, 0x56, 0x10, 0x80,
++	  0x5b, 0x2f, 0x97, 0x77, 0x34, 0xcf, 0x65, 0x2c,
++	  0xc6, 0x4b, 0x3b, 0xfc, 0x88, 0x2d, 0x5d, 0x89 },
++	{ 0xb6, 0x15, 0x6f, 0x72, 0xd3, 0x80, 0xee, 0x9e,
++	  0xa6, 0xac, 0xd1, 0x90, 0x46, 0x4f, 0x23, 0x07,
++	  0xa5, 0xc1, 0x79, 0xef, 0x01, 0xfd, 0x71, 0xf9,
++	  0x9f, 0x2d, 0x0f, 0x7a, 0x57, 0x36, 0x0a, 0xea },
++	{ 0xc0, 0x3b, 0xc6, 0x42, 0xb2, 0x09, 0x59, 0xcb,
++	  0xe1, 0x33, 0xa0, 0x30, 0x3e, 0x0c, 0x1a, 0xbf,
++	  0xf3, 0xe3, 0x1e, 0xc8, 0xe1, 0xa3, 0x28, 0xec,
++	  0x85, 0x65, 0xc3, 0x6d, 0xec, 0xff, 0x52, 0x65 },
++	{ 0x2c, 0x3e, 0x08, 0x17, 0x6f, 0x76, 0x0c, 0x62,
++	  0x64, 0xc3, 0xa2, 0xcd, 0x66, 0xfe, 0xc6, 0xc3,
++	  0xd7, 0x8d, 0xe4, 0x3f, 0xc1, 0x92, 0x45, 0x7b,
++	  0x2a, 0x4a, 0x66, 0x0a, 0x1e, 0x0e, 0xb2, 0x2b },
++	{ 0xf7, 0x38, 0xc0, 0x2f, 0x3c, 0x1b, 0x19, 0x0c,
++	  0x51, 0x2b, 0x1a, 0x32, 0xde, 0xab, 0xf3, 0x53,
++	  0x72, 0x8e, 0x0e, 0x9a, 0xb0, 0x34, 0x49, 0x0e,
++	  0x3c, 0x34, 0x09, 0x94, 0x6a, 0x97, 0xae, 0xec },
++	{ 0x8b, 0x18, 0x80, 0xdf, 0x30, 0x1c, 0xc9, 0x63,
++	  0x41, 0x88, 0x11, 0x08, 0x89, 0x64, 0x83, 0x92,
++	  0x87, 0xff, 0x7f, 0xe3, 0x1c, 0x49, 0xea, 0x6e,
++	  0xbd, 0x9e, 0x48, 0xbd, 0xee, 0xe4, 0x97, 0xc5 },
++	{ 0x1e, 0x75, 0xcb, 0x21, 0xc6, 0x09, 0x89, 0x02,
++	  0x03, 0x75, 0xf1, 0xa7, 0xa2, 0x42, 0x83, 0x9f,
++	  0x0b, 0x0b, 0x68, 0x97, 0x3a, 0x4c, 0x2a, 0x05,
++	  0xcf, 0x75, 0x55, 0xed, 0x5a, 0xae, 0xc4, 0xc1 },
++	{ 0x62, 0xbf, 0x8a, 0x9c, 0x32, 0xa5, 0xbc, 0xcf,
++	  0x29, 0x0b, 0x6c, 0x47, 0x4d, 0x75, 0xb2, 0xa2,
++	  0xa4, 0x09, 0x3f, 0x1a, 0x9e, 0x27, 0x13, 0x94,
++	  0x33, 0xa8, 0xf2, 0xb3, 0xbc, 0xe7, 0xb8, 0xd7 },
++	{ 0x16, 0x6c, 0x83, 0x50, 0xd3, 0x17, 0x3b, 0x5e,
++	  0x70, 0x2b, 0x78, 0x3d, 0xfd, 0x33, 0xc6, 0x6e,
++	  0xe0, 0x43, 0x27, 0x42, 0xe9, 0xb9, 0x2b, 0x99,
++	  0x7f, 0xd2, 0x3c, 0x60, 0xdc, 0x67, 0x56, 0xca },
++	{ 0x04, 0x4a, 0x14, 0xd8, 0x22, 0xa9, 0x0c, 0xac,
++	  0xf2, 0xf5, 0xa1, 0x01, 0x42, 0x8a, 0xdc, 0x8f,
++	  0x41, 0x09, 0x38, 0x6c, 0xcb, 0x15, 0x8b, 0xf9,
++	  0x05, 0xc8, 0x61, 0x8b, 0x8e, 0xe2, 0x4e, 0xc3 },
++	{ 0x38, 0x7d, 0x39, 0x7e, 0xa4, 0x3a, 0x99, 0x4b,
++	  0xe8, 0x4d, 0x2d, 0x54, 0x4a, 0xfb, 0xe4, 0x81,
++	  0xa2, 0x00, 0x0f, 0x55, 0x25, 0x26, 0x96, 0xbb,
++	  0xa2, 0xc5, 0x0c, 0x8e, 0xbd, 0x10, 0x13, 0x47 },
++	{ 0x56, 0xf8, 0xcc, 0xf1, 0xf8, 0x64, 0x09, 0xb4,
++	  0x6c, 0xe3, 0x61, 0x66, 0xae, 0x91, 0x65, 0x13,
++	  0x84, 0x41, 0x57, 0x75, 0x89, 0xdb, 0x08, 0xcb,
++	  0xc5, 0xf6, 0x6c, 0xa2, 0x97, 0x43, 0xb9, 0xfd },
++	{ 0x97, 0x06, 0xc0, 0x92, 0xb0, 0x4d, 0x91, 0xf5,
++	  0x3d, 0xff, 0x91, 0xfa, 0x37, 0xb7, 0x49, 0x3d,
++	  0x28, 0xb5, 0x76, 0xb5, 0xd7, 0x10, 0x46, 0x9d,
++	  0xf7, 0x94, 0x01, 0x66, 0x22, 0x36, 0xfc, 0x03 },
++	{ 0x87, 0x79, 0x68, 0x68, 0x6c, 0x06, 0x8c, 0xe2,
++	  0xf7, 0xe2, 0xad, 0xcf, 0xf6, 0x8b, 0xf8, 0x74,
++	  0x8e, 0xdf, 0x3c, 0xf8, 0x62, 0xcf, 0xb4, 0xd3,
++	  0x94, 0x7a, 0x31, 0x06, 0x95, 0x80, 0x54, 0xe3 },
++	{ 0x88, 0x17, 0xe5, 0x71, 0x98, 0x79, 0xac, 0xf7,
++	  0x02, 0x47, 0x87, 0xec, 0xcd, 0xb2, 0x71, 0x03,
++	  0x55, 0x66, 0xcf, 0xa3, 0x33, 0xe0, 0x49, 0x40,
++	  0x7c, 0x01, 0x78, 0xcc, 0xc5, 0x7a, 0x5b, 0x9f },
++	{ 0x89, 0x38, 0x24, 0x9e, 0x4b, 0x50, 0xca, 0xda,
++	  0xcc, 0xdf, 0x5b, 0x18, 0x62, 0x13, 0x26, 0xcb,
++	  0xb1, 0x52, 0x53, 0xe3, 0x3a, 0x20, 0xf5, 0x63,
++	  0x6e, 0x99, 0x5d, 0x72, 0x47, 0x8d, 0xe4, 0x72 },
++	{ 0xf1, 0x64, 0xab, 0xba, 0x49, 0x63, 0xa4, 0x4d,
++	  0x10, 0x72, 0x57, 0xe3, 0x23, 0x2d, 0x90, 0xac,
++	  0xa5, 0xe6, 0x6a, 0x14, 0x08, 0x24, 0x8c, 0x51,
++	  0x74, 0x1e, 0x99, 0x1d, 0xb5, 0x22, 0x77, 0x56 },
++	{ 0xd0, 0x55, 0x63, 0xe2, 0xb1, 0xcb, 0xa0, 0xc4,
++	  0xa2, 0xa1, 0xe8, 0xbd, 0xe3, 0xa1, 0xa0, 0xd9,
++	  0xf5, 0xb4, 0x0c, 0x85, 0xa0, 0x70, 0xd6, 0xf5,
++	  0xfb, 0x21, 0x06, 0x6e, 0xad, 0x5d, 0x06, 0x01 },
++	{ 0x03, 0xfb, 0xb1, 0x63, 0x84, 0xf0, 0xa3, 0x86,
++	  0x6f, 0x4c, 0x31, 0x17, 0x87, 0x76, 0x66, 0xef,
++	  0xbf, 0x12, 0x45, 0x97, 0x56, 0x4b, 0x29, 0x3d,
++	  0x4a, 0xab, 0x0d, 0x26, 0x9f, 0xab, 0xdd, 0xfa },
++	{ 0x5f, 0xa8, 0x48, 0x6a, 0xc0, 0xe5, 0x29, 0x64,
++	  0xd1, 0x88, 0x1b, 0xbe, 0x33, 0x8e, 0xb5, 0x4b,
++	  0xe2, 0xf7, 0x19, 0x54, 0x92, 0x24, 0x89, 0x20,
++	  0x57, 0xb4, 0xda, 0x04, 0xba, 0x8b, 0x34, 0x75 },
++	{ 0xcd, 0xfa, 0xbc, 0xee, 0x46, 0x91, 0x11, 0x11,
++	  0x23, 0x6a, 0x31, 0x70, 0x8b, 0x25, 0x39, 0xd7,
++	  0x1f, 0xc2, 0x11, 0xd9, 0xb0, 0x9c, 0x0d, 0x85,
++	  0x30, 0xa1, 0x1e, 0x1d, 0xbf, 0x6e, 0xed, 0x01 },
++	{ 0x4f, 0x82, 0xde, 0x03, 0xb9, 0x50, 0x47, 0x93,
++	  0xb8, 0x2a, 0x07, 0xa0, 0xbd, 0xcd, 0xff, 0x31,
++	  0x4d, 0x75, 0x9e, 0x7b, 0x62, 0xd2, 0x6b, 0x78,
++	  0x49, 0x46, 0xb0, 0xd3, 0x6f, 0x91, 0x6f, 0x52 },
++	{ 0x25, 0x9e, 0xc7, 0xf1, 0x73, 0xbc, 0xc7, 0x6a,
++	  0x09, 0x94, 0xc9, 0x67, 0xb4, 0xf5, 0xf0, 0x24,
++	  0xc5, 0x60, 0x57, 0xfb, 0x79, 0xc9, 0x65, 0xc4,
++	  0xfa, 0xe4, 0x18, 0x75, 0xf0, 0x6a, 0x0e, 0x4c },
++	{ 0x19, 0x3c, 0xc8, 0xe7, 0xc3, 0xe0, 0x8b, 0xb3,
++	  0x0f, 0x54, 0x37, 0xaa, 0x27, 0xad, 0xe1, 0xf1,
++	  0x42, 0x36, 0x9b, 0x24, 0x6a, 0x67, 0x5b, 0x23,
++	  0x83, 0xe6, 0xda, 0x9b, 0x49, 0xa9, 0x80, 0x9e },
++	{ 0x5c, 0x10, 0x89, 0x6f, 0x0e, 0x28, 0x56, 0xb2,
++	  0xa2, 0xee, 0xe0, 0xfe, 0x4a, 0x2c, 0x16, 0x33,
++	  0x56, 0x5d, 0x18, 0xf0, 0xe9, 0x3e, 0x1f, 0xab,
++	  0x26, 0xc3, 0x73, 0xe8, 0xf8, 0x29, 0x65, 0x4d },
++	{ 0xf1, 0x60, 0x12, 0xd9, 0x3f, 0x28, 0x85, 0x1a,
++	  0x1e, 0xb9, 0x89, 0xf5, 0xd0, 0xb4, 0x3f, 0x3f,
++	  0x39, 0xca, 0x73, 0xc9, 0xa6, 0x2d, 0x51, 0x81,
++	  0xbf, 0xf2, 0x37, 0x53, 0x6b, 0xd3, 0x48, 0xc3 },
++	{ 0x29, 0x66, 0xb3, 0xcf, 0xae, 0x1e, 0x44, 0xea,
++	  0x99, 0x6d, 0xc5, 0xd6, 0x86, 0xcf, 0x25, 0xfa,
++	  0x05, 0x3f, 0xb6, 0xf6, 0x72, 0x01, 0xb9, 0xe4,
++	  0x6e, 0xad, 0xe8, 0x5d, 0x0a, 0xd6, 0xb8, 0x06 },
++	{ 0xdd, 0xb8, 0x78, 0x24, 0x85, 0xe9, 0x00, 0xbc,
++	  0x60, 0xbc, 0xf4, 0xc3, 0x3a, 0x6f, 0xd5, 0x85,
++	  0x68, 0x0c, 0xc6, 0x83, 0xd5, 0x16, 0xef, 0xa0,
++	  0x3e, 0xb9, 0x98, 0x5f, 0xad, 0x87, 0x15, 0xfb },
++	{ 0x4c, 0x4d, 0x6e, 0x71, 0xae, 0xa0, 0x57, 0x86,
++	  0x41, 0x31, 0x48, 0xfc, 0x7a, 0x78, 0x6b, 0x0e,
++	  0xca, 0xf5, 0x82, 0xcf, 0xf1, 0x20, 0x9f, 0x5a,
++	  0x80, 0x9f, 0xba, 0x85, 0x04, 0xce, 0x66, 0x2c },
++	{ 0xfb, 0x4c, 0x5e, 0x86, 0xd7, 0xb2, 0x22, 0x9b,
++	  0x99, 0xb8, 0xba, 0x6d, 0x94, 0xc2, 0x47, 0xef,
++	  0x96, 0x4a, 0xa3, 0xa2, 0xba, 0xe8, 0xed, 0xc7,
++	  0x75, 0x69, 0xf2, 0x8d, 0xbb, 0xff, 0x2d, 0x4e },
++	{ 0xe9, 0x4f, 0x52, 0x6d, 0xe9, 0x01, 0x96, 0x33,
++	  0xec, 0xd5, 0x4a, 0xc6, 0x12, 0x0f, 0x23, 0x95,
++	  0x8d, 0x77, 0x18, 0xf1, 0xe7, 0x71, 0x7b, 0xf3,
++	  0x29, 0x21, 0x1a, 0x4f, 0xae, 0xed, 0x4e, 0x6d },
++	{ 0xcb, 0xd6, 0x66, 0x0a, 0x10, 0xdb, 0x3f, 0x23,
++	  0xf7, 0xa0, 0x3d, 0x4b, 0x9d, 0x40, 0x44, 0xc7,
++	  0x93, 0x2b, 0x28, 0x01, 0xac, 0x89, 0xd6, 0x0b,
++	  0xc9, 0xeb, 0x92, 0xd6, 0x5a, 0x46, 0xc2, 0xa0 },
++	{ 0x88, 0x18, 0xbb, 0xd3, 0xdb, 0x4d, 0xc1, 0x23,
++	  0xb2, 0x5c, 0xbb, 0xa5, 0xf5, 0x4c, 0x2b, 0xc4,
++	  0xb3, 0xfc, 0xf9, 0xbf, 0x7d, 0x7a, 0x77, 0x09,
++	  0xf4, 0xae, 0x58, 0x8b, 0x26, 0x7c, 0x4e, 0xce },
++	{ 0xc6, 0x53, 0x82, 0x51, 0x3f, 0x07, 0x46, 0x0d,
++	  0xa3, 0x98, 0x33, 0xcb, 0x66, 0x6c, 0x5e, 0xd8,
++	  0x2e, 0x61, 0xb9, 0xe9, 0x98, 0xf4, 0xb0, 0xc4,
++	  0x28, 0x7c, 0xee, 0x56, 0xc3, 0xcc, 0x9b, 0xcd },
++	{ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66,
++	  0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26,
++	  0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab,
++	  0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4 },
++	{ 0x21, 0xfe, 0x0c, 0xeb, 0x00, 0x52, 0xbe, 0x7f,
++	  0xb0, 0xf0, 0x04, 0x18, 0x7c, 0xac, 0xd7, 0xde,
++	  0x67, 0xfa, 0x6e, 0xb0, 0x93, 0x8d, 0x92, 0x76,
++	  0x77, 0xf2, 0x39, 0x8c, 0x13, 0x23, 0x17, 0xa8 },
++	{ 0x2e, 0xf7, 0x3f, 0x3c, 0x26, 0xf1, 0x2d, 0x93,
++	  0x88, 0x9f, 0x3c, 0x78, 0xb6, 0xa6, 0x6c, 0x1d,
++	  0x52, 0xb6, 0x49, 0xdc, 0x9e, 0x85, 0x6e, 0x2c,
++	  0x17, 0x2e, 0xa7, 0xc5, 0x8a, 0xc2, 0xb5, 0xe3 },
++	{ 0x38, 0x8a, 0x3c, 0xd5, 0x6d, 0x73, 0x86, 0x7a,
++	  0xbb, 0x5f, 0x84, 0x01, 0x49, 0x2b, 0x6e, 0x26,
++	  0x81, 0xeb, 0x69, 0x85, 0x1e, 0x76, 0x7f, 0xd8,
++	  0x42, 0x10, 0xa5, 0x60, 0x76, 0xfb, 0x3d, 0xd3 },
++	{ 0xaf, 0x53, 0x3e, 0x02, 0x2f, 0xc9, 0x43, 0x9e,
++	  0x4e, 0x3c, 0xb8, 0x38, 0xec, 0xd1, 0x86, 0x92,
++	  0x23, 0x2a, 0xdf, 0x6f, 0xe9, 0x83, 0x95, 0x26,
++	  0xd3, 0xc3, 0xdd, 0x1b, 0x71, 0x91, 0x0b, 0x1a },
++	{ 0x75, 0x1c, 0x09, 0xd4, 0x1a, 0x93, 0x43, 0x88,
++	  0x2a, 0x81, 0xcd, 0x13, 0xee, 0x40, 0x81, 0x8d,
++	  0x12, 0xeb, 0x44, 0xc6, 0xc7, 0xf4, 0x0d, 0xf1,
++	  0x6e, 0x4a, 0xea, 0x8f, 0xab, 0x91, 0x97, 0x2a },
++	{ 0x5b, 0x73, 0xdd, 0xb6, 0x8d, 0x9d, 0x2b, 0x0a,
++	  0xa2, 0x65, 0xa0, 0x79, 0x88, 0xd6, 0xb8, 0x8a,
++	  0xe9, 0xaa, 0xc5, 0x82, 0xaf, 0x83, 0x03, 0x2f,
++	  0x8a, 0x9b, 0x21, 0xa2, 0xe1, 0xb7, 0xbf, 0x18 },
++	{ 0x3d, 0xa2, 0x91, 0x26, 0xc7, 0xc5, 0xd7, 0xf4,
++	  0x3e, 0x64, 0x24, 0x2a, 0x79, 0xfe, 0xaa, 0x4e,
++	  0xf3, 0x45, 0x9c, 0xde, 0xcc, 0xc8, 0x98, 0xed,
++	  0x59, 0xa9, 0x7f, 0x6e, 0xc9, 0x3b, 0x9d, 0xab },
++	{ 0x56, 0x6d, 0xc9, 0x20, 0x29, 0x3d, 0xa5, 0xcb,
++	  0x4f, 0xe0, 0xaa, 0x8a, 0xbd, 0xa8, 0xbb, 0xf5,
++	  0x6f, 0x55, 0x23, 0x13, 0xbf, 0xf1, 0x90, 0x46,
++	  0x64, 0x1e, 0x36, 0x15, 0xc1, 0xe3, 0xed, 0x3f },
++	{ 0x41, 0x15, 0xbe, 0xa0, 0x2f, 0x73, 0xf9, 0x7f,
++	  0x62, 0x9e, 0x5c, 0x55, 0x90, 0x72, 0x0c, 0x01,
++	  0xe7, 0xe4, 0x49, 0xae, 0x2a, 0x66, 0x97, 0xd4,
++	  0xd2, 0x78, 0x33, 0x21, 0x30, 0x36, 0x92, 0xf9 },
++	{ 0x4c, 0xe0, 0x8f, 0x47, 0x62, 0x46, 0x8a, 0x76,
++	  0x70, 0x01, 0x21, 0x64, 0x87, 0x8d, 0x68, 0x34,
++	  0x0c, 0x52, 0xa3, 0x5e, 0x66, 0xc1, 0x88, 0x4d,
++	  0x5c, 0x86, 0x48, 0x89, 0xab, 0xc9, 0x66, 0x77 },
++	{ 0x81, 0xea, 0x0b, 0x78, 0x04, 0x12, 0x4e, 0x0c,
++	  0x22, 0xea, 0x5f, 0xc7, 0x11, 0x04, 0xa2, 0xaf,
++	  0xcb, 0x52, 0xa1, 0xfa, 0x81, 0x6f, 0x3e, 0xcb,
++	  0x7d, 0xcb, 0x5d, 0x9d, 0xea, 0x17, 0x86, 0xd0 },
++	{ 0xfe, 0x36, 0x27, 0x33, 0xb0, 0x5f, 0x6b, 0xed,
++	  0xaf, 0x93, 0x79, 0xd7, 0xf7, 0x93, 0x6e, 0xde,
++	  0x20, 0x9b, 0x1f, 0x83, 0x23, 0xc3, 0x92, 0x25,
++	  0x49, 0xd9, 0xe7, 0x36, 0x81, 0xb5, 0xdb, 0x7b },
++	{ 0xef, 0xf3, 0x7d, 0x30, 0xdf, 0xd2, 0x03, 0x59,
++	  0xbe, 0x4e, 0x73, 0xfd, 0xf4, 0x0d, 0x27, 0x73,
++	  0x4b, 0x3d, 0xf9, 0x0a, 0x97, 0xa5, 0x5e, 0xd7,
++	  0x45, 0x29, 0x72, 0x94, 0xca, 0x85, 0xd0, 0x9f },
++	{ 0x17, 0x2f, 0xfc, 0x67, 0x15, 0x3d, 0x12, 0xe0,
++	  0xca, 0x76, 0xa8, 0xb6, 0xcd, 0x5d, 0x47, 0x31,
++	  0x88, 0x5b, 0x39, 0xce, 0x0c, 0xac, 0x93, 0xa8,
++	  0x97, 0x2a, 0x18, 0x00, 0x6c, 0x8b, 0x8b, 0xaf },
++	{ 0xc4, 0x79, 0x57, 0xf1, 0xcc, 0x88, 0xe8, 0x3e,
++	  0xf9, 0x44, 0x58, 0x39, 0x70, 0x9a, 0x48, 0x0a,
++	  0x03, 0x6b, 0xed, 0x5f, 0x88, 0xac, 0x0f, 0xcc,
++	  0x8e, 0x1e, 0x70, 0x3f, 0xfa, 0xac, 0x13, 0x2c },
++	{ 0x30, 0xf3, 0x54, 0x83, 0x70, 0xcf, 0xdc, 0xed,
++	  0xa5, 0xc3, 0x7b, 0x56, 0x9b, 0x61, 0x75, 0xe7,
++	  0x99, 0xee, 0xf1, 0xa6, 0x2a, 0xaa, 0x94, 0x32,
++	  0x45, 0xae, 0x76, 0x69, 0xc2, 0x27, 0xa7, 0xb5 },
++	{ 0xc9, 0x5d, 0xcb, 0x3c, 0xf1, 0xf2, 0x7d, 0x0e,
++	  0xef, 0x2f, 0x25, 0xd2, 0x41, 0x38, 0x70, 0x90,
++	  0x4a, 0x87, 0x7c, 0x4a, 0x56, 0xc2, 0xde, 0x1e,
++	  0x83, 0xe2, 0xbc, 0x2a, 0xe2, 0xe4, 0x68, 0x21 },
++	{ 0xd5, 0xd0, 0xb5, 0xd7, 0x05, 0x43, 0x4c, 0xd4,
++	  0x6b, 0x18, 0x57, 0x49, 0xf6, 0x6b, 0xfb, 0x58,
++	  0x36, 0xdc, 0xdf, 0x6e, 0xe5, 0x49, 0xa2, 0xb7,
++	  0xa4, 0xae, 0xe7, 0xf5, 0x80, 0x07, 0xca, 0xaf },
++	{ 0xbb, 0xc1, 0x24, 0xa7, 0x12, 0xf1, 0x5d, 0x07,
++	  0xc3, 0x00, 0xe0, 0x5b, 0x66, 0x83, 0x89, 0xa4,
++	  0x39, 0xc9, 0x17, 0x77, 0xf7, 0x21, 0xf8, 0x32,
++	  0x0c, 0x1c, 0x90, 0x78, 0x06, 0x6d, 0x2c, 0x7e },
++	{ 0xa4, 0x51, 0xb4, 0x8c, 0x35, 0xa6, 0xc7, 0x85,
++	  0x4c, 0xfa, 0xae, 0x60, 0x26, 0x2e, 0x76, 0x99,
++	  0x08, 0x16, 0x38, 0x2a, 0xc0, 0x66, 0x7e, 0x5a,
++	  0x5c, 0x9e, 0x1b, 0x46, 0xc4, 0x34, 0x2d, 0xdf },
++	{ 0xb0, 0xd1, 0x50, 0xfb, 0x55, 0xe7, 0x78, 0xd0,
++	  0x11, 0x47, 0xf0, 0xb5, 0xd8, 0x9d, 0x99, 0xec,
++	  0xb2, 0x0f, 0xf0, 0x7e, 0x5e, 0x67, 0x60, 0xd6,
++	  0xb6, 0x45, 0xeb, 0x5b, 0x65, 0x4c, 0x62, 0x2b },
++	{ 0x34, 0xf7, 0x37, 0xc0, 0xab, 0x21, 0x99, 0x51,
++	  0xee, 0xe8, 0x9a, 0x9f, 0x8d, 0xac, 0x29, 0x9c,
++	  0x9d, 0x4c, 0x38, 0xf3, 0x3f, 0xa4, 0x94, 0xc5,
++	  0xc6, 0xee, 0xfc, 0x92, 0xb6, 0xdb, 0x08, 0xbc },
++	{ 0x1a, 0x62, 0xcc, 0x3a, 0x00, 0x80, 0x0d, 0xcb,
++	  0xd9, 0x98, 0x91, 0x08, 0x0c, 0x1e, 0x09, 0x84,
++	  0x58, 0x19, 0x3a, 0x8c, 0xc9, 0xf9, 0x70, 0xea,
++	  0x99, 0xfb, 0xef, 0xf0, 0x03, 0x18, 0xc2, 0x89 },
++	{ 0xcf, 0xce, 0x55, 0xeb, 0xaf, 0xc8, 0x40, 0xd7,
++	  0xae, 0x48, 0x28, 0x1c, 0x7f, 0xd5, 0x7e, 0xc8,
++	  0xb4, 0x82, 0xd4, 0xb7, 0x04, 0x43, 0x74, 0x95,
++	  0x49, 0x5a, 0xc4, 0x14, 0xcf, 0x4a, 0x37, 0x4b },
++	{ 0x67, 0x46, 0xfa, 0xcf, 0x71, 0x14, 0x6d, 0x99,
++	  0x9d, 0xab, 0xd0, 0x5d, 0x09, 0x3a, 0xe5, 0x86,
++	  0x64, 0x8d, 0x1e, 0xe2, 0x8e, 0x72, 0x61, 0x7b,
++	  0x99, 0xd0, 0xf0, 0x08, 0x6e, 0x1e, 0x45, 0xbf },
++	{ 0x57, 0x1c, 0xed, 0x28, 0x3b, 0x3f, 0x23, 0xb4,
++	  0xe7, 0x50, 0xbf, 0x12, 0xa2, 0xca, 0xf1, 0x78,
++	  0x18, 0x47, 0xbd, 0x89, 0x0e, 0x43, 0x60, 0x3c,
++	  0xdc, 0x59, 0x76, 0x10, 0x2b, 0x7b, 0xb1, 0x1b },
++	{ 0xcf, 0xcb, 0x76, 0x5b, 0x04, 0x8e, 0x35, 0x02,
++	  0x2c, 0x5d, 0x08, 0x9d, 0x26, 0xe8, 0x5a, 0x36,
++	  0xb0, 0x05, 0xa2, 0xb8, 0x04, 0x93, 0xd0, 0x3a,
++	  0x14, 0x4e, 0x09, 0xf4, 0x09, 0xb6, 0xaf, 0xd1 },
++	{ 0x40, 0x50, 0xc7, 0xa2, 0x77, 0x05, 0xbb, 0x27,
++	  0xf4, 0x20, 0x89, 0xb2, 0x99, 0xf3, 0xcb, 0xe5,
++	  0x05, 0x4e, 0xad, 0x68, 0x72, 0x7e, 0x8e, 0xf9,
++	  0x31, 0x8c, 0xe6, 0xf2, 0x5c, 0xd6, 0xf3, 0x1d },
++	{ 0x18, 0x40, 0x70, 0xbd, 0x5d, 0x26, 0x5f, 0xbd,
++	  0xc1, 0x42, 0xcd, 0x1c, 0x5c, 0xd0, 0xd7, 0xe4,
++	  0x14, 0xe7, 0x03, 0x69, 0xa2, 0x66, 0xd6, 0x27,
++	  0xc8, 0xfb, 0xa8, 0x4f, 0xa5, 0xe8, 0x4c, 0x34 },
++	{ 0x9e, 0xdd, 0xa9, 0xa4, 0x44, 0x39, 0x02, 0xa9,
++	  0x58, 0x8c, 0x0d, 0x0c, 0xcc, 0x62, 0xb9, 0x30,
++	  0x21, 0x84, 0x79, 0xa6, 0x84, 0x1e, 0x6f, 0xe7,
++	  0xd4, 0x30, 0x03, 0xf0, 0x4b, 0x1f, 0xd6, 0x43 },
++	{ 0xe4, 0x12, 0xfe, 0xef, 0x79, 0x08, 0x32, 0x4a,
++	  0x6d, 0xa1, 0x84, 0x16, 0x29, 0xf3, 0x5d, 0x3d,
++	  0x35, 0x86, 0x42, 0x01, 0x93, 0x10, 0xec, 0x57,
++	  0xc6, 0x14, 0x83, 0x6b, 0x63, 0xd3, 0x07, 0x63 },
++	{ 0x1a, 0x2b, 0x8e, 0xdf, 0xf3, 0xf9, 0xac, 0xc1,
++	  0x55, 0x4f, 0xcb, 0xae, 0x3c, 0xf1, 0xd6, 0x29,
++	  0x8c, 0x64, 0x62, 0xe2, 0x2e, 0x5e, 0xb0, 0x25,
++	  0x96, 0x84, 0xf8, 0x35, 0x01, 0x2b, 0xd1, 0x3f },
++	{ 0x28, 0x8c, 0x4a, 0xd9, 0xb9, 0x40, 0x97, 0x62,
++	  0xea, 0x07, 0xc2, 0x4a, 0x41, 0xf0, 0x4f, 0x69,
++	  0xa7, 0xd7, 0x4b, 0xee, 0x2d, 0x95, 0x43, 0x53,
++	  0x74, 0xbd, 0xe9, 0x46, 0xd7, 0x24, 0x1c, 0x7b },
++	{ 0x80, 0x56, 0x91, 0xbb, 0x28, 0x67, 0x48, 0xcf,
++	  0xb5, 0x91, 0xd3, 0xae, 0xbe, 0x7e, 0x6f, 0x4e,
++	  0x4d, 0xc6, 0xe2, 0x80, 0x8c, 0x65, 0x14, 0x3c,
++	  0xc0, 0x04, 0xe4, 0xeb, 0x6f, 0xd0, 0x9d, 0x43 },
++	{ 0xd4, 0xac, 0x8d, 0x3a, 0x0a, 0xfc, 0x6c, 0xfa,
++	  0x7b, 0x46, 0x0a, 0xe3, 0x00, 0x1b, 0xae, 0xb3,
++	  0x6d, 0xad, 0xb3, 0x7d, 0xa0, 0x7d, 0x2e, 0x8a,
++	  0xc9, 0x18, 0x22, 0xdf, 0x34, 0x8a, 0xed, 0x3d },
++	{ 0xc3, 0x76, 0x61, 0x70, 0x14, 0xd2, 0x01, 0x58,
++	  0xbc, 0xed, 0x3d, 0x3b, 0xa5, 0x52, 0xb6, 0xec,
++	  0xcf, 0x84, 0xe6, 0x2a, 0xa3, 0xeb, 0x65, 0x0e,
++	  0x90, 0x02, 0x9c, 0x84, 0xd1, 0x3e, 0xea, 0x69 },
++	{ 0xc4, 0x1f, 0x09, 0xf4, 0x3c, 0xec, 0xae, 0x72,
++	  0x93, 0xd6, 0x00, 0x7c, 0xa0, 0xa3, 0x57, 0x08,
++	  0x7d, 0x5a, 0xe5, 0x9b, 0xe5, 0x00, 0xc1, 0xcd,
++	  0x5b, 0x28, 0x9e, 0xe8, 0x10, 0xc7, 0xb0, 0x82 },
++	{ 0x03, 0xd1, 0xce, 0xd1, 0xfb, 0xa5, 0xc3, 0x91,
++	  0x55, 0xc4, 0x4b, 0x77, 0x65, 0xcb, 0x76, 0x0c,
++	  0x78, 0x70, 0x8d, 0xcf, 0xc8, 0x0b, 0x0b, 0xd8,
++	  0xad, 0xe3, 0xa5, 0x6d, 0xa8, 0x83, 0x0b, 0x29 },
++	{ 0x09, 0xbd, 0xe6, 0xf1, 0x52, 0x21, 0x8d, 0xc9,
++	  0x2c, 0x41, 0xd7, 0xf4, 0x53, 0x87, 0xe6, 0x3e,
++	  0x58, 0x69, 0xd8, 0x07, 0xec, 0x70, 0xb8, 0x21,
++	  0x40, 0x5d, 0xbd, 0x88, 0x4b, 0x7f, 0xcf, 0x4b },
++	{ 0x71, 0xc9, 0x03, 0x6e, 0x18, 0x17, 0x9b, 0x90,
++	  0xb3, 0x7d, 0x39, 0xe9, 0xf0, 0x5e, 0xb8, 0x9c,
++	  0xc5, 0xfc, 0x34, 0x1f, 0xd7, 0xc4, 0x77, 0xd0,
++	  0xd7, 0x49, 0x32, 0x85, 0xfa, 0xca, 0x08, 0xa4 },
++	{ 0x59, 0x16, 0x83, 0x3e, 0xbb, 0x05, 0xcd, 0x91,
++	  0x9c, 0xa7, 0xfe, 0x83, 0xb6, 0x92, 0xd3, 0x20,
++	  0x5b, 0xef, 0x72, 0x39, 0x2b, 0x2c, 0xf6, 0xbb,
++	  0x0a, 0x6d, 0x43, 0xf9, 0x94, 0xf9, 0x5f, 0x11 },
++	{ 0xf6, 0x3a, 0xab, 0x3e, 0xc6, 0x41, 0xb3, 0xb0,
++	  0x24, 0x96, 0x4c, 0x2b, 0x43, 0x7c, 0x04, 0xf6,
++	  0x04, 0x3c, 0x4c, 0x7e, 0x02, 0x79, 0x23, 0x99,
++	  0x95, 0x40, 0x19, 0x58, 0xf8, 0x6b, 0xbe, 0x54 },
++	{ 0xf1, 0x72, 0xb1, 0x80, 0xbf, 0xb0, 0x97, 0x40,
++	  0x49, 0x31, 0x20, 0xb6, 0x32, 0x6c, 0xbd, 0xc5,
++	  0x61, 0xe4, 0x77, 0xde, 0xf9, 0xbb, 0xcf, 0xd2,
++	  0x8c, 0xc8, 0xc1, 0xc5, 0xe3, 0x37, 0x9a, 0x31 },
++	{ 0xcb, 0x9b, 0x89, 0xcc, 0x18, 0x38, 0x1d, 0xd9,
++	  0x14, 0x1a, 0xde, 0x58, 0x86, 0x54, 0xd4, 0xe6,
++	  0xa2, 0x31, 0xd5, 0xbf, 0x49, 0xd4, 0xd5, 0x9a,
++	  0xc2, 0x7d, 0x86, 0x9c, 0xbe, 0x10, 0x0c, 0xf3 },
++	{ 0x7b, 0xd8, 0x81, 0x50, 0x46, 0xfd, 0xd8, 0x10,
++	  0xa9, 0x23, 0xe1, 0x98, 0x4a, 0xae, 0xbd, 0xcd,
++	  0xf8, 0x4d, 0x87, 0xc8, 0x99, 0x2d, 0x68, 0xb5,
++	  0xee, 0xb4, 0x60, 0xf9, 0x3e, 0xb3, 0xc8, 0xd7 },
++	{ 0x60, 0x7b, 0xe6, 0x68, 0x62, 0xfd, 0x08, 0xee,
++	  0x5b, 0x19, 0xfa, 0xca, 0xc0, 0x9d, 0xfd, 0xbc,
++	  0xd4, 0x0c, 0x31, 0x21, 0x01, 0xd6, 0x6e, 0x6e,
++	  0xbd, 0x2b, 0x84, 0x1f, 0x1b, 0x9a, 0x93, 0x25 },
++	{ 0x9f, 0xe0, 0x3b, 0xbe, 0x69, 0xab, 0x18, 0x34,
++	  0xf5, 0x21, 0x9b, 0x0d, 0xa8, 0x8a, 0x08, 0xb3,
++	  0x0a, 0x66, 0xc5, 0x91, 0x3f, 0x01, 0x51, 0x96,
++	  0x3c, 0x36, 0x05, 0x60, 0xdb, 0x03, 0x87, 0xb3 },
++	{ 0x90, 0xa8, 0x35, 0x85, 0x71, 0x7b, 0x75, 0xf0,
++	  0xe9, 0xb7, 0x25, 0xe0, 0x55, 0xee, 0xee, 0xb9,
++	  0xe7, 0xa0, 0x28, 0xea, 0x7e, 0x6c, 0xbc, 0x07,
++	  0xb2, 0x09, 0x17, 0xec, 0x03, 0x63, 0xe3, 0x8c },
++	{ 0x33, 0x6e, 0xa0, 0x53, 0x0f, 0x4a, 0x74, 0x69,
++	  0x12, 0x6e, 0x02, 0x18, 0x58, 0x7e, 0xbb, 0xde,
++	  0x33, 0x58, 0xa0, 0xb3, 0x1c, 0x29, 0xd2, 0x00,
++	  0xf7, 0xdc, 0x7e, 0xb1, 0x5c, 0x6a, 0xad, 0xd8 },
++	{ 0xa7, 0x9e, 0x76, 0xdc, 0x0a, 0xbc, 0xa4, 0x39,
++	  0x6f, 0x07, 0x47, 0xcd, 0x7b, 0x74, 0x8d, 0xf9,
++	  0x13, 0x00, 0x76, 0x26, 0xb1, 0xd6, 0x59, 0xda,
++	  0x0c, 0x1f, 0x78, 0xb9, 0x30, 0x3d, 0x01, 0xa3 },
++	{ 0x44, 0xe7, 0x8a, 0x77, 0x37, 0x56, 0xe0, 0x95,
++	  0x15, 0x19, 0x50, 0x4d, 0x70, 0x38, 0xd2, 0x8d,
++	  0x02, 0x13, 0xa3, 0x7e, 0x0c, 0xe3, 0x75, 0x37,
++	  0x17, 0x57, 0xbc, 0x99, 0x63, 0x11, 0xe3, 0xb8 },
++	{ 0x77, 0xac, 0x01, 0x2a, 0x3f, 0x75, 0x4d, 0xcf,
++	  0xea, 0xb5, 0xeb, 0x99, 0x6b, 0xe9, 0xcd, 0x2d,
++	  0x1f, 0x96, 0x11, 0x1b, 0x6e, 0x49, 0xf3, 0x99,
++	  0x4d, 0xf1, 0x81, 0xf2, 0x85, 0x69, 0xd8, 0x25 },
++	{ 0xce, 0x5a, 0x10, 0xdb, 0x6f, 0xcc, 0xda, 0xf1,
++	  0x40, 0xaa, 0xa4, 0xde, 0xd6, 0x25, 0x0a, 0x9c,
++	  0x06, 0xe9, 0x22, 0x2b, 0xc9, 0xf9, 0xf3, 0x65,
++	  0x8a, 0x4a, 0xff, 0x93, 0x5f, 0x2b, 0x9f, 0x3a },
++	{ 0xec, 0xc2, 0x03, 0xa7, 0xfe, 0x2b, 0xe4, 0xab,
++	  0xd5, 0x5b, 0xb5, 0x3e, 0x6e, 0x67, 0x35, 0x72,
++	  0xe0, 0x07, 0x8d, 0xa8, 0xcd, 0x37, 0x5e, 0xf4,
++	  0x30, 0xcc, 0x97, 0xf9, 0xf8, 0x00, 0x83, 0xaf },
++	{ 0x14, 0xa5, 0x18, 0x6d, 0xe9, 0xd7, 0xa1, 0x8b,
++	  0x04, 0x12, 0xb8, 0x56, 0x3e, 0x51, 0xcc, 0x54,
++	  0x33, 0x84, 0x0b, 0x4a, 0x12, 0x9a, 0x8f, 0xf9,
++	  0x63, 0xb3, 0x3a, 0x3c, 0x4a, 0xfe, 0x8e, 0xbb },
++	{ 0x13, 0xf8, 0xef, 0x95, 0xcb, 0x86, 0xe6, 0xa6,
++	  0x38, 0x93, 0x1c, 0x8e, 0x10, 0x76, 0x73, 0xeb,
++	  0x76, 0xba, 0x10, 0xd7, 0xc2, 0xcd, 0x70, 0xb9,
++	  0xd9, 0x92, 0x0b, 0xbe, 0xed, 0x92, 0x94, 0x09 },
++	{ 0x0b, 0x33, 0x8f, 0x4e, 0xe1, 0x2f, 0x2d, 0xfc,
++	  0xb7, 0x87, 0x13, 0x37, 0x79, 0x41, 0xe0, 0xb0,
++	  0x63, 0x21, 0x52, 0x58, 0x1d, 0x13, 0x32, 0x51,
++	  0x6e, 0x4a, 0x2c, 0xab, 0x19, 0x42, 0xcc, 0xa4 },
++	{ 0xea, 0xab, 0x0e, 0xc3, 0x7b, 0x3b, 0x8a, 0xb7,
++	  0x96, 0xe9, 0xf5, 0x72, 0x38, 0xde, 0x14, 0xa2,
++	  0x64, 0xa0, 0x76, 0xf3, 0x88, 0x7d, 0x86, 0xe2,
++	  0x9b, 0xb5, 0x90, 0x6d, 0xb5, 0xa0, 0x0e, 0x02 },
++	{ 0x23, 0xcb, 0x68, 0xb8, 0xc0, 0xe6, 0xdc, 0x26,
++	  0xdc, 0x27, 0x76, 0x6d, 0xdc, 0x0a, 0x13, 0xa9,
++	  0x94, 0x38, 0xfd, 0x55, 0x61, 0x7a, 0xa4, 0x09,
++	  0x5d, 0x8f, 0x96, 0x97, 0x20, 0xc8, 0x72, 0xdf },
++	{ 0x09, 0x1d, 0x8e, 0xe3, 0x0d, 0x6f, 0x29, 0x68,
++	  0xd4, 0x6b, 0x68, 0x7d, 0xd6, 0x52, 0x92, 0x66,
++	  0x57, 0x42, 0xde, 0x0b, 0xb8, 0x3d, 0xcc, 0x00,
++	  0x04, 0xc7, 0x2c, 0xe1, 0x00, 0x07, 0xa5, 0x49 },
++	{ 0x7f, 0x50, 0x7a, 0xbc, 0x6d, 0x19, 0xba, 0x00,
++	  0xc0, 0x65, 0xa8, 0x76, 0xec, 0x56, 0x57, 0x86,
++	  0x88, 0x82, 0xd1, 0x8a, 0x22, 0x1b, 0xc4, 0x6c,
++	  0x7a, 0x69, 0x12, 0x54, 0x1f, 0x5b, 0xc7, 0xba },
++	{ 0xa0, 0x60, 0x7c, 0x24, 0xe1, 0x4e, 0x8c, 0x22,
++	  0x3d, 0xb0, 0xd7, 0x0b, 0x4d, 0x30, 0xee, 0x88,
++	  0x01, 0x4d, 0x60, 0x3f, 0x43, 0x7e, 0x9e, 0x02,
++	  0xaa, 0x7d, 0xaf, 0xa3, 0xcd, 0xfb, 0xad, 0x94 },
++	{ 0xdd, 0xbf, 0xea, 0x75, 0xcc, 0x46, 0x78, 0x82,
++	  0xeb, 0x34, 0x83, 0xce, 0x5e, 0x2e, 0x75, 0x6a,
++	  0x4f, 0x47, 0x01, 0xb7, 0x6b, 0x44, 0x55, 0x19,
++	  0xe8, 0x9f, 0x22, 0xd6, 0x0f, 0xa8, 0x6e, 0x06 },
++	{ 0x0c, 0x31, 0x1f, 0x38, 0xc3, 0x5a, 0x4f, 0xb9,
++	  0x0d, 0x65, 0x1c, 0x28, 0x9d, 0x48, 0x68, 0x56,
++	  0xcd, 0x14, 0x13, 0xdf, 0x9b, 0x06, 0x77, 0xf5,
++	  0x3e, 0xce, 0x2c, 0xd9, 0xe4, 0x77, 0xc6, 0x0a },
++	{ 0x46, 0xa7, 0x3a, 0x8d, 0xd3, 0xe7, 0x0f, 0x59,
++	  0xd3, 0x94, 0x2c, 0x01, 0xdf, 0x59, 0x9d, 0xef,
++	  0x78, 0x3c, 0x9d, 0xa8, 0x2f, 0xd8, 0x32, 0x22,
++	  0xcd, 0x66, 0x2b, 0x53, 0xdc, 0xe7, 0xdb, 0xdf },
++	{ 0xad, 0x03, 0x8f, 0xf9, 0xb1, 0x4d, 0xe8, 0x4a,
++	  0x80, 0x1e, 0x4e, 0x62, 0x1c, 0xe5, 0xdf, 0x02,
++	  0x9d, 0xd9, 0x35, 0x20, 0xd0, 0xc2, 0xfa, 0x38,
++	  0xbf, 0xf1, 0x76, 0xa8, 0xb1, 0xd1, 0x69, 0x8c },
++	{ 0xab, 0x70, 0xc5, 0xdf, 0xbd, 0x1e, 0xa8, 0x17,
++	  0xfe, 0xd0, 0xcd, 0x06, 0x72, 0x93, 0xab, 0xf3,
++	  0x19, 0xe5, 0xd7, 0x90, 0x1c, 0x21, 0x41, 0xd5,
++	  0xd9, 0x9b, 0x23, 0xf0, 0x3a, 0x38, 0xe7, 0x48 },
++	{ 0x1f, 0xff, 0xda, 0x67, 0x93, 0x2b, 0x73, 0xc8,
++	  0xec, 0xaf, 0x00, 0x9a, 0x34, 0x91, 0xa0, 0x26,
++	  0x95, 0x3b, 0xab, 0xfe, 0x1f, 0x66, 0x3b, 0x06,
++	  0x97, 0xc3, 0xc4, 0xae, 0x8b, 0x2e, 0x7d, 0xcb },
++	{ 0xb0, 0xd2, 0xcc, 0x19, 0x47, 0x2d, 0xd5, 0x7f,
++	  0x2b, 0x17, 0xef, 0xc0, 0x3c, 0x8d, 0x58, 0xc2,
++	  0x28, 0x3d, 0xbb, 0x19, 0xda, 0x57, 0x2f, 0x77,
++	  0x55, 0x85, 0x5a, 0xa9, 0x79, 0x43, 0x17, 0xa0 },
++	{ 0xa0, 0xd1, 0x9a, 0x6e, 0xe3, 0x39, 0x79, 0xc3,
++	  0x25, 0x51, 0x0e, 0x27, 0x66, 0x22, 0xdf, 0x41,
++	  0xf7, 0x15, 0x83, 0xd0, 0x75, 0x01, 0xb8, 0x70,
++	  0x71, 0x12, 0x9a, 0x0a, 0xd9, 0x47, 0x32, 0xa5 },
++	{ 0x72, 0x46, 0x42, 0xa7, 0x03, 0x2d, 0x10, 0x62,
++	  0xb8, 0x9e, 0x52, 0xbe, 0xa3, 0x4b, 0x75, 0xdf,
++	  0x7d, 0x8f, 0xe7, 0x72, 0xd9, 0xfe, 0x3c, 0x93,
++	  0xdd, 0xf3, 0xc4, 0x54, 0x5a, 0xb5, 0xa9, 0x9b },
++	{ 0xad, 0xe5, 0xea, 0xa7, 0xe6, 0x1f, 0x67, 0x2d,
++	  0x58, 0x7e, 0xa0, 0x3d, 0xae, 0x7d, 0x7b, 0x55,
++	  0x22, 0x9c, 0x01, 0xd0, 0x6b, 0xc0, 0xa5, 0x70,
++	  0x14, 0x36, 0xcb, 0xd1, 0x83, 0x66, 0xa6, 0x26 },
++	{ 0x01, 0x3b, 0x31, 0xeb, 0xd2, 0x28, 0xfc, 0xdd,
++	  0xa5, 0x1f, 0xab, 0xb0, 0x3b, 0xb0, 0x2d, 0x60,
++	  0xac, 0x20, 0xca, 0x21, 0x5a, 0xaf, 0xa8, 0x3b,
++	  0xdd, 0x85, 0x5e, 0x37, 0x55, 0xa3, 0x5f, 0x0b },
++	{ 0x33, 0x2e, 0xd4, 0x0b, 0xb1, 0x0d, 0xde, 0x3c,
++	  0x95, 0x4a, 0x75, 0xd7, 0xb8, 0x99, 0x9d, 0x4b,
++	  0x26, 0xa1, 0xc0, 0x63, 0xc1, 0xdc, 0x6e, 0x32,
++	  0xc1, 0xd9, 0x1b, 0xab, 0x7b, 0xbb, 0x7d, 0x16 },
++	{ 0xc7, 0xa1, 0x97, 0xb3, 0xa0, 0x5b, 0x56, 0x6b,
++	  0xcc, 0x9f, 0xac, 0xd2, 0x0e, 0x44, 0x1d, 0x6f,
++	  0x6c, 0x28, 0x60, 0xac, 0x96, 0x51, 0xcd, 0x51,
++	  0xd6, 0xb9, 0xd2, 0xcd, 0xee, 0xea, 0x03, 0x90 },
++	{ 0xbd, 0x9c, 0xf6, 0x4e, 0xa8, 0x95, 0x3c, 0x03,
++	  0x71, 0x08, 0xe6, 0xf6, 0x54, 0x91, 0x4f, 0x39,
++	  0x58, 0xb6, 0x8e, 0x29, 0xc1, 0x67, 0x00, 0xdc,
++	  0x18, 0x4d, 0x94, 0xa2, 0x17, 0x08, 0xff, 0x60 },
++	{ 0x88, 0x35, 0xb0, 0xac, 0x02, 0x11, 0x51, 0xdf,
++	  0x71, 0x64, 0x74, 0xce, 0x27, 0xce, 0x4d, 0x3c,
++	  0x15, 0xf0, 0xb2, 0xda, 0xb4, 0x80, 0x03, 0xcf,
++	  0x3f, 0x3e, 0xfd, 0x09, 0x45, 0x10, 0x6b, 0x9a },
++	{ 0x3b, 0xfe, 0xfa, 0x33, 0x01, 0xaa, 0x55, 0xc0,
++	  0x80, 0x19, 0x0c, 0xff, 0xda, 0x8e, 0xae, 0x51,
++	  0xd9, 0xaf, 0x48, 0x8b, 0x4c, 0x1f, 0x24, 0xc3,
++	  0xd9, 0xa7, 0x52, 0x42, 0xfd, 0x8e, 0xa0, 0x1d },
++	{ 0x08, 0x28, 0x4d, 0x14, 0x99, 0x3c, 0xd4, 0x7d,
++	  0x53, 0xeb, 0xae, 0xcf, 0x0d, 0xf0, 0x47, 0x8c,
++	  0xc1, 0x82, 0xc8, 0x9c, 0x00, 0xe1, 0x85, 0x9c,
++	  0x84, 0x85, 0x16, 0x86, 0xdd, 0xf2, 0xc1, 0xb7 },
++	{ 0x1e, 0xd7, 0xef, 0x9f, 0x04, 0xc2, 0xac, 0x8d,
++	  0xb6, 0xa8, 0x64, 0xdb, 0x13, 0x10, 0x87, 0xf2,
++	  0x70, 0x65, 0x09, 0x8e, 0x69, 0xc3, 0xfe, 0x78,
++	  0x71, 0x8d, 0x9b, 0x94, 0x7f, 0x4a, 0x39, 0xd0 },
++	{ 0xc1, 0x61, 0xf2, 0xdc, 0xd5, 0x7e, 0x9c, 0x14,
++	  0x39, 0xb3, 0x1a, 0x9d, 0xd4, 0x3d, 0x8f, 0x3d,
++	  0x7d, 0xd8, 0xf0, 0xeb, 0x7c, 0xfa, 0xc6, 0xfb,
++	  0x25, 0xa0, 0xf2, 0x8e, 0x30, 0x6f, 0x06, 0x61 },
++	{ 0xc0, 0x19, 0x69, 0xad, 0x34, 0xc5, 0x2c, 0xaf,
++	  0x3d, 0xc4, 0xd8, 0x0d, 0x19, 0x73, 0x5c, 0x29,
++	  0x73, 0x1a, 0xc6, 0xe7, 0xa9, 0x20, 0x85, 0xab,
++	  0x92, 0x50, 0xc4, 0x8d, 0xea, 0x48, 0xa3, 0xfc },
++	{ 0x17, 0x20, 0xb3, 0x65, 0x56, 0x19, 0xd2, 0xa5,
++	  0x2b, 0x35, 0x21, 0xae, 0x0e, 0x49, 0xe3, 0x45,
++	  0xcb, 0x33, 0x89, 0xeb, 0xd6, 0x20, 0x8a, 0xca,
++	  0xf9, 0xf1, 0x3f, 0xda, 0xcc, 0xa8, 0xbe, 0x49 },
++	{ 0x75, 0x62, 0x88, 0x36, 0x1c, 0x83, 0xe2, 0x4c,
++	  0x61, 0x7c, 0xf9, 0x5c, 0x90, 0x5b, 0x22, 0xd0,
++	  0x17, 0xcd, 0xc8, 0x6f, 0x0b, 0xf1, 0xd6, 0x58,
++	  0xf4, 0x75, 0x6c, 0x73, 0x79, 0x87, 0x3b, 0x7f },
++	{ 0xe7, 0xd0, 0xed, 0xa3, 0x45, 0x26, 0x93, 0xb7,
++	  0x52, 0xab, 0xcd, 0xa1, 0xb5, 0x5e, 0x27, 0x6f,
++	  0x82, 0x69, 0x8f, 0x5f, 0x16, 0x05, 0x40, 0x3e,
++	  0xff, 0x83, 0x0b, 0xea, 0x00, 0x71, 0xa3, 0x94 },
++	{ 0x2c, 0x82, 0xec, 0xaa, 0x6b, 0x84, 0x80, 0x3e,
++	  0x04, 0x4a, 0xf6, 0x31, 0x18, 0xaf, 0xe5, 0x44,
++	  0x68, 0x7c, 0xb6, 0xe6, 0xc7, 0xdf, 0x49, 0xed,
++	  0x76, 0x2d, 0xfd, 0x7c, 0x86, 0x93, 0xa1, 0xbc },
++	{ 0x61, 0x36, 0xcb, 0xf4, 0xb4, 0x41, 0x05, 0x6f,
++	  0xa1, 0xe2, 0x72, 0x24, 0x98, 0x12, 0x5d, 0x6d,
++	  0xed, 0x45, 0xe1, 0x7b, 0x52, 0x14, 0x39, 0x59,
++	  0xc7, 0xf4, 0xd4, 0xe3, 0x95, 0x21, 0x8a, 0xc2 },
++	{ 0x72, 0x1d, 0x32, 0x45, 0xaa, 0xfe, 0xf2, 0x7f,
++	  0x6a, 0x62, 0x4f, 0x47, 0x95, 0x4b, 0x6c, 0x25,
++	  0x50, 0x79, 0x52, 0x6f, 0xfa, 0x25, 0xe9, 0xff,
++	  0x77, 0xe5, 0xdc, 0xff, 0x47, 0x3b, 0x15, 0x97 },
++	{ 0x9d, 0xd2, 0xfb, 0xd8, 0xce, 0xf1, 0x6c, 0x35,
++	  0x3c, 0x0a, 0xc2, 0x11, 0x91, 0xd5, 0x09, 0xeb,
++	  0x28, 0xdd, 0x9e, 0x3e, 0x0d, 0x8c, 0xea, 0x5d,
++	  0x26, 0xca, 0x83, 0x93, 0x93, 0x85, 0x1c, 0x3a },
++	{ 0xb2, 0x39, 0x4c, 0xea, 0xcd, 0xeb, 0xf2, 0x1b,
++	  0xf9, 0xdf, 0x2c, 0xed, 0x98, 0xe5, 0x8f, 0x1c,
++	  0x3a, 0x4b, 0xbb, 0xff, 0x66, 0x0d, 0xd9, 0x00,
++	  0xf6, 0x22, 0x02, 0xd6, 0x78, 0x5c, 0xc4, 0x6e },
++	{ 0x57, 0x08, 0x9f, 0x22, 0x27, 0x49, 0xad, 0x78,
++	  0x71, 0x76, 0x5f, 0x06, 0x2b, 0x11, 0x4f, 0x43,
++	  0xba, 0x20, 0xec, 0x56, 0x42, 0x2a, 0x8b, 0x1e,
++	  0x3f, 0x87, 0x19, 0x2c, 0x0e, 0xa7, 0x18, 0xc6 },
++	{ 0xe4, 0x9a, 0x94, 0x59, 0x96, 0x1c, 0xd3, 0x3c,
++	  0xdf, 0x4a, 0xae, 0x1b, 0x10, 0x78, 0xa5, 0xde,
++	  0xa7, 0xc0, 0x40, 0xe0, 0xfe, 0xa3, 0x40, 0xc9,
++	  0x3a, 0x72, 0x48, 0x72, 0xfc, 0x4a, 0xf8, 0x06 },
++	{ 0xed, 0xe6, 0x7f, 0x72, 0x0e, 0xff, 0xd2, 0xca,
++	  0x9c, 0x88, 0x99, 0x41, 0x52, 0xd0, 0x20, 0x1d,
++	  0xee, 0x6b, 0x0a, 0x2d, 0x2c, 0x07, 0x7a, 0xca,
++	  0x6d, 0xae, 0x29, 0xf7, 0x3f, 0x8b, 0x63, 0x09 },
++	{ 0xe0, 0xf4, 0x34, 0xbf, 0x22, 0xe3, 0x08, 0x80,
++	  0x39, 0xc2, 0x1f, 0x71, 0x9f, 0xfc, 0x67, 0xf0,
++	  0xf2, 0xcb, 0x5e, 0x98, 0xa7, 0xa0, 0x19, 0x4c,
++	  0x76, 0xe9, 0x6b, 0xf4, 0xe8, 0xe1, 0x7e, 0x61 },
++	{ 0x27, 0x7c, 0x04, 0xe2, 0x85, 0x34, 0x84, 0xa4,
++	  0xeb, 0xa9, 0x10, 0xad, 0x33, 0x6d, 0x01, 0xb4,
++	  0x77, 0xb6, 0x7c, 0xc2, 0x00, 0xc5, 0x9f, 0x3c,
++	  0x8d, 0x77, 0xee, 0xf8, 0x49, 0x4f, 0x29, 0xcd },
++	{ 0x15, 0x6d, 0x57, 0x47, 0xd0, 0xc9, 0x9c, 0x7f,
++	  0x27, 0x09, 0x7d, 0x7b, 0x7e, 0x00, 0x2b, 0x2e,
++	  0x18, 0x5c, 0xb7, 0x2d, 0x8d, 0xd7, 0xeb, 0x42,
++	  0x4a, 0x03, 0x21, 0x52, 0x81, 0x61, 0x21, 0x9f },
++	{ 0x20, 0xdd, 0xd1, 0xed, 0x9b, 0x1c, 0xa8, 0x03,
++	  0x94, 0x6d, 0x64, 0xa8, 0x3a, 0xe4, 0x65, 0x9d,
++	  0xa6, 0x7f, 0xba, 0x7a, 0x1a, 0x3e, 0xdd, 0xb1,
++	  0xe1, 0x03, 0xc0, 0xf5, 0xe0, 0x3e, 0x3a, 0x2c },
++	{ 0xf0, 0xaf, 0x60, 0x4d, 0x3d, 0xab, 0xbf, 0x9a,
++	  0x0f, 0x2a, 0x7d, 0x3d, 0xda, 0x6b, 0xd3, 0x8b,
++	  0xba, 0x72, 0xc6, 0xd0, 0x9b, 0xe4, 0x94, 0xfc,
++	  0xef, 0x71, 0x3f, 0xf1, 0x01, 0x89, 0xb6, 0xe6 },
++	{ 0x98, 0x02, 0xbb, 0x87, 0xde, 0xf4, 0xcc, 0x10,
++	  0xc4, 0xa5, 0xfd, 0x49, 0xaa, 0x58, 0xdf, 0xe2,
++	  0xf3, 0xfd, 0xdb, 0x46, 0xb4, 0x70, 0x88, 0x14,
++	  0xea, 0xd8, 0x1d, 0x23, 0xba, 0x95, 0x13, 0x9b },
++	{ 0x4f, 0x8c, 0xe1, 0xe5, 0x1d, 0x2f, 0xe7, 0xf2,
++	  0x40, 0x43, 0xa9, 0x04, 0xd8, 0x98, 0xeb, 0xfc,
++	  0x91, 0x97, 0x54, 0x18, 0x75, 0x34, 0x13, 0xaa,
++	  0x09, 0x9b, 0x79, 0x5e, 0xcb, 0x35, 0xce, 0xdb },
++	{ 0xbd, 0xdc, 0x65, 0x14, 0xd7, 0xee, 0x6a, 0xce,
++	  0x0a, 0x4a, 0xc1, 0xd0, 0xe0, 0x68, 0x11, 0x22,
++	  0x88, 0xcb, 0xcf, 0x56, 0x04, 0x54, 0x64, 0x27,
++	  0x05, 0x63, 0x01, 0x77, 0xcb, 0xa6, 0x08, 0xbd },
++	{ 0xd6, 0x35, 0x99, 0x4f, 0x62, 0x91, 0x51, 0x7b,
++	  0x02, 0x81, 0xff, 0xdd, 0x49, 0x6a, 0xfa, 0x86,
++	  0x27, 0x12, 0xe5, 0xb3, 0xc4, 0xe5, 0x2e, 0x4c,
++	  0xd5, 0xfd, 0xae, 0x8c, 0x0e, 0x72, 0xfb, 0x08 },
++	{ 0x87, 0x8d, 0x9c, 0xa6, 0x00, 0xcf, 0x87, 0xe7,
++	  0x69, 0xcc, 0x30, 0x5c, 0x1b, 0x35, 0x25, 0x51,
++	  0x86, 0x61, 0x5a, 0x73, 0xa0, 0xda, 0x61, 0x3b,
++	  0x5f, 0x1c, 0x98, 0xdb, 0xf8, 0x12, 0x83, 0xea },
++	{ 0xa6, 0x4e, 0xbe, 0x5d, 0xc1, 0x85, 0xde, 0x9f,
++	  0xdd, 0xe7, 0x60, 0x7b, 0x69, 0x98, 0x70, 0x2e,
++	  0xb2, 0x34, 0x56, 0x18, 0x49, 0x57, 0x30, 0x7d,
++	  0x2f, 0xa7, 0x2e, 0x87, 0xa4, 0x77, 0x02, 0xd6 },
++	{ 0xce, 0x50, 0xea, 0xb7, 0xb5, 0xeb, 0x52, 0xbd,
++	  0xc9, 0xad, 0x8e, 0x5a, 0x48, 0x0a, 0xb7, 0x80,
++	  0xca, 0x93, 0x20, 0xe4, 0x43, 0x60, 0xb1, 0xfe,
++	  0x37, 0xe0, 0x3f, 0x2f, 0x7a, 0xd7, 0xde, 0x01 },
++	{ 0xee, 0xdd, 0xb7, 0xc0, 0xdb, 0x6e, 0x30, 0xab,
++	  0xe6, 0x6d, 0x79, 0xe3, 0x27, 0x51, 0x1e, 0x61,
++	  0xfc, 0xeb, 0xbc, 0x29, 0xf1, 0x59, 0xb4, 0x0a,
++	  0x86, 0xb0, 0x46, 0xec, 0xf0, 0x51, 0x38, 0x23 },
++	{ 0x78, 0x7f, 0xc9, 0x34, 0x40, 0xc1, 0xec, 0x96,
++	  0xb5, 0xad, 0x01, 0xc1, 0x6c, 0xf7, 0x79, 0x16,
++	  0xa1, 0x40, 0x5f, 0x94, 0x26, 0x35, 0x6e, 0xc9,
++	  0x21, 0xd8, 0xdf, 0xf3, 0xea, 0x63, 0xb7, 0xe0 },
++	{ 0x7f, 0x0d, 0x5e, 0xab, 0x47, 0xee, 0xfd, 0xa6,
++	  0x96, 0xc0, 0xbf, 0x0f, 0xbf, 0x86, 0xab, 0x21,
++	  0x6f, 0xce, 0x46, 0x1e, 0x93, 0x03, 0xab, 0xa6,
++	  0xac, 0x37, 0x41, 0x20, 0xe8, 0x90, 0xe8, 0xdf },
++	{ 0xb6, 0x80, 0x04, 0xb4, 0x2f, 0x14, 0xad, 0x02,
++	  0x9f, 0x4c, 0x2e, 0x03, 0xb1, 0xd5, 0xeb, 0x76,
++	  0xd5, 0x71, 0x60, 0xe2, 0x64, 0x76, 0xd2, 0x11,
++	  0x31, 0xbe, 0xf2, 0x0a, 0xda, 0x7d, 0x27, 0xf4 },
++	{ 0xb0, 0xc4, 0xeb, 0x18, 0xae, 0x25, 0x0b, 0x51,
++	  0xa4, 0x13, 0x82, 0xea, 0xd9, 0x2d, 0x0d, 0xc7,
++	  0x45, 0x5f, 0x93, 0x79, 0xfc, 0x98, 0x84, 0x42,
++	  0x8e, 0x47, 0x70, 0x60, 0x8d, 0xb0, 0xfa, 0xec },
++	{ 0xf9, 0x2b, 0x7a, 0x87, 0x0c, 0x05, 0x9f, 0x4d,
++	  0x46, 0x46, 0x4c, 0x82, 0x4e, 0xc9, 0x63, 0x55,
++	  0x14, 0x0b, 0xdc, 0xe6, 0x81, 0x32, 0x2c, 0xc3,
++	  0xa9, 0x92, 0xff, 0x10, 0x3e, 0x3f, 0xea, 0x52 },
++	{ 0x53, 0x64, 0x31, 0x26, 0x14, 0x81, 0x33, 0x98,
++	  0xcc, 0x52, 0x5d, 0x4c, 0x4e, 0x14, 0x6e, 0xde,
++	  0xb3, 0x71, 0x26, 0x5f, 0xba, 0x19, 0x13, 0x3a,
++	  0x2c, 0x3d, 0x21, 0x59, 0x29, 0x8a, 0x17, 0x42 },
++	{ 0xf6, 0x62, 0x0e, 0x68, 0xd3, 0x7f, 0xb2, 0xaf,
++	  0x50, 0x00, 0xfc, 0x28, 0xe2, 0x3b, 0x83, 0x22,
++	  0x97, 0xec, 0xd8, 0xbc, 0xe9, 0x9e, 0x8b, 0xe4,
++	  0xd0, 0x4e, 0x85, 0x30, 0x9e, 0x3d, 0x33, 0x74 },
++	{ 0x53, 0x16, 0xa2, 0x79, 0x69, 0xd7, 0xfe, 0x04,
++	  0xff, 0x27, 0xb2, 0x83, 0x96, 0x1b, 0xff, 0xc3,
++	  0xbf, 0x5d, 0xfb, 0x32, 0xfb, 0x6a, 0x89, 0xd1,
++	  0x01, 0xc6, 0xc3, 0xb1, 0x93, 0x7c, 0x28, 0x71 },
++	{ 0x81, 0xd1, 0x66, 0x4f, 0xdf, 0x3c, 0xb3, 0x3c,
++	  0x24, 0xee, 0xba, 0xc0, 0xbd, 0x64, 0x24, 0x4b,
++	  0x77, 0xc4, 0xab, 0xea, 0x90, 0xbb, 0xe8, 0xb5,
++	  0xee, 0x0b, 0x2a, 0xaf, 0xcf, 0x2d, 0x6a, 0x53 },
++	{ 0x34, 0x57, 0x82, 0xf2, 0x95, 0xb0, 0x88, 0x03,
++	  0x52, 0xe9, 0x24, 0xa0, 0x46, 0x7b, 0x5f, 0xbc,
++	  0x3e, 0x8f, 0x3b, 0xfb, 0xc3, 0xc7, 0xe4, 0x8b,
++	  0x67, 0x09, 0x1f, 0xb5, 0xe8, 0x0a, 0x94, 0x42 },
++	{ 0x79, 0x41, 0x11, 0xea, 0x6c, 0xd6, 0x5e, 0x31,
++	  0x1f, 0x74, 0xee, 0x41, 0xd4, 0x76, 0xcb, 0x63,
++	  0x2c, 0xe1, 0xe4, 0xb0, 0x51, 0xdc, 0x1d, 0x9e,
++	  0x9d, 0x06, 0x1a, 0x19, 0xe1, 0xd0, 0xbb, 0x49 },
++	{ 0x2a, 0x85, 0xda, 0xf6, 0x13, 0x88, 0x16, 0xb9,
++	  0x9b, 0xf8, 0xd0, 0x8b, 0xa2, 0x11, 0x4b, 0x7a,
++	  0xb0, 0x79, 0x75, 0xa7, 0x84, 0x20, 0xc1, 0xa3,
++	  0xb0, 0x6a, 0x77, 0x7c, 0x22, 0xdd, 0x8b, 0xcb },
++	{ 0x89, 0xb0, 0xd5, 0xf2, 0x89, 0xec, 0x16, 0x40,
++	  0x1a, 0x06, 0x9a, 0x96, 0x0d, 0x0b, 0x09, 0x3e,
++	  0x62, 0x5d, 0xa3, 0xcf, 0x41, 0xee, 0x29, 0xb5,
++	  0x9b, 0x93, 0x0c, 0x58, 0x20, 0x14, 0x54, 0x55 },
++	{ 0xd0, 0xfd, 0xcb, 0x54, 0x39, 0x43, 0xfc, 0x27,
++	  0xd2, 0x08, 0x64, 0xf5, 0x21, 0x81, 0x47, 0x1b,
++	  0x94, 0x2c, 0xc7, 0x7c, 0xa6, 0x75, 0xbc, 0xb3,
++	  0x0d, 0xf3, 0x1d, 0x35, 0x8e, 0xf7, 0xb1, 0xeb },
++	{ 0xb1, 0x7e, 0xa8, 0xd7, 0x70, 0x63, 0xc7, 0x09,
++	  0xd4, 0xdc, 0x6b, 0x87, 0x94, 0x13, 0xc3, 0x43,
++	  0xe3, 0x79, 0x0e, 0x9e, 0x62, 0xca, 0x85, 0xb7,
++	  0x90, 0x0b, 0x08, 0x6f, 0x6b, 0x75, 0xc6, 0x72 },
++	{ 0xe7, 0x1a, 0x3e, 0x2c, 0x27, 0x4d, 0xb8, 0x42,
++	  0xd9, 0x21, 0x14, 0xf2, 0x17, 0xe2, 0xc0, 0xea,
++	  0xc8, 0xb4, 0x50, 0x93, 0xfd, 0xfd, 0x9d, 0xf4,
++	  0xca, 0x71, 0x62, 0x39, 0x48, 0x62, 0xd5, 0x01 },
++	{ 0xc0, 0x47, 0x67, 0x59, 0xab, 0x7a, 0xa3, 0x33,
++	  0x23, 0x4f, 0x6b, 0x44, 0xf5, 0xfd, 0x85, 0x83,
++	  0x90, 0xec, 0x23, 0x69, 0x4c, 0x62, 0x2c, 0xb9,
++	  0x86, 0xe7, 0x69, 0xc7, 0x8e, 0xdd, 0x73, 0x3e },
++	{ 0x9a, 0xb8, 0xea, 0xbb, 0x14, 0x16, 0x43, 0x4d,
++	  0x85, 0x39, 0x13, 0x41, 0xd5, 0x69, 0x93, 0xc5,
++	  0x54, 0x58, 0x16, 0x7d, 0x44, 0x18, 0xb1, 0x9a,
++	  0x0f, 0x2a, 0xd8, 0xb7, 0x9a, 0x83, 0xa7, 0x5b },
++	{ 0x79, 0x92, 0xd0, 0xbb, 0xb1, 0x5e, 0x23, 0x82,
++	  0x6f, 0x44, 0x3e, 0x00, 0x50, 0x5d, 0x68, 0xd3,
++	  0xed, 0x73, 0x72, 0x99, 0x5a, 0x5c, 0x3e, 0x49,
++	  0x86, 0x54, 0x10, 0x2f, 0xbc, 0xd0, 0x96, 0x4e },
++	{ 0xc0, 0x21, 0xb3, 0x00, 0x85, 0x15, 0x14, 0x35,
++	  0xdf, 0x33, 0xb0, 0x07, 0xcc, 0xec, 0xc6, 0x9d,
++	  0xf1, 0x26, 0x9f, 0x39, 0xba, 0x25, 0x09, 0x2b,
++	  0xed, 0x59, 0xd9, 0x32, 0xac, 0x0f, 0xdc, 0x28 },
++	{ 0x91, 0xa2, 0x5e, 0xc0, 0xec, 0x0d, 0x9a, 0x56,
++	  0x7f, 0x89, 0xc4, 0xbf, 0xe1, 0xa6, 0x5a, 0x0e,
++	  0x43, 0x2d, 0x07, 0x06, 0x4b, 0x41, 0x90, 0xe2,
++	  0x7d, 0xfb, 0x81, 0x90, 0x1f, 0xd3, 0x13, 0x9b },
++	{ 0x59, 0x50, 0xd3, 0x9a, 0x23, 0xe1, 0x54, 0x5f,
++	  0x30, 0x12, 0x70, 0xaa, 0x1a, 0x12, 0xf2, 0xe6,
++	  0xc4, 0x53, 0x77, 0x6e, 0x4d, 0x63, 0x55, 0xde,
++	  0x42, 0x5c, 0xc1, 0x53, 0xf9, 0x81, 0x88, 0x67 },
++	{ 0xd7, 0x9f, 0x14, 0x72, 0x0c, 0x61, 0x0a, 0xf1,
++	  0x79, 0xa3, 0x76, 0x5d, 0x4b, 0x7c, 0x09, 0x68,
++	  0xf9, 0x77, 0x96, 0x2d, 0xbf, 0x65, 0x5b, 0x52,
++	  0x12, 0x72, 0xb6, 0xf1, 0xe1, 0x94, 0x48, 0x8e },
++	{ 0xe9, 0x53, 0x1b, 0xfc, 0x8b, 0x02, 0x99, 0x5a,
++	  0xea, 0xa7, 0x5b, 0xa2, 0x70, 0x31, 0xfa, 0xdb,
++	  0xcb, 0xf4, 0xa0, 0xda, 0xb8, 0x96, 0x1d, 0x92,
++	  0x96, 0xcd, 0x7e, 0x84, 0xd2, 0x5d, 0x60, 0x06 },
++	{ 0x34, 0xe9, 0xc2, 0x6a, 0x01, 0xd7, 0xf1, 0x61,
++	  0x81, 0xb4, 0x54, 0xa9, 0xd1, 0x62, 0x3c, 0x23,
++	  0x3c, 0xb9, 0x9d, 0x31, 0xc6, 0x94, 0x65, 0x6e,
++	  0x94, 0x13, 0xac, 0xa3, 0xe9, 0x18, 0x69, 0x2f },
++	{ 0xd9, 0xd7, 0x42, 0x2f, 0x43, 0x7b, 0xd4, 0x39,
++	  0xdd, 0xd4, 0xd8, 0x83, 0xda, 0xe2, 0xa0, 0x83,
++	  0x50, 0x17, 0x34, 0x14, 0xbe, 0x78, 0x15, 0x51,
++	  0x33, 0xff, 0xf1, 0x96, 0x4c, 0x3d, 0x79, 0x72 },
++	{ 0x4a, 0xee, 0x0c, 0x7a, 0xaf, 0x07, 0x54, 0x14,
++	  0xff, 0x17, 0x93, 0xea, 0xd7, 0xea, 0xca, 0x60,
++	  0x17, 0x75, 0xc6, 0x15, 0xdb, 0xd6, 0x0b, 0x64,
++	  0x0b, 0x0a, 0x9f, 0x0c, 0xe5, 0x05, 0xd4, 0x35 },
++	{ 0x6b, 0xfd, 0xd1, 0x54, 0x59, 0xc8, 0x3b, 0x99,
++	  0xf0, 0x96, 0xbf, 0xb4, 0x9e, 0xe8, 0x7b, 0x06,
++	  0x3d, 0x69, 0xc1, 0x97, 0x4c, 0x69, 0x28, 0xac,
++	  0xfc, 0xfb, 0x40, 0x99, 0xf8, 0xc4, 0xef, 0x67 },
++	{ 0x9f, 0xd1, 0xc4, 0x08, 0xfd, 0x75, 0xc3, 0x36,
++	  0x19, 0x3a, 0x2a, 0x14, 0xd9, 0x4f, 0x6a, 0xf5,
++	  0xad, 0xf0, 0x50, 0xb8, 0x03, 0x87, 0xb4, 0xb0,
++	  0x10, 0xfb, 0x29, 0xf4, 0xcc, 0x72, 0x70, 0x7c },
++	{ 0x13, 0xc8, 0x84, 0x80, 0xa5, 0xd0, 0x0d, 0x6c,
++	  0x8c, 0x7a, 0xd2, 0x11, 0x0d, 0x76, 0xa8, 0x2d,
++	  0x9b, 0x70, 0xf4, 0xfa, 0x66, 0x96, 0xd4, 0xe5,
++	  0xdd, 0x42, 0xa0, 0x66, 0xdc, 0xaf, 0x99, 0x20 },
++	{ 0x82, 0x0e, 0x72, 0x5e, 0xe2, 0x5f, 0xe8, 0xfd,
++	  0x3a, 0x8d, 0x5a, 0xbe, 0x4c, 0x46, 0xc3, 0xba,
++	  0x88, 0x9d, 0xe6, 0xfa, 0x91, 0x91, 0xaa, 0x22,
++	  0xba, 0x67, 0xd5, 0x70, 0x54, 0x21, 0x54, 0x2b },
++	{ 0x32, 0xd9, 0x3a, 0x0e, 0xb0, 0x2f, 0x42, 0xfb,
++	  0xbc, 0xaf, 0x2b, 0xad, 0x00, 0x85, 0xb2, 0x82,
++	  0xe4, 0x60, 0x46, 0xa4, 0xdf, 0x7a, 0xd1, 0x06,
++	  0x57, 0xc9, 0xd6, 0x47, 0x63, 0x75, 0xb9, 0x3e },
++	{ 0xad, 0xc5, 0x18, 0x79, 0x05, 0xb1, 0x66, 0x9c,
++	  0xd8, 0xec, 0x9c, 0x72, 0x1e, 0x19, 0x53, 0x78,
++	  0x6b, 0x9d, 0x89, 0xa9, 0xba, 0xe3, 0x07, 0x80,
++	  0xf1, 0xe1, 0xea, 0xb2, 0x4a, 0x00, 0x52, 0x3c },
++	{ 0xe9, 0x07, 0x56, 0xff, 0x7f, 0x9a, 0xd8, 0x10,
++	  0xb2, 0x39, 0xa1, 0x0c, 0xed, 0x2c, 0xf9, 0xb2,
++	  0x28, 0x43, 0x54, 0xc1, 0xf8, 0xc7, 0xe0, 0xac,
++	  0xcc, 0x24, 0x61, 0xdc, 0x79, 0x6d, 0x6e, 0x89 },
++	{ 0x12, 0x51, 0xf7, 0x6e, 0x56, 0x97, 0x84, 0x81,
++	  0x87, 0x53, 0x59, 0x80, 0x1d, 0xb5, 0x89, 0xa0,
++	  0xb2, 0x2f, 0x86, 0xd8, 0xd6, 0x34, 0xdc, 0x04,
++	  0x50, 0x6f, 0x32, 0x2e, 0xd7, 0x8f, 0x17, 0xe8 },
++	{ 0x3a, 0xfa, 0x89, 0x9f, 0xd9, 0x80, 0xe7, 0x3e,
++	  0xcb, 0x7f, 0x4d, 0x8b, 0x8f, 0x29, 0x1d, 0xc9,
++	  0xaf, 0x79, 0x6b, 0xc6, 0x5d, 0x27, 0xf9, 0x74,
++	  0xc6, 0xf1, 0x93, 0xc9, 0x19, 0x1a, 0x09, 0xfd },
++	{ 0xaa, 0x30, 0x5b, 0xe2, 0x6e, 0x5d, 0xed, 0xdc,
++	  0x3c, 0x10, 0x10, 0xcb, 0xc2, 0x13, 0xf9, 0x5f,
++	  0x05, 0x1c, 0x78, 0x5c, 0x5b, 0x43, 0x1e, 0x6a,
++	  0x7c, 0xd0, 0x48, 0xf1, 0x61, 0x78, 0x75, 0x28 },
++	{ 0x8e, 0xa1, 0x88, 0x4f, 0xf3, 0x2e, 0x9d, 0x10,
++	  0xf0, 0x39, 0xb4, 0x07, 0xd0, 0xd4, 0x4e, 0x7e,
++	  0x67, 0x0a, 0xbd, 0x88, 0x4a, 0xee, 0xe0, 0xfb,
++	  0x75, 0x7a, 0xe9, 0x4e, 0xaa, 0x97, 0x37, 0x3d },
++	{ 0xd4, 0x82, 0xb2, 0x15, 0x5d, 0x4d, 0xec, 0x6b,
++	  0x47, 0x36, 0xa1, 0xf1, 0x61, 0x7b, 0x53, 0xaa,
++	  0xa3, 0x73, 0x10, 0x27, 0x7d, 0x3f, 0xef, 0x0c,
++	  0x37, 0xad, 0x41, 0x76, 0x8f, 0xc2, 0x35, 0xb4 },
++	{ 0x4d, 0x41, 0x39, 0x71, 0x38, 0x7e, 0x7a, 0x88,
++	  0x98, 0xa8, 0xdc, 0x2a, 0x27, 0x50, 0x07, 0x78,
++	  0x53, 0x9e, 0xa2, 0x14, 0xa2, 0xdf, 0xe9, 0xb3,
++	  0xd7, 0xe8, 0xeb, 0xdc, 0xe5, 0xcf, 0x3d, 0xb3 },
++	{ 0x69, 0x6e, 0x5d, 0x46, 0xe6, 0xc5, 0x7e, 0x87,
++	  0x96, 0xe4, 0x73, 0x5d, 0x08, 0x91, 0x6e, 0x0b,
++	  0x79, 0x29, 0xb3, 0xcf, 0x29, 0x8c, 0x29, 0x6d,
++	  0x22, 0xe9, 0xd3, 0x01, 0x96, 0x53, 0x37, 0x1c },
++	{ 0x1f, 0x56, 0x47, 0xc1, 0xd3, 0xb0, 0x88, 0x22,
++	  0x88, 0x85, 0x86, 0x5c, 0x89, 0x40, 0x90, 0x8b,
++	  0xf4, 0x0d, 0x1a, 0x82, 0x72, 0x82, 0x19, 0x73,
++	  0xb1, 0x60, 0x00, 0x8e, 0x7a, 0x3c, 0xe2, 0xeb },
++	{ 0xb6, 0xe7, 0x6c, 0x33, 0x0f, 0x02, 0x1a, 0x5b,
++	  0xda, 0x65, 0x87, 0x50, 0x10, 0xb0, 0xed, 0xf0,
++	  0x91, 0x26, 0xc0, 0xf5, 0x10, 0xea, 0x84, 0x90,
++	  0x48, 0x19, 0x20, 0x03, 0xae, 0xf4, 0xc6, 0x1c },
++	{ 0x3c, 0xd9, 0x52, 0xa0, 0xbe, 0xad, 0xa4, 0x1a,
++	  0xbb, 0x42, 0x4c, 0xe4, 0x7f, 0x94, 0xb4, 0x2b,
++	  0xe6, 0x4e, 0x1f, 0xfb, 0x0f, 0xd0, 0x78, 0x22,
++	  0x76, 0x80, 0x79, 0x46, 0xd0, 0xd0, 0xbc, 0x55 },
++	{ 0x98, 0xd9, 0x26, 0x77, 0x43, 0x9b, 0x41, 0xb7,
++	  0xbb, 0x51, 0x33, 0x12, 0xaf, 0xb9, 0x2b, 0xcc,
++	  0x8e, 0xe9, 0x68, 0xb2, 0xe3, 0xb2, 0x38, 0xce,
++	  0xcb, 0x9b, 0x0f, 0x34, 0xc9, 0xbb, 0x63, 0xd0 },
++	{ 0xec, 0xbc, 0xa2, 0xcf, 0x08, 0xae, 0x57, 0xd5,
++	  0x17, 0xad, 0x16, 0x15, 0x8a, 0x32, 0xbf, 0xa7,
++	  0xdc, 0x03, 0x82, 0xea, 0xed, 0xa1, 0x28, 0xe9,
++	  0x18, 0x86, 0x73, 0x4c, 0x24, 0xa0, 0xb2, 0x9d },
++	{ 0x94, 0x2c, 0xc7, 0xc0, 0xb5, 0x2e, 0x2b, 0x16,
++	  0xa4, 0xb8, 0x9f, 0xa4, 0xfc, 0x7e, 0x0b, 0xf6,
++	  0x09, 0xe2, 0x9a, 0x08, 0xc1, 0xa8, 0x54, 0x34,
++	  0x52, 0xb7, 0x7c, 0x7b, 0xfd, 0x11, 0xbb, 0x28 },
++	{ 0x8a, 0x06, 0x5d, 0x8b, 0x61, 0xa0, 0xdf, 0xfb,
++	  0x17, 0x0d, 0x56, 0x27, 0x73, 0x5a, 0x76, 0xb0,
++	  0xe9, 0x50, 0x60, 0x37, 0x80, 0x8c, 0xba, 0x16,
++	  0xc3, 0x45, 0x00, 0x7c, 0x9f, 0x79, 0xcf, 0x8f },
++	{ 0x1b, 0x9f, 0xa1, 0x97, 0x14, 0x65, 0x9c, 0x78,
++	  0xff, 0x41, 0x38, 0x71, 0x84, 0x92, 0x15, 0x36,
++	  0x10, 0x29, 0xac, 0x80, 0x2b, 0x1c, 0xbc, 0xd5,
++	  0x4e, 0x40, 0x8b, 0xd8, 0x72, 0x87, 0xf8, 0x1f },
++	{ 0x8d, 0xab, 0x07, 0x1b, 0xcd, 0x6c, 0x72, 0x92,
++	  0xa9, 0xef, 0x72, 0x7b, 0x4a, 0xe0, 0xd8, 0x67,
++	  0x13, 0x30, 0x1d, 0xa8, 0x61, 0x8d, 0x9a, 0x48,
++	  0xad, 0xce, 0x55, 0xf3, 0x03, 0xa8, 0x69, 0xa1 },
++	{ 0x82, 0x53, 0xe3, 0xe7, 0xc7, 0xb6, 0x84, 0xb9,
++	  0xcb, 0x2b, 0xeb, 0x01, 0x4c, 0xe3, 0x30, 0xff,
++	  0x3d, 0x99, 0xd1, 0x7a, 0xbb, 0xdb, 0xab, 0xe4,
++	  0xf4, 0xd6, 0x74, 0xde, 0xd5, 0x3f, 0xfc, 0x6b },
++	{ 0xf1, 0x95, 0xf3, 0x21, 0xe9, 0xe3, 0xd6, 0xbd,
++	  0x7d, 0x07, 0x45, 0x04, 0xdd, 0x2a, 0xb0, 0xe6,
++	  0x24, 0x1f, 0x92, 0xe7, 0x84, 0xb1, 0xaa, 0x27,
++	  0x1f, 0xf6, 0x48, 0xb1, 0xca, 0xb6, 0xd7, 0xf6 },
++	{ 0x27, 0xe4, 0xcc, 0x72, 0x09, 0x0f, 0x24, 0x12,
++	  0x66, 0x47, 0x6a, 0x7c, 0x09, 0x49, 0x5f, 0x2d,
++	  0xb1, 0x53, 0xd5, 0xbc, 0xbd, 0x76, 0x19, 0x03,
++	  0xef, 0x79, 0x27, 0x5e, 0xc5, 0x6b, 0x2e, 0xd8 },
++	{ 0x89, 0x9c, 0x24, 0x05, 0x78, 0x8e, 0x25, 0xb9,
++	  0x9a, 0x18, 0x46, 0x35, 0x5e, 0x64, 0x6d, 0x77,
++	  0xcf, 0x40, 0x00, 0x83, 0x41, 0x5f, 0x7d, 0xc5,
++	  0xaf, 0xe6, 0x9d, 0x6e, 0x17, 0xc0, 0x00, 0x23 },
++	{ 0xa5, 0x9b, 0x78, 0xc4, 0x90, 0x57, 0x44, 0x07,
++	  0x6b, 0xfe, 0xe8, 0x94, 0xde, 0x70, 0x7d, 0x4f,
++	  0x12, 0x0b, 0x5c, 0x68, 0x93, 0xea, 0x04, 0x00,
++	  0x29, 0x7d, 0x0b, 0xb8, 0x34, 0x72, 0x76, 0x32 },
++	{ 0x59, 0xdc, 0x78, 0xb1, 0x05, 0x64, 0x97, 0x07,
++	  0xa2, 0xbb, 0x44, 0x19, 0xc4, 0x8f, 0x00, 0x54,
++	  0x00, 0xd3, 0x97, 0x3d, 0xe3, 0x73, 0x66, 0x10,
++	  0x23, 0x04, 0x35, 0xb1, 0x04, 0x24, 0xb2, 0x4f },
++	{ 0xc0, 0x14, 0x9d, 0x1d, 0x7e, 0x7a, 0x63, 0x53,
++	  0xa6, 0xd9, 0x06, 0xef, 0xe7, 0x28, 0xf2, 0xf3,
++	  0x29, 0xfe, 0x14, 0xa4, 0x14, 0x9a, 0x3e, 0xa7,
++	  0x76, 0x09, 0xbc, 0x42, 0xb9, 0x75, 0xdd, 0xfa },
++	{ 0xa3, 0x2f, 0x24, 0x14, 0x74, 0xa6, 0xc1, 0x69,
++	  0x32, 0xe9, 0x24, 0x3b, 0xe0, 0xcf, 0x09, 0xbc,
++	  0xdc, 0x7e, 0x0c, 0xa0, 0xe7, 0xa6, 0xa1, 0xb9,
++	  0xb1, 0xa0, 0xf0, 0x1e, 0x41, 0x50, 0x23, 0x77 },
++	{ 0xb2, 0x39, 0xb2, 0xe4, 0xf8, 0x18, 0x41, 0x36,
++	  0x1c, 0x13, 0x39, 0xf6, 0x8e, 0x2c, 0x35, 0x9f,
++	  0x92, 0x9a, 0xf9, 0xad, 0x9f, 0x34, 0xe0, 0x1a,
++	  0xab, 0x46, 0x31, 0xad, 0x6d, 0x55, 0x00, 0xb0 },
++	{ 0x85, 0xfb, 0x41, 0x9c, 0x70, 0x02, 0xa3, 0xe0,
++	  0xb4, 0xb6, 0xea, 0x09, 0x3b, 0x4c, 0x1a, 0xc6,
++	  0x93, 0x66, 0x45, 0xb6, 0x5d, 0xac, 0x5a, 0xc1,
++	  0x5a, 0x85, 0x28, 0xb7, 0xb9, 0x4c, 0x17, 0x54 },
++	{ 0x96, 0x19, 0x72, 0x06, 0x25, 0xf1, 0x90, 0xb9,
++	  0x3a, 0x3f, 0xad, 0x18, 0x6a, 0xb3, 0x14, 0x18,
++	  0x96, 0x33, 0xc0, 0xd3, 0xa0, 0x1e, 0x6f, 0x9b,
++	  0xc8, 0xc4, 0xa8, 0xf8, 0x2f, 0x38, 0x3d, 0xbf },
++	{ 0x7d, 0x62, 0x0d, 0x90, 0xfe, 0x69, 0xfa, 0x46,
++	  0x9a, 0x65, 0x38, 0x38, 0x89, 0x70, 0xa1, 0xaa,
++	  0x09, 0xbb, 0x48, 0xa2, 0xd5, 0x9b, 0x34, 0x7b,
++	  0x97, 0xe8, 0xce, 0x71, 0xf4, 0x8c, 0x7f, 0x46 },
++	{ 0x29, 0x43, 0x83, 0x56, 0x85, 0x96, 0xfb, 0x37,
++	  0xc7, 0x5b, 0xba, 0xcd, 0x97, 0x9c, 0x5f, 0xf6,
++	  0xf2, 0x0a, 0x55, 0x6b, 0xf8, 0x87, 0x9c, 0xc7,
++	  0x29, 0x24, 0x85, 0x5d, 0xf9, 0xb8, 0x24, 0x0e },
++	{ 0x16, 0xb1, 0x8a, 0xb3, 0x14, 0x35, 0x9c, 0x2b,
++	  0x83, 0x3c, 0x1c, 0x69, 0x86, 0xd4, 0x8c, 0x55,
++	  0xa9, 0xfc, 0x97, 0xcd, 0xe9, 0xa3, 0xc1, 0xf1,
++	  0x0a, 0x31, 0x77, 0x14, 0x0f, 0x73, 0xf7, 0x38 },
++	{ 0x8c, 0xbb, 0xdd, 0x14, 0xbc, 0x33, 0xf0, 0x4c,
++	  0xf4, 0x58, 0x13, 0xe4, 0xa1, 0x53, 0xa2, 0x73,
++	  0xd3, 0x6a, 0xda, 0xd5, 0xce, 0x71, 0xf4, 0x99,
++	  0xee, 0xb8, 0x7f, 0xb8, 0xac, 0x63, 0xb7, 0x29 },
++	{ 0x69, 0xc9, 0xa4, 0x98, 0xdb, 0x17, 0x4e, 0xca,
++	  0xef, 0xcc, 0x5a, 0x3a, 0xc9, 0xfd, 0xed, 0xf0,
++	  0xf8, 0x13, 0xa5, 0xbe, 0xc7, 0x27, 0xf1, 0xe7,
++	  0x75, 0xba, 0xbd, 0xec, 0x77, 0x18, 0x81, 0x6e },
++	{ 0xb4, 0x62, 0xc3, 0xbe, 0x40, 0x44, 0x8f, 0x1d,
++	  0x4f, 0x80, 0x62, 0x62, 0x54, 0xe5, 0x35, 0xb0,
++	  0x8b, 0xc9, 0xcd, 0xcf, 0xf5, 0x99, 0xa7, 0x68,
++	  0x57, 0x8d, 0x4b, 0x28, 0x81, 0xa8, 0xe3, 0xf0 },
++	{ 0x55, 0x3e, 0x9d, 0x9c, 0x5f, 0x36, 0x0a, 0xc0,
++	  0xb7, 0x4a, 0x7d, 0x44, 0xe5, 0xa3, 0x91, 0xda,
++	  0xd4, 0xce, 0xd0, 0x3e, 0x0c, 0x24, 0x18, 0x3b,
++	  0x7e, 0x8e, 0xca, 0xbd, 0xf1, 0x71, 0x5a, 0x64 },
++	{ 0x7a, 0x7c, 0x55, 0xa5, 0x6f, 0xa9, 0xae, 0x51,
++	  0xe6, 0x55, 0xe0, 0x19, 0x75, 0xd8, 0xa6, 0xff,
++	  0x4a, 0xe9, 0xe4, 0xb4, 0x86, 0xfc, 0xbe, 0x4e,
++	  0xac, 0x04, 0x45, 0x88, 0xf2, 0x45, 0xeb, 0xea },
++	{ 0x2a, 0xfd, 0xf3, 0xc8, 0x2a, 0xbc, 0x48, 0x67,
++	  0xf5, 0xde, 0x11, 0x12, 0x86, 0xc2, 0xb3, 0xbe,
++	  0x7d, 0x6e, 0x48, 0x65, 0x7b, 0xa9, 0x23, 0xcf,
++	  0xbf, 0x10, 0x1a, 0x6d, 0xfc, 0xf9, 0xdb, 0x9a },
++	{ 0x41, 0x03, 0x7d, 0x2e, 0xdc, 0xdc, 0xe0, 0xc4,
++	  0x9b, 0x7f, 0xb4, 0xa6, 0xaa, 0x09, 0x99, 0xca,
++	  0x66, 0x97, 0x6c, 0x74, 0x83, 0xaf, 0xe6, 0x31,
++	  0xd4, 0xed, 0xa2, 0x83, 0x14, 0x4f, 0x6d, 0xfc },
++	{ 0xc4, 0x46, 0x6f, 0x84, 0x97, 0xca, 0x2e, 0xeb,
++	  0x45, 0x83, 0xa0, 0xb0, 0x8e, 0x9d, 0x9a, 0xc7,
++	  0x43, 0x95, 0x70, 0x9f, 0xda, 0x10, 0x9d, 0x24,
++	  0xf2, 0xe4, 0x46, 0x21, 0x96, 0x77, 0x9c, 0x5d },
++	{ 0x75, 0xf6, 0x09, 0x33, 0x8a, 0xa6, 0x7d, 0x96,
++	  0x9a, 0x2a, 0xe2, 0xa2, 0x36, 0x2b, 0x2d, 0xa9,
++	  0xd7, 0x7c, 0x69, 0x5d, 0xfd, 0x1d, 0xf7, 0x22,
++	  0x4a, 0x69, 0x01, 0xdb, 0x93, 0x2c, 0x33, 0x64 },
++	{ 0x68, 0x60, 0x6c, 0xeb, 0x98, 0x9d, 0x54, 0x88,
++	  0xfc, 0x7c, 0xf6, 0x49, 0xf3, 0xd7, 0xc2, 0x72,
++	  0xef, 0x05, 0x5d, 0xa1, 0xa9, 0x3f, 0xae, 0xcd,
++	  0x55, 0xfe, 0x06, 0xf6, 0x96, 0x70, 0x98, 0xca },
++	{ 0x44, 0x34, 0x6b, 0xde, 0xb7, 0xe0, 0x52, 0xf6,
++	  0x25, 0x50, 0x48, 0xf0, 0xd9, 0xb4, 0x2c, 0x42,
++	  0x5b, 0xab, 0x9c, 0x3d, 0xd2, 0x41, 0x68, 0x21,
++	  0x2c, 0x3e, 0xcf, 0x1e, 0xbf, 0x34, 0xe6, 0xae },
++	{ 0x8e, 0x9c, 0xf6, 0xe1, 0xf3, 0x66, 0x47, 0x1f,
++	  0x2a, 0xc7, 0xd2, 0xee, 0x9b, 0x5e, 0x62, 0x66,
++	  0xfd, 0xa7, 0x1f, 0x8f, 0x2e, 0x41, 0x09, 0xf2,
++	  0x23, 0x7e, 0xd5, 0xf8, 0x81, 0x3f, 0xc7, 0x18 },
++	{ 0x84, 0xbb, 0xeb, 0x84, 0x06, 0xd2, 0x50, 0x95,
++	  0x1f, 0x8c, 0x1b, 0x3e, 0x86, 0xa7, 0xc0, 0x10,
++	  0x08, 0x29, 0x21, 0x83, 0x3d, 0xfd, 0x95, 0x55,
++	  0xa2, 0xf9, 0x09, 0xb1, 0x08, 0x6e, 0xb4, 0xb8 },
++	{ 0xee, 0x66, 0x6f, 0x3e, 0xef, 0x0f, 0x7e, 0x2a,
++	  0x9c, 0x22, 0x29, 0x58, 0xc9, 0x7e, 0xaf, 0x35,
++	  0xf5, 0x1c, 0xed, 0x39, 0x3d, 0x71, 0x44, 0x85,
++	  0xab, 0x09, 0xa0, 0x69, 0x34, 0x0f, 0xdf, 0x88 },
++	{ 0xc1, 0x53, 0xd3, 0x4a, 0x65, 0xc4, 0x7b, 0x4a,
++	  0x62, 0xc5, 0xca, 0xcf, 0x24, 0x01, 0x09, 0x75,
++	  0xd0, 0x35, 0x6b, 0x2f, 0x32, 0xc8, 0xf5, 0xda,
++	  0x53, 0x0d, 0x33, 0x88, 0x16, 0xad, 0x5d, 0xe6 },
++	{ 0x9f, 0xc5, 0x45, 0x01, 0x09, 0xe1, 0xb7, 0x79,
++	  0xf6, 0xc7, 0xae, 0x79, 0xd5, 0x6c, 0x27, 0x63,
++	  0x5c, 0x8d, 0xd4, 0x26, 0xc5, 0xa9, 0xd5, 0x4e,
++	  0x25, 0x78, 0xdb, 0x98, 0x9b, 0x8c, 0x3b, 0x4e },
++	{ 0xd1, 0x2b, 0xf3, 0x73, 0x2e, 0xf4, 0xaf, 0x5c,
++	  0x22, 0xfa, 0x90, 0x35, 0x6a, 0xf8, 0xfc, 0x50,
++	  0xfc, 0xb4, 0x0f, 0x8f, 0x2e, 0xa5, 0xc8, 0x59,
++	  0x47, 0x37, 0xa3, 0xb3, 0xd5, 0xab, 0xdb, 0xd7 },
++	{ 0x11, 0x03, 0x0b, 0x92, 0x89, 0xbb, 0xa5, 0xaf,
++	  0x65, 0x26, 0x06, 0x72, 0xab, 0x6f, 0xee, 0x88,
++	  0xb8, 0x74, 0x20, 0xac, 0xef, 0x4a, 0x17, 0x89,
++	  0xa2, 0x07, 0x3b, 0x7e, 0xc2, 0xf2, 0xa0, 0x9e },
++	{ 0x69, 0xcb, 0x19, 0x2b, 0x84, 0x44, 0x00, 0x5c,
++	  0x8c, 0x0c, 0xeb, 0x12, 0xc8, 0x46, 0x86, 0x07,
++	  0x68, 0x18, 0x8c, 0xda, 0x0a, 0xec, 0x27, 0xa9,
++	  0xc8, 0xa5, 0x5c, 0xde, 0xe2, 0x12, 0x36, 0x32 },
++	{ 0xdb, 0x44, 0x4c, 0x15, 0x59, 0x7b, 0x5f, 0x1a,
++	  0x03, 0xd1, 0xf9, 0xed, 0xd1, 0x6e, 0x4a, 0x9f,
++	  0x43, 0xa6, 0x67, 0xcc, 0x27, 0x51, 0x75, 0xdf,
++	  0xa2, 0xb7, 0x04, 0xe3, 0xbb, 0x1a, 0x9b, 0x83 },
++	{ 0x3f, 0xb7, 0x35, 0x06, 0x1a, 0xbc, 0x51, 0x9d,
++	  0xfe, 0x97, 0x9e, 0x54, 0xc1, 0xee, 0x5b, 0xfa,
++	  0xd0, 0xa9, 0xd8, 0x58, 0xb3, 0x31, 0x5b, 0xad,
++	  0x34, 0xbd, 0xe9, 0x99, 0xef, 0xd7, 0x24, 0xdd }
++};
++
++/* Check blake2s() against the fixed test vectors; true iff all match. */
++static bool __init blake2s_selftest(void)
++{
++	u8 key[BLAKE2S_KEY_SIZE];		/* key bytes 0, 1, 2, ... */
++	u8 buf[ARRAY_SIZE(blake2s_testvecs)];	/* message bytes 0, 1, 2, ... */
++	u8 hash[BLAKE2S_HASH_SIZE];		/* digest under test */
++	size_t i;
++	bool success = true;
++
++	for (i = 0; i < BLAKE2S_KEY_SIZE; ++i)
++		key[i] = (u8)i;
++	for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i)
++		buf[i] = (u8)i;
++	/* Keyed: input length i. NOTE(review): assumes keyed table fits buf. */
++	for (i = 0; i < ARRAY_SIZE(blake2s_keyed_testvecs); ++i) {
++		blake2s(hash, buf, key, BLAKE2S_HASH_SIZE, i, BLAKE2S_KEY_SIZE);
++		if (memcmp(hash, blake2s_keyed_testvecs[i], BLAKE2S_HASH_SIZE)) {
++			pr_err("blake2s keyed self-test %zu: FAIL\n", i + 1);
++			success = false;
++		}
++	}
++	/* Unkeyed: same message prefixes, NULL key and key length 0. */
++	for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i) {
++		blake2s(hash, buf, NULL, BLAKE2S_HASH_SIZE, i, 0);
++		if (memcmp(hash, blake2s_testvecs[i], BLAKE2S_HASH_SIZE)) {
++			pr_err("blake2s unkeyed self-test %zu: FAIL\n", i + 1);
++			success = false;
++		}
++	}
++	return success;	/* every vector is run even after a failure */
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/selftest/chacha20.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,2698 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++struct chacha20_testvec {
++	const u8 *input, *output, *key;	/* presumably plaintext, expected output and key -- confirm against the selftest loop */
++	u64 nonce;	/* 64-bit value; the nonceNN enum constants in this file are 64-bit literals */
++	size_t ilen;	/* presumably the length of input in bytes -- confirm against the selftest loop */
++};
++
++struct hchacha20_testvec {
++	u8 key[HCHACHA20_KEY_SIZE];
++	u8 nonce[HCHACHA20_NONCE_SIZE];
++	u8 output[CHACHA20_KEY_SIZE];	/* note: sized by the ChaCha20 key constant, not an HChaCha20 one */
++};
++
++/* These test vectors were generated by reference implementations and are
++ * designed to check chacha20 implementation block handling; others are taken
++ * from the draft-arciszewski-xchacha-01 document.
++ */
++
++static const u8 input01[] __initconst = { };	/* vector 01: zero-length input */
++static const u8 output01[] __initconst = { };	/* zero-length expected output to match */
++static const u8 key01[] __initconst = {	/* 32 key bytes */
++	0x09, 0xf4, 0xe8, 0x57, 0x10, 0xf2, 0x12, 0xc3,
++	0xc6, 0x91, 0xc4, 0x09, 0x97, 0x46, 0xef, 0xfe,
++	0x02, 0x00, 0xe4, 0x5c, 0x82, 0xed, 0x16, 0xf3,
++	0x32, 0xbe, 0xec, 0x7a, 0xe6, 0x68, 0x12, 0x26
++};
++enum { nonce01 = 0x3834e2afca3c66d3ULL };
++
++static const u8 input02[] __initconst = {
++	0x9d
++};
++static const u8 output02[] __initconst = {
++	0x94
++};
++static const u8 key02[] __initconst = {
++	0x8c, 0x01, 0xac, 0xaf, 0x62, 0x63, 0x56, 0x7a,
++	0xad, 0x23, 0x4c, 0x58, 0x29, 0x29, 0xbe, 0xab,
++	0xe9, 0xf8, 0xdf, 0x6c, 0x8c, 0x74, 0x4d, 0x7d,
++	0x13, 0x94, 0x10, 0x02, 0x3d, 0x8e, 0x9f, 0x94
++};
++enum { nonce02 = 0x5d1b3bfdedd9f73aULL };
++
++static const u8 input03[] __initconst = {
++	0x04, 0x16
++};
++static const u8 output03[] __initconst = {
++	0x92, 0x07
++};
++static const u8 key03[] __initconst = {
++	0x22, 0x0c, 0x79, 0x2c, 0x38, 0x51, 0xbe, 0x99,
++	0xa9, 0x59, 0x24, 0x50, 0xef, 0x87, 0x38, 0xa6,
++	0xa0, 0x97, 0x20, 0xcb, 0xb4, 0x0c, 0x94, 0x67,
++	0x1f, 0x98, 0xdc, 0xc4, 0x83, 0xbc, 0x35, 0x4d
++};
++enum { nonce03 = 0x7a3353ad720a3e2eULL };
++
++static const u8 input04[] __initconst = {
++	0xc7, 0xcc, 0xd0
++};
++static const u8 output04[] __initconst = {
++	0xd8, 0x41, 0x80
++};
++static const u8 key04[] __initconst = {
++	0x81, 0x5e, 0x12, 0x01, 0xc4, 0x36, 0x15, 0x03,
++	0x11, 0xa0, 0xe9, 0x86, 0xbb, 0x5a, 0xdc, 0x45,
++	0x7d, 0x5e, 0x98, 0xf8, 0x06, 0x76, 0x1c, 0xec,
++	0xc0, 0xf7, 0xca, 0x4e, 0x99, 0xd9, 0x42, 0x38
++};
++enum { nonce04 = 0x6816e2fc66176da2ULL };
++
++static const u8 input05[] __initconst = {
++	0x48, 0xf1, 0x31, 0x5f
++};
++static const u8 output05[] __initconst = {
++	0x48, 0xf7, 0x13, 0x67
++};
++static const u8 key05[] __initconst = {
++	0x3f, 0xd6, 0xb6, 0x5e, 0x2f, 0xda, 0x82, 0x39,
++	0x97, 0x06, 0xd3, 0x62, 0x4f, 0xbd, 0xcb, 0x9b,
++	0x1d, 0xe6, 0x4a, 0x76, 0xab, 0xdd, 0x14, 0x50,
++	0x59, 0x21, 0xe3, 0xb2, 0xc7, 0x95, 0xbc, 0x45
++};
++enum { nonce05 = 0xc41a7490e228cc42ULL };
++
++static const u8 input06[] __initconst = {
++	0xae, 0xa2, 0x85, 0x1d, 0xc8
++};
++static const u8 output06[] __initconst = {
++	0xfa, 0xff, 0x45, 0x6b, 0x6f
++};
++static const u8 key06[] __initconst = {
++	0x04, 0x8d, 0xea, 0x67, 0x20, 0x78, 0xfb, 0x8f,
++	0x49, 0x80, 0x35, 0xb5, 0x7b, 0xe4, 0x31, 0x74,
++	0x57, 0x43, 0x3a, 0x64, 0x64, 0xb9, 0xe6, 0x23,
++	0x4d, 0xfe, 0xb8, 0x7b, 0x71, 0x4d, 0x9d, 0x21
++};
++enum { nonce06 = 0x251366db50b10903ULL };
++
++static const u8 input07[] __initconst = {
++	0x1a, 0x32, 0x85, 0xb6, 0xe8, 0x52
++};
++static const u8 output07[] __initconst = {
++	0xd3, 0x5f, 0xf0, 0x07, 0x69, 0xec
++};
++static const u8 key07[] __initconst = {
++	0xbf, 0x2d, 0x42, 0x99, 0x97, 0x76, 0x04, 0xad,
++	0xd3, 0x8f, 0x6e, 0x6a, 0x34, 0x85, 0xaf, 0x81,
++	0xef, 0x36, 0x33, 0xd5, 0x43, 0xa2, 0xaa, 0x08,
++	0x0f, 0x77, 0x42, 0x83, 0x58, 0xc5, 0x42, 0x2a
++};
++enum { nonce07 = 0xe0796da17dba9b58ULL };
++
++static const u8 input08[] __initconst = {
++	0x40, 0xae, 0xcd, 0xe4, 0x3d, 0x22, 0xe0
++};
++static const u8 output08[] __initconst = {
++	0xfd, 0x8a, 0x9f, 0x3d, 0x05, 0xc9, 0xd3
++};
++static const u8 key08[] __initconst = {
++	0xdc, 0x3f, 0x41, 0xe3, 0x23, 0x2a, 0x8d, 0xf6,
++	0x41, 0x2a, 0xa7, 0x66, 0x05, 0x68, 0xe4, 0x7b,
++	0xc4, 0x58, 0xd6, 0xcc, 0xdf, 0x0d, 0xc6, 0x25,
++	0x1b, 0x61, 0x32, 0x12, 0x4e, 0xf1, 0xe6, 0x29
++};
++enum { nonce08 = 0xb1d2536d9e159832ULL };
++
++static const u8 input09[] __initconst = {
++	0xba, 0x1d, 0x14, 0x16, 0x9f, 0x83, 0x67, 0x24
++};
++static const u8 output09[] __initconst = {
++	0x7c, 0xe3, 0x78, 0x1d, 0xa2, 0xe7, 0xe9, 0x39
++};
++static const u8 key09[] __initconst = {
++	0x17, 0x55, 0x90, 0x52, 0xa4, 0xce, 0x12, 0xae,
++	0xd4, 0xfd, 0xd4, 0xfb, 0xd5, 0x18, 0x59, 0x50,
++	0x4e, 0x51, 0x99, 0x32, 0x09, 0x31, 0xfc, 0xf7,
++	0x27, 0x10, 0x8e, 0xa2, 0x4b, 0xa5, 0xf5, 0x62
++};
++enum { nonce09 = 0x495fc269536d003ULL };
++
++static const u8 input10[] __initconst = {
++	0x09, 0xfd, 0x3c, 0x0b, 0x3d, 0x0e, 0xf3, 0x9d,
++	0x27
++};
++static const u8 output10[] __initconst = {
++	0xdc, 0xe4, 0x33, 0x60, 0x0c, 0x07, 0xcb, 0x51,
++	0x6b
++};
++static const u8 key10[] __initconst = {
++	0x4e, 0x00, 0x72, 0x37, 0x0f, 0x52, 0x4d, 0x6f,
++	0x37, 0x50, 0x3c, 0xb3, 0x51, 0x81, 0x49, 0x16,
++	0x7e, 0xfd, 0xb1, 0x51, 0x72, 0x2e, 0xe4, 0x16,
++	0x68, 0x5c, 0x5b, 0x8a, 0xc3, 0x90, 0x70, 0x04
++};
++enum { nonce10 = 0x1ad9d1114d88cbbdULL };
++
++static const u8 input11[] __initconst = {
++	0x70, 0x18, 0x52, 0x85, 0xba, 0x66, 0xff, 0x2c,
++	0x9a, 0x46
++};
++static const u8 output11[] __initconst = {
++	0xf5, 0x2a, 0x7a, 0xfd, 0x31, 0x7c, 0x91, 0x41,
++	0xb1, 0xcf
++};
++static const u8 key11[] __initconst = {
++	0x48, 0xb4, 0xd0, 0x7c, 0x88, 0xd1, 0x96, 0x0d,
++	0x80, 0x33, 0xb4, 0xd5, 0x31, 0x9a, 0x88, 0xca,
++	0x14, 0xdc, 0xf0, 0xa8, 0xf3, 0xac, 0xb8, 0x47,
++	0x75, 0x86, 0x7c, 0x88, 0x50, 0x11, 0x43, 0x40
++};
++enum { nonce11 = 0x47c35dd1f4f8aa4fULL };
++
++static const u8 input12[] __initconst = {
++	0x9e, 0x8e, 0x3d, 0x2a, 0x05, 0xfd, 0xe4, 0x90,
++	0x24, 0x1c, 0xd3
++};
++static const u8 output12[] __initconst = {
++	0x97, 0x72, 0x40, 0x9f, 0xc0, 0x6b, 0x05, 0x33,
++	0x42, 0x7e, 0x28
++};
++static const u8 key12[] __initconst = {
++	0xee, 0xff, 0x33, 0x33, 0xe0, 0x28, 0xdf, 0xa2,
++	0xb6, 0x5e, 0x25, 0x09, 0x52, 0xde, 0xa5, 0x9c,
++	0x8f, 0x95, 0xa9, 0x03, 0x77, 0x0f, 0xbe, 0xa1,
++	0xd0, 0x7d, 0x73, 0x2f, 0xf8, 0x7e, 0x51, 0x44
++};
++enum { nonce12 = 0xc22d044dc6ea4af3ULL };
++
++static const u8 input13[] __initconst = {
++	0x9c, 0x16, 0xa2, 0x22, 0x4d, 0xbe, 0x04, 0x9a,
++	0xb3, 0xb5, 0xc6, 0x58
++};
++static const u8 output13[] __initconst = {
++	0xf0, 0x81, 0xdb, 0x6d, 0xa3, 0xe9, 0xb2, 0xc6,
++	0x32, 0x50, 0x16, 0x9f
++};
++static const u8 key13[] __initconst = {
++	0x96, 0xb3, 0x01, 0xd2, 0x7a, 0x8c, 0x94, 0x09,
++	0x4f, 0x58, 0xbe, 0x80, 0xcc, 0xa9, 0x7e, 0x2d,
++	0xad, 0x58, 0x3b, 0x63, 0xb8, 0x5c, 0x17, 0xce,
++	0xbf, 0x43, 0x33, 0x7a, 0x7b, 0x82, 0x28, 0x2f
++};
++enum { nonce13 = 0x2a5d05d88cd7b0daULL };
++
++static const u8 input14[] __initconst = {
++	0x57, 0x4f, 0xaa, 0x30, 0xe6, 0x23, 0x50, 0x86,
++	0x91, 0xa5, 0x60, 0x96, 0x2b
++};
++static const u8 output14[] __initconst = {
++	0x6c, 0x1f, 0x3b, 0x42, 0xb6, 0x2f, 0xf0, 0xbd,
++	0x76, 0x60, 0xc7, 0x7e, 0x8d
++};
++static const u8 key14[] __initconst = {
++	0x22, 0x85, 0xaf, 0x8f, 0xa3, 0x53, 0xa0, 0xc4,
++	0xb5, 0x75, 0xc0, 0xba, 0x30, 0x92, 0xc3, 0x32,
++	0x20, 0x5a, 0x8f, 0x7e, 0x93, 0xda, 0x65, 0x18,
++	0xd1, 0xf6, 0x9a, 0x9b, 0x8f, 0x85, 0x30, 0xe6
++};
++enum { nonce14 = 0xf9946c166aa4475fULL };
++
++static const u8 input15[] __initconst = {
++	0x89, 0x81, 0xc7, 0xe2, 0x00, 0xac, 0x52, 0x70,
++	0xa4, 0x79, 0xab, 0xeb, 0x74, 0xf7
++};
++static const u8 output15[] __initconst = {
++	0xb4, 0xd0, 0xa9, 0x9d, 0x15, 0x5f, 0x48, 0xd6,
++	0x00, 0x7e, 0x4c, 0x77, 0x5a, 0x46
++};
++static const u8 key15[] __initconst = {
++	0x0a, 0x66, 0x36, 0xca, 0x5d, 0x82, 0x23, 0xb6,
++	0xe4, 0x9b, 0xad, 0x5e, 0xd0, 0x7f, 0xf6, 0x7a,
++	0x7b, 0x03, 0xa7, 0x4c, 0xfd, 0xec, 0xd5, 0xa1,
++	0xfc, 0x25, 0x54, 0xda, 0x5a, 0x5c, 0xf0, 0x2c
++};
++enum { nonce15 = 0x9ab2b87a35e772c8ULL };
++
++static const u8 input16[] __initconst = {
++	0x5f, 0x09, 0xc0, 0x8b, 0x1e, 0xde, 0xca, 0xd9,
++	0xb7, 0x5c, 0x23, 0xc9, 0x55, 0x1e, 0xcf
++};
++static const u8 output16[] __initconst = {
++	0x76, 0x9b, 0x53, 0xf3, 0x66, 0x88, 0x28, 0x60,
++	0x98, 0x80, 0x2c, 0xa8, 0x80, 0xa6, 0x48
++};
++static const u8 key16[] __initconst = {
++	0x80, 0xb5, 0x51, 0xdf, 0x17, 0x5b, 0xb0, 0xef,
++	0x8b, 0x5b, 0x2e, 0x3e, 0xc5, 0xe3, 0xa5, 0x86,
++	0xac, 0x0d, 0x8e, 0x32, 0x90, 0x9d, 0x82, 0x27,
++	0xf1, 0x23, 0x26, 0xc3, 0xea, 0x55, 0xb6, 0x63
++};
++enum { nonce16 = 0xa82e9d39e4d02ef5ULL };
++
++static const u8 input17[] __initconst = {
++	0x87, 0x0b, 0x36, 0x71, 0x7c, 0xb9, 0x0b, 0x80,
++	0x4d, 0x77, 0x5c, 0x4f, 0xf5, 0x51, 0x0e, 0x1a
++};
++static const u8 output17[] __initconst = {
++	0xf1, 0x12, 0x4a, 0x8a, 0xd9, 0xd0, 0x08, 0x67,
++	0x66, 0xd7, 0x34, 0xea, 0x32, 0x3b, 0x54, 0x0e
++};
++static const u8 key17[] __initconst = {
++	0xfb, 0x71, 0x5f, 0x3f, 0x7a, 0xc0, 0x9a, 0xc8,
++	0xc8, 0xcf, 0xe8, 0xbc, 0xfb, 0x09, 0xbf, 0x89,
++	0x6a, 0xef, 0xd5, 0xe5, 0x36, 0x87, 0x14, 0x76,
++	0x00, 0xb9, 0x32, 0x28, 0xb2, 0x00, 0x42, 0x53
++};
++enum { nonce17 = 0x229b87e73d557b96ULL };
++
++static const u8 input18[] __initconst = {
++	0x38, 0x42, 0xb5, 0x37, 0xb4, 0x3d, 0xfe, 0x59,
++	0x38, 0x68, 0x88, 0xfa, 0x89, 0x8a, 0x5f, 0x90,
++	0x3c
++};
++static const u8 output18[] __initconst = {
++	0xac, 0xad, 0x14, 0xe8, 0x7e, 0xd7, 0xce, 0x96,
++	0x3d, 0xb3, 0x78, 0x85, 0x22, 0x5a, 0xcb, 0x39,
++	0xd4
++};
++static const u8 key18[] __initconst = {
++	0xe1, 0xc1, 0xa8, 0xe0, 0x91, 0xe7, 0x38, 0x66,
++	0x80, 0x17, 0x12, 0x3c, 0x5e, 0x2d, 0xbb, 0xea,
++	0xeb, 0x6c, 0x8b, 0xc8, 0x1b, 0x6f, 0x7c, 0xea,
++	0x50, 0x57, 0x23, 0x1e, 0x65, 0x6f, 0x6d, 0x81
++};
++enum { nonce18 = 0xfaf5fcf8f30e57a9ULL };
++
++static const u8 input19[] __initconst = {
++	0x1c, 0x4a, 0x30, 0x26, 0xef, 0x9a, 0x32, 0xa7,
++	0x8f, 0xe5, 0xc0, 0x0f, 0x30, 0x3a, 0xbf, 0x38,
++	0x54, 0xba
++};
++static const u8 output19[] __initconst = {
++	0x57, 0x67, 0x54, 0x4f, 0x31, 0xd6, 0xef, 0x35,
++	0x0b, 0xd9, 0x52, 0xa7, 0x46, 0x7d, 0x12, 0x17,
++	0x1e, 0xe3
++};
++static const u8 key19[] __initconst = {
++	0x5a, 0x79, 0xc1, 0xea, 0x33, 0xb3, 0xc7, 0x21,
++	0xec, 0xf8, 0xcb, 0xd2, 0x58, 0x96, 0x23, 0xd6,
++	0x4d, 0xed, 0x2f, 0xdf, 0x8a, 0x79, 0xe6, 0x8b,
++	0x38, 0xa3, 0xc3, 0x7a, 0x33, 0xda, 0x02, 0xc7
++};
++enum { nonce19 = 0x2b23b61840429604ULL };
++
++static const u8 input20[] __initconst = {
++	0xab, 0xe9, 0x32, 0xbb, 0x35, 0x17, 0xe0, 0x60,
++	0x80, 0xb1, 0x27, 0xdc, 0xe6, 0x62, 0x9e, 0x0c,
++	0x77, 0xf4, 0x50
++};
++static const u8 output20[] __initconst = {
++	0x54, 0x6d, 0xaa, 0xfc, 0x08, 0xfb, 0x71, 0xa8,
++	0xd6, 0x1d, 0x7d, 0xf3, 0x45, 0x10, 0xb5, 0x4c,
++	0xcc, 0x4b, 0x45
++};
++static const u8 key20[] __initconst = {
++	0xa3, 0xfd, 0x3d, 0xa9, 0xeb, 0xea, 0x2c, 0x69,
++	0xcf, 0x59, 0x38, 0x13, 0x5b, 0xa7, 0x53, 0x8f,
++	0x5e, 0xa2, 0x33, 0x86, 0x4c, 0x75, 0x26, 0xaf,
++	0x35, 0x12, 0x09, 0x71, 0x81, 0xea, 0x88, 0x66
++};
++enum { nonce20 = 0x7459667a8fadff58ULL };
++
++static const u8 input21[] __initconst = {
++	0xa6, 0x82, 0x21, 0x23, 0xad, 0x27, 0x3f, 0xc6,
++	0xd7, 0x16, 0x0d, 0x6d, 0x24, 0x15, 0x54, 0xc5,
++	0x96, 0x72, 0x59, 0x8a
++};
++static const u8 output21[] __initconst = {
++	0x5f, 0x34, 0x32, 0xea, 0x06, 0xd4, 0x9e, 0x01,
++	0xdc, 0x32, 0x32, 0x40, 0x66, 0x73, 0x6d, 0x4a,
++	0x6b, 0x12, 0x20, 0xe8
++};
++static const u8 key21[] __initconst = {
++	0x96, 0xfd, 0x13, 0x23, 0xa9, 0x89, 0x04, 0xe6,
++	0x31, 0xa5, 0x2c, 0xc1, 0x40, 0xd5, 0x69, 0x5c,
++	0x32, 0x79, 0x56, 0xe0, 0x29, 0x93, 0x8f, 0xe8,
++	0x5f, 0x65, 0x53, 0x7f, 0xc1, 0xe9, 0xaf, 0xaf
++};
++enum { nonce21 = 0xba8defee9d8e13b5ULL };
++
++static const u8 input22[] __initconst = {
++	0xb8, 0x32, 0x1a, 0x81, 0xd8, 0x38, 0x89, 0x5a,
++	0xb0, 0x05, 0xbe, 0xf4, 0xd2, 0x08, 0xc6, 0xee,
++	0x79, 0x7b, 0x3a, 0x76, 0x59
++};
++static const u8 output22[] __initconst = {
++	0xb7, 0xba, 0xae, 0x80, 0xe4, 0x9f, 0x79, 0x84,
++	0x5a, 0x48, 0x50, 0x6d, 0xcb, 0xd0, 0x06, 0x0c,
++	0x15, 0x63, 0xa7, 0x5e, 0xbd
++};
++static const u8 key22[] __initconst = {
++	0x0f, 0x35, 0x3d, 0xeb, 0x5f, 0x0a, 0x82, 0x0d,
++	0x24, 0x59, 0x71, 0xd8, 0xe6, 0x2d, 0x5f, 0xe1,
++	0x7e, 0x0c, 0xae, 0xf6, 0xdc, 0x2c, 0xc5, 0x4a,
++	0x38, 0x88, 0xf2, 0xde, 0xd9, 0x5f, 0x76, 0x7c
++};
++enum { nonce22 = 0xe77f1760e9f5e192ULL };
++
++static const u8 input23[] __initconst = {
++	0x4b, 0x1e, 0x79, 0x99, 0xcf, 0xef, 0x64, 0x4b,
++	0xb0, 0x66, 0xae, 0x99, 0x2e, 0x68, 0x97, 0xf5,
++	0x5d, 0x9b, 0x3f, 0x7a, 0xa9, 0xd9
++};
++static const u8 output23[] __initconst = {
++	0x5f, 0xa4, 0x08, 0x39, 0xca, 0xfa, 0x2b, 0x83,
++	0x5d, 0x95, 0x70, 0x7c, 0x2e, 0xd4, 0xae, 0xfa,
++	0x45, 0x4a, 0x77, 0x7f, 0xa7, 0x65
++};
++static const u8 key23[] __initconst = {
++	0x4a, 0x06, 0x83, 0x64, 0xaa, 0xe3, 0x38, 0x32,
++	0x28, 0x5d, 0xa4, 0xb2, 0x5a, 0xee, 0xcf, 0x8e,
++	0x19, 0x67, 0xf1, 0x09, 0xe8, 0xc9, 0xf6, 0x40,
++	0x02, 0x6d, 0x0b, 0xde, 0xfa, 0x81, 0x03, 0xb1
++};
++enum { nonce23 = 0x9b3f349158709849ULL };
++
++static const u8 input24[] __initconst = {
++	0xc6, 0xfc, 0x47, 0x5e, 0xd8, 0xed, 0xa9, 0xe5,
++	0x4f, 0x82, 0x79, 0x35, 0xee, 0x3e, 0x7e, 0x3e,
++	0x35, 0x70, 0x6e, 0xfa, 0x6d, 0x08, 0xe8
++};
++static const u8 output24[] __initconst = {
++	0x3b, 0xc5, 0xf8, 0xc2, 0xbf, 0x2b, 0x90, 0x33,
++	0xa6, 0xae, 0xf5, 0x5a, 0x65, 0xb3, 0x3d, 0xe1,
++	0xcd, 0x5f, 0x55, 0xfa, 0xe7, 0xa5, 0x4a
++};
++static const u8 key24[] __initconst = {
++	0x00, 0x24, 0xc3, 0x65, 0x5f, 0xe6, 0x31, 0xbb,
++	0x6d, 0xfc, 0x20, 0x7b, 0x1b, 0xa8, 0x96, 0x26,
++	0x55, 0x21, 0x62, 0x25, 0x7e, 0xba, 0x23, 0x97,
++	0xc9, 0xb8, 0x53, 0xa8, 0xef, 0xab, 0xad, 0x61
++};
++enum { nonce24 = 0x13ee0b8f526177c3ULL };
++
++static const u8 input25[] __initconst = {
++	0x33, 0x07, 0x16, 0xb1, 0x34, 0x33, 0x67, 0x04,
++	0x9b, 0x0a, 0xce, 0x1b, 0xe9, 0xde, 0x1a, 0xec,
++	0xd0, 0x55, 0xfb, 0xc6, 0x33, 0xaf, 0x2d, 0xe3
++};
++static const u8 output25[] __initconst = {
++	0x05, 0x93, 0x10, 0xd1, 0x58, 0x6f, 0x68, 0x62,
++	0x45, 0xdb, 0x91, 0xae, 0x70, 0xcf, 0xd4, 0x5f,
++	0xee, 0xdf, 0xd5, 0xba, 0x9e, 0xde, 0x68, 0xe6
++};
++static const u8 key25[] __initconst = {
++	0x83, 0xa9, 0x4f, 0x5d, 0x74, 0xd5, 0x91, 0xb3,
++	0xc9, 0x97, 0x19, 0x15, 0xdb, 0x0d, 0x0b, 0x4a,
++	0x3d, 0x55, 0xcf, 0xab, 0xb2, 0x05, 0x21, 0x35,
++	0x45, 0x50, 0xeb, 0xf8, 0xf5, 0xbf, 0x36, 0x35
++};
++enum { nonce25 = 0x7c6f459e49ebfebcULL };
++
++static const u8 input26[] __initconst = {
++	0xc2, 0xd4, 0x7a, 0xa3, 0x92, 0xe1, 0xac, 0x46,
++	0x1a, 0x15, 0x38, 0xc9, 0xb5, 0xfd, 0xdf, 0x84,
++	0x38, 0xbc, 0x6b, 0x1d, 0xb0, 0x83, 0x43, 0x04,
++	0x39
++};
++static const u8 output26[] __initconst = {
++	0x7f, 0xde, 0xd6, 0x87, 0xcc, 0x34, 0xf4, 0x12,
++	0xae, 0x55, 0xa5, 0x89, 0x95, 0x29, 0xfc, 0x18,
++	0xd8, 0xc7, 0x7c, 0xd3, 0xcb, 0x85, 0x95, 0x21,
++	0xd2
++};
++static const u8 key26[] __initconst = {
++	0xe4, 0xd0, 0x54, 0x1d, 0x7d, 0x47, 0xa8, 0xc1,
++	0x08, 0xca, 0xe2, 0x42, 0x52, 0x95, 0x16, 0x43,
++	0xa3, 0x01, 0x23, 0x03, 0xcc, 0x3b, 0x81, 0x78,
++	0x23, 0xcc, 0xa7, 0x36, 0xd7, 0xa0, 0x97, 0x8d
++};
++enum { nonce26 = 0x524401012231683ULL };
++
++static const u8 input27[] __initconst = {
++	0x0d, 0xb0, 0xcf, 0xec, 0xfc, 0x38, 0x9d, 0x9d,
++	0x89, 0x00, 0x96, 0xf2, 0x79, 0x8a, 0xa1, 0x8d,
++	0x32, 0x5e, 0xc6, 0x12, 0x22, 0xec, 0xf6, 0x52,
++	0xc1, 0x0b
++};
++static const u8 output27[] __initconst = {
++	0xef, 0xe1, 0xf2, 0x67, 0x8e, 0x2c, 0x00, 0x9f,
++	0x1d, 0x4c, 0x66, 0x1f, 0x94, 0x58, 0xdc, 0xbb,
++	0xb9, 0x11, 0x8f, 0x74, 0xfd, 0x0e, 0x14, 0x01,
++	0xa8, 0x21
++};
++static const u8 key27[] __initconst = {
++	0x78, 0x71, 0xa4, 0xe6, 0xb2, 0x95, 0x44, 0x12,
++	0x81, 0xaa, 0x7e, 0x94, 0xa7, 0x8d, 0x44, 0xea,
++	0xc4, 0xbc, 0x01, 0xb7, 0x9e, 0xf7, 0x82, 0x9e,
++	0x3b, 0x23, 0x9f, 0x31, 0xdd, 0xb8, 0x0d, 0x18
++};
++enum { nonce27 = 0xd58fe0e58fb254d6ULL };
++
++static const u8 input28[] __initconst = {
++	0xaa, 0xb7, 0xaa, 0xd9, 0xa8, 0x91, 0xd7, 0x8a,
++	0x97, 0x9b, 0xdb, 0x7c, 0x47, 0x2b, 0xdb, 0xd2,
++	0xda, 0x77, 0xb1, 0xfa, 0x2d, 0x12, 0xe3, 0xe9,
++	0xc4, 0x7f, 0x54
++};
++static const u8 output28[] __initconst = {
++	0x87, 0x84, 0xa9, 0xa6, 0xad, 0x8f, 0xe6, 0x0f,
++	0x69, 0xf8, 0x21, 0xc3, 0x54, 0x95, 0x0f, 0xb0,
++	0x4e, 0xc7, 0x02, 0xe4, 0x04, 0xb0, 0x6c, 0x42,
++	0x8c, 0x63, 0xe3
++};
++static const u8 key28[] __initconst = {
++	0x12, 0x23, 0x37, 0x95, 0x04, 0xb4, 0x21, 0xe8,
++	0xbc, 0x65, 0x46, 0x7a, 0xf4, 0x01, 0x05, 0x3f,
++	0xb1, 0x34, 0x73, 0xd2, 0x49, 0xbf, 0x6f, 0x20,
++	0xbd, 0x23, 0x58, 0x5f, 0xd1, 0x73, 0x57, 0xa6
++};
++enum { nonce28 = 0x3a04d51491eb4e07ULL };
++
++static const u8 input29[] __initconst = {
++	0x55, 0xd0, 0xd4, 0x4b, 0x17, 0xc8, 0xc4, 0x2b,
++	0xc0, 0x28, 0xbd, 0x9d, 0x65, 0x4d, 0xaf, 0x77,
++	0x72, 0x7c, 0x36, 0x68, 0xa7, 0xb6, 0x87, 0x4d,
++	0xb9, 0x27, 0x25, 0x6c
++};
++static const u8 output29[] __initconst = {
++	0x0e, 0xac, 0x4c, 0xf5, 0x12, 0xb5, 0x56, 0xa5,
++	0x00, 0x9a, 0xd6, 0xe5, 0x1a, 0x59, 0x2c, 0xf6,
++	0x42, 0x22, 0xcf, 0x23, 0x98, 0x34, 0x29, 0xac,
++	0x6e, 0xe3, 0x37, 0x6d
++};
++static const u8 key29[] __initconst = {
++	0xda, 0x9d, 0x05, 0x0c, 0x0c, 0xba, 0x75, 0xb9,
++	0x9e, 0xb1, 0x8d, 0xd9, 0x73, 0x26, 0x2c, 0xa9,
++	0x3a, 0xb5, 0xcb, 0x19, 0x49, 0xa7, 0x4f, 0xf7,
++	0x64, 0x35, 0x23, 0x20, 0x2a, 0x45, 0x78, 0xc7
++};
++enum { nonce29 = 0xc25ac9982431cbfULL };
++
++static const u8 input30[] __initconst = {
++	0x4e, 0xd6, 0x85, 0xbb, 0xe7, 0x99, 0xfa, 0x04,
++	0x33, 0x24, 0xfd, 0x75, 0x18, 0xe3, 0xd3, 0x25,
++	0xcd, 0xca, 0xae, 0x00, 0xbe, 0x52, 0x56, 0x4a,
++	0x31, 0xe9, 0x4f, 0xae, 0x8a
++};
++static const u8 output30[] __initconst = {
++	0x30, 0x36, 0x32, 0xa2, 0x3c, 0xb6, 0xf9, 0xf9,
++	0x76, 0x70, 0xad, 0xa6, 0x10, 0x41, 0x00, 0x4a,
++	0xfa, 0xce, 0x1b, 0x86, 0x05, 0xdb, 0x77, 0x96,
++	0xb3, 0xb7, 0x8f, 0x61, 0x24
++};
++static const u8 key30[] __initconst = {
++	0x49, 0x35, 0x4c, 0x15, 0x98, 0xfb, 0xc6, 0x57,
++	0x62, 0x6d, 0x06, 0xc3, 0xd4, 0x79, 0x20, 0x96,
++	0x05, 0x2a, 0x31, 0x63, 0xc0, 0x44, 0x42, 0x09,
++	0x13, 0x13, 0xff, 0x1b, 0xc8, 0x63, 0x1f, 0x0b
++};
++enum { nonce30 = 0x4967f9c08e41568bULL };
++
++static const u8 input31[] __initconst = {
++	0x91, 0x04, 0x20, 0x47, 0x59, 0xee, 0xa6, 0x0f,
++	0x04, 0x75, 0xc8, 0x18, 0x95, 0x44, 0x01, 0x28,
++	0x20, 0x6f, 0x73, 0x68, 0x66, 0xb5, 0x03, 0xb3,
++	0x58, 0x27, 0x6e, 0x7a, 0x76, 0xb8
++};
++static const u8 output31[] __initconst = {
++	0xe8, 0x03, 0x78, 0x9d, 0x13, 0x15, 0x98, 0xef,
++	0x64, 0x68, 0x12, 0x41, 0xb0, 0x29, 0x94, 0x0c,
++	0x83, 0x35, 0x46, 0xa9, 0x74, 0xe1, 0x75, 0xf0,
++	0xb6, 0x96, 0xc3, 0x6f, 0xd7, 0x70
++};
++static const u8 key31[] __initconst = {
++	0xef, 0xcd, 0x5a, 0x4a, 0xf4, 0x7e, 0x6a, 0x3a,
++	0x11, 0x88, 0x72, 0x94, 0xb8, 0xae, 0x84, 0xc3,
++	0x66, 0xe0, 0xde, 0x4b, 0x00, 0xa5, 0xd6, 0x2d,
++	0x50, 0xb7, 0x28, 0xff, 0x76, 0x57, 0x18, 0x1f
++};
++enum { nonce31 = 0xcb6f428fa4192e19ULL };
++
++static const u8 input32[] __initconst = {
++	0x90, 0x06, 0x50, 0x4b, 0x98, 0x14, 0x30, 0xf1,
++	0xb8, 0xd7, 0xf0, 0xa4, 0x3e, 0x4e, 0xd8, 0x00,
++	0xea, 0xdb, 0x4f, 0x93, 0x05, 0xef, 0x02, 0x71,
++	0x1a, 0xcd, 0xa3, 0xb1, 0xae, 0xd3, 0x18
++};
++static const u8 output32[] __initconst = {
++	0xcb, 0x4a, 0x37, 0x3f, 0xea, 0x40, 0xab, 0x86,
++	0xfe, 0xcc, 0x07, 0xd5, 0xdc, 0xb2, 0x25, 0xb6,
++	0xfd, 0x2a, 0x72, 0xbc, 0x5e, 0xd4, 0x75, 0xff,
++	0x71, 0xfc, 0xce, 0x1e, 0x6f, 0x22, 0xc1
++};
++static const u8 key32[] __initconst = {
++	0xfc, 0x6d, 0xc3, 0x80, 0xce, 0xa4, 0x31, 0xa1,
++	0xcc, 0xfa, 0x9d, 0x10, 0x0b, 0xc9, 0x11, 0x77,
++	0x34, 0xdb, 0xad, 0x1b, 0xc4, 0xfc, 0xeb, 0x79,
++	0x91, 0xda, 0x59, 0x3b, 0x0d, 0xb1, 0x19, 0x3b
++};
++enum { nonce32 = 0x88551bf050059467ULL };
++
++static const u8 input33[] __initconst = {
++	0x88, 0x94, 0x71, 0x92, 0xe8, 0xd7, 0xf9, 0xbd,
++	0x55, 0xe3, 0x22, 0xdb, 0x99, 0x51, 0xfb, 0x50,
++	0xbf, 0x82, 0xb5, 0x70, 0x8b, 0x2b, 0x6a, 0x03,
++	0x37, 0xa0, 0xc6, 0x19, 0x5d, 0xc9, 0xbc, 0xcc
++};
++static const u8 output33[] __initconst = {
++	0xb6, 0x17, 0x51, 0xc8, 0xea, 0x8a, 0x14, 0xdc,
++	0x23, 0x1b, 0xd4, 0xed, 0xbf, 0x50, 0xb9, 0x38,
++	0x00, 0xc2, 0x3f, 0x78, 0x3d, 0xbf, 0xa0, 0x84,
++	0xef, 0x45, 0xb2, 0x7d, 0x48, 0x7b, 0x62, 0xa7
++};
++static const u8 key33[] __initconst = {
++	0xb9, 0x8f, 0x6a, 0xad, 0xb4, 0x6f, 0xb5, 0xdc,
++	0x48, 0xfa, 0x43, 0x57, 0x62, 0x97, 0xef, 0x89,
++	0x4c, 0x5a, 0x7b, 0x67, 0xb8, 0x9d, 0xf0, 0x42,
++	0x2b, 0x8f, 0xf3, 0x18, 0x05, 0x2e, 0x48, 0xd0
++};
++enum { nonce33 = 0x31f16488fe8447f5ULL };
++
++static const u8 input34[] __initconst = {
++	0xda, 0x2b, 0x3d, 0x63, 0x9e, 0x4f, 0xc2, 0xb8,
++	0x7f, 0xc2, 0x1a, 0x8b, 0x0d, 0x95, 0x65, 0x55,
++	0x52, 0xba, 0x51, 0x51, 0xc0, 0x61, 0x9f, 0x0a,
++	0x5d, 0xb0, 0x59, 0x8c, 0x64, 0x6a, 0xab, 0xf5,
++	0x57
++};
++static const u8 output34[] __initconst = {
++	0x5c, 0xf6, 0x62, 0x24, 0x8c, 0x45, 0xa3, 0x26,
++	0xd0, 0xe4, 0x88, 0x1c, 0xed, 0xc4, 0x26, 0x58,
++	0xb5, 0x5d, 0x92, 0xc4, 0x17, 0x44, 0x1c, 0xb8,
++	0x2c, 0xf3, 0x55, 0x7e, 0xd6, 0xe5, 0xb3, 0x65,
++	0xa8
++};
++static const u8 key34[] __initconst = {
++	0xde, 0xd1, 0x27, 0xb7, 0x7c, 0xfa, 0xa6, 0x78,
++	0x39, 0x80, 0xdf, 0xb7, 0x46, 0xac, 0x71, 0x26,
++	0xd0, 0x2a, 0x56, 0x79, 0x12, 0xeb, 0x26, 0x37,
++	0x01, 0x0d, 0x30, 0xe0, 0xe3, 0x66, 0xb2, 0xf4
++};
++enum { nonce34 = 0x92d0d9b252c24149ULL };
++
++static const u8 input35[] __initconst = {
++	0x3a, 0x15, 0x5b, 0x75, 0x6e, 0xd0, 0x52, 0x20,
++	0x6c, 0x82, 0xfa, 0xce, 0x5b, 0xea, 0xf5, 0x43,
++	0xc1, 0x81, 0x7c, 0xb2, 0xac, 0x16, 0x3f, 0xd3,
++	0x5a, 0xaf, 0x55, 0x98, 0xf4, 0xc6, 0xba, 0x71,
++	0x25, 0x8b
++};
++static const u8 output35[] __initconst = {
++	0xb3, 0xaf, 0xac, 0x6d, 0x4d, 0xc7, 0x68, 0x56,
++	0x50, 0x5b, 0x69, 0x2a, 0xe5, 0x90, 0xf9, 0x5f,
++	0x99, 0x88, 0xff, 0x0c, 0xa6, 0xb1, 0x83, 0xd6,
++	0x80, 0xa6, 0x1b, 0xde, 0x94, 0xa4, 0x2c, 0xc3,
++	0x74, 0xfa
++};
++static const u8 key35[] __initconst = {
++	0xd8, 0x24, 0xe2, 0x06, 0xd7, 0x7a, 0xce, 0x81,
++	0x52, 0x72, 0x02, 0x69, 0x89, 0xc4, 0xe9, 0x53,
++	0x3b, 0x08, 0x5f, 0x98, 0x1e, 0x1b, 0x99, 0x6e,
++	0x28, 0x17, 0x6d, 0xba, 0xc0, 0x96, 0xf9, 0x3c
++};
++enum { nonce35 = 0x7baf968c4c8e3a37ULL };
++
++static const u8 input36[] __initconst = {
++	0x31, 0x5d, 0x4f, 0xe3, 0xac, 0xad, 0x17, 0xa6,
++	0xb5, 0x01, 0xe2, 0xc6, 0xd4, 0x7e, 0xc4, 0x80,
++	0xc0, 0x59, 0x72, 0xbb, 0x4b, 0x74, 0x6a, 0x41,
++	0x0f, 0x9c, 0xf6, 0xca, 0x20, 0xb3, 0x73, 0x07,
++	0x6b, 0x02, 0x2a
++};
++static const u8 output36[] __initconst = {
++	0xf9, 0x09, 0x92, 0x94, 0x7e, 0x31, 0xf7, 0x53,
++	0xe8, 0x8a, 0x5b, 0x20, 0xef, 0x9b, 0x45, 0x81,
++	0xba, 0x5e, 0x45, 0x63, 0xc1, 0xc7, 0x9e, 0x06,
++	0x0e, 0xd9, 0x62, 0x8e, 0x96, 0xf9, 0xfa, 0x43,
++	0x4d, 0xd4, 0x28
++};
++static const u8 key36[] __initconst = {
++	0x13, 0x30, 0x4c, 0x06, 0xae, 0x18, 0xde, 0x03,
++	0x1d, 0x02, 0x40, 0xf5, 0xbb, 0x19, 0xe3, 0x88,
++	0x41, 0xb1, 0x29, 0x15, 0x97, 0xc2, 0x69, 0x3f,
++	0x32, 0x2a, 0x0c, 0x8b, 0xcf, 0x83, 0x8b, 0x6c
++};
++enum { nonce36 = 0x226d251d475075a0ULL };
++
++static const u8 input37[] __initconst = {
++	0x10, 0x18, 0xbe, 0xfd, 0x66, 0xc9, 0x77, 0xcc,
++	0x43, 0xe5, 0x46, 0x0b, 0x08, 0x8b, 0xae, 0x11,
++	0x86, 0x15, 0xc2, 0xf6, 0x45, 0xd4, 0x5f, 0xd6,
++	0xb6, 0x5f, 0x9f, 0x3e, 0x97, 0xb7, 0xd4, 0xad,
++	0x0b, 0xe8, 0x31, 0x94
++};
++static const u8 output37[] __initconst = {
++	0x03, 0x2c, 0x1c, 0xee, 0xc6, 0xdd, 0xed, 0x38,
++	0x80, 0x6d, 0x84, 0x16, 0xc3, 0xc2, 0x04, 0x63,
++	0xcd, 0xa7, 0x6e, 0x36, 0x8b, 0xed, 0x78, 0x63,
++	0x95, 0xfc, 0x69, 0x7a, 0x3f, 0x8d, 0x75, 0x6b,
++	0x6c, 0x26, 0x56, 0x4d
++};
++static const u8 key37[] __initconst = {
++	0xac, 0x84, 0x4d, 0xa9, 0x29, 0x49, 0x3c, 0x39,
++	0x7f, 0xd9, 0xa6, 0x01, 0xf3, 0x7e, 0xfa, 0x4a,
++	0x14, 0x80, 0x22, 0x74, 0xf0, 0x29, 0x30, 0x2d,
++	0x07, 0x21, 0xda, 0xc0, 0x4d, 0x70, 0x56, 0xa2
++};
++enum { nonce37 = 0x167823ce3b64925aULL };
++
++static const u8 input38[] __initconst = {
++	0x30, 0x8f, 0xfa, 0x24, 0x29, 0xb1, 0xfb, 0xce,
++	0x31, 0x62, 0xdc, 0xd0, 0x46, 0xab, 0xe1, 0x31,
++	0xd9, 0xae, 0x60, 0x0d, 0xca, 0x0a, 0x49, 0x12,
++	0x3d, 0x92, 0xe9, 0x91, 0x67, 0x12, 0x62, 0x18,
++	0x89, 0xe2, 0xf9, 0x1c, 0xcc
++};
++static const u8 output38[] __initconst = {
++	0x56, 0x9c, 0xc8, 0x7a, 0xc5, 0x98, 0xa3, 0x0f,
++	0xba, 0xd5, 0x3e, 0xe1, 0xc9, 0x33, 0x64, 0x33,
++	0xf0, 0xd5, 0xf7, 0x43, 0x66, 0x0e, 0x08, 0x9a,
++	0x6e, 0x09, 0xe4, 0x01, 0x0d, 0x1e, 0x2f, 0x4b,
++	0xed, 0x9c, 0x08, 0x8c, 0x03
++};
++static const u8 key38[] __initconst = {
++	0x77, 0x52, 0x2a, 0x23, 0xf1, 0xc5, 0x96, 0x2b,
++	0x89, 0x4f, 0x3e, 0xf3, 0xff, 0x0e, 0x94, 0xce,
++	0xf1, 0xbd, 0x53, 0xf5, 0x77, 0xd6, 0x9e, 0x47,
++	0x49, 0x3d, 0x16, 0x64, 0xff, 0x95, 0x42, 0x42
++};
++enum { nonce38 = 0xff629d7b82cef357ULL };
++
++static const u8 input39[] __initconst = {
++	0x38, 0x26, 0x27, 0xd0, 0xc2, 0xf5, 0x34, 0xba,
++	0xda, 0x0f, 0x1c, 0x1c, 0x9a, 0x70, 0xe5, 0x8a,
++	0x78, 0x2d, 0x8f, 0x9a, 0xbf, 0x89, 0x6a, 0xfd,
++	0xd4, 0x9c, 0x33, 0xf1, 0xb6, 0x89, 0x16, 0xe3,
++	0x6a, 0x00, 0xfa, 0x3a, 0x0f, 0x26
++};
++static const u8 output39[] __initconst = {
++	0x0f, 0xaf, 0x91, 0x6d, 0x9c, 0x99, 0xa4, 0xf7,
++	0x3b, 0x9d, 0x9a, 0x98, 0xca, 0xbb, 0x50, 0x48,
++	0xee, 0xcb, 0x5d, 0xa1, 0x37, 0x2d, 0x36, 0x09,
++	0x2a, 0xe2, 0x1c, 0x3d, 0x98, 0x40, 0x1c, 0x16,
++	0x56, 0xa7, 0x98, 0xe9, 0x7d, 0x2b
++};
++static const u8 key39[] __initconst = {
++	0x6e, 0x83, 0x15, 0x4d, 0xf8, 0x78, 0xa8, 0x0e,
++	0x71, 0x37, 0xd4, 0x6e, 0x28, 0x5c, 0x06, 0xa1,
++	0x2d, 0x6c, 0x72, 0x7a, 0xfd, 0xf8, 0x65, 0x1a,
++	0xb8, 0xe6, 0x29, 0x7b, 0xe5, 0xb3, 0x23, 0x79
++};
++enum { nonce39 = 0xa4d8c491cf093e9dULL };
++
++static const u8 input40[] __initconst = {
++	0x8f, 0x32, 0x7c, 0x40, 0x37, 0x95, 0x08, 0x00,
++	0x00, 0xfe, 0x2f, 0x95, 0x20, 0x12, 0x40, 0x18,
++	0x5e, 0x7e, 0x5e, 0x99, 0xee, 0x8d, 0x91, 0x7d,
++	0x50, 0x7d, 0x21, 0x45, 0x27, 0xe1, 0x7f, 0xd4,
++	0x73, 0x10, 0xe1, 0x33, 0xbc, 0xf8, 0xdd
++};
++static const u8 output40[] __initconst = {
++	0x78, 0x7c, 0xdc, 0x55, 0x2b, 0xd9, 0x2b, 0x3a,
++	0xdd, 0x56, 0x11, 0x52, 0xd3, 0x2e, 0xe0, 0x0d,
++	0x23, 0x20, 0x8a, 0xf1, 0x4f, 0xee, 0xf1, 0x68,
++	0xf6, 0xdc, 0x53, 0xcf, 0x17, 0xd4, 0xf0, 0x6c,
++	0xdc, 0x80, 0x5f, 0x1c, 0xa4, 0x91, 0x05
++};
++static const u8 key40[] __initconst = {
++	0x0d, 0x86, 0xbf, 0x8a, 0xba, 0x9e, 0x39, 0x91,
++	0xa8, 0xe7, 0x22, 0xf0, 0x0c, 0x43, 0x18, 0xe4,
++	0x1f, 0xb0, 0xaf, 0x8a, 0x34, 0x31, 0xf4, 0x41,
++	0xf0, 0x89, 0x85, 0xca, 0x5d, 0x05, 0x3b, 0x94
++};
++enum { nonce40 = 0xae7acc4f5986439eULL };
++
++static const u8 input41[] __initconst = {
++	0x20, 0x5f, 0xc1, 0x83, 0x36, 0x02, 0x76, 0x96,
++	0xf0, 0xbf, 0x8e, 0x0e, 0x1a, 0xd1, 0xc7, 0x88,
++	0x18, 0xc7, 0x09, 0xc4, 0x15, 0xd9, 0x4f, 0x5e,
++	0x1f, 0xb3, 0xb4, 0x6d, 0xcb, 0xa0, 0xd6, 0x8a,
++	0x3b, 0x40, 0x8e, 0x80, 0xf1, 0xe8, 0x8f, 0x5f
++};
++static const u8 output41[] __initconst = {
++	0x0b, 0xd1, 0x49, 0x9a, 0x9d, 0xe8, 0x97, 0xb8,
++	0xd1, 0xeb, 0x90, 0x62, 0x37, 0xd2, 0x99, 0x15,
++	0x67, 0x6d, 0x27, 0x93, 0xce, 0x37, 0x65, 0xa2,
++	0x94, 0x88, 0xd6, 0x17, 0xbc, 0x1c, 0x6e, 0xa2,
++	0xcc, 0xfb, 0x81, 0x0e, 0x30, 0x60, 0x5a, 0x6f
++};
++static const u8 key41[] __initconst = {
++	0x36, 0x27, 0x57, 0x01, 0x21, 0x68, 0x97, 0xc7,
++	0x00, 0x67, 0x7b, 0xe9, 0x0f, 0x55, 0x49, 0xbb,
++	0x92, 0x18, 0x98, 0xf5, 0x5e, 0xbc, 0xe7, 0x5a,
++	0x9d, 0x3d, 0xc7, 0xbd, 0x59, 0xec, 0x82, 0x8e
++};
++enum { nonce41 = 0x5da05e4c8dfab464ULL };
++
++static const u8 input42[] __initconst = {
++	0xca, 0x30, 0xcd, 0x63, 0xf0, 0x2d, 0xf1, 0x03,
++	0x4d, 0x0d, 0xf2, 0xf7, 0x6f, 0xae, 0xd6, 0x34,
++	0xea, 0xf6, 0x13, 0xcf, 0x1c, 0xa0, 0xd0, 0xe8,
++	0xa4, 0x78, 0x80, 0x3b, 0x1e, 0xa5, 0x32, 0x4c,
++	0x73, 0x12, 0xd4, 0x6a, 0x94, 0xbc, 0xba, 0x80,
++	0x5e
++};
++static const u8 output42[] __initconst = {
++	0xec, 0x3f, 0x18, 0x31, 0xc0, 0x7b, 0xb5, 0xe2,
++	0xad, 0xf3, 0xec, 0xa0, 0x16, 0x9d, 0xef, 0xce,
++	0x05, 0x65, 0x59, 0x9d, 0x5a, 0xca, 0x3e, 0x13,
++	0xb9, 0x5d, 0x5d, 0xb5, 0xeb, 0xae, 0xc0, 0x87,
++	0xbb, 0xfd, 0xe7, 0xe4, 0x89, 0x5b, 0xd2, 0x6c,
++	0x56
++};
++static const u8 key42[] __initconst = {
++	0x7c, 0x6b, 0x7e, 0x77, 0xcc, 0x8c, 0x1b, 0x03,
++	0x8b, 0x2a, 0xb3, 0x7c, 0x5a, 0x73, 0xcc, 0xac,
++	0xdd, 0x53, 0x54, 0x0c, 0x85, 0xed, 0xcd, 0x47,
++	0x24, 0xc1, 0xb8, 0x9b, 0x2e, 0x41, 0x92, 0x36
++};
++enum { nonce42 = 0xe4d7348b09682c9cULL };
++
++static const u8 input43[] __initconst = {
++	0x52, 0xf2, 0x4b, 0x7c, 0xe5, 0x58, 0xe8, 0xd2,
++	0xb7, 0xf3, 0xa1, 0x29, 0x68, 0xa2, 0x50, 0x50,
++	0xae, 0x9c, 0x1b, 0xe2, 0x67, 0x77, 0xe2, 0xdb,
++	0x85, 0x55, 0x7e, 0x84, 0x8a, 0x12, 0x3c, 0xb6,
++	0x2e, 0xed, 0xd3, 0xec, 0x47, 0x68, 0xfa, 0x52,
++	0x46, 0x9d
++};
++static const u8 output43[] __initconst = {
++	0x1b, 0xf0, 0x05, 0xe4, 0x1c, 0xd8, 0x74, 0x9a,
++	0xf0, 0xee, 0x00, 0x54, 0xce, 0x02, 0x83, 0x15,
++	0xfb, 0x23, 0x35, 0x78, 0xc3, 0xda, 0x98, 0xd8,
++	0x9d, 0x1b, 0xb2, 0x51, 0x82, 0xb0, 0xff, 0xbe,
++	0x05, 0xa9, 0xa4, 0x04, 0xba, 0xea, 0x4b, 0x73,
++	0x47, 0x6e
++};
++static const u8 key43[] __initconst = {
++	0xeb, 0xec, 0x0e, 0xa1, 0x65, 0xe2, 0x99, 0x46,
++	0xd8, 0x54, 0x8c, 0x4a, 0x93, 0xdf, 0x6d, 0xbf,
++	0x93, 0x34, 0x94, 0x57, 0xc9, 0x12, 0x9d, 0x68,
++	0x05, 0xc5, 0x05, 0xad, 0x5a, 0xc9, 0x2a, 0x3b
++};
++enum { nonce43 = 0xe14f6a902b7827fULL };
++
++static const u8 input44[] __initconst = {
++	0x3e, 0x22, 0x3e, 0x8e, 0xcd, 0x18, 0xe2, 0xa3,
++	0x8d, 0x8b, 0x38, 0xc3, 0x02, 0xa3, 0x31, 0x48,
++	0xc6, 0x0e, 0xec, 0x99, 0x51, 0x11, 0x6d, 0x8b,
++	0x32, 0x35, 0x3b, 0x08, 0x58, 0x76, 0x25, 0x30,
++	0xe2, 0xfc, 0xa2, 0x46, 0x7d, 0x6e, 0x34, 0x87,
++	0xac, 0x42, 0xbf
++};
++static const u8 output44[] __initconst = {
++	0x08, 0x92, 0x58, 0x02, 0x1a, 0xf4, 0x1f, 0x3d,
++	0x38, 0x7b, 0x6b, 0xf6, 0x84, 0x07, 0xa3, 0x19,
++	0x17, 0x2a, 0xed, 0x57, 0x1c, 0xf9, 0x55, 0x37,
++	0x4e, 0xf4, 0x68, 0x68, 0x82, 0x02, 0x4f, 0xca,
++	0x21, 0x00, 0xc6, 0x66, 0x79, 0x53, 0x19, 0xef,
++	0x7f, 0xdd, 0x74
++};
++static const u8 key44[] __initconst = {
++	0x73, 0xb6, 0x3e, 0xf4, 0x57, 0x52, 0xa6, 0x43,
++	0x51, 0xd8, 0x25, 0x00, 0xdb, 0xb4, 0x52, 0x69,
++	0xd6, 0x27, 0x49, 0xeb, 0x9b, 0xf1, 0x7b, 0xa0,
++	0xd6, 0x7c, 0x9c, 0xd8, 0x95, 0x03, 0x69, 0x26
++};
++enum { nonce44 = 0xf5e6dc4f35ce24e5ULL };
++
++static const u8 input45[] __initconst = {
++	0x55, 0x76, 0xc0, 0xf1, 0x74, 0x03, 0x7a, 0x6d,
++	0x14, 0xd8, 0x36, 0x2c, 0x9f, 0x9a, 0x59, 0x7a,
++	0x2a, 0xf5, 0x77, 0x84, 0x70, 0x7c, 0x1d, 0x04,
++	0x90, 0x45, 0xa4, 0xc1, 0x5e, 0xdd, 0x2e, 0x07,
++	0x18, 0x34, 0xa6, 0x85, 0x56, 0x4f, 0x09, 0xaf,
++	0x2f, 0x83, 0xe1, 0xc6
++};
++static const u8 output45[] __initconst = {
++	0x22, 0x46, 0xe4, 0x0b, 0x3a, 0x55, 0xcc, 0x9b,
++	0xf0, 0xc0, 0x53, 0xcd, 0x95, 0xc7, 0x57, 0x6c,
++	0x77, 0x46, 0x41, 0x72, 0x07, 0xbf, 0xa8, 0xe5,
++	0x68, 0x69, 0xd8, 0x1e, 0x45, 0xc1, 0xa2, 0x50,
++	0xa5, 0xd1, 0x62, 0xc9, 0x5a, 0x7d, 0x08, 0x14,
++	0xae, 0x44, 0x16, 0xb9
++};
++static const u8 key45[] __initconst = {
++	0x41, 0xf3, 0x88, 0xb2, 0x51, 0x25, 0x47, 0x02,
++	0x39, 0xe8, 0x15, 0x3a, 0x22, 0x78, 0x86, 0x0b,
++	0xf9, 0x1e, 0x8d, 0x98, 0xb2, 0x22, 0x82, 0xac,
++	0x42, 0x94, 0xde, 0x64, 0xf0, 0xfd, 0xb3, 0x6c
++};
++enum { nonce45 = 0xf51a582daf4aa01aULL };
++
++static const u8 input46[] __initconst = {
++	0xf6, 0xff, 0x20, 0xf9, 0x26, 0x7e, 0x0f, 0xa8,
++	0x6a, 0x45, 0x5a, 0x91, 0x73, 0xc4, 0x4c, 0x63,
++	0xe5, 0x61, 0x59, 0xca, 0xec, 0xc0, 0x20, 0x35,
++	0xbc, 0x9f, 0x58, 0x9c, 0x5e, 0xa1, 0x17, 0x46,
++	0xcc, 0xab, 0x6e, 0xd0, 0x4f, 0x24, 0xeb, 0x05,
++	0x4d, 0x40, 0x41, 0xe0, 0x9d
++};
++static const u8 output46[] __initconst = {
++	0x31, 0x6e, 0x63, 0x3f, 0x9c, 0xe6, 0xb1, 0xb7,
++	0xef, 0x47, 0x46, 0xd7, 0xb1, 0x53, 0x42, 0x2f,
++	0x2c, 0xc8, 0x01, 0xae, 0x8b, 0xec, 0x42, 0x2c,
++	0x6b, 0x2c, 0x9c, 0xb2, 0xf0, 0x29, 0x06, 0xa5,
++	0xcd, 0x7e, 0xc7, 0x3a, 0x38, 0x98, 0x8a, 0xde,
++	0x03, 0x29, 0x14, 0x8f, 0xf9
++};
++static const u8 key46[] __initconst = {
++	0xac, 0xa6, 0x44, 0x4a, 0x0d, 0x42, 0x10, 0xbc,
++	0xd3, 0xc9, 0x8e, 0x9e, 0x71, 0xa3, 0x1c, 0x14,
++	0x9d, 0x65, 0x0d, 0x49, 0x4d, 0x8c, 0xec, 0x46,
++	0xe1, 0x41, 0xcd, 0xf5, 0xfc, 0x82, 0x75, 0x34
++};
++enum { nonce46 = 0x25f85182df84dec5ULL };
++
++static const u8 input47[] __initconst = {
++	0xa1, 0xd2, 0xf2, 0x52, 0x2f, 0x79, 0x50, 0xb2,
++	0x42, 0x29, 0x5b, 0x44, 0x20, 0xf9, 0xbd, 0x85,
++	0xb7, 0x65, 0x77, 0x86, 0xce, 0x3e, 0x1c, 0xe4,
++	0x70, 0x80, 0xdd, 0x72, 0x07, 0x48, 0x0f, 0x84,
++	0x0d, 0xfd, 0x97, 0xc0, 0xb7, 0x48, 0x9b, 0xb4,
++	0xec, 0xff, 0x73, 0x14, 0x99, 0xe4
++};
++static const u8 output47[] __initconst = {
++	0xe5, 0x3c, 0x78, 0x66, 0x31, 0x1e, 0xd6, 0xc4,
++	0x9e, 0x71, 0xb3, 0xd7, 0xd5, 0xad, 0x84, 0xf2,
++	0x78, 0x61, 0x77, 0xf8, 0x31, 0xf0, 0x13, 0xad,
++	0x66, 0xf5, 0x31, 0x7d, 0xeb, 0xdf, 0xaf, 0xcb,
++	0xac, 0x28, 0x6c, 0xc2, 0x9e, 0xe7, 0x78, 0xa2,
++	0xa2, 0x58, 0xce, 0x84, 0x76, 0x70
++};
++static const u8 key47[] __initconst = {
++	0x05, 0x7f, 0xc0, 0x7f, 0x37, 0x20, 0x71, 0x02,
++	0x3a, 0xe7, 0x20, 0x5a, 0x0a, 0x8f, 0x79, 0x5a,
++	0xfe, 0xbb, 0x43, 0x4d, 0x2f, 0xcb, 0xf6, 0x9e,
++	0xa2, 0x97, 0x00, 0xad, 0x0d, 0x51, 0x7e, 0x17
++};
++enum { nonce47 = 0xae707c60f54de32bULL };
++
++static const u8 input48[] __initconst = {
++	0x80, 0x93, 0x77, 0x2e, 0x8d, 0xe8, 0xe6, 0xc1,
++	0x27, 0xe6, 0xf2, 0x89, 0x5b, 0x33, 0x62, 0x18,
++	0x80, 0x6e, 0x17, 0x22, 0x8e, 0x83, 0x31, 0x40,
++	0x8f, 0xc9, 0x5c, 0x52, 0x6c, 0x0e, 0xa5, 0xe9,
++	0x6c, 0x7f, 0xd4, 0x6a, 0x27, 0x56, 0x99, 0xce,
++	0x8d, 0x37, 0x59, 0xaf, 0xc0, 0x0e, 0xe1
++};
++static const u8 output48[] __initconst = {
++	0x02, 0xa4, 0x2e, 0x33, 0xb7, 0x7c, 0x2b, 0x9a,
++	0x18, 0x5a, 0xba, 0x53, 0x38, 0xaf, 0x00, 0xeb,
++	0xd8, 0x3d, 0x02, 0x77, 0x43, 0x45, 0x03, 0x91,
++	0xe2, 0x5e, 0x4e, 0xeb, 0x50, 0xd5, 0x5b, 0xe0,
++	0xf3, 0x33, 0xa7, 0xa2, 0xac, 0x07, 0x6f, 0xeb,
++	0x3f, 0x6c, 0xcd, 0xf2, 0x6c, 0x61, 0x64
++};
++static const u8 key48[] __initconst = {
++	0xf3, 0x79, 0xe7, 0xf8, 0x0e, 0x02, 0x05, 0x6b,
++	0x83, 0x1a, 0xe7, 0x86, 0x6b, 0xe6, 0x8f, 0x3f,
++	0xd3, 0xa3, 0xe4, 0x6e, 0x29, 0x06, 0xad, 0xbc,
++	0xe8, 0x33, 0x56, 0x39, 0xdf, 0xb0, 0xe2, 0xfe
++};
++enum { nonce48 = 0xd849b938c6569da0ULL };
++
++static const u8 input49[] __initconst = {
++	0x89, 0x3b, 0x88, 0x9e, 0x7b, 0x38, 0x16, 0x9f,
++	0xa1, 0x28, 0xf6, 0xf5, 0x23, 0x74, 0x28, 0xb0,
++	0xdf, 0x6c, 0x9e, 0x8a, 0x71, 0xaf, 0xed, 0x7a,
++	0x39, 0x21, 0x57, 0x7d, 0x31, 0x6c, 0xee, 0x0d,
++	0x11, 0x8d, 0x41, 0x9a, 0x5f, 0xb7, 0x27, 0x40,
++	0x08, 0xad, 0xc6, 0xe0, 0x00, 0x43, 0x9e, 0xae
++};
++static const u8 output49[] __initconst = {
++	0x4d, 0xfd, 0xdb, 0x4c, 0x77, 0xc1, 0x05, 0x07,
++	0x4d, 0x6d, 0x32, 0xcb, 0x2e, 0x0e, 0xff, 0x65,
++	0xc9, 0x27, 0xeb, 0xa9, 0x46, 0x5b, 0xab, 0x06,
++	0xe6, 0xb6, 0x5a, 0x1e, 0x00, 0xfb, 0xcf, 0xe4,
++	0xb9, 0x71, 0x40, 0x10, 0xef, 0x12, 0x39, 0xf0,
++	0xea, 0x40, 0xb8, 0x9a, 0xa2, 0x85, 0x38, 0x48
++};
++static const u8 key49[] __initconst = {
++	0xe7, 0x10, 0x40, 0xd9, 0x66, 0xc0, 0xa8, 0x6d,
++	0xa3, 0xcc, 0x8b, 0xdd, 0x93, 0xf2, 0x6e, 0xe0,
++	0x90, 0x7f, 0xd0, 0xf4, 0x37, 0x0c, 0x8b, 0x9b,
++	0x4c, 0x4d, 0xe6, 0xf2, 0x1f, 0xe9, 0x95, 0x24
++};
++enum { nonce49 = 0xf269817bdae01bc0ULL };
++
++static const u8 input50[] __initconst = {
++	0xda, 0x5b, 0x60, 0xcd, 0xed, 0x58, 0x8e, 0x7f,
++	0xae, 0xdd, 0xc8, 0x2e, 0x16, 0x90, 0xea, 0x4b,
++	0x0c, 0x74, 0x14, 0x35, 0xeb, 0xee, 0x2c, 0xff,
++	0x46, 0x99, 0x97, 0x6e, 0xae, 0xa7, 0x8e, 0x6e,
++	0x38, 0xfe, 0x63, 0xe7, 0x51, 0xd9, 0xaa, 0xce,
++	0x7b, 0x1e, 0x7e, 0x5d, 0xc0, 0xe8, 0x10, 0x06,
++	0x14
++};
++static const u8 output50[] __initconst = {
++	0xe4, 0xe5, 0x86, 0x1b, 0x66, 0x19, 0xac, 0x49,
++	0x1c, 0xbd, 0xee, 0x03, 0xaf, 0x11, 0xfc, 0x1f,
++	0x6a, 0xd2, 0x50, 0x5c, 0xea, 0x2c, 0xa5, 0x75,
++	0xfd, 0xb7, 0x0e, 0x80, 0x8f, 0xed, 0x3f, 0x31,
++	0x47, 0xac, 0x67, 0x43, 0xb8, 0x2e, 0xb4, 0x81,
++	0x6d, 0xe4, 0x1e, 0xb7, 0x8b, 0x0c, 0x53, 0xa9,
++	0x26
++};
++static const u8 key50[] __initconst = {
++	0xd7, 0xb2, 0x04, 0x76, 0x30, 0xcc, 0x38, 0x45,
++	0xef, 0xdb, 0xc5, 0x86, 0x08, 0x61, 0xf0, 0xee,
++	0x6d, 0xd8, 0x22, 0x04, 0x8c, 0xfb, 0xcb, 0x37,
++	0xa6, 0xfb, 0x95, 0x22, 0xe1, 0x87, 0xb7, 0x6f
++};
++enum { nonce50 = 0x3b44d09c45607d38ULL };
++
++static const u8 input51[] __initconst = {
++	0xa9, 0x41, 0x02, 0x4b, 0xd7, 0xd5, 0xd1, 0xf1,
++	0x21, 0x55, 0xb2, 0x75, 0x6d, 0x77, 0x1b, 0x86,
++	0xa9, 0xc8, 0x90, 0xfd, 0xed, 0x4a, 0x7b, 0x6c,
++	0xb2, 0x5f, 0x9b, 0x5f, 0x16, 0xa1, 0x54, 0xdb,
++	0xd6, 0x3f, 0x6a, 0x7f, 0x2e, 0x51, 0x9d, 0x49,
++	0x5b, 0xa5, 0x0e, 0xf9, 0xfb, 0x2a, 0x38, 0xff,
++	0x20, 0x8c
++};
++static const u8 output51[] __initconst = {
++	0x18, 0xf7, 0x88, 0xc1, 0x72, 0xfd, 0x90, 0x4b,
++	0xa9, 0x2d, 0xdb, 0x47, 0xb0, 0xa5, 0xc4, 0x37,
++	0x01, 0x95, 0xc4, 0xb1, 0xab, 0xc5, 0x5b, 0xcd,
++	0xe1, 0x97, 0x78, 0x13, 0xde, 0x6a, 0xff, 0x36,
++	0xce, 0xa4, 0x67, 0xc5, 0x4a, 0x45, 0x2b, 0xd9,
++	0xff, 0x8f, 0x06, 0x7c, 0x63, 0xbb, 0x83, 0x17,
++	0xb4, 0x6b
++};
++static const u8 key51[] __initconst = {
++	0x82, 0x1a, 0x79, 0xab, 0x9a, 0xb5, 0x49, 0x6a,
++	0x30, 0x6b, 0x99, 0x19, 0x11, 0xc7, 0xa2, 0xf4,
++	0xca, 0x55, 0xb9, 0xdd, 0xe7, 0x2f, 0xe7, 0xc1,
++	0xdd, 0x27, 0xad, 0x80, 0xf2, 0x56, 0xad, 0xf3
++};
++enum { nonce51 = 0xe93aff94ca71a4a6ULL };
++
++static const u8 input52[] __initconst = {
++	0x89, 0xdd, 0xf3, 0xfa, 0xb6, 0xc1, 0xaa, 0x9a,
++	0xc8, 0xad, 0x6b, 0x00, 0xa1, 0x65, 0xea, 0x14,
++	0x55, 0x54, 0x31, 0x8f, 0xf0, 0x03, 0x84, 0x51,
++	0x17, 0x1e, 0x0a, 0x93, 0x6e, 0x79, 0x96, 0xa3,
++	0x2a, 0x85, 0x9c, 0x89, 0xf8, 0xd1, 0xe2, 0x15,
++	0x95, 0x05, 0xf4, 0x43, 0x4d, 0x6b, 0xf0, 0x71,
++	0x3b, 0x3e, 0xba
++};
++static const u8 output52[] __initconst = {
++	0x0c, 0x42, 0x6a, 0xb3, 0x66, 0x63, 0x5d, 0x2c,
++	0x9f, 0x3d, 0xa6, 0x6e, 0xc7, 0x5f, 0x79, 0x2f,
++	0x50, 0xe3, 0xd6, 0x07, 0x56, 0xa4, 0x2b, 0x2d,
++	0x8d, 0x10, 0xc0, 0x6c, 0xa2, 0xfc, 0x97, 0xec,
++	0x3f, 0x5c, 0x8d, 0x59, 0xbe, 0x84, 0xf1, 0x3e,
++	0x38, 0x47, 0x4f, 0x75, 0x25, 0x66, 0x88, 0x14,
++	0x03, 0xdd, 0xde
++};
++static const u8 key52[] __initconst = {
++	0x4f, 0xb0, 0x27, 0xb6, 0xdd, 0x24, 0x0c, 0xdb,
++	0x6b, 0x71, 0x2e, 0xac, 0xfc, 0x3f, 0xa6, 0x48,
++	0x5d, 0xd5, 0xff, 0x53, 0xb5, 0x62, 0xf1, 0xe0,
++	0x93, 0xfe, 0x39, 0x4c, 0x9f, 0x03, 0x11, 0xa7
++};
++enum { nonce52 = 0xed8becec3bdf6f25ULL };
++
++static const u8 input53[] __initconst = {
++	0x68, 0xd1, 0xc7, 0x74, 0x44, 0x1c, 0x84, 0xde,
++	0x27, 0x27, 0x35, 0xf0, 0x18, 0x0b, 0x57, 0xaa,
++	0xd0, 0x1a, 0xd3, 0x3b, 0x5e, 0x5c, 0x62, 0x93,
++	0xd7, 0x6b, 0x84, 0x3b, 0x71, 0x83, 0x77, 0x01,
++	0x3e, 0x59, 0x45, 0xf4, 0x77, 0x6c, 0x6b, 0xcb,
++	0x88, 0x45, 0x09, 0x1d, 0xc6, 0x45, 0x6e, 0xdc,
++	0x6e, 0x51, 0xb8, 0x28
++};
++static const u8 output53[] __initconst = {
++	0xc5, 0x90, 0x96, 0x78, 0x02, 0xf5, 0xc4, 0x3c,
++	0xde, 0xd4, 0xd4, 0xc6, 0xa7, 0xad, 0x12, 0x47,
++	0x45, 0xce, 0xcd, 0x8c, 0x35, 0xcc, 0xa6, 0x9e,
++	0x5a, 0xc6, 0x60, 0xbb, 0xe3, 0xed, 0xec, 0x68,
++	0x3f, 0x64, 0xf7, 0x06, 0x63, 0x9c, 0x8c, 0xc8,
++	0x05, 0x3a, 0xad, 0x32, 0x79, 0x8b, 0x45, 0x96,
++	0x93, 0x73, 0x4c, 0xe0
++};
++static const u8 key53[] __initconst = {
++	0x42, 0x4b, 0x20, 0x81, 0x49, 0x50, 0xe9, 0xc2,
++	0x43, 0x69, 0x36, 0xe7, 0x68, 0xae, 0xd5, 0x7e,
++	0x42, 0x1a, 0x1b, 0xb4, 0x06, 0x4d, 0xa7, 0x17,
++	0xb5, 0x31, 0xd6, 0x0c, 0xb0, 0x5c, 0x41, 0x0b
++};
++enum { nonce53 = 0xf44ce1931fbda3d7ULL };
++
++static const u8 input54[] __initconst = {
++	0x7b, 0xf6, 0x8b, 0xae, 0xc0, 0xcb, 0x10, 0x8e,
++	0xe8, 0xd8, 0x2e, 0x3b, 0x14, 0xba, 0xb4, 0xd2,
++	0x58, 0x6b, 0x2c, 0xec, 0xc1, 0x81, 0x71, 0xb4,
++	0xc6, 0xea, 0x08, 0xc5, 0xc9, 0x78, 0xdb, 0xa2,
++	0xfa, 0x44, 0x50, 0x9b, 0xc8, 0x53, 0x8d, 0x45,
++	0x42, 0xe7, 0x09, 0xc4, 0x29, 0xd8, 0x75, 0x02,
++	0xbb, 0xb2, 0x78, 0xcf, 0xe7
++};
++static const u8 output54[] __initconst = {
++	0xaf, 0x2c, 0x83, 0x26, 0x6e, 0x7f, 0xa6, 0xe9,
++	0x03, 0x75, 0xfe, 0xfe, 0x87, 0x58, 0xcf, 0xb5,
++	0xbc, 0x3c, 0x9d, 0xa1, 0x6e, 0x13, 0xf1, 0x0f,
++	0x9e, 0xbc, 0xe0, 0x54, 0x24, 0x32, 0xce, 0x95,
++	0xe6, 0xa5, 0x59, 0x3d, 0x24, 0x1d, 0x8f, 0xb1,
++	0x74, 0x6c, 0x56, 0xe7, 0x96, 0xc1, 0x91, 0xc8,
++	0x2d, 0x0e, 0xb7, 0x51, 0x10
++};
++static const u8 key54[] __initconst = {
++	0x00, 0x68, 0x74, 0xdc, 0x30, 0x9e, 0xe3, 0x52,
++	0xa9, 0xae, 0xb6, 0x7c, 0xa1, 0xdc, 0x12, 0x2d,
++	0x98, 0x32, 0x7a, 0x77, 0xe1, 0xdd, 0xa3, 0x76,
++	0x72, 0x34, 0x83, 0xd8, 0xb7, 0x69, 0xba, 0x77
++};
++enum { nonce54 = 0xbea57d79b798b63aULL };
++
++static const u8 input55[] __initconst = {
++	0xb5, 0xf4, 0x2f, 0xc1, 0x5e, 0x10, 0xa7, 0x4e,
++	0x74, 0x3d, 0xa3, 0x96, 0xc0, 0x4d, 0x7b, 0x92,
++	0x8f, 0xdb, 0x2d, 0x15, 0x52, 0x6a, 0x95, 0x5e,
++	0x40, 0x81, 0x4f, 0x70, 0x73, 0xea, 0x84, 0x65,
++	0x3d, 0x9a, 0x4e, 0x03, 0x95, 0xf8, 0x5d, 0x2f,
++	0x07, 0x02, 0x13, 0x13, 0xdd, 0x82, 0xe6, 0x3b,
++	0xe1, 0x5f, 0xb3, 0x37, 0x9b, 0x88
++};
++static const u8 output55[] __initconst = {
++	0xc1, 0x88, 0xbd, 0x92, 0x77, 0xad, 0x7c, 0x5f,
++	0xaf, 0xa8, 0x57, 0x0e, 0x40, 0x0a, 0xdc, 0x70,
++	0xfb, 0xc6, 0x71, 0xfd, 0xc4, 0x74, 0x60, 0xcc,
++	0xa0, 0x89, 0x8e, 0x99, 0xf0, 0x06, 0xa6, 0x7c,
++	0x97, 0x42, 0x21, 0x81, 0x6a, 0x07, 0xe7, 0xb3,
++	0xf7, 0xa5, 0x03, 0x71, 0x50, 0x05, 0x63, 0x17,
++	0xa9, 0x46, 0x0b, 0xff, 0x30, 0x78
++};
++static const u8 key55[] __initconst = {
++	0x19, 0x8f, 0xe7, 0xd7, 0x6b, 0x7f, 0x6f, 0x69,
++	0x86, 0x91, 0x0f, 0xa7, 0x4a, 0x69, 0x8e, 0x34,
++	0xf3, 0xdb, 0xde, 0xaf, 0xf2, 0x66, 0x1d, 0x64,
++	0x97, 0x0c, 0xcf, 0xfa, 0x33, 0x84, 0xfd, 0x0c
++};
++enum { nonce55 = 0x80aa3d3e2c51ef06ULL };
++
++static const u8 input56[] __initconst = {
++	0x6b, 0xe9, 0x73, 0x42, 0x27, 0x5e, 0x12, 0xcd,
++	0xaa, 0x45, 0x12, 0x8b, 0xb3, 0xe6, 0x54, 0x33,
++	0x31, 0x7d, 0xe2, 0x25, 0xc6, 0x86, 0x47, 0x67,
++	0x86, 0x83, 0xe4, 0x46, 0xb5, 0x8f, 0x2c, 0xbb,
++	0xe4, 0xb8, 0x9f, 0xa2, 0xa4, 0xe8, 0x75, 0x96,
++	0x92, 0x51, 0x51, 0xac, 0x8e, 0x2e, 0x6f, 0xfc,
++	0xbd, 0x0d, 0xa3, 0x9f, 0x16, 0x55, 0x3e
++};
++static const u8 output56[] __initconst = {
++	0x42, 0x99, 0x73, 0x6c, 0xd9, 0x4b, 0x16, 0xe5,
++	0x18, 0x63, 0x1a, 0xd9, 0x0e, 0xf1, 0x15, 0x2e,
++	0x0f, 0x4b, 0xe4, 0x5f, 0xa0, 0x4d, 0xde, 0x9f,
++	0xa7, 0x18, 0xc1, 0x0c, 0x0b, 0xae, 0x55, 0xe4,
++	0x89, 0x18, 0xa4, 0x78, 0x9d, 0x25, 0x0d, 0xd5,
++	0x94, 0x0f, 0xf9, 0x78, 0xa3, 0xa6, 0xe9, 0x9e,
++	0x2c, 0x73, 0xf0, 0xf7, 0x35, 0xf3, 0x2b
++};
++static const u8 key56[] __initconst = {
++	0x7d, 0x12, 0xad, 0x51, 0xd5, 0x6f, 0x8f, 0x96,
++	0xc0, 0x5d, 0x9a, 0xd1, 0x7e, 0x20, 0x98, 0x0e,
++	0x3c, 0x0a, 0x67, 0x6b, 0x1b, 0x88, 0x69, 0xd4,
++	0x07, 0x8c, 0xaf, 0x0f, 0x3a, 0x28, 0xe4, 0x5d
++};
++enum { nonce56 = 0x70f4c372fb8b5984ULL };
++
++static const u8 input57[] __initconst = {
++	0x28, 0xa3, 0x06, 0xe8, 0xe7, 0x08, 0xb9, 0xef,
++	0x0d, 0x63, 0x15, 0x99, 0xb2, 0x78, 0x7e, 0xaf,
++	0x30, 0x50, 0xcf, 0xea, 0xc9, 0x91, 0x41, 0x2f,
++	0x3b, 0x38, 0x70, 0xc4, 0x87, 0xb0, 0x3a, 0xee,
++	0x4a, 0xea, 0xe3, 0x83, 0x68, 0x8b, 0xcf, 0xda,
++	0x04, 0xa5, 0xbd, 0xb2, 0xde, 0x3c, 0x55, 0x13,
++	0xfe, 0x96, 0xad, 0xc1, 0x61, 0x1b, 0x98, 0xde
++};
++static const u8 output57[] __initconst = {
++	0xf4, 0x44, 0xe9, 0xd2, 0x6d, 0xc2, 0x5a, 0xe9,
++	0xfd, 0x7e, 0x41, 0x54, 0x3f, 0xf4, 0x12, 0xd8,
++	0x55, 0x0d, 0x12, 0x9b, 0xd5, 0x2e, 0x95, 0xe5,
++	0x77, 0x42, 0x3f, 0x2c, 0xfb, 0x28, 0x9d, 0x72,
++	0x6d, 0x89, 0x82, 0x27, 0x64, 0x6f, 0x0d, 0x57,
++	0xa1, 0x25, 0xa3, 0x6b, 0x88, 0x9a, 0xac, 0x0c,
++	0x76, 0x19, 0x90, 0xe2, 0x50, 0x5a, 0xf8, 0x12
++};
++static const u8 key57[] __initconst = {
++	0x08, 0x26, 0xb8, 0xac, 0xf3, 0xa5, 0xc6, 0xa3,
++	0x7f, 0x09, 0x87, 0xf5, 0x6c, 0x5a, 0x85, 0x6c,
++	0x3d, 0xbd, 0xde, 0xd5, 0x87, 0xa3, 0x98, 0x7a,
++	0xaa, 0x40, 0x3e, 0xf7, 0xff, 0x44, 0x5d, 0xee
++};
++enum { nonce57 = 0xc03a6130bf06b089ULL };
++
++static const u8 input58[] __initconst = {
++	0x82, 0xa5, 0x38, 0x6f, 0xaa, 0xb4, 0xaf, 0xb2,
++	0x42, 0x01, 0xa8, 0x39, 0x3f, 0x15, 0x51, 0xa8,
++	0x11, 0x1b, 0x93, 0xca, 0x9c, 0xa0, 0x57, 0x68,
++	0x8f, 0xdb, 0x68, 0x53, 0x51, 0x6d, 0x13, 0x22,
++	0x12, 0x9b, 0xbd, 0x33, 0xa8, 0x52, 0x40, 0x57,
++	0x80, 0x9b, 0x98, 0xef, 0x56, 0x70, 0x11, 0xfa,
++	0x36, 0x69, 0x7d, 0x15, 0x48, 0xf9, 0x3b, 0xeb,
++	0x42
++};
++static const u8 output58[] __initconst = {
++	0xff, 0x3a, 0x74, 0xc3, 0x3e, 0x44, 0x64, 0x4d,
++	0x0e, 0x5f, 0x9d, 0xa8, 0xdb, 0xbe, 0x12, 0xef,
++	0xba, 0x56, 0x65, 0x50, 0x76, 0xaf, 0xa4, 0x4e,
++	0x01, 0xc1, 0xd3, 0x31, 0x14, 0xe2, 0xbe, 0x7b,
++	0xa5, 0x67, 0xb4, 0xe3, 0x68, 0x40, 0x9c, 0xb0,
++	0xb1, 0x78, 0xef, 0x49, 0x03, 0x0f, 0x2d, 0x56,
++	0xb4, 0x37, 0xdb, 0xbc, 0x2d, 0x68, 0x1c, 0x3c,
++	0xf1
++};
++static const u8 key58[] __initconst = {
++	0x7e, 0xf1, 0x7c, 0x20, 0x65, 0xed, 0xcd, 0xd7,
++	0x57, 0xe8, 0xdb, 0x90, 0x87, 0xdb, 0x5f, 0x63,
++	0x3d, 0xdd, 0xb8, 0x2b, 0x75, 0x8e, 0x04, 0xb5,
++	0xf4, 0x12, 0x79, 0xa9, 0x4d, 0x42, 0x16, 0x7f
++};
++enum { nonce58 = 0x92838183f80d2f7fULL };
++
++static const u8 input59[] __initconst = {
++	0x37, 0xf1, 0x9d, 0xdd, 0xd7, 0x08, 0x9f, 0x13,
++	0xc5, 0x21, 0x82, 0x75, 0x08, 0x9e, 0x25, 0x16,
++	0xb1, 0xd1, 0x71, 0x42, 0x28, 0x63, 0xac, 0x47,
++	0x71, 0x54, 0xb1, 0xfc, 0x39, 0xf0, 0x61, 0x4f,
++	0x7c, 0x6d, 0x4f, 0xc8, 0x33, 0xef, 0x7e, 0xc8,
++	0xc0, 0x97, 0xfc, 0x1a, 0x61, 0xb4, 0x87, 0x6f,
++	0xdd, 0x5a, 0x15, 0x7b, 0x1b, 0x95, 0x50, 0x94,
++	0x1d, 0xba
++};
++static const u8 output59[] __initconst = {
++	0x73, 0x67, 0xc5, 0x07, 0xbb, 0x57, 0x79, 0xd5,
++	0xc9, 0x04, 0xdd, 0x88, 0xf3, 0x86, 0xe5, 0x70,
++	0x49, 0x31, 0xe0, 0xcc, 0x3b, 0x1d, 0xdf, 0xb0,
++	0xaf, 0xf4, 0x2d, 0xe0, 0x06, 0x10, 0x91, 0x8d,
++	0x1c, 0xcf, 0x31, 0x0b, 0xf6, 0x73, 0xda, 0x1c,
++	0xf0, 0x17, 0x52, 0x9e, 0x20, 0x2e, 0x9f, 0x8c,
++	0xb3, 0x59, 0xce, 0xd4, 0xd3, 0xc1, 0x81, 0xe9,
++	0x11, 0x36
++};
++static const u8 key59[] __initconst = {
++	0xbd, 0x07, 0xd0, 0x53, 0x2c, 0xb3, 0xcc, 0x3f,
++	0xc4, 0x95, 0xfd, 0xe7, 0x81, 0xb3, 0x29, 0x99,
++	0x05, 0x45, 0xd6, 0x95, 0x25, 0x0b, 0x72, 0xd3,
++	0xcd, 0xbb, 0x73, 0xf8, 0xfa, 0xc0, 0x9b, 0x7a
++};
++enum { nonce59 = 0x4a0db819b0d519e2ULL };
++
++static const u8 input60[] __initconst = {
++	0x58, 0x4e, 0xdf, 0x94, 0x3c, 0x76, 0x0a, 0x79,
++	0x47, 0xf1, 0xbe, 0x88, 0xd3, 0xba, 0x94, 0xd8,
++	0xe2, 0x8f, 0xe3, 0x2f, 0x2f, 0x74, 0x82, 0x55,
++	0xc3, 0xda, 0xe2, 0x4e, 0x2c, 0x8c, 0x45, 0x1d,
++	0x72, 0x8f, 0x54, 0x41, 0xb5, 0xb7, 0x69, 0xe4,
++	0xdc, 0xd2, 0x36, 0x21, 0x5c, 0x28, 0x52, 0xf7,
++	0x98, 0x8e, 0x72, 0xa7, 0x6d, 0x57, 0xed, 0xdc,
++	0x3c, 0xe6, 0x6a
++};
++static const u8 output60[] __initconst = {
++	0xda, 0xaf, 0xb5, 0xe3, 0x30, 0x65, 0x5c, 0xb1,
++	0x48, 0x08, 0x43, 0x7b, 0x9e, 0xd2, 0x6a, 0x62,
++	0x56, 0x7c, 0xad, 0xd9, 0xe5, 0xf6, 0x09, 0x71,
++	0xcd, 0xe6, 0x05, 0x6b, 0x3f, 0x44, 0x3a, 0x5c,
++	0xf6, 0xf8, 0xd7, 0xce, 0x7d, 0xd1, 0xe0, 0x4f,
++	0x88, 0x15, 0x04, 0xd8, 0x20, 0xf0, 0x3e, 0xef,
++	0xae, 0xa6, 0x27, 0xa3, 0x0e, 0xfc, 0x18, 0x90,
++	0x33, 0xcd, 0xd3
++};
++static const u8 key60[] __initconst = {
++	0xbf, 0xfd, 0x25, 0xb5, 0xb2, 0xfc, 0x78, 0x0c,
++	0x8e, 0xb9, 0x57, 0x2f, 0x26, 0x4a, 0x7e, 0x71,
++	0xcc, 0xf2, 0xe0, 0xfd, 0x24, 0x11, 0x20, 0x23,
++	0x57, 0x00, 0xff, 0x80, 0x11, 0x0c, 0x1e, 0xff
++};
++enum { nonce60 = 0xf18df56fdb7954adULL };
++
++static const u8 input61[] __initconst = {
++	0xb0, 0xf3, 0x06, 0xbc, 0x22, 0xae, 0x49, 0x40,
++	0xae, 0xff, 0x1b, 0x31, 0xa7, 0x98, 0xab, 0x1d,
++	0xe7, 0x40, 0x23, 0x18, 0x4f, 0xab, 0x8e, 0x93,
++	0x82, 0xf4, 0x56, 0x61, 0xfd, 0x2b, 0xcf, 0xa7,
++	0xc4, 0xb4, 0x0a, 0xf4, 0xcb, 0xc7, 0x8c, 0x40,
++	0x57, 0xac, 0x0b, 0x3e, 0x2a, 0x0a, 0x67, 0x83,
++	0x50, 0xbf, 0xec, 0xb0, 0xc7, 0xf1, 0x32, 0x26,
++	0x98, 0x80, 0x33, 0xb4
++};
++static const u8 output61[] __initconst = {
++	0x9d, 0x23, 0x0e, 0xff, 0xcc, 0x7c, 0xd5, 0xcf,
++	0x1a, 0xb8, 0x59, 0x1e, 0x92, 0xfd, 0x7f, 0xca,
++	0xca, 0x3c, 0x18, 0x81, 0xde, 0xfa, 0x59, 0xc8,
++	0x6f, 0x9c, 0x24, 0x3f, 0x3a, 0xe6, 0x0b, 0xb4,
++	0x34, 0x48, 0x69, 0xfc, 0xb6, 0xea, 0xb2, 0xde,
++	0x9f, 0xfd, 0x92, 0x36, 0x18, 0x98, 0x99, 0xaa,
++	0x65, 0xe2, 0xea, 0xf4, 0xb1, 0x47, 0x8e, 0xb0,
++	0xe7, 0xd4, 0x7a, 0x2c
++};
++static const u8 key61[] __initconst = {
++	0xd7, 0xfd, 0x9b, 0xbd, 0x8f, 0x65, 0x0d, 0x00,
++	0xca, 0xa1, 0x6c, 0x85, 0x85, 0xa4, 0x6d, 0xf1,
++	0xb1, 0x68, 0x0c, 0x8b, 0x5d, 0x37, 0x72, 0xd0,
++	0xd8, 0xd2, 0x25, 0xab, 0x9f, 0x7b, 0x7d, 0x95
++};
++enum { nonce61 = 0xd82caf72a9c4864fULL };
++
++static const u8 input62[] __initconst = {
++	0x10, 0x77, 0xf3, 0x2f, 0xc2, 0x50, 0xd6, 0x0c,
++	0xba, 0xa8, 0x8d, 0xce, 0x0d, 0x58, 0x9e, 0x87,
++	0xb1, 0x59, 0x66, 0x0a, 0x4a, 0xb3, 0xd8, 0xca,
++	0x0a, 0x6b, 0xf8, 0xc6, 0x2b, 0x3f, 0x8e, 0x09,
++	0xe0, 0x0a, 0x15, 0x85, 0xfe, 0xaa, 0xc6, 0xbd,
++	0x30, 0xef, 0xe4, 0x10, 0x78, 0x03, 0xc1, 0xc7,
++	0x8a, 0xd9, 0xde, 0x0b, 0x51, 0x07, 0xc4, 0x7b,
++	0xe2, 0x2e, 0x36, 0x3a, 0xc2
++};
++static const u8 output62[] __initconst = {
++	0xa0, 0x0c, 0xfc, 0xc1, 0xf6, 0xaf, 0xc2, 0xb8,
++	0x5c, 0xef, 0x6e, 0xf3, 0xce, 0x15, 0x48, 0x05,
++	0xb5, 0x78, 0x49, 0x51, 0x1f, 0x9d, 0xf4, 0xbf,
++	0x2f, 0x53, 0xa2, 0xd1, 0x15, 0x20, 0x82, 0x6b,
++	0xd2, 0x22, 0x6c, 0x4e, 0x14, 0x87, 0xe3, 0xd7,
++	0x49, 0x45, 0x84, 0xdb, 0x5f, 0x68, 0x60, 0xc4,
++	0xb3, 0xe6, 0x3f, 0xd1, 0xfc, 0xa5, 0x73, 0xf3,
++	0xfc, 0xbb, 0xbe, 0xc8, 0x9d
++};
++static const u8 key62[] __initconst = {
++	0x6e, 0xc9, 0xaf, 0xce, 0x35, 0xb9, 0x86, 0xd1,
++	0xce, 0x5f, 0xd9, 0xbb, 0xd5, 0x1f, 0x7c, 0xcd,
++	0xfe, 0x19, 0xaa, 0x3d, 0xea, 0x64, 0xc1, 0x28,
++	0x40, 0xba, 0xa1, 0x28, 0xcd, 0x40, 0xb6, 0xf2
++};
++enum { nonce62 = 0xa1c0c265f900cde8ULL };
++
++static const u8 input63[] __initconst = {
++	0x7a, 0x70, 0x21, 0x2c, 0xef, 0xa6, 0x36, 0xd4,
++	0xe0, 0xab, 0x8c, 0x25, 0x73, 0x34, 0xc8, 0x94,
++	0x6c, 0x81, 0xcb, 0x19, 0x8d, 0x5a, 0x49, 0xaa,
++	0x6f, 0xba, 0x83, 0x72, 0x02, 0x5e, 0xf5, 0x89,
++	0xce, 0x79, 0x7e, 0x13, 0x3d, 0x5b, 0x98, 0x60,
++	0x5d, 0xd9, 0xfb, 0x15, 0x93, 0x4c, 0xf3, 0x51,
++	0x49, 0x55, 0xd1, 0x58, 0xdd, 0x7e, 0x6d, 0xfe,
++	0xdd, 0x84, 0x23, 0x05, 0xba, 0xe9
++};
++static const u8 output63[] __initconst = {
++	0x20, 0xb3, 0x5c, 0x03, 0x03, 0x78, 0x17, 0xfc,
++	0x3b, 0x35, 0x30, 0x9a, 0x00, 0x18, 0xf5, 0xc5,
++	0x06, 0x53, 0xf5, 0x04, 0x24, 0x9d, 0xd1, 0xb2,
++	0xac, 0x5a, 0xb6, 0x2a, 0xa5, 0xda, 0x50, 0x00,
++	0xec, 0xff, 0xa0, 0x7a, 0x14, 0x7b, 0xe4, 0x6b,
++	0x63, 0xe8, 0x66, 0x86, 0x34, 0xfd, 0x74, 0x44,
++	0xa2, 0x50, 0x97, 0x0d, 0xdc, 0xc3, 0x84, 0xf8,
++	0x71, 0x02, 0x31, 0x95, 0xed, 0x54
++};
++static const u8 key63[] __initconst = {
++	0x7d, 0x64, 0xb4, 0x12, 0x81, 0xe4, 0xe6, 0x8f,
++	0xcc, 0xe7, 0xd1, 0x1f, 0x70, 0x20, 0xfd, 0xb8,
++	0x3a, 0x7d, 0xa6, 0x53, 0x65, 0x30, 0x5d, 0xe3,
++	0x1a, 0x44, 0xbe, 0x62, 0xed, 0x90, 0xc4, 0xd1
++};
++enum { nonce63 = 0xe8e849596c942276ULL };
++
++static const u8 input64[] __initconst = {
++	0x84, 0xf8, 0xda, 0x87, 0x23, 0x39, 0x60, 0xcf,
++	0xc5, 0x50, 0x7e, 0xc5, 0x47, 0x29, 0x7c, 0x05,
++	0xc2, 0xb4, 0xf4, 0xb2, 0xec, 0x5d, 0x48, 0x36,
++	0xbf, 0xfc, 0x06, 0x8c, 0xf2, 0x0e, 0x88, 0xe7,
++	0xc9, 0xc5, 0xa4, 0xa2, 0x83, 0x20, 0xa1, 0x6f,
++	0x37, 0xe5, 0x2d, 0xa1, 0x72, 0xa1, 0x19, 0xef,
++	0x05, 0x42, 0x08, 0xf2, 0x57, 0x47, 0x31, 0x1e,
++	0x17, 0x76, 0x13, 0xd3, 0xcc, 0x75, 0x2c
++};
++static const u8 output64[] __initconst = {
++	0xcb, 0xec, 0x90, 0x88, 0xeb, 0x31, 0x69, 0x20,
++	0xa6, 0xdc, 0xff, 0x76, 0x98, 0xb0, 0x24, 0x49,
++	0x7b, 0x20, 0xd9, 0xd1, 0x1b, 0xe3, 0x61, 0xdc,
++	0xcf, 0x51, 0xf6, 0x70, 0x72, 0x33, 0x28, 0x94,
++	0xac, 0x73, 0x18, 0xcf, 0x93, 0xfd, 0xca, 0x08,
++	0x0d, 0xa2, 0xb9, 0x57, 0x1e, 0x51, 0xb6, 0x07,
++	0x5c, 0xc1, 0x13, 0x64, 0x1d, 0x18, 0x6f, 0xe6,
++	0x0b, 0xb7, 0x14, 0x03, 0x43, 0xb6, 0xaf
++};
++static const u8 key64[] __initconst = {
++	0xbf, 0x82, 0x65, 0xe4, 0x50, 0xf9, 0x5e, 0xea,
++	0x28, 0x91, 0xd1, 0xd2, 0x17, 0x7c, 0x13, 0x7e,
++	0xf5, 0xd5, 0x6b, 0x06, 0x1c, 0x20, 0xc2, 0x82,
++	0xa1, 0x7a, 0xa2, 0x14, 0xa1, 0xb0, 0x54, 0x58
++};
++enum { nonce64 = 0xe57c5095aa5723c9ULL };
++
++static const u8 input65[] __initconst = {
++	0x1c, 0xfb, 0xd3, 0x3f, 0x85, 0xd7, 0xba, 0x7b,
++	0xae, 0xb1, 0xa5, 0xd2, 0xe5, 0x40, 0xce, 0x4d,
++	0x3e, 0xab, 0x17, 0x9d, 0x7d, 0x9f, 0x03, 0x98,
++	0x3f, 0x9f, 0xc8, 0xdd, 0x36, 0x17, 0x43, 0x5c,
++	0x34, 0xd1, 0x23, 0xe0, 0x77, 0xbf, 0x35, 0x5d,
++	0x8f, 0xb1, 0xcb, 0x82, 0xbb, 0x39, 0x69, 0xd8,
++	0x90, 0x45, 0x37, 0xfd, 0x98, 0x25, 0xf7, 0x5b,
++	0xce, 0x06, 0x43, 0xba, 0x61, 0xa8, 0x47, 0xb9
++};
++static const u8 output65[] __initconst = {
++	0x73, 0xa5, 0x68, 0xab, 0x8b, 0xa5, 0xc3, 0x7e,
++	0x74, 0xf8, 0x9d, 0xf5, 0x93, 0x6e, 0xf2, 0x71,
++	0x6d, 0xde, 0x82, 0xc5, 0x40, 0xa0, 0x46, 0xb3,
++	0x9a, 0x78, 0xa8, 0xf7, 0xdf, 0xb1, 0xc3, 0xdd,
++	0x8d, 0x90, 0x00, 0x68, 0x21, 0x48, 0xe8, 0xba,
++	0x56, 0x9f, 0x8f, 0xe7, 0xa4, 0x4d, 0x36, 0x55,
++	0xd0, 0x34, 0x99, 0xa6, 0x1c, 0x4c, 0xc1, 0xe2,
++	0x65, 0x98, 0x14, 0x8e, 0x6a, 0x05, 0xb1, 0x2b
++};
++static const u8 key65[] __initconst = {
++	0xbd, 0x5c, 0x8a, 0xb0, 0x11, 0x29, 0xf3, 0x00,
++	0x7a, 0x78, 0x32, 0x63, 0x34, 0x00, 0xe6, 0x7d,
++	0x30, 0x54, 0xde, 0x37, 0xda, 0xc2, 0xc4, 0x3d,
++	0x92, 0x6b, 0x4c, 0xc2, 0x92, 0xe9, 0x9e, 0x2a
++};
++enum { nonce65 = 0xf654a3031de746f2ULL };
++
++static const u8 input66[] __initconst = {
++	0x4b, 0x27, 0x30, 0x8f, 0x28, 0xd8, 0x60, 0x46,
++	0x39, 0x06, 0x49, 0xea, 0x1b, 0x71, 0x26, 0xe0,
++	0x99, 0x2b, 0xd4, 0x8f, 0x64, 0x64, 0xcd, 0xac,
++	0x1d, 0x78, 0x88, 0x90, 0xe1, 0x5c, 0x24, 0x4b,
++	0xdc, 0x2d, 0xb7, 0xee, 0x3a, 0xe6, 0x86, 0x2c,
++	0x21, 0xe4, 0x2b, 0xfc, 0xe8, 0x19, 0xca, 0x65,
++	0xe7, 0xdd, 0x6f, 0x52, 0xb3, 0x11, 0xe1, 0xe2,
++	0xbf, 0xe8, 0x70, 0xe3, 0x0d, 0x45, 0xb8, 0xa5,
++	0x20, 0xb7, 0xb5, 0xaf, 0xff, 0x08, 0xcf, 0x23,
++	0x65, 0xdf, 0x8d, 0xc3, 0x31, 0xf3, 0x1e, 0x6a,
++	0x58, 0x8d, 0xcc, 0x45, 0x16, 0x86, 0x1f, 0x31,
++	0x5c, 0x27, 0xcd, 0xc8, 0x6b, 0x19, 0x1e, 0xec,
++	0x44, 0x75, 0x63, 0x97, 0xfd, 0x79, 0xf6, 0x62,
++	0xc5, 0xba, 0x17, 0xc7, 0xab, 0x8f, 0xbb, 0xed,
++	0x85, 0x2a, 0x98, 0x79, 0x21, 0xec, 0x6e, 0x4d,
++	0xdc, 0xfa, 0x72, 0x52, 0xba, 0xc8, 0x4c
++};
++static const u8 output66[] __initconst = {
++	0x76, 0x5b, 0x2c, 0xa7, 0x62, 0xb9, 0x08, 0x4a,
++	0xc6, 0x4a, 0x92, 0xc3, 0xbb, 0x10, 0xb3, 0xee,
++	0xff, 0xb9, 0x07, 0xc7, 0x27, 0xcb, 0x1e, 0xcf,
++	0x58, 0x6f, 0xa1, 0x64, 0xe8, 0xf1, 0x4e, 0xe1,
++	0xef, 0x18, 0x96, 0xab, 0x97, 0x28, 0xd1, 0x7c,
++	0x71, 0x6c, 0xd1, 0xe2, 0xfa, 0xd9, 0x75, 0xcb,
++	0xeb, 0xea, 0x0c, 0x86, 0x82, 0xd8, 0xf4, 0xcc,
++	0xea, 0xa3, 0x00, 0xfa, 0x82, 0xd2, 0xcd, 0xcb,
++	0xdb, 0x63, 0x28, 0xe2, 0x82, 0xe9, 0x01, 0xed,
++	0x31, 0xe6, 0x71, 0x45, 0x08, 0x89, 0x8a, 0x23,
++	0xa8, 0xb5, 0xc2, 0xe2, 0x9f, 0xe9, 0xb8, 0x9a,
++	0xc4, 0x79, 0x6d, 0x71, 0x52, 0x61, 0x74, 0x6c,
++	0x1b, 0xd7, 0x65, 0x6d, 0x03, 0xc4, 0x1a, 0xc0,
++	0x50, 0xba, 0xd6, 0xc9, 0x43, 0x50, 0xbe, 0x09,
++	0x09, 0x8a, 0xdb, 0xaa, 0x76, 0x4e, 0x3b, 0x61,
++	0x3c, 0x7c, 0x44, 0xe7, 0xdb, 0x10, 0xa7
++};
++static const u8 key66[] __initconst = {
++	0x88, 0xdf, 0xca, 0x68, 0xaf, 0x4f, 0xb3, 0xfd,
++	0x6e, 0xa7, 0x95, 0x35, 0x8a, 0xe8, 0x37, 0xe8,
++	0xc8, 0x55, 0xa2, 0x2a, 0x6d, 0x77, 0xf8, 0x93,
++	0x7a, 0x41, 0xf3, 0x7b, 0x95, 0xdf, 0x89, 0xf5
++};
++enum { nonce66 = 0x1024b4fdd415cf82ULL };
++
++static const u8 input67[] __initconst = {
++	0xd4, 0x2e, 0xfa, 0x92, 0xe9, 0x29, 0x68, 0xb7,
++	0x54, 0x2c, 0xf7, 0xa4, 0x2d, 0xb7, 0x50, 0xb5,
++	0xc5, 0xb2, 0x9d, 0x17, 0x5e, 0x0a, 0xca, 0x37,
++	0xbf, 0x60, 0xae, 0xd2, 0x98, 0xe9, 0xfa, 0x59,
++	0x67, 0x62, 0xe6, 0x43, 0x0c, 0x77, 0x80, 0x82,
++	0x33, 0x61, 0xa3, 0xff, 0xc1, 0xa0, 0x8f, 0x56,
++	0xbc, 0xec, 0x65, 0x43, 0x88, 0xa5, 0xff, 0x51,
++	0x64, 0x30, 0xee, 0x34, 0xb7, 0x5c, 0x28, 0x68,
++	0xc3, 0x52, 0xd2, 0xac, 0x78, 0x2a, 0xa6, 0x10,
++	0xb8, 0xb2, 0x4c, 0x80, 0x4f, 0x99, 0xb2, 0x36,
++	0x94, 0x8f, 0x66, 0xcb, 0xa1, 0x91, 0xed, 0x06,
++	0x42, 0x6d, 0xc1, 0xae, 0x55, 0x93, 0xdd, 0x93,
++	0x9e, 0x88, 0x34, 0x7f, 0x98, 0xeb, 0xbe, 0x61,
++	0xf9, 0xa9, 0x0f, 0xd9, 0xc4, 0x87, 0xd5, 0xef,
++	0xcc, 0x71, 0x8c, 0x0e, 0xce, 0xad, 0x02, 0xcf,
++	0xa2, 0x61, 0xdf, 0xb1, 0xfe, 0x3b, 0xdc, 0xc0,
++	0x58, 0xb5, 0x71, 0xa1, 0x83, 0xc9, 0xb4, 0xaf,
++	0x9d, 0x54, 0x12, 0xcd, 0xea, 0x06, 0xd6, 0x4e,
++	0xe5, 0x27, 0x0c, 0xc3, 0xbb, 0xa8, 0x0a, 0x81,
++	0x75, 0xc3, 0xc9, 0xd4, 0x35, 0x3e, 0x53, 0x9f,
++	0xaa, 0x20, 0xc0, 0x68, 0x39, 0x2c, 0x96, 0x39,
++	0x53, 0x81, 0xda, 0x07, 0x0f, 0x44, 0xa5, 0x47,
++	0x0e, 0xb3, 0x87, 0x0d, 0x1b, 0xc1, 0xe5, 0x41,
++	0x35, 0x12, 0x58, 0x96, 0x69, 0x8a, 0x1a, 0xa3,
++	0x9d, 0x3d, 0xd4, 0xb1, 0x8e, 0x1f, 0x96, 0x87,
++	0xda, 0xd3, 0x19, 0xe2, 0xb1, 0x3a, 0x19, 0x74,
++	0xa0, 0x00, 0x9f, 0x4d, 0xbc, 0xcb, 0x0c, 0xe9,
++	0xec, 0x10, 0xdf, 0x2a, 0x88, 0xdc, 0x30, 0x51,
++	0x46, 0x56, 0x53, 0x98, 0x6a, 0x26, 0x14, 0x05,
++	0x54, 0x81, 0x55, 0x0b, 0x3c, 0x85, 0xdd, 0x33,
++	0x81, 0x11, 0x29, 0x82, 0x46, 0x35, 0xe1, 0xdb,
++	0x59, 0x7b
++};
++static const u8 output67[] __initconst = {
++	0x64, 0x6c, 0xda, 0x7f, 0xd4, 0xa9, 0x2a, 0x5e,
++	0x22, 0xae, 0x8d, 0x67, 0xdb, 0xee, 0xfd, 0xd0,
++	0x44, 0x80, 0x17, 0xb2, 0xe3, 0x87, 0xad, 0x57,
++	0x15, 0xcb, 0x88, 0x64, 0xc0, 0xf1, 0x49, 0x3d,
++	0xfa, 0xbe, 0xa8, 0x9f, 0x12, 0xc3, 0x57, 0x56,
++	0x70, 0xa5, 0xc5, 0x6b, 0xf1, 0xab, 0xd5, 0xde,
++	0x77, 0x92, 0x6a, 0x56, 0x03, 0xf5, 0x21, 0x0d,
++	0xb6, 0xc4, 0xcc, 0x62, 0x44, 0x3f, 0xb1, 0xc1,
++	0x61, 0x41, 0x90, 0xb2, 0xd5, 0xb8, 0xf3, 0x57,
++	0xfb, 0xc2, 0x6b, 0x25, 0x58, 0xc8, 0x45, 0x20,
++	0x72, 0x29, 0x6f, 0x9d, 0xb5, 0x81, 0x4d, 0x2b,
++	0xb2, 0x89, 0x9e, 0x91, 0x53, 0x97, 0x1c, 0xd9,
++	0x3d, 0x79, 0xdc, 0x14, 0xae, 0x01, 0x73, 0x75,
++	0xf0, 0xca, 0xd5, 0xab, 0x62, 0x5c, 0x7a, 0x7d,
++	0x3f, 0xfe, 0x22, 0x7d, 0xee, 0xe2, 0xcb, 0x76,
++	0x55, 0xec, 0x06, 0xdd, 0x41, 0x47, 0x18, 0x62,
++	0x1d, 0x57, 0xd0, 0xd6, 0xb6, 0x0f, 0x4b, 0xfc,
++	0x79, 0x19, 0xf4, 0xd6, 0x37, 0x86, 0x18, 0x1f,
++	0x98, 0x0d, 0x9e, 0x15, 0x2d, 0xb6, 0x9a, 0x8a,
++	0x8c, 0x80, 0x22, 0x2f, 0x82, 0xc4, 0xc7, 0x36,
++	0xfa, 0xfa, 0x07, 0xbd, 0xc2, 0x2a, 0xe2, 0xea,
++	0x93, 0xc8, 0xb2, 0x90, 0x33, 0xf2, 0xee, 0x4b,
++	0x1b, 0xf4, 0x37, 0x92, 0x13, 0xbb, 0xe2, 0xce,
++	0xe3, 0x03, 0xcf, 0x07, 0x94, 0xab, 0x9a, 0xc9,
++	0xff, 0x83, 0x69, 0x3a, 0xda, 0x2c, 0xd0, 0x47,
++	0x3d, 0x6c, 0x1a, 0x60, 0x68, 0x47, 0xb9, 0x36,
++	0x52, 0xdd, 0x16, 0xef, 0x6c, 0xbf, 0x54, 0x11,
++	0x72, 0x62, 0xce, 0x8c, 0x9d, 0x90, 0xa0, 0x25,
++	0x06, 0x92, 0x3e, 0x12, 0x7e, 0x1a, 0x1d, 0xe5,
++	0xa2, 0x71, 0xce, 0x1c, 0x4c, 0x6a, 0x7c, 0xdc,
++	0x3d, 0xe3, 0x6e, 0x48, 0x9d, 0xb3, 0x64, 0x7d,
++	0x78, 0x40
++};
++static const u8 key67[] __initconst = {
++	0xa9, 0x20, 0x75, 0x89, 0x7e, 0x37, 0x85, 0x48,
++	0xa3, 0xfb, 0x7b, 0xe8, 0x30, 0xa7, 0xe3, 0x6e,
++	0xa6, 0xc1, 0x71, 0x17, 0xc1, 0x6c, 0x9b, 0xc2,
++	0xde, 0xf0, 0xa7, 0x19, 0xec, 0xce, 0xc6, 0x53
++};
++enum { nonce67 = 0x4adc4d1f968c8a10ULL };
++
++static const u8 input68[] __initconst = {
++	0x99, 0xae, 0x72, 0xfb, 0x16, 0xe1, 0xf1, 0x59,
++	0x43, 0x15, 0x4e, 0x33, 0xa0, 0x95, 0xe7, 0x6c,
++	0x74, 0x24, 0x31, 0xca, 0x3b, 0x2e, 0xeb, 0xd7,
++	0x11, 0xd8, 0xe0, 0x56, 0x92, 0x91, 0x61, 0x57,
++	0xe2, 0x82, 0x9f, 0x8f, 0x37, 0xf5, 0x3d, 0x24,
++	0x92, 0x9d, 0x87, 0x00, 0x8d, 0x89, 0xe0, 0x25,
++	0x8b, 0xe4, 0x20, 0x5b, 0x8a, 0x26, 0x2c, 0x61,
++	0x78, 0xb0, 0xa6, 0x3e, 0x82, 0x18, 0xcf, 0xdc,
++	0x2d, 0x24, 0xdd, 0x81, 0x42, 0xc4, 0x95, 0xf0,
++	0x48, 0x60, 0x71, 0xe3, 0xe3, 0xac, 0xec, 0xbe,
++	0x98, 0x6b, 0x0c, 0xb5, 0x6a, 0xa9, 0xc8, 0x79,
++	0x23, 0x2e, 0x38, 0x0b, 0x72, 0x88, 0x8c, 0xe7,
++	0x71, 0x8b, 0x36, 0xe3, 0x58, 0x3d, 0x9c, 0xa0,
++	0xa2, 0xea, 0xcf, 0x0c, 0x6a, 0x6c, 0x64, 0xdf,
++	0x97, 0x21, 0x8f, 0x93, 0xfb, 0xba, 0xf3, 0x5a,
++	0xd7, 0x8f, 0xa6, 0x37, 0x15, 0x50, 0x43, 0x02,
++	0x46, 0x7f, 0x93, 0x46, 0x86, 0x31, 0xe2, 0xaa,
++	0x24, 0xa8, 0x26, 0xae, 0xe6, 0xc0, 0x05, 0x73,
++	0x0b, 0x4f, 0x7e, 0xed, 0x65, 0xeb, 0x56, 0x1e,
++	0xb6, 0xb3, 0x0b, 0xc3, 0x0e, 0x31, 0x95, 0xa9,
++	0x18, 0x4d, 0xaf, 0x38, 0xd7, 0xec, 0xc6, 0x44,
++	0x72, 0x77, 0x4e, 0x25, 0x4b, 0x25, 0xdd, 0x1e,
++	0x8c, 0xa2, 0xdf, 0xf6, 0x2a, 0x97, 0x1a, 0x88,
++	0x2c, 0x8a, 0x5d, 0xfe, 0xe8, 0xfb, 0x35, 0xe8,
++	0x0f, 0x2b, 0x7a, 0x18, 0x69, 0x43, 0x31, 0x1d,
++	0x38, 0x6a, 0x62, 0x95, 0x0f, 0x20, 0x4b, 0xbb,
++	0x97, 0x3c, 0xe0, 0x64, 0x2f, 0x52, 0xc9, 0x2d,
++	0x4d, 0x9d, 0x54, 0x04, 0x3d, 0xc9, 0xea, 0xeb,
++	0xd0, 0x86, 0x52, 0xff, 0x42, 0xe1, 0x0d, 0x7a,
++	0xad, 0x88, 0xf9, 0x9b, 0x1e, 0x5e, 0x12, 0x27,
++	0x95, 0x3e, 0x0c, 0x2c, 0x13, 0x00, 0x6f, 0x8e,
++	0x93, 0x69, 0x0e, 0x01, 0x8c, 0xc1, 0xfd, 0xb3
++};
++static const u8 output68[] __initconst = {
++	0x26, 0x3e, 0xf2, 0xb1, 0xf5, 0xef, 0x81, 0xa4,
++	0xb7, 0x42, 0xd4, 0x26, 0x18, 0x4b, 0xdd, 0x6a,
++	0x47, 0x15, 0xcb, 0x0e, 0x57, 0xdb, 0xa7, 0x29,
++	0x7e, 0x7b, 0x3f, 0x47, 0x89, 0x57, 0xab, 0xea,
++	0x14, 0x7b, 0xcf, 0x37, 0xdb, 0x1c, 0xe1, 0x11,
++	0x77, 0xae, 0x2e, 0x4c, 0xd2, 0x08, 0x3f, 0xa6,
++	0x62, 0x86, 0xa6, 0xb2, 0x07, 0xd5, 0x3f, 0x9b,
++	0xdc, 0xc8, 0x50, 0x4b, 0x7b, 0xb9, 0x06, 0xe6,
++	0xeb, 0xac, 0x98, 0x8c, 0x36, 0x0c, 0x1e, 0xb2,
++	0xc8, 0xfb, 0x24, 0x60, 0x2c, 0x08, 0x17, 0x26,
++	0x5b, 0xc8, 0xc2, 0xdf, 0x9c, 0x73, 0x67, 0x4a,
++	0xdb, 0xcf, 0xd5, 0x2c, 0x2b, 0xca, 0x24, 0xcc,
++	0xdb, 0xc9, 0xa8, 0xf2, 0x5d, 0x67, 0xdf, 0x5c,
++	0x62, 0x0b, 0x58, 0xc0, 0x83, 0xde, 0x8b, 0xf6,
++	0x15, 0x0a, 0xd6, 0x32, 0xd8, 0xf5, 0xf2, 0x5f,
++	0x33, 0xce, 0x7e, 0xab, 0x76, 0xcd, 0x14, 0x91,
++	0xd8, 0x41, 0x90, 0x93, 0xa1, 0xaf, 0xf3, 0x45,
++	0x6c, 0x1b, 0x25, 0xbd, 0x48, 0x51, 0x6d, 0x15,
++	0x47, 0xe6, 0x23, 0x50, 0x32, 0x69, 0x1e, 0xb5,
++	0x94, 0xd3, 0x97, 0xba, 0xd7, 0x37, 0x4a, 0xba,
++	0xb9, 0xcd, 0xfb, 0x96, 0x9a, 0x90, 0xe0, 0x37,
++	0xf8, 0xdf, 0x91, 0x6c, 0x62, 0x13, 0x19, 0x21,
++	0x4b, 0xa9, 0xf1, 0x12, 0x66, 0xe2, 0x74, 0xd7,
++	0x81, 0xa0, 0x74, 0x8d, 0x7e, 0x7e, 0xc9, 0xb1,
++	0x69, 0x8f, 0xed, 0xb3, 0xf6, 0x97, 0xcd, 0x72,
++	0x78, 0x93, 0xd3, 0x54, 0x6b, 0x43, 0xac, 0x29,
++	0xb4, 0xbc, 0x7d, 0xa4, 0x26, 0x4b, 0x7b, 0xab,
++	0xd6, 0x67, 0x22, 0xff, 0x03, 0x92, 0xb6, 0xd4,
++	0x96, 0x94, 0x5a, 0xe5, 0x02, 0x35, 0x77, 0xfa,
++	0x3f, 0x54, 0x1d, 0xdd, 0x35, 0x39, 0xfe, 0x03,
++	0xdd, 0x8e, 0x3c, 0x8c, 0xc2, 0x69, 0x2a, 0xb1,
++	0xb7, 0xb3, 0xa1, 0x89, 0x84, 0xea, 0x16, 0xe2
++};
++static const u8 key68[] __initconst = {
++	0xd2, 0x49, 0x7f, 0xd7, 0x49, 0x66, 0x0d, 0xb3,
++	0x5a, 0x7e, 0x3c, 0xfc, 0x37, 0x83, 0x0e, 0xf7,
++	0x96, 0xd8, 0xd6, 0x33, 0x79, 0x2b, 0x84, 0x53,
++	0x06, 0xbc, 0x6c, 0x0a, 0x55, 0x84, 0xfe, 0xab
++};
++enum { nonce68 = 0x6a6df7ff0a20de06ULL };
++
++static const u8 input69[] __initconst = {
++	0xf9, 0x18, 0x4c, 0xd2, 0x3f, 0xf7, 0x22, 0xd9,
++	0x58, 0xb6, 0x3b, 0x38, 0x69, 0x79, 0xf4, 0x71,
++	0x5f, 0x38, 0x52, 0x1f, 0x17, 0x6f, 0x6f, 0xd9,
++	0x09, 0x2b, 0xfb, 0x67, 0xdc, 0xc9, 0xe8, 0x4a,
++	0x70, 0x9f, 0x2e, 0x3c, 0x06, 0xe5, 0x12, 0x20,
++	0x25, 0x29, 0xd0, 0xdc, 0x81, 0xc5, 0xc6, 0x0f,
++	0xd2, 0xa8, 0x81, 0x15, 0x98, 0xb2, 0x71, 0x5a,
++	0x9a, 0xe9, 0xfb, 0xaf, 0x0e, 0x5f, 0x8a, 0xf3,
++	0x16, 0x4a, 0x47, 0xf2, 0x5c, 0xbf, 0xda, 0x52,
++	0x9a, 0xa6, 0x36, 0xfd, 0xc6, 0xf7, 0x66, 0x00,
++	0xcc, 0x6c, 0xd4, 0xb3, 0x07, 0x6d, 0xeb, 0xfe,
++	0x92, 0x71, 0x25, 0xd0, 0xcf, 0x9c, 0xe8, 0x65,
++	0x45, 0x10, 0xcf, 0x62, 0x74, 0x7d, 0xf2, 0x1b,
++	0x57, 0xa0, 0xf1, 0x6b, 0xa4, 0xd5, 0xfa, 0x12,
++	0x27, 0x5a, 0xf7, 0x99, 0xfc, 0xca, 0xf3, 0xb8,
++	0x2c, 0x8b, 0xba, 0x28, 0x74, 0xde, 0x8f, 0x78,
++	0xa2, 0x8c, 0xaf, 0x89, 0x4b, 0x05, 0xe2, 0xf3,
++	0xf8, 0xd2, 0xef, 0xac, 0xa4, 0xc4, 0xe2, 0xe2,
++	0x36, 0xbb, 0x5e, 0xae, 0xe6, 0x87, 0x3d, 0x88,
++	0x9f, 0xb8, 0x11, 0xbb, 0xcf, 0x57, 0xce, 0xd0,
++	0xba, 0x62, 0xf4, 0xf8, 0x9b, 0x95, 0x04, 0xc9,
++	0xcf, 0x01, 0xe9, 0xf1, 0xc8, 0xc6, 0x22, 0xa4,
++	0xf2, 0x8b, 0x2f, 0x24, 0x0a, 0xf5, 0x6e, 0xb7,
++	0xd4, 0x2c, 0xb6, 0xf7, 0x5c, 0x97, 0x61, 0x0b,
++	0xd9, 0xb5, 0x06, 0xcd, 0xed, 0x3e, 0x1f, 0xc5,
++	0xb2, 0x6c, 0xa3, 0xea, 0xb8, 0xad, 0xa6, 0x42,
++	0x88, 0x7a, 0x52, 0xd5, 0x64, 0xba, 0xb5, 0x20,
++	0x10, 0xa0, 0x0f, 0x0d, 0xea, 0xef, 0x5a, 0x9b,
++	0x27, 0xb8, 0xca, 0x20, 0x19, 0x6d, 0xa8, 0xc4,
++	0x46, 0x04, 0xb3, 0xe8, 0xf8, 0x66, 0x1b, 0x0a,
++	0xce, 0x76, 0x5d, 0x59, 0x58, 0x05, 0xee, 0x3e,
++	0x3c, 0x86, 0x5b, 0x49, 0x1c, 0x72, 0x18, 0x01,
++	0x62, 0x92, 0x0f, 0x3e, 0xd1, 0x57, 0x5e, 0x20,
++	0x7b, 0xfb, 0x4d, 0x3c, 0xc5, 0x35, 0x43, 0x2f,
++	0xb0, 0xc5, 0x7c, 0xe4, 0xa2, 0x84, 0x13, 0x77
++};
++static const u8 output69[] __initconst = {
++	0xbb, 0x4a, 0x7f, 0x7c, 0xd5, 0x2f, 0x89, 0x06,
++	0xec, 0x20, 0xf1, 0x9a, 0x11, 0x09, 0x14, 0x2e,
++	0x17, 0x50, 0xf9, 0xd5, 0xf5, 0x48, 0x7c, 0x7a,
++	0x55, 0xc0, 0x57, 0x03, 0xe3, 0xc4, 0xb2, 0xb7,
++	0x18, 0x47, 0x95, 0xde, 0xaf, 0x80, 0x06, 0x3c,
++	0x5a, 0xf2, 0xc3, 0x53, 0xe3, 0x29, 0x92, 0xf8,
++	0xff, 0x64, 0x85, 0xb9, 0xf7, 0xd3, 0x80, 0xd2,
++	0x0c, 0x5d, 0x7b, 0x57, 0x0c, 0x51, 0x79, 0x86,
++	0xf3, 0x20, 0xd2, 0xb8, 0x6e, 0x0c, 0x5a, 0xce,
++	0xeb, 0x88, 0x02, 0x8b, 0x82, 0x1b, 0x7f, 0xf5,
++	0xde, 0x7f, 0x48, 0x48, 0xdf, 0xa0, 0x55, 0xc6,
++	0x0c, 0x22, 0xa1, 0x80, 0x8d, 0x3b, 0xcb, 0x40,
++	0x2d, 0x3d, 0x0b, 0xf2, 0xe0, 0x22, 0x13, 0x99,
++	0xe1, 0xa7, 0x27, 0x68, 0x31, 0xe1, 0x24, 0x5d,
++	0xd2, 0xee, 0x16, 0xc1, 0xd7, 0xa8, 0x14, 0x19,
++	0x23, 0x72, 0x67, 0x27, 0xdc, 0x5e, 0xb9, 0xc7,
++	0xd8, 0xe3, 0x55, 0x50, 0x40, 0x98, 0x7b, 0xe7,
++	0x34, 0x1c, 0x3b, 0x18, 0x14, 0xd8, 0x62, 0xc1,
++	0x93, 0x84, 0xf3, 0x5b, 0xdd, 0x9e, 0x1f, 0x3b,
++	0x0b, 0xbc, 0x4e, 0x5b, 0x79, 0xa3, 0xca, 0x74,
++	0x2a, 0x98, 0xe8, 0x04, 0x39, 0xef, 0xc6, 0x76,
++	0x6d, 0xee, 0x9f, 0x67, 0x5b, 0x59, 0x3a, 0xe5,
++	0xf2, 0x3b, 0xca, 0x89, 0xe8, 0x9b, 0x03, 0x3d,
++	0x11, 0xd2, 0x4a, 0x70, 0xaf, 0x88, 0xb0, 0x94,
++	0x96, 0x26, 0xab, 0x3c, 0xc1, 0xb8, 0xe4, 0xe7,
++	0x14, 0x61, 0x64, 0x3a, 0x61, 0x08, 0x0f, 0xa9,
++	0xce, 0x64, 0xb2, 0x40, 0xf8, 0x20, 0x3a, 0xa9,
++	0x31, 0xbd, 0x7e, 0x16, 0xca, 0xf5, 0x62, 0x0f,
++	0x91, 0x9f, 0x8e, 0x1d, 0xa4, 0x77, 0xf3, 0x87,
++	0x61, 0xe8, 0x14, 0xde, 0x18, 0x68, 0x4e, 0x9d,
++	0x73, 0xcd, 0x8a, 0xe4, 0x80, 0x84, 0x23, 0xaa,
++	0x9d, 0x64, 0x1c, 0x80, 0x41, 0xca, 0x82, 0x40,
++	0x94, 0x55, 0xe3, 0x28, 0xa1, 0x97, 0x71, 0xba,
++	0xf2, 0x2c, 0x39, 0x62, 0x29, 0x56, 0xd0, 0xff,
++	0xb2, 0x82, 0x20, 0x59, 0x1f, 0xc3, 0x64, 0x57
++};
++static const u8 key69[] __initconst = {
++	0x19, 0x09, 0xe9, 0x7c, 0xd9, 0x02, 0x4a, 0x0c,
++	0x52, 0x25, 0xad, 0x5c, 0x2e, 0x8d, 0x86, 0x10,
++	0x85, 0x2b, 0xba, 0xa4, 0x44, 0x5b, 0x39, 0x3e,
++	0x18, 0xaa, 0xce, 0x0e, 0xe2, 0x69, 0x3c, 0xcf
++};
++enum { nonce69 = 0xdb925a1948f0f060ULL };
++
++static const u8 input70[] __initconst = {
++	0x10, 0xe7, 0x83, 0xcf, 0x42, 0x9f, 0xf2, 0x41,
++	0xc7, 0xe4, 0xdb, 0xf9, 0xa3, 0x02, 0x1d, 0x8d,
++	0x50, 0x81, 0x2c, 0x6b, 0x92, 0xe0, 0x4e, 0xea,
++	0x26, 0x83, 0x2a, 0xd0, 0x31, 0xf1, 0x23, 0xf3,
++	0x0e, 0x88, 0x14, 0x31, 0xf9, 0x01, 0x63, 0x59,
++	0x21, 0xd1, 0x8b, 0xdd, 0x06, 0xd0, 0xc6, 0xab,
++	0x91, 0x71, 0x82, 0x4d, 0xd4, 0x62, 0x37, 0x17,
++	0xf9, 0x50, 0xf9, 0xb5, 0x74, 0xce, 0x39, 0x80,
++	0x80, 0x78, 0xf8, 0xdc, 0x1c, 0xdb, 0x7c, 0x3d,
++	0xd4, 0x86, 0x31, 0x00, 0x75, 0x7b, 0xd1, 0x42,
++	0x9f, 0x1b, 0x97, 0x88, 0x0e, 0x14, 0x0e, 0x1e,
++	0x7d, 0x7b, 0xc4, 0xd2, 0xf3, 0xc1, 0x6d, 0x17,
++	0x5d, 0xc4, 0x75, 0x54, 0x0f, 0x38, 0x65, 0x89,
++	0xd8, 0x7d, 0xab, 0xc9, 0xa7, 0x0a, 0x21, 0x0b,
++	0x37, 0x12, 0x05, 0x07, 0xb5, 0x68, 0x32, 0x32,
++	0xb9, 0xf8, 0x97, 0x17, 0x03, 0xed, 0x51, 0x8f,
++	0x3d, 0x5a, 0xd0, 0x12, 0x01, 0x6e, 0x2e, 0x91,
++	0x1c, 0xbe, 0x6b, 0xa3, 0xcc, 0x75, 0x62, 0x06,
++	0x8e, 0x65, 0xbb, 0xe2, 0x29, 0x71, 0x4b, 0x89,
++	0x6a, 0x9d, 0x85, 0x8c, 0x8c, 0xdf, 0x94, 0x95,
++	0x23, 0x66, 0xf8, 0x92, 0xee, 0x56, 0xeb, 0xb3,
++	0xeb, 0xd2, 0x4a, 0x3b, 0x77, 0x8a, 0x6e, 0xf6,
++	0xca, 0xd2, 0x34, 0x00, 0xde, 0xbe, 0x1d, 0x7a,
++	0x73, 0xef, 0x2b, 0x80, 0x56, 0x16, 0x29, 0xbf,
++	0x6e, 0x33, 0xed, 0x0d, 0xe2, 0x02, 0x60, 0x74,
++	0xe9, 0x0a, 0xbc, 0xd1, 0xc5, 0xe8, 0x53, 0x02,
++	0x79, 0x0f, 0x25, 0x0c, 0xef, 0xab, 0xd3, 0xbc,
++	0xb7, 0xfc, 0xf3, 0xb0, 0x34, 0xd1, 0x07, 0xd2,
++	0x5a, 0x31, 0x1f, 0xec, 0x1f, 0x87, 0xed, 0xdd,
++	0x6a, 0xc1, 0xe8, 0xb3, 0x25, 0x4c, 0xc6, 0x9b,
++	0x91, 0x73, 0xec, 0x06, 0x73, 0x9e, 0x57, 0x65,
++	0x32, 0x75, 0x11, 0x74, 0x6e, 0xa4, 0x7d, 0x0d,
++	0x74, 0x9f, 0x51, 0x10, 0x10, 0x47, 0xc9, 0x71,
++	0x6e, 0x97, 0xae, 0x44, 0x41, 0xef, 0x98, 0x78,
++	0xf4, 0xc5, 0xbd, 0x5e, 0x00, 0xe5, 0xfd, 0xe2,
++	0xbe, 0x8c, 0xc2, 0xae, 0xc2, 0xee, 0x59, 0xf6,
++	0xcb, 0x20, 0x54, 0x84, 0xc3, 0x31, 0x7e, 0x67,
++	0x71, 0xb6, 0x76, 0xbe, 0x81, 0x8f, 0x82, 0xad,
++	0x01, 0x8f, 0xc4, 0x00, 0x04, 0x3d, 0x8d, 0x34,
++	0xaa, 0xea, 0xc0, 0xea, 0x91, 0x42, 0xb6, 0xb8,
++	0x43, 0xf3, 0x17, 0xb2, 0x73, 0x64, 0x82, 0x97,
++	0xd5, 0xc9, 0x07, 0x77, 0xb1, 0x26, 0xe2, 0x00,
++	0x6a, 0xae, 0x70, 0x0b, 0xbe, 0xe6, 0xb8, 0x42,
++	0x81, 0x55, 0xf7, 0xb8, 0x96, 0x41, 0x9d, 0xd4,
++	0x2c, 0x27, 0x00, 0xcc, 0x91, 0x28, 0x22, 0xa4,
++	0x7b, 0x42, 0x51, 0x9e, 0xd6, 0xec, 0xf3, 0x6b,
++	0x00, 0xff, 0x5c, 0xa2, 0xac, 0x47, 0x33, 0x2d,
++	0xf8, 0x11, 0x65, 0x5f, 0x4d, 0x79, 0x8b, 0x4f,
++	0xad, 0xf0, 0x9d, 0xcd, 0xb9, 0x7b, 0x08, 0xf7,
++	0x32, 0x51, 0xfa, 0x39, 0xaa, 0x78, 0x05, 0xb1,
++	0xf3, 0x5d, 0xe8, 0x7c, 0x8e, 0x4f, 0xa2, 0xe0,
++	0x98, 0x0c, 0xb2, 0xa7, 0xf0, 0x35, 0x8e, 0x70,
++	0x7c, 0x82, 0xf3, 0x1b, 0x26, 0x28, 0x12, 0xe5,
++	0x23, 0x57, 0xe4, 0xb4, 0x9b, 0x00, 0x39, 0x97,
++	0xef, 0x7c, 0x46, 0x9b, 0x34, 0x6b, 0xe7, 0x0e,
++	0xa3, 0x2a, 0x18, 0x11, 0x64, 0xc6, 0x7c, 0x8b,
++	0x06, 0x02, 0xf5, 0x69, 0x76, 0xf9, 0xaa, 0x09,
++	0x5f, 0x68, 0xf8, 0x4a, 0x79, 0x58, 0xec, 0x37,
++	0xcf, 0x3a, 0xcc, 0x97, 0x70, 0x1d, 0x3e, 0x52,
++	0x18, 0x0a, 0xad, 0x28, 0x5b, 0x3b, 0xe9, 0x03,
++	0x84, 0xe9, 0x68, 0x50, 0xce, 0xc4, 0xbc, 0x3e,
++	0x21, 0xad, 0x63, 0xfe, 0xc6, 0xfd, 0x6e, 0x69,
++	0x84, 0xa9, 0x30, 0xb1, 0x7a, 0xc4, 0x31, 0x10,
++	0xc1, 0x1f, 0x6e, 0xeb, 0xa5, 0xa6, 0x01
++};
++static const u8 output70[] __initconst = {
++	0x0f, 0x93, 0x2a, 0x20, 0xb3, 0x87, 0x2d, 0xce,
++	0xd1, 0x3b, 0x30, 0xfd, 0x06, 0x6d, 0x0a, 0xaa,
++	0x3e, 0xc4, 0x29, 0x02, 0x8a, 0xde, 0xa6, 0x4b,
++	0x45, 0x1b, 0x4f, 0x25, 0x59, 0xd5, 0x56, 0x6a,
++	0x3b, 0x37, 0xbd, 0x3e, 0x47, 0x12, 0x2c, 0x4e,
++	0x60, 0x5f, 0x05, 0x75, 0x61, 0x23, 0x05, 0x74,
++	0xcb, 0xfc, 0x5a, 0xb3, 0xac, 0x5c, 0x3d, 0xab,
++	0x52, 0x5f, 0x05, 0xbc, 0x57, 0xc0, 0x7e, 0xcf,
++	0x34, 0x5d, 0x7f, 0x41, 0xa3, 0x17, 0x78, 0xd5,
++	0x9f, 0xec, 0x0f, 0x1e, 0xf9, 0xfe, 0xa3, 0xbd,
++	0x28, 0xb0, 0xba, 0x4d, 0x84, 0xdb, 0xae, 0x8f,
++	0x1d, 0x98, 0xb7, 0xdc, 0xf9, 0xad, 0x55, 0x9c,
++	0x89, 0xfe, 0x9b, 0x9c, 0xa9, 0x89, 0xf6, 0x97,
++	0x9c, 0x3f, 0x09, 0x3e, 0xc6, 0x02, 0xc2, 0x55,
++	0x58, 0x09, 0x54, 0x66, 0xe4, 0x36, 0x81, 0x35,
++	0xca, 0x88, 0x17, 0x89, 0x80, 0x24, 0x2b, 0x21,
++	0x89, 0xee, 0x45, 0x5a, 0xe7, 0x1f, 0xd5, 0xa5,
++	0x16, 0xa4, 0xda, 0x70, 0x7e, 0xe9, 0x4f, 0x24,
++	0x61, 0x97, 0xab, 0xa0, 0xe0, 0xe7, 0xb8, 0x5c,
++	0x0f, 0x25, 0x17, 0x37, 0x75, 0x12, 0xb5, 0x40,
++	0xde, 0x1c, 0x0d, 0x8a, 0x77, 0x62, 0x3c, 0x86,
++	0xd9, 0x70, 0x2e, 0x96, 0x30, 0xd2, 0x55, 0xb3,
++	0x6b, 0xc3, 0xf2, 0x9c, 0x47, 0xf3, 0x3a, 0x24,
++	0x52, 0xc6, 0x38, 0xd8, 0x22, 0xb3, 0x0c, 0xfd,
++	0x2f, 0xa3, 0x3c, 0xb5, 0xe8, 0x26, 0xe1, 0xa3,
++	0xad, 0xb0, 0x82, 0x17, 0xc1, 0x53, 0xb8, 0x34,
++	0x48, 0xee, 0x39, 0xae, 0x51, 0x43, 0xec, 0x82,
++	0xce, 0x87, 0xc6, 0x76, 0xb9, 0x76, 0xd3, 0x53,
++	0xfe, 0x49, 0x24, 0x7d, 0x02, 0x42, 0x2b, 0x72,
++	0xfb, 0xcb, 0xd8, 0x96, 0x02, 0xc6, 0x9a, 0x20,
++	0xf3, 0x5a, 0x67, 0xe8, 0x13, 0xf8, 0xb2, 0xcb,
++	0xa2, 0xec, 0x18, 0x20, 0x4a, 0xb0, 0x73, 0x53,
++	0x21, 0xb0, 0x77, 0x53, 0xd8, 0x76, 0xa1, 0x30,
++	0x17, 0x72, 0x2e, 0x33, 0x5f, 0x33, 0x6b, 0x28,
++	0xfb, 0xb0, 0xf4, 0xec, 0x8e, 0xed, 0x20, 0x7d,
++	0x57, 0x8c, 0x74, 0x28, 0x64, 0x8b, 0xeb, 0x59,
++	0x38, 0x3f, 0xe7, 0x83, 0x2e, 0xe5, 0x64, 0x4d,
++	0x5c, 0x1f, 0xe1, 0x3b, 0xd9, 0x84, 0xdb, 0xc9,
++	0xec, 0xd8, 0xc1, 0x7c, 0x1f, 0x1b, 0x68, 0x35,
++	0xc6, 0x34, 0x10, 0xef, 0x19, 0xc9, 0x0a, 0xd6,
++	0x43, 0x7f, 0xa6, 0xcb, 0x9d, 0xf4, 0xf0, 0x16,
++	0xb1, 0xb1, 0x96, 0x64, 0xec, 0x8d, 0x22, 0x4c,
++	0x4b, 0xe8, 0x1a, 0xba, 0x6f, 0xb7, 0xfc, 0xa5,
++	0x69, 0x3e, 0xad, 0x78, 0x79, 0x19, 0xb5, 0x04,
++	0x69, 0xe5, 0x3f, 0xff, 0x60, 0x8c, 0xda, 0x0b,
++	0x7b, 0xf7, 0xe7, 0xe6, 0x29, 0x3a, 0x85, 0xba,
++	0xb5, 0xb0, 0x35, 0xbd, 0x38, 0xce, 0x34, 0x5e,
++	0xf2, 0xdc, 0xd1, 0x8f, 0xc3, 0x03, 0x24, 0xa2,
++	0x03, 0xf7, 0x4e, 0x49, 0x5b, 0xcf, 0x6d, 0xb0,
++	0xeb, 0xe3, 0x30, 0x28, 0xd5, 0x5b, 0x82, 0x5f,
++	0xe4, 0x7c, 0x1e, 0xec, 0xd2, 0x39, 0xf9, 0x6f,
++	0x2e, 0xb3, 0xcd, 0x01, 0xb1, 0x67, 0xaa, 0xea,
++	0xaa, 0xb3, 0x63, 0xaf, 0xd9, 0xb2, 0x1f, 0xba,
++	0x05, 0x20, 0xeb, 0x19, 0x32, 0xf0, 0x6c, 0x3f,
++	0x40, 0xcc, 0x93, 0xb3, 0xd8, 0x25, 0xa6, 0xe4,
++	0xce, 0xd7, 0x7e, 0x48, 0x99, 0x65, 0x7f, 0x86,
++	0xc5, 0xd4, 0x79, 0x6b, 0xab, 0x43, 0xb8, 0x6b,
++	0xf1, 0x2f, 0xea, 0x4c, 0x5e, 0xf0, 0x3b, 0xb4,
++	0xb8, 0xb0, 0x94, 0x0c, 0x6b, 0xe7, 0x22, 0x93,
++	0xaa, 0x01, 0xcb, 0xf1, 0x11, 0x60, 0xf6, 0x69,
++	0xcf, 0x14, 0xde, 0xfb, 0x90, 0x05, 0x27, 0x0c,
++	0x1a, 0x9e, 0xf0, 0xb4, 0xc6, 0xa1, 0xe8, 0xdd,
++	0xd0, 0x4c, 0x25, 0x4f, 0x9c, 0xb7, 0xb1, 0xb0,
++	0x21, 0xdb, 0x87, 0x09, 0x03, 0xf2, 0xb3
++};
++static const u8 key70[] __initconst = {
++	0x3b, 0x5b, 0x59, 0x36, 0x44, 0xd1, 0xba, 0x71,
++	0x55, 0x87, 0x4d, 0x62, 0x3d, 0xc2, 0xfc, 0xaa,
++	0x3f, 0x4e, 0x1a, 0xe4, 0xca, 0x09, 0xfc, 0x6a,
++	0xb2, 0xd6, 0x5d, 0x79, 0xf9, 0x1a, 0x91, 0xa7
++};
++enum { nonce70 = 0x3fd6786dd147a85ULL };
++
++static const u8 input71[] __initconst = {
++	0x18, 0x78, 0xd6, 0x79, 0xe4, 0x9a, 0x6c, 0x73,
++	0x17, 0xd4, 0x05, 0x0f, 0x1e, 0x9f, 0xd9, 0x2b,
++	0x86, 0x48, 0x7d, 0xf4, 0xd9, 0x1c, 0x76, 0xfc,
++	0x8e, 0x22, 0x34, 0xe1, 0x48, 0x4a, 0x8d, 0x79,
++	0xb7, 0xbb, 0x88, 0xab, 0x90, 0xde, 0xc5, 0xb4,
++	0xb4, 0xe7, 0x85, 0x49, 0xda, 0x57, 0xeb, 0xc9,
++	0xcd, 0x21, 0xfc, 0x45, 0x6e, 0x32, 0x67, 0xf2,
++	0x4f, 0xa6, 0x54, 0xe5, 0x20, 0xed, 0xcf, 0xc6,
++	0x62, 0x25, 0x8e, 0x00, 0xf8, 0x6b, 0xa2, 0x80,
++	0xac, 0x88, 0xa6, 0x59, 0x27, 0x83, 0x95, 0x11,
++	0x3f, 0x70, 0x5e, 0x3f, 0x11, 0xfb, 0x26, 0xbf,
++	0xe1, 0x48, 0x75, 0xf9, 0x86, 0xbf, 0xa6, 0x5d,
++	0x15, 0x61, 0x66, 0xbf, 0x78, 0x8f, 0x6b, 0x9b,
++	0xda, 0x98, 0xb7, 0x19, 0xe2, 0xf2, 0xa3, 0x9c,
++	0x7c, 0x6a, 0x9a, 0xd8, 0x3d, 0x4c, 0x2c, 0xe1,
++	0x09, 0xb4, 0x28, 0x82, 0x4e, 0xab, 0x0c, 0x75,
++	0x63, 0xeb, 0xbc, 0xd0, 0x71, 0xa2, 0x73, 0x85,
++	0xed, 0x53, 0x7a, 0x3f, 0x68, 0x9f, 0xd0, 0xa9,
++	0x00, 0x5a, 0x9e, 0x80, 0x55, 0x00, 0xe6, 0xae,
++	0x0c, 0x03, 0x40, 0xed, 0xfc, 0x68, 0x4a, 0xb7,
++	0x1e, 0x09, 0x65, 0x30, 0x5a, 0x3d, 0x97, 0x4d,
++	0x5e, 0x51, 0x8e, 0xda, 0xc3, 0x55, 0x8c, 0xfb,
++	0xcf, 0x83, 0x05, 0x35, 0x0d, 0x08, 0x1b, 0xf3,
++	0x3a, 0x57, 0x96, 0xac, 0x58, 0x8b, 0xfa, 0x00,
++	0x49, 0x15, 0x78, 0xd2, 0x4b, 0xed, 0xb8, 0x59,
++	0x78, 0x9b, 0x7f, 0xaa, 0xfc, 0xe7, 0x46, 0xdc,
++	0x7b, 0x34, 0xd0, 0x34, 0xe5, 0x10, 0xff, 0x4d,
++	0x5a, 0x4d, 0x60, 0xa7, 0x16, 0x54, 0xc4, 0xfd,
++	0xca, 0x5d, 0x68, 0xc7, 0x4a, 0x01, 0x8d, 0x7f,
++	0x74, 0x5d, 0xff, 0xb8, 0x37, 0x15, 0x62, 0xfa,
++	0x44, 0x45, 0xcf, 0x77, 0x3b, 0x1d, 0xb2, 0xd2,
++	0x0d, 0x42, 0x00, 0x39, 0x68, 0x1f, 0xcc, 0x89,
++	0x73, 0x5d, 0xa9, 0x2e, 0xfd, 0x58, 0x62, 0xca,
++	0x35, 0x8e, 0x70, 0x70, 0xaa, 0x6e, 0x14, 0xe9,
++	0xa4, 0xe2, 0x10, 0x66, 0x71, 0xdc, 0x4c, 0xfc,
++	0xa9, 0xdc, 0x8f, 0x57, 0x4d, 0xc5, 0xac, 0xd7,
++	0xa9, 0xf3, 0xf3, 0xa1, 0xff, 0x62, 0xa0, 0x8f,
++	0xe4, 0x96, 0x3e, 0xcb, 0x9f, 0x76, 0x42, 0x39,
++	0x1f, 0x24, 0xfd, 0xfd, 0x79, 0xe8, 0x27, 0xdf,
++	0xa8, 0xf6, 0x33, 0x8b, 0x31, 0x59, 0x69, 0xcf,
++	0x6a, 0xef, 0x89, 0x4d, 0xa7, 0xf6, 0x7e, 0x97,
++	0x14, 0xbd, 0xda, 0xdd, 0xb4, 0x84, 0x04, 0x24,
++	0xe0, 0x17, 0xe1, 0x0f, 0x1f, 0x8a, 0x6a, 0x71,
++	0x74, 0x41, 0xdc, 0x59, 0x5c, 0x8f, 0x01, 0x25,
++	0x92, 0xf0, 0x2e, 0x15, 0x62, 0x71, 0x9a, 0x9f,
++	0x87, 0xdf, 0x62, 0x49, 0x7f, 0x86, 0x62, 0xfc,
++	0x20, 0x84, 0xd7, 0xe3, 0x3a, 0xd9, 0x37, 0x85,
++	0xb7, 0x84, 0x5a, 0xf9, 0xed, 0x21, 0x32, 0x94,
++	0x3e, 0x04, 0xe7, 0x8c, 0x46, 0x76, 0x21, 0x67,
++	0xf6, 0x95, 0x64, 0x92, 0xb7, 0x15, 0xf6, 0xe3,
++	0x41, 0x27, 0x9d, 0xd7, 0xe3, 0x79, 0x75, 0x92,
++	0xd0, 0xc1, 0xf3, 0x40, 0x92, 0x08, 0xde, 0x90,
++	0x22, 0x82, 0xb2, 0x69, 0xae, 0x1a, 0x35, 0x11,
++	0x89, 0xc8, 0x06, 0x82, 0x95, 0x23, 0x44, 0x08,
++	0x22, 0xf2, 0x71, 0x73, 0x1b, 0x88, 0x11, 0xcf,
++	0x1c, 0x7e, 0x8a, 0x2e, 0xdc, 0x79, 0x57, 0xce,
++	0x1f, 0xe7, 0x6c, 0x07, 0xd8, 0x06, 0xbe, 0xec,
++	0xa3, 0xcf, 0xf9, 0x68, 0xa5, 0xb8, 0xf0, 0xe3,
++	0x3f, 0x01, 0x92, 0xda, 0xf1, 0xa0, 0x2d, 0x7b,
++	0xab, 0x57, 0x58, 0x2a, 0xaf, 0xab, 0xbd, 0xf2,
++	0xe5, 0xaf, 0x7e, 0x1f, 0x46, 0x24, 0x9e, 0x20,
++	0x22, 0x0f, 0x84, 0x4c, 0xb7, 0xd8, 0x03, 0xe8,
++	0x09, 0x73, 0x6c, 0xc6, 0x9b, 0x90, 0xe0, 0xdb,
++	0xf2, 0x71, 0xba, 0xad, 0xb3, 0xec, 0xda, 0x7a
++};
++static const u8 output71[] __initconst = {
++	0x28, 0xc5, 0x9b, 0x92, 0xf9, 0x21, 0x4f, 0xbb,
++	0xef, 0x3b, 0xf0, 0xf5, 0x3a, 0x6d, 0x7f, 0xd6,
++	0x6a, 0x8d, 0xa1, 0x01, 0x5c, 0x62, 0x20, 0x8b,
++	0x5b, 0x39, 0xd5, 0xd3, 0xc2, 0xf6, 0x9d, 0x5e,
++	0xcc, 0xe1, 0xa2, 0x61, 0x16, 0xe2, 0xce, 0xe9,
++	0x86, 0xd0, 0xfc, 0xce, 0x9a, 0x28, 0x27, 0xc4,
++	0x0c, 0xb9, 0xaa, 0x8d, 0x48, 0xdb, 0xbf, 0x82,
++	0x7d, 0xd0, 0x35, 0xc4, 0x06, 0x34, 0xb4, 0x19,
++	0x51, 0x73, 0xf4, 0x7a, 0xf4, 0xfd, 0xe9, 0x1d,
++	0xdc, 0x0f, 0x7e, 0xf7, 0x96, 0x03, 0xe3, 0xb1,
++	0x2e, 0x22, 0x59, 0xb7, 0x6d, 0x1c, 0x97, 0x8c,
++	0xd7, 0x31, 0x08, 0x26, 0x4c, 0x6d, 0xc6, 0x14,
++	0xa5, 0xeb, 0x45, 0x6a, 0x88, 0xa3, 0xa2, 0x36,
++	0xc4, 0x35, 0xb1, 0x5a, 0xa0, 0xad, 0xf7, 0x06,
++	0x9b, 0x5d, 0xc1, 0x15, 0xc1, 0xce, 0x0a, 0xb0,
++	0x57, 0x2e, 0x3f, 0x6f, 0x0d, 0x10, 0xd9, 0x11,
++	0x2c, 0x9c, 0xad, 0x2d, 0xa5, 0x81, 0xfb, 0x4e,
++	0x8f, 0xd5, 0x32, 0x4e, 0xaf, 0x5c, 0xc1, 0x86,
++	0xde, 0x56, 0x5a, 0x33, 0x29, 0xf7, 0x67, 0xc6,
++	0x37, 0x6f, 0xb2, 0x37, 0x4e, 0xd4, 0x69, 0x79,
++	0xaf, 0xd5, 0x17, 0x79, 0xe0, 0xba, 0x62, 0xa3,
++	0x68, 0xa4, 0x87, 0x93, 0x8d, 0x7e, 0x8f, 0xa3,
++	0x9c, 0xef, 0xda, 0xe3, 0xa5, 0x1f, 0xcd, 0x30,
++	0xa6, 0x55, 0xac, 0x4c, 0x69, 0x74, 0x02, 0xc7,
++	0x5d, 0x95, 0x81, 0x4a, 0x68, 0x11, 0xd3, 0xa9,
++	0x98, 0xb1, 0x0b, 0x0d, 0xae, 0x40, 0x86, 0x65,
++	0xbf, 0xcc, 0x2d, 0xef, 0x57, 0xca, 0x1f, 0xe4,
++	0x34, 0x4e, 0xa6, 0x5e, 0x82, 0x6e, 0x61, 0xad,
++	0x0b, 0x3c, 0xf8, 0xeb, 0x01, 0x43, 0x7f, 0x87,
++	0xa2, 0xa7, 0x6a, 0xe9, 0x62, 0x23, 0x24, 0x61,
++	0xf1, 0xf7, 0x36, 0xdb, 0x10, 0xe5, 0x57, 0x72,
++	0x3a, 0xc2, 0xae, 0xcc, 0x75, 0xc7, 0x80, 0x05,
++	0x0a, 0x5c, 0x4c, 0x95, 0xda, 0x02, 0x01, 0x14,
++	0x06, 0x6b, 0x5c, 0x65, 0xc2, 0xb8, 0x4a, 0xd6,
++	0xd3, 0xb4, 0xd8, 0x12, 0x52, 0xb5, 0x60, 0xd3,
++	0x8e, 0x5f, 0x5c, 0x76, 0x33, 0x7a, 0x05, 0xe5,
++	0xcb, 0xef, 0x4f, 0x89, 0xf1, 0xba, 0x32, 0x6f,
++	0x33, 0xcd, 0x15, 0x8d, 0xa3, 0x0c, 0x3f, 0x63,
++	0x11, 0xe7, 0x0e, 0xe0, 0x00, 0x01, 0xe9, 0xe8,
++	0x8e, 0x36, 0x34, 0x8d, 0x96, 0xb5, 0x03, 0xcf,
++	0x55, 0x62, 0x49, 0x7a, 0x34, 0x44, 0xa5, 0xee,
++	0x8c, 0x46, 0x06, 0x22, 0xab, 0x1d, 0x53, 0x9c,
++	0xa1, 0xf9, 0x67, 0x18, 0x57, 0x89, 0xf9, 0xc2,
++	0xd1, 0x7e, 0xbe, 0x36, 0x40, 0xcb, 0xe9, 0x04,
++	0xde, 0xb1, 0x3b, 0x29, 0x52, 0xc5, 0x9a, 0xb5,
++	0xa2, 0x7c, 0x7b, 0xfe, 0xe5, 0x92, 0x73, 0xea,
++	0xea, 0x7b, 0xba, 0x0a, 0x8c, 0x88, 0x15, 0xe6,
++	0x53, 0xbf, 0x1c, 0x33, 0xf4, 0x9b, 0x9a, 0x5e,
++	0x8d, 0xae, 0x60, 0xdc, 0xcb, 0x5d, 0xfa, 0xbe,
++	0x06, 0xc3, 0x3f, 0x06, 0xe7, 0x00, 0x40, 0x7b,
++	0xaa, 0x94, 0xfa, 0x6d, 0x1f, 0xe4, 0xc5, 0xa9,
++	0x1b, 0x5f, 0x36, 0xea, 0x5a, 0xdd, 0xa5, 0x48,
++	0x6a, 0x55, 0xd2, 0x47, 0x28, 0xbf, 0x96, 0xf1,
++	0x9f, 0xb6, 0x11, 0x4b, 0xd3, 0x44, 0x7d, 0x48,
++	0x41, 0x61, 0xdb, 0x12, 0xd4, 0xc2, 0x59, 0x82,
++	0x4c, 0x47, 0x5c, 0x04, 0xf6, 0x7b, 0xd3, 0x92,
++	0x2e, 0xe8, 0x40, 0xef, 0x15, 0x32, 0x97, 0xdc,
++	0x35, 0x4c, 0x6e, 0xa4, 0x97, 0xe9, 0x24, 0xde,
++	0x63, 0x8b, 0xb1, 0x6b, 0x48, 0xbb, 0x46, 0x1f,
++	0x84, 0xd6, 0x17, 0xb0, 0x5a, 0x4a, 0x4e, 0xd5,
++	0x31, 0xd7, 0xcf, 0xa0, 0x39, 0xc6, 0x2e, 0xfc,
++	0xa6, 0xa3, 0xd3, 0x0f, 0xa4, 0x28, 0xac, 0xb2,
++	0xf4, 0x48, 0x8d, 0x50, 0xa5, 0x1c, 0x44, 0x5d,
++	0x6e, 0x38, 0xb7, 0x2b, 0x8a, 0x45, 0xa7, 0x3d
++};
++static const u8 key71[] __initconst = {
++	0x8b, 0x68, 0xc4, 0xb7, 0x0d, 0x81, 0xef, 0x52,
++	0x1e, 0x05, 0x96, 0x72, 0x62, 0x89, 0x27, 0x83,
++	0xd0, 0xc7, 0x33, 0x6d, 0xf2, 0xcc, 0x69, 0xf9,
++	0x23, 0xae, 0x99, 0xb1, 0xd1, 0x05, 0x4e, 0x54
++};
++enum { nonce71 = 0x983f03656d64b5f6ULL };
++
++static const u8 input72[] __initconst = {
++	0x6b, 0x09, 0xc9, 0x57, 0x3d, 0x79, 0x04, 0x8c,
++	0x65, 0xad, 0x4a, 0x0f, 0xa1, 0x31, 0x3a, 0xdd,
++	0x14, 0x8e, 0xe8, 0xfe, 0xbf, 0x42, 0x87, 0x98,
++	0x2e, 0x8d, 0x83, 0xa3, 0xf8, 0x55, 0x3d, 0x84,
++	0x1e, 0x0e, 0x05, 0x4a, 0x38, 0x9e, 0xe7, 0xfe,
++	0xd0, 0x4d, 0x79, 0x74, 0x3a, 0x0b, 0x9b, 0xe1,
++	0xfd, 0x51, 0x84, 0x4e, 0xb2, 0x25, 0xe4, 0x64,
++	0x4c, 0xda, 0xcf, 0x46, 0xec, 0xba, 0x12, 0xeb,
++	0x5a, 0x33, 0x09, 0x6e, 0x78, 0x77, 0x8f, 0x30,
++	0xb1, 0x7d, 0x3f, 0x60, 0x8c, 0xf2, 0x1d, 0x8e,
++	0xb4, 0x70, 0xa2, 0x90, 0x7c, 0x79, 0x1a, 0x2c,
++	0xf6, 0x28, 0x79, 0x7c, 0x53, 0xc5, 0xfa, 0xcc,
++	0x65, 0x9b, 0xe1, 0x51, 0xd1, 0x7f, 0x1d, 0xc4,
++	0xdb, 0xd4, 0xd9, 0x04, 0x61, 0x7d, 0xbe, 0x12,
++	0xfc, 0xcd, 0xaf, 0xe4, 0x0f, 0x9c, 0x20, 0xb5,
++	0x22, 0x40, 0x18, 0xda, 0xe4, 0xda, 0x8c, 0x2d,
++	0x84, 0xe3, 0x5f, 0x53, 0x17, 0xed, 0x78, 0xdc,
++	0x2f, 0xe8, 0x31, 0xc7, 0xe6, 0x39, 0x71, 0x40,
++	0xb4, 0x0f, 0xc9, 0xa9, 0x7e, 0x78, 0x87, 0xc1,
++	0x05, 0x78, 0xbb, 0x01, 0xf2, 0x8f, 0x33, 0xb0,
++	0x6e, 0x84, 0xcd, 0x36, 0x33, 0x5c, 0x5b, 0x8e,
++	0xf1, 0xac, 0x30, 0xfe, 0x33, 0xec, 0x08, 0xf3,
++	0x7e, 0xf2, 0xf0, 0x4c, 0xf2, 0xad, 0xd8, 0xc1,
++	0xd4, 0x4e, 0x87, 0x06, 0xd4, 0x75, 0xe7, 0xe3,
++	0x09, 0xd3, 0x4d, 0xe3, 0x21, 0x32, 0xba, 0xb4,
++	0x68, 0x68, 0xcb, 0x4c, 0xa3, 0x1e, 0xb3, 0x87,
++	0x7b, 0xd3, 0x0c, 0x63, 0x37, 0x71, 0x79, 0xfb,
++	0x58, 0x36, 0x57, 0x0f, 0x34, 0x1d, 0xc1, 0x42,
++	0x02, 0x17, 0xe7, 0xed, 0xe8, 0xe7, 0x76, 0xcb,
++	0x42, 0xc4, 0x4b, 0xe2, 0xb2, 0x5e, 0x42, 0xd5,
++	0xec, 0x9d, 0xc1, 0x32, 0x71, 0xe4, 0xeb, 0x10,
++	0x68, 0x1a, 0x6e, 0x99, 0x8e, 0x73, 0x12, 0x1f,
++	0x97, 0x0c, 0x9e, 0xcd, 0x02, 0x3e, 0x4c, 0xa0,
++	0xf2, 0x8d, 0xe5, 0x44, 0xca, 0x6d, 0xfe, 0x07,
++	0xe3, 0xe8, 0x9b, 0x76, 0xc1, 0x6d, 0xb7, 0x6e,
++	0x0d, 0x14, 0x00, 0x6f, 0x8a, 0xfd, 0x43, 0xc6,
++	0x43, 0xa5, 0x9c, 0x02, 0x47, 0x10, 0xd4, 0xb4,
++	0x9b, 0x55, 0x67, 0xc8, 0x7f, 0xc1, 0x8a, 0x1f,
++	0x1e, 0xd1, 0xbc, 0x99, 0x5d, 0x50, 0x4f, 0x89,
++	0xf1, 0xe6, 0x5d, 0x91, 0x40, 0xdc, 0x20, 0x67,
++	0x56, 0xc2, 0xef, 0xbd, 0x2c, 0xa2, 0x99, 0x38,
++	0xe0, 0x45, 0xec, 0x44, 0x05, 0x52, 0x65, 0x11,
++	0xfc, 0x3b, 0x19, 0xcb, 0x71, 0xc2, 0x8e, 0x0e,
++	0x03, 0x2a, 0x03, 0x3b, 0x63, 0x06, 0x31, 0x9a,
++	0xac, 0x53, 0x04, 0x14, 0xd4, 0x80, 0x9d, 0x6b,
++	0x42, 0x7e, 0x7e, 0x4e, 0xdc, 0xc7, 0x01, 0x49,
++	0x9f, 0xf5, 0x19, 0x86, 0x13, 0x28, 0x2b, 0xa6,
++	0xa6, 0xbe, 0xa1, 0x7e, 0x71, 0x05, 0x00, 0xff,
++	0x59, 0x2d, 0xb6, 0x63, 0xf0, 0x1e, 0x2e, 0x69,
++	0x9b, 0x85, 0xf1, 0x1e, 0x8a, 0x64, 0x39, 0xab,
++	0x00, 0x12, 0xe4, 0x33, 0x4b, 0xb5, 0xd8, 0xb3,
++	0x6b, 0x5b, 0x8b, 0x5c, 0xd7, 0x6f, 0x23, 0xcf,
++	0x3f, 0x2e, 0x5e, 0x47, 0xb9, 0xb8, 0x1f, 0xf0,
++	0x1d, 0xda, 0xe7, 0x4f, 0x6e, 0xab, 0xc3, 0x36,
++	0xb4, 0x74, 0x6b, 0xeb, 0xc7, 0x5d, 0x91, 0xe5,
++	0xda, 0xf2, 0xc2, 0x11, 0x17, 0x48, 0xf8, 0x9c,
++	0xc9, 0x8b, 0xc1, 0xa2, 0xf4, 0xcd, 0x16, 0xf8,
++	0x27, 0xd9, 0x6c, 0x6f, 0xb5, 0x8f, 0x77, 0xca,
++	0x1b, 0xd8, 0xef, 0x84, 0x68, 0x71, 0x53, 0xc1,
++	0x43, 0x0f, 0x9f, 0x98, 0xae, 0x7e, 0x31, 0xd2,
++	0x98, 0xfb, 0x20, 0xa2, 0xad, 0x00, 0x10, 0x83,
++	0x00, 0x8b, 0xeb, 0x56, 0xd2, 0xc4, 0xcc, 0x7f,
++	0x2f, 0x4e, 0xfa, 0x88, 0x13, 0xa4, 0x2c, 0xde,
++	0x6b, 0x77, 0x86, 0x10, 0x6a, 0xab, 0x43, 0x0a,
++	0x02
++};
++static const u8 output72[] __initconst = {
++	0x42, 0x89, 0xa4, 0x80, 0xd2, 0xcb, 0x5f, 0x7f,
++	0x2a, 0x1a, 0x23, 0x00, 0xa5, 0x6a, 0x95, 0xa3,
++	0x9a, 0x41, 0xa1, 0xd0, 0x2d, 0x1e, 0xd6, 0x13,
++	0x34, 0x40, 0x4e, 0x7f, 0x1a, 0xbe, 0xa0, 0x3d,
++	0x33, 0x9c, 0x56, 0x2e, 0x89, 0x25, 0x45, 0xf9,
++	0xf0, 0xba, 0x9c, 0x6d, 0xd1, 0xd1, 0xde, 0x51,
++	0x47, 0x63, 0xc9, 0xbd, 0xfa, 0xa2, 0x9e, 0xad,
++	0x6a, 0x7b, 0x21, 0x1a, 0x6c, 0x3e, 0xff, 0x46,
++	0xbe, 0xf3, 0x35, 0x7a, 0x6e, 0xb3, 0xb9, 0xf7,
++	0xda, 0x5e, 0xf0, 0x14, 0xb5, 0x70, 0xa4, 0x2b,
++	0xdb, 0xbb, 0xc7, 0x31, 0x4b, 0x69, 0x5a, 0x83,
++	0x70, 0xd9, 0x58, 0xd4, 0x33, 0x84, 0x23, 0xf0,
++	0xae, 0xbb, 0x6d, 0x26, 0x7c, 0xc8, 0x30, 0xf7,
++	0x24, 0xad, 0xbd, 0xe4, 0x2c, 0x38, 0x38, 0xac,
++	0xe1, 0x4a, 0x9b, 0xac, 0x33, 0x0e, 0x4a, 0xf4,
++	0x93, 0xed, 0x07, 0x82, 0x81, 0x4f, 0x8f, 0xb1,
++	0xdd, 0x73, 0xd5, 0x50, 0x6d, 0x44, 0x1e, 0xbe,
++	0xa7, 0xcd, 0x17, 0x57, 0xd5, 0x3b, 0x62, 0x36,
++	0xcf, 0x7d, 0xc8, 0xd8, 0xd1, 0x78, 0xd7, 0x85,
++	0x46, 0x76, 0x5d, 0xcc, 0xfe, 0xe8, 0x94, 0xc5,
++	0xad, 0xbc, 0x5e, 0xbc, 0x8d, 0x1d, 0xdf, 0x03,
++	0xc9, 0x6b, 0x1b, 0x81, 0xd1, 0xb6, 0x5a, 0x24,
++	0xe3, 0xdc, 0x3f, 0x20, 0xc9, 0x07, 0x73, 0x4c,
++	0x43, 0x13, 0x87, 0x58, 0x34, 0x0d, 0x14, 0x63,
++	0x0f, 0x6f, 0xad, 0x8d, 0xac, 0x7c, 0x67, 0x68,
++	0xa3, 0x9d, 0x7f, 0x00, 0xdf, 0x28, 0xee, 0x67,
++	0xf4, 0x5c, 0x26, 0xcb, 0xef, 0x56, 0x71, 0xc8,
++	0xc6, 0x67, 0x5f, 0x38, 0xbb, 0xa0, 0xb1, 0x5c,
++	0x1f, 0xb3, 0x08, 0xd9, 0x38, 0xcf, 0x74, 0x54,
++	0xc6, 0xa4, 0xc4, 0xc0, 0x9f, 0xb3, 0xd0, 0xda,
++	0x62, 0x67, 0x8b, 0x81, 0x33, 0xf0, 0xa9, 0x73,
++	0xa4, 0xd1, 0x46, 0x88, 0x8d, 0x85, 0x12, 0x40,
++	0xba, 0x1a, 0xcd, 0x82, 0xd8, 0x8d, 0xc4, 0x52,
++	0xe7, 0x01, 0x94, 0x2e, 0x0e, 0xd0, 0xaf, 0xe7,
++	0x2d, 0x3f, 0x3c, 0xaa, 0xf4, 0xf5, 0xa7, 0x01,
++	0x4c, 0x14, 0xe2, 0xc2, 0x96, 0x76, 0xbe, 0x05,
++	0xaa, 0x19, 0xb1, 0xbd, 0x95, 0xbb, 0x5a, 0xf9,
++	0xa5, 0xa7, 0xe6, 0x16, 0x38, 0x34, 0xf7, 0x9d,
++	0x19, 0x66, 0x16, 0x8e, 0x7f, 0x2b, 0x5a, 0xfb,
++	0xb5, 0x29, 0x79, 0xbf, 0x52, 0xae, 0x30, 0x95,
++	0x3f, 0x31, 0x33, 0x28, 0xde, 0xc5, 0x0d, 0x55,
++	0x89, 0xec, 0x21, 0x11, 0x0f, 0x8b, 0xfe, 0x63,
++	0x3a, 0xf1, 0x95, 0x5c, 0xcd, 0x50, 0xe4, 0x5d,
++	0x8f, 0xa7, 0xc8, 0xca, 0x93, 0xa0, 0x67, 0x82,
++	0x63, 0x5c, 0xd0, 0xed, 0xe7, 0x08, 0xc5, 0x60,
++	0xf8, 0xb4, 0x47, 0xf0, 0x1a, 0x65, 0x4e, 0xa3,
++	0x51, 0x68, 0xc7, 0x14, 0xa1, 0xd9, 0x39, 0x72,
++	0xa8, 0x6f, 0x7c, 0x7e, 0xf6, 0x03, 0x0b, 0x25,
++	0x9b, 0xf2, 0xca, 0x49, 0xae, 0x5b, 0xf8, 0x0f,
++	0x71, 0x51, 0x01, 0xa6, 0x23, 0xa9, 0xdf, 0xd0,
++	0x7a, 0x39, 0x19, 0xf5, 0xc5, 0x26, 0x44, 0x7b,
++	0x0a, 0x4a, 0x41, 0xbf, 0xf2, 0x8e, 0x83, 0x50,
++	0x91, 0x96, 0x72, 0x02, 0xf6, 0x80, 0xbf, 0x95,
++	0x41, 0xac, 0xda, 0xb0, 0xba, 0xe3, 0x76, 0xb1,
++	0x9d, 0xff, 0x1f, 0x33, 0x02, 0x85, 0xfc, 0x2a,
++	0x29, 0xe6, 0xe3, 0x9d, 0xd0, 0xef, 0xc2, 0xd6,
++	0x9c, 0x4a, 0x62, 0xac, 0xcb, 0xea, 0x8b, 0xc3,
++	0x08, 0x6e, 0x49, 0x09, 0x26, 0x19, 0xc1, 0x30,
++	0xcc, 0x27, 0xaa, 0xc6, 0x45, 0x88, 0xbd, 0xae,
++	0xd6, 0x79, 0xff, 0x4e, 0xfc, 0x66, 0x4d, 0x02,
++	0xa5, 0xee, 0x8e, 0xa5, 0xb6, 0x15, 0x72, 0x24,
++	0xb1, 0xbf, 0xbf, 0x64, 0xcf, 0xcc, 0x93, 0xe9,
++	0xb6, 0xfd, 0xb4, 0xb6, 0x21, 0xb5, 0x48, 0x08,
++	0x0f, 0x11, 0x65, 0xe1, 0x47, 0xee, 0x93, 0x29,
++	0xad
++};
++static const u8 key72[] __initconst = {
++	0xb9, 0xa2, 0xfc, 0x59, 0x06, 0x3f, 0x77, 0xa5,
++	0x66, 0xd0, 0x2b, 0x22, 0x74, 0x22, 0x4c, 0x1e,
++	0x6a, 0x39, 0xdf, 0xe1, 0x0d, 0x4c, 0x64, 0x99,
++	0x54, 0x8a, 0xba, 0x1d, 0x2c, 0x21, 0x5f, 0xc3
++};
++enum { nonce72 = 0x3d069308fa3db04bULL };
++
++static const u8 input73[] __initconst = {
++	0xe4, 0xdd, 0x36, 0xd4, 0xf5, 0x70, 0x51, 0x73,
++	0x97, 0x1d, 0x45, 0x05, 0x92, 0xe7, 0xeb, 0xb7,
++	0x09, 0x82, 0x6e, 0x25, 0x6c, 0x50, 0xf5, 0x40,
++	0x19, 0xba, 0xbc, 0xf4, 0x39, 0x14, 0xc5, 0x15,
++	0x83, 0x40, 0xbd, 0x26, 0xe0, 0xff, 0x3b, 0x22,
++	0x7c, 0x7c, 0xd7, 0x0b, 0xe9, 0x25, 0x0c, 0x3d,
++	0x92, 0x38, 0xbe, 0xe4, 0x22, 0x75, 0x65, 0xf1,
++	0x03, 0x85, 0x34, 0x09, 0xb8, 0x77, 0xfb, 0x48,
++	0xb1, 0x2e, 0x21, 0x67, 0x9b, 0x9d, 0xad, 0x18,
++	0x82, 0x0d, 0x6b, 0xc3, 0xcf, 0x00, 0x61, 0x6e,
++	0xda, 0xdc, 0xa7, 0x0b, 0x5c, 0x02, 0x1d, 0xa6,
++	0x4e, 0x0d, 0x7f, 0x37, 0x01, 0x5a, 0x37, 0xf3,
++	0x2b, 0xbf, 0xba, 0xe2, 0x1c, 0xb3, 0xa3, 0xbc,
++	0x1c, 0x93, 0x1a, 0xb1, 0x71, 0xaf, 0xe2, 0xdd,
++	0x17, 0xee, 0x53, 0xfa, 0xfb, 0x02, 0x40, 0x3e,
++	0x03, 0xca, 0xe7, 0xc3, 0x51, 0x81, 0xcc, 0x8c,
++	0xca, 0xcf, 0x4e, 0xc5, 0x78, 0x99, 0xfd, 0xbf,
++	0xea, 0xab, 0x38, 0x81, 0xfc, 0xd1, 0x9e, 0x41,
++	0x0b, 0x84, 0x25, 0xf1, 0x6b, 0x3c, 0xf5, 0x40,
++	0x0d, 0xc4, 0x3e, 0xb3, 0x6a, 0xec, 0x6e, 0x75,
++	0xdc, 0x9b, 0xdf, 0x08, 0x21, 0x16, 0xfb, 0x7a,
++	0x8e, 0x19, 0x13, 0x02, 0xa7, 0xfc, 0x58, 0x21,
++	0xc3, 0xb3, 0x59, 0x5a, 0x9c, 0xef, 0x38, 0xbd,
++	0x87, 0x55, 0xd7, 0x0d, 0x1f, 0x84, 0xdc, 0x98,
++	0x22, 0xca, 0x87, 0x96, 0x71, 0x6d, 0x68, 0x00,
++	0xcb, 0x4f, 0x2f, 0xc4, 0x64, 0x0c, 0xc1, 0x53,
++	0x0c, 0x90, 0xe7, 0x3c, 0x88, 0xca, 0xc5, 0x85,
++	0xa3, 0x2a, 0x96, 0x7c, 0x82, 0x6d, 0x45, 0xf5,
++	0xb7, 0x8d, 0x17, 0x69, 0xd6, 0xcd, 0x3c, 0xd3,
++	0xe7, 0x1c, 0xce, 0x93, 0x50, 0xd4, 0x59, 0xa2,
++	0xd8, 0x8b, 0x72, 0x60, 0x5b, 0x25, 0x14, 0xcd,
++	0x5a, 0xe8, 0x8c, 0xdb, 0x23, 0x8d, 0x2b, 0x59,
++	0x12, 0x13, 0x10, 0x47, 0xa4, 0xc8, 0x3c, 0xc1,
++	0x81, 0x89, 0x6c, 0x98, 0xec, 0x8f, 0x7b, 0x32,
++	0xf2, 0x87, 0xd9, 0xa2, 0x0d, 0xc2, 0x08, 0xf9,
++	0xd5, 0xf3, 0x91, 0xe7, 0xb3, 0x87, 0xa7, 0x0b,
++	0x64, 0x8f, 0xb9, 0x55, 0x1c, 0x81, 0x96, 0x6c,
++	0xa1, 0xc9, 0x6e, 0x3b, 0xcd, 0x17, 0x1b, 0xfc,
++	0xa6, 0x05, 0xba, 0x4a, 0x7d, 0x03, 0x3c, 0x59,
++	0xc8, 0xee, 0x50, 0xb2, 0x5b, 0xe1, 0x4d, 0x6a,
++	0x1f, 0x09, 0xdc, 0xa2, 0x51, 0xd1, 0x93, 0x3a,
++	0x5f, 0x72, 0x1d, 0x26, 0x14, 0x62, 0xa2, 0x41,
++	0x3d, 0x08, 0x70, 0x7b, 0x27, 0x3d, 0xbc, 0xdf,
++	0x15, 0xfa, 0xb9, 0x5f, 0xb5, 0x38, 0x84, 0x0b,
++	0x58, 0x3d, 0xee, 0x3f, 0x32, 0x65, 0x6d, 0xd7,
++	0xce, 0x97, 0x3c, 0x8d, 0xfb, 0x63, 0xb9, 0xb0,
++	0xa8, 0x4a, 0x72, 0x99, 0x97, 0x58, 0xc8, 0xa7,
++	0xf9, 0x4c, 0xae, 0xc1, 0x63, 0xb9, 0x57, 0x18,
++	0x8a, 0xfa, 0xab, 0xe9, 0xf3, 0x67, 0xe6, 0xfd,
++	0xd2, 0x9d, 0x5c, 0xa9, 0x8e, 0x11, 0x0a, 0xf4,
++	0x4b, 0xf1, 0xec, 0x1a, 0xaf, 0x50, 0x5d, 0x16,
++	0x13, 0x69, 0x2e, 0xbd, 0x0d, 0xe6, 0xf0, 0xb2,
++	0xed, 0xb4, 0x4c, 0x59, 0x77, 0x37, 0x00, 0x0b,
++	0xc7, 0xa7, 0x9e, 0x37, 0xf3, 0x60, 0x70, 0xef,
++	0xf3, 0xc1, 0x74, 0x52, 0x87, 0xc6, 0xa1, 0x81,
++	0xbd, 0x0a, 0x2c, 0x5d, 0x2c, 0x0c, 0x6a, 0x81,
++	0xa1, 0xfe, 0x26, 0x78, 0x6c, 0x03, 0x06, 0x07,
++	0x34, 0xaa, 0xd1, 0x1b, 0x40, 0x03, 0x39, 0x56,
++	0xcf, 0x2a, 0x92, 0xc1, 0x4e, 0xdf, 0x29, 0x24,
++	0x83, 0x22, 0x7a, 0xea, 0x67, 0x1e, 0xe7, 0x54,
++	0x64, 0xd3, 0xbd, 0x3a, 0x5d, 0xae, 0xca, 0xf0,
++	0x9c, 0xd6, 0x5a, 0x9a, 0x62, 0xc8, 0xc7, 0x83,
++	0xf9, 0x89, 0xde, 0x2d, 0x53, 0x64, 0x61, 0xf7,
++	0xa3, 0xa7, 0x31, 0x38, 0xc6, 0x22, 0x9c, 0xb4,
++	0x87, 0xe0
++};
++static const u8 output73[] __initconst = {
++	0x34, 0xed, 0x05, 0xb0, 0x14, 0xbc, 0x8c, 0xcc,
++	0x95, 0xbd, 0x99, 0x0f, 0xb1, 0x98, 0x17, 0x10,
++	0xae, 0xe0, 0x08, 0x53, 0xa3, 0x69, 0xd2, 0xed,
++	0x66, 0xdb, 0x2a, 0x34, 0x8d, 0x0c, 0x6e, 0xce,
++	0x63, 0x69, 0xc9, 0xe4, 0x57, 0xc3, 0x0c, 0x8b,
++	0xa6, 0x2c, 0xa7, 0xd2, 0x08, 0xff, 0x4f, 0xec,
++	0x61, 0x8c, 0xee, 0x0d, 0xfa, 0x6b, 0xe0, 0xe8,
++	0x71, 0xbc, 0x41, 0x46, 0xd7, 0x33, 0x1d, 0xc0,
++	0xfd, 0xad, 0xca, 0x8b, 0x34, 0x56, 0xa4, 0x86,
++	0x71, 0x62, 0xae, 0x5e, 0x3d, 0x2b, 0x66, 0x3e,
++	0xae, 0xd8, 0xc0, 0xe1, 0x21, 0x3b, 0xca, 0xd2,
++	0x6b, 0xa2, 0xb8, 0xc7, 0x98, 0x4a, 0xf3, 0xcf,
++	0xb8, 0x62, 0xd8, 0x33, 0xe6, 0x80, 0xdb, 0x2f,
++	0x0a, 0xaf, 0x90, 0x3c, 0xe1, 0xec, 0xe9, 0x21,
++	0x29, 0x42, 0x9e, 0xa5, 0x50, 0xe9, 0x93, 0xd3,
++	0x53, 0x1f, 0xac, 0x2a, 0x24, 0x07, 0xb8, 0xed,
++	0xed, 0x38, 0x2c, 0xc4, 0xa1, 0x2b, 0x31, 0x5d,
++	0x9c, 0x24, 0x7b, 0xbf, 0xd9, 0xbb, 0x4e, 0x87,
++	0x8f, 0x32, 0x30, 0xf1, 0x11, 0x29, 0x54, 0x94,
++	0x00, 0x95, 0x1d, 0x1d, 0x24, 0xc0, 0xd4, 0x34,
++	0x49, 0x1d, 0xd5, 0xe3, 0xa6, 0xde, 0x8b, 0xbf,
++	0x5a, 0x9f, 0x58, 0x5a, 0x9b, 0x70, 0xe5, 0x9b,
++	0xb3, 0xdb, 0xe8, 0xb8, 0xca, 0x1b, 0x43, 0xe3,
++	0xc6, 0x6f, 0x0a, 0xd6, 0x32, 0x11, 0xd4, 0x04,
++	0xef, 0xa3, 0xe4, 0x3f, 0x12, 0xd8, 0xc1, 0x73,
++	0x51, 0x87, 0x03, 0xbd, 0xba, 0x60, 0x79, 0xee,
++	0x08, 0xcc, 0xf7, 0xc0, 0xaa, 0x4c, 0x33, 0xc4,
++	0xc7, 0x09, 0xf5, 0x91, 0xcb, 0x74, 0x57, 0x08,
++	0x1b, 0x90, 0xa9, 0x1b, 0x60, 0x02, 0xd2, 0x3f,
++	0x7a, 0xbb, 0xfd, 0x78, 0xf0, 0x15, 0xf9, 0x29,
++	0x82, 0x8f, 0xc4, 0xb2, 0x88, 0x1f, 0xbc, 0xcc,
++	0x53, 0x27, 0x8b, 0x07, 0x5f, 0xfc, 0x91, 0x29,
++	0x82, 0x80, 0x59, 0x0a, 0x3c, 0xea, 0xc4, 0x7e,
++	0xad, 0xd2, 0x70, 0x46, 0xbd, 0x9e, 0x3b, 0x1c,
++	0x8a, 0x62, 0xea, 0x69, 0xbd, 0xf6, 0x96, 0x15,
++	0xb5, 0x57, 0xe8, 0x63, 0x5f, 0x65, 0x46, 0x84,
++	0x58, 0x50, 0x87, 0x4b, 0x0e, 0x5b, 0x52, 0x90,
++	0xb0, 0xae, 0x37, 0x0f, 0xdd, 0x7e, 0xa2, 0xa0,
++	0x8b, 0x78, 0xc8, 0x5a, 0x1f, 0x53, 0xdb, 0xc5,
++	0xbf, 0x73, 0x20, 0xa9, 0x44, 0xfb, 0x1e, 0xc7,
++	0x97, 0xb2, 0x3a, 0x5a, 0x17, 0xe6, 0x8b, 0x9b,
++	0xe8, 0xf8, 0x2a, 0x01, 0x27, 0xa3, 0x71, 0x28,
++	0xe3, 0x19, 0xc6, 0xaf, 0xf5, 0x3a, 0x26, 0xc0,
++	0x5c, 0x69, 0x30, 0x78, 0x75, 0x27, 0xf2, 0x0c,
++	0x22, 0x71, 0x65, 0xc6, 0x8e, 0x7b, 0x47, 0xe3,
++	0x31, 0xaf, 0x7b, 0xc6, 0xc2, 0x55, 0x68, 0x81,
++	0xaa, 0x1b, 0x21, 0x65, 0xfb, 0x18, 0x35, 0x45,
++	0x36, 0x9a, 0x44, 0xba, 0x5c, 0xff, 0x06, 0xde,
++	0x3a, 0xc8, 0x44, 0x0b, 0xaa, 0x8e, 0x34, 0xe2,
++	0x84, 0xac, 0x18, 0xfe, 0x9b, 0xe1, 0x4f, 0xaa,
++	0xb6, 0x90, 0x0b, 0x1c, 0x2c, 0xd9, 0x9a, 0x10,
++	0x18, 0xf9, 0x49, 0x41, 0x42, 0x1b, 0xb5, 0xe1,
++	0x26, 0xac, 0x2d, 0x38, 0x00, 0x00, 0xe4, 0xb4,
++	0x50, 0x6f, 0x14, 0x18, 0xd6, 0x3d, 0x00, 0x59,
++	0x3c, 0x45, 0xf3, 0x42, 0x13, 0x44, 0xb8, 0x57,
++	0xd4, 0x43, 0x5c, 0x8a, 0x2a, 0xb4, 0xfc, 0x0a,
++	0x25, 0x5a, 0xdc, 0x8f, 0x11, 0x0b, 0x11, 0x44,
++	0xc7, 0x0e, 0x54, 0x8b, 0x22, 0x01, 0x7e, 0x67,
++	0x2e, 0x15, 0x3a, 0xb9, 0xee, 0x84, 0x10, 0xd4,
++	0x80, 0x57, 0xd7, 0x75, 0xcf, 0x8b, 0xcb, 0x03,
++	0xc9, 0x92, 0x2b, 0x69, 0xd8, 0x5a, 0x9b, 0x06,
++	0x85, 0x47, 0xaa, 0x4c, 0x28, 0xde, 0x49, 0x58,
++	0xe6, 0x11, 0x1e, 0x5e, 0x64, 0x8e, 0x3b, 0xe0,
++	0x40, 0x2e, 0xac, 0x96, 0x97, 0x15, 0x37, 0x1e,
++	0x30, 0xdd
++};
++static const u8 key73[] __initconst = {
++	0x96, 0x06, 0x1e, 0xc1, 0x6d, 0xba, 0x49, 0x5b,
++	0x65, 0x80, 0x79, 0xdd, 0xf3, 0x67, 0xa8, 0x6e,
++	0x2d, 0x9c, 0x54, 0x46, 0xd8, 0x4a, 0xeb, 0x7e,
++	0x23, 0x86, 0x51, 0xd8, 0x49, 0x49, 0x56, 0xe0
++};
++enum { nonce73 = 0xbefb83cb67e11ffdULL };
++
++static const u8 input74[] __initconst = {
++	0x47, 0x22, 0x70, 0xe5, 0x2f, 0x41, 0x18, 0x45,
++	0x07, 0xd3, 0x6d, 0x32, 0x0d, 0x43, 0x92, 0x2b,
++	0x9b, 0x65, 0x73, 0x13, 0x1a, 0x4f, 0x49, 0x8f,
++	0xff, 0xf8, 0xcc, 0xae, 0x15, 0xab, 0x9d, 0x7d,
++	0xee, 0x22, 0x5d, 0x8b, 0xde, 0x81, 0x5b, 0x81,
++	0x83, 0x49, 0x35, 0x9b, 0xb4, 0xbc, 0x4e, 0x01,
++	0xc2, 0x29, 0xa7, 0xf1, 0xca, 0x3a, 0xce, 0x3f,
++	0xf5, 0x31, 0x93, 0xa8, 0xe2, 0xc9, 0x7d, 0x03,
++	0x26, 0xa4, 0xbc, 0xa8, 0x9c, 0xb9, 0x68, 0xf3,
++	0xb3, 0x91, 0xe8, 0xe6, 0xc7, 0x2b, 0x1a, 0xce,
++	0xd2, 0x41, 0x53, 0xbd, 0xa3, 0x2c, 0x54, 0x94,
++	0x21, 0xa1, 0x40, 0xae, 0xc9, 0x0c, 0x11, 0x92,
++	0xfd, 0x91, 0xa9, 0x40, 0xca, 0xde, 0x21, 0x4e,
++	0x1e, 0x3d, 0xcc, 0x2c, 0x87, 0x11, 0xef, 0x46,
++	0xed, 0x52, 0x03, 0x11, 0x19, 0x43, 0x25, 0xc7,
++	0x0d, 0xc3, 0x37, 0x5f, 0xd3, 0x6f, 0x0c, 0x6a,
++	0x45, 0x30, 0x88, 0xec, 0xf0, 0x21, 0xef, 0x1d,
++	0x7b, 0x38, 0x63, 0x4b, 0x49, 0x0c, 0x72, 0xf6,
++	0x4c, 0x40, 0xc3, 0xcc, 0x03, 0xa7, 0xae, 0xa8,
++	0x8c, 0x37, 0x03, 0x1c, 0x11, 0xae, 0x0d, 0x1b,
++	0x62, 0x97, 0x27, 0xfc, 0x56, 0x4b, 0xb7, 0xfd,
++	0xbc, 0xfb, 0x0e, 0xfc, 0x61, 0xad, 0xc6, 0xb5,
++	0x9c, 0x8c, 0xc6, 0x38, 0x27, 0x91, 0x29, 0x3d,
++	0x29, 0xc8, 0x37, 0xc9, 0x96, 0x69, 0xe3, 0xdc,
++	0x3e, 0x61, 0x35, 0x9b, 0x99, 0x4f, 0xb9, 0x4e,
++	0x5a, 0x29, 0x1c, 0x2e, 0xcf, 0x16, 0xcb, 0x69,
++	0x87, 0xe4, 0x1a, 0xc4, 0x6e, 0x78, 0x43, 0x00,
++	0x03, 0xb2, 0x8b, 0x03, 0xd0, 0xb4, 0xf1, 0xd2,
++	0x7d, 0x2d, 0x7e, 0xfc, 0x19, 0x66, 0x5b, 0xa3,
++	0x60, 0x3f, 0x9d, 0xbd, 0xfa, 0x3e, 0xca, 0x7b,
++	0x26, 0x08, 0x19, 0x16, 0x93, 0x5d, 0x83, 0xfd,
++	0xf9, 0x21, 0xc6, 0x31, 0x34, 0x6f, 0x0c, 0xaa,
++	0x28, 0xf9, 0x18, 0xa2, 0xc4, 0x78, 0x3b, 0x56,
++	0xc0, 0x88, 0x16, 0xba, 0x22, 0x2c, 0x07, 0x2f,
++	0x70, 0xd0, 0xb0, 0x46, 0x35, 0xc7, 0x14, 0xdc,
++	0xbb, 0x56, 0x23, 0x1e, 0x36, 0x36, 0x2d, 0x73,
++	0x78, 0xc7, 0xce, 0xf3, 0x58, 0xf7, 0x58, 0xb5,
++	0x51, 0xff, 0x33, 0x86, 0x0e, 0x3b, 0x39, 0xfb,
++	0x1a, 0xfd, 0xf8, 0x8b, 0x09, 0x33, 0x1b, 0x83,
++	0xf2, 0xe6, 0x38, 0x37, 0xef, 0x47, 0x84, 0xd9,
++	0x82, 0x77, 0x2b, 0x82, 0xcc, 0xf9, 0xee, 0x94,
++	0x71, 0x78, 0x81, 0xc8, 0x4d, 0x91, 0xd7, 0x35,
++	0x29, 0x31, 0x30, 0x5c, 0x4a, 0x23, 0x23, 0xb1,
++	0x38, 0x6b, 0xac, 0x22, 0x3f, 0x80, 0xc7, 0xe0,
++	0x7d, 0xfa, 0x76, 0x47, 0xd4, 0x6f, 0x93, 0xa0,
++	0xa0, 0x93, 0x5d, 0x68, 0xf7, 0x43, 0x25, 0x8f,
++	0x1b, 0xc7, 0x87, 0xea, 0x59, 0x0c, 0xa2, 0xfa,
++	0xdb, 0x2f, 0x72, 0x43, 0xcf, 0x90, 0xf1, 0xd6,
++	0x58, 0xf3, 0x17, 0x6a, 0xdf, 0xb3, 0x4e, 0x0e,
++	0x38, 0x24, 0x48, 0x1f, 0xb7, 0x01, 0xec, 0x81,
++	0xb1, 0x87, 0x5b, 0xec, 0x9c, 0x11, 0x1a, 0xff,
++	0xa5, 0xca, 0x5a, 0x63, 0x31, 0xb2, 0xe4, 0xc6,
++	0x3c, 0x1d, 0xaf, 0x27, 0xb2, 0xd4, 0x19, 0xa2,
++	0xcc, 0x04, 0x92, 0x42, 0xd2, 0xc1, 0x8c, 0x3b,
++	0xce, 0xf5, 0x74, 0xc1, 0x81, 0xf8, 0x20, 0x23,
++	0x6f, 0x20, 0x6d, 0x78, 0x36, 0x72, 0x2c, 0x52,
++	0xdf, 0x5e, 0xe8, 0x75, 0xce, 0x1c, 0x49, 0x9d,
++	0x93, 0x6f, 0x65, 0xeb, 0xb1, 0xbd, 0x8e, 0x5e,
++	0xe5, 0x89, 0xc4, 0x8a, 0x81, 0x3d, 0x9a, 0xa7,
++	0x11, 0x82, 0x8e, 0x38, 0x5b, 0x5b, 0xca, 0x7d,
++	0x4b, 0x72, 0xc2, 0x9c, 0x30, 0x5e, 0x7f, 0xc0,
++	0x6f, 0x91, 0xd5, 0x67, 0x8c, 0x3e, 0xae, 0xda,
++	0x2b, 0x3c, 0x53, 0xcc, 0x50, 0x97, 0x36, 0x0b,
++	0x79, 0xd6, 0x73, 0x6e, 0x7d, 0x42, 0x56, 0xe1,
++	0xaa, 0xfc, 0xb3, 0xa7, 0xc8, 0x01, 0xaa, 0xc1,
++	0xfc, 0x5c, 0x72, 0x8e, 0x63, 0xa8, 0x46, 0x18,
++	0xee, 0x11, 0xe7, 0x30, 0x09, 0x83, 0x6c, 0xd9,
++	0xf4, 0x7a, 0x7b, 0xb5, 0x1f, 0x6d, 0xc7, 0xbc,
++	0xcb, 0x55, 0xea, 0x40, 0x58, 0x7a, 0x00, 0x00,
++	0x90, 0x60, 0xc5, 0x64, 0x69, 0x05, 0x99, 0xd2,
++	0x49, 0x62, 0x4f, 0xcb, 0x97, 0xdf, 0xdd, 0x6b,
++	0x60, 0x75, 0xe2, 0xe0, 0x6f, 0x76, 0xd0, 0x37,
++	0x67, 0x0a, 0xcf, 0xff, 0xc8, 0x61, 0x84, 0x14,
++	0x80, 0x7c, 0x1d, 0x31, 0x8d, 0x90, 0xde, 0x0b,
++	0x1c, 0x74, 0x9f, 0x82, 0x96, 0x80, 0xda, 0xaf,
++	0x8d, 0x99, 0x86, 0x9f, 0x24, 0x99, 0x28, 0x3e,
++	0xe0, 0xa3, 0xc3, 0x90, 0x2d, 0x14, 0x65, 0x1e,
++	0x3b, 0xb9, 0xba, 0x13, 0xa5, 0x77, 0x73, 0x63,
++	0x9a, 0x06, 0x3d, 0xa9, 0x28, 0x9b, 0xba, 0x25,
++	0x61, 0xc9, 0xcd, 0xcf, 0x7a, 0x4d, 0x96, 0x09,
++	0xcb, 0xca, 0x03, 0x9c, 0x54, 0x34, 0x31, 0x85,
++	0xa0, 0x3d, 0xe5, 0xbc, 0xa5, 0x5f, 0x1b, 0xd3,
++	0x10, 0x63, 0x74, 0x9d, 0x01, 0x92, 0x88, 0xf0,
++	0x27, 0x9c, 0x28, 0xd9, 0xfd, 0xe2, 0x4e, 0x01,
++	0x8d, 0x61, 0x79, 0x60, 0x61, 0x5b, 0x76, 0xab,
++	0x06, 0xd3, 0x44, 0x87, 0x43, 0x52, 0xcd, 0x06,
++	0x68, 0x1e, 0x2d, 0xc5, 0xb0, 0x07, 0x25, 0xdf,
++	0x0a, 0x50, 0xd7, 0xd9, 0x08, 0x53, 0x65, 0xf1,
++	0x0c, 0x2c, 0xde, 0x3f, 0x9d, 0x03, 0x1f, 0xe1,
++	0x49, 0x43, 0x3c, 0x83, 0x81, 0x37, 0xf8, 0xa2,
++	0x0b, 0xf9, 0x61, 0x1c, 0xc1, 0xdb, 0x79, 0xbc,
++	0x64, 0xce, 0x06, 0x4e, 0x87, 0x89, 0x62, 0x73,
++	0x51, 0xbc, 0xa4, 0x32, 0xd4, 0x18, 0x62, 0xab,
++	0x65, 0x7e, 0xad, 0x1e, 0x91, 0xa3, 0xfa, 0x2d,
++	0x58, 0x9e, 0x2a, 0xe9, 0x74, 0x44, 0x64, 0x11,
++	0xe6, 0xb6, 0xb3, 0x00, 0x7e, 0xa3, 0x16, 0xef,
++	0x72
++};
++static const u8 output74[] __initconst = {
++	0xf5, 0xca, 0x45, 0x65, 0x50, 0x35, 0x47, 0x67,
++	0x6f, 0x4f, 0x67, 0xff, 0x34, 0xd9, 0xc3, 0x37,
++	0x2a, 0x26, 0xb0, 0x4f, 0x08, 0x1e, 0x45, 0x13,
++	0xc7, 0x2c, 0x14, 0x75, 0x33, 0xd8, 0x8e, 0x1e,
++	0x1b, 0x11, 0x0d, 0x97, 0x04, 0x33, 0x8a, 0xe4,
++	0xd8, 0x8d, 0x0e, 0x12, 0x8d, 0xdb, 0x6e, 0x02,
++	0xfa, 0xe5, 0xbd, 0x3a, 0xb5, 0x28, 0x07, 0x7d,
++	0x20, 0xf0, 0x12, 0x64, 0x83, 0x2f, 0x59, 0x79,
++	0x17, 0x88, 0x3c, 0x2d, 0x08, 0x2f, 0x55, 0xda,
++	0xcc, 0x02, 0x3a, 0x82, 0xcd, 0x03, 0x94, 0xdf,
++	0xdf, 0xab, 0x8a, 0x13, 0xf5, 0xe6, 0x74, 0xdf,
++	0x7b, 0xe2, 0xab, 0x34, 0xbc, 0x00, 0x85, 0xbf,
++	0x5a, 0x48, 0xc8, 0xff, 0x8d, 0x6c, 0x27, 0x48,
++	0x19, 0x2d, 0x08, 0xfa, 0x82, 0x62, 0x39, 0x55,
++	0x32, 0x11, 0xa8, 0xd7, 0xb9, 0x08, 0x2c, 0xd6,
++	0x7a, 0xd9, 0x83, 0x9f, 0x9b, 0xfb, 0xec, 0x3a,
++	0xd1, 0x08, 0xc7, 0xad, 0xdc, 0x98, 0x4c, 0xbc,
++	0x98, 0xeb, 0x36, 0xb0, 0x39, 0xf4, 0x3a, 0xd6,
++	0x53, 0x02, 0xa0, 0xa9, 0x73, 0xa1, 0xca, 0xef,
++	0xd8, 0xd2, 0xec, 0x0e, 0xf8, 0xf5, 0xac, 0x8d,
++	0x34, 0x41, 0x06, 0xa8, 0xc6, 0xc3, 0x31, 0xbc,
++	0xe5, 0xcc, 0x7e, 0x72, 0x63, 0x59, 0x3e, 0x63,
++	0xc2, 0x8d, 0x2b, 0xd5, 0xb9, 0xfd, 0x1e, 0x31,
++	0x69, 0x32, 0x05, 0xd6, 0xde, 0xc9, 0xe6, 0x4c,
++	0xac, 0x68, 0xf7, 0x1f, 0x9d, 0xcd, 0x0e, 0xa2,
++	0x15, 0x3d, 0xd6, 0x47, 0x99, 0xab, 0x08, 0x5f,
++	0x28, 0xc3, 0x4c, 0xc2, 0xd5, 0xdd, 0x10, 0xb7,
++	0xbd, 0xdb, 0x9b, 0xcf, 0x85, 0x27, 0x29, 0x76,
++	0x98, 0xeb, 0xad, 0x31, 0x64, 0xe7, 0xfb, 0x61,
++	0xe0, 0xd8, 0x1a, 0xa6, 0xe2, 0xe7, 0x43, 0x42,
++	0x77, 0xc9, 0x82, 0x00, 0xac, 0x85, 0xe0, 0xa2,
++	0xd4, 0x62, 0xe3, 0xb7, 0x17, 0x6e, 0xb2, 0x9e,
++	0x21, 0x58, 0x73, 0xa9, 0x53, 0x2d, 0x3c, 0xe1,
++	0xdd, 0xd6, 0x6e, 0x92, 0xf2, 0x1d, 0xc2, 0x22,
++	0x5f, 0x9a, 0x7e, 0xd0, 0x52, 0xbf, 0x54, 0x19,
++	0xd7, 0x80, 0x63, 0x3e, 0xd0, 0x08, 0x2d, 0x37,
++	0x0c, 0x15, 0xf7, 0xde, 0xab, 0x2b, 0xe3, 0x16,
++	0x21, 0x3a, 0xee, 0xa5, 0xdc, 0xdf, 0xde, 0xa3,
++	0x69, 0xcb, 0xfd, 0x92, 0x89, 0x75, 0xcf, 0xc9,
++	0x8a, 0xa4, 0xc8, 0xdd, 0xcc, 0x21, 0xe6, 0xfe,
++	0x9e, 0x43, 0x76, 0xb2, 0x45, 0x22, 0xb9, 0xb5,
++	0xac, 0x7e, 0x3d, 0x26, 0xb0, 0x53, 0xc8, 0xab,
++	0xfd, 0xea, 0x2c, 0xd1, 0x44, 0xc5, 0x60, 0x1b,
++	0x8a, 0x99, 0x0d, 0xa5, 0x0e, 0x67, 0x6e, 0x3a,
++	0x96, 0x55, 0xec, 0xe8, 0xcc, 0xbe, 0x49, 0xd9,
++	0xf2, 0x72, 0x9f, 0x30, 0x21, 0x97, 0x57, 0x19,
++	0xbe, 0x5e, 0x33, 0x0c, 0xee, 0xc0, 0x72, 0x0d,
++	0x2e, 0xd1, 0xe1, 0x52, 0xc2, 0xea, 0x41, 0xbb,
++	0xe1, 0x6d, 0xd4, 0x17, 0xa9, 0x8d, 0x89, 0xa9,
++	0xd6, 0x4b, 0xc6, 0x4c, 0xf2, 0x88, 0x97, 0x54,
++	0x3f, 0x4f, 0x57, 0xb7, 0x37, 0xf0, 0x2c, 0x11,
++	0x15, 0x56, 0xdb, 0x28, 0xb5, 0x16, 0x84, 0x66,
++	0xce, 0x45, 0x3f, 0x61, 0x75, 0xb6, 0xbe, 0x00,
++	0xd1, 0xe4, 0xf5, 0x27, 0x54, 0x7f, 0xc2, 0xf1,
++	0xb3, 0x32, 0x9a, 0xe8, 0x07, 0x02, 0xf3, 0xdb,
++	0xa9, 0xd1, 0xc2, 0xdf, 0xee, 0xad, 0xe5, 0x8a,
++	0x3c, 0xfa, 0x67, 0xec, 0x6b, 0xa4, 0x08, 0xfe,
++	0xba, 0x5a, 0x58, 0x0b, 0x78, 0x11, 0x91, 0x76,
++	0xe3, 0x1a, 0x28, 0x54, 0x5e, 0xbd, 0x71, 0x1b,
++	0x8b, 0xdc, 0x6c, 0xf4, 0x6f, 0xd7, 0xf4, 0xf3,
++	0xe1, 0x03, 0xa4, 0x3c, 0x8d, 0x91, 0x2e, 0xba,
++	0x5f, 0x7f, 0x8c, 0xaf, 0x69, 0x89, 0x29, 0x0a,
++	0x5b, 0x25, 0x13, 0xc4, 0x2e, 0x16, 0xc2, 0x15,
++	0x07, 0x5d, 0x58, 0x33, 0x7c, 0xe0, 0xf0, 0x55,
++	0x5f, 0xbf, 0x5e, 0xf0, 0x71, 0x48, 0x8f, 0xf7,
++	0x48, 0xb3, 0xf7, 0x0d, 0xa1, 0xd0, 0x63, 0xb1,
++	0xad, 0xae, 0xb5, 0xb0, 0x5f, 0x71, 0xaf, 0x24,
++	0x8b, 0xb9, 0x1c, 0x44, 0xd2, 0x1a, 0x53, 0xd1,
++	0xd5, 0xb4, 0xa9, 0xff, 0x88, 0x73, 0xb5, 0xaa,
++	0x15, 0x32, 0x5f, 0x59, 0x9d, 0x2e, 0xb5, 0xcb,
++	0xde, 0x21, 0x2e, 0xe9, 0x35, 0xed, 0xfd, 0x0f,
++	0xb6, 0xbb, 0xe6, 0x4b, 0x16, 0xf1, 0x45, 0x1e,
++	0xb4, 0x84, 0xe9, 0x58, 0x1c, 0x0c, 0x95, 0xc0,
++	0xcf, 0x49, 0x8b, 0x59, 0xa1, 0x78, 0xe6, 0x80,
++	0x12, 0x49, 0x7a, 0xd4, 0x66, 0x62, 0xdf, 0x9c,
++	0x18, 0xc8, 0x8c, 0xda, 0xc1, 0xa6, 0xbc, 0x65,
++	0x28, 0xd2, 0xa4, 0xe8, 0xf1, 0x35, 0xdb, 0x5a,
++	0x75, 0x1f, 0x73, 0x60, 0xec, 0xa8, 0xda, 0x5a,
++	0x43, 0x15, 0x83, 0x9b, 0xe7, 0xb1, 0xa6, 0x81,
++	0xbb, 0xef, 0xf3, 0x8f, 0x0f, 0xd3, 0x79, 0xa2,
++	0xe5, 0xaa, 0x42, 0xef, 0xa0, 0x13, 0x4e, 0x91,
++	0x2d, 0xcb, 0x61, 0x7a, 0x9a, 0x33, 0x14, 0x50,
++	0x77, 0x4a, 0xd0, 0x91, 0x48, 0xe0, 0x0c, 0xe0,
++	0x11, 0xcb, 0xdf, 0xb0, 0xce, 0x06, 0xd2, 0x79,
++	0x4d, 0x69, 0xb9, 0xc9, 0x36, 0x74, 0x8f, 0x81,
++	0x72, 0x73, 0xf3, 0x17, 0xb7, 0x13, 0xcb, 0x5b,
++	0xd2, 0x5c, 0x33, 0x61, 0xb7, 0x61, 0x79, 0xb0,
++	0xc0, 0x4d, 0xa1, 0xc7, 0x5d, 0x98, 0xc9, 0xe1,
++	0x98, 0xbd, 0x78, 0x5a, 0x2c, 0x64, 0x53, 0xaf,
++	0xaf, 0x66, 0x51, 0x47, 0xe4, 0x48, 0x66, 0x8b,
++	0x07, 0x52, 0xa3, 0x03, 0x93, 0x28, 0xad, 0xcc,
++	0xa3, 0x86, 0xad, 0x63, 0x04, 0x35, 0x6c, 0x49,
++	0xd5, 0x28, 0x0e, 0x00, 0x47, 0xf4, 0xd4, 0x32,
++	0x27, 0x19, 0xb3, 0x29, 0xe7, 0xbc, 0xbb, 0xce,
++	0x3e, 0x3e, 0xd5, 0x67, 0x20, 0xe4, 0x0b, 0x75,
++	0x95, 0x24, 0xe0, 0x6c, 0xb6, 0x29, 0x0c, 0x14,
++	0xfd
++};
++static const u8 key74[] __initconst = {
++	0xf0, 0x41, 0x5b, 0x00, 0x56, 0xc4, 0xac, 0xf6,
++	0xa2, 0x4c, 0x33, 0x41, 0x16, 0x09, 0x1b, 0x8e,
++	0x4d, 0xe8, 0x8c, 0xd9, 0x48, 0xab, 0x3e, 0x60,
++	0xcb, 0x49, 0x3e, 0xaf, 0x2b, 0x8b, 0xc8, 0xf0
++};
++enum { nonce74 = 0xcbdb0ffd0e923384ULL };
++
++static const struct chacha20_testvec chacha20_testvecs[] __initconst = {
++	{ input01, output01, key01, nonce01, sizeof(input01) },
++	{ input02, output02, key02, nonce02, sizeof(input02) },
++	{ input03, output03, key03, nonce03, sizeof(input03) },
++	{ input04, output04, key04, nonce04, sizeof(input04) },
++	{ input05, output05, key05, nonce05, sizeof(input05) },
++	{ input06, output06, key06, nonce06, sizeof(input06) },
++	{ input07, output07, key07, nonce07, sizeof(input07) },
++	{ input08, output08, key08, nonce08, sizeof(input08) },
++	{ input09, output09, key09, nonce09, sizeof(input09) },
++	{ input10, output10, key10, nonce10, sizeof(input10) },
++	{ input11, output11, key11, nonce11, sizeof(input11) },
++	{ input12, output12, key12, nonce12, sizeof(input12) },
++	{ input13, output13, key13, nonce13, sizeof(input13) },
++	{ input14, output14, key14, nonce14, sizeof(input14) },
++	{ input15, output15, key15, nonce15, sizeof(input15) },
++	{ input16, output16, key16, nonce16, sizeof(input16) },
++	{ input17, output17, key17, nonce17, sizeof(input17) },
++	{ input18, output18, key18, nonce18, sizeof(input18) },
++	{ input19, output19, key19, nonce19, sizeof(input19) },
++	{ input20, output20, key20, nonce20, sizeof(input20) },
++	{ input21, output21, key21, nonce21, sizeof(input21) },
++	{ input22, output22, key22, nonce22, sizeof(input22) },
++	{ input23, output23, key23, nonce23, sizeof(input23) },
++	{ input24, output24, key24, nonce24, sizeof(input24) },
++	{ input25, output25, key25, nonce25, sizeof(input25) },
++	{ input26, output26, key26, nonce26, sizeof(input26) },
++	{ input27, output27, key27, nonce27, sizeof(input27) },
++	{ input28, output28, key28, nonce28, sizeof(input28) },
++	{ input29, output29, key29, nonce29, sizeof(input29) },
++	{ input30, output30, key30, nonce30, sizeof(input30) },
++	{ input31, output31, key31, nonce31, sizeof(input31) },
++	{ input32, output32, key32, nonce32, sizeof(input32) },
++	{ input33, output33, key33, nonce33, sizeof(input33) },
++	{ input34, output34, key34, nonce34, sizeof(input34) },
++	{ input35, output35, key35, nonce35, sizeof(input35) },
++	{ input36, output36, key36, nonce36, sizeof(input36) },
++	{ input37, output37, key37, nonce37, sizeof(input37) },
++	{ input38, output38, key38, nonce38, sizeof(input38) },
++	{ input39, output39, key39, nonce39, sizeof(input39) },
++	{ input40, output40, key40, nonce40, sizeof(input40) },
++	{ input41, output41, key41, nonce41, sizeof(input41) },
++	{ input42, output42, key42, nonce42, sizeof(input42) },
++	{ input43, output43, key43, nonce43, sizeof(input43) },
++	{ input44, output44, key44, nonce44, sizeof(input44) },
++	{ input45, output45, key45, nonce45, sizeof(input45) },
++	{ input46, output46, key46, nonce46, sizeof(input46) },
++	{ input47, output47, key47, nonce47, sizeof(input47) },
++	{ input48, output48, key48, nonce48, sizeof(input48) },
++	{ input49, output49, key49, nonce49, sizeof(input49) },
++	{ input50, output50, key50, nonce50, sizeof(input50) },
++	{ input51, output51, key51, nonce51, sizeof(input51) },
++	{ input52, output52, key52, nonce52, sizeof(input52) },
++	{ input53, output53, key53, nonce53, sizeof(input53) },
++	{ input54, output54, key54, nonce54, sizeof(input54) },
++	{ input55, output55, key55, nonce55, sizeof(input55) },
++	{ input56, output56, key56, nonce56, sizeof(input56) },
++	{ input57, output57, key57, nonce57, sizeof(input57) },
++	{ input58, output58, key58, nonce58, sizeof(input58) },
++	{ input59, output59, key59, nonce59, sizeof(input59) },
++	{ input60, output60, key60, nonce60, sizeof(input60) },
++	{ input61, output61, key61, nonce61, sizeof(input61) },
++	{ input62, output62, key62, nonce62, sizeof(input62) },
++	{ input63, output63, key63, nonce63, sizeof(input63) },
++	{ input64, output64, key64, nonce64, sizeof(input64) },
++	{ input65, output65, key65, nonce65, sizeof(input65) },
++	{ input66, output66, key66, nonce66, sizeof(input66) },
++	{ input67, output67, key67, nonce67, sizeof(input67) },
++	{ input68, output68, key68, nonce68, sizeof(input68) },
++	{ input69, output69, key69, nonce69, sizeof(input69) },
++	{ input70, output70, key70, nonce70, sizeof(input70) },
++	{ input71, output71, key71, nonce71, sizeof(input71) },
++	{ input72, output72, key72, nonce72, sizeof(input72) },
++	{ input73, output73, key73, nonce73, sizeof(input73) },
++	{ input74, output74, key74, nonce74, sizeof(input74) }
++};
++
++static const struct hchacha20_testvec hchacha20_testvecs[] __initconst = {{
++	.key	= { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
++		    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
++		    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
++		    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
++	.nonce	= { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a,
++		    0x00, 0x00, 0x00, 0x00, 0x31, 0x41, 0x59, 0x27 },
++	.output	= { 0x82, 0x41, 0x3b, 0x42, 0x27, 0xb2, 0x7b, 0xfe,
++		    0xd3, 0x0e, 0x42, 0x50, 0x8a, 0x87, 0x7d, 0x73,
++		    0xa0, 0xf9, 0xe4, 0xd5, 0x8a, 0x74, 0xa8, 0x53,
++		    0xc1, 0x2e, 0xc4, 0x13, 0x26, 0xd3, 0xec, 0xdc }
++}};
++
++static bool __init chacha20_selftest(void) /* true iff every check passed */
++{
++	enum {
++		MAXIMUM_TEST_BUFFER_LEN = 1UL << 10,
++		OUTRAGEOUSLY_HUGE_BUFFER_LEN = PAGE_SIZE * 35 + 17 /* 143k */
++	};
++	size_t i, j, k;
++	u32 derived_key[CHACHA20_KEY_WORDS];
++	u8 *offset_input = NULL, *computed_output = NULL, *massive_input = NULL;
++	u8 offset_key[CHACHA20_KEY_SIZE + 1]
++			__aligned(__alignof__(unsigned long));
++	struct chacha20_ctx state;
++	bool success = true;
++	simd_context_t simd_context;
++
++	offset_input = kmalloc(MAXIMUM_TEST_BUFFER_LEN + 1, GFP_KERNEL);
++	computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN + 1, GFP_KERNEL);
++	massive_input = vzalloc(OUTRAGEOUSLY_HUGE_BUFFER_LEN);
++	if (!computed_output || !offset_input || !massive_input) {
++		pr_err("chacha20 self-test malloc: FAIL\n");
++		success = false;
++		goto out;
++	}
++
++	simd_get(&simd_context);
++	for (i = 0; i < ARRAY_SIZE(chacha20_testvecs); ++i) {
++		/* Boring case: buffers used at offset 0, one chacha20() call */
++		memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN + 1);
++		memset(&state, 0, sizeof(state));
++		chacha20_init(&state, chacha20_testvecs[i].key,
++			      chacha20_testvecs[i].nonce);
++		chacha20(&state, computed_output, chacha20_testvecs[i].input,
++			 chacha20_testvecs[i].ilen, &simd_context);
++		if (memcmp(computed_output, chacha20_testvecs[i].output,
++			   chacha20_testvecs[i].ilen)) {
++			pr_err("chacha20 self-test %zu: FAIL\n", i + 1);
++			success = false;
++		}
++		for (k = chacha20_testvecs[i].ilen; /* tail must stay zero */
++		     k < MAXIMUM_TEST_BUFFER_LEN + 1; ++k) {
++			if (computed_output[k]) {
++				pr_err("chacha20 self-test %zu (zero check): FAIL\n",
++				       i + 1);
++				success = false;
++				break;
++			}
++		}
++
++		/* Unaligned case: input, output and key offset by one byte */
++		memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN + 1);
++		memset(&state, 0, sizeof(state));
++		memcpy(offset_input + 1, chacha20_testvecs[i].input,
++		       chacha20_testvecs[i].ilen);
++		memcpy(offset_key + 1, chacha20_testvecs[i].key,
++		       CHACHA20_KEY_SIZE);
++		chacha20_init(&state, offset_key + 1, chacha20_testvecs[i].nonce);
++		chacha20(&state, computed_output + 1, offset_input + 1,
++			 chacha20_testvecs[i].ilen, &simd_context);
++		if (memcmp(computed_output + 1, chacha20_testvecs[i].output,
++			   chacha20_testvecs[i].ilen)) {
++			pr_err("chacha20 self-test %zu (unaligned): FAIL\n",
++			       i + 1);
++			success = false;
++		}
++		if (computed_output[0]) { /* byte before dst must stay zero */
++			pr_err("chacha20 self-test %zu (unaligned, zero check): FAIL\n",
++			       i + 1);
++			success = false;
++		}
++		for (k = chacha20_testvecs[i].ilen + 1;
++		     k < MAXIMUM_TEST_BUFFER_LEN + 1; ++k) {
++			if (computed_output[k]) {
++				pr_err("chacha20 self-test %zu (unaligned, zero check): FAIL\n",
++				       i + 1);
++				success = false;
++				break;
++			}
++		}
++
++		/* Chunked case: first block, then the rest, on one state */
++		if (chacha20_testvecs[i].ilen <= CHACHA20_BLOCK_SIZE)
++			goto next_test;
++		memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN + 1);
++		memset(&state, 0, sizeof(state));
++		chacha20_init(&state, chacha20_testvecs[i].key,
++			      chacha20_testvecs[i].nonce);
++		chacha20(&state, computed_output, chacha20_testvecs[i].input,
++			 CHACHA20_BLOCK_SIZE, &simd_context);
++		chacha20(&state, computed_output + CHACHA20_BLOCK_SIZE,
++			 chacha20_testvecs[i].input + CHACHA20_BLOCK_SIZE,
++			 chacha20_testvecs[i].ilen - CHACHA20_BLOCK_SIZE,
++			 &simd_context);
++		if (memcmp(computed_output, chacha20_testvecs[i].output,
++			   chacha20_testvecs[i].ilen)) {
++			pr_err("chacha20 self-test %zu (chunked): FAIL\n",
++			       i + 1);
++			success = false;
++		}
++		for (k = chacha20_testvecs[i].ilen;
++		     k < MAXIMUM_TEST_BUFFER_LEN + 1; ++k) {
++			if (computed_output[k]) {
++				pr_err("chacha20 self-test %zu (chunked, zero check): FAIL\n",
++				       i + 1);
++				success = false;
++				break;
++			}
++		}
++
++next_test:
++		/* Sliding unaligned case: short vectors at every offset j */
++		if (chacha20_testvecs[i].ilen > CHACHA20_BLOCK_SIZE + 1 ||
++		    !chacha20_testvecs[i].ilen)
++			continue;
++		for (j = 1; j < CHACHA20_BLOCK_SIZE; ++j) {
++			memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN + 1);
++			memset(&state, 0, sizeof(state));
++			memcpy(offset_input + j, chacha20_testvecs[i].input,
++			       chacha20_testvecs[i].ilen);
++			chacha20_init(&state, chacha20_testvecs[i].key,
++				      chacha20_testvecs[i].nonce);
++			chacha20(&state, computed_output + j, offset_input + j,
++				 chacha20_testvecs[i].ilen, &simd_context);
++			if (memcmp(computed_output + j,
++				   chacha20_testvecs[i].output,
++				   chacha20_testvecs[i].ilen)) {
++				pr_err("chacha20 self-test %zu (unaligned, slide %zu): FAIL\n",
++				       i + 1, j);
++				success = false;
++			}
++			for (k = 0; k < j; ++k) { /* nothing before dst */
++				if (computed_output[k]) {
++					pr_err("chacha20 self-test %zu (unaligned, slide %zu, zero check): FAIL\n",
++					       i + 1, j);
++					success = false;
++					break;
++				}
++			}
++			for (k = chacha20_testvecs[i].ilen + j;
++			     k < MAXIMUM_TEST_BUFFER_LEN + 1; ++k) {
++				if (computed_output[k]) {
++					pr_err("chacha20 self-test %zu (unaligned, slide %zu, zero check): FAIL\n",
++					       i + 1, j);
++					success = false;
++					break;
++				}
++			}
++		}
++	}
++	for (i = 0; i < ARRAY_SIZE(hchacha20_testvecs); ++i) {
++		memset(&derived_key, 0, sizeof(derived_key));
++		hchacha20(derived_key, hchacha20_testvecs[i].nonce,
++			  hchacha20_testvecs[i].key, &simd_context);
++		cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
++		if (memcmp(derived_key, hchacha20_testvecs[i].output,
++			   CHACHA20_KEY_SIZE)) {
++			pr_err("hchacha20 self-test %zu: FAIL\n", i + 1);
++			success = false;
++		}
++	}
++	memset(&state, 0, sizeof(state)); /* massive: two passes, see below */
++	chacha20_init(&state, chacha20_testvecs[0].key,
++		      chacha20_testvecs[0].nonce);
++	chacha20(&state, massive_input, massive_input,
++		 OUTRAGEOUSLY_HUGE_BUFFER_LEN, &simd_context);
++	chacha20_init(&state, chacha20_testvecs[0].key,
++		      chacha20_testvecs[0].nonce);
++	chacha20(&state, massive_input, massive_input,
++		 OUTRAGEOUSLY_HUGE_BUFFER_LEN, DONT_USE_SIMD);
++	for (k = 0; k < OUTRAGEOUSLY_HUGE_BUFFER_LEN; ++k) {
++		if (massive_input[k]) { /* second pass must undo the first */
++			pr_err("chacha20 self-test massive: FAIL\n");
++			success = false;
++			break;
++		}
++	}
++
++	simd_put(&simd_context);
++
++out:
++	kfree(offset_input);
++	kfree(computed_output);
++	vfree(massive_input);
++	return success;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/selftest/chacha20poly1305.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,9076 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++struct chacha20poly1305_testvec {
++	const u8 *input, *output, *assoc, *nonce, *key; /* vector data */
++	size_t ilen, alen, nlen; /* presumably lengths of input/assoc/nonce */
++	bool failure; /* NOTE(review): likely "expected to fail" - confirm */
++};
++
++/* The first of these are the ChaCha20-Poly1305 AEAD test vectors from RFC7539
++ * 2.8.2. The ones after that were generated by reference implementations. The
++ * final marked ones are taken from wycheproof, but we only do these for the
++ * encrypt side, because mostly we're stressing the primitives rather than the
++ * actual chapoly construction. This also requires adding a 96-bit nonce
++ * construction, just for the purpose of the tests.
++ */
++
++static const u8 enc_input001[] __initconst = {
++	0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74,
++	0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20,
++	0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66,
++	0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
++	0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69,
++	0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20,
++	0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20,
++	0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d,
++	0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e,
++	0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65,
++	0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64,
++	0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
++	0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f,
++	0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64,
++	0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65,
++	0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
++	0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61,
++	0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e,
++	0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69,
++	0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72,
++	0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20,
++	0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65,
++	0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61,
++	0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72,
++	0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
++	0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61,
++	0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20,
++	0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65,
++	0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20,
++	0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20,
++	0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b,
++	0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67,
++	0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80,
++	0x9d
++};
++static const u8 enc_output001[] __initconst = {
++	0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4,
++	0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd,
++	0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89,
++	0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 0x8c, 0xb2,
++	0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee,
++	0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0,
++	0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00,
++	0xd4, 0xf0, 0x3b, 0x7f, 0x35, 0x58, 0x94, 0xcf,
++	0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce,
++	0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81,
++	0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd,
++	0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55,
++	0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61,
++	0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38,
++	0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0,
++	0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4,
++	0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46,
++	0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9,
++	0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e,
++	0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e,
++	0xaf, 0x45, 0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15,
++	0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a,
++	0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea,
++	0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a,
++	0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99,
++	0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e,
++	0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10,
++	0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10,
++	0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94,
++	0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30,
++	0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf,
++	0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29,
++	0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70,
++	0x9b, 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb,
++	0x22, 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f,
++	0x38
++};
++static const u8 enc_assoc001[] __initconst = {
++	0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x4e, 0x91
++};
++static const u8 enc_nonce001[] __initconst = {
++	0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
++};
++static const u8 enc_key001[] __initconst = {
++	0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
++	0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
++	0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
++	0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
++};
++
++static const u8 enc_input002[] __initconst = { };
++static const u8 enc_output002[] __initconst = {
++	0xea, 0xe0, 0x1e, 0x9e, 0x2c, 0x91, 0xaa, 0xe1,
++	0xdb, 0x5d, 0x99, 0x3f, 0x8a, 0xf7, 0x69, 0x92
++};
++static const u8 enc_assoc002[] __initconst = { };
++static const u8 enc_nonce002[] __initconst = {
++	0xca, 0xbf, 0x33, 0x71, 0x32, 0x45, 0x77, 0x8e
++};
++static const u8 enc_key002[] __initconst = {
++	0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f,
++	0x2d, 0x29, 0x25, 0x76, 0xd5, 0x75, 0x27, 0x86,
++	0x91, 0x9a, 0x27, 0x7a, 0xfb, 0x46, 0xc5, 0xef,
++	0x94, 0x81, 0x79, 0x57, 0x14, 0x59, 0x40, 0x68
++};
++
++static const u8 enc_input003[] __initconst = { };
++static const u8 enc_output003[] __initconst = {
++	0xdd, 0x6b, 0x3b, 0x82, 0xce, 0x5a, 0xbd, 0xd6,
++	0xa9, 0x35, 0x83, 0xd8, 0x8c, 0x3d, 0x85, 0x77
++};
++static const u8 enc_assoc003[] __initconst = {
++	0x33, 0x10, 0x41, 0x12, 0x1f, 0xf3, 0xd2, 0x6b
++};
++static const u8 enc_nonce003[] __initconst = {
++	0x3d, 0x86, 0xb5, 0x6b, 0xc8, 0xa3, 0x1f, 0x1d
++};
++static const u8 enc_key003[] __initconst = {
++	0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88,
++	0x34, 0xd1, 0x13, 0xaf, 0x57, 0xa1, 0xeb, 0x3a,
++	0x2a, 0x80, 0x51, 0x36, 0xec, 0x5b, 0xbc, 0x08,
++	0x93, 0x84, 0x21, 0xb5, 0x13, 0x88, 0x3c, 0x0d
++};
++
++static const u8 enc_input004[] __initconst = {
++	0xa4
++};
++static const u8 enc_output004[] __initconst = {
++	0xb7, 0x1b, 0xb0, 0x73, 0x59, 0xb0, 0x84, 0xb2,
++	0x6d, 0x8e, 0xab, 0x94, 0x31, 0xa1, 0xae, 0xac,
++	0x89
++};
++static const u8 enc_assoc004[] __initconst = {
++	0x6a, 0xe2, 0xad, 0x3f, 0x88, 0x39, 0x5a, 0x40
++};
++static const u8 enc_nonce004[] __initconst = {
++	0xd2, 0x32, 0x1f, 0x29, 0x28, 0xc6, 0xc4, 0xc4
++};
++static const u8 enc_key004[] __initconst = {
++	0x4b, 0x28, 0x4b, 0xa3, 0x7b, 0xbe, 0xe9, 0xf8,
++	0x31, 0x80, 0x82, 0xd7, 0xd8, 0xe8, 0xb5, 0xa1,
++	0xe2, 0x18, 0x18, 0x8a, 0x9c, 0xfa, 0xa3, 0x3d,
++	0x25, 0x71, 0x3e, 0x40, 0xbc, 0x54, 0x7a, 0x3e
++};
++
++static const u8 enc_input005[] __initconst = {
++	0x2d
++};
++static const u8 enc_output005[] __initconst = {
++	0xbf, 0xe1, 0x5b, 0x0b, 0xdb, 0x6b, 0xf5, 0x5e,
++	0x6c, 0x5d, 0x84, 0x44, 0x39, 0x81, 0xc1, 0x9c,
++	0xac
++};
++static const u8 enc_assoc005[] __initconst = { };
++static const u8 enc_nonce005[] __initconst = {
++	0x20, 0x1c, 0xaa, 0x5f, 0x9c, 0xbf, 0x92, 0x30
++};
++static const u8 enc_key005[] __initconst = {
++	0x66, 0xca, 0x9c, 0x23, 0x2a, 0x4b, 0x4b, 0x31,
++	0x0e, 0x92, 0x89, 0x8b, 0xf4, 0x93, 0xc7, 0x87,
++	0x98, 0xa3, 0xd8, 0x39, 0xf8, 0xf4, 0xa7, 0x01,
++	0xc0, 0x2e, 0x0a, 0xa6, 0x7e, 0x5a, 0x78, 0x87
++};
++
++static const u8 enc_input006[] __initconst = {
++	0x33, 0x2f, 0x94, 0xc1, 0xa4, 0xef, 0xcc, 0x2a,
++	0x5b, 0xa6, 0xe5, 0x8f, 0x1d, 0x40, 0xf0, 0x92,
++	0x3c, 0xd9, 0x24, 0x11, 0xa9, 0x71, 0xf9, 0x37,
++	0x14, 0x99, 0xfa, 0xbe, 0xe6, 0x80, 0xde, 0x50,
++	0xc9, 0x96, 0xd4, 0xb0, 0xec, 0x9e, 0x17, 0xec,
++	0xd2, 0x5e, 0x72, 0x99, 0xfc, 0x0a, 0xe1, 0xcb,
++	0x48, 0xd2, 0x85, 0xdd, 0x2f, 0x90, 0xe0, 0x66,
++	0x3b, 0xe6, 0x20, 0x74, 0xbe, 0x23, 0x8f, 0xcb,
++	0xb4, 0xe4, 0xda, 0x48, 0x40, 0xa6, 0xd1, 0x1b,
++	0xc7, 0x42, 0xce, 0x2f, 0x0c, 0xa6, 0x85, 0x6e,
++	0x87, 0x37, 0x03, 0xb1, 0x7c, 0x25, 0x96, 0xa3,
++	0x05, 0xd8, 0xb0, 0xf4, 0xed, 0xea, 0xc2, 0xf0,
++	0x31, 0x98, 0x6c, 0xd1, 0x14, 0x25, 0xc0, 0xcb,
++	0x01, 0x74, 0xd0, 0x82, 0xf4, 0x36, 0xf5, 0x41,
++	0xd5, 0xdc, 0xca, 0xc5, 0xbb, 0x98, 0xfe, 0xfc,
++	0x69, 0x21, 0x70, 0xd8, 0xa4, 0x4b, 0xc8, 0xde,
++	0x8f
++};
++static const u8 enc_output006[] __initconst = {
++	0x8b, 0x06, 0xd3, 0x31, 0xb0, 0x93, 0x45, 0xb1,
++	0x75, 0x6e, 0x26, 0xf9, 0x67, 0xbc, 0x90, 0x15,
++	0x81, 0x2c, 0xb5, 0xf0, 0xc6, 0x2b, 0xc7, 0x8c,
++	0x56, 0xd1, 0xbf, 0x69, 0x6c, 0x07, 0xa0, 0xda,
++	0x65, 0x27, 0xc9, 0x90, 0x3d, 0xef, 0x4b, 0x11,
++	0x0f, 0x19, 0x07, 0xfd, 0x29, 0x92, 0xd9, 0xc8,
++	0xf7, 0x99, 0x2e, 0x4a, 0xd0, 0xb8, 0x2c, 0xdc,
++	0x93, 0xf5, 0x9e, 0x33, 0x78, 0xd1, 0x37, 0xc3,
++	0x66, 0xd7, 0x5e, 0xbc, 0x44, 0xbf, 0x53, 0xa5,
++	0xbc, 0xc4, 0xcb, 0x7b, 0x3a, 0x8e, 0x7f, 0x02,
++	0xbd, 0xbb, 0xe7, 0xca, 0xa6, 0x6c, 0x6b, 0x93,
++	0x21, 0x93, 0x10, 0x61, 0xe7, 0x69, 0xd0, 0x78,
++	0xf3, 0x07, 0x5a, 0x1a, 0x8f, 0x73, 0xaa, 0xb1,
++	0x4e, 0xd3, 0xda, 0x4f, 0xf3, 0x32, 0xe1, 0x66,
++	0x3e, 0x6c, 0xc6, 0x13, 0xba, 0x06, 0x5b, 0xfc,
++	0x6a, 0xe5, 0x6f, 0x60, 0xfb, 0x07, 0x40, 0xb0,
++	0x8c, 0x9d, 0x84, 0x43, 0x6b, 0xc1, 0xf7, 0x8d,
++	0x8d, 0x31, 0xf7, 0x7a, 0x39, 0x4d, 0x8f, 0x9a,
++	0xeb
++};
++static const u8 enc_assoc006[] __initconst = {
++	0x70, 0xd3, 0x33, 0xf3, 0x8b, 0x18, 0x0b
++};
++static const u8 enc_nonce006[] __initconst = {
++	0xdf, 0x51, 0x84, 0x82, 0x42, 0x0c, 0x75, 0x9c
++};
++static const u8 enc_key006[] __initconst = {
++	0x68, 0x7b, 0x8d, 0x8e, 0xe3, 0xc4, 0xdd, 0xae,
++	0xdf, 0x72, 0x7f, 0x53, 0x72, 0x25, 0x1e, 0x78,
++	0x91, 0xcb, 0x69, 0x76, 0x1f, 0x49, 0x93, 0xf9,
++	0x6f, 0x21, 0xcc, 0x39, 0x9c, 0xad, 0xb1, 0x01
++};
++
++static const u8 enc_input007[] __initconst = {
++	0x9b, 0x18, 0xdb, 0xdd, 0x9a, 0x0f, 0x3e, 0xa5,
++	0x15, 0x17, 0xde, 0xdf, 0x08, 0x9d, 0x65, 0x0a,
++	0x67, 0x30, 0x12, 0xe2, 0x34, 0x77, 0x4b, 0xc1,
++	0xd9, 0xc6, 0x1f, 0xab, 0xc6, 0x18, 0x50, 0x17,
++	0xa7, 0x9d, 0x3c, 0xa6, 0xc5, 0x35, 0x8c, 0x1c,
++	0xc0, 0xa1, 0x7c, 0x9f, 0x03, 0x89, 0xca, 0xe1,
++	0xe6, 0xe9, 0xd4, 0xd3, 0x88, 0xdb, 0xb4, 0x51,
++	0x9d, 0xec, 0xb4, 0xfc, 0x52, 0xee, 0x6d, 0xf1,
++	0x75, 0x42, 0xc6, 0xfd, 0xbd, 0x7a, 0x8e, 0x86,
++	0xfc, 0x44, 0xb3, 0x4f, 0xf3, 0xea, 0x67, 0x5a,
++	0x41, 0x13, 0xba, 0xb0, 0xdc, 0xe1, 0xd3, 0x2a,
++	0x7c, 0x22, 0xb3, 0xca, 0xac, 0x6a, 0x37, 0x98,
++	0x3e, 0x1d, 0x40, 0x97, 0xf7, 0x9b, 0x1d, 0x36,
++	0x6b, 0xb3, 0x28, 0xbd, 0x60, 0x82, 0x47, 0x34,
++	0xaa, 0x2f, 0x7d, 0xe9, 0xa8, 0x70, 0x81, 0x57,
++	0xd4, 0xb9, 0x77, 0x0a, 0x9d, 0x29, 0xa7, 0x84,
++	0x52, 0x4f, 0xc2, 0x4a, 0x40, 0x3b, 0x3c, 0xd4,
++	0xc9, 0x2a, 0xdb, 0x4a, 0x53, 0xc4, 0xbe, 0x80,
++	0xe9, 0x51, 0x7f, 0x8f, 0xc7, 0xa2, 0xce, 0x82,
++	0x5c, 0x91, 0x1e, 0x74, 0xd9, 0xd0, 0xbd, 0xd5,
++	0xf3, 0xfd, 0xda, 0x4d, 0x25, 0xb4, 0xbb, 0x2d,
++	0xac, 0x2f, 0x3d, 0x71, 0x85, 0x7b, 0xcf, 0x3c,
++	0x7b, 0x3e, 0x0e, 0x22, 0x78, 0x0c, 0x29, 0xbf,
++	0xe4, 0xf4, 0x57, 0xb3, 0xcb, 0x49, 0xa0, 0xfc,
++	0x1e, 0x05, 0x4e, 0x16, 0xbc, 0xd5, 0xa8, 0xa3,
++	0xee, 0x05, 0x35, 0xc6, 0x7c, 0xab, 0x60, 0x14,
++	0x55, 0x1a, 0x8e, 0xc5, 0x88, 0x5d, 0xd5, 0x81,
++	0xc2, 0x81, 0xa5, 0xc4, 0x60, 0xdb, 0xaf, 0x77,
++	0x91, 0xe1, 0xce, 0xa2, 0x7e, 0x7f, 0x42, 0xe3,
++	0xb0, 0x13, 0x1c, 0x1f, 0x25, 0x60, 0x21, 0xe2,
++	0x40, 0x5f, 0x99, 0xb7, 0x73, 0xec, 0x9b, 0x2b,
++	0xf0, 0x65, 0x11, 0xc8, 0xd0, 0x0a, 0x9f, 0xd3
++};
++static const u8 enc_output007[] __initconst = {
++	0x85, 0x04, 0xc2, 0xed, 0x8d, 0xfd, 0x97, 0x5c,
++	0xd2, 0xb7, 0xe2, 0xc1, 0x6b, 0xa3, 0xba, 0xf8,
++	0xc9, 0x50, 0xc3, 0xc6, 0xa5, 0xe3, 0xa4, 0x7c,
++	0xc3, 0x23, 0x49, 0x5e, 0xa9, 0xb9, 0x32, 0xeb,
++	0x8a, 0x7c, 0xca, 0xe5, 0xec, 0xfb, 0x7c, 0xc0,
++	0xcb, 0x7d, 0xdc, 0x2c, 0x9d, 0x92, 0x55, 0x21,
++	0x0a, 0xc8, 0x43, 0x63, 0x59, 0x0a, 0x31, 0x70,
++	0x82, 0x67, 0x41, 0x03, 0xf8, 0xdf, 0xf2, 0xac,
++	0xa7, 0x02, 0xd4, 0xd5, 0x8a, 0x2d, 0xc8, 0x99,
++	0x19, 0x66, 0xd0, 0xf6, 0x88, 0x2c, 0x77, 0xd9,
++	0xd4, 0x0d, 0x6c, 0xbd, 0x98, 0xde, 0xe7, 0x7f,
++	0xad, 0x7e, 0x8a, 0xfb, 0xe9, 0x4b, 0xe5, 0xf7,
++	0xe5, 0x50, 0xa0, 0x90, 0x3f, 0xd6, 0x22, 0x53,
++	0xe3, 0xfe, 0x1b, 0xcc, 0x79, 0x3b, 0xec, 0x12,
++	0x47, 0x52, 0xa7, 0xd6, 0x04, 0xe3, 0x52, 0xe6,
++	0x93, 0x90, 0x91, 0x32, 0x73, 0x79, 0xb8, 0xd0,
++	0x31, 0xde, 0x1f, 0x9f, 0x2f, 0x05, 0x38, 0x54,
++	0x2f, 0x35, 0x04, 0x39, 0xe0, 0xa7, 0xba, 0xc6,
++	0x52, 0xf6, 0x37, 0x65, 0x4c, 0x07, 0xa9, 0x7e,
++	0xb3, 0x21, 0x6f, 0x74, 0x8c, 0xc9, 0xde, 0xdb,
++	0x65, 0x1b, 0x9b, 0xaa, 0x60, 0xb1, 0x03, 0x30,
++	0x6b, 0xb2, 0x03, 0xc4, 0x1c, 0x04, 0xf8, 0x0f,
++	0x64, 0xaf, 0x46, 0xe4, 0x65, 0x99, 0x49, 0xe2,
++	0xea, 0xce, 0x78, 0x00, 0xd8, 0x8b, 0xd5, 0x2e,
++	0xcf, 0xfc, 0x40, 0x49, 0xe8, 0x58, 0xdc, 0x34,
++	0x9c, 0x8c, 0x61, 0xbf, 0x0a, 0x8e, 0xec, 0x39,
++	0xa9, 0x30, 0x05, 0x5a, 0xd2, 0x56, 0x01, 0xc7,
++	0xda, 0x8f, 0x4e, 0xbb, 0x43, 0xa3, 0x3a, 0xf9,
++	0x15, 0x2a, 0xd0, 0xa0, 0x7a, 0x87, 0x34, 0x82,
++	0xfe, 0x8a, 0xd1, 0x2d, 0x5e, 0xc7, 0xbf, 0x04,
++	0x53, 0x5f, 0x3b, 0x36, 0xd4, 0x25, 0x5c, 0x34,
++	0x7a, 0x8d, 0xd5, 0x05, 0xce, 0x72, 0xca, 0xef,
++	0x7a, 0x4b, 0xbc, 0xb0, 0x10, 0x5c, 0x96, 0x42,
++	0x3a, 0x00, 0x98, 0xcd, 0x15, 0xe8, 0xb7, 0x53
++};
++static const u8 enc_assoc007[] __initconst = { };
++static const u8 enc_nonce007[] __initconst = {
++	0xde, 0x7b, 0xef, 0xc3, 0x65, 0x1b, 0x68, 0xb0
++};
++static const u8 enc_key007[] __initconst = {
++	0x8d, 0xb8, 0x91, 0x48, 0xf0, 0xe7, 0x0a, 0xbd,
++	0xf9, 0x3f, 0xcd, 0xd9, 0xa0, 0x1e, 0x42, 0x4c,
++	0xe7, 0xde, 0x25, 0x3d, 0xa3, 0xd7, 0x05, 0x80,
++	0x8d, 0xf2, 0x82, 0xac, 0x44, 0x16, 0x51, 0x01
++};
++
++static const u8 enc_input008[] __initconst = {
++	0xc3, 0x09, 0x94, 0x62, 0xe6, 0x46, 0x2e, 0x10,
++	0xbe, 0x00, 0xe4, 0xfc, 0xf3, 0x40, 0xa3, 0xe2,
++	0x0f, 0xc2, 0x8b, 0x28, 0xdc, 0xba, 0xb4, 0x3c,
++	0xe4, 0x21, 0x58, 0x61, 0xcd, 0x8b, 0xcd, 0xfb,
++	0xac, 0x94, 0xa1, 0x45, 0xf5, 0x1c, 0xe1, 0x12,
++	0xe0, 0x3b, 0x67, 0x21, 0x54, 0x5e, 0x8c, 0xaa,
++	0xcf, 0xdb, 0xb4, 0x51, 0xd4, 0x13, 0xda, 0xe6,
++	0x83, 0x89, 0xb6, 0x92, 0xe9, 0x21, 0x76, 0xa4,
++	0x93, 0x7d, 0x0e, 0xfd, 0x96, 0x36, 0x03, 0x91,
++	0x43, 0x5c, 0x92, 0x49, 0x62, 0x61, 0x7b, 0xeb,
++	0x43, 0x89, 0xb8, 0x12, 0x20, 0x43, 0xd4, 0x47,
++	0x06, 0x84, 0xee, 0x47, 0xe9, 0x8a, 0x73, 0x15,
++	0x0f, 0x72, 0xcf, 0xed, 0xce, 0x96, 0xb2, 0x7f,
++	0x21, 0x45, 0x76, 0xeb, 0x26, 0x28, 0x83, 0x6a,
++	0xad, 0xaa, 0xa6, 0x81, 0xd8, 0x55, 0xb1, 0xa3,
++	0x85, 0xb3, 0x0c, 0xdf, 0xf1, 0x69, 0x2d, 0x97,
++	0x05, 0x2a, 0xbc, 0x7c, 0x7b, 0x25, 0xf8, 0x80,
++	0x9d, 0x39, 0x25, 0xf3, 0x62, 0xf0, 0x66, 0x5e,
++	0xf4, 0xa0, 0xcf, 0xd8, 0xfd, 0x4f, 0xb1, 0x1f,
++	0x60, 0x3a, 0x08, 0x47, 0xaf, 0xe1, 0xf6, 0x10,
++	0x77, 0x09, 0xa7, 0x27, 0x8f, 0x9a, 0x97, 0x5a,
++	0x26, 0xfa, 0xfe, 0x41, 0x32, 0x83, 0x10, 0xe0,
++	0x1d, 0xbf, 0x64, 0x0d, 0xf4, 0x1c, 0x32, 0x35,
++	0xe5, 0x1b, 0x36, 0xef, 0xd4, 0x4a, 0x93, 0x4d,
++	0x00, 0x7c, 0xec, 0x02, 0x07, 0x8b, 0x5d, 0x7d,
++	0x1b, 0x0e, 0xd1, 0xa6, 0xa5, 0x5d, 0x7d, 0x57,
++	0x88, 0xa8, 0xcc, 0x81, 0xb4, 0x86, 0x4e, 0xb4,
++	0x40, 0xe9, 0x1d, 0xc3, 0xb1, 0x24, 0x3e, 0x7f,
++	0xcc, 0x8a, 0x24, 0x9b, 0xdf, 0x6d, 0xf0, 0x39,
++	0x69, 0x3e, 0x4c, 0xc0, 0x96, 0xe4, 0x13, 0xda,
++	0x90, 0xda, 0xf4, 0x95, 0x66, 0x8b, 0x17, 0x17,
++	0xfe, 0x39, 0x43, 0x25, 0xaa, 0xda, 0xa0, 0x43,
++	0x3c, 0xb1, 0x41, 0x02, 0xa3, 0xf0, 0xa7, 0x19,
++	0x59, 0xbc, 0x1d, 0x7d, 0x6c, 0x6d, 0x91, 0x09,
++	0x5c, 0xb7, 0x5b, 0x01, 0xd1, 0x6f, 0x17, 0x21,
++	0x97, 0xbf, 0x89, 0x71, 0xa5, 0xb0, 0x6e, 0x07,
++	0x45, 0xfd, 0x9d, 0xea, 0x07, 0xf6, 0x7a, 0x9f,
++	0x10, 0x18, 0x22, 0x30, 0x73, 0xac, 0xd4, 0x6b,
++	0x72, 0x44, 0xed, 0xd9, 0x19, 0x9b, 0x2d, 0x4a,
++	0x41, 0xdd, 0xd1, 0x85, 0x5e, 0x37, 0x19, 0xed,
++	0xd2, 0x15, 0x8f, 0x5e, 0x91, 0xdb, 0x33, 0xf2,
++	0xe4, 0xdb, 0xff, 0x98, 0xfb, 0xa3, 0xb5, 0xca,
++	0x21, 0x69, 0x08, 0xe7, 0x8a, 0xdf, 0x90, 0xff,
++	0x3e, 0xe9, 0x20, 0x86, 0x3c, 0xe9, 0xfc, 0x0b,
++	0xfe, 0x5c, 0x61, 0xaa, 0x13, 0x92, 0x7f, 0x7b,
++	0xec, 0xe0, 0x6d, 0xa8, 0x23, 0x22, 0xf6, 0x6b,
++	0x77, 0xc4, 0xfe, 0x40, 0x07, 0x3b, 0xb6, 0xf6,
++	0x8e, 0x5f, 0xd4, 0xb9, 0xb7, 0x0f, 0x21, 0x04,
++	0xef, 0x83, 0x63, 0x91, 0x69, 0x40, 0xa3, 0x48,
++	0x5c, 0xd2, 0x60, 0xf9, 0x4f, 0x6c, 0x47, 0x8b,
++	0x3b, 0xb1, 0x9f, 0x8e, 0xee, 0x16, 0x8a, 0x13,
++	0xfc, 0x46, 0x17, 0xc3, 0xc3, 0x32, 0x56, 0xf8,
++	0x3c, 0x85, 0x3a, 0xb6, 0x3e, 0xaa, 0x89, 0x4f,
++	0xb3, 0xdf, 0x38, 0xfd, 0xf1, 0xe4, 0x3a, 0xc0,
++	0xe6, 0x58, 0xb5, 0x8f, 0xc5, 0x29, 0xa2, 0x92,
++	0x4a, 0xb6, 0xa0, 0x34, 0x7f, 0xab, 0xb5, 0x8a,
++	0x90, 0xa1, 0xdb, 0x4d, 0xca, 0xb6, 0x2c, 0x41,
++	0x3c, 0xf7, 0x2b, 0x21, 0xc3, 0xfd, 0xf4, 0x17,
++	0x5c, 0xb5, 0x33, 0x17, 0x68, 0x2b, 0x08, 0x30,
++	0xf3, 0xf7, 0x30, 0x3c, 0x96, 0xe6, 0x6a, 0x20,
++	0x97, 0xe7, 0x4d, 0x10, 0x5f, 0x47, 0x5f, 0x49,
++	0x96, 0x09, 0xf0, 0x27, 0x91, 0xc8, 0xf8, 0x5a,
++	0x2e, 0x79, 0xb5, 0xe2, 0xb8, 0xe8, 0xb9, 0x7b,
++	0xd5, 0x10, 0xcb, 0xff, 0x5d, 0x14, 0x73, 0xf3
++};
++static const u8 enc_output008[] __initconst = {
++	0x14, 0xf6, 0x41, 0x37, 0xa6, 0xd4, 0x27, 0xcd,
++	0xdb, 0x06, 0x3e, 0x9a, 0x4e, 0xab, 0xd5, 0xb1,
++	0x1e, 0x6b, 0xd2, 0xbc, 0x11, 0xf4, 0x28, 0x93,
++	0x63, 0x54, 0xef, 0xbb, 0x5e, 0x1d, 0x3a, 0x1d,
++	0x37, 0x3c, 0x0a, 0x6c, 0x1e, 0xc2, 0xd1, 0x2c,
++	0xb5, 0xa3, 0xb5, 0x7b, 0xb8, 0x8f, 0x25, 0xa6,
++	0x1b, 0x61, 0x1c, 0xec, 0x28, 0x58, 0x26, 0xa4,
++	0xa8, 0x33, 0x28, 0x25, 0x5c, 0x45, 0x05, 0xe5,
++	0x6c, 0x99, 0xe5, 0x45, 0xc4, 0xa2, 0x03, 0x84,
++	0x03, 0x73, 0x1e, 0x8c, 0x49, 0xac, 0x20, 0xdd,
++	0x8d, 0xb3, 0xc4, 0xf5, 0xe7, 0x4f, 0xf1, 0xed,
++	0xa1, 0x98, 0xde, 0xa4, 0x96, 0xdd, 0x2f, 0xab,
++	0xab, 0x97, 0xcf, 0x3e, 0xd2, 0x9e, 0xb8, 0x13,
++	0x07, 0x28, 0x29, 0x19, 0xaf, 0xfd, 0xf2, 0x49,
++	0x43, 0xea, 0x49, 0x26, 0x91, 0xc1, 0x07, 0xd6,
++	0xbb, 0x81, 0x75, 0x35, 0x0d, 0x24, 0x7f, 0xc8,
++	0xda, 0xd4, 0xb7, 0xeb, 0xe8, 0x5c, 0x09, 0xa2,
++	0x2f, 0xdc, 0x28, 0x7d, 0x3a, 0x03, 0xfa, 0x94,
++	0xb5, 0x1d, 0x17, 0x99, 0x36, 0xc3, 0x1c, 0x18,
++	0x34, 0xe3, 0x9f, 0xf5, 0x55, 0x7c, 0xb0, 0x60,
++	0x9d, 0xff, 0xac, 0xd4, 0x61, 0xf2, 0xad, 0xf8,
++	0xce, 0xc7, 0xbe, 0x5c, 0xd2, 0x95, 0xa8, 0x4b,
++	0x77, 0x13, 0x19, 0x59, 0x26, 0xc9, 0xb7, 0x8f,
++	0x6a, 0xcb, 0x2d, 0x37, 0x91, 0xea, 0x92, 0x9c,
++	0x94, 0x5b, 0xda, 0x0b, 0xce, 0xfe, 0x30, 0x20,
++	0xf8, 0x51, 0xad, 0xf2, 0xbe, 0xe7, 0xc7, 0xff,
++	0xb3, 0x33, 0x91, 0x6a, 0xc9, 0x1a, 0x41, 0xc9,
++	0x0f, 0xf3, 0x10, 0x0e, 0xfd, 0x53, 0xff, 0x6c,
++	0x16, 0x52, 0xd9, 0xf3, 0xf7, 0x98, 0x2e, 0xc9,
++	0x07, 0x31, 0x2c, 0x0c, 0x72, 0xd7, 0xc5, 0xc6,
++	0x08, 0x2a, 0x7b, 0xda, 0xbd, 0x7e, 0x02, 0xea,
++	0x1a, 0xbb, 0xf2, 0x04, 0x27, 0x61, 0x28, 0x8e,
++	0xf5, 0x04, 0x03, 0x1f, 0x4c, 0x07, 0x55, 0x82,
++	0xec, 0x1e, 0xd7, 0x8b, 0x2f, 0x65, 0x56, 0xd1,
++	0xd9, 0x1e, 0x3c, 0xe9, 0x1f, 0x5e, 0x98, 0x70,
++	0x38, 0x4a, 0x8c, 0x49, 0xc5, 0x43, 0xa0, 0xa1,
++	0x8b, 0x74, 0x9d, 0x4c, 0x62, 0x0d, 0x10, 0x0c,
++	0xf4, 0x6c, 0x8f, 0xe0, 0xaa, 0x9a, 0x8d, 0xb7,
++	0xe0, 0xbe, 0x4c, 0x87, 0xf1, 0x98, 0x2f, 0xcc,
++	0xed, 0xc0, 0x52, 0x29, 0xdc, 0x83, 0xf8, 0xfc,
++	0x2c, 0x0e, 0xa8, 0x51, 0x4d, 0x80, 0x0d, 0xa3,
++	0xfe, 0xd8, 0x37, 0xe7, 0x41, 0x24, 0xfc, 0xfb,
++	0x75, 0xe3, 0x71, 0x7b, 0x57, 0x45, 0xf5, 0x97,
++	0x73, 0x65, 0x63, 0x14, 0x74, 0xb8, 0x82, 0x9f,
++	0xf8, 0x60, 0x2f, 0x8a, 0xf2, 0x4e, 0xf1, 0x39,
++	0xda, 0x33, 0x91, 0xf8, 0x36, 0xe0, 0x8d, 0x3f,
++	0x1f, 0x3b, 0x56, 0xdc, 0xa0, 0x8f, 0x3c, 0x9d,
++	0x71, 0x52, 0xa7, 0xb8, 0xc0, 0xa5, 0xc6, 0xa2,
++	0x73, 0xda, 0xf4, 0x4b, 0x74, 0x5b, 0x00, 0x3d,
++	0x99, 0xd7, 0x96, 0xba, 0xe6, 0xe1, 0xa6, 0x96,
++	0x38, 0xad, 0xb3, 0xc0, 0xd2, 0xba, 0x91, 0x6b,
++	0xf9, 0x19, 0xdd, 0x3b, 0xbe, 0xbe, 0x9c, 0x20,
++	0x50, 0xba, 0xa1, 0xd0, 0xce, 0x11, 0xbd, 0x95,
++	0xd8, 0xd1, 0xdd, 0x33, 0x85, 0x74, 0xdc, 0xdb,
++	0x66, 0x76, 0x44, 0xdc, 0x03, 0x74, 0x48, 0x35,
++	0x98, 0xb1, 0x18, 0x47, 0x94, 0x7d, 0xff, 0x62,
++	0xe4, 0x58, 0x78, 0xab, 0xed, 0x95, 0x36, 0xd9,
++	0x84, 0x91, 0x82, 0x64, 0x41, 0xbb, 0x58, 0xe6,
++	0x1c, 0x20, 0x6d, 0x15, 0x6b, 0x13, 0x96, 0xe8,
++	0x35, 0x7f, 0xdc, 0x40, 0x2c, 0xe9, 0xbc, 0x8a,
++	0x4f, 0x92, 0xec, 0x06, 0x2d, 0x50, 0xdf, 0x93,
++	0x5d, 0x65, 0x5a, 0xa8, 0xfc, 0x20, 0x50, 0x14,
++	0xa9, 0x8a, 0x7e, 0x1d, 0x08, 0x1f, 0xe2, 0x99,
++	0xd0, 0xbe, 0xfb, 0x3a, 0x21, 0x9d, 0xad, 0x86,
++	0x54, 0xfd, 0x0d, 0x98, 0x1c, 0x5a, 0x6f, 0x1f,
++	0x9a, 0x40, 0xcd, 0xa2, 0xff, 0x6a, 0xf1, 0x54
++};
++static const u8 enc_assoc008[] __initconst = { };
++static const u8 enc_nonce008[] __initconst = {
++	0x0e, 0x0d, 0x57, 0xbb, 0x7b, 0x40, 0x54, 0x02
++};
++static const u8 enc_key008[] __initconst = {
++	0xf2, 0xaa, 0x4f, 0x99, 0xfd, 0x3e, 0xa8, 0x53,
++	0xc1, 0x44, 0xe9, 0x81, 0x18, 0xdc, 0xf5, 0xf0,
++	0x3e, 0x44, 0x15, 0x59, 0xe0, 0xc5, 0x44, 0x86,
++	0xc3, 0x91, 0xa8, 0x75, 0xc0, 0x12, 0x46, 0xba
++};
++
++static const u8 enc_input009[] __initconst = {
++	0xe6, 0xc3, 0xdb, 0x63, 0x55, 0x15, 0xe3, 0x5b,
++	0xb7, 0x4b, 0x27, 0x8b, 0x5a, 0xdd, 0xc2, 0xe8,
++	0x3a, 0x6b, 0xd7, 0x81, 0x96, 0x35, 0x97, 0xca,
++	0xd7, 0x68, 0xe8, 0xef, 0xce, 0xab, 0xda, 0x09,
++	0x6e, 0xd6, 0x8e, 0xcb, 0x55, 0xb5, 0xe1, 0xe5,
++	0x57, 0xfd, 0xc4, 0xe3, 0xe0, 0x18, 0x4f, 0x85,
++	0xf5, 0x3f, 0x7e, 0x4b, 0x88, 0xc9, 0x52, 0x44,
++	0x0f, 0xea, 0xaf, 0x1f, 0x71, 0x48, 0x9f, 0x97,
++	0x6d, 0xb9, 0x6f, 0x00, 0xa6, 0xde, 0x2b, 0x77,
++	0x8b, 0x15, 0xad, 0x10, 0xa0, 0x2b, 0x7b, 0x41,
++	0x90, 0x03, 0x2d, 0x69, 0xae, 0xcc, 0x77, 0x7c,
++	0xa5, 0x9d, 0x29, 0x22, 0xc2, 0xea, 0xb4, 0x00,
++	0x1a, 0xd2, 0x7a, 0x98, 0x8a, 0xf9, 0xf7, 0x82,
++	0xb0, 0xab, 0xd8, 0xa6, 0x94, 0x8d, 0x58, 0x2f,
++	0x01, 0x9e, 0x00, 0x20, 0xfc, 0x49, 0xdc, 0x0e,
++	0x03, 0xe8, 0x45, 0x10, 0xd6, 0xa8, 0xda, 0x55,
++	0x10, 0x9a, 0xdf, 0x67, 0x22, 0x8b, 0x43, 0xab,
++	0x00, 0xbb, 0x02, 0xc8, 0xdd, 0x7b, 0x97, 0x17,
++	0xd7, 0x1d, 0x9e, 0x02, 0x5e, 0x48, 0xde, 0x8e,
++	0xcf, 0x99, 0x07, 0x95, 0x92, 0x3c, 0x5f, 0x9f,
++	0xc5, 0x8a, 0xc0, 0x23, 0xaa, 0xd5, 0x8c, 0x82,
++	0x6e, 0x16, 0x92, 0xb1, 0x12, 0x17, 0x07, 0xc3,
++	0xfb, 0x36, 0xf5, 0x6c, 0x35, 0xd6, 0x06, 0x1f,
++	0x9f, 0xa7, 0x94, 0xa2, 0x38, 0x63, 0x9c, 0xb0,
++	0x71, 0xb3, 0xa5, 0xd2, 0xd8, 0xba, 0x9f, 0x08,
++	0x01, 0xb3, 0xff, 0x04, 0x97, 0x73, 0x45, 0x1b,
++	0xd5, 0xa9, 0x9c, 0x80, 0xaf, 0x04, 0x9a, 0x85,
++	0xdb, 0x32, 0x5b, 0x5d, 0x1a, 0xc1, 0x36, 0x28,
++	0x10, 0x79, 0xf1, 0x3c, 0xbf, 0x1a, 0x41, 0x5c,
++	0x4e, 0xdf, 0xb2, 0x7c, 0x79, 0x3b, 0x7a, 0x62,
++	0x3d, 0x4b, 0xc9, 0x9b, 0x2a, 0x2e, 0x7c, 0xa2,
++	0xb1, 0x11, 0x98, 0xa7, 0x34, 0x1a, 0x00, 0xf3,
++	0xd1, 0xbc, 0x18, 0x22, 0xba, 0x02, 0x56, 0x62,
++	0x31, 0x10, 0x11, 0x6d, 0xe0, 0x54, 0x9d, 0x40,
++	0x1f, 0x26, 0x80, 0x41, 0xca, 0x3f, 0x68, 0x0f,
++	0x32, 0x1d, 0x0a, 0x8e, 0x79, 0xd8, 0xa4, 0x1b,
++	0x29, 0x1c, 0x90, 0x8e, 0xc5, 0xe3, 0xb4, 0x91,
++	0x37, 0x9a, 0x97, 0x86, 0x99, 0xd5, 0x09, 0xc5,
++	0xbb, 0xa3, 0x3f, 0x21, 0x29, 0x82, 0x14, 0x5c,
++	0xab, 0x25, 0xfb, 0xf2, 0x4f, 0x58, 0x26, 0xd4,
++	0x83, 0xaa, 0x66, 0x89, 0x67, 0x7e, 0xc0, 0x49,
++	0xe1, 0x11, 0x10, 0x7f, 0x7a, 0xda, 0x29, 0x04,
++	0xff, 0xf0, 0xcb, 0x09, 0x7c, 0x9d, 0xfa, 0x03,
++	0x6f, 0x81, 0x09, 0x31, 0x60, 0xfb, 0x08, 0xfa,
++	0x74, 0xd3, 0x64, 0x44, 0x7c, 0x55, 0x85, 0xec,
++	0x9c, 0x6e, 0x25, 0xb7, 0x6c, 0xc5, 0x37, 0xb6,
++	0x83, 0x87, 0x72, 0x95, 0x8b, 0x9d, 0xe1, 0x69,
++	0x5c, 0x31, 0x95, 0x42, 0xa6, 0x2c, 0xd1, 0x36,
++	0x47, 0x1f, 0xec, 0x54, 0xab, 0xa2, 0x1c, 0xd8,
++	0x00, 0xcc, 0xbc, 0x0d, 0x65, 0xe2, 0x67, 0xbf,
++	0xbc, 0xea, 0xee, 0x9e, 0xe4, 0x36, 0x95, 0xbe,
++	0x73, 0xd9, 0xa6, 0xd9, 0x0f, 0xa0, 0xcc, 0x82,
++	0x76, 0x26, 0xad, 0x5b, 0x58, 0x6c, 0x4e, 0xab,
++	0x29, 0x64, 0xd3, 0xd9, 0xa9, 0x08, 0x8c, 0x1d,
++	0xa1, 0x4f, 0x80, 0xd8, 0x3f, 0x94, 0xfb, 0xd3,
++	0x7b, 0xfc, 0xd1, 0x2b, 0xc3, 0x21, 0xeb, 0xe5,
++	0x1c, 0x84, 0x23, 0x7f, 0x4b, 0xfa, 0xdb, 0x34,
++	0x18, 0xa2, 0xc2, 0xe5, 0x13, 0xfe, 0x6c, 0x49,
++	0x81, 0xd2, 0x73, 0xe7, 0xe2, 0xd7, 0xe4, 0x4f,
++	0x4b, 0x08, 0x6e, 0xb1, 0x12, 0x22, 0x10, 0x9d,
++	0xac, 0x51, 0x1e, 0x17, 0xd9, 0x8a, 0x0b, 0x42,
++	0x88, 0x16, 0x81, 0x37, 0x7c, 0x6a, 0xf7, 0xef,
++	0x2d, 0xe3, 0xd9, 0xf8, 0x5f, 0xe0, 0x53, 0x27,
++	0x74, 0xb9, 0xe2, 0xd6, 0x1c, 0x80, 0x2c, 0x52,
++	0x65
++};
++static const u8 enc_output009[] __initconst = {
++	0xfd, 0x81, 0x8d, 0xd0, 0x3d, 0xb4, 0xd5, 0xdf,
++	0xd3, 0x42, 0x47, 0x5a, 0x6d, 0x19, 0x27, 0x66,
++	0x4b, 0x2e, 0x0c, 0x27, 0x9c, 0x96, 0x4c, 0x72,
++	0x02, 0xa3, 0x65, 0xc3, 0xb3, 0x6f, 0x2e, 0xbd,
++	0x63, 0x8a, 0x4a, 0x5d, 0x29, 0xa2, 0xd0, 0x28,
++	0x48, 0xc5, 0x3d, 0x98, 0xa3, 0xbc, 0xe0, 0xbe,
++	0x3b, 0x3f, 0xe6, 0x8a, 0xa4, 0x7f, 0x53, 0x06,
++	0xfa, 0x7f, 0x27, 0x76, 0x72, 0x31, 0xa1, 0xf5,
++	0xd6, 0x0c, 0x52, 0x47, 0xba, 0xcd, 0x4f, 0xd7,
++	0xeb, 0x05, 0x48, 0x0d, 0x7c, 0x35, 0x4a, 0x09,
++	0xc9, 0x76, 0x71, 0x02, 0xa3, 0xfb, 0xb7, 0x1a,
++	0x65, 0xb7, 0xed, 0x98, 0xc6, 0x30, 0x8a, 0x00,
++	0xae, 0xa1, 0x31, 0xe5, 0xb5, 0x9e, 0x6d, 0x62,
++	0xda, 0xda, 0x07, 0x0f, 0x38, 0x38, 0xd3, 0xcb,
++	0xc1, 0xb0, 0xad, 0xec, 0x72, 0xec, 0xb1, 0xa2,
++	0x7b, 0x59, 0xf3, 0x3d, 0x2b, 0xef, 0xcd, 0x28,
++	0x5b, 0x83, 0xcc, 0x18, 0x91, 0x88, 0xb0, 0x2e,
++	0xf9, 0x29, 0x31, 0x18, 0xf9, 0x4e, 0xe9, 0x0a,
++	0x91, 0x92, 0x9f, 0xae, 0x2d, 0xad, 0xf4, 0xe6,
++	0x1a, 0xe2, 0xa4, 0xee, 0x47, 0x15, 0xbf, 0x83,
++	0x6e, 0xd7, 0x72, 0x12, 0x3b, 0x2d, 0x24, 0xe9,
++	0xb2, 0x55, 0xcb, 0x3c, 0x10, 0xf0, 0x24, 0x8a,
++	0x4a, 0x02, 0xea, 0x90, 0x25, 0xf0, 0xb4, 0x79,
++	0x3a, 0xef, 0x6e, 0xf5, 0x52, 0xdf, 0xb0, 0x0a,
++	0xcd, 0x24, 0x1c, 0xd3, 0x2e, 0x22, 0x74, 0xea,
++	0x21, 0x6f, 0xe9, 0xbd, 0xc8, 0x3e, 0x36, 0x5b,
++	0x19, 0xf1, 0xca, 0x99, 0x0a, 0xb4, 0xa7, 0x52,
++	0x1a, 0x4e, 0xf2, 0xad, 0x8d, 0x56, 0x85, 0xbb,
++	0x64, 0x89, 0xba, 0x26, 0xf9, 0xc7, 0xe1, 0x89,
++	0x19, 0x22, 0x77, 0xc3, 0xa8, 0xfc, 0xff, 0xad,
++	0xfe, 0xb9, 0x48, 0xae, 0x12, 0x30, 0x9f, 0x19,
++	0xfb, 0x1b, 0xef, 0x14, 0x87, 0x8a, 0x78, 0x71,
++	0xf3, 0xf4, 0xb7, 0x00, 0x9c, 0x1d, 0xb5, 0x3d,
++	0x49, 0x00, 0x0c, 0x06, 0xd4, 0x50, 0xf9, 0x54,
++	0x45, 0xb2, 0x5b, 0x43, 0xdb, 0x6d, 0xcf, 0x1a,
++	0xe9, 0x7a, 0x7a, 0xcf, 0xfc, 0x8a, 0x4e, 0x4d,
++	0x0b, 0x07, 0x63, 0x28, 0xd8, 0xe7, 0x08, 0x95,
++	0xdf, 0xa6, 0x72, 0x93, 0x2e, 0xbb, 0xa0, 0x42,
++	0x89, 0x16, 0xf1, 0xd9, 0x0c, 0xf9, 0xa1, 0x16,
++	0xfd, 0xd9, 0x03, 0xb4, 0x3b, 0x8a, 0xf5, 0xf6,
++	0xe7, 0x6b, 0x2e, 0x8e, 0x4c, 0x3d, 0xe2, 0xaf,
++	0x08, 0x45, 0x03, 0xff, 0x09, 0xb6, 0xeb, 0x2d,
++	0xc6, 0x1b, 0x88, 0x94, 0xac, 0x3e, 0xf1, 0x9f,
++	0x0e, 0x0e, 0x2b, 0xd5, 0x00, 0x4d, 0x3f, 0x3b,
++	0x53, 0xae, 0xaf, 0x1c, 0x33, 0x5f, 0x55, 0x6e,
++	0x8d, 0xaf, 0x05, 0x7a, 0x10, 0x34, 0xc9, 0xf4,
++	0x66, 0xcb, 0x62, 0x12, 0xa6, 0xee, 0xe8, 0x1c,
++	0x5d, 0x12, 0x86, 0xdb, 0x6f, 0x1c, 0x33, 0xc4,
++	0x1c, 0xda, 0x82, 0x2d, 0x3b, 0x59, 0xfe, 0xb1,
++	0xa4, 0x59, 0x41, 0x86, 0xd0, 0xef, 0xae, 0xfb,
++	0xda, 0x6d, 0x11, 0xb8, 0xca, 0xe9, 0x6e, 0xff,
++	0xf7, 0xa9, 0xd9, 0x70, 0x30, 0xfc, 0x53, 0xe2,
++	0xd7, 0xa2, 0x4e, 0xc7, 0x91, 0xd9, 0x07, 0x06,
++	0xaa, 0xdd, 0xb0, 0x59, 0x28, 0x1d, 0x00, 0x66,
++	0xc5, 0x54, 0xc2, 0xfc, 0x06, 0xda, 0x05, 0x90,
++	0x52, 0x1d, 0x37, 0x66, 0xee, 0xf0, 0xb2, 0x55,
++	0x8a, 0x5d, 0xd2, 0x38, 0x86, 0x94, 0x9b, 0xfc,
++	0x10, 0x4c, 0xa1, 0xb9, 0x64, 0x3e, 0x44, 0xb8,
++	0x5f, 0xb0, 0x0c, 0xec, 0xe0, 0xc9, 0xe5, 0x62,
++	0x75, 0x3f, 0x09, 0xd5, 0xf5, 0xd9, 0x26, 0xba,
++	0x9e, 0xd2, 0xf4, 0xb9, 0x48, 0x0a, 0xbc, 0xa2,
++	0xd6, 0x7c, 0x36, 0x11, 0x7d, 0x26, 0x81, 0x89,
++	0xcf, 0xa4, 0xad, 0x73, 0x0e, 0xee, 0xcc, 0x06,
++	0xa9, 0xdb, 0xb1, 0xfd, 0xfb, 0x09, 0x7f, 0x90,
++	0x42, 0x37, 0x2f, 0xe1, 0x9c, 0x0f, 0x6f, 0xcf,
++	0x43, 0xb5, 0xd9, 0x90, 0xe1, 0x85, 0xf5, 0xa8,
++	0xae
++};
++static const u8 enc_assoc009[] __initconst = {
++	0x5a, 0x27, 0xff, 0xeb, 0xdf, 0x84, 0xb2, 0x9e,
++	0xef
++};
++static const u8 enc_nonce009[] __initconst = {
++	0xef, 0x2d, 0x63, 0xee, 0x6b, 0x80, 0x8b, 0x78
++};
++static const u8 enc_key009[] __initconst = {
++	0xea, 0xbc, 0x56, 0x99, 0xe3, 0x50, 0xff, 0xc5,
++	0xcc, 0x1a, 0xd7, 0xc1, 0x57, 0x72, 0xea, 0x86,
++	0x5b, 0x89, 0x88, 0x61, 0x3d, 0x2f, 0x9b, 0xb2,
++	0xe7, 0x9c, 0xec, 0x74, 0x6e, 0x3e, 0xf4, 0x3b
++};
++
++static const u8 enc_input010[] __initconst = {
++	0x42, 0x93, 0xe4, 0xeb, 0x97, 0xb0, 0x57, 0xbf,
++	0x1a, 0x8b, 0x1f, 0xe4, 0x5f, 0x36, 0x20, 0x3c,
++	0xef, 0x0a, 0xa9, 0x48, 0x5f, 0x5f, 0x37, 0x22,
++	0x3a, 0xde, 0xe3, 0xae, 0xbe, 0xad, 0x07, 0xcc,
++	0xb1, 0xf6, 0xf5, 0xf9, 0x56, 0xdd, 0xe7, 0x16,
++	0x1e, 0x7f, 0xdf, 0x7a, 0x9e, 0x75, 0xb7, 0xc7,
++	0xbe, 0xbe, 0x8a, 0x36, 0x04, 0xc0, 0x10, 0xf4,
++	0x95, 0x20, 0x03, 0xec, 0xdc, 0x05, 0xa1, 0x7d,
++	0xc4, 0xa9, 0x2c, 0x82, 0xd0, 0xbc, 0x8b, 0xc5,
++	0xc7, 0x45, 0x50, 0xf6, 0xa2, 0x1a, 0xb5, 0x46,
++	0x3b, 0x73, 0x02, 0xa6, 0x83, 0x4b, 0x73, 0x82,
++	0x58, 0x5e, 0x3b, 0x65, 0x2f, 0x0e, 0xfd, 0x2b,
++	0x59, 0x16, 0xce, 0xa1, 0x60, 0x9c, 0xe8, 0x3a,
++	0x99, 0xed, 0x8d, 0x5a, 0xcf, 0xf6, 0x83, 0xaf,
++	0xba, 0xd7, 0x73, 0x73, 0x40, 0x97, 0x3d, 0xca,
++	0xef, 0x07, 0x57, 0xe6, 0xd9, 0x70, 0x0e, 0x95,
++	0xae, 0xa6, 0x8d, 0x04, 0xcc, 0xee, 0xf7, 0x09,
++	0x31, 0x77, 0x12, 0xa3, 0x23, 0x97, 0x62, 0xb3,
++	0x7b, 0x32, 0xfb, 0x80, 0x14, 0x48, 0x81, 0xc3,
++	0xe5, 0xea, 0x91, 0x39, 0x52, 0x81, 0xa2, 0x4f,
++	0xe4, 0xb3, 0x09, 0xff, 0xde, 0x5e, 0xe9, 0x58,
++	0x84, 0x6e, 0xf9, 0x3d, 0xdf, 0x25, 0xea, 0xad,
++	0xae, 0xe6, 0x9a, 0xd1, 0x89, 0x55, 0xd3, 0xde,
++	0x6c, 0x52, 0xdb, 0x70, 0xfe, 0x37, 0xce, 0x44,
++	0x0a, 0xa8, 0x25, 0x5f, 0x92, 0xc1, 0x33, 0x4a,
++	0x4f, 0x9b, 0x62, 0x35, 0xff, 0xce, 0xc0, 0xa9,
++	0x60, 0xce, 0x52, 0x00, 0x97, 0x51, 0x35, 0x26,
++	0x2e, 0xb9, 0x36, 0xa9, 0x87, 0x6e, 0x1e, 0xcc,
++	0x91, 0x78, 0x53, 0x98, 0x86, 0x5b, 0x9c, 0x74,
++	0x7d, 0x88, 0x33, 0xe1, 0xdf, 0x37, 0x69, 0x2b,
++	0xbb, 0xf1, 0x4d, 0xf4, 0xd1, 0xf1, 0x39, 0x93,
++	0x17, 0x51, 0x19, 0xe3, 0x19, 0x1e, 0x76, 0x37,
++	0x25, 0xfb, 0x09, 0x27, 0x6a, 0xab, 0x67, 0x6f,
++	0x14, 0x12, 0x64, 0xe7, 0xc4, 0x07, 0xdf, 0x4d,
++	0x17, 0xbb, 0x6d, 0xe0, 0xe9, 0xb9, 0xab, 0xca,
++	0x10, 0x68, 0xaf, 0x7e, 0xb7, 0x33, 0x54, 0x73,
++	0x07, 0x6e, 0xf7, 0x81, 0x97, 0x9c, 0x05, 0x6f,
++	0x84, 0x5f, 0xd2, 0x42, 0xfb, 0x38, 0xcf, 0xd1,
++	0x2f, 0x14, 0x30, 0x88, 0x98, 0x4d, 0x5a, 0xa9,
++	0x76, 0xd5, 0x4f, 0x3e, 0x70, 0x6c, 0x85, 0x76,
++	0xd7, 0x01, 0xa0, 0x1a, 0xc8, 0x4e, 0xaa, 0xac,
++	0x78, 0xfe, 0x46, 0xde, 0x6a, 0x05, 0x46, 0xa7,
++	0x43, 0x0c, 0xb9, 0xde, 0xb9, 0x68, 0xfb, 0xce,
++	0x42, 0x99, 0x07, 0x4d, 0x0b, 0x3b, 0x5a, 0x30,
++	0x35, 0xa8, 0xf9, 0x3a, 0x73, 0xef, 0x0f, 0xdb,
++	0x1e, 0x16, 0x42, 0xc4, 0xba, 0xae, 0x58, 0xaa,
++	0xf8, 0xe5, 0x75, 0x2f, 0x1b, 0x15, 0x5c, 0xfd,
++	0x0a, 0x97, 0xd0, 0xe4, 0x37, 0x83, 0x61, 0x5f,
++	0x43, 0xa6, 0xc7, 0x3f, 0x38, 0x59, 0xe6, 0xeb,
++	0xa3, 0x90, 0xc3, 0xaa, 0xaa, 0x5a, 0xd3, 0x34,
++	0xd4, 0x17, 0xc8, 0x65, 0x3e, 0x57, 0xbc, 0x5e,
++	0xdd, 0x9e, 0xb7, 0xf0, 0x2e, 0x5b, 0xb2, 0x1f,
++	0x8a, 0x08, 0x0d, 0x45, 0x91, 0x0b, 0x29, 0x53,
++	0x4f, 0x4c, 0x5a, 0x73, 0x56, 0xfe, 0xaf, 0x41,
++	0x01, 0x39, 0x0a, 0x24, 0x3c, 0x7e, 0xbe, 0x4e,
++	0x53, 0xf3, 0xeb, 0x06, 0x66, 0x51, 0x28, 0x1d,
++	0xbd, 0x41, 0x0a, 0x01, 0xab, 0x16, 0x47, 0x27,
++	0x47, 0x47, 0xf7, 0xcb, 0x46, 0x0a, 0x70, 0x9e,
++	0x01, 0x9c, 0x09, 0xe1, 0x2a, 0x00, 0x1a, 0xd8,
++	0xd4, 0x79, 0x9d, 0x80, 0x15, 0x8e, 0x53, 0x2a,
++	0x65, 0x83, 0x78, 0x3e, 0x03, 0x00, 0x07, 0x12,
++	0x1f, 0x33, 0x3e, 0x7b, 0x13, 0x37, 0xf1, 0xc3,
++	0xef, 0xb7, 0xc1, 0x20, 0x3c, 0x3e, 0x67, 0x66,
++	0x5d, 0x88, 0xa7, 0x7d, 0x33, 0x50, 0x77, 0xb0,
++	0x28, 0x8e, 0xe7, 0x2c, 0x2e, 0x7a, 0xf4, 0x3c,
++	0x8d, 0x74, 0x83, 0xaf, 0x8e, 0x87, 0x0f, 0xe4,
++	0x50, 0xff, 0x84, 0x5c, 0x47, 0x0c, 0x6a, 0x49,
++	0xbf, 0x42, 0x86, 0x77, 0x15, 0x48, 0xa5, 0x90,
++	0x5d, 0x93, 0xd6, 0x2a, 0x11, 0xd5, 0xd5, 0x11,
++	0xaa, 0xce, 0xe7, 0x6f, 0xa5, 0xb0, 0x09, 0x2c,
++	0x8d, 0xd3, 0x92, 0xf0, 0x5a, 0x2a, 0xda, 0x5b,
++	0x1e, 0xd5, 0x9a, 0xc4, 0xc4, 0xf3, 0x49, 0x74,
++	0x41, 0xca, 0xe8, 0xc1, 0xf8, 0x44, 0xd6, 0x3c,
++	0xae, 0x6c, 0x1d, 0x9a, 0x30, 0x04, 0x4d, 0x27,
++	0x0e, 0xb1, 0x5f, 0x59, 0xa2, 0x24, 0xe8, 0xe1,
++	0x98, 0xc5, 0x6a, 0x4c, 0xfe, 0x41, 0xd2, 0x27,
++	0x42, 0x52, 0xe1, 0xe9, 0x7d, 0x62, 0xe4, 0x88,
++	0x0f, 0xad, 0xb2, 0x70, 0xcb, 0x9d, 0x4c, 0x27,
++	0x2e, 0x76, 0x1e, 0x1a, 0x63, 0x65, 0xf5, 0x3b,
++	0xf8, 0x57, 0x69, 0xeb, 0x5b, 0x38, 0x26, 0x39,
++	0x33, 0x25, 0x45, 0x3e, 0x91, 0xb8, 0xd8, 0xc7,
++	0xd5, 0x42, 0xc0, 0x22, 0x31, 0x74, 0xf4, 0xbc,
++	0x0c, 0x23, 0xf1, 0xca, 0xc1, 0x8d, 0xd7, 0xbe,
++	0xc9, 0x62, 0xe4, 0x08, 0x1a, 0xcf, 0x36, 0xd5,
++	0xfe, 0x55, 0x21, 0x59, 0x91, 0x87, 0x87, 0xdf,
++	0x06, 0xdb, 0xdf, 0x96, 0x45, 0x58, 0xda, 0x05,
++	0xcd, 0x50, 0x4d, 0xd2, 0x7d, 0x05, 0x18, 0x73,
++	0x6a, 0x8d, 0x11, 0x85, 0xa6, 0x88, 0xe8, 0xda,
++	0xe6, 0x30, 0x33, 0xa4, 0x89, 0x31, 0x75, 0xbe,
++	0x69, 0x43, 0x84, 0x43, 0x50, 0x87, 0xdd, 0x71,
++	0x36, 0x83, 0xc3, 0x78, 0x74, 0x24, 0x0a, 0xed,
++	0x7b, 0xdb, 0xa4, 0x24, 0x0b, 0xb9, 0x7e, 0x5d,
++	0xff, 0xde, 0xb1, 0xef, 0x61, 0x5a, 0x45, 0x33,
++	0xf6, 0x17, 0x07, 0x08, 0x98, 0x83, 0x92, 0x0f,
++	0x23, 0x6d, 0xe6, 0xaa, 0x17, 0x54, 0xad, 0x6a,
++	0xc8, 0xdb, 0x26, 0xbe, 0xb8, 0xb6, 0x08, 0xfa,
++	0x68, 0xf1, 0xd7, 0x79, 0x6f, 0x18, 0xb4, 0x9e,
++	0x2d, 0x3f, 0x1b, 0x64, 0xaf, 0x8d, 0x06, 0x0e,
++	0x49, 0x28, 0xe0, 0x5d, 0x45, 0x68, 0x13, 0x87,
++	0xfa, 0xde, 0x40, 0x7b, 0xd2, 0xc3, 0x94, 0xd5,
++	0xe1, 0xd9, 0xc2, 0xaf, 0x55, 0x89, 0xeb, 0xb4,
++	0x12, 0x59, 0xa8, 0xd4, 0xc5, 0x29, 0x66, 0x38,
++	0xe6, 0xac, 0x22, 0x22, 0xd9, 0x64, 0x9b, 0x34,
++	0x0a, 0x32, 0x9f, 0xc2, 0xbf, 0x17, 0x6c, 0x3f,
++	0x71, 0x7a, 0x38, 0x6b, 0x98, 0xfb, 0x49, 0x36,
++	0x89, 0xc9, 0xe2, 0xd6, 0xc7, 0x5d, 0xd0, 0x69,
++	0x5f, 0x23, 0x35, 0xc9, 0x30, 0xe2, 0xfd, 0x44,
++	0x58, 0x39, 0xd7, 0x97, 0xfb, 0x5c, 0x00, 0xd5,
++	0x4f, 0x7a, 0x1a, 0x95, 0x8b, 0x62, 0x4b, 0xce,
++	0xe5, 0x91, 0x21, 0x7b, 0x30, 0x00, 0xd6, 0xdd,
++	0x6d, 0x02, 0x86, 0x49, 0x0f, 0x3c, 0x1a, 0x27,
++	0x3c, 0xd3, 0x0e, 0x71, 0xf2, 0xff, 0xf5, 0x2f,
++	0x87, 0xac, 0x67, 0x59, 0x81, 0xa3, 0xf7, 0xf8,
++	0xd6, 0x11, 0x0c, 0x84, 0xa9, 0x03, 0xee, 0x2a,
++	0xc4, 0xf3, 0x22, 0xab, 0x7c, 0xe2, 0x25, 0xf5,
++	0x67, 0xa3, 0xe4, 0x11, 0xe0, 0x59, 0xb3, 0xca,
++	0x87, 0xa0, 0xae, 0xc9, 0xa6, 0x62, 0x1b, 0x6e,
++	0x4d, 0x02, 0x6b, 0x07, 0x9d, 0xfd, 0xd0, 0x92,
++	0x06, 0xe1, 0xb2, 0x9a, 0x4a, 0x1f, 0x1f, 0x13,
++	0x49, 0x99, 0x97, 0x08, 0xde, 0x7f, 0x98, 0xaf,
++	0x51, 0x98, 0xee, 0x2c, 0xcb, 0xf0, 0x0b, 0xc6,
++	0xb6, 0xb7, 0x2d, 0x9a, 0xb1, 0xac, 0xa6, 0xe3,
++	0x15, 0x77, 0x9d, 0x6b, 0x1a, 0xe4, 0xfc, 0x8b,
++	0xf2, 0x17, 0x59, 0x08, 0x04, 0x58, 0x81, 0x9d,
++	0x1b, 0x1b, 0x69, 0x55, 0xc2, 0xb4, 0x3c, 0x1f,
++	0x50, 0xf1, 0x7f, 0x77, 0x90, 0x4c, 0x66, 0x40,
++	0x5a, 0xc0, 0x33, 0x1f, 0xcb, 0x05, 0x6d, 0x5c,
++	0x06, 0x87, 0x52, 0xa2, 0x8f, 0x26, 0xd5, 0x4f
++};
++static const u8 enc_output010[] __initconst = {
++	0xe5, 0x26, 0xa4, 0x3d, 0xbd, 0x33, 0xd0, 0x4b,
++	0x6f, 0x05, 0xa7, 0x6e, 0x12, 0x7a, 0xd2, 0x74,
++	0xa6, 0xdd, 0xbd, 0x95, 0xeb, 0xf9, 0xa4, 0xf1,
++	0x59, 0x93, 0x91, 0x70, 0xd9, 0xfe, 0x9a, 0xcd,
++	0x53, 0x1f, 0x3a, 0xab, 0xa6, 0x7c, 0x9f, 0xa6,
++	0x9e, 0xbd, 0x99, 0xd9, 0xb5, 0x97, 0x44, 0xd5,
++	0x14, 0x48, 0x4d, 0x9d, 0xc0, 0xd0, 0x05, 0x96,
++	0xeb, 0x4c, 0x78, 0x55, 0x09, 0x08, 0x01, 0x02,
++	0x30, 0x90, 0x7b, 0x96, 0x7a, 0x7b, 0x5f, 0x30,
++	0x41, 0x24, 0xce, 0x68, 0x61, 0x49, 0x86, 0x57,
++	0x82, 0xdd, 0x53, 0x1c, 0x51, 0x28, 0x2b, 0x53,
++	0x6e, 0x2d, 0xc2, 0x20, 0x4c, 0xdd, 0x8f, 0x65,
++	0x10, 0x20, 0x50, 0xdd, 0x9d, 0x50, 0xe5, 0x71,
++	0x40, 0x53, 0x69, 0xfc, 0x77, 0x48, 0x11, 0xb9,
++	0xde, 0xa4, 0x8d, 0x58, 0xe4, 0xa6, 0x1a, 0x18,
++	0x47, 0x81, 0x7e, 0xfc, 0xdd, 0xf6, 0xef, 0xce,
++	0x2f, 0x43, 0x68, 0xd6, 0x06, 0xe2, 0x74, 0x6a,
++	0xad, 0x90, 0xf5, 0x37, 0xf3, 0x3d, 0x82, 0x69,
++	0x40, 0xe9, 0x6b, 0xa7, 0x3d, 0xa8, 0x1e, 0xd2,
++	0x02, 0x7c, 0xb7, 0x9b, 0xe4, 0xda, 0x8f, 0x95,
++	0x06, 0xc5, 0xdf, 0x73, 0xa3, 0x20, 0x9a, 0x49,
++	0xde, 0x9c, 0xbc, 0xee, 0x14, 0x3f, 0x81, 0x5e,
++	0xf8, 0x3b, 0x59, 0x3c, 0xe1, 0x68, 0x12, 0x5a,
++	0x3a, 0x76, 0x3a, 0x3f, 0xf7, 0x87, 0x33, 0x0a,
++	0x01, 0xb8, 0xd4, 0xed, 0xb6, 0xbe, 0x94, 0x5e,
++	0x70, 0x40, 0x56, 0x67, 0x1f, 0x50, 0x44, 0x19,
++	0xce, 0x82, 0x70, 0x10, 0x87, 0x13, 0x20, 0x0b,
++	0x4c, 0x5a, 0xb6, 0xf6, 0xa7, 0xae, 0x81, 0x75,
++	0x01, 0x81, 0xe6, 0x4b, 0x57, 0x7c, 0xdd, 0x6d,
++	0xf8, 0x1c, 0x29, 0x32, 0xf7, 0xda, 0x3c, 0x2d,
++	0xf8, 0x9b, 0x25, 0x6e, 0x00, 0xb4, 0xf7, 0x2f,
++	0xf7, 0x04, 0xf7, 0xa1, 0x56, 0xac, 0x4f, 0x1a,
++	0x64, 0xb8, 0x47, 0x55, 0x18, 0x7b, 0x07, 0x4d,
++	0xbd, 0x47, 0x24, 0x80, 0x5d, 0xa2, 0x70, 0xc5,
++	0xdd, 0x8e, 0x82, 0xd4, 0xeb, 0xec, 0xb2, 0x0c,
++	0x39, 0xd2, 0x97, 0xc1, 0xcb, 0xeb, 0xf4, 0x77,
++	0x59, 0xb4, 0x87, 0xef, 0xcb, 0x43, 0x2d, 0x46,
++	0x54, 0xd1, 0xa7, 0xd7, 0x15, 0x99, 0x0a, 0x43,
++	0xa1, 0xe0, 0x99, 0x33, 0x71, 0xc1, 0xed, 0xfe,
++	0x72, 0x46, 0x33, 0x8e, 0x91, 0x08, 0x9f, 0xc8,
++	0x2e, 0xca, 0xfa, 0xdc, 0x59, 0xd5, 0xc3, 0x76,
++	0x84, 0x9f, 0xa3, 0x37, 0x68, 0xc3, 0xf0, 0x47,
++	0x2c, 0x68, 0xdb, 0x5e, 0xc3, 0x49, 0x4c, 0xe8,
++	0x92, 0x85, 0xe2, 0x23, 0xd3, 0x3f, 0xad, 0x32,
++	0xe5, 0x2b, 0x82, 0xd7, 0x8f, 0x99, 0x0a, 0x59,
++	0x5c, 0x45, 0xd9, 0xb4, 0x51, 0x52, 0xc2, 0xae,
++	0xbf, 0x80, 0xcf, 0xc9, 0xc9, 0x51, 0x24, 0x2a,
++	0x3b, 0x3a, 0x4d, 0xae, 0xeb, 0xbd, 0x22, 0xc3,
++	0x0e, 0x0f, 0x59, 0x25, 0x92, 0x17, 0xe9, 0x74,
++	0xc7, 0x8b, 0x70, 0x70, 0x36, 0x55, 0x95, 0x75,
++	0x4b, 0xad, 0x61, 0x2b, 0x09, 0xbc, 0x82, 0xf2,
++	0x6e, 0x94, 0x43, 0xae, 0xc3, 0xd5, 0xcd, 0x8e,
++	0xfe, 0x5b, 0x9a, 0x88, 0x43, 0x01, 0x75, 0xb2,
++	0x23, 0x09, 0xf7, 0x89, 0x83, 0xe7, 0xfa, 0xf9,
++	0xb4, 0x9b, 0xf8, 0xef, 0xbd, 0x1c, 0x92, 0xc1,
++	0xda, 0x7e, 0xfe, 0x05, 0xba, 0x5a, 0xcd, 0x07,
++	0x6a, 0x78, 0x9e, 0x5d, 0xfb, 0x11, 0x2f, 0x79,
++	0x38, 0xb6, 0xc2, 0x5b, 0x6b, 0x51, 0xb4, 0x71,
++	0xdd, 0xf7, 0x2a, 0xe4, 0xf4, 0x72, 0x76, 0xad,
++	0xc2, 0xdd, 0x64, 0x5d, 0x79, 0xb6, 0xf5, 0x7a,
++	0x77, 0x20, 0x05, 0x3d, 0x30, 0x06, 0xd4, 0x4c,
++	0x0a, 0x2c, 0x98, 0x5a, 0xb9, 0xd4, 0x98, 0xa9,
++	0x3f, 0xc6, 0x12, 0xea, 0x3b, 0x4b, 0xc5, 0x79,
++	0x64, 0x63, 0x6b, 0x09, 0x54, 0x3b, 0x14, 0x27,
++	0xba, 0x99, 0x80, 0xc8, 0x72, 0xa8, 0x12, 0x90,
++	0x29, 0xba, 0x40, 0x54, 0x97, 0x2b, 0x7b, 0xfe,
++	0xeb, 0xcd, 0x01, 0x05, 0x44, 0x72, 0xdb, 0x99,
++	0xe4, 0x61, 0xc9, 0x69, 0xd6, 0xb9, 0x28, 0xd1,
++	0x05, 0x3e, 0xf9, 0x0b, 0x49, 0x0a, 0x49, 0xe9,
++	0x8d, 0x0e, 0xa7, 0x4a, 0x0f, 0xaf, 0x32, 0xd0,
++	0xe0, 0xb2, 0x3a, 0x55, 0x58, 0xfe, 0x5c, 0x28,
++	0x70, 0x51, 0x23, 0xb0, 0x7b, 0x6a, 0x5f, 0x1e,
++	0xb8, 0x17, 0xd7, 0x94, 0x15, 0x8f, 0xee, 0x20,
++	0xc7, 0x42, 0x25, 0x3e, 0x9a, 0x14, 0xd7, 0x60,
++	0x72, 0x39, 0x47, 0x48, 0xa9, 0xfe, 0xdd, 0x47,
++	0x0a, 0xb1, 0xe6, 0x60, 0x28, 0x8c, 0x11, 0x68,
++	0xe1, 0xff, 0xd7, 0xce, 0xc8, 0xbe, 0xb3, 0xfe,
++	0x27, 0x30, 0x09, 0x70, 0xd7, 0xfa, 0x02, 0x33,
++	0x3a, 0x61, 0x2e, 0xc7, 0xff, 0xa4, 0x2a, 0xa8,
++	0x6e, 0xb4, 0x79, 0x35, 0x6d, 0x4c, 0x1e, 0x38,
++	0xf8, 0xee, 0xd4, 0x84, 0x4e, 0x6e, 0x28, 0xa7,
++	0xce, 0xc8, 0xc1, 0xcf, 0x80, 0x05, 0xf3, 0x04,
++	0xef, 0xc8, 0x18, 0x28, 0x2e, 0x8d, 0x5e, 0x0c,
++	0xdf, 0xb8, 0x5f, 0x96, 0xe8, 0xc6, 0x9c, 0x2f,
++	0xe5, 0xa6, 0x44, 0xd7, 0xe7, 0x99, 0x44, 0x0c,
++	0xec, 0xd7, 0x05, 0x60, 0x97, 0xbb, 0x74, 0x77,
++	0x58, 0xd5, 0xbb, 0x48, 0xde, 0x5a, 0xb2, 0x54,
++	0x7f, 0x0e, 0x46, 0x70, 0x6a, 0x6f, 0x78, 0xa5,
++	0x08, 0x89, 0x05, 0x4e, 0x7e, 0xa0, 0x69, 0xb4,
++	0x40, 0x60, 0x55, 0x77, 0x75, 0x9b, 0x19, 0xf2,
++	0xd5, 0x13, 0x80, 0x77, 0xf9, 0x4b, 0x3f, 0x1e,
++	0xee, 0xe6, 0x76, 0x84, 0x7b, 0x8c, 0xe5, 0x27,
++	0xa8, 0x0a, 0x91, 0x01, 0x68, 0x71, 0x8a, 0x3f,
++	0x06, 0xab, 0xf6, 0xa9, 0xa5, 0xe6, 0x72, 0x92,
++	0xe4, 0x67, 0xe2, 0xa2, 0x46, 0x35, 0x84, 0x55,
++	0x7d, 0xca, 0xa8, 0x85, 0xd0, 0xf1, 0x3f, 0xbe,
++	0xd7, 0x34, 0x64, 0xfc, 0xae, 0xe3, 0xe4, 0x04,
++	0x9f, 0x66, 0x02, 0xb9, 0x88, 0x10, 0xd9, 0xc4,
++	0x4c, 0x31, 0x43, 0x7a, 0x93, 0xe2, 0x9b, 0x56,
++	0x43, 0x84, 0xdc, 0xdc, 0xde, 0x1d, 0xa4, 0x02,
++	0x0e, 0xc2, 0xef, 0xc3, 0xf8, 0x78, 0xd1, 0xb2,
++	0x6b, 0x63, 0x18, 0xc9, 0xa9, 0xe5, 0x72, 0xd8,
++	0xf3, 0xb9, 0xd1, 0x8a, 0xc7, 0x1a, 0x02, 0x27,
++	0x20, 0x77, 0x10, 0xe5, 0xc8, 0xd4, 0x4a, 0x47,
++	0xe5, 0xdf, 0x5f, 0x01, 0xaa, 0xb0, 0xd4, 0x10,
++	0xbb, 0x69, 0xe3, 0x36, 0xc8, 0xe1, 0x3d, 0x43,
++	0xfb, 0x86, 0xcd, 0xcc, 0xbf, 0xf4, 0x88, 0xe0,
++	0x20, 0xca, 0xb7, 0x1b, 0xf1, 0x2f, 0x5c, 0xee,
++	0xd4, 0xd3, 0xa3, 0xcc, 0xa4, 0x1e, 0x1c, 0x47,
++	0xfb, 0xbf, 0xfc, 0xa2, 0x41, 0x55, 0x9d, 0xf6,
++	0x5a, 0x5e, 0x65, 0x32, 0x34, 0x7b, 0x52, 0x8d,
++	0xd5, 0xd0, 0x20, 0x60, 0x03, 0xab, 0x3f, 0x8c,
++	0xd4, 0x21, 0xea, 0x2a, 0xd9, 0xc4, 0xd0, 0xd3,
++	0x65, 0xd8, 0x7a, 0x13, 0x28, 0x62, 0x32, 0x4b,
++	0x2c, 0x87, 0x93, 0xa8, 0xb4, 0x52, 0x45, 0x09,
++	0x44, 0xec, 0xec, 0xc3, 0x17, 0xdb, 0x9a, 0x4d,
++	0x5c, 0xa9, 0x11, 0xd4, 0x7d, 0xaf, 0x9e, 0xf1,
++	0x2d, 0xb2, 0x66, 0xc5, 0x1d, 0xed, 0xb7, 0xcd,
++	0x0b, 0x25, 0x5e, 0x30, 0x47, 0x3f, 0x40, 0xf4,
++	0xa1, 0xa0, 0x00, 0x94, 0x10, 0xc5, 0x6a, 0x63,
++	0x1a, 0xd5, 0x88, 0x92, 0x8e, 0x82, 0x39, 0x87,
++	0x3c, 0x78, 0x65, 0x58, 0x42, 0x75, 0x5b, 0xdd,
++	0x77, 0x3e, 0x09, 0x4e, 0x76, 0x5b, 0xe6, 0x0e,
++	0x4d, 0x38, 0xb2, 0xc0, 0xb8, 0x95, 0x01, 0x7a,
++	0x10, 0xe0, 0xfb, 0x07, 0xf2, 0xab, 0x2d, 0x8c,
++	0x32, 0xed, 0x2b, 0xc0, 0x46, 0xc2, 0xf5, 0x38,
++	0x83, 0xf0, 0x17, 0xec, 0xc1, 0x20, 0x6a, 0x9a,
++	0x0b, 0x00, 0xa0, 0x98, 0x22, 0x50, 0x23, 0xd5,
++	0x80, 0x6b, 0xf6, 0x1f, 0xc3, 0xcc, 0x97, 0xc9,
++	0x24, 0x9f, 0xf3, 0xaf, 0x43, 0x14, 0xd5, 0xa0
++};
++static const u8 enc_assoc010[] __initconst = {
++	0xd2, 0xa1, 0x70, 0xdb, 0x7a, 0xf8, 0xfa, 0x27,
++	0xba, 0x73, 0x0f, 0xbf, 0x3d, 0x1e, 0x82, 0xb2
++};
++static const u8 enc_nonce010[] __initconst = {
++	0xdb, 0x92, 0x0f, 0x7f, 0x17, 0x54, 0x0c, 0x30
++};
++static const u8 enc_key010[] __initconst = {
++	0x47, 0x11, 0xeb, 0x86, 0x2b, 0x2c, 0xab, 0x44,
++	0x34, 0xda, 0x7f, 0x57, 0x03, 0x39, 0x0c, 0xaf,
++	0x2c, 0x14, 0xfd, 0x65, 0x23, 0xe9, 0x8e, 0x74,
++	0xd5, 0x08, 0x68, 0x08, 0xe7, 0xb4, 0x72, 0xd7
++};
++
++static const u8 enc_input011[] __initconst = {
++	0x7a, 0x57, 0xf2, 0xc7, 0x06, 0x3f, 0x50, 0x7b,
++	0x36, 0x1a, 0x66, 0x5c, 0xb9, 0x0e, 0x5e, 0x3b,
++	0x45, 0x60, 0xbe, 0x9a, 0x31, 0x9f, 0xff, 0x5d,
++	0x66, 0x34, 0xb4, 0xdc, 0xfb, 0x9d, 0x8e, 0xee,
++	0x6a, 0x33, 0xa4, 0x07, 0x3c, 0xf9, 0x4c, 0x30,
++	0xa1, 0x24, 0x52, 0xf9, 0x50, 0x46, 0x88, 0x20,
++	0x02, 0x32, 0x3a, 0x0e, 0x99, 0x63, 0xaf, 0x1f,
++	0x15, 0x28, 0x2a, 0x05, 0xff, 0x57, 0x59, 0x5e,
++	0x18, 0xa1, 0x1f, 0xd0, 0x92, 0x5c, 0x88, 0x66,
++	0x1b, 0x00, 0x64, 0xa5, 0x93, 0x8d, 0x06, 0x46,
++	0xb0, 0x64, 0x8b, 0x8b, 0xef, 0x99, 0x05, 0x35,
++	0x85, 0xb3, 0xf3, 0x33, 0xbb, 0xec, 0x66, 0xb6,
++	0x3d, 0x57, 0x42, 0xe3, 0xb4, 0xc6, 0xaa, 0xb0,
++	0x41, 0x2a, 0xb9, 0x59, 0xa9, 0xf6, 0x3e, 0x15,
++	0x26, 0x12, 0x03, 0x21, 0x4c, 0x74, 0x43, 0x13,
++	0x2a, 0x03, 0x27, 0x09, 0xb4, 0xfb, 0xe7, 0xb7,
++	0x40, 0xff, 0x5e, 0xce, 0x48, 0x9a, 0x60, 0xe3,
++	0x8b, 0x80, 0x8c, 0x38, 0x2d, 0xcb, 0x93, 0x37,
++	0x74, 0x05, 0x52, 0x6f, 0x73, 0x3e, 0xc3, 0xbc,
++	0xca, 0x72, 0x0a, 0xeb, 0xf1, 0x3b, 0xa0, 0x95,
++	0xdc, 0x8a, 0xc4, 0xa9, 0xdc, 0xca, 0x44, 0xd8,
++	0x08, 0x63, 0x6a, 0x36, 0xd3, 0x3c, 0xb8, 0xac,
++	0x46, 0x7d, 0xfd, 0xaa, 0xeb, 0x3e, 0x0f, 0x45,
++	0x8f, 0x49, 0xda, 0x2b, 0xf2, 0x12, 0xbd, 0xaf,
++	0x67, 0x8a, 0x63, 0x48, 0x4b, 0x55, 0x5f, 0x6d,
++	0x8c, 0xb9, 0x76, 0x34, 0x84, 0xae, 0xc2, 0xfc,
++	0x52, 0x64, 0x82, 0xf7, 0xb0, 0x06, 0xf0, 0x45,
++	0x73, 0x12, 0x50, 0x30, 0x72, 0xea, 0x78, 0x9a,
++	0xa8, 0xaf, 0xb5, 0xe3, 0xbb, 0x77, 0x52, 0xec,
++	0x59, 0x84, 0xbf, 0x6b, 0x8f, 0xce, 0x86, 0x5e,
++	0x1f, 0x23, 0xe9, 0xfb, 0x08, 0x86, 0xf7, 0x10,
++	0xb9, 0xf2, 0x44, 0x96, 0x44, 0x63, 0xa9, 0xa8,
++	0x78, 0x00, 0x23, 0xd6, 0xc7, 0xe7, 0x6e, 0x66,
++	0x4f, 0xcc, 0xee, 0x15, 0xb3, 0xbd, 0x1d, 0xa0,
++	0xe5, 0x9c, 0x1b, 0x24, 0x2c, 0x4d, 0x3c, 0x62,
++	0x35, 0x9c, 0x88, 0x59, 0x09, 0xdd, 0x82, 0x1b,
++	0xcf, 0x0a, 0x83, 0x6b, 0x3f, 0xae, 0x03, 0xc4,
++	0xb4, 0xdd, 0x7e, 0x5b, 0x28, 0x76, 0x25, 0x96,
++	0xd9, 0xc9, 0x9d, 0x5f, 0x86, 0xfa, 0xf6, 0xd7,
++	0xd2, 0xe6, 0x76, 0x1d, 0x0f, 0xa1, 0xdc, 0x74,
++	0x05, 0x1b, 0x1d, 0xe0, 0xcd, 0x16, 0xb0, 0xa8,
++	0x8a, 0x34, 0x7b, 0x15, 0x11, 0x77, 0xe5, 0x7b,
++	0x7e, 0x20, 0xf7, 0xda, 0x38, 0xda, 0xce, 0x70,
++	0xe9, 0xf5, 0x6c, 0xd9, 0xbe, 0x0c, 0x4c, 0x95,
++	0x4c, 0xc2, 0x9b, 0x34, 0x55, 0x55, 0xe1, 0xf3,
++	0x46, 0x8e, 0x48, 0x74, 0x14, 0x4f, 0x9d, 0xc9,
++	0xf5, 0xe8, 0x1a, 0xf0, 0x11, 0x4a, 0xc1, 0x8d,
++	0xe0, 0x93, 0xa0, 0xbe, 0x09, 0x1c, 0x2b, 0x4e,
++	0x0f, 0xb2, 0x87, 0x8b, 0x84, 0xfe, 0x92, 0x32,
++	0x14, 0xd7, 0x93, 0xdf, 0xe7, 0x44, 0xbc, 0xc5,
++	0xae, 0x53, 0x69, 0xd8, 0xb3, 0x79, 0x37, 0x80,
++	0xe3, 0x17, 0x5c, 0xec, 0x53, 0x00, 0x9a, 0xe3,
++	0x8e, 0xdc, 0x38, 0xb8, 0x66, 0xf0, 0xd3, 0xad,
++	0x1d, 0x02, 0x96, 0x86, 0x3e, 0x9d, 0x3b, 0x5d,
++	0xa5, 0x7f, 0x21, 0x10, 0xf1, 0x1f, 0x13, 0x20,
++	0xf9, 0x57, 0x87, 0x20, 0xf5, 0x5f, 0xf1, 0x17,
++	0x48, 0x0a, 0x51, 0x5a, 0xcd, 0x19, 0x03, 0xa6,
++	0x5a, 0xd1, 0x12, 0x97, 0xe9, 0x48, 0xe2, 0x1d,
++	0x83, 0x75, 0x50, 0xd9, 0x75, 0x7d, 0x6a, 0x82,
++	0xa1, 0xf9, 0x4e, 0x54, 0x87, 0x89, 0xc9, 0x0c,
++	0xb7, 0x5b, 0x6a, 0x91, 0xc1, 0x9c, 0xb2, 0xa9,
++	0xdc, 0x9a, 0xa4, 0x49, 0x0a, 0x6d, 0x0d, 0xbb,
++	0xde, 0x86, 0x44, 0xdd, 0x5d, 0x89, 0x2b, 0x96,
++	0x0f, 0x23, 0x95, 0xad, 0xcc, 0xa2, 0xb3, 0xb9,
++	0x7e, 0x74, 0x38, 0xba, 0x9f, 0x73, 0xae, 0x5f,
++	0xf8, 0x68, 0xa2, 0xe0, 0xa9, 0xce, 0xbd, 0x40,
++	0xd4, 0x4c, 0x6b, 0xd2, 0x56, 0x62, 0xb0, 0xcc,
++	0x63, 0x7e, 0x5b, 0xd3, 0xae, 0xd1, 0x75, 0xce,
++	0xbb, 0xb4, 0x5b, 0xa8, 0xf8, 0xb4, 0xac, 0x71,
++	0x75, 0xaa, 0xc9, 0x9f, 0xbb, 0x6c, 0xad, 0x0f,
++	0x55, 0x5d, 0xe8, 0x85, 0x7d, 0xf9, 0x21, 0x35,
++	0xea, 0x92, 0x85, 0x2b, 0x00, 0xec, 0x84, 0x90,
++	0x0a, 0x63, 0x96, 0xe4, 0x6b, 0xa9, 0x77, 0xb8,
++	0x91, 0xf8, 0x46, 0x15, 0x72, 0x63, 0x70, 0x01,
++	0x40, 0xa3, 0xa5, 0x76, 0x62, 0x2b, 0xbf, 0xf1,
++	0xe5, 0x8d, 0x9f, 0xa3, 0xfa, 0x9b, 0x03, 0xbe,
++	0xfe, 0x65, 0x6f, 0xa2, 0x29, 0x0d, 0x54, 0xb4,
++	0x71, 0xce, 0xa9, 0xd6, 0x3d, 0x88, 0xf9, 0xaf,
++	0x6b, 0xa8, 0x9e, 0xf4, 0x16, 0x96, 0x36, 0xb9,
++	0x00, 0xdc, 0x10, 0xab, 0xb5, 0x08, 0x31, 0x1f,
++	0x00, 0xb1, 0x3c, 0xd9, 0x38, 0x3e, 0xc6, 0x04,
++	0xa7, 0x4e, 0xe8, 0xae, 0xed, 0x98, 0xc2, 0xf7,
++	0xb9, 0x00, 0x5f, 0x8c, 0x60, 0xd1, 0xe5, 0x15,
++	0xf7, 0xae, 0x1e, 0x84, 0x88, 0xd1, 0xf6, 0xbc,
++	0x3a, 0x89, 0x35, 0x22, 0x83, 0x7c, 0xca, 0xf0,
++	0x33, 0x82, 0x4c, 0x79, 0x3c, 0xfd, 0xb1, 0xae,
++	0x52, 0x62, 0x55, 0xd2, 0x41, 0x60, 0xc6, 0xbb,
++	0xfa, 0x0e, 0x59, 0xd6, 0xa8, 0xfe, 0x5d, 0xed,
++	0x47, 0x3d, 0xe0, 0xea, 0x1f, 0x6e, 0x43, 0x51,
++	0xec, 0x10, 0x52, 0x56, 0x77, 0x42, 0x6b, 0x52,
++	0x87, 0xd8, 0xec, 0xe0, 0xaa, 0x76, 0xa5, 0x84,
++	0x2a, 0x22, 0x24, 0xfd, 0x92, 0x40, 0x88, 0xd5,
++	0x85, 0x1c, 0x1f, 0x6b, 0x47, 0xa0, 0xc4, 0xe4,
++	0xef, 0xf4, 0xea, 0xd7, 0x59, 0xac, 0x2a, 0x9e,
++	0x8c, 0xfa, 0x1f, 0x42, 0x08, 0xfe, 0x4f, 0x74,
++	0xa0, 0x26, 0xf5, 0xb3, 0x84, 0xf6, 0x58, 0x5f,
++	0x26, 0x66, 0x3e, 0xd7, 0xe4, 0x22, 0x91, 0x13,
++	0xc8, 0xac, 0x25, 0x96, 0x23, 0xd8, 0x09, 0xea,
++	0x45, 0x75, 0x23, 0xb8, 0x5f, 0xc2, 0x90, 0x8b,
++	0x09, 0xc4, 0xfc, 0x47, 0x6c, 0x6d, 0x0a, 0xef,
++	0x69, 0xa4, 0x38, 0x19, 0xcf, 0x7d, 0xf9, 0x09,
++	0x73, 0x9b, 0x60, 0x5a, 0xf7, 0x37, 0xb5, 0xfe,
++	0x9f, 0xe3, 0x2b, 0x4c, 0x0d, 0x6e, 0x19, 0xf1,
++	0xd6, 0xc0, 0x70, 0xf3, 0x9d, 0x22, 0x3c, 0xf9,
++	0x49, 0xce, 0x30, 0x8e, 0x44, 0xb5, 0x76, 0x15,
++	0x8f, 0x52, 0xfd, 0xa5, 0x04, 0xb8, 0x55, 0x6a,
++	0x36, 0x59, 0x7c, 0xc4, 0x48, 0xb8, 0xd7, 0xab,
++	0x05, 0x66, 0xe9, 0x5e, 0x21, 0x6f, 0x6b, 0x36,
++	0x29, 0xbb, 0xe9, 0xe3, 0xa2, 0x9a, 0xa8, 0xcd,
++	0x55, 0x25, 0x11, 0xba, 0x5a, 0x58, 0xa0, 0xde,
++	0xae, 0x19, 0x2a, 0x48, 0x5a, 0xff, 0x36, 0xcd,
++	0x6d, 0x16, 0x7a, 0x73, 0x38, 0x46, 0xe5, 0x47,
++	0x59, 0xc8, 0xa2, 0xf6, 0xe2, 0x6c, 0x83, 0xc5,
++	0x36, 0x2c, 0x83, 0x7d, 0xb4, 0x01, 0x05, 0x69,
++	0xe7, 0xaf, 0x5c, 0xc4, 0x64, 0x82, 0x12, 0x21,
++	0xef, 0xf7, 0xd1, 0x7d, 0xb8, 0x8d, 0x8c, 0x98,
++	0x7c, 0x5f, 0x7d, 0x92, 0x88, 0xb9, 0x94, 0x07,
++	0x9c, 0xd8, 0xe9, 0x9c, 0x17, 0x38, 0xe3, 0x57,
++	0x6c, 0xe0, 0xdc, 0xa5, 0x92, 0x42, 0xb3, 0xbd,
++	0x50, 0xa2, 0x7e, 0xb5, 0xb1, 0x52, 0x72, 0x03,
++	0x97, 0xd8, 0xaa, 0x9a, 0x1e, 0x75, 0x41, 0x11,
++	0xa3, 0x4f, 0xcc, 0xd4, 0xe3, 0x73, 0xad, 0x96,
++	0xdc, 0x47, 0x41, 0x9f, 0xb0, 0xbe, 0x79, 0x91,
++	0xf5, 0xb6, 0x18, 0xfe, 0xc2, 0x83, 0x18, 0x7d,
++	0x73, 0xd9, 0x4f, 0x83, 0x84, 0x03, 0xb3, 0xf0,
++	0x77, 0x66, 0x3d, 0x83, 0x63, 0x2e, 0x2c, 0xf9,
++	0xdd, 0xa6, 0x1f, 0x89, 0x82, 0xb8, 0x23, 0x42,
++	0xeb, 0xe2, 0xca, 0x70, 0x82, 0x61, 0x41, 0x0a,
++	0x6d, 0x5f, 0x75, 0xc5, 0xe2, 0xc4, 0x91, 0x18,
++	0x44, 0x22, 0xfa, 0x34, 0x10, 0xf5, 0x20, 0xdc,
++	0xb7, 0xdd, 0x2a, 0x20, 0x77, 0xf5, 0xf9, 0xce,
++	0xdb, 0xa0, 0x0a, 0x52, 0x2a, 0x4e, 0xdd, 0xcc,
++	0x97, 0xdf, 0x05, 0xe4, 0x5e, 0xb7, 0xaa, 0xf0,
++	0xe2, 0x80, 0xff, 0xba, 0x1a, 0x0f, 0xac, 0xdf,
++	0x02, 0x32, 0xe6, 0xf7, 0xc7, 0x17, 0x13, 0xb7,
++	0xfc, 0x98, 0x48, 0x8c, 0x0d, 0x82, 0xc9, 0x80,
++	0x7a, 0xe2, 0x0a, 0xc5, 0xb4, 0xde, 0x7c, 0x3c,
++	0x79, 0x81, 0x0e, 0x28, 0x65, 0x79, 0x67, 0x82,
++	0x69, 0x44, 0x66, 0x09, 0xf7, 0x16, 0x1a, 0xf9,
++	0x7d, 0x80, 0xa1, 0x79, 0x14, 0xa9, 0xc8, 0x20,
++	0xfb, 0xa2, 0x46, 0xbe, 0x08, 0x35, 0x17, 0x58,
++	0xc1, 0x1a, 0xda, 0x2a, 0x6b, 0x2e, 0x1e, 0xe6,
++	0x27, 0x55, 0x7b, 0x19, 0xe2, 0xfb, 0x64, 0xfc,
++	0x5e, 0x15, 0x54, 0x3c, 0xe7, 0xc2, 0x11, 0x50,
++	0x30, 0xb8, 0x72, 0x03, 0x0b, 0x1a, 0x9f, 0x86,
++	0x27, 0x11, 0x5c, 0x06, 0x2b, 0xbd, 0x75, 0x1a,
++	0x0a, 0xda, 0x01, 0xfa, 0x5c, 0x4a, 0xc1, 0x80,
++	0x3a, 0x6e, 0x30, 0xc8, 0x2c, 0xeb, 0x56, 0xec,
++	0x89, 0xfa, 0x35, 0x7b, 0xb2, 0xf0, 0x97, 0x08,
++	0x86, 0x53, 0xbe, 0xbd, 0x40, 0x41, 0x38, 0x1c,
++	0xb4, 0x8b, 0x79, 0x2e, 0x18, 0x96, 0x94, 0xde,
++	0xe8, 0xca, 0xe5, 0x9f, 0x92, 0x9f, 0x15, 0x5d,
++	0x56, 0x60, 0x5c, 0x09, 0xf9, 0x16, 0xf4, 0x17,
++	0x0f, 0xf6, 0x4c, 0xda, 0xe6, 0x67, 0x89, 0x9f,
++	0xca, 0x6c, 0xe7, 0x9b, 0x04, 0x62, 0x0e, 0x26,
++	0xa6, 0x52, 0xbd, 0x29, 0xff, 0xc7, 0xa4, 0x96,
++	0xe6, 0x6a, 0x02, 0xa5, 0x2e, 0x7b, 0xfe, 0x97,
++	0x68, 0x3e, 0x2e, 0x5f, 0x3b, 0x0f, 0x36, 0xd6,
++	0x98, 0x19, 0x59, 0x48, 0xd2, 0xc6, 0xe1, 0x55,
++	0x1a, 0x6e, 0xd6, 0xed, 0x2c, 0xba, 0xc3, 0x9e,
++	0x64, 0xc9, 0x95, 0x86, 0x35, 0x5e, 0x3e, 0x88,
++	0x69, 0x99, 0x4b, 0xee, 0xbe, 0x9a, 0x99, 0xb5,
++	0x6e, 0x58, 0xae, 0xdd, 0x22, 0xdb, 0xdd, 0x6b,
++	0xfc, 0xaf, 0x90, 0xa3, 0x3d, 0xa4, 0xc1, 0x15,
++	0x92, 0x18, 0x8d, 0xd2, 0x4b, 0x7b, 0x06, 0xd1,
++	0x37, 0xb5, 0xe2, 0x7c, 0x2c, 0xf0, 0x25, 0xe4,
++	0x94, 0x2a, 0xbd, 0xe3, 0x82, 0x70, 0x78, 0xa3,
++	0x82, 0x10, 0x5a, 0x90, 0xd7, 0xa4, 0xfa, 0xaf,
++	0x1a, 0x88, 0x59, 0xdc, 0x74, 0x12, 0xb4, 0x8e,
++	0xd7, 0x19, 0x46, 0xf4, 0x84, 0x69, 0x9f, 0xbb,
++	0x70, 0xa8, 0x4c, 0x52, 0x81, 0xa9, 0xff, 0x76,
++	0x1c, 0xae, 0xd8, 0x11, 0x3d, 0x7f, 0x7d, 0xc5,
++	0x12, 0x59, 0x28, 0x18, 0xc2, 0xa2, 0xb7, 0x1c,
++	0x88, 0xf8, 0xd6, 0x1b, 0xa6, 0x7d, 0x9e, 0xde,
++	0x29, 0xf8, 0xed, 0xff, 0xeb, 0x92, 0x24, 0x4f,
++	0x05, 0xaa, 0xd9, 0x49, 0xba, 0x87, 0x59, 0x51,
++	0xc9, 0x20, 0x5c, 0x9b, 0x74, 0xcf, 0x03, 0xd9,
++	0x2d, 0x34, 0xc7, 0x5b, 0xa5, 0x40, 0xb2, 0x99,
++	0xf5, 0xcb, 0xb4, 0xf6, 0xb7, 0x72, 0x4a, 0xd6,
++	0xbd, 0xb0, 0xf3, 0x93, 0xe0, 0x1b, 0xa8, 0x04,
++	0x1e, 0x35, 0xd4, 0x80, 0x20, 0xf4, 0x9c, 0x31,
++	0x6b, 0x45, 0xb9, 0x15, 0xb0, 0x5e, 0xdd, 0x0a,
++	0x33, 0x9c, 0x83, 0xcd, 0x58, 0x89, 0x50, 0x56,
++	0xbb, 0x81, 0x00, 0x91, 0x32, 0xf3, 0x1b, 0x3e,
++	0xcf, 0x45, 0xe1, 0xf9, 0xe1, 0x2c, 0x26, 0x78,
++	0x93, 0x9a, 0x60, 0x46, 0xc9, 0xb5, 0x5e, 0x6a,
++	0x28, 0x92, 0x87, 0x3f, 0x63, 0x7b, 0xdb, 0xf7,
++	0xd0, 0x13, 0x9d, 0x32, 0x40, 0x5e, 0xcf, 0xfb,
++	0x79, 0x68, 0x47, 0x4c, 0xfd, 0x01, 0x17, 0xe6,
++	0x97, 0x93, 0x78, 0xbb, 0xa6, 0x27, 0xa3, 0xe8,
++	0x1a, 0xe8, 0x94, 0x55, 0x7d, 0x08, 0xe5, 0xdc,
++	0x66, 0xa3, 0x69, 0xc8, 0xca, 0xc5, 0xa1, 0x84,
++	0x55, 0xde, 0x08, 0x91, 0x16, 0x3a, 0x0c, 0x86,
++	0xab, 0x27, 0x2b, 0x64, 0x34, 0x02, 0x6c, 0x76,
++	0x8b, 0xc6, 0xaf, 0xcc, 0xe1, 0xd6, 0x8c, 0x2a,
++	0x18, 0x3d, 0xa6, 0x1b, 0x37, 0x75, 0x45, 0x73,
++	0xc2, 0x75, 0xd7, 0x53, 0x78, 0x3a, 0xd6, 0xe8,
++	0x29, 0xd2, 0x4a, 0xa8, 0x1e, 0x82, 0xf6, 0xb6,
++	0x81, 0xde, 0x21, 0xed, 0x2b, 0x56, 0xbb, 0xf2,
++	0xd0, 0x57, 0xc1, 0x7c, 0xd2, 0x6a, 0xd2, 0x56,
++	0xf5, 0x13, 0x5f, 0x1c, 0x6a, 0x0b, 0x74, 0xfb,
++	0xe9, 0xfe, 0x9e, 0xea, 0x95, 0xb2, 0x46, 0xab,
++	0x0a, 0xfc, 0xfd, 0xf3, 0xbb, 0x04, 0x2b, 0x76,
++	0x1b, 0xa4, 0x74, 0xb0, 0xc1, 0x78, 0xc3, 0x69,
++	0xe2, 0xb0, 0x01, 0xe1, 0xde, 0x32, 0x4c, 0x8d,
++	0x1a, 0xb3, 0x38, 0x08, 0xd5, 0xfc, 0x1f, 0xdc,
++	0x0e, 0x2c, 0x9c, 0xb1, 0xa1, 0x63, 0x17, 0x22,
++	0xf5, 0x6c, 0x93, 0x70, 0x74, 0x00, 0xf8, 0x39,
++	0x01, 0x94, 0xd1, 0x32, 0x23, 0x56, 0x5d, 0xa6,
++	0x02, 0x76, 0x76, 0x93, 0xce, 0x2f, 0x19, 0xe9,
++	0x17, 0x52, 0xae, 0x6e, 0x2c, 0x6d, 0x61, 0x7f,
++	0x3b, 0xaa, 0xe0, 0x52, 0x85, 0xc5, 0x65, 0xc1,
++	0xbb, 0x8e, 0x5b, 0x21, 0xd5, 0xc9, 0x78, 0x83,
++	0x07, 0x97, 0x4c, 0x62, 0x61, 0x41, 0xd4, 0xfc,
++	0xc9, 0x39, 0xe3, 0x9b, 0xd0, 0xcc, 0x75, 0xc4,
++	0x97, 0xe6, 0xdd, 0x2a, 0x5f, 0xa6, 0xe8, 0x59,
++	0x6c, 0x98, 0xb9, 0x02, 0xe2, 0xa2, 0xd6, 0x68,
++	0xee, 0x3b, 0x1d, 0xe3, 0x4d, 0x5b, 0x30, 0xef,
++	0x03, 0xf2, 0xeb, 0x18, 0x57, 0x36, 0xe8, 0xa1,
++	0xf4, 0x47, 0xfb, 0xcb, 0x8f, 0xcb, 0xc8, 0xf3,
++	0x4f, 0x74, 0x9d, 0x9d, 0xb1, 0x8d, 0x14, 0x44,
++	0xd9, 0x19, 0xb4, 0x54, 0x4f, 0x75, 0x19, 0x09,
++	0xa0, 0x75, 0xbc, 0x3b, 0x82, 0xc6, 0x3f, 0xb8,
++	0x83, 0x19, 0x6e, 0xd6, 0x37, 0xfe, 0x6e, 0x8a,
++	0x4e, 0xe0, 0x4a, 0xab, 0x7b, 0xc8, 0xb4, 0x1d,
++	0xf4, 0xed, 0x27, 0x03, 0x65, 0xa2, 0xa1, 0xae,
++	0x11, 0xe7, 0x98, 0x78, 0x48, 0x91, 0xd2, 0xd2,
++	0xd4, 0x23, 0x78, 0x50, 0xb1, 0x5b, 0x85, 0x10,
++	0x8d, 0xca, 0x5f, 0x0f, 0x71, 0xae, 0x72, 0x9a,
++	0xf6, 0x25, 0x19, 0x60, 0x06, 0xf7, 0x10, 0x34,
++	0x18, 0x0d, 0xc9, 0x9f, 0x7b, 0x0c, 0x9b, 0x8f,
++	0x91, 0x1b, 0x9f, 0xcd, 0x10, 0xee, 0x75, 0xf9,
++	0x97, 0x66, 0xfc, 0x4d, 0x33, 0x6e, 0x28, 0x2b,
++	0x92, 0x85, 0x4f, 0xab, 0x43, 0x8d, 0x8f, 0x7d,
++	0x86, 0xa7, 0xc7, 0xd8, 0xd3, 0x0b, 0x8b, 0x57,
++	0xb6, 0x1d, 0x95, 0x0d, 0xe9, 0xbc, 0xd9, 0x03,
++	0xd9, 0x10, 0x19, 0xc3, 0x46, 0x63, 0x55, 0x87,
++	0x61, 0x79, 0x6c, 0x95, 0x0e, 0x9c, 0xdd, 0xca,
++	0xc3, 0xf3, 0x64, 0xf0, 0x7d, 0x76, 0xb7, 0x53,
++	0x67, 0x2b, 0x1e, 0x44, 0x56, 0x81, 0xea, 0x8f,
++	0x5c, 0x42, 0x16, 0xb8, 0x28, 0xeb, 0x1b, 0x61,
++	0x10, 0x1e, 0xbf, 0xec, 0xa8
++};
++static const u8 enc_output011[] __initconst = {
++	0x6a, 0xfc, 0x4b, 0x25, 0xdf, 0xc0, 0xe4, 0xe8,
++	0x17, 0x4d, 0x4c, 0xc9, 0x7e, 0xde, 0x3a, 0xcc,
++	0x3c, 0xba, 0x6a, 0x77, 0x47, 0xdb, 0xe3, 0x74,
++	0x7a, 0x4d, 0x5f, 0x8d, 0x37, 0x55, 0x80, 0x73,
++	0x90, 0x66, 0x5d, 0x3a, 0x7d, 0x5d, 0x86, 0x5e,
++	0x8d, 0xfd, 0x83, 0xff, 0x4e, 0x74, 0x6f, 0xf9,
++	0xe6, 0x70, 0x17, 0x70, 0x3e, 0x96, 0xa7, 0x7e,
++	0xcb, 0xab, 0x8f, 0x58, 0x24, 0x9b, 0x01, 0xfd,
++	0xcb, 0xe6, 0x4d, 0x9b, 0xf0, 0x88, 0x94, 0x57,
++	0x66, 0xef, 0x72, 0x4c, 0x42, 0x6e, 0x16, 0x19,
++	0x15, 0xea, 0x70, 0x5b, 0xac, 0x13, 0xdb, 0x9f,
++	0x18, 0xe2, 0x3c, 0x26, 0x97, 0xbc, 0xdc, 0x45,
++	0x8c, 0x6c, 0x24, 0x69, 0x9c, 0xf7, 0x65, 0x1e,
++	0x18, 0x59, 0x31, 0x7c, 0xe4, 0x73, 0xbc, 0x39,
++	0x62, 0xc6, 0x5c, 0x9f, 0xbf, 0xfa, 0x90, 0x03,
++	0xc9, 0x72, 0x26, 0xb6, 0x1b, 0xc2, 0xb7, 0x3f,
++	0xf2, 0x13, 0x77, 0xf2, 0x8d, 0xb9, 0x47, 0xd0,
++	0x53, 0xdd, 0xc8, 0x91, 0x83, 0x8b, 0xb1, 0xce,
++	0xa3, 0xfe, 0xcd, 0xd9, 0xdd, 0x92, 0x7b, 0xdb,
++	0xb8, 0xfb, 0xc9, 0x2d, 0x01, 0x59, 0x39, 0x52,
++	0xad, 0x1b, 0xec, 0xcf, 0xd7, 0x70, 0x13, 0x21,
++	0xf5, 0x47, 0xaa, 0x18, 0x21, 0x5c, 0xc9, 0x9a,
++	0xd2, 0x6b, 0x05, 0x9c, 0x01, 0xa1, 0xda, 0x35,
++	0x5d, 0xb3, 0x70, 0xe6, 0xa9, 0x80, 0x8b, 0x91,
++	0xb7, 0xb3, 0x5f, 0x24, 0x9a, 0xb7, 0xd1, 0x6b,
++	0xa1, 0x1c, 0x50, 0xba, 0x49, 0xe0, 0xee, 0x2e,
++	0x75, 0xac, 0x69, 0xc0, 0xeb, 0x03, 0xdd, 0x19,
++	0xe5, 0xf6, 0x06, 0xdd, 0xc3, 0xd7, 0x2b, 0x07,
++	0x07, 0x30, 0xa7, 0x19, 0x0c, 0xbf, 0xe6, 0x18,
++	0xcc, 0xb1, 0x01, 0x11, 0x85, 0x77, 0x1d, 0x96,
++	0xa7, 0xa3, 0x00, 0x84, 0x02, 0xa2, 0x83, 0x68,
++	0xda, 0x17, 0x27, 0xc8, 0x7f, 0x23, 0xb7, 0xf4,
++	0x13, 0x85, 0xcf, 0xdd, 0x7a, 0x7d, 0x24, 0x57,
++	0xfe, 0x05, 0x93, 0xf5, 0x74, 0xce, 0xed, 0x0c,
++	0x20, 0x98, 0x8d, 0x92, 0x30, 0xa1, 0x29, 0x23,
++	0x1a, 0xa0, 0x4f, 0x69, 0x56, 0x4c, 0xe1, 0xc8,
++	0xce, 0xf6, 0x9a, 0x0c, 0xa4, 0xfa, 0x04, 0xf6,
++	0x62, 0x95, 0xf2, 0xfa, 0xc7, 0x40, 0x68, 0x40,
++	0x8f, 0x41, 0xda, 0xb4, 0x26, 0x6f, 0x70, 0xab,
++	0x40, 0x61, 0xa4, 0x0e, 0x75, 0xfb, 0x86, 0xeb,
++	0x9d, 0x9a, 0x1f, 0xec, 0x76, 0x99, 0xe7, 0xea,
++	0xaa, 0x1e, 0x2d, 0xb5, 0xd4, 0xa6, 0x1a, 0xb8,
++	0x61, 0x0a, 0x1d, 0x16, 0x5b, 0x98, 0xc2, 0x31,
++	0x40, 0xe7, 0x23, 0x1d, 0x66, 0x99, 0xc8, 0xc0,
++	0xd7, 0xce, 0xf3, 0x57, 0x40, 0x04, 0x3f, 0xfc,
++	0xea, 0xb3, 0xfc, 0xd2, 0xd3, 0x99, 0xa4, 0x94,
++	0x69, 0xa0, 0xef, 0xd1, 0x85, 0xb3, 0xa6, 0xb1,
++	0x28, 0xbf, 0x94, 0x67, 0x22, 0xc3, 0x36, 0x46,
++	0xf8, 0xd2, 0x0f, 0x5f, 0xf4, 0x59, 0x80, 0xe6,
++	0x2d, 0x43, 0x08, 0x7d, 0x19, 0x09, 0x97, 0xa7,
++	0x4c, 0x3d, 0x8d, 0xba, 0x65, 0x62, 0xa3, 0x71,
++	0x33, 0x29, 0x62, 0xdb, 0xc1, 0x33, 0x34, 0x1a,
++	0x63, 0x33, 0x16, 0xb6, 0x64, 0x7e, 0xab, 0x33,
++	0xf0, 0xe6, 0x26, 0x68, 0xba, 0x1d, 0x2e, 0x38,
++	0x08, 0xe6, 0x02, 0xd3, 0x25, 0x2c, 0x47, 0x23,
++	0x58, 0x34, 0x0f, 0x9d, 0x63, 0x4f, 0x63, 0xbb,
++	0x7f, 0x3b, 0x34, 0x38, 0xa7, 0xb5, 0x8d, 0x65,
++	0xd9, 0x9f, 0x79, 0x55, 0x3e, 0x4d, 0xe7, 0x73,
++	0xd8, 0xf6, 0x98, 0x97, 0x84, 0x60, 0x9c, 0xc8,
++	0xa9, 0x3c, 0xf6, 0xdc, 0x12, 0x5c, 0xe1, 0xbb,
++	0x0b, 0x8b, 0x98, 0x9c, 0x9d, 0x26, 0x7c, 0x4a,
++	0xe6, 0x46, 0x36, 0x58, 0x21, 0x4a, 0xee, 0xca,
++	0xd7, 0x3b, 0xc2, 0x6c, 0x49, 0x2f, 0xe5, 0xd5,
++	0x03, 0x59, 0x84, 0x53, 0xcb, 0xfe, 0x92, 0x71,
++	0x2e, 0x7c, 0x21, 0xcc, 0x99, 0x85, 0x7f, 0xb8,
++	0x74, 0x90, 0x13, 0x42, 0x3f, 0xe0, 0x6b, 0x1d,
++	0xf2, 0x4d, 0x54, 0xd4, 0xfc, 0x3a, 0x05, 0xe6,
++	0x74, 0xaf, 0xa6, 0xa0, 0x2a, 0x20, 0x23, 0x5d,
++	0x34, 0x5c, 0xd9, 0x3e, 0x4e, 0xfa, 0x93, 0xe7,
++	0xaa, 0xe9, 0x6f, 0x08, 0x43, 0x67, 0x41, 0xc5,
++	0xad, 0xfb, 0x31, 0x95, 0x82, 0x73, 0x32, 0xd8,
++	0xa6, 0xa3, 0xed, 0x0e, 0x2d, 0xf6, 0x5f, 0xfd,
++	0x80, 0xa6, 0x7a, 0xe0, 0xdf, 0x78, 0x15, 0x29,
++	0x74, 0x33, 0xd0, 0x9e, 0x83, 0x86, 0x72, 0x22,
++	0x57, 0x29, 0xb9, 0x9e, 0x5d, 0xd3, 0x1a, 0xb5,
++	0x96, 0x72, 0x41, 0x3d, 0xf1, 0x64, 0x43, 0x67,
++	0xee, 0xaa, 0x5c, 0xd3, 0x9a, 0x96, 0x13, 0x11,
++	0x5d, 0xf3, 0x0c, 0x87, 0x82, 0x1e, 0x41, 0x9e,
++	0xd0, 0x27, 0xd7, 0x54, 0x3b, 0x67, 0x73, 0x09,
++	0x91, 0xe9, 0xd5, 0x36, 0xa7, 0xb5, 0x55, 0xe4,
++	0xf3, 0x21, 0x51, 0x49, 0x22, 0x07, 0x55, 0x4f,
++	0x44, 0x4b, 0xd2, 0x15, 0x93, 0x17, 0x2a, 0xfa,
++	0x4d, 0x4a, 0x57, 0xdb, 0x4c, 0xa6, 0xeb, 0xec,
++	0x53, 0x25, 0x6c, 0x21, 0xed, 0x00, 0x4c, 0x3b,
++	0xca, 0x14, 0x57, 0xa9, 0xd6, 0x6a, 0xcd, 0x8d,
++	0x5e, 0x74, 0xac, 0x72, 0xc1, 0x97, 0xe5, 0x1b,
++	0x45, 0x4e, 0xda, 0xfc, 0xcc, 0x40, 0xe8, 0x48,
++	0x88, 0x0b, 0xa3, 0xe3, 0x8d, 0x83, 0x42, 0xc3,
++	0x23, 0xfd, 0x68, 0xb5, 0x8e, 0xf1, 0x9d, 0x63,
++	0x77, 0xe9, 0xa3, 0x8e, 0x8c, 0x26, 0x6b, 0xbd,
++	0x72, 0x73, 0x35, 0x0c, 0x03, 0xf8, 0x43, 0x78,
++	0x52, 0x71, 0x15, 0x1f, 0x71, 0x5d, 0x6e, 0xed,
++	0xb9, 0xcc, 0x86, 0x30, 0xdb, 0x2b, 0xd3, 0x82,
++	0x88, 0x23, 0x71, 0x90, 0x53, 0x5c, 0xa9, 0x2f,
++	0x76, 0x01, 0xb7, 0x9a, 0xfe, 0x43, 0x55, 0xa3,
++	0x04, 0x9b, 0x0e, 0xe4, 0x59, 0xdf, 0xc9, 0xe9,
++	0xb1, 0xea, 0x29, 0x28, 0x3c, 0x5c, 0xae, 0x72,
++	0x84, 0xb6, 0xc6, 0xeb, 0x0c, 0x27, 0x07, 0x74,
++	0x90, 0x0d, 0x31, 0xb0, 0x00, 0x77, 0xe9, 0x40,
++	0x70, 0x6f, 0x68, 0xa7, 0xfd, 0x06, 0xec, 0x4b,
++	0xc0, 0xb7, 0xac, 0xbc, 0x33, 0xb7, 0x6d, 0x0a,
++	0xbd, 0x12, 0x1b, 0x59, 0xcb, 0xdd, 0x32, 0xf5,
++	0x1d, 0x94, 0x57, 0x76, 0x9e, 0x0c, 0x18, 0x98,
++	0x71, 0xd7, 0x2a, 0xdb, 0x0b, 0x7b, 0xa7, 0x71,
++	0xb7, 0x67, 0x81, 0x23, 0x96, 0xae, 0xb9, 0x7e,
++	0x32, 0x43, 0x92, 0x8a, 0x19, 0xa0, 0xc4, 0xd4,
++	0x3b, 0x57, 0xf9, 0x4a, 0x2c, 0xfb, 0x51, 0x46,
++	0xbb, 0xcb, 0x5d, 0xb3, 0xef, 0x13, 0x93, 0x6e,
++	0x68, 0x42, 0x54, 0x57, 0xd3, 0x6a, 0x3a, 0x8f,
++	0x9d, 0x66, 0xbf, 0xbd, 0x36, 0x23, 0xf5, 0x93,
++	0x83, 0x7b, 0x9c, 0xc0, 0xdd, 0xc5, 0x49, 0xc0,
++	0x64, 0xed, 0x07, 0x12, 0xb3, 0xe6, 0xe4, 0xe5,
++	0x38, 0x95, 0x23, 0xb1, 0xa0, 0x3b, 0x1a, 0x61,
++	0xda, 0x17, 0xac, 0xc3, 0x58, 0xdd, 0x74, 0x64,
++	0x22, 0x11, 0xe8, 0x32, 0x1d, 0x16, 0x93, 0x85,
++	0x99, 0xa5, 0x9c, 0x34, 0x55, 0xb1, 0xe9, 0x20,
++	0x72, 0xc9, 0x28, 0x7b, 0x79, 0x00, 0xa1, 0xa6,
++	0xa3, 0x27, 0x40, 0x18, 0x8a, 0x54, 0xe0, 0xcc,
++	0xe8, 0x4e, 0x8e, 0x43, 0x96, 0xe7, 0x3f, 0xc8,
++	0xe9, 0xb2, 0xf9, 0xc9, 0xda, 0x04, 0x71, 0x50,
++	0x47, 0xe4, 0xaa, 0xce, 0xa2, 0x30, 0xc8, 0xe4,
++	0xac, 0xc7, 0x0d, 0x06, 0x2e, 0xe6, 0xe8, 0x80,
++	0x36, 0x29, 0x9e, 0x01, 0xb8, 0xc3, 0xf0, 0xa0,
++	0x5d, 0x7a, 0xca, 0x4d, 0xa0, 0x57, 0xbd, 0x2a,
++	0x45, 0xa7, 0x7f, 0x9c, 0x93, 0x07, 0x8f, 0x35,
++	0x67, 0x92, 0xe3, 0xe9, 0x7f, 0xa8, 0x61, 0x43,
++	0x9e, 0x25, 0x4f, 0x33, 0x76, 0x13, 0x6e, 0x12,
++	0xb9, 0xdd, 0xa4, 0x7c, 0x08, 0x9f, 0x7c, 0xe7,
++	0x0a, 0x8d, 0x84, 0x06, 0xa4, 0x33, 0x17, 0x34,
++	0x5e, 0x10, 0x7c, 0xc0, 0xa8, 0x3d, 0x1f, 0x42,
++	0x20, 0x51, 0x65, 0x5d, 0x09, 0xc3, 0xaa, 0xc0,
++	0xc8, 0x0d, 0xf0, 0x79, 0xbc, 0x20, 0x1b, 0x95,
++	0xe7, 0x06, 0x7d, 0x47, 0x20, 0x03, 0x1a, 0x74,
++	0xdd, 0xe2, 0xd4, 0xae, 0x38, 0x71, 0x9b, 0xf5,
++	0x80, 0xec, 0x08, 0x4e, 0x56, 0xba, 0x76, 0x12,
++	0x1a, 0xdf, 0x48, 0xf3, 0xae, 0xb3, 0xe6, 0xe6,
++	0xbe, 0xc0, 0x91, 0x2e, 0x01, 0xb3, 0x01, 0x86,
++	0xa2, 0xb9, 0x52, 0xd1, 0x21, 0xae, 0xd4, 0x97,
++	0x1d, 0xef, 0x41, 0x12, 0x95, 0x3d, 0x48, 0x45,
++	0x1c, 0x56, 0x32, 0x8f, 0xb8, 0x43, 0xbb, 0x19,
++	0xf3, 0xca, 0xe9, 0xeb, 0x6d, 0x84, 0xbe, 0x86,
++	0x06, 0xe2, 0x36, 0xb2, 0x62, 0x9d, 0xd3, 0x4c,
++	0x48, 0x18, 0x54, 0x13, 0x4e, 0xcf, 0xfd, 0xba,
++	0x84, 0xb9, 0x30, 0x53, 0xcf, 0xfb, 0xb9, 0x29,
++	0x8f, 0xdc, 0x9f, 0xef, 0x60, 0x0b, 0x64, 0xf6,
++	0x8b, 0xee, 0xa6, 0x91, 0xc2, 0x41, 0x6c, 0xf6,
++	0xfa, 0x79, 0x67, 0x4b, 0xc1, 0x3f, 0xaf, 0x09,
++	0x81, 0xd4, 0x5d, 0xcb, 0x09, 0xdf, 0x36, 0x31,
++	0xc0, 0x14, 0x3c, 0x7c, 0x0e, 0x65, 0x95, 0x99,
++	0x6d, 0xa3, 0xf4, 0xd7, 0x38, 0xee, 0x1a, 0x2b,
++	0x37, 0xe2, 0xa4, 0x3b, 0x4b, 0xd0, 0x65, 0xca,
++	0xf8, 0xc3, 0xe8, 0x15, 0x20, 0xef, 0xf2, 0x00,
++	0xfd, 0x01, 0x09, 0xc5, 0xc8, 0x17, 0x04, 0x93,
++	0xd0, 0x93, 0x03, 0x55, 0xc5, 0xfe, 0x32, 0xa3,
++	0x3e, 0x28, 0x2d, 0x3b, 0x93, 0x8a, 0xcc, 0x07,
++	0x72, 0x80, 0x8b, 0x74, 0x16, 0x24, 0xbb, 0xda,
++	0x94, 0x39, 0x30, 0x8f, 0xb1, 0xcd, 0x4a, 0x90,
++	0x92, 0x7c, 0x14, 0x8f, 0x95, 0x4e, 0xac, 0x9b,
++	0xd8, 0x8f, 0x1a, 0x87, 0xa4, 0x32, 0x27, 0x8a,
++	0xba, 0xf7, 0x41, 0xcf, 0x84, 0x37, 0x19, 0xe6,
++	0x06, 0xf5, 0x0e, 0xcf, 0x36, 0xf5, 0x9e, 0x6c,
++	0xde, 0xbc, 0xff, 0x64, 0x7e, 0x4e, 0x59, 0x57,
++	0x48, 0xfe, 0x14, 0xf7, 0x9c, 0x93, 0x5d, 0x15,
++	0xad, 0xcc, 0x11, 0xb1, 0x17, 0x18, 0xb2, 0x7e,
++	0xcc, 0xab, 0xe9, 0xce, 0x7d, 0x77, 0x5b, 0x51,
++	0x1b, 0x1e, 0x20, 0xa8, 0x32, 0x06, 0x0e, 0x75,
++	0x93, 0xac, 0xdb, 0x35, 0x37, 0x1f, 0xe9, 0x19,
++	0x1d, 0xb4, 0x71, 0x97, 0xd6, 0x4e, 0x2c, 0x08,
++	0xa5, 0x13, 0xf9, 0x0e, 0x7e, 0x78, 0x6e, 0x14,
++	0xe0, 0xa9, 0xb9, 0x96, 0x4c, 0x80, 0x82, 0xba,
++	0x17, 0xb3, 0x9d, 0x69, 0xb0, 0x84, 0x46, 0xff,
++	0xf9, 0x52, 0x79, 0x94, 0x58, 0x3a, 0x62, 0x90,
++	0x15, 0x35, 0x71, 0x10, 0x37, 0xed, 0xa1, 0x8e,
++	0x53, 0x6e, 0xf4, 0x26, 0x57, 0x93, 0x15, 0x93,
++	0xf6, 0x81, 0x2c, 0x5a, 0x10, 0xda, 0x92, 0xad,
++	0x2f, 0xdb, 0x28, 0x31, 0x2d, 0x55, 0x04, 0xd2,
++	0x06, 0x28, 0x8c, 0x1e, 0xdc, 0xea, 0x54, 0xac,
++	0xff, 0xb7, 0x6c, 0x30, 0x15, 0xd4, 0xb4, 0x0d,
++	0x00, 0x93, 0x57, 0xdd, 0xd2, 0x07, 0x07, 0x06,
++	0xd9, 0x43, 0x9b, 0xcd, 0x3a, 0xf4, 0x7d, 0x4c,
++	0x36, 0x5d, 0x23, 0xa2, 0xcc, 0x57, 0x40, 0x91,
++	0xe9, 0x2c, 0x2f, 0x2c, 0xd5, 0x30, 0x9b, 0x17,
++	0xb0, 0xc9, 0xf7, 0xa7, 0x2f, 0xd1, 0x93, 0x20,
++	0x6b, 0xc6, 0xc1, 0xe4, 0x6f, 0xcb, 0xd1, 0xe7,
++	0x09, 0x0f, 0x9e, 0xdc, 0xaa, 0x9f, 0x2f, 0xdf,
++	0x56, 0x9f, 0xd4, 0x33, 0x04, 0xaf, 0xd3, 0x6c,
++	0x58, 0x61, 0xf0, 0x30, 0xec, 0xf2, 0x7f, 0xf2,
++	0x9c, 0xdf, 0x39, 0xbb, 0x6f, 0xa2, 0x8c, 0x7e,
++	0xc4, 0x22, 0x51, 0x71, 0xc0, 0x4d, 0x14, 0x1a,
++	0xc4, 0xcd, 0x04, 0xd9, 0x87, 0x08, 0x50, 0x05,
++	0xcc, 0xaf, 0xf6, 0xf0, 0x8f, 0x92, 0x54, 0x58,
++	0xc2, 0xc7, 0x09, 0x7a, 0x59, 0x02, 0x05, 0xe8,
++	0xb0, 0x86, 0xd9, 0xbf, 0x7b, 0x35, 0x51, 0x4d,
++	0xaf, 0x08, 0x97, 0x2c, 0x65, 0xda, 0x2a, 0x71,
++	0x3a, 0xa8, 0x51, 0xcc, 0xf2, 0x73, 0x27, 0xc3,
++	0xfd, 0x62, 0xcf, 0xe3, 0xb2, 0xca, 0xcb, 0xbe,
++	0x1a, 0x0a, 0xa1, 0x34, 0x7b, 0x77, 0xc4, 0x62,
++	0x68, 0x78, 0x5f, 0x94, 0x07, 0x04, 0x65, 0x16,
++	0x4b, 0x61, 0xcb, 0xff, 0x75, 0x26, 0x50, 0x66,
++	0x1f, 0x6e, 0x93, 0xf8, 0xc5, 0x51, 0xeb, 0xa4,
++	0x4a, 0x48, 0x68, 0x6b, 0xe2, 0x5e, 0x44, 0xb2,
++	0x50, 0x2c, 0x6c, 0xae, 0x79, 0x4e, 0x66, 0x35,
++	0x81, 0x50, 0xac, 0xbc, 0x3f, 0xb1, 0x0c, 0xf3,
++	0x05, 0x3c, 0x4a, 0xa3, 0x6c, 0x2a, 0x79, 0xb4,
++	0xb7, 0xab, 0xca, 0xc7, 0x9b, 0x8e, 0xcd, 0x5f,
++	0x11, 0x03, 0xcb, 0x30, 0xa3, 0xab, 0xda, 0xfe,
++	0x64, 0xb9, 0xbb, 0xd8, 0x5e, 0x3a, 0x1a, 0x56,
++	0xe5, 0x05, 0x48, 0x90, 0x1e, 0x61, 0x69, 0x1b,
++	0x22, 0xe6, 0x1a, 0x3c, 0x75, 0xad, 0x1f, 0x37,
++	0x28, 0xdc, 0xe4, 0x6d, 0xbd, 0x42, 0xdc, 0xd3,
++	0xc8, 0xb6, 0x1c, 0x48, 0xfe, 0x94, 0x77, 0x7f,
++	0xbd, 0x62, 0xac, 0xa3, 0x47, 0x27, 0xcf, 0x5f,
++	0xd9, 0xdb, 0xaf, 0xec, 0xf7, 0x5e, 0xc1, 0xb0,
++	0x9d, 0x01, 0x26, 0x99, 0x7e, 0x8f, 0x03, 0x70,
++	0xb5, 0x42, 0xbe, 0x67, 0x28, 0x1b, 0x7c, 0xbd,
++	0x61, 0x21, 0x97, 0xcc, 0x5c, 0xe1, 0x97, 0x8f,
++	0x8d, 0xde, 0x2b, 0xaa, 0xa7, 0x71, 0x1d, 0x1e,
++	0x02, 0x73, 0x70, 0x58, 0x32, 0x5b, 0x1d, 0x67,
++	0x3d, 0xe0, 0x74, 0x4f, 0x03, 0xf2, 0x70, 0x51,
++	0x79, 0xf1, 0x61, 0x70, 0x15, 0x74, 0x9d, 0x23,
++	0x89, 0xde, 0xac, 0xfd, 0xde, 0xd0, 0x1f, 0xc3,
++	0x87, 0x44, 0x35, 0x4b, 0xe5, 0xb0, 0x60, 0xc5,
++	0x22, 0xe4, 0x9e, 0xca, 0xeb, 0xd5, 0x3a, 0x09,
++	0x45, 0xa4, 0xdb, 0xfa, 0x3f, 0xeb, 0x1b, 0xc7,
++	0xc8, 0x14, 0x99, 0x51, 0x92, 0x10, 0xed, 0xed,
++	0x28, 0xe0, 0xa1, 0xf8, 0x26, 0xcf, 0xcd, 0xcb,
++	0x63, 0xa1, 0x3b, 0xe3, 0xdf, 0x7e, 0xfe, 0xa6,
++	0xf0, 0x81, 0x9a, 0xbf, 0x55, 0xde, 0x54, 0xd5,
++	0x56, 0x60, 0x98, 0x10, 0x68, 0xf4, 0x38, 0x96,
++	0x8e, 0x6f, 0x1d, 0x44, 0x7f, 0xd6, 0x2f, 0xfe,
++	0x55, 0xfb, 0x0c, 0x7e, 0x67, 0xe2, 0x61, 0x44,
++	0xed, 0xf2, 0x35, 0x30, 0x5d, 0xe9, 0xc7, 0xd6,
++	0x6d, 0xe0, 0xa0, 0xed, 0xf3, 0xfc, 0xd8, 0x3e,
++	0x0a, 0x7b, 0xcd, 0xaf, 0x65, 0x68, 0x18, 0xc0,
++	0xec, 0x04, 0x1c, 0x74, 0x6d, 0xe2, 0x6e, 0x79,
++	0xd4, 0x11, 0x2b, 0x62, 0xd5, 0x27, 0xad, 0x4f,
++	0x01, 0x59, 0x73, 0xcc, 0x6a, 0x53, 0xfb, 0x2d,
++	0xd5, 0x4e, 0x99, 0x21, 0x65, 0x4d, 0xf5, 0x82,
++	0xf7, 0xd8, 0x42, 0xce, 0x6f, 0x3d, 0x36, 0x47,
++	0xf1, 0x05, 0x16, 0xe8, 0x1b, 0x6a, 0x8f, 0x93,
++	0xf2, 0x8f, 0x37, 0x40, 0x12, 0x28, 0xa3, 0xe6,
++	0xb9, 0x17, 0x4a, 0x1f, 0xb1, 0xd1, 0x66, 0x69,
++	0x86, 0xc4, 0xfc, 0x97, 0xae, 0x3f, 0x8f, 0x1e,
++	0x2b, 0xdf, 0xcd, 0xf9, 0x3c
++};
++static const u8 enc_assoc011[] __initconst = {
++	0xd6, 0x31, 0xda, 0x5d, 0x42, 0x5e, 0xd7
++};
++static const u8 enc_nonce011[] __initconst = {
++	0xfd, 0x87, 0xd4, 0xd8, 0x62, 0xfd, 0xec, 0xaa
++};
++static const u8 enc_key011[] __initconst = {
++	0x35, 0x4e, 0xb5, 0x70, 0x50, 0x42, 0x8a, 0x85,
++	0xf2, 0xfb, 0xed, 0x7b, 0xd0, 0x9e, 0x97, 0xca,
++	0xfa, 0x98, 0x66, 0x63, 0xee, 0x37, 0xcc, 0x52,
++	0xfe, 0xd1, 0xdf, 0x95, 0x15, 0x34, 0x29, 0x38
++};
++
++static const u8 enc_input012[] __initconst = {
++	0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0,
++	0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5,
++	0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57,
++	0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff,
++	0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5,
++	0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b,
++	0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46,
++	0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b,
++	0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71,
++	0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0,
++	0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b,
++	0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d,
++	0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f,
++	0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24,
++	0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23,
++	0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e,
++	0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14,
++	0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d,
++	0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb,
++	0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4,
++	0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf,
++	0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e,
++	0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6,
++	0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33,
++	0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb,
++	0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0,
++	0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe,
++	0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00,
++	0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d,
++	0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b,
++	0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50,
++	0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e,
++	0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4,
++	0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28,
++	0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8,
++	0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b,
++	0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86,
++	0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67,
++	0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff,
++	0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59,
++	0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe,
++	0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6,
++	0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e,
++	0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b,
++	0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50,
++	0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39,
++	0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02,
++	0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9,
++	0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a,
++	0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38,
++	0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9,
++	0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65,
++	0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb,
++	0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2,
++	0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae,
++	0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee,
++	0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00,
++	0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c,
++	0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8,
++	0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31,
++	0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68,
++	0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4,
++	0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0,
++	0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11,
++	0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7,
++	0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39,
++	0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1,
++	0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1,
++	0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2,
++	0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66,
++	0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49,
++	0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2,
++	0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5,
++	0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3,
++	0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c,
++	0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa,
++	0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00,
++	0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54,
++	0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87,
++	0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03,
++	0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39,
++	0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40,
++	0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6,
++	0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22,
++	0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5,
++	0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e,
++	0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32,
++	0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53,
++	0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42,
++	0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c,
++	0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68,
++	0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48,
++	0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c,
++	0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce,
++	0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd,
++	0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa,
++	0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69,
++	0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8,
++	0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58,
++	0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0,
++	0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45,
++	0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb,
++	0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33,
++	0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c,
++	0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23,
++	0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80,
++	0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1,
++	0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff,
++	0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24,
++	0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9,
++	0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46,
++	0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8,
++	0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20,
++	0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35,
++	0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63,
++	0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb,
++	0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36,
++	0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a,
++	0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c,
++	0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f,
++	0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02,
++	0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03,
++	0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa,
++	0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16,
++	0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d,
++	0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5,
++	0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7,
++	0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac,
++	0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47,
++	0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3,
++	0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35,
++	0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e,
++	0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6,
++	0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74,
++	0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e,
++	0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a,
++	0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0,
++	0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4,
++	0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8,
++	0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16,
++	0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32,
++	0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65,
++	0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06,
++	0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a,
++	0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7,
++	0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85,
++	0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb,
++	0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46,
++	0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e,
++	0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61,
++	0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb,
++	0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d,
++	0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00,
++	0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5,
++	0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6,
++	0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1,
++	0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a,
++	0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7,
++	0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63,
++	0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38,
++	0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3,
++	0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed,
++	0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49,
++	0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42,
++	0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0,
++	0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f,
++	0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1,
++	0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd,
++	0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d,
++	0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88,
++	0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1,
++	0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25,
++	0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22,
++	0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28,
++	0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f,
++	0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53,
++	0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28,
++	0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8,
++	0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc,
++	0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8,
++	0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb,
++	0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3,
++	0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3,
++	0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac,
++	0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2,
++	0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a,
++	0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad,
++	0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e,
++	0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd,
++	0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf,
++	0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba,
++	0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41,
++	0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91,
++	0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d,
++	0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6,
++	0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf,
++	0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92,
++	0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e,
++	0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72,
++	0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04,
++	0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46,
++	0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55,
++	0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84,
++	0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61,
++	0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d,
++	0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8,
++	0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d,
++	0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87,
++	0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70,
++	0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94,
++	0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f,
++	0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb,
++	0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90,
++	0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31,
++	0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06,
++	0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05,
++	0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7,
++	0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e,
++	0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae,
++	0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2,
++	0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21,
++	0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0,
++	0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d,
++	0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0,
++	0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6,
++	0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5,
++	0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9,
++	0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8,
++	0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57,
++	0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1,
++	0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c,
++	0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b,
++	0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69,
++	0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d,
++	0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d,
++	0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19,
++	0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82,
++	0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20,
++	0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f,
++	0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e,
++	0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f,
++	0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47,
++	0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b,
++	0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4,
++	0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b,
++	0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4,
++	0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9,
++	0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3,
++	0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0,
++	0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16,
++	0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d,
++	0x78, 0xec, 0x00
++};
++static const u8 enc_output012[] __initconst = {
++	0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3,
++	0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf,
++	0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1,
++	0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f,
++	0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e,
++	0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5,
++	0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b,
++	0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b,
++	0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2,
++	0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1,
++	0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74,
++	0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e,
++	0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae,
++	0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd,
++	0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04,
++	0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55,
++	0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef,
++	0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b,
++	0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74,
++	0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26,
++	0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f,
++	0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64,
++	0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd,
++	0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad,
++	0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b,
++	0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e,
++	0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e,
++	0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0,
++	0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f,
++	0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50,
++	0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97,
++	0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03,
++	0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a,
++	0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15,
++	0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb,
++	0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34,
++	0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47,
++	0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86,
++	0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24,
++	0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c,
++	0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9,
++	0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7,
++	0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48,
++	0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b,
++	0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e,
++	0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61,
++	0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75,
++	0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26,
++	0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74,
++	0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43,
++	0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1,
++	0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79,
++	0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3,
++	0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5,
++	0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9,
++	0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d,
++	0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8,
++	0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26,
++	0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5,
++	0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d,
++	0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29,
++	0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57,
++	0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92,
++	0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9,
++	0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc,
++	0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd,
++	0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57,
++	0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3,
++	0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4,
++	0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c,
++	0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27,
++	0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c,
++	0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5,
++	0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14,
++	0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94,
++	0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b,
++	0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99,
++	0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84,
++	0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a,
++	0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa,
++	0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75,
++	0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74,
++	0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40,
++	0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72,
++	0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f,
++	0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92,
++	0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8,
++	0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c,
++	0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f,
++	0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb,
++	0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a,
++	0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b,
++	0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d,
++	0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c,
++	0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4,
++	0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00,
++	0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b,
++	0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4,
++	0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84,
++	0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba,
++	0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47,
++	0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4,
++	0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88,
++	0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81,
++	0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1,
++	0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a,
++	0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e,
++	0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1,
++	0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07,
++	0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24,
++	0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f,
++	0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a,
++	0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9,
++	0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9,
++	0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51,
++	0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1,
++	0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c,
++	0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53,
++	0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40,
++	0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a,
++	0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2,
++	0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2,
++	0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8,
++	0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07,
++	0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9,
++	0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d,
++	0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde,
++	0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f,
++	0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d,
++	0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d,
++	0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56,
++	0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c,
++	0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3,
++	0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d,
++	0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26,
++	0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10,
++	0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c,
++	0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11,
++	0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf,
++	0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c,
++	0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb,
++	0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79,
++	0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa,
++	0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80,
++	0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08,
++	0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c,
++	0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc,
++	0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab,
++	0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6,
++	0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9,
++	0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7,
++	0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2,
++	0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33,
++	0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2,
++	0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e,
++	0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c,
++	0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b,
++	0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66,
++	0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6,
++	0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44,
++	0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74,
++	0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6,
++	0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f,
++	0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24,
++	0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1,
++	0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2,
++	0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5,
++	0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d,
++	0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0,
++	0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b,
++	0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3,
++	0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0,
++	0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3,
++	0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c,
++	0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b,
++	0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5,
++	0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51,
++	0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71,
++	0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68,
++	0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb,
++	0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e,
++	0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b,
++	0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8,
++	0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb,
++	0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54,
++	0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7,
++	0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff,
++	0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd,
++	0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde,
++	0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c,
++	0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1,
++	0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8,
++	0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14,
++	0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c,
++	0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4,
++	0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06,
++	0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52,
++	0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d,
++	0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c,
++	0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6,
++	0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5,
++	0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f,
++	0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e,
++	0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98,
++	0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8,
++	0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb,
++	0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b,
++	0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79,
++	0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11,
++	0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d,
++	0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10,
++	0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23,
++	0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23,
++	0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90,
++	0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4,
++	0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1,
++	0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7,
++	0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11,
++	0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50,
++	0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8,
++	0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97,
++	0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38,
++	0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f,
++	0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33,
++	0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f,
++	0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75,
++	0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21,
++	0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90,
++	0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8,
++	0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91,
++	0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1,
++	0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f,
++	0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3,
++	0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc,
++	0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a,
++	0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62,
++	0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55,
++	0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23,
++	0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6,
++	0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac,
++	0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12,
++	0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a,
++	0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7,
++	0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec,
++	0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28,
++	0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88,
++	0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4,
++	0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17,
++	0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2,
++	0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33,
++	0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a,
++	0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28,
++	0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62,
++	0x70, 0xcf, 0xd6
++};
++static const u8 enc_assoc012[] __initconst = {
++	0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8,
++	0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce,
++	0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c,
++	0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc,
++	0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e,
++	0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f,
++	0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b,
++	0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9
++};
++static const u8 enc_nonce012[] __initconst = {
++	0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06
++};
++static const u8 enc_key012[] __initconst = {
++	0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e,
++	0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d,
++	0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e,
++	0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64
++};
++
++/* wycheproof - rfc7539 */
++static const u8 enc_input013[] __initconst = {
++	0x4c, 0x61, 0x64, 0x69, 0x65, 0x73, 0x20, 0x61,
++	0x6e, 0x64, 0x20, 0x47, 0x65, 0x6e, 0x74, 0x6c,
++	0x65, 0x6d, 0x65, 0x6e, 0x20, 0x6f, 0x66, 0x20,
++	0x74, 0x68, 0x65, 0x20, 0x63, 0x6c, 0x61, 0x73,
++	0x73, 0x20, 0x6f, 0x66, 0x20, 0x27, 0x39, 0x39,
++	0x3a, 0x20, 0x49, 0x66, 0x20, 0x49, 0x20, 0x63,
++	0x6f, 0x75, 0x6c, 0x64, 0x20, 0x6f, 0x66, 0x66,
++	0x65, 0x72, 0x20, 0x79, 0x6f, 0x75, 0x20, 0x6f,
++	0x6e, 0x6c, 0x79, 0x20, 0x6f, 0x6e, 0x65, 0x20,
++	0x74, 0x69, 0x70, 0x20, 0x66, 0x6f, 0x72, 0x20,
++	0x74, 0x68, 0x65, 0x20, 0x66, 0x75, 0x74, 0x75,
++	0x72, 0x65, 0x2c, 0x20, 0x73, 0x75, 0x6e, 0x73,
++	0x63, 0x72, 0x65, 0x65, 0x6e, 0x20, 0x77, 0x6f,
++	0x75, 0x6c, 0x64, 0x20, 0x62, 0x65, 0x20, 0x69,
++	0x74, 0x2e
++};
++static const u8 enc_output013[] __initconst = {
++	0xd3, 0x1a, 0x8d, 0x34, 0x64, 0x8e, 0x60, 0xdb,
++	0x7b, 0x86, 0xaf, 0xbc, 0x53, 0xef, 0x7e, 0xc2,
++	0xa4, 0xad, 0xed, 0x51, 0x29, 0x6e, 0x08, 0xfe,
++	0xa9, 0xe2, 0xb5, 0xa7, 0x36, 0xee, 0x62, 0xd6,
++	0x3d, 0xbe, 0xa4, 0x5e, 0x8c, 0xa9, 0x67, 0x12,
++	0x82, 0xfa, 0xfb, 0x69, 0xda, 0x92, 0x72, 0x8b,
++	0x1a, 0x71, 0xde, 0x0a, 0x9e, 0x06, 0x0b, 0x29,
++	0x05, 0xd6, 0xa5, 0xb6, 0x7e, 0xcd, 0x3b, 0x36,
++	0x92, 0xdd, 0xbd, 0x7f, 0x2d, 0x77, 0x8b, 0x8c,
++	0x98, 0x03, 0xae, 0xe3, 0x28, 0x09, 0x1b, 0x58,
++	0xfa, 0xb3, 0x24, 0xe4, 0xfa, 0xd6, 0x75, 0x94,
++	0x55, 0x85, 0x80, 0x8b, 0x48, 0x31, 0xd7, 0xbc,
++	0x3f, 0xf4, 0xde, 0xf0, 0x8e, 0x4b, 0x7a, 0x9d,
++	0xe5, 0x76, 0xd2, 0x65, 0x86, 0xce, 0xc6, 0x4b,
++	0x61, 0x16, 0x1a, 0xe1, 0x0b, 0x59, 0x4f, 0x09,
++	0xe2, 0x6a, 0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60,
++	0x06, 0x91
++};
++static const u8 enc_assoc013[] __initconst = {
++	0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3,
++	0xc4, 0xc5, 0xc6, 0xc7
++};
++static const u8 enc_nonce013[] __initconst = {
++	0x07, 0x00, 0x00, 0x00, 0x40, 0x41, 0x42, 0x43,
++	0x44, 0x45, 0x46, 0x47
++};
++static const u8 enc_key013[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input014[] __initconst = { };
++static const u8 enc_output014[] __initconst = {
++	0x76, 0xac, 0xb3, 0x42, 0xcf, 0x31, 0x66, 0xa5,
++	0xb6, 0x3c, 0x0c, 0x0e, 0xa1, 0x38, 0x3c, 0x8d
++};
++static const u8 enc_assoc014[] __initconst = { };
++static const u8 enc_nonce014[] __initconst = {
++	0x4d, 0xa5, 0xbf, 0x8d, 0xfd, 0x58, 0x52, 0xc1,
++	0xea, 0x12, 0x37, 0x9d
++};
++static const u8 enc_key014[] __initconst = {
++	0x80, 0xba, 0x31, 0x92, 0xc8, 0x03, 0xce, 0x96,
++	0x5e, 0xa3, 0x71, 0xd5, 0xff, 0x07, 0x3c, 0xf0,
++	0xf4, 0x3b, 0x6a, 0x2a, 0xb5, 0x76, 0xb2, 0x08,
++	0x42, 0x6e, 0x11, 0x40, 0x9c, 0x09, 0xb9, 0xb0
++};
++
++/* wycheproof - misc */
++static const u8 enc_input015[] __initconst = { };
++static const u8 enc_output015[] __initconst = {
++	0x90, 0x6f, 0xa6, 0x28, 0x4b, 0x52, 0xf8, 0x7b,
++	0x73, 0x59, 0xcb, 0xaa, 0x75, 0x63, 0xc7, 0x09
++};
++static const u8 enc_assoc015[] __initconst = {
++	0xbd, 0x50, 0x67, 0x64, 0xf2, 0xd2, 0xc4, 0x10
++};
++static const u8 enc_nonce015[] __initconst = {
++	0xa9, 0x2e, 0xf0, 0xac, 0x99, 0x1d, 0xd5, 0x16,
++	0xa3, 0xc6, 0xf6, 0x89
++};
++static const u8 enc_key015[] __initconst = {
++	0x7a, 0x4c, 0xd7, 0x59, 0x17, 0x2e, 0x02, 0xeb,
++	0x20, 0x4d, 0xb2, 0xc3, 0xf5, 0xc7, 0x46, 0x22,
++	0x7d, 0xf5, 0x84, 0xfc, 0x13, 0x45, 0x19, 0x63,
++	0x91, 0xdb, 0xb9, 0x57, 0x7a, 0x25, 0x07, 0x42
++};
++
++/* wycheproof - misc */
++static const u8 enc_input016[] __initconst = {
++	0x2a
++};
++static const u8 enc_output016[] __initconst = {
++	0x3a, 0xca, 0xc2, 0x7d, 0xec, 0x09, 0x68, 0x80,
++	0x1e, 0x9f, 0x6e, 0xde, 0xd6, 0x9d, 0x80, 0x75,
++	0x22
++};
++static const u8 enc_assoc016[] __initconst = { };
++static const u8 enc_nonce016[] __initconst = {
++	0x99, 0xe2, 0x3e, 0xc4, 0x89, 0x85, 0xbc, 0xcd,
++	0xee, 0xab, 0x60, 0xf1
++};
++static const u8 enc_key016[] __initconst = {
++	0xcc, 0x56, 0xb6, 0x80, 0x55, 0x2e, 0xb7, 0x50,
++	0x08, 0xf5, 0x48, 0x4b, 0x4c, 0xb8, 0x03, 0xfa,
++	0x50, 0x63, 0xeb, 0xd6, 0xea, 0xb9, 0x1f, 0x6a,
++	0xb6, 0xae, 0xf4, 0x91, 0x6a, 0x76, 0x62, 0x73
++};
++
++/* wycheproof - misc */
++static const u8 enc_input017[] __initconst = {
++	0x51
++};
++static const u8 enc_output017[] __initconst = {
++	0xc4, 0x16, 0x83, 0x10, 0xca, 0x45, 0xb1, 0xf7,
++	0xc6, 0x6c, 0xad, 0x4e, 0x99, 0xe4, 0x3f, 0x72,
++	0xb9
++};
++static const u8 enc_assoc017[] __initconst = {
++	0x91, 0xca, 0x6c, 0x59, 0x2c, 0xbc, 0xca, 0x53
++};
++static const u8 enc_nonce017[] __initconst = {
++	0xab, 0x0d, 0xca, 0x71, 0x6e, 0xe0, 0x51, 0xd2,
++	0x78, 0x2f, 0x44, 0x03
++};
++static const u8 enc_key017[] __initconst = {
++	0x46, 0xf0, 0x25, 0x49, 0x65, 0xf7, 0x69, 0xd5,
++	0x2b, 0xdb, 0x4a, 0x70, 0xb4, 0x43, 0x19, 0x9f,
++	0x8e, 0xf2, 0x07, 0x52, 0x0d, 0x12, 0x20, 0xc5,
++	0x5e, 0x4b, 0x70, 0xf0, 0xfd, 0xa6, 0x20, 0xee
++};
++
++/* wycheproof - misc */
++static const u8 enc_input018[] __initconst = {
++	0x5c, 0x60
++};
++static const u8 enc_output018[] __initconst = {
++	0x4d, 0x13, 0x91, 0xe8, 0xb6, 0x1e, 0xfb, 0x39,
++	0xc1, 0x22, 0x19, 0x54, 0x53, 0x07, 0x7b, 0x22,
++	0xe5, 0xe2
++};
++static const u8 enc_assoc018[] __initconst = { };
++static const u8 enc_nonce018[] __initconst = {
++	0x46, 0x1a, 0xf1, 0x22, 0xe9, 0xf2, 0xe0, 0x34,
++	0x7e, 0x03, 0xf2, 0xdb
++};
++static const u8 enc_key018[] __initconst = {
++	0x2f, 0x7f, 0x7e, 0x4f, 0x59, 0x2b, 0xb3, 0x89,
++	0x19, 0x49, 0x89, 0x74, 0x35, 0x07, 0xbf, 0x3e,
++	0xe9, 0xcb, 0xde, 0x17, 0x86, 0xb6, 0x69, 0x5f,
++	0xe6, 0xc0, 0x25, 0xfd, 0x9b, 0xa4, 0xc1, 0x00
++};
++
++/* wycheproof - misc */
++static const u8 enc_input019[] __initconst = {
++	0xdd, 0xf2
++};
++static const u8 enc_output019[] __initconst = {
++	0xb6, 0x0d, 0xea, 0xd0, 0xfd, 0x46, 0x97, 0xec,
++	0x2e, 0x55, 0x58, 0x23, 0x77, 0x19, 0xd0, 0x24,
++	0x37, 0xa2
++};
++static const u8 enc_assoc019[] __initconst = {
++	0x88, 0x36, 0x4f, 0xc8, 0x06, 0x05, 0x18, 0xbf
++};
++static const u8 enc_nonce019[] __initconst = {
++	0x61, 0x54, 0x6b, 0xa5, 0xf1, 0x72, 0x05, 0x90,
++	0xb6, 0x04, 0x0a, 0xc6
++};
++static const u8 enc_key019[] __initconst = {
++	0xc8, 0x83, 0x3d, 0xce, 0x5e, 0xa9, 0xf2, 0x48,
++	0xaa, 0x20, 0x30, 0xea, 0xcf, 0xe7, 0x2b, 0xff,
++	0xe6, 0x9a, 0x62, 0x0c, 0xaf, 0x79, 0x33, 0x44,
++	0xe5, 0x71, 0x8f, 0xe0, 0xd7, 0xab, 0x1a, 0x58
++};
++
++/* wycheproof - misc */
++static const u8 enc_input020[] __initconst = {
++	0xab, 0x85, 0xe9, 0xc1, 0x57, 0x17, 0x31
++};
++static const u8 enc_output020[] __initconst = {
++	0x5d, 0xfe, 0x34, 0x40, 0xdb, 0xb3, 0xc3, 0xed,
++	0x7a, 0x43, 0x4e, 0x26, 0x02, 0xd3, 0x94, 0x28,
++	0x1e, 0x0a, 0xfa, 0x9f, 0xb7, 0xaa, 0x42
++};
++static const u8 enc_assoc020[] __initconst = { };
++static const u8 enc_nonce020[] __initconst = {
++	0x3c, 0x4e, 0x65, 0x4d, 0x66, 0x3f, 0xa4, 0x59,
++	0x6d, 0xc5, 0x5b, 0xb7
++};
++static const u8 enc_key020[] __initconst = {
++	0x55, 0x56, 0x81, 0x58, 0xd3, 0xa6, 0x48, 0x3f,
++	0x1f, 0x70, 0x21, 0xea, 0xb6, 0x9b, 0x70, 0x3f,
++	0x61, 0x42, 0x51, 0xca, 0xdc, 0x1a, 0xf5, 0xd3,
++	0x4a, 0x37, 0x4f, 0xdb, 0xfc, 0x5a, 0xda, 0xc7
++};
++
++/* wycheproof - misc */
++static const u8 enc_input021[] __initconst = {
++	0x4e, 0xe5, 0xcd, 0xa2, 0x0d, 0x42, 0x90
++};
++static const u8 enc_output021[] __initconst = {
++	0x4b, 0xd4, 0x72, 0x12, 0x94, 0x1c, 0xe3, 0x18,
++	0x5f, 0x14, 0x08, 0xee, 0x7f, 0xbf, 0x18, 0xf5,
++	0xab, 0xad, 0x6e, 0x22, 0x53, 0xa1, 0xba
++};
++static const u8 enc_assoc021[] __initconst = {
++	0x84, 0xe4, 0x6b, 0xe8, 0xc0, 0x91, 0x90, 0x53
++};
++static const u8 enc_nonce021[] __initconst = {
++	0x58, 0x38, 0x93, 0x75, 0xc6, 0x9e, 0xe3, 0x98,
++	0xde, 0x94, 0x83, 0x96
++};
++static const u8 enc_key021[] __initconst = {
++	0xe3, 0xc0, 0x9e, 0x7f, 0xab, 0x1a, 0xef, 0xb5,
++	0x16, 0xda, 0x6a, 0x33, 0x02, 0x2a, 0x1d, 0xd4,
++	0xeb, 0x27, 0x2c, 0x80, 0xd5, 0x40, 0xc5, 0xda,
++	0x52, 0xa7, 0x30, 0xf3, 0x4d, 0x84, 0x0d, 0x7f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input022[] __initconst = {
++	0xbe, 0x33, 0x08, 0xf7, 0x2a, 0x2c, 0x6a, 0xed
++};
++static const u8 enc_output022[] __initconst = {
++	0x8e, 0x94, 0x39, 0xa5, 0x6e, 0xee, 0xc8, 0x17,
++	0xfb, 0xe8, 0xa6, 0xed, 0x8f, 0xab, 0xb1, 0x93,
++	0x75, 0x39, 0xdd, 0x6c, 0x00, 0xe9, 0x00, 0x21
++};
++static const u8 enc_assoc022[] __initconst = { };
++static const u8 enc_nonce022[] __initconst = {
++	0x4f, 0x07, 0xaf, 0xed, 0xfd, 0xc3, 0xb6, 0xc2,
++	0x36, 0x18, 0x23, 0xd3
++};
++static const u8 enc_key022[] __initconst = {
++	0x51, 0xe4, 0xbf, 0x2b, 0xad, 0x92, 0xb7, 0xaf,
++	0xf1, 0xa4, 0xbc, 0x05, 0x55, 0x0b, 0xa8, 0x1d,
++	0xf4, 0xb9, 0x6f, 0xab, 0xf4, 0x1c, 0x12, 0xc7,
++	0xb0, 0x0e, 0x60, 0xe4, 0x8d, 0xb7, 0xe1, 0x52
++};
++
++/* wycheproof - misc */
++static const u8 enc_input023[] __initconst = {
++	0xa4, 0xc9, 0xc2, 0x80, 0x1b, 0x71, 0xf7, 0xdf
++};
++static const u8 enc_output023[] __initconst = {
++	0xb9, 0xb9, 0x10, 0x43, 0x3a, 0xf0, 0x52, 0xb0,
++	0x45, 0x30, 0xf5, 0x1a, 0xee, 0xe0, 0x24, 0xe0,
++	0xa4, 0x45, 0xa6, 0x32, 0x8f, 0xa6, 0x7a, 0x18
++};
++static const u8 enc_assoc023[] __initconst = {
++	0x66, 0xc0, 0xae, 0x70, 0x07, 0x6c, 0xb1, 0x4d
++};
++static const u8 enc_nonce023[] __initconst = {
++	0xb4, 0xea, 0x66, 0x6e, 0xe1, 0x19, 0x56, 0x33,
++	0x66, 0x48, 0x4a, 0x78
++};
++static const u8 enc_key023[] __initconst = {
++	0x11, 0x31, 0xc1, 0x41, 0x85, 0x77, 0xa0, 0x54,
++	0xde, 0x7a, 0x4a, 0xc5, 0x51, 0x95, 0x0f, 0x1a,
++	0x05, 0x3f, 0x9a, 0xe4, 0x6e, 0x5b, 0x75, 0xfe,
++	0x4a, 0xbd, 0x56, 0x08, 0xd7, 0xcd, 0xda, 0xdd
++};
++
++/* wycheproof - misc */
++static const u8 enc_input024[] __initconst = {
++	0x42, 0xba, 0xae, 0x59, 0x78, 0xfe, 0xaf, 0x5c,
++	0x36, 0x8d, 0x14, 0xe0
++};
++static const u8 enc_output024[] __initconst = {
++	0xff, 0x7d, 0xc2, 0x03, 0xb2, 0x6c, 0x46, 0x7a,
++	0x6b, 0x50, 0xdb, 0x33, 0x57, 0x8c, 0x0f, 0x27,
++	0x58, 0xc2, 0xe1, 0x4e, 0x36, 0xd4, 0xfc, 0x10,
++	0x6d, 0xcb, 0x29, 0xb4
++};
++static const u8 enc_assoc024[] __initconst = { };
++static const u8 enc_nonce024[] __initconst = {
++	0x9a, 0x59, 0xfc, 0xe2, 0x6d, 0xf0, 0x00, 0x5e,
++	0x07, 0x53, 0x86, 0x56
++};
++static const u8 enc_key024[] __initconst = {
++	0x99, 0xb6, 0x2b, 0xd5, 0xaf, 0xbe, 0x3f, 0xb0,
++	0x15, 0xbd, 0xe9, 0x3f, 0x0a, 0xbf, 0x48, 0x39,
++	0x57, 0xa1, 0xc3, 0xeb, 0x3c, 0xa5, 0x9c, 0xb5,
++	0x0b, 0x39, 0xf7, 0xf8, 0xa9, 0xcc, 0x51, 0xbe
++};
++
++/* wycheproof - misc */
++static const u8 enc_input025[] __initconst = {
++	0xfd, 0xc8, 0x5b, 0x94, 0xa4, 0xb2, 0xa6, 0xb7,
++	0x59, 0xb1, 0xa0, 0xda
++};
++static const u8 enc_output025[] __initconst = {
++	0x9f, 0x88, 0x16, 0xde, 0x09, 0x94, 0xe9, 0x38,
++	0xd9, 0xe5, 0x3f, 0x95, 0xd0, 0x86, 0xfc, 0x6c,
++	0x9d, 0x8f, 0xa9, 0x15, 0xfd, 0x84, 0x23, 0xa7,
++	0xcf, 0x05, 0x07, 0x2f
++};
++static const u8 enc_assoc025[] __initconst = {
++	0xa5, 0x06, 0xe1, 0xa5, 0xc6, 0x90, 0x93, 0xf9
++};
++static const u8 enc_nonce025[] __initconst = {
++	0x58, 0xdb, 0xd4, 0xad, 0x2c, 0x4a, 0xd3, 0x5d,
++	0xd9, 0x06, 0xe9, 0xce
++};
++static const u8 enc_key025[] __initconst = {
++	0x85, 0xf3, 0x5b, 0x62, 0x82, 0xcf, 0xf4, 0x40,
++	0xbc, 0x10, 0x20, 0xc8, 0x13, 0x6f, 0xf2, 0x70,
++	0x31, 0x11, 0x0f, 0xa6, 0x3e, 0xc1, 0x6f, 0x1e,
++	0x82, 0x51, 0x18, 0xb0, 0x06, 0xb9, 0x12, 0x57
++};
++
++/* wycheproof - misc */
++static const u8 enc_input026[] __initconst = {
++	0x51, 0xf8, 0xc1, 0xf7, 0x31, 0xea, 0x14, 0xac,
++	0xdb, 0x21, 0x0a, 0x6d, 0x97, 0x3e, 0x07
++};
++static const u8 enc_output026[] __initconst = {
++	0x0b, 0x29, 0x63, 0x8e, 0x1f, 0xbd, 0xd6, 0xdf,
++	0x53, 0x97, 0x0b, 0xe2, 0x21, 0x00, 0x42, 0x2a,
++	0x91, 0x34, 0x08, 0x7d, 0x67, 0xa4, 0x6e, 0x79,
++	0x17, 0x8d, 0x0a, 0x93, 0xf5, 0xe1, 0xd2
++};
++static const u8 enc_assoc026[] __initconst = { };
++static const u8 enc_nonce026[] __initconst = {
++	0x68, 0xab, 0x7f, 0xdb, 0xf6, 0x19, 0x01, 0xda,
++	0xd4, 0x61, 0xd2, 0x3c
++};
++static const u8 enc_key026[] __initconst = {
++	0x67, 0x11, 0x96, 0x27, 0xbd, 0x98, 0x8e, 0xda,
++	0x90, 0x62, 0x19, 0xe0, 0x8c, 0x0d, 0x0d, 0x77,
++	0x9a, 0x07, 0xd2, 0x08, 0xce, 0x8a, 0x4f, 0xe0,
++	0x70, 0x9a, 0xf7, 0x55, 0xee, 0xec, 0x6d, 0xcb
++};
++
++/* wycheproof - misc */
++static const u8 enc_input027[] __initconst = {
++	0x97, 0x46, 0x9d, 0xa6, 0x67, 0xd6, 0x11, 0x0f,
++	0x9c, 0xbd, 0xa1, 0xd1, 0xa2, 0x06, 0x73
++};
++static const u8 enc_output027[] __initconst = {
++	0x32, 0xdb, 0x66, 0xc4, 0xa3, 0x81, 0x9d, 0x81,
++	0x55, 0x74, 0x55, 0xe5, 0x98, 0x0f, 0xed, 0xfe,
++	0xae, 0x30, 0xde, 0xc9, 0x4e, 0x6a, 0xd3, 0xa9,
++	0xee, 0xa0, 0x6a, 0x0d, 0x70, 0x39, 0x17
++};
++static const u8 enc_assoc027[] __initconst = {
++	0x64, 0x53, 0xa5, 0x33, 0x84, 0x63, 0x22, 0x12
++};
++static const u8 enc_nonce027[] __initconst = {
++	0xd9, 0x5b, 0x32, 0x43, 0xaf, 0xae, 0xf7, 0x14,
++	0xc5, 0x03, 0x5b, 0x6a
++};
++static const u8 enc_key027[] __initconst = {
++	0xe6, 0xf1, 0x11, 0x8d, 0x41, 0xe4, 0xb4, 0x3f,
++	0xb5, 0x82, 0x21, 0xb7, 0xed, 0x79, 0x67, 0x38,
++	0x34, 0xe0, 0xd8, 0xac, 0x5c, 0x4f, 0xa6, 0x0b,
++	0xbc, 0x8b, 0xc4, 0x89, 0x3a, 0x58, 0x89, 0x4d
++};
++
++/* wycheproof - misc */
++static const u8 enc_input028[] __initconst = {
++	0x54, 0x9b, 0x36, 0x5a, 0xf9, 0x13, 0xf3, 0xb0,
++	0x81, 0x13, 0x1c, 0xcb, 0x6b, 0x82, 0x55, 0x88
++};
++static const u8 enc_output028[] __initconst = {
++	0xe9, 0x11, 0x0e, 0x9f, 0x56, 0xab, 0x3c, 0xa4,
++	0x83, 0x50, 0x0c, 0xea, 0xba, 0xb6, 0x7a, 0x13,
++	0x83, 0x6c, 0xca, 0xbf, 0x15, 0xa6, 0xa2, 0x2a,
++	0x51, 0xc1, 0x07, 0x1c, 0xfa, 0x68, 0xfa, 0x0c
++};
++static const u8 enc_assoc028[] __initconst = { };
++static const u8 enc_nonce028[] __initconst = {
++	0x2f, 0xcb, 0x1b, 0x38, 0xa9, 0x9e, 0x71, 0xb8,
++	0x47, 0x40, 0xad, 0x9b
++};
++static const u8 enc_key028[] __initconst = {
++	0x59, 0xd4, 0xea, 0xfb, 0x4d, 0xe0, 0xcf, 0xc7,
++	0xd3, 0xdb, 0x99, 0xa8, 0xf5, 0x4b, 0x15, 0xd7,
++	0xb3, 0x9f, 0x0a, 0xcc, 0x8d, 0xa6, 0x97, 0x63,
++	0xb0, 0x19, 0xc1, 0x69, 0x9f, 0x87, 0x67, 0x4a
++};
++
++/* wycheproof - misc */
++static const u8 enc_input029[] __initconst = {
++	0x55, 0xa4, 0x65, 0x64, 0x4f, 0x5b, 0x65, 0x09,
++	0x28, 0xcb, 0xee, 0x7c, 0x06, 0x32, 0x14, 0xd6
++};
++static const u8 enc_output029[] __initconst = {
++	0xe4, 0xb1, 0x13, 0xcb, 0x77, 0x59, 0x45, 0xf3,
++	0xd3, 0xa8, 0xae, 0x9e, 0xc1, 0x41, 0xc0, 0x0c,
++	0x7c, 0x43, 0xf1, 0x6c, 0xe0, 0x96, 0xd0, 0xdc,
++	0x27, 0xc9, 0x58, 0x49, 0xdc, 0x38, 0x3b, 0x7d
++};
++static const u8 enc_assoc029[] __initconst = {
++	0x03, 0x45, 0x85, 0x62, 0x1a, 0xf8, 0xd7, 0xff
++};
++static const u8 enc_nonce029[] __initconst = {
++	0x11, 0x8a, 0x69, 0x64, 0xc2, 0xd3, 0xe3, 0x80,
++	0x07, 0x1f, 0x52, 0x66
++};
++static const u8 enc_key029[] __initconst = {
++	0xb9, 0x07, 0xa4, 0x50, 0x75, 0x51, 0x3f, 0xe8,
++	0xa8, 0x01, 0x9e, 0xde, 0xe3, 0xf2, 0x59, 0x14,
++	0x87, 0xb2, 0xa0, 0x30, 0xb0, 0x3c, 0x6e, 0x1d,
++	0x77, 0x1c, 0x86, 0x25, 0x71, 0xd2, 0xea, 0x1e
++};
++
++/* wycheproof - misc */
++static const u8 enc_input030[] __initconst = {
++	0x3f, 0xf1, 0x51, 0x4b, 0x1c, 0x50, 0x39, 0x15,
++	0x91, 0x8f, 0x0c, 0x0c, 0x31, 0x09, 0x4a, 0x6e,
++	0x1f
++};
++static const u8 enc_output030[] __initconst = {
++	0x02, 0xcc, 0x3a, 0xcb, 0x5e, 0xe1, 0xfc, 0xdd,
++	0x12, 0xa0, 0x3b, 0xb8, 0x57, 0x97, 0x64, 0x74,
++	0xd3, 0xd8, 0x3b, 0x74, 0x63, 0xa2, 0xc3, 0x80,
++	0x0f, 0xe9, 0x58, 0xc2, 0x8e, 0xaa, 0x29, 0x08,
++	0x13
++};
++static const u8 enc_assoc030[] __initconst = { };
++static const u8 enc_nonce030[] __initconst = {
++	0x45, 0xaa, 0xa3, 0xe5, 0xd1, 0x6d, 0x2d, 0x42,
++	0xdc, 0x03, 0x44, 0x5d
++};
++static const u8 enc_key030[] __initconst = {
++	0x3b, 0x24, 0x58, 0xd8, 0x17, 0x6e, 0x16, 0x21,
++	0xc0, 0xcc, 0x24, 0xc0, 0xc0, 0xe2, 0x4c, 0x1e,
++	0x80, 0xd7, 0x2f, 0x7e, 0xe9, 0x14, 0x9a, 0x4b,
++	0x16, 0x61, 0x76, 0x62, 0x96, 0x16, 0xd0, 0x11
++};
++
++/* wycheproof - misc */
++static const u8 enc_input031[] __initconst = {
++	0x63, 0x85, 0x8c, 0xa3, 0xe2, 0xce, 0x69, 0x88,
++	0x7b, 0x57, 0x8a, 0x3c, 0x16, 0x7b, 0x42, 0x1c,
++	0x9c
++};
++static const u8 enc_output031[] __initconst = {
++	0x35, 0x76, 0x64, 0x88, 0xd2, 0xbc, 0x7c, 0x2b,
++	0x8d, 0x17, 0xcb, 0xbb, 0x9a, 0xbf, 0xad, 0x9e,
++	0x6d, 0x1f, 0x39, 0x1e, 0x65, 0x7b, 0x27, 0x38,
++	0xdd, 0xa0, 0x84, 0x48, 0xcb, 0xa2, 0x81, 0x1c,
++	0xeb
++};
++static const u8 enc_assoc031[] __initconst = {
++	0x9a, 0xaf, 0x29, 0x9e, 0xee, 0xa7, 0x8f, 0x79
++};
++static const u8 enc_nonce031[] __initconst = {
++	0xf0, 0x38, 0x4f, 0xb8, 0x76, 0x12, 0x14, 0x10,
++	0x63, 0x3d, 0x99, 0x3d
++};
++static const u8 enc_key031[] __initconst = {
++	0xf6, 0x0c, 0x6a, 0x1b, 0x62, 0x57, 0x25, 0xf7,
++	0x6c, 0x70, 0x37, 0xb4, 0x8f, 0xe3, 0x57, 0x7f,
++	0xa7, 0xf7, 0xb8, 0x7b, 0x1b, 0xd5, 0xa9, 0x82,
++	0x17, 0x6d, 0x18, 0x23, 0x06, 0xff, 0xb8, 0x70
++};
++
++/* wycheproof - misc */
++static const u8 enc_input032[] __initconst = {
++	0x10, 0xf1, 0xec, 0xf9, 0xc6, 0x05, 0x84, 0x66,
++	0x5d, 0x9a, 0xe5, 0xef, 0xe2, 0x79, 0xe7, 0xf7,
++	0x37, 0x7e, 0xea, 0x69, 0x16, 0xd2, 0xb1, 0x11
++};
++static const u8 enc_output032[] __initconst = {
++	0x42, 0xf2, 0x6c, 0x56, 0xcb, 0x4b, 0xe2, 0x1d,
++	0x9d, 0x8d, 0x0c, 0x80, 0xfc, 0x99, 0xdd, 0xe0,
++	0x0d, 0x75, 0xf3, 0x80, 0x74, 0xbf, 0xe7, 0x64,
++	0x54, 0xaa, 0x7e, 0x13, 0xd4, 0x8f, 0xff, 0x7d,
++	0x75, 0x57, 0x03, 0x94, 0x57, 0x04, 0x0a, 0x3a
++};
++static const u8 enc_assoc032[] __initconst = { };
++static const u8 enc_nonce032[] __initconst = {
++	0xe6, 0xb1, 0xad, 0xf2, 0xfd, 0x58, 0xa8, 0x76,
++	0x2c, 0x65, 0xf3, 0x1b
++};
++static const u8 enc_key032[] __initconst = {
++	0x02, 0x12, 0xa8, 0xde, 0x50, 0x07, 0xed, 0x87,
++	0xb3, 0x3f, 0x1a, 0x70, 0x90, 0xb6, 0x11, 0x4f,
++	0x9e, 0x08, 0xce, 0xfd, 0x96, 0x07, 0xf2, 0xc2,
++	0x76, 0xbd, 0xcf, 0xdb, 0xc5, 0xce, 0x9c, 0xd7
++};
++
++/* wycheproof - misc */
++static const u8 enc_input033[] __initconst = {
++	0x92, 0x22, 0xf9, 0x01, 0x8e, 0x54, 0xfd, 0x6d,
++	0xe1, 0x20, 0x08, 0x06, 0xa9, 0xee, 0x8e, 0x4c,
++	0xc9, 0x04, 0xd2, 0x9f, 0x25, 0xcb, 0xa1, 0x93
++};
++static const u8 enc_output033[] __initconst = {
++	0x12, 0x30, 0x32, 0x43, 0x7b, 0x4b, 0xfd, 0x69,
++	0x20, 0xe8, 0xf7, 0xe7, 0xe0, 0x08, 0x7a, 0xe4,
++	0x88, 0x9e, 0xbe, 0x7a, 0x0a, 0xd0, 0xe9, 0x00,
++	0x3c, 0xf6, 0x8f, 0x17, 0x95, 0x50, 0xda, 0x63,
++	0xd3, 0xb9, 0x6c, 0x2d, 0x55, 0x41, 0x18, 0x65
++};
++static const u8 enc_assoc033[] __initconst = {
++	0x3e, 0x8b, 0xc5, 0xad, 0xe1, 0x82, 0xff, 0x08
++};
++static const u8 enc_nonce033[] __initconst = {
++	0x6b, 0x28, 0x2e, 0xbe, 0xcc, 0x54, 0x1b, 0xcd,
++	0x78, 0x34, 0xed, 0x55
++};
++static const u8 enc_key033[] __initconst = {
++	0xc5, 0xbc, 0x09, 0x56, 0x56, 0x46, 0xe7, 0xed,
++	0xda, 0x95, 0x4f, 0x1f, 0x73, 0x92, 0x23, 0xda,
++	0xda, 0x20, 0xb9, 0x5c, 0x44, 0xab, 0x03, 0x3d,
++	0x0f, 0xae, 0x4b, 0x02, 0x83, 0xd1, 0x8b, 0xe3
++};
++
++/* wycheproof - misc */
++static const u8 enc_input034[] __initconst = {
++	0xb0, 0x53, 0x99, 0x92, 0x86, 0xa2, 0x82, 0x4f,
++	0x42, 0xcc, 0x8c, 0x20, 0x3a, 0xb2, 0x4e, 0x2c,
++	0x97, 0xa6, 0x85, 0xad, 0xcc, 0x2a, 0xd3, 0x26,
++	0x62, 0x55, 0x8e, 0x55, 0xa5, 0xc7, 0x29
++};
++static const u8 enc_output034[] __initconst = {
++	0x45, 0xc7, 0xd6, 0xb5, 0x3a, 0xca, 0xd4, 0xab,
++	0xb6, 0x88, 0x76, 0xa6, 0xe9, 0x6a, 0x48, 0xfb,
++	0x59, 0x52, 0x4d, 0x2c, 0x92, 0xc9, 0xd8, 0xa1,
++	0x89, 0xc9, 0xfd, 0x2d, 0xb9, 0x17, 0x46, 0x56,
++	0x6d, 0x3c, 0xa1, 0x0e, 0x31, 0x1b, 0x69, 0x5f,
++	0x3e, 0xae, 0x15, 0x51, 0x65, 0x24, 0x93
++};
++static const u8 enc_assoc034[] __initconst = { };
++static const u8 enc_nonce034[] __initconst = {
++	0x04, 0xa9, 0xbe, 0x03, 0x50, 0x8a, 0x5f, 0x31,
++	0x37, 0x1a, 0x6f, 0xd2
++};
++static const u8 enc_key034[] __initconst = {
++	0x2e, 0xb5, 0x1c, 0x46, 0x9a, 0xa8, 0xeb, 0x9e,
++	0x6c, 0x54, 0xa8, 0x34, 0x9b, 0xae, 0x50, 0xa2,
++	0x0f, 0x0e, 0x38, 0x27, 0x11, 0xbb, 0xa1, 0x15,
++	0x2c, 0x42, 0x4f, 0x03, 0xb6, 0x67, 0x1d, 0x71
++};
++
++/* wycheproof - misc */
++static const u8 enc_input035[] __initconst = {
++	0xf4, 0x52, 0x06, 0xab, 0xc2, 0x55, 0x52, 0xb2,
++	0xab, 0xc9, 0xab, 0x7f, 0xa2, 0x43, 0x03, 0x5f,
++	0xed, 0xaa, 0xdd, 0xc3, 0xb2, 0x29, 0x39, 0x56,
++	0xf1, 0xea, 0x6e, 0x71, 0x56, 0xe7, 0xeb
++};
++static const u8 enc_output035[] __initconst = {
++	0x46, 0xa8, 0x0c, 0x41, 0x87, 0x02, 0x47, 0x20,
++	0x08, 0x46, 0x27, 0x58, 0x00, 0x80, 0xdd, 0xe5,
++	0xa3, 0xf4, 0xa1, 0x10, 0x93, 0xa7, 0x07, 0x6e,
++	0xd6, 0xf3, 0xd3, 0x26, 0xbc, 0x7b, 0x70, 0x53,
++	0x4d, 0x4a, 0xa2, 0x83, 0x5a, 0x52, 0xe7, 0x2d,
++	0x14, 0xdf, 0x0e, 0x4f, 0x47, 0xf2, 0x5f
++};
++static const u8 enc_assoc035[] __initconst = {
++	0x37, 0x46, 0x18, 0xa0, 0x6e, 0xa9, 0x8a, 0x48
++};
++static const u8 enc_nonce035[] __initconst = {
++	0x47, 0x0a, 0x33, 0x9e, 0xcb, 0x32, 0x19, 0xb8,
++	0xb8, 0x1a, 0x1f, 0x8b
++};
++static const u8 enc_key035[] __initconst = {
++	0x7f, 0x5b, 0x74, 0xc0, 0x7e, 0xd1, 0xb4, 0x0f,
++	0xd1, 0x43, 0x58, 0xfe, 0x2f, 0xf2, 0xa7, 0x40,
++	0xc1, 0x16, 0xc7, 0x70, 0x65, 0x10, 0xe6, 0xa4,
++	0x37, 0xf1, 0x9e, 0xa4, 0x99, 0x11, 0xce, 0xc4
++};
++
++/* wycheproof - misc */
++static const u8 enc_input036[] __initconst = {
++	0xb9, 0xc5, 0x54, 0xcb, 0xc3, 0x6a, 0xc1, 0x8a,
++	0xe8, 0x97, 0xdf, 0x7b, 0xee, 0xca, 0xc1, 0xdb,
++	0xeb, 0x4e, 0xaf, 0xa1, 0x56, 0xbb, 0x60, 0xce,
++	0x2e, 0x5d, 0x48, 0xf0, 0x57, 0x15, 0xe6, 0x78
++};
++static const u8 enc_output036[] __initconst = {
++	0xea, 0x29, 0xaf, 0xa4, 0x9d, 0x36, 0xe8, 0x76,
++	0x0f, 0x5f, 0xe1, 0x97, 0x23, 0xb9, 0x81, 0x1e,
++	0xd5, 0xd5, 0x19, 0x93, 0x4a, 0x44, 0x0f, 0x50,
++	0x81, 0xac, 0x43, 0x0b, 0x95, 0x3b, 0x0e, 0x21,
++	0x22, 0x25, 0x41, 0xaf, 0x46, 0xb8, 0x65, 0x33,
++	0xc6, 0xb6, 0x8d, 0x2f, 0xf1, 0x08, 0xa7, 0xea
++};
++static const u8 enc_assoc036[] __initconst = { };
++static const u8 enc_nonce036[] __initconst = {
++	0x72, 0xcf, 0xd9, 0x0e, 0xf3, 0x02, 0x6c, 0xa2,
++	0x2b, 0x7e, 0x6e, 0x6a
++};
++static const u8 enc_key036[] __initconst = {
++	0xe1, 0x73, 0x1d, 0x58, 0x54, 0xe1, 0xb7, 0x0c,
++	0xb3, 0xff, 0xe8, 0xb7, 0x86, 0xa2, 0xb3, 0xeb,
++	0xf0, 0x99, 0x43, 0x70, 0x95, 0x47, 0x57, 0xb9,
++	0xdc, 0x8c, 0x7b, 0xc5, 0x35, 0x46, 0x34, 0xa3
++};
++
++/* wycheproof - misc */
++static const u8 enc_input037[] __initconst = {
++	0x6b, 0x26, 0x04, 0x99, 0x6c, 0xd3, 0x0c, 0x14,
++	0xa1, 0x3a, 0x52, 0x57, 0xed, 0x6c, 0xff, 0xd3,
++	0xbc, 0x5e, 0x29, 0xd6, 0xb9, 0x7e, 0xb1, 0x79,
++	0x9e, 0xb3, 0x35, 0xe2, 0x81, 0xea, 0x45, 0x1e
++};
++static const u8 enc_output037[] __initconst = {
++	0x6d, 0xad, 0x63, 0x78, 0x97, 0x54, 0x4d, 0x8b,
++	0xf6, 0xbe, 0x95, 0x07, 0xed, 0x4d, 0x1b, 0xb2,
++	0xe9, 0x54, 0xbc, 0x42, 0x7e, 0x5d, 0xe7, 0x29,
++	0xda, 0xf5, 0x07, 0x62, 0x84, 0x6f, 0xf2, 0xf4,
++	0x7b, 0x99, 0x7d, 0x93, 0xc9, 0x82, 0x18, 0x9d,
++	0x70, 0x95, 0xdc, 0x79, 0x4c, 0x74, 0x62, 0x32
++};
++static const u8 enc_assoc037[] __initconst = {
++	0x23, 0x33, 0xe5, 0xce, 0x0f, 0x93, 0xb0, 0x59
++};
++static const u8 enc_nonce037[] __initconst = {
++	0x26, 0x28, 0x80, 0xd4, 0x75, 0xf3, 0xda, 0xc5,
++	0x34, 0x0d, 0xd1, 0xb8
++};
++static const u8 enc_key037[] __initconst = {
++	0x27, 0xd8, 0x60, 0x63, 0x1b, 0x04, 0x85, 0xa4,
++	0x10, 0x70, 0x2f, 0xea, 0x61, 0xbc, 0x87, 0x3f,
++	0x34, 0x42, 0x26, 0x0c, 0xad, 0xed, 0x4a, 0xbd,
++	0xe2, 0x5b, 0x78, 0x6a, 0x2d, 0x97, 0xf1, 0x45
++};
++
++/* wycheproof - misc */
++static const u8 enc_input038[] __initconst = {
++	0x97, 0x3d, 0x0c, 0x75, 0x38, 0x26, 0xba, 0xe4,
++	0x66, 0xcf, 0x9a, 0xbb, 0x34, 0x93, 0x15, 0x2e,
++	0x9d, 0xe7, 0x81, 0x9e, 0x2b, 0xd0, 0xc7, 0x11,
++	0x71, 0x34, 0x6b, 0x4d, 0x2c, 0xeb, 0xf8, 0x04,
++	0x1a, 0xa3, 0xce, 0xdc, 0x0d, 0xfd, 0x7b, 0x46,
++	0x7e, 0x26, 0x22, 0x8b, 0xc8, 0x6c, 0x9a
++};
++static const u8 enc_output038[] __initconst = {
++	0xfb, 0xa7, 0x8a, 0xe4, 0xf9, 0xd8, 0x08, 0xa6,
++	0x2e, 0x3d, 0xa4, 0x0b, 0xe2, 0xcb, 0x77, 0x00,
++	0xc3, 0x61, 0x3d, 0x9e, 0xb2, 0xc5, 0x29, 0xc6,
++	0x52, 0xe7, 0x6a, 0x43, 0x2c, 0x65, 0x8d, 0x27,
++	0x09, 0x5f, 0x0e, 0xb8, 0xf9, 0x40, 0xc3, 0x24,
++	0x98, 0x1e, 0xa9, 0x35, 0xe5, 0x07, 0xf9, 0x8f,
++	0x04, 0x69, 0x56, 0xdb, 0x3a, 0x51, 0x29, 0x08,
++	0xbd, 0x7a, 0xfc, 0x8f, 0x2a, 0xb0, 0xa9
++};
++static const u8 enc_assoc038[] __initconst = { };
++static const u8 enc_nonce038[] __initconst = {
++	0xe7, 0x4a, 0x51, 0x5e, 0x7e, 0x21, 0x02, 0xb9,
++	0x0b, 0xef, 0x55, 0xd2
++};
++static const u8 enc_key038[] __initconst = {
++	0xcf, 0x0d, 0x40, 0xa4, 0x64, 0x4e, 0x5f, 0x51,
++	0x81, 0x51, 0x65, 0xd5, 0x30, 0x1b, 0x22, 0x63,
++	0x1f, 0x45, 0x44, 0xc4, 0x9a, 0x18, 0x78, 0xe3,
++	0xa0, 0xa5, 0xe8, 0xe1, 0xaa, 0xe0, 0xf2, 0x64
++};
++
++/* wycheproof - misc */
++static const u8 enc_input039[] __initconst = {
++	0xa9, 0x89, 0x95, 0x50, 0x4d, 0xf1, 0x6f, 0x74,
++	0x8b, 0xfb, 0x77, 0x85, 0xff, 0x91, 0xee, 0xb3,
++	0xb6, 0x60, 0xea, 0x9e, 0xd3, 0x45, 0x0c, 0x3d,
++	0x5e, 0x7b, 0x0e, 0x79, 0xef, 0x65, 0x36, 0x59,
++	0xa9, 0x97, 0x8d, 0x75, 0x54, 0x2e, 0xf9, 0x1c,
++	0x45, 0x67, 0x62, 0x21, 0x56, 0x40, 0xb9
++};
++static const u8 enc_output039[] __initconst = {
++	0xa1, 0xff, 0xed, 0x80, 0x76, 0x18, 0x29, 0xec,
++	0xce, 0x24, 0x2e, 0x0e, 0x88, 0xb1, 0x38, 0x04,
++	0x90, 0x16, 0xbc, 0xa0, 0x18, 0xda, 0x2b, 0x6e,
++	0x19, 0x98, 0x6b, 0x3e, 0x31, 0x8c, 0xae, 0x8d,
++	0x80, 0x61, 0x98, 0xfb, 0x4c, 0x52, 0x7c, 0xc3,
++	0x93, 0x50, 0xeb, 0xdd, 0xea, 0xc5, 0x73, 0xc4,
++	0xcb, 0xf0, 0xbe, 0xfd, 0xa0, 0xb7, 0x02, 0x42,
++	0xc6, 0x40, 0xd7, 0xcd, 0x02, 0xd7, 0xa3
++};
++static const u8 enc_assoc039[] __initconst = {
++	0xb3, 0xe4, 0x06, 0x46, 0x83, 0xb0, 0x2d, 0x84
++};
++static const u8 enc_nonce039[] __initconst = {
++	0xd4, 0xd8, 0x07, 0x34, 0x16, 0x83, 0x82, 0x5b,
++	0x31, 0xcd, 0x4d, 0x95
++};
++static const u8 enc_key039[] __initconst = {
++	0x6c, 0xbf, 0xd7, 0x1c, 0x64, 0x5d, 0x18, 0x4c,
++	0xf5, 0xd2, 0x3c, 0x40, 0x2b, 0xdb, 0x0d, 0x25,
++	0xec, 0x54, 0x89, 0x8c, 0x8a, 0x02, 0x73, 0xd4,
++	0x2e, 0xb5, 0xbe, 0x10, 0x9f, 0xdc, 0xb2, 0xac
++};
++
++/* wycheproof - misc */
++static const u8 enc_input040[] __initconst = {
++	0xd0, 0x96, 0x80, 0x31, 0x81, 0xbe, 0xef, 0x9e,
++	0x00, 0x8f, 0xf8, 0x5d, 0x5d, 0xdc, 0x38, 0xdd,
++	0xac, 0xf0, 0xf0, 0x9e, 0xe5, 0xf7, 0xe0, 0x7f,
++	0x1e, 0x40, 0x79, 0xcb, 0x64, 0xd0, 0xdc, 0x8f,
++	0x5e, 0x67, 0x11, 0xcd, 0x49, 0x21, 0xa7, 0x88,
++	0x7d, 0xe7, 0x6e, 0x26, 0x78, 0xfd, 0xc6, 0x76,
++	0x18, 0xf1, 0x18, 0x55, 0x86, 0xbf, 0xea, 0x9d,
++	0x4c, 0x68, 0x5d, 0x50, 0xe4, 0xbb, 0x9a, 0x82
++};
++static const u8 enc_output040[] __initconst = {
++	0x9a, 0x4e, 0xf2, 0x2b, 0x18, 0x16, 0x77, 0xb5,
++	0x75, 0x5c, 0x08, 0xf7, 0x47, 0xc0, 0xf8, 0xd8,
++	0xe8, 0xd4, 0xc1, 0x8a, 0x9c, 0xc2, 0x40, 0x5c,
++	0x12, 0xbb, 0x51, 0xbb, 0x18, 0x72, 0xc8, 0xe8,
++	0xb8, 0x77, 0x67, 0x8b, 0xec, 0x44, 0x2c, 0xfc,
++	0xbb, 0x0f, 0xf4, 0x64, 0xa6, 0x4b, 0x74, 0x33,
++	0x2c, 0xf0, 0x72, 0x89, 0x8c, 0x7e, 0x0e, 0xdd,
++	0xf6, 0x23, 0x2e, 0xa6, 0xe2, 0x7e, 0xfe, 0x50,
++	0x9f, 0xf3, 0x42, 0x7a, 0x0f, 0x32, 0xfa, 0x56,
++	0x6d, 0x9c, 0xa0, 0xa7, 0x8a, 0xef, 0xc0, 0x13
++};
++static const u8 enc_assoc040[] __initconst = { };
++static const u8 enc_nonce040[] __initconst = {
++	0xd6, 0x10, 0x40, 0xa3, 0x13, 0xed, 0x49, 0x28,
++	0x23, 0xcc, 0x06, 0x5b
++};
++static const u8 enc_key040[] __initconst = {
++	0x5b, 0x1d, 0x10, 0x35, 0xc0, 0xb1, 0x7e, 0xe0,
++	0xb0, 0x44, 0x47, 0x67, 0xf8, 0x0a, 0x25, 0xb8,
++	0xc1, 0xb7, 0x41, 0xf4, 0xb5, 0x0a, 0x4d, 0x30,
++	0x52, 0x22, 0x6b, 0xaa, 0x1c, 0x6f, 0xb7, 0x01
++};
++
++/* wycheproof - misc */
++static const u8 enc_input041[] __initconst = {
++	0x94, 0xee, 0x16, 0x6d, 0x6d, 0x6e, 0xcf, 0x88,
++	0x32, 0x43, 0x71, 0x36, 0xb4, 0xae, 0x80, 0x5d,
++	0x42, 0x88, 0x64, 0x35, 0x95, 0x86, 0xd9, 0x19,
++	0x3a, 0x25, 0x01, 0x62, 0x93, 0xed, 0xba, 0x44,
++	0x3c, 0x58, 0xe0, 0x7e, 0x7b, 0x71, 0x95, 0xec,
++	0x5b, 0xd8, 0x45, 0x82, 0xa9, 0xd5, 0x6c, 0x8d,
++	0x4a, 0x10, 0x8c, 0x7d, 0x7c, 0xe3, 0x4e, 0x6c,
++	0x6f, 0x8e, 0xa1, 0xbe, 0xc0, 0x56, 0x73, 0x17
++};
++static const u8 enc_output041[] __initconst = {
++	0x5f, 0xbb, 0xde, 0xcc, 0x34, 0xbe, 0x20, 0x16,
++	0x14, 0xf6, 0x36, 0x03, 0x1e, 0xeb, 0x42, 0xf1,
++	0xca, 0xce, 0x3c, 0x79, 0xa1, 0x2c, 0xff, 0xd8,
++	0x71, 0xee, 0x8e, 0x73, 0x82, 0x0c, 0x82, 0x97,
++	0x49, 0xf1, 0xab, 0xb4, 0x29, 0x43, 0x67, 0x84,
++	0x9f, 0xb6, 0xc2, 0xaa, 0x56, 0xbd, 0xa8, 0xa3,
++	0x07, 0x8f, 0x72, 0x3d, 0x7c, 0x1c, 0x85, 0x20,
++	0x24, 0xb0, 0x17, 0xb5, 0x89, 0x73, 0xfb, 0x1e,
++	0x09, 0x26, 0x3d, 0xa7, 0xb4, 0xcb, 0x92, 0x14,
++	0x52, 0xf9, 0x7d, 0xca, 0x40, 0xf5, 0x80, 0xec
++};
++static const u8 enc_assoc041[] __initconst = {
++	0x71, 0x93, 0xf6, 0x23, 0x66, 0x33, 0x21, 0xa2
++};
++static const u8 enc_nonce041[] __initconst = {
++	0xd3, 0x1c, 0x21, 0xab, 0xa1, 0x75, 0xb7, 0x0d,
++	0xe4, 0xeb, 0xb1, 0x9c
++};
++static const u8 enc_key041[] __initconst = {
++	0x97, 0xd6, 0x35, 0xc4, 0xf4, 0x75, 0x74, 0xd9,
++	0x99, 0x8a, 0x90, 0x87, 0x5d, 0xa1, 0xd3, 0xa2,
++	0x84, 0xb7, 0x55, 0xb2, 0xd3, 0x92, 0x97, 0xa5,
++	0x72, 0x52, 0x35, 0x19, 0x0e, 0x10, 0xa9, 0x7e
++};
++
++/* wycheproof - misc */
++static const u8 enc_input042[] __initconst = {
++	0xb4, 0x29, 0xeb, 0x80, 0xfb, 0x8f, 0xe8, 0xba,
++	0xed, 0xa0, 0xc8, 0x5b, 0x9c, 0x33, 0x34, 0x58,
++	0xe7, 0xc2, 0x99, 0x2e, 0x55, 0x84, 0x75, 0x06,
++	0x9d, 0x12, 0xd4, 0x5c, 0x22, 0x21, 0x75, 0x64,
++	0x12, 0x15, 0x88, 0x03, 0x22, 0x97, 0xef, 0xf5,
++	0x67, 0x83, 0x74, 0x2a, 0x5f, 0xc2, 0x2d, 0x74,
++	0x10, 0xff, 0xb2, 0x9d, 0x66, 0x09, 0x86, 0x61,
++	0xd7, 0x6f, 0x12, 0x6c, 0x3c, 0x27, 0x68, 0x9e,
++	0x43, 0xb3, 0x72, 0x67, 0xca, 0xc5, 0xa3, 0xa6,
++	0xd3, 0xab, 0x49, 0xe3, 0x91, 0xda, 0x29, 0xcd,
++	0x30, 0x54, 0xa5, 0x69, 0x2e, 0x28, 0x07, 0xe4,
++	0xc3, 0xea, 0x46, 0xc8, 0x76, 0x1d, 0x50, 0xf5,
++	0x92
++};
++static const u8 enc_output042[] __initconst = {
++	0xd0, 0x10, 0x2f, 0x6c, 0x25, 0x8b, 0xf4, 0x97,
++	0x42, 0xce, 0xc3, 0x4c, 0xf2, 0xd0, 0xfe, 0xdf,
++	0x23, 0xd1, 0x05, 0xfb, 0x4c, 0x84, 0xcf, 0x98,
++	0x51, 0x5e, 0x1b, 0xc9, 0xa6, 0x4f, 0x8a, 0xd5,
++	0xbe, 0x8f, 0x07, 0x21, 0xbd, 0xe5, 0x06, 0x45,
++	0xd0, 0x00, 0x83, 0xc3, 0xa2, 0x63, 0xa3, 0x10,
++	0x53, 0xb7, 0x60, 0x24, 0x5f, 0x52, 0xae, 0x28,
++	0x66, 0xa5, 0xec, 0x83, 0xb1, 0x9f, 0x61, 0xbe,
++	0x1d, 0x30, 0xd5, 0xc5, 0xd9, 0xfe, 0xcc, 0x4c,
++	0xbb, 0xe0, 0x8f, 0xd3, 0x85, 0x81, 0x3a, 0x2a,
++	0xa3, 0x9a, 0x00, 0xff, 0x9c, 0x10, 0xf7, 0xf2,
++	0x37, 0x02, 0xad, 0xd1, 0xe4, 0xb2, 0xff, 0xa3,
++	0x1c, 0x41, 0x86, 0x5f, 0xc7, 0x1d, 0xe1, 0x2b,
++	0x19, 0x61, 0x21, 0x27, 0xce, 0x49, 0x99, 0x3b,
++	0xb0
++};
++static const u8 enc_assoc042[] __initconst = { };
++static const u8 enc_nonce042[] __initconst = {
++	0x17, 0xc8, 0x6a, 0x8a, 0xbb, 0xb7, 0xe0, 0x03,
++	0xac, 0xde, 0x27, 0x99
++};
++static const u8 enc_key042[] __initconst = {
++	0xfe, 0x6e, 0x55, 0xbd, 0xae, 0xd1, 0xf7, 0x28,
++	0x4c, 0xa5, 0xfc, 0x0f, 0x8c, 0x5f, 0x2b, 0x8d,
++	0xf5, 0x6d, 0xc0, 0xf4, 0x9e, 0x8c, 0xa6, 0x6a,
++	0x41, 0x99, 0x5e, 0x78, 0x33, 0x51, 0xf9, 0x01
++};
++
++/* wycheproof - misc */
++static const u8 enc_input043[] __initconst = {
++	0xce, 0xb5, 0x34, 0xce, 0x50, 0xdc, 0x23, 0xff,
++	0x63, 0x8a, 0xce, 0x3e, 0xf6, 0x3a, 0xb2, 0xcc,
++	0x29, 0x73, 0xee, 0xad, 0xa8, 0x07, 0x85, 0xfc,
++	0x16, 0x5d, 0x06, 0xc2, 0xf5, 0x10, 0x0f, 0xf5,
++	0xe8, 0xab, 0x28, 0x82, 0xc4, 0x75, 0xaf, 0xcd,
++	0x05, 0xcc, 0xd4, 0x9f, 0x2e, 0x7d, 0x8f, 0x55,
++	0xef, 0x3a, 0x72, 0xe3, 0xdc, 0x51, 0xd6, 0x85,
++	0x2b, 0x8e, 0x6b, 0x9e, 0x7a, 0xec, 0xe5, 0x7b,
++	0xe6, 0x55, 0x6b, 0x0b, 0x6d, 0x94, 0x13, 0xe3,
++	0x3f, 0xc5, 0xfc, 0x24, 0xa9, 0xa2, 0x05, 0xad,
++	0x59, 0x57, 0x4b, 0xb3, 0x9d, 0x94, 0x4a, 0x92,
++	0xdc, 0x47, 0x97, 0x0d, 0x84, 0xa6, 0xad, 0x31,
++	0x76
++};
++static const u8 enc_output043[] __initconst = {
++	0x75, 0x45, 0x39, 0x1b, 0x51, 0xde, 0x01, 0xd5,
++	0xc5, 0x3d, 0xfa, 0xca, 0x77, 0x79, 0x09, 0x06,
++	0x3e, 0x58, 0xed, 0xee, 0x4b, 0xb1, 0x22, 0x7e,
++	0x71, 0x10, 0xac, 0x4d, 0x26, 0x20, 0xc2, 0xae,
++	0xc2, 0xf8, 0x48, 0xf5, 0x6d, 0xee, 0xb0, 0x37,
++	0xa8, 0xdc, 0xed, 0x75, 0xaf, 0xa8, 0xa6, 0xc8,
++	0x90, 0xe2, 0xde, 0xe4, 0x2f, 0x95, 0x0b, 0xb3,
++	0x3d, 0x9e, 0x24, 0x24, 0xd0, 0x8a, 0x50, 0x5d,
++	0x89, 0x95, 0x63, 0x97, 0x3e, 0xd3, 0x88, 0x70,
++	0xf3, 0xde, 0x6e, 0xe2, 0xad, 0xc7, 0xfe, 0x07,
++	0x2c, 0x36, 0x6c, 0x14, 0xe2, 0xcf, 0x7c, 0xa6,
++	0x2f, 0xb3, 0xd3, 0x6b, 0xee, 0x11, 0x68, 0x54,
++	0x61, 0xb7, 0x0d, 0x44, 0xef, 0x8c, 0x66, 0xc5,
++	0xc7, 0xbb, 0xf1, 0x0d, 0xca, 0xdd, 0x7f, 0xac,
++	0xf6
++};
++static const u8 enc_assoc043[] __initconst = {
++	0xa1, 0x1c, 0x40, 0xb6, 0x03, 0x76, 0x73, 0x30
++};
++static const u8 enc_nonce043[] __initconst = {
++	0x46, 0x36, 0x2f, 0x45, 0xd6, 0x37, 0x9e, 0x63,
++	0xe5, 0x22, 0x94, 0x60
++};
++static const u8 enc_key043[] __initconst = {
++	0xaa, 0xbc, 0x06, 0x34, 0x74, 0xe6, 0x5c, 0x4c,
++	0x3e, 0x9b, 0xdc, 0x48, 0x0d, 0xea, 0x97, 0xb4,
++	0x51, 0x10, 0xc8, 0x61, 0x88, 0x46, 0xff, 0x6b,
++	0x15, 0xbd, 0xd2, 0xa4, 0xa5, 0x68, 0x2c, 0x4e
++};
++
++/* wycheproof - misc */
++static const u8 enc_input044[] __initconst = {
++	0xe5, 0xcc, 0xaa, 0x44, 0x1b, 0xc8, 0x14, 0x68,
++	0x8f, 0x8f, 0x6e, 0x8f, 0x28, 0xb5, 0x00, 0xb2
++};
++static const u8 enc_output044[] __initconst = {
++	0x7e, 0x72, 0xf5, 0xa1, 0x85, 0xaf, 0x16, 0xa6,
++	0x11, 0x92, 0x1b, 0x43, 0x8f, 0x74, 0x9f, 0x0b,
++	0x12, 0x42, 0xc6, 0x70, 0x73, 0x23, 0x34, 0x02,
++	0x9a, 0xdf, 0xe1, 0xc5, 0x00, 0x16, 0x51, 0xe4
++};
++static const u8 enc_assoc044[] __initconst = {
++	0x02
++};
++static const u8 enc_nonce044[] __initconst = {
++	0x87, 0x34, 0x5f, 0x10, 0x55, 0xfd, 0x9e, 0x21,
++	0x02, 0xd5, 0x06, 0x56
++};
++static const u8 enc_key044[] __initconst = {
++	0x7d, 0x00, 0xb4, 0x80, 0x95, 0xad, 0xfa, 0x32,
++	0x72, 0x05, 0x06, 0x07, 0xb2, 0x64, 0x18, 0x50,
++	0x02, 0xba, 0x99, 0x95, 0x7c, 0x49, 0x8b, 0xe0,
++	0x22, 0x77, 0x0f, 0x2c, 0xe2, 0xf3, 0x14, 0x3c
++};
++
++/* wycheproof - misc */
++static const u8 enc_input045[] __initconst = {
++	0x02, 0xcd, 0xe1, 0x68, 0xfb, 0xa3, 0xf5, 0x44,
++	0xbb, 0xd0, 0x33, 0x2f, 0x7a, 0xde, 0xad, 0xa8
++};
++static const u8 enc_output045[] __initconst = {
++	0x85, 0xf2, 0x9a, 0x71, 0x95, 0x57, 0xcd, 0xd1,
++	0x4d, 0x1f, 0x8f, 0xff, 0xab, 0x6d, 0x9e, 0x60,
++	0x73, 0x2c, 0xa3, 0x2b, 0xec, 0xd5, 0x15, 0xa1,
++	0xed, 0x35, 0x3f, 0x54, 0x2e, 0x99, 0x98, 0x58
++};
++static const u8 enc_assoc045[] __initconst = {
++	0xb6, 0x48
++};
++static const u8 enc_nonce045[] __initconst = {
++	0x87, 0xa3, 0x16, 0x3e, 0xc0, 0x59, 0x8a, 0xd9,
++	0x5b, 0x3a, 0xa7, 0x13
++};
++static const u8 enc_key045[] __initconst = {
++	0x64, 0x32, 0x71, 0x7f, 0x1d, 0xb8, 0x5e, 0x41,
++	0xac, 0x78, 0x36, 0xbc, 0xe2, 0x51, 0x85, 0xa0,
++	0x80, 0xd5, 0x76, 0x2b, 0x9e, 0x2b, 0x18, 0x44,
++	0x4b, 0x6e, 0xc7, 0x2c, 0x3b, 0xd8, 0xe4, 0xdc
++};
++
++/* wycheproof - misc */
++static const u8 enc_input046[] __initconst = {
++	0x16, 0xdd, 0xd2, 0x3f, 0xf5, 0x3f, 0x3d, 0x23,
++	0xc0, 0x63, 0x34, 0x48, 0x70, 0x40, 0xeb, 0x47
++};
++static const u8 enc_output046[] __initconst = {
++	0xc1, 0xb2, 0x95, 0x93, 0x6d, 0x56, 0xfa, 0xda,
++	0xc0, 0x3e, 0x5f, 0x74, 0x2b, 0xff, 0x73, 0xa1,
++	0x39, 0xc4, 0x57, 0xdb, 0xab, 0x66, 0x38, 0x2b,
++	0xab, 0xb3, 0xb5, 0x58, 0x00, 0xcd, 0xa5, 0xb8
++};
++static const u8 enc_assoc046[] __initconst = {
++	0xbd, 0x4c, 0xd0, 0x2f, 0xc7, 0x50, 0x2b, 0xbd,
++	0xbd, 0xf6, 0xc9, 0xa3, 0xcb, 0xe8, 0xf0
++};
++static const u8 enc_nonce046[] __initconst = {
++	0x6f, 0x57, 0x3a, 0xa8, 0x6b, 0xaa, 0x49, 0x2b,
++	0xa4, 0x65, 0x96, 0xdf
++};
++static const u8 enc_key046[] __initconst = {
++	0x8e, 0x34, 0xcf, 0x73, 0xd2, 0x45, 0xa1, 0x08,
++	0x2a, 0x92, 0x0b, 0x86, 0x36, 0x4e, 0xb8, 0x96,
++	0xc4, 0x94, 0x64, 0x67, 0xbc, 0xb3, 0xd5, 0x89,
++	0x29, 0xfc, 0xb3, 0x66, 0x90, 0xe6, 0x39, 0x4f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input047[] __initconst = {
++	0x62, 0x3b, 0x78, 0x50, 0xc3, 0x21, 0xe2, 0xcf,
++	0x0c, 0x6f, 0xbc, 0xc8, 0xdf, 0xd1, 0xaf, 0xf2
++};
++static const u8 enc_output047[] __initconst = {
++	0xc8, 0x4c, 0x9b, 0xb7, 0xc6, 0x1c, 0x1b, 0xcb,
++	0x17, 0x77, 0x2a, 0x1c, 0x50, 0x0c, 0x50, 0x95,
++	0xdb, 0xad, 0xf7, 0xa5, 0x13, 0x8c, 0xa0, 0x34,
++	0x59, 0xa2, 0xcd, 0x65, 0x83, 0x1e, 0x09, 0x2f
++};
++static const u8 enc_assoc047[] __initconst = {
++	0x89, 0xcc, 0xe9, 0xfb, 0x47, 0x44, 0x1d, 0x07,
++	0xe0, 0x24, 0x5a, 0x66, 0xfe, 0x8b, 0x77, 0x8b
++};
++static const u8 enc_nonce047[] __initconst = {
++	0x1a, 0x65, 0x18, 0xf0, 0x2e, 0xde, 0x1d, 0xa6,
++	0x80, 0x92, 0x66, 0xd9
++};
++static const u8 enc_key047[] __initconst = {
++	0xcb, 0x55, 0x75, 0xf5, 0xc7, 0xc4, 0x5c, 0x91,
++	0xcf, 0x32, 0x0b, 0x13, 0x9f, 0xb5, 0x94, 0x23,
++	0x75, 0x60, 0xd0, 0xa3, 0xe6, 0xf8, 0x65, 0xa6,
++	0x7d, 0x4f, 0x63, 0x3f, 0x2c, 0x08, 0xf0, 0x16
++};
++
++/* wycheproof - misc */
++static const u8 enc_input048[] __initconst = {
++	0x87, 0xb3, 0xa4, 0xd7, 0xb2, 0x6d, 0x8d, 0x32,
++	0x03, 0xa0, 0xde, 0x1d, 0x64, 0xef, 0x82, 0xe3
++};
++static const u8 enc_output048[] __initconst = {
++	0x94, 0xbc, 0x80, 0x62, 0x1e, 0xd1, 0xe7, 0x1b,
++	0x1f, 0xd2, 0xb5, 0xc3, 0xa1, 0x5e, 0x35, 0x68,
++	0x33, 0x35, 0x11, 0x86, 0x17, 0x96, 0x97, 0x84,
++	0x01, 0x59, 0x8b, 0x96, 0x37, 0x22, 0xf5, 0xb3
++};
++static const u8 enc_assoc048[] __initconst = {
++	0xd1, 0x9f, 0x2d, 0x98, 0x90, 0x95, 0xf7, 0xab,
++	0x03, 0xa5, 0xfd, 0xe8, 0x44, 0x16, 0xe0, 0x0c,
++	0x0e
++};
++static const u8 enc_nonce048[] __initconst = {
++	0x56, 0x4d, 0xee, 0x49, 0xab, 0x00, 0xd2, 0x40,
++	0xfc, 0x10, 0x68, 0xc3
++};
++static const u8 enc_key048[] __initconst = {
++	0xa5, 0x56, 0x9e, 0x72, 0x9a, 0x69, 0xb2, 0x4b,
++	0xa6, 0xe0, 0xff, 0x15, 0xc4, 0x62, 0x78, 0x97,
++	0x43, 0x68, 0x24, 0xc9, 0x41, 0xe9, 0xd0, 0x0b,
++	0x2e, 0x93, 0xfd, 0xdc, 0x4b, 0xa7, 0x76, 0x57
++};
++
++/* wycheproof - misc */
++static const u8 enc_input049[] __initconst = {
++	0xe6, 0x01, 0xb3, 0x85, 0x57, 0x79, 0x7d, 0xa2,
++	0xf8, 0xa4, 0x10, 0x6a, 0x08, 0x9d, 0x1d, 0xa6
++};
++static const u8 enc_output049[] __initconst = {
++	0x29, 0x9b, 0x5d, 0x3f, 0x3d, 0x03, 0xc0, 0x87,
++	0x20, 0x9a, 0x16, 0xe2, 0x85, 0x14, 0x31, 0x11,
++	0x4b, 0x45, 0x4e, 0xd1, 0x98, 0xde, 0x11, 0x7e,
++	0x83, 0xec, 0x49, 0xfa, 0x8d, 0x85, 0x08, 0xd6
++};
++static const u8 enc_assoc049[] __initconst = {
++	0x5e, 0x64, 0x70, 0xfa, 0xcd, 0x99, 0xc1, 0xd8,
++	0x1e, 0x37, 0xcd, 0x44, 0x01, 0x5f, 0xe1, 0x94,
++	0x80, 0xa2, 0xa4, 0xd3, 0x35, 0x2a, 0x4f, 0xf5,
++	0x60, 0xc0, 0x64, 0x0f, 0xdb, 0xda
++};
++static const u8 enc_nonce049[] __initconst = {
++	0xdf, 0x87, 0x13, 0xe8, 0x7e, 0xc3, 0xdb, 0xcf,
++	0xad, 0x14, 0xd5, 0x3e
++};
++static const u8 enc_key049[] __initconst = {
++	0x56, 0x20, 0x74, 0x65, 0xb4, 0xe4, 0x8e, 0x6d,
++	0x04, 0x63, 0x0f, 0x4a, 0x42, 0xf3, 0x5c, 0xfc,
++	0x16, 0x3a, 0xb2, 0x89, 0xc2, 0x2a, 0x2b, 0x47,
++	0x84, 0xf6, 0xf9, 0x29, 0x03, 0x30, 0xbe, 0xe0
++};
++
++/* wycheproof - misc */
++static const u8 enc_input050[] __initconst = {
++	0xdc, 0x9e, 0x9e, 0xaf, 0x11, 0xe3, 0x14, 0x18,
++	0x2d, 0xf6, 0xa4, 0xeb, 0xa1, 0x7a, 0xec, 0x9c
++};
++static const u8 enc_output050[] __initconst = {
++	0x60, 0x5b, 0xbf, 0x90, 0xae, 0xb9, 0x74, 0xf6,
++	0x60, 0x2b, 0xc7, 0x78, 0x05, 0x6f, 0x0d, 0xca,
++	0x38, 0xea, 0x23, 0xd9, 0x90, 0x54, 0xb4, 0x6b,
++	0x42, 0xff, 0xe0, 0x04, 0x12, 0x9d, 0x22, 0x04
++};
++static const u8 enc_assoc050[] __initconst = {
++	0xba, 0x44, 0x6f, 0x6f, 0x9a, 0x0c, 0xed, 0x22,
++	0x45, 0x0f, 0xeb, 0x10, 0x73, 0x7d, 0x90, 0x07,
++	0xfd, 0x69, 0xab, 0xc1, 0x9b, 0x1d, 0x4d, 0x90,
++	0x49, 0xa5, 0x55, 0x1e, 0x86, 0xec, 0x2b, 0x37
++};
++static const u8 enc_nonce050[] __initconst = {
++	0x8d, 0xf4, 0xb1, 0x5a, 0x88, 0x8c, 0x33, 0x28,
++	0x6a, 0x7b, 0x76, 0x51
++};
++static const u8 enc_key050[] __initconst = {
++	0x39, 0x37, 0x98, 0x6a, 0xf8, 0x6d, 0xaf, 0xc1,
++	0xba, 0x0c, 0x46, 0x72, 0xd8, 0xab, 0xc4, 0x6c,
++	0x20, 0x70, 0x62, 0x68, 0x2d, 0x9c, 0x26, 0x4a,
++	0xb0, 0x6d, 0x6c, 0x58, 0x07, 0x20, 0x51, 0x30
++};
++
++/* wycheproof - misc */
++static const u8 enc_input051[] __initconst = {
++	0x81, 0xce, 0x84, 0xed, 0xe9, 0xb3, 0x58, 0x59,
++	0xcc, 0x8c, 0x49, 0xa8, 0xf6, 0xbe, 0x7d, 0xc6
++};
++static const u8 enc_output051[] __initconst = {
++	0x7b, 0x7c, 0xe0, 0xd8, 0x24, 0x80, 0x9a, 0x70,
++	0xde, 0x32, 0x56, 0x2c, 0xcf, 0x2c, 0x2b, 0xbd,
++	0x15, 0xd4, 0x4a, 0x00, 0xce, 0x0d, 0x19, 0xb4,
++	0x23, 0x1f, 0x92, 0x1e, 0x22, 0xbc, 0x0a, 0x43
++};
++static const u8 enc_assoc051[] __initconst = {
++	0xd4, 0x1a, 0x82, 0x8d, 0x5e, 0x71, 0x82, 0x92,
++	0x47, 0x02, 0x19, 0x05, 0x40, 0x2e, 0xa2, 0x57,
++	0xdc, 0xcb, 0xc3, 0xb8, 0x0f, 0xcd, 0x56, 0x75,
++	0x05, 0x6b, 0x68, 0xbb, 0x59, 0xe6, 0x2e, 0x88,
++	0x73
++};
++static const u8 enc_nonce051[] __initconst = {
++	0xbe, 0x40, 0xe5, 0xf1, 0xa1, 0x18, 0x17, 0xa0,
++	0xa8, 0xfa, 0x89, 0x49
++};
++static const u8 enc_key051[] __initconst = {
++	0x36, 0x37, 0x2a, 0xbc, 0xdb, 0x78, 0xe0, 0x27,
++	0x96, 0x46, 0xac, 0x3d, 0x17, 0x6b, 0x96, 0x74,
++	0xe9, 0x15, 0x4e, 0xec, 0xf0, 0xd5, 0x46, 0x9c,
++	0x65, 0x1e, 0xc7, 0xe1, 0x6b, 0x4c, 0x11, 0x99
++};
++
++/* wycheproof - misc */
++static const u8 enc_input052[] __initconst = {
++	0xa6, 0x67, 0x47, 0xc8, 0x9e, 0x85, 0x7a, 0xf3,
++	0xa1, 0x8e, 0x2c, 0x79, 0x50, 0x00, 0x87, 0xed
++};
++static const u8 enc_output052[] __initconst = {
++	0xca, 0x82, 0xbf, 0xf3, 0xe2, 0xf3, 0x10, 0xcc,
++	0xc9, 0x76, 0x67, 0x2c, 0x44, 0x15, 0xe6, 0x9b,
++	0x57, 0x63, 0x8c, 0x62, 0xa5, 0xd8, 0x5d, 0xed,
++	0x77, 0x4f, 0x91, 0x3c, 0x81, 0x3e, 0xa0, 0x32
++};
++static const u8 enc_assoc052[] __initconst = {
++	0x3f, 0x2d, 0xd4, 0x9b, 0xbf, 0x09, 0xd6, 0x9a,
++	0x78, 0xa3, 0xd8, 0x0e, 0xa2, 0x56, 0x66, 0x14,
++	0xfc, 0x37, 0x94, 0x74, 0x19, 0x6c, 0x1a, 0xae,
++	0x84, 0x58, 0x3d, 0xa7, 0x3d, 0x7f, 0xf8, 0x5c,
++	0x6f, 0x42, 0xca, 0x42, 0x05, 0x6a, 0x97, 0x92,
++	0xcc, 0x1b, 0x9f, 0xb3, 0xc7, 0xd2, 0x61
++};
++static const u8 enc_nonce052[] __initconst = {
++	0x84, 0xc8, 0x7d, 0xae, 0x4e, 0xee, 0x27, 0x73,
++	0x0e, 0xc3, 0x5d, 0x12
++};
++static const u8 enc_key052[] __initconst = {
++	0x9f, 0x14, 0x79, 0xed, 0x09, 0x7d, 0x7f, 0xe5,
++	0x29, 0xc1, 0x1f, 0x2f, 0x5a, 0xdd, 0x9a, 0xaf,
++	0xf4, 0xa1, 0xca, 0x0b, 0x68, 0x99, 0x7a, 0x2c,
++	0xb7, 0xf7, 0x97, 0x49, 0xbd, 0x90, 0xaa, 0xf4
++};
++
++/* wycheproof - misc */
++static const u8 enc_input053[] __initconst = {
++	0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83,
++	0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8,
++	0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b,
++	0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe
++};
++static const u8 enc_output053[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0xe6, 0xd3, 0xd7, 0x32, 0x4a, 0x1c, 0xbb, 0xa7,
++	0x77, 0xbb, 0xb0, 0xec, 0xdd, 0xa3, 0x78, 0x07
++};
++static const u8 enc_assoc053[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 enc_nonce053[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key053[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input054[] __initconst = {
++	0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83,
++	0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8,
++	0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b,
++	0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe,
++	0xe3, 0xbc, 0xdb, 0x5b, 0x1e, 0xde, 0xfc, 0xfe,
++	0x8b, 0xcd, 0xa1, 0xb6, 0xa1, 0x5c, 0x8c, 0x2b,
++	0x08, 0x69, 0xff, 0xd2, 0xec, 0x5e, 0x26, 0xe5,
++	0x53, 0xb7, 0xb2, 0x27, 0xfe, 0x87, 0xfd, 0xbd
++};
++static const u8 enc_output054[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x06, 0x2d, 0xe6, 0x79, 0x5f, 0x27, 0x4f, 0xd2,
++	0xa3, 0x05, 0xd7, 0x69, 0x80, 0xbc, 0x9c, 0xce
++};
++static const u8 enc_assoc054[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 enc_nonce054[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key054[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input055[] __initconst = {
++	0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83,
++	0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8,
++	0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b,
++	0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe,
++	0xe3, 0xbc, 0xdb, 0x5b, 0x1e, 0xde, 0xfc, 0xfe,
++	0x8b, 0xcd, 0xa1, 0xb6, 0xa1, 0x5c, 0x8c, 0x2b,
++	0x08, 0x69, 0xff, 0xd2, 0xec, 0x5e, 0x26, 0xe5,
++	0x53, 0xb7, 0xb2, 0x27, 0xfe, 0x87, 0xfd, 0xbd,
++	0x7a, 0xda, 0x44, 0x42, 0x42, 0x69, 0xbf, 0xfa,
++	0x55, 0x27, 0xf2, 0x70, 0xac, 0xf6, 0x85, 0x02,
++	0xb7, 0x4c, 0x5a, 0xe2, 0xe6, 0x0c, 0x05, 0x80,
++	0x98, 0x1a, 0x49, 0x38, 0x45, 0x93, 0x92, 0xc4,
++	0x9b, 0xb2, 0xf2, 0x84, 0xb6, 0x46, 0xef, 0xc7,
++	0xf3, 0xf0, 0xb1, 0x36, 0x1d, 0xc3, 0x48, 0xed,
++	0x77, 0xd3, 0x0b, 0xc5, 0x76, 0x92, 0xed, 0x38,
++	0xfb, 0xac, 0x01, 0x88, 0x38, 0x04, 0x88, 0xc7
++};
++static const u8 enc_output055[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0xd8, 0xb4, 0x79, 0x02, 0xba, 0xae, 0xaf, 0xb3,
++	0x42, 0x03, 0x05, 0x15, 0x29, 0xaf, 0x28, 0x2e
++};
++static const u8 enc_assoc055[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 enc_nonce055[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key055[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input056[] __initconst = {
++	0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c,
++	0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17,
++	0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84,
++	0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41
++};
++static const u8 enc_output056[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xb3, 0x89, 0x1c, 0x84, 0x9c, 0xb5, 0x2c, 0x27,
++	0x74, 0x7e, 0xdf, 0xcf, 0x31, 0x21, 0x3b, 0xb6
++};
++static const u8 enc_assoc056[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce056[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key056[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input057[] __initconst = {
++	0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c,
++	0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17,
++	0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84,
++	0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41,
++	0x1c, 0x43, 0x24, 0xa4, 0xe1, 0x21, 0x03, 0x01,
++	0x74, 0x32, 0x5e, 0x49, 0x5e, 0xa3, 0x73, 0xd4,
++	0xf7, 0x96, 0x00, 0x2d, 0x13, 0xa1, 0xd9, 0x1a,
++	0xac, 0x48, 0x4d, 0xd8, 0x01, 0x78, 0x02, 0x42
++};
++static const u8 enc_output057[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xf0, 0xc1, 0x2d, 0x26, 0xef, 0x03, 0x02, 0x9b,
++	0x62, 0xc0, 0x08, 0xda, 0x27, 0xc5, 0xdc, 0x68
++};
++static const u8 enc_assoc057[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce057[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key057[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input058[] __initconst = {
++	0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c,
++	0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17,
++	0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84,
++	0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41,
++	0x1c, 0x43, 0x24, 0xa4, 0xe1, 0x21, 0x03, 0x01,
++	0x74, 0x32, 0x5e, 0x49, 0x5e, 0xa3, 0x73, 0xd4,
++	0xf7, 0x96, 0x00, 0x2d, 0x13, 0xa1, 0xd9, 0x1a,
++	0xac, 0x48, 0x4d, 0xd8, 0x01, 0x78, 0x02, 0x42,
++	0x85, 0x25, 0xbb, 0xbd, 0xbd, 0x96, 0x40, 0x05,
++	0xaa, 0xd8, 0x0d, 0x8f, 0x53, 0x09, 0x7a, 0xfd,
++	0x48, 0xb3, 0xa5, 0x1d, 0x19, 0xf3, 0xfa, 0x7f,
++	0x67, 0xe5, 0xb6, 0xc7, 0xba, 0x6c, 0x6d, 0x3b,
++	0x64, 0x4d, 0x0d, 0x7b, 0x49, 0xb9, 0x10, 0x38,
++	0x0c, 0x0f, 0x4e, 0xc9, 0xe2, 0x3c, 0xb7, 0x12,
++	0x88, 0x2c, 0xf4, 0x3a, 0x89, 0x6d, 0x12, 0xc7,
++	0x04, 0x53, 0xfe, 0x77, 0xc7, 0xfb, 0x77, 0x38
++};
++static const u8 enc_output058[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xee, 0x65, 0x78, 0x30, 0x01, 0xc2, 0x56, 0x91,
++	0xfa, 0x28, 0xd0, 0xf5, 0xf1, 0xc1, 0xd7, 0x62
++};
++static const u8 enc_assoc058[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce058[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key058[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input059[] __initconst = {
++	0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03,
++	0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68,
++	0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb,
++	0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e
++};
++static const u8 enc_output059[] __initconst = {
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x79, 0xba, 0x7a, 0x29, 0xf5, 0xa7, 0xbb, 0x75,
++	0x79, 0x7a, 0xf8, 0x7a, 0x61, 0x01, 0x29, 0xa4
++};
++static const u8 enc_assoc059[] __initconst = {
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
++};
++static const u8 enc_nonce059[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key059[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input060[] __initconst = {
++	0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03,
++	0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68,
++	0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb,
++	0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e,
++	0xe3, 0xbc, 0xdb, 0xdb, 0x1e, 0xde, 0xfc, 0x7e,
++	0x8b, 0xcd, 0xa1, 0x36, 0xa1, 0x5c, 0x8c, 0xab,
++	0x08, 0x69, 0xff, 0x52, 0xec, 0x5e, 0x26, 0x65,
++	0x53, 0xb7, 0xb2, 0xa7, 0xfe, 0x87, 0xfd, 0x3d
++};
++static const u8 enc_output060[] __initconst = {
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x36, 0xb1, 0x74, 0x38, 0x19, 0xe1, 0xb9, 0xba,
++	0x15, 0x51, 0xe8, 0xed, 0x92, 0x2a, 0x95, 0x9a
++};
++static const u8 enc_assoc060[] __initconst = {
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
++};
++static const u8 enc_nonce060[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key060[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input061[] __initconst = {
++	0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03,
++	0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68,
++	0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb,
++	0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e,
++	0xe3, 0xbc, 0xdb, 0xdb, 0x1e, 0xde, 0xfc, 0x7e,
++	0x8b, 0xcd, 0xa1, 0x36, 0xa1, 0x5c, 0x8c, 0xab,
++	0x08, 0x69, 0xff, 0x52, 0xec, 0x5e, 0x26, 0x65,
++	0x53, 0xb7, 0xb2, 0xa7, 0xfe, 0x87, 0xfd, 0x3d,
++	0x7a, 0xda, 0x44, 0xc2, 0x42, 0x69, 0xbf, 0x7a,
++	0x55, 0x27, 0xf2, 0xf0, 0xac, 0xf6, 0x85, 0x82,
++	0xb7, 0x4c, 0x5a, 0x62, 0xe6, 0x0c, 0x05, 0x00,
++	0x98, 0x1a, 0x49, 0xb8, 0x45, 0x93, 0x92, 0x44,
++	0x9b, 0xb2, 0xf2, 0x04, 0xb6, 0x46, 0xef, 0x47,
++	0xf3, 0xf0, 0xb1, 0xb6, 0x1d, 0xc3, 0x48, 0x6d,
++	0x77, 0xd3, 0x0b, 0x45, 0x76, 0x92, 0xed, 0xb8,
++	0xfb, 0xac, 0x01, 0x08, 0x38, 0x04, 0x88, 0x47
++};
++static const u8 enc_output061[] __initconst = {
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0xfe, 0xac, 0x49, 0x55, 0x55, 0x4e, 0x80, 0x6f,
++	0x3a, 0x19, 0x02, 0xe2, 0x44, 0x32, 0xc0, 0x8a
++};
++static const u8 enc_assoc061[] __initconst = {
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
++};
++static const u8 enc_nonce061[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key061[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input062[] __initconst = {
++	0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc,
++	0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97,
++	0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04,
++	0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1
++};
++static const u8 enc_output062[] __initconst = {
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0x20, 0xa3, 0x79, 0x8d, 0xf1, 0x29, 0x2c, 0x59,
++	0x72, 0xbf, 0x97, 0x41, 0xae, 0xc3, 0x8a, 0x19
++};
++static const u8 enc_assoc062[] __initconst = {
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f
++};
++static const u8 enc_nonce062[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key062[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input063[] __initconst = {
++	0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc,
++	0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97,
++	0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04,
++	0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1,
++	0x1c, 0x43, 0x24, 0x24, 0xe1, 0x21, 0x03, 0x81,
++	0x74, 0x32, 0x5e, 0xc9, 0x5e, 0xa3, 0x73, 0x54,
++	0xf7, 0x96, 0x00, 0xad, 0x13, 0xa1, 0xd9, 0x9a,
++	0xac, 0x48, 0x4d, 0x58, 0x01, 0x78, 0x02, 0xc2
++};
++static const u8 enc_output063[] __initconst = {
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xc0, 0x3d, 0x9f, 0x67, 0x35, 0x4a, 0x97, 0xb2,
++	0xf0, 0x74, 0xf7, 0x55, 0x15, 0x57, 0xe4, 0x9c
++};
++static const u8 enc_assoc063[] __initconst = {
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f
++};
++static const u8 enc_nonce063[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key063[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input064[] __initconst = {
++	0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc,
++	0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97,
++	0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04,
++	0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1,
++	0x1c, 0x43, 0x24, 0x24, 0xe1, 0x21, 0x03, 0x81,
++	0x74, 0x32, 0x5e, 0xc9, 0x5e, 0xa3, 0x73, 0x54,
++	0xf7, 0x96, 0x00, 0xad, 0x13, 0xa1, 0xd9, 0x9a,
++	0xac, 0x48, 0x4d, 0x58, 0x01, 0x78, 0x02, 0xc2,
++	0x85, 0x25, 0xbb, 0x3d, 0xbd, 0x96, 0x40, 0x85,
++	0xaa, 0xd8, 0x0d, 0x0f, 0x53, 0x09, 0x7a, 0x7d,
++	0x48, 0xb3, 0xa5, 0x9d, 0x19, 0xf3, 0xfa, 0xff,
++	0x67, 0xe5, 0xb6, 0x47, 0xba, 0x6c, 0x6d, 0xbb,
++	0x64, 0x4d, 0x0d, 0xfb, 0x49, 0xb9, 0x10, 0xb8,
++	0x0c, 0x0f, 0x4e, 0x49, 0xe2, 0x3c, 0xb7, 0x92,
++	0x88, 0x2c, 0xf4, 0xba, 0x89, 0x6d, 0x12, 0x47,
++	0x04, 0x53, 0xfe, 0xf7, 0xc7, 0xfb, 0x77, 0xb8
++};
++static const u8 enc_output064[] __initconst = {
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xc8, 0x6d, 0xa8, 0xdd, 0x65, 0x22, 0x86, 0xd5,
++	0x02, 0x13, 0xd3, 0x28, 0xd6, 0x3e, 0x40, 0x06
++};
++static const u8 enc_assoc064[] __initconst = {
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f
++};
++static const u8 enc_nonce064[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key064[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input065[] __initconst = {
++	0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c,
++	0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17,
++	0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84,
++	0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41
++};
++static const u8 enc_output065[] __initconst = {
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0xbe, 0xde, 0x90, 0x83, 0xce, 0xb3, 0x6d, 0xdf,
++	0xe5, 0xfa, 0x81, 0x1f, 0x95, 0x47, 0x1c, 0x67
++};
++static const u8 enc_assoc065[] __initconst = {
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce065[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key065[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input066[] __initconst = {
++	0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c,
++	0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17,
++	0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84,
++	0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41,
++	0x9c, 0x43, 0x24, 0xa4, 0x61, 0x21, 0x03, 0x01,
++	0xf4, 0x32, 0x5e, 0x49, 0xde, 0xa3, 0x73, 0xd4,
++	0x77, 0x96, 0x00, 0x2d, 0x93, 0xa1, 0xd9, 0x1a,
++	0x2c, 0x48, 0x4d, 0xd8, 0x81, 0x78, 0x02, 0x42
++};
++static const u8 enc_output066[] __initconst = {
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x30, 0x08, 0x74, 0xbb, 0x06, 0x92, 0xb6, 0x89,
++	0xde, 0xad, 0x9a, 0xe1, 0x5b, 0x06, 0x73, 0x90
++};
++static const u8 enc_assoc066[] __initconst = {
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce066[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key066[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input067[] __initconst = {
++	0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c,
++	0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17,
++	0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84,
++	0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41,
++	0x9c, 0x43, 0x24, 0xa4, 0x61, 0x21, 0x03, 0x01,
++	0xf4, 0x32, 0x5e, 0x49, 0xde, 0xa3, 0x73, 0xd4,
++	0x77, 0x96, 0x00, 0x2d, 0x93, 0xa1, 0xd9, 0x1a,
++	0x2c, 0x48, 0x4d, 0xd8, 0x81, 0x78, 0x02, 0x42,
++	0x05, 0x25, 0xbb, 0xbd, 0x3d, 0x96, 0x40, 0x05,
++	0x2a, 0xd8, 0x0d, 0x8f, 0xd3, 0x09, 0x7a, 0xfd,
++	0xc8, 0xb3, 0xa5, 0x1d, 0x99, 0xf3, 0xfa, 0x7f,
++	0xe7, 0xe5, 0xb6, 0xc7, 0x3a, 0x6c, 0x6d, 0x3b,
++	0xe4, 0x4d, 0x0d, 0x7b, 0xc9, 0xb9, 0x10, 0x38,
++	0x8c, 0x0f, 0x4e, 0xc9, 0x62, 0x3c, 0xb7, 0x12,
++	0x08, 0x2c, 0xf4, 0x3a, 0x09, 0x6d, 0x12, 0xc7,
++	0x84, 0x53, 0xfe, 0x77, 0x47, 0xfb, 0x77, 0x38
++};
++static const u8 enc_output067[] __initconst = {
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x99, 0xca, 0xd8, 0x5f, 0x45, 0xca, 0x40, 0x94,
++	0x2d, 0x0d, 0x4d, 0x5e, 0x95, 0x0a, 0xde, 0x22
++};
++static const u8 enc_assoc067[] __initconst = {
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
++	0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce067[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key067[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input068[] __initconst = {
++	0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c,
++	0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17,
++	0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84,
++	0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41
++};
++static const u8 enc_output068[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x8b, 0xbe, 0x14, 0x52, 0x72, 0xe7, 0xc2, 0xd9,
++	0xa1, 0x89, 0x1a, 0x3a, 0xb0, 0x98, 0x3d, 0x9d
++};
++static const u8 enc_assoc068[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce068[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key068[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input069[] __initconst = {
++	0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c,
++	0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17,
++	0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84,
++	0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41,
++	0xe3, 0xbc, 0xdb, 0x5b, 0xe1, 0x21, 0x03, 0x01,
++	0x8b, 0xcd, 0xa1, 0xb6, 0x5e, 0xa3, 0x73, 0xd4,
++	0x08, 0x69, 0xff, 0xd2, 0x13, 0xa1, 0xd9, 0x1a,
++	0x53, 0xb7, 0xb2, 0x27, 0x01, 0x78, 0x02, 0x42
++};
++static const u8 enc_output069[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x3b, 0x41, 0x86, 0x19, 0x13, 0xa8, 0xf6, 0xde,
++	0x7f, 0x61, 0xe2, 0x25, 0x63, 0x1b, 0xc3, 0x82
++};
++static const u8 enc_assoc069[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce069[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key069[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input070[] __initconst = {
++	0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c,
++	0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17,
++	0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84,
++	0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41,
++	0xe3, 0xbc, 0xdb, 0x5b, 0xe1, 0x21, 0x03, 0x01,
++	0x8b, 0xcd, 0xa1, 0xb6, 0x5e, 0xa3, 0x73, 0xd4,
++	0x08, 0x69, 0xff, 0xd2, 0x13, 0xa1, 0xd9, 0x1a,
++	0x53, 0xb7, 0xb2, 0x27, 0x01, 0x78, 0x02, 0x42,
++	0x7a, 0xda, 0x44, 0x42, 0xbd, 0x96, 0x40, 0x05,
++	0x55, 0x27, 0xf2, 0x70, 0x53, 0x09, 0x7a, 0xfd,
++	0xb7, 0x4c, 0x5a, 0xe2, 0x19, 0xf3, 0xfa, 0x7f,
++	0x98, 0x1a, 0x49, 0x38, 0xba, 0x6c, 0x6d, 0x3b,
++	0x9b, 0xb2, 0xf2, 0x84, 0x49, 0xb9, 0x10, 0x38,
++	0xf3, 0xf0, 0xb1, 0x36, 0xe2, 0x3c, 0xb7, 0x12,
++	0x77, 0xd3, 0x0b, 0xc5, 0x89, 0x6d, 0x12, 0xc7,
++	0xfb, 0xac, 0x01, 0x88, 0xc7, 0xfb, 0x77, 0x38
++};
++static const u8 enc_output070[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x84, 0x28, 0xbc, 0xf0, 0x23, 0xec, 0x6b, 0xf3,
++	0x1f, 0xd9, 0xef, 0xb2, 0x03, 0xff, 0x08, 0x71
++};
++static const u8 enc_assoc070[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce070[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key070[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input071[] __initconst = {
++	0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83,
++	0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8,
++	0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b,
++	0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe
++};
++static const u8 enc_output071[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0x13, 0x9f, 0xdf, 0x64, 0x74, 0xea, 0x24, 0xf5,
++	0x49, 0xb0, 0x75, 0x82, 0x5f, 0x2c, 0x76, 0x20
++};
++static const u8 enc_assoc071[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 enc_nonce071[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key071[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input072[] __initconst = {
++	0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83,
++	0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8,
++	0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b,
++	0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe,
++	0x1c, 0x43, 0x24, 0xa4, 0x1e, 0xde, 0xfc, 0xfe,
++	0x74, 0x32, 0x5e, 0x49, 0xa1, 0x5c, 0x8c, 0x2b,
++	0xf7, 0x96, 0x00, 0x2d, 0xec, 0x5e, 0x26, 0xe5,
++	0xac, 0x48, 0x4d, 0xd8, 0xfe, 0x87, 0xfd, 0xbd
++};
++static const u8 enc_output072[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xbb, 0xad, 0x8d, 0x86, 0x3b, 0x83, 0x5a, 0x8e,
++	0x86, 0x64, 0xfd, 0x1d, 0x45, 0x66, 0xb6, 0xb4
++};
++static const u8 enc_assoc072[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 enc_nonce072[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key072[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - misc */
++static const u8 enc_input073[] __initconst = {
++	0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83,
++	0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8,
++	0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b,
++	0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe,
++	0x1c, 0x43, 0x24, 0xa4, 0x1e, 0xde, 0xfc, 0xfe,
++	0x74, 0x32, 0x5e, 0x49, 0xa1, 0x5c, 0x8c, 0x2b,
++	0xf7, 0x96, 0x00, 0x2d, 0xec, 0x5e, 0x26, 0xe5,
++	0xac, 0x48, 0x4d, 0xd8, 0xfe, 0x87, 0xfd, 0xbd,
++	0x85, 0x25, 0xbb, 0xbd, 0x42, 0x69, 0xbf, 0xfa,
++	0xaa, 0xd8, 0x0d, 0x8f, 0xac, 0xf6, 0x85, 0x02,
++	0x48, 0xb3, 0xa5, 0x1d, 0xe6, 0x0c, 0x05, 0x80,
++	0x67, 0xe5, 0xb6, 0xc7, 0x45, 0x93, 0x92, 0xc4,
++	0x64, 0x4d, 0x0d, 0x7b, 0xb6, 0x46, 0xef, 0xc7,
++	0x0c, 0x0f, 0x4e, 0xc9, 0x1d, 0xc3, 0x48, 0xed,
++	0x88, 0x2c, 0xf4, 0x3a, 0x76, 0x92, 0xed, 0x38,
++	0x04, 0x53, 0xfe, 0x77, 0x38, 0x04, 0x88, 0xc7
++};
++static const u8 enc_output073[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0x42, 0xf2, 0x35, 0x42, 0x97, 0x84, 0x9a, 0x51,
++	0x1d, 0x53, 0xe5, 0x57, 0x17, 0x72, 0xf7, 0x1f
++};
++static const u8 enc_assoc073[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 enc_nonce073[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
++};
++static const u8 enc_key073[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - checking for int overflows */
++static const u8 enc_input074[] __initconst = {
++	0xd4, 0x50, 0x0b, 0xf0, 0x09, 0x49, 0x35, 0x51,
++	0xc3, 0x80, 0xad, 0xf5, 0x2c, 0x57, 0x3a, 0x69,
++	0xdf, 0x7e, 0x8b, 0x76, 0x24, 0x63, 0x33, 0x0f,
++	0xac, 0xc1, 0x6a, 0x57, 0x26, 0xbe, 0x71, 0x90,
++	0xc6, 0x3c, 0x5a, 0x1c, 0x92, 0x65, 0x84, 0xa0,
++	0x96, 0x75, 0x68, 0x28, 0xdc, 0xdc, 0x64, 0xac,
++	0xdf, 0x96, 0x3d, 0x93, 0x1b, 0xf1, 0xda, 0xe2,
++	0x38, 0xf3, 0xf1, 0x57, 0x22, 0x4a, 0xc4, 0xb5,
++	0x42, 0xd7, 0x85, 0xb0, 0xdd, 0x84, 0xdb, 0x6b,
++	0xe3, 0xbc, 0x5a, 0x36, 0x63, 0xe8, 0x41, 0x49,
++	0xff, 0xbe, 0xd0, 0x9e, 0x54, 0xf7, 0x8f, 0x16,
++	0xa8, 0x22, 0x3b, 0x24, 0xcb, 0x01, 0x9f, 0x58,
++	0xb2, 0x1b, 0x0e, 0x55, 0x1e, 0x7a, 0xa0, 0x73,
++	0x27, 0x62, 0x95, 0x51, 0x37, 0x6c, 0xcb, 0xc3,
++	0x93, 0x76, 0x71, 0xa0, 0x62, 0x9b, 0xd9, 0x5c,
++	0x99, 0x15, 0xc7, 0x85, 0x55, 0x77, 0x1e, 0x7a
++};
++static const u8 enc_output074[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x0b, 0x30, 0x0d, 0x8d, 0xa5, 0x6c, 0x21, 0x85,
++	0x75, 0x52, 0x79, 0x55, 0x3c, 0x4c, 0x82, 0xca
++};
++static const u8 enc_assoc074[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce074[] __initconst = {
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x00, 0x02, 0x50, 0x6e
++};
++static const u8 enc_key074[] __initconst = {
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
++};
++
++/* wycheproof - checking for int overflows */
++static const u8 enc_input075[] __initconst = {
++	0x7d, 0xe8, 0x7f, 0x67, 0x29, 0x94, 0x52, 0x75,
++	0xd0, 0x65, 0x5d, 0xa4, 0xc7, 0xfd, 0xe4, 0x56,
++	0x9e, 0x16, 0xf1, 0x11, 0xb5, 0xeb, 0x26, 0xc2,
++	0x2d, 0x85, 0x9e, 0x3f, 0xf8, 0x22, 0xec, 0xed,
++	0x3a, 0x6d, 0xd9, 0xa6, 0x0f, 0x22, 0x95, 0x7f,
++	0x7b, 0x7c, 0x85, 0x7e, 0x88, 0x22, 0xeb, 0x9f,
++	0xe0, 0xb8, 0xd7, 0x02, 0x21, 0x41, 0xf2, 0xd0,
++	0xb4, 0x8f, 0x4b, 0x56, 0x12, 0xd3, 0x22, 0xa8,
++	0x8d, 0xd0, 0xfe, 0x0b, 0x4d, 0x91, 0x79, 0x32,
++	0x4f, 0x7c, 0x6c, 0x9e, 0x99, 0x0e, 0xfb, 0xd8,
++	0x0e, 0x5e, 0xd6, 0x77, 0x58, 0x26, 0x49, 0x8b,
++	0x1e, 0xfe, 0x0f, 0x71, 0xa0, 0xf3, 0xec, 0x5b,
++	0x29, 0xcb, 0x28, 0xc2, 0x54, 0x0a, 0x7d, 0xcd,
++	0x51, 0xb7, 0xda, 0xae, 0xe0, 0xff, 0x4a, 0x7f,
++	0x3a, 0xc1, 0xee, 0x54, 0xc2, 0x9e, 0xe4, 0xc1,
++	0x70, 0xde, 0x40, 0x8f, 0x66, 0x69, 0x21, 0x94
++};
++static const u8 enc_output075[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xc5, 0x78, 0xe2, 0xaa, 0x44, 0xd3, 0x09, 0xb7,
++	0xb6, 0xa5, 0x19, 0x3b, 0xdc, 0x61, 0x18, 0xf5
++};
++static const u8 enc_assoc075[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce075[] __initconst = {
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x00, 0x03, 0x18, 0xa5
++};
++static const u8 enc_key075[] __initconst = {
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
++};
++
++/* wycheproof - checking for int overflows */
++static const u8 enc_input076[] __initconst = {
++	0x1b, 0x99, 0x6f, 0x9a, 0x3c, 0xcc, 0x67, 0x85,
++	0xde, 0x22, 0xff, 0x5b, 0x8a, 0xdd, 0x95, 0x02,
++	0xce, 0x03, 0xa0, 0xfa, 0xf5, 0x99, 0x2a, 0x09,
++	0x52, 0x2c, 0xdd, 0x12, 0x06, 0xd2, 0x20, 0xb8,
++	0xf8, 0xbd, 0x07, 0xd1, 0xf1, 0xf5, 0xa1, 0xbd,
++	0x9a, 0x71, 0xd1, 0x1c, 0x7f, 0x57, 0x9b, 0x85,
++	0x58, 0x18, 0xc0, 0x8d, 0x4d, 0xe0, 0x36, 0x39,
++	0x31, 0x83, 0xb7, 0xf5, 0x90, 0xb3, 0x35, 0xae,
++	0xd8, 0xde, 0x5b, 0x57, 0xb1, 0x3c, 0x5f, 0xed,
++	0xe2, 0x44, 0x1c, 0x3e, 0x18, 0x4a, 0xa9, 0xd4,
++	0x6e, 0x61, 0x59, 0x85, 0x06, 0xb3, 0xe1, 0x1c,
++	0x43, 0xc6, 0x2c, 0xbc, 0xac, 0xec, 0xed, 0x33,
++	0x19, 0x08, 0x75, 0xb0, 0x12, 0x21, 0x8b, 0x19,
++	0x30, 0xfb, 0x7c, 0x38, 0xec, 0x45, 0xac, 0x11,
++	0xc3, 0x53, 0xd0, 0xcf, 0x93, 0x8d, 0xcc, 0xb9,
++	0xef, 0xad, 0x8f, 0xed, 0xbe, 0x46, 0xda, 0xa5
++};
++static const u8 enc_output076[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x4b, 0x0b, 0xda, 0x8a, 0xd0, 0x43, 0x83, 0x0d,
++	0x83, 0x19, 0xab, 0x82, 0xc5, 0x0c, 0x76, 0x63
++};
++static const u8 enc_assoc076[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce076[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xb4, 0xf0
++};
++static const u8 enc_key076[] __initconst = {
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
++};
++
++/* wycheproof - checking for int overflows */
++static const u8 enc_input077[] __initconst = {
++	0x86, 0xcb, 0xac, 0xae, 0x4d, 0x3f, 0x74, 0xae,
++	0x01, 0x21, 0x3e, 0x05, 0x51, 0xcc, 0x15, 0x16,
++	0x0e, 0xa1, 0xbe, 0x84, 0x08, 0xe3, 0xd5, 0xd7,
++	0x4f, 0x01, 0x46, 0x49, 0x95, 0xa6, 0x9e, 0x61,
++	0x76, 0xcb, 0x9e, 0x02, 0xb2, 0x24, 0x7e, 0xd2,
++	0x99, 0x89, 0x2f, 0x91, 0x82, 0xa4, 0x5c, 0xaf,
++	0x4c, 0x69, 0x40, 0x56, 0x11, 0x76, 0x6e, 0xdf,
++	0xaf, 0xdc, 0x28, 0x55, 0x19, 0xea, 0x30, 0x48,
++	0x0c, 0x44, 0xf0, 0x5e, 0x78, 0x1e, 0xac, 0xf8,
++	0xfc, 0xec, 0xc7, 0x09, 0x0a, 0xbb, 0x28, 0xfa,
++	0x5f, 0xd5, 0x85, 0xac, 0x8c, 0xda, 0x7e, 0x87,
++	0x72, 0xe5, 0x94, 0xe4, 0xce, 0x6c, 0x88, 0x32,
++	0x81, 0x93, 0x2e, 0x0f, 0x89, 0xf8, 0x77, 0xa1,
++	0xf0, 0x4d, 0x9c, 0x32, 0xb0, 0x6c, 0xf9, 0x0b,
++	0x0e, 0x76, 0x2b, 0x43, 0x0c, 0x4d, 0x51, 0x7c,
++	0x97, 0x10, 0x70, 0x68, 0xf4, 0x98, 0xef, 0x7f
++};
++static const u8 enc_output077[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x4b, 0xc9, 0x8f, 0x72, 0xc4, 0x94, 0xc2, 0xa4,
++	0x3c, 0x2b, 0x15, 0xa1, 0x04, 0x3f, 0x1c, 0xfa
++};
++static const u8 enc_assoc077[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce077[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0xfb, 0x66
++};
++static const u8 enc_key077[] __initconst = {
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
++};
++
++/* wycheproof - checking for int overflows */
++static const u8 enc_input078[] __initconst = {
++	0xfa, 0xb1, 0xcd, 0xdf, 0x4f, 0xe1, 0x98, 0xef,
++	0x63, 0xad, 0xd8, 0x81, 0xd6, 0xea, 0xd6, 0xc5,
++	0x76, 0x37, 0xbb, 0xe9, 0x20, 0x18, 0xca, 0x7c,
++	0x0b, 0x96, 0xfb, 0xa0, 0x87, 0x1e, 0x93, 0x2d,
++	0xb1, 0xfb, 0xf9, 0x07, 0x61, 0xbe, 0x25, 0xdf,
++	0x8d, 0xfa, 0xf9, 0x31, 0xce, 0x57, 0x57, 0xe6,
++	0x17, 0xb3, 0xd7, 0xa9, 0xf0, 0xbf, 0x0f, 0xfe,
++	0x5d, 0x59, 0x1a, 0x33, 0xc1, 0x43, 0xb8, 0xf5,
++	0x3f, 0xd0, 0xb5, 0xa1, 0x96, 0x09, 0xfd, 0x62,
++	0xe5, 0xc2, 0x51, 0xa4, 0x28, 0x1a, 0x20, 0x0c,
++	0xfd, 0xc3, 0x4f, 0x28, 0x17, 0x10, 0x40, 0x6f,
++	0x4e, 0x37, 0x62, 0x54, 0x46, 0xff, 0x6e, 0xf2,
++	0x24, 0x91, 0x3d, 0xeb, 0x0d, 0x89, 0xaf, 0x33,
++	0x71, 0x28, 0xe3, 0xd1, 0x55, 0xd1, 0x6d, 0x3e,
++	0xc3, 0x24, 0x60, 0x41, 0x43, 0x21, 0x43, 0xe9,
++	0xab, 0x3a, 0x6d, 0x2c, 0xcc, 0x2f, 0x4d, 0x62
++};
++static const u8 enc_output078[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xf7, 0xe9, 0xe1, 0x51, 0xb0, 0x25, 0x33, 0xc7,
++	0x46, 0x58, 0xbf, 0xc7, 0x73, 0x7c, 0x68, 0x0d
++};
++static const u8 enc_assoc078[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce078[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xbb, 0x90
++};
++static const u8 enc_key078[] __initconst = {
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
++};
++
++/* wycheproof - checking for int overflows */
++static const u8 enc_input079[] __initconst = {
++	0x22, 0x72, 0x02, 0xbe, 0x7f, 0x35, 0x15, 0xe9,
++	0xd1, 0xc0, 0x2e, 0xea, 0x2f, 0x19, 0x50, 0xb6,
++	0x48, 0x1b, 0x04, 0x8a, 0x4c, 0x91, 0x50, 0x6c,
++	0xb4, 0x0d, 0x50, 0x4e, 0x6c, 0x94, 0x9f, 0x82,
++	0xd1, 0x97, 0xc2, 0x5a, 0xd1, 0x7d, 0xc7, 0x21,
++	0x65, 0x11, 0x25, 0x78, 0x2a, 0xc7, 0xa7, 0x12,
++	0x47, 0xfe, 0xae, 0xf3, 0x2f, 0x1f, 0x25, 0x0c,
++	0xe4, 0xbb, 0x8f, 0x79, 0xac, 0xaa, 0x17, 0x9d,
++	0x45, 0xa7, 0xb0, 0x54, 0x5f, 0x09, 0x24, 0x32,
++	0x5e, 0xfa, 0x87, 0xd5, 0xe4, 0x41, 0xd2, 0x84,
++	0x78, 0xc6, 0x1f, 0x22, 0x23, 0xee, 0x67, 0xc3,
++	0xb4, 0x1f, 0x43, 0x94, 0x53, 0x5e, 0x2a, 0x24,
++	0x36, 0x9a, 0x2e, 0x16, 0x61, 0x3c, 0x45, 0x94,
++	0x90, 0xc1, 0x4f, 0xb1, 0xd7, 0x55, 0xfe, 0x53,
++	0xfb, 0xe1, 0xee, 0x45, 0xb1, 0xb2, 0x1f, 0x71,
++	0x62, 0xe2, 0xfc, 0xaa, 0x74, 0x2a, 0xbe, 0xfd
++};
++static const u8 enc_output079[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x79, 0x5b, 0xcf, 0xf6, 0x47, 0xc5, 0x53, 0xc2,
++	0xe4, 0xeb, 0x6e, 0x0e, 0xaf, 0xd9, 0xe0, 0x4e
++};
++static const u8 enc_assoc079[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce079[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x48, 0x4a
++};
++static const u8 enc_key079[] __initconst = {
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
++};
++
++/* wycheproof - checking for int overflows */
++static const u8 enc_input080[] __initconst = {
++	0xfa, 0xe5, 0x83, 0x45, 0xc1, 0x6c, 0xb0, 0xf5,
++	0xcc, 0x53, 0x7f, 0x2b, 0x1b, 0x34, 0x69, 0xc9,
++	0x69, 0x46, 0x3b, 0x3e, 0xa7, 0x1b, 0xcf, 0x6b,
++	0x98, 0xd6, 0x69, 0xa8, 0xe6, 0x0e, 0x04, 0xfc,
++	0x08, 0xd5, 0xfd, 0x06, 0x9c, 0x36, 0x26, 0x38,
++	0xe3, 0x40, 0x0e, 0xf4, 0xcb, 0x24, 0x2e, 0x27,
++	0xe2, 0x24, 0x5e, 0x68, 0xcb, 0x9e, 0xc5, 0x83,
++	0xda, 0x53, 0x40, 0xb1, 0x2e, 0xdf, 0x42, 0x3b,
++	0x73, 0x26, 0xad, 0x20, 0xfe, 0xeb, 0x57, 0xda,
++	0xca, 0x2e, 0x04, 0x67, 0xa3, 0x28, 0x99, 0xb4,
++	0x2d, 0xf8, 0xe5, 0x6d, 0x84, 0xe0, 0x06, 0xbc,
++	0x8a, 0x7a, 0xcc, 0x73, 0x1e, 0x7c, 0x1f, 0x6b,
++	0xec, 0xb5, 0x71, 0x9f, 0x70, 0x77, 0xf0, 0xd4,
++	0xf4, 0xc6, 0x1a, 0xb1, 0x1e, 0xba, 0xc1, 0x00,
++	0x18, 0x01, 0xce, 0x33, 0xc4, 0xe4, 0xa7, 0x7d,
++	0x83, 0x1d, 0x3c, 0xe3, 0x4e, 0x84, 0x10, 0xe1
++};
++static const u8 enc_output080[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x19, 0x46, 0xd6, 0x53, 0x96, 0x0f, 0x94, 0x7a,
++	0x74, 0xd3, 0xe8, 0x09, 0x3c, 0xf4, 0x85, 0x02
++};
++static const u8 enc_assoc080[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce080[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x93, 0x2f, 0x40
++};
++static const u8 enc_key080[] __initconst = {
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
++};
++
++/* wycheproof - checking for int overflows */
++static const u8 enc_input081[] __initconst = {
++	0xeb, 0xb2, 0x16, 0xdd, 0xd7, 0xca, 0x70, 0x92,
++	0x15, 0xf5, 0x03, 0xdf, 0x9c, 0xe6, 0x3c, 0x5c,
++	0xd2, 0x19, 0x4e, 0x7d, 0x90, 0x99, 0xe8, 0xa9,
++	0x0b, 0x2a, 0xfa, 0xad, 0x5e, 0xba, 0x35, 0x06,
++	0x99, 0x25, 0xa6, 0x03, 0xfd, 0xbc, 0x34, 0x1a,
++	0xae, 0xd4, 0x15, 0x05, 0xb1, 0x09, 0x41, 0xfa,
++	0x38, 0x56, 0xa7, 0xe2, 0x47, 0xb1, 0x04, 0x07,
++	0x09, 0x74, 0x6c, 0xfc, 0x20, 0x96, 0xca, 0xa6,
++	0x31, 0xb2, 0xff, 0xf4, 0x1c, 0x25, 0x05, 0x06,
++	0xd8, 0x89, 0xc1, 0xc9, 0x06, 0x71, 0xad, 0xe8,
++	0x53, 0xee, 0x63, 0x94, 0xc1, 0x91, 0x92, 0xa5,
++	0xcf, 0x37, 0x10, 0xd1, 0x07, 0x30, 0x99, 0xe5,
++	0xbc, 0x94, 0x65, 0x82, 0xfc, 0x0f, 0xab, 0x9f,
++	0x54, 0x3c, 0x71, 0x6a, 0xe2, 0x48, 0x6a, 0x86,
++	0x83, 0xfd, 0xca, 0x39, 0xd2, 0xe1, 0x4f, 0x23,
++	0xd0, 0x0a, 0x58, 0x26, 0x64, 0xf4, 0xec, 0xb1
++};
++static const u8 enc_output081[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x36, 0xc3, 0x00, 0x29, 0x85, 0xdd, 0x21, 0xba,
++	0xf8, 0x95, 0xd6, 0x33, 0x57, 0x3f, 0x12, 0xc0
++};
++static const u8 enc_assoc081[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce081[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0xe2, 0x93, 0x35
++};
++static const u8 enc_key081[] __initconst = {
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
++	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
++};
++
++/* wycheproof - checking for int overflows */
++static const u8 enc_input082[] __initconst = {
++	0x40, 0x8a, 0xe6, 0xef, 0x1c, 0x7e, 0xf0, 0xfb,
++	0x2c, 0x2d, 0x61, 0x08, 0x16, 0xfc, 0x78, 0x49,
++	0xef, 0xa5, 0x8f, 0x78, 0x27, 0x3f, 0x5f, 0x16,
++	0x6e, 0xa6, 0x5f, 0x81, 0xb5, 0x75, 0x74, 0x7d,
++	0x03, 0x5b, 0x30, 0x40, 0xfe, 0xde, 0x1e, 0xb9,
++	0x45, 0x97, 0x88, 0x66, 0x97, 0x88, 0x40, 0x8e,
++	0x00, 0x41, 0x3b, 0x3e, 0x37, 0x6d, 0x15, 0x2d,
++	0x20, 0x4a, 0xa2, 0xb7, 0xa8, 0x35, 0x58, 0xfc,
++	0xd4, 0x8a, 0x0e, 0xf7, 0xa2, 0x6b, 0x1c, 0xd6,
++	0xd3, 0x5d, 0x23, 0xb3, 0xf5, 0xdf, 0xe0, 0xca,
++	0x77, 0xa4, 0xce, 0x32, 0xb9, 0x4a, 0xbf, 0x83,
++	0xda, 0x2a, 0xef, 0xca, 0xf0, 0x68, 0x38, 0x08,
++	0x79, 0xe8, 0x9f, 0xb0, 0xa3, 0x82, 0x95, 0x95,
++	0xcf, 0x44, 0xc3, 0x85, 0x2a, 0xe2, 0xcc, 0x66,
++	0x2b, 0x68, 0x9f, 0x93, 0x55, 0xd9, 0xc1, 0x83,
++	0x80, 0x1f, 0x6a, 0xcc, 0x31, 0x3f, 0x89, 0x07
++};
++static const u8 enc_output082[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x65, 0x14, 0x51, 0x8e, 0x0a, 0x26, 0x41, 0x42,
++	0xe0, 0xb7, 0x35, 0x1f, 0x96, 0x7f, 0xc2, 0xae
++};
++static const u8 enc_assoc082[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce082[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xf7, 0xd5
++};
++static const u8 enc_key082[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - checking for int overflows */
++static const u8 enc_input083[] __initconst = {
++	0x0a, 0x0a, 0x24, 0x49, 0x9b, 0xca, 0xde, 0x58,
++	0xcf, 0x15, 0x76, 0xc3, 0x12, 0xac, 0xa9, 0x84,
++	0x71, 0x8c, 0xb4, 0xcc, 0x7e, 0x01, 0x53, 0xf5,
++	0xa9, 0x01, 0x58, 0x10, 0x85, 0x96, 0x44, 0xdf,
++	0xc0, 0x21, 0x17, 0x4e, 0x0b, 0x06, 0x0a, 0x39,
++	0x74, 0x48, 0xde, 0x8b, 0x48, 0x4a, 0x86, 0x03,
++	0xbe, 0x68, 0x0a, 0x69, 0x34, 0xc0, 0x90, 0x6f,
++	0x30, 0xdd, 0x17, 0xea, 0xe2, 0xd4, 0xc5, 0xfa,
++	0xa7, 0x77, 0xf8, 0xca, 0x53, 0x37, 0x0e, 0x08,
++	0x33, 0x1b, 0x88, 0xc3, 0x42, 0xba, 0xc9, 0x59,
++	0x78, 0x7b, 0xbb, 0x33, 0x93, 0x0e, 0x3b, 0x56,
++	0xbe, 0x86, 0xda, 0x7f, 0x2a, 0x6e, 0xb1, 0xf9,
++	0x40, 0x89, 0xd1, 0xd1, 0x81, 0x07, 0x4d, 0x43,
++	0x02, 0xf8, 0xe0, 0x55, 0x2d, 0x0d, 0xe1, 0xfa,
++	0xb3, 0x06, 0xa2, 0x1b, 0x42, 0xd4, 0xc3, 0xba,
++	0x6e, 0x6f, 0x0c, 0xbc, 0xc8, 0x1e, 0x87, 0x7a
++};
++static const u8 enc_output083[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x4c, 0x19, 0x4d, 0xa6, 0xa9, 0x9f, 0xd6, 0x5b,
++	0x40, 0xe9, 0xca, 0xd7, 0x98, 0xf4, 0x4b, 0x19
++};
++static const u8 enc_assoc083[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce083[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0xfc, 0xe4
++};
++static const u8 enc_key083[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - checking for int overflows */
++static const u8 enc_input084[] __initconst = {
++	0x4a, 0x0a, 0xaf, 0xf8, 0x49, 0x47, 0x29, 0x18,
++	0x86, 0x91, 0x70, 0x13, 0x40, 0xf3, 0xce, 0x2b,
++	0x8a, 0x78, 0xee, 0xd3, 0xa0, 0xf0, 0x65, 0x99,
++	0x4b, 0x72, 0x48, 0x4e, 0x79, 0x91, 0xd2, 0x5c,
++	0x29, 0xaa, 0x07, 0x5e, 0xb1, 0xfc, 0x16, 0xde,
++	0x93, 0xfe, 0x06, 0x90, 0x58, 0x11, 0x2a, 0xb2,
++	0x84, 0xa3, 0xed, 0x18, 0x78, 0x03, 0x26, 0xd1,
++	0x25, 0x8a, 0x47, 0x22, 0x2f, 0xa6, 0x33, 0xd8,
++	0xb2, 0x9f, 0x3b, 0xd9, 0x15, 0x0b, 0x23, 0x9b,
++	0x15, 0x46, 0xc2, 0xbb, 0x9b, 0x9f, 0x41, 0x0f,
++	0xeb, 0xea, 0xd3, 0x96, 0x00, 0x0e, 0xe4, 0x77,
++	0x70, 0x15, 0x32, 0xc3, 0xd0, 0xf5, 0xfb, 0xf8,
++	0x95, 0xd2, 0x80, 0x19, 0x6d, 0x2f, 0x73, 0x7c,
++	0x5e, 0x9f, 0xec, 0x50, 0xd9, 0x2b, 0xb0, 0xdf,
++	0x5d, 0x7e, 0x51, 0x3b, 0xe5, 0xb8, 0xea, 0x97,
++	0x13, 0x10, 0xd5, 0xbf, 0x16, 0xba, 0x7a, 0xee
++};
++static const u8 enc_output084[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xc8, 0xae, 0x77, 0x88, 0xcd, 0x28, 0x74, 0xab,
++	0xc1, 0x38, 0x54, 0x1e, 0x11, 0xfd, 0x05, 0x87
++};
++static const u8 enc_assoc084[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce084[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x01, 0x84, 0x86, 0xa8
++};
++static const u8 enc_key084[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - checking for int overflows */
++static const u8 enc_input085[] __initconst = {
++	0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0x78, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
++	0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
++	0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0x9c, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
++	0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
++	0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0xd4, 0xd2, 0x06, 0x61, 0x6f, 0x92, 0x93, 0xf6,
++	0x5b, 0x45, 0xdb, 0xbc, 0x74, 0xe7, 0xc2, 0xed,
++	0xfb, 0xcb, 0xbf, 0x1c, 0xfb, 0x67, 0x9b, 0xb7,
++	0x39, 0xa5, 0x86, 0x2d, 0xe2, 0xbc, 0xb9, 0x37,
++	0xf7, 0x4d, 0x5b, 0xf8, 0x67, 0x1c, 0x5a, 0x8a,
++	0x50, 0x92, 0xf6, 0x1d, 0x54, 0xc9, 0xaa, 0x5b
++};
++static const u8 enc_output085[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x93, 0x3a, 0x51, 0x63, 0xc7, 0xf6, 0x23, 0x68,
++	0x32, 0x7b, 0x3f, 0xbc, 0x10, 0x36, 0xc9, 0x43
++};
++static const u8 enc_assoc085[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce085[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key085[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - special case tag */
++static const u8 enc_input086[] __initconst = {
++	0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
++	0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
++	0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
++	0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
++	0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
++	0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
++	0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
++	0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
++};
++static const u8 enc_output086[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
++	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
++};
++static const u8 enc_assoc086[] __initconst = {
++	0x85, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xa6, 0x90, 0x2f, 0xcb, 0xc8, 0x83, 0xbb, 0xc1,
++	0x80, 0xb2, 0x56, 0xae, 0x34, 0xad, 0x7f, 0x00
++};
++static const u8 enc_nonce086[] __initconst = {
++	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
++	0x08, 0x09, 0x0a, 0x0b
++};
++static const u8 enc_key086[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - special case tag */
++static const u8 enc_input087[] __initconst = {
++	0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
++	0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
++	0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
++	0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
++	0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
++	0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
++	0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
++	0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
++};
++static const u8 enc_output087[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 enc_assoc087[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x24, 0x7e, 0x50, 0x64, 0x2a, 0x1c, 0x0a, 0x2f,
++	0x8f, 0x77, 0x21, 0x96, 0x09, 0xdb, 0xa9, 0x58
++};
++static const u8 enc_nonce087[] __initconst = {
++	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
++	0x08, 0x09, 0x0a, 0x0b
++};
++static const u8 enc_key087[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - special case tag */
++static const u8 enc_input088[] __initconst = {
++	0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
++	0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
++	0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
++	0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
++	0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
++	0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
++	0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
++	0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
++};
++static const u8 enc_output088[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_assoc088[] __initconst = {
++	0x7c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xd9, 0xe7, 0x2c, 0x06, 0x4a, 0xc8, 0x96, 0x1f,
++	0x3f, 0xa5, 0x85, 0xe0, 0xe2, 0xab, 0xd6, 0x00
++};
++static const u8 enc_nonce088[] __initconst = {
++	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
++	0x08, 0x09, 0x0a, 0x0b
++};
++static const u8 enc_key088[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - special case tag */
++static const u8 enc_input089[] __initconst = {
++	0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
++	0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
++	0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
++	0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
++	0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
++	0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
++	0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
++	0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
++};
++static const u8 enc_output089[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
++	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
++};
++static const u8 enc_assoc089[] __initconst = {
++	0x65, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x95, 0xaf, 0x0f, 0x4d, 0x0b, 0x68, 0x6e, 0xae,
++	0xcc, 0xca, 0x43, 0x07, 0xd5, 0x96, 0xf5, 0x02
++};
++static const u8 enc_nonce089[] __initconst = {
++	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
++	0x08, 0x09, 0x0a, 0x0b
++};
++static const u8 enc_key089[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - special case tag */
++static const u8 enc_input090[] __initconst = {
++	0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
++	0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
++	0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
++	0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
++	0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
++	0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
++	0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
++	0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
++};
++static const u8 enc_output090[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
++	0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f
++};
++static const u8 enc_assoc090[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x85, 0x40, 0xb4, 0x64, 0x35, 0x77, 0x07, 0xbe,
++	0x3a, 0x39, 0xd5, 0x5c, 0x34, 0xf8, 0xbc, 0xb3
++};
++static const u8 enc_nonce090[] __initconst = {
++	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
++	0x08, 0x09, 0x0a, 0x0b
++};
++static const u8 enc_key090[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - special case tag */
++static const u8 enc_input091[] __initconst = {
++	0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
++	0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
++	0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
++	0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
++	0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
++	0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
++	0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
++	0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
++};
++static const u8 enc_output091[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
++	0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00
++};
++static const u8 enc_assoc091[] __initconst = {
++	0x4f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x66, 0x23, 0xd9, 0x90, 0xb8, 0x98, 0xd8, 0x30,
++	0xd2, 0x12, 0xaf, 0x23, 0x83, 0x33, 0x07, 0x01
++};
++static const u8 enc_nonce091[] __initconst = {
++	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
++	0x08, 0x09, 0x0a, 0x0b
++};
++static const u8 enc_key091[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - special case tag */
++static const u8 enc_input092[] __initconst = {
++	0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
++	0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
++	0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
++	0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
++	0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
++	0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
++	0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
++	0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
++};
++static const u8 enc_output092[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 enc_assoc092[] __initconst = {
++	0x83, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x5f, 0x16, 0xd0, 0x9f, 0x17, 0x78, 0x72, 0x11,
++	0xb7, 0xd4, 0x84, 0xe0, 0x24, 0xf8, 0x97, 0x01
++};
++static const u8 enc_nonce092[] __initconst = {
++	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
++	0x08, 0x09, 0x0a, 0x0b
++};
++static const u8 enc_key092[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input093[] __initconst = {
++	0x00, 0x52, 0x35, 0xd2, 0xa9, 0x19, 0xf2, 0x8d,
++	0x3d, 0xb7, 0x66, 0x4a, 0x34, 0xae, 0x6b, 0x44,
++	0x4d, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
++	0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
++	0x5b, 0x8b, 0x94, 0x50, 0x9e, 0x2b, 0x74, 0xa3,
++	0x6d, 0x34, 0x6e, 0x33, 0xd5, 0x72, 0x65, 0x9b,
++	0xa9, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
++	0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
++	0x83, 0xdc, 0xe9, 0xf3, 0x07, 0x3e, 0xfa, 0xdb,
++	0x7d, 0x23, 0xb8, 0x7a, 0xce, 0x35, 0x16, 0x8c
++};
++static const u8 enc_output093[] __initconst = {
++	0x00, 0x39, 0xe2, 0xfd, 0x2f, 0xd3, 0x12, 0x14,
++	0x9e, 0x98, 0x98, 0x80, 0x88, 0x48, 0x13, 0xe7,
++	0xca, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x3b, 0x0e, 0x86, 0x9a, 0xaa, 0x8e, 0xa4, 0x96,
++	0x32, 0xff, 0xff, 0x37, 0xb9, 0xe8, 0xce, 0x00,
++	0xca, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x3b, 0x0e, 0x86, 0x9a, 0xaa, 0x8e, 0xa4, 0x96,
++	0x32, 0xff, 0xff, 0x37, 0xb9, 0xe8, 0xce, 0x00,
++	0xa5, 0x19, 0xac, 0x1a, 0x35, 0xb4, 0xa5, 0x77,
++	0x87, 0x51, 0x0a, 0xf7, 0x8d, 0x8d, 0x20, 0x0a
++};
++static const u8 enc_assoc093[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce093[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key093[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input094[] __initconst = {
++	0xd3, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0xe5, 0xda, 0x78, 0x76, 0x6f, 0xa1, 0x92, 0x90,
++	0xc0, 0x31, 0xf7, 0x52, 0x08, 0x50, 0x67, 0x45,
++	0xae, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0x49, 0x6d, 0xde, 0xb0, 0x55, 0x09, 0xc6, 0xef,
++	0xff, 0xab, 0x75, 0xeb, 0x2d, 0xf4, 0xab, 0x09,
++	0x76, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0x01, 0x49, 0xef, 0x50, 0x4b, 0x71, 0xb1, 0x20,
++	0xca, 0x4f, 0xf3, 0x95, 0x19, 0xc2, 0xc2, 0x10
++};
++static const u8 enc_output094[] __initconst = {
++	0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x62, 0x18, 0xb2, 0x7f, 0x83, 0xb8, 0xb4, 0x66,
++	0x02, 0xf6, 0xe1, 0xd8, 0x34, 0x20, 0x7b, 0x02,
++	0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x2a, 0x64, 0x16, 0xce, 0xdb, 0x1c, 0xdd, 0x29,
++	0x6e, 0xf5, 0xd7, 0xd6, 0x92, 0xda, 0xff, 0x02,
++	0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x2a, 0x64, 0x16, 0xce, 0xdb, 0x1c, 0xdd, 0x29,
++	0x6e, 0xf5, 0xd7, 0xd6, 0x92, 0xda, 0xff, 0x02,
++	0x30, 0x2f, 0xe8, 0x2a, 0xb0, 0xa0, 0x9a, 0xf6,
++	0x44, 0x00, 0xd0, 0x15, 0xae, 0x83, 0xd9, 0xcc
++};
++static const u8 enc_assoc094[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce094[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key094[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input095[] __initconst = {
++	0xe9, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0x6d, 0xf1, 0x39, 0x4e, 0xdc, 0x53, 0x9b, 0x5b,
++	0x3a, 0x09, 0x57, 0xbe, 0x0f, 0xb8, 0x59, 0x46,
++	0x80, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0xd1, 0x76, 0x9f, 0xe8, 0x06, 0xbb, 0xfe, 0xb6,
++	0xf5, 0x90, 0x95, 0x0f, 0x2e, 0xac, 0x9e, 0x0a,
++	0x58, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0x99, 0x52, 0xae, 0x08, 0x18, 0xc3, 0x89, 0x79,
++	0xc0, 0x74, 0x13, 0x71, 0x1a, 0x9a, 0xf7, 0x13
++};
++static const u8 enc_output095[] __initconst = {
++	0xe9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xea, 0x33, 0xf3, 0x47, 0x30, 0x4a, 0xbd, 0xad,
++	0xf8, 0xce, 0x41, 0x34, 0x33, 0xc8, 0x45, 0x01,
++	0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xb2, 0x7f, 0x57, 0x96, 0x88, 0xae, 0xe5, 0x70,
++	0x64, 0xce, 0x37, 0x32, 0x91, 0x82, 0xca, 0x01,
++	0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xb2, 0x7f, 0x57, 0x96, 0x88, 0xae, 0xe5, 0x70,
++	0x64, 0xce, 0x37, 0x32, 0x91, 0x82, 0xca, 0x01,
++	0x98, 0xa7, 0xe8, 0x36, 0xe0, 0xee, 0x4d, 0x02,
++	0x35, 0x00, 0xd0, 0x55, 0x7e, 0xc2, 0xcb, 0xe0
++};
++static const u8 enc_assoc095[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce095[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key095[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input096[] __initconst = {
++	0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0x64, 0xf9, 0x0f, 0x5b, 0x26, 0x92, 0xb8, 0x60,
++	0xd4, 0x59, 0x6f, 0xf4, 0xb3, 0x40, 0x2c, 0x5c,
++	0x00, 0xb9, 0xbb, 0x53, 0x70, 0x7a, 0xa6, 0x67,
++	0xd3, 0x56, 0xfe, 0x50, 0xc7, 0x19, 0x96, 0x94,
++	0x03, 0x35, 0x61, 0xe7, 0xca, 0xca, 0x6d, 0x94,
++	0x1d, 0xc3, 0xcd, 0x69, 0x14, 0xad, 0x69, 0x04
++};
++static const u8 enc_output096[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xe3, 0x3b, 0xc5, 0x52, 0xca, 0x8b, 0x9e, 0x96,
++	0x16, 0x9e, 0x79, 0x7e, 0x8f, 0x30, 0x30, 0x1b,
++	0x60, 0x3c, 0xa9, 0x99, 0x44, 0xdf, 0x76, 0x52,
++	0x8c, 0x9d, 0x6f, 0x54, 0xab, 0x83, 0x3d, 0x0f,
++	0x60, 0x3c, 0xa9, 0x99, 0x44, 0xdf, 0x76, 0x52,
++	0x8c, 0x9d, 0x6f, 0x54, 0xab, 0x83, 0x3d, 0x0f,
++	0x6a, 0xb8, 0xdc, 0xe2, 0xc5, 0x9d, 0xa4, 0x73,
++	0x71, 0x30, 0xb0, 0x25, 0x2f, 0x68, 0xa8, 0xd8
++};
++static const u8 enc_assoc096[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce096[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key096[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input097[] __initconst = {
++	0x68, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0xb0, 0x8f, 0x25, 0x67, 0x5b, 0x9b, 0xcb, 0xf6,
++	0xe3, 0x84, 0x07, 0xde, 0x2e, 0xc7, 0x5a, 0x47,
++	0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0x2d, 0x2a, 0xf7, 0xcd, 0x6b, 0x08, 0x05, 0x01,
++	0xd3, 0x1b, 0xa5, 0x4f, 0xb2, 0xeb, 0x75, 0x96,
++	0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0x65, 0x0e, 0xc6, 0x2d, 0x75, 0x70, 0x72, 0xce,
++	0xe6, 0xff, 0x23, 0x31, 0x86, 0xdd, 0x1c, 0x8f
++};
++static const u8 enc_output097[] __initconst = {
++	0x68, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x37, 0x4d, 0xef, 0x6e, 0xb7, 0x82, 0xed, 0x00,
++	0x21, 0x43, 0x11, 0x54, 0x12, 0xb7, 0x46, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x4e, 0x23, 0x3f, 0xb3, 0xe5, 0x1d, 0x1e, 0xc7,
++	0x42, 0x45, 0x07, 0x72, 0x0d, 0xc5, 0x21, 0x9d,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x4e, 0x23, 0x3f, 0xb3, 0xe5, 0x1d, 0x1e, 0xc7,
++	0x42, 0x45, 0x07, 0x72, 0x0d, 0xc5, 0x21, 0x9d,
++	0x04, 0x4d, 0xea, 0x60, 0x88, 0x80, 0x41, 0x2b,
++	0xfd, 0xff, 0xcf, 0x35, 0x57, 0x9e, 0x9b, 0x26
++};
++static const u8 enc_assoc097[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce097[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key097[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input098[] __initconst = {
++	0x6d, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0xa1, 0x61, 0xb5, 0xab, 0x04, 0x09, 0x00, 0x62,
++	0x9e, 0xfe, 0xff, 0x78, 0xd7, 0xd8, 0x6b, 0x45,
++	0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0xc6, 0xf8, 0x07, 0x8c, 0xc8, 0xef, 0x12, 0xa0,
++	0xff, 0x65, 0x7d, 0x6d, 0x08, 0xdb, 0x10, 0xb8,
++	0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0x8e, 0xdc, 0x36, 0x6c, 0xd6, 0x97, 0x65, 0x6f,
++	0xca, 0x81, 0xfb, 0x13, 0x3c, 0xed, 0x79, 0xa1
++};
++static const u8 enc_output098[] __initconst = {
++	0x6d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x26, 0xa3, 0x7f, 0xa2, 0xe8, 0x10, 0x26, 0x94,
++	0x5c, 0x39, 0xe9, 0xf2, 0xeb, 0xa8, 0x77, 0x02,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xa5, 0xf1, 0xcf, 0xf2, 0x46, 0xfa, 0x09, 0x66,
++	0x6e, 0x3b, 0xdf, 0x50, 0xb7, 0xf5, 0x44, 0xb3,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xa5, 0xf1, 0xcf, 0xf2, 0x46, 0xfa, 0x09, 0x66,
++	0x6e, 0x3b, 0xdf, 0x50, 0xb7, 0xf5, 0x44, 0xb3,
++	0x1e, 0x6b, 0xea, 0x63, 0x14, 0x54, 0x2e, 0x2e,
++	0xf9, 0xff, 0xcf, 0x45, 0x0b, 0x2e, 0x98, 0x2b
++};
++static const u8 enc_assoc098[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce098[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key098[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input099[] __initconst = {
++	0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0xfc, 0x01, 0xb8, 0x91, 0xe5, 0xf0, 0xf9, 0x12,
++	0x8d, 0x7d, 0x1c, 0x57, 0x91, 0x92, 0xb6, 0x98,
++	0x63, 0x41, 0x44, 0x15, 0xb6, 0x99, 0x68, 0x95,
++	0x9a, 0x72, 0x91, 0xb7, 0xa5, 0xaf, 0x13, 0x48,
++	0x60, 0xcd, 0x9e, 0xa1, 0x0c, 0x29, 0xa3, 0x66,
++	0x54, 0xe7, 0xa2, 0x8e, 0x76, 0x1b, 0xec, 0xd8
++};
++static const u8 enc_output099[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x7b, 0xc3, 0x72, 0x98, 0x09, 0xe9, 0xdf, 0xe4,
++	0x4f, 0xba, 0x0a, 0xdd, 0xad, 0xe2, 0xaa, 0xdf,
++	0x03, 0xc4, 0x56, 0xdf, 0x82, 0x3c, 0xb8, 0xa0,
++	0xc5, 0xb9, 0x00, 0xb3, 0xc9, 0x35, 0xb8, 0xd3,
++	0x03, 0xc4, 0x56, 0xdf, 0x82, 0x3c, 0xb8, 0xa0,
++	0xc5, 0xb9, 0x00, 0xb3, 0xc9, 0x35, 0xb8, 0xd3,
++	0xed, 0x20, 0x17, 0xc8, 0xdb, 0xa4, 0x77, 0x56,
++	0x29, 0x04, 0x9d, 0x78, 0x6e, 0x3b, 0xce, 0xb1
++};
++static const u8 enc_assoc099[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce099[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key099[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input100[] __initconst = {
++	0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0x6b, 0x6d, 0xc9, 0xd2, 0x1a, 0x81, 0x9e, 0x70,
++	0xb5, 0x77, 0xf4, 0x41, 0x37, 0xd3, 0xd6, 0xbd,
++	0x13, 0x35, 0xf5, 0xeb, 0x44, 0x49, 0x40, 0x77,
++	0xb2, 0x64, 0x49, 0xa5, 0x4b, 0x6c, 0x7c, 0x75,
++	0x10, 0xb9, 0x2f, 0x5f, 0xfe, 0xf9, 0x8b, 0x84,
++	0x7c, 0xf1, 0x7a, 0x9c, 0x98, 0xd8, 0x83, 0xe5
++};
++static const u8 enc_output100[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xec, 0xaf, 0x03, 0xdb, 0xf6, 0x98, 0xb8, 0x86,
++	0x77, 0xb0, 0xe2, 0xcb, 0x0b, 0xa3, 0xca, 0xfa,
++	0x73, 0xb0, 0xe7, 0x21, 0x70, 0xec, 0x90, 0x42,
++	0xed, 0xaf, 0xd8, 0xa1, 0x27, 0xf6, 0xd7, 0xee,
++	0x73, 0xb0, 0xe7, 0x21, 0x70, 0xec, 0x90, 0x42,
++	0xed, 0xaf, 0xd8, 0xa1, 0x27, 0xf6, 0xd7, 0xee,
++	0x07, 0x3f, 0x17, 0xcb, 0x67, 0x78, 0x64, 0x59,
++	0x25, 0x04, 0x9d, 0x88, 0x22, 0xcb, 0xca, 0xb6
++};
++static const u8 enc_assoc100[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce100[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key100[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input101[] __initconst = {
++	0xff, 0xcb, 0x2b, 0x11, 0x06, 0xf8, 0x23, 0x4c,
++	0x5e, 0x99, 0xd4, 0xdb, 0x4c, 0x70, 0x48, 0xde,
++	0x32, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
++	0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
++	0x16, 0xe9, 0x88, 0x4a, 0x11, 0x4f, 0x0e, 0x92,
++	0x66, 0xce, 0xa3, 0x88, 0x5f, 0xe3, 0x6b, 0x9f,
++	0xd6, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
++	0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
++	0xce, 0xbe, 0xf5, 0xe9, 0x88, 0x5a, 0x80, 0xea,
++	0x76, 0xd9, 0x75, 0xc1, 0x44, 0xa4, 0x18, 0x88
++};
++static const u8 enc_output101[] __initconst = {
++	0xff, 0xa0, 0xfc, 0x3e, 0x80, 0x32, 0xc3, 0xd5,
++	0xfd, 0xb6, 0x2a, 0x11, 0xf0, 0x96, 0x30, 0x7d,
++	0xb5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x76, 0x6c, 0x9a, 0x80, 0x25, 0xea, 0xde, 0xa7,
++	0x39, 0x05, 0x32, 0x8c, 0x33, 0x79, 0xc0, 0x04,
++	0xb5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x76, 0x6c, 0x9a, 0x80, 0x25, 0xea, 0xde, 0xa7,
++	0x39, 0x05, 0x32, 0x8c, 0x33, 0x79, 0xc0, 0x04,
++	0x8b, 0x9b, 0xb4, 0xb4, 0x86, 0x12, 0x89, 0x65,
++	0x8c, 0x69, 0x6a, 0x83, 0x40, 0x15, 0x04, 0x05
++};
++static const u8 enc_assoc101[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce101[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key101[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input102[] __initconst = {
++	0x6f, 0x9e, 0x70, 0xed, 0x3b, 0x8b, 0xac, 0xa0,
++	0x26, 0xe4, 0x6a, 0x5a, 0x09, 0x43, 0x15, 0x8d,
++	0x21, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
++	0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
++	0x0c, 0x61, 0x2c, 0x5e, 0x8d, 0x89, 0xa8, 0x73,
++	0xdb, 0xca, 0xad, 0x5b, 0x73, 0x46, 0x42, 0x9b,
++	0xc5, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
++	0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
++	0xd4, 0x36, 0x51, 0xfd, 0x14, 0x9c, 0x26, 0x0b,
++	0xcb, 0xdd, 0x7b, 0x12, 0x68, 0x01, 0x31, 0x8c
++};
++static const u8 enc_output102[] __initconst = {
++	0x6f, 0xf5, 0xa7, 0xc2, 0xbd, 0x41, 0x4c, 0x39,
++	0x85, 0xcb, 0x94, 0x90, 0xb5, 0xa5, 0x6d, 0x2e,
++	0xa6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x6c, 0xe4, 0x3e, 0x94, 0xb9, 0x2c, 0x78, 0x46,
++	0x84, 0x01, 0x3c, 0x5f, 0x1f, 0xdc, 0xe9, 0x00,
++	0xa6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x6c, 0xe4, 0x3e, 0x94, 0xb9, 0x2c, 0x78, 0x46,
++	0x84, 0x01, 0x3c, 0x5f, 0x1f, 0xdc, 0xe9, 0x00,
++	0x8b, 0x3b, 0xbd, 0x51, 0x64, 0x44, 0x59, 0x56,
++	0x8d, 0x81, 0xca, 0x1f, 0xa7, 0x2c, 0xe4, 0x04
++};
++static const u8 enc_assoc102[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce102[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key102[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input103[] __initconst = {
++	0x41, 0x2b, 0x08, 0x0a, 0x3e, 0x19, 0xc1, 0x0d,
++	0x44, 0xa1, 0xaf, 0x1e, 0xab, 0xde, 0xb4, 0xce,
++	0x35, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
++	0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
++	0x6b, 0x83, 0x94, 0x33, 0x09, 0x21, 0x48, 0x6c,
++	0xa1, 0x1d, 0x29, 0x1c, 0x3e, 0x97, 0xee, 0x9a,
++	0xd1, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
++	0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
++	0xb3, 0xd4, 0xe9, 0x90, 0x90, 0x34, 0xc6, 0x14,
++	0xb1, 0x0a, 0xff, 0x55, 0x25, 0xd0, 0x9d, 0x8d
++};
++static const u8 enc_output103[] __initconst = {
++	0x41, 0x40, 0xdf, 0x25, 0xb8, 0xd3, 0x21, 0x94,
++	0xe7, 0x8e, 0x51, 0xd4, 0x17, 0x38, 0xcc, 0x6d,
++	0xb2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x0b, 0x06, 0x86, 0xf9, 0x3d, 0x84, 0x98, 0x59,
++	0xfe, 0xd6, 0xb8, 0x18, 0x52, 0x0d, 0x45, 0x01,
++	0xb2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x0b, 0x06, 0x86, 0xf9, 0x3d, 0x84, 0x98, 0x59,
++	0xfe, 0xd6, 0xb8, 0x18, 0x52, 0x0d, 0x45, 0x01,
++	0x86, 0xfb, 0xab, 0x2b, 0x4a, 0x94, 0xf4, 0x7a,
++	0xa5, 0x6f, 0x0a, 0xea, 0x65, 0xd1, 0x10, 0x08
++};
++static const u8 enc_assoc103[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce103[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key103[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input104[] __initconst = {
++	0xb2, 0x47, 0xa7, 0x47, 0x23, 0x49, 0x1a, 0xac,
++	0xac, 0xaa, 0xd7, 0x09, 0xc9, 0x1e, 0x93, 0x2b,
++	0x31, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
++	0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
++	0x9a, 0xde, 0x04, 0xe7, 0x5b, 0xb7, 0x01, 0xd9,
++	0x66, 0x06, 0x01, 0xb3, 0x47, 0x65, 0xde, 0x98,
++	0xd5, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
++	0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
++	0x42, 0x89, 0x79, 0x44, 0xc2, 0xa2, 0x8f, 0xa1,
++	0x76, 0x11, 0xd7, 0xfa, 0x5c, 0x22, 0xad, 0x8f
++};
++static const u8 enc_output104[] __initconst = {
++	0xb2, 0x2c, 0x70, 0x68, 0xa5, 0x83, 0xfa, 0x35,
++	0x0f, 0x85, 0x29, 0xc3, 0x75, 0xf8, 0xeb, 0x88,
++	0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xfa, 0x5b, 0x16, 0x2d, 0x6f, 0x12, 0xd1, 0xec,
++	0x39, 0xcd, 0x90, 0xb7, 0x2b, 0xff, 0x75, 0x03,
++	0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xfa, 0x5b, 0x16, 0x2d, 0x6f, 0x12, 0xd1, 0xec,
++	0x39, 0xcd, 0x90, 0xb7, 0x2b, 0xff, 0x75, 0x03,
++	0xa0, 0x19, 0xac, 0x2e, 0xd6, 0x67, 0xe1, 0x7d,
++	0xa1, 0x6f, 0x0a, 0xfa, 0x19, 0x61, 0x0d, 0x0d
++};
++static const u8 enc_assoc104[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce104[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key104[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input105[] __initconst = {
++	0x74, 0x0f, 0x9e, 0x49, 0xf6, 0x10, 0xef, 0xa5,
++	0x85, 0xb6, 0x59, 0xca, 0x6e, 0xd8, 0xb4, 0x99,
++	0x2d, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
++	0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
++	0x41, 0x2d, 0x96, 0xaf, 0xbe, 0x80, 0xec, 0x3e,
++	0x79, 0xd4, 0x51, 0xb0, 0x0a, 0x2d, 0xb2, 0x9a,
++	0xc9, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
++	0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
++	0x99, 0x7a, 0xeb, 0x0c, 0x27, 0x95, 0x62, 0x46,
++	0x69, 0xc3, 0x87, 0xf9, 0x11, 0x6a, 0xc1, 0x8d
++};
++static const u8 enc_output105[] __initconst = {
++	0x74, 0x64, 0x49, 0x66, 0x70, 0xda, 0x0f, 0x3c,
++	0x26, 0x99, 0xa7, 0x00, 0xd2, 0x3e, 0xcc, 0x3a,
++	0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x21, 0xa8, 0x84, 0x65, 0x8a, 0x25, 0x3c, 0x0b,
++	0x26, 0x1f, 0xc0, 0xb4, 0x66, 0xb7, 0x19, 0x01,
++	0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x21, 0xa8, 0x84, 0x65, 0x8a, 0x25, 0x3c, 0x0b,
++	0x26, 0x1f, 0xc0, 0xb4, 0x66, 0xb7, 0x19, 0x01,
++	0x73, 0x6e, 0x18, 0x18, 0x16, 0x96, 0xa5, 0x88,
++	0x9c, 0x31, 0x59, 0xfa, 0xab, 0xab, 0x20, 0xfd
++};
++static const u8 enc_assoc105[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce105[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key105[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input106[] __initconst = {
++	0xad, 0xba, 0x5d, 0x10, 0x5b, 0xc8, 0xaa, 0x06,
++	0x2c, 0x23, 0x36, 0xcb, 0x88, 0x9d, 0xdb, 0xd5,
++	0x37, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
++	0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
++	0x17, 0x7c, 0x5f, 0xfe, 0x28, 0x75, 0xf4, 0x68,
++	0xf6, 0xc2, 0x96, 0x57, 0x48, 0xf3, 0x59, 0x9a,
++	0xd3, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
++	0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
++	0xcf, 0x2b, 0x22, 0x5d, 0xb1, 0x60, 0x7a, 0x10,
++	0xe6, 0xd5, 0x40, 0x1e, 0x53, 0xb4, 0x2a, 0x8d
++};
++static const u8 enc_output106[] __initconst = {
++	0xad, 0xd1, 0x8a, 0x3f, 0xdd, 0x02, 0x4a, 0x9f,
++	0x8f, 0x0c, 0xc8, 0x01, 0x34, 0x7b, 0xa3, 0x76,
++	0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x77, 0xf9, 0x4d, 0x34, 0x1c, 0xd0, 0x24, 0x5d,
++	0xa9, 0x09, 0x07, 0x53, 0x24, 0x69, 0xf2, 0x01,
++	0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x77, 0xf9, 0x4d, 0x34, 0x1c, 0xd0, 0x24, 0x5d,
++	0xa9, 0x09, 0x07, 0x53, 0x24, 0x69, 0xf2, 0x01,
++	0xba, 0xd5, 0x8f, 0x10, 0xa9, 0x1e, 0x6a, 0x88,
++	0x9a, 0xba, 0x32, 0xfd, 0x17, 0xd8, 0x33, 0x1a
++};
++static const u8 enc_assoc106[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce106[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key106[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input107[] __initconst = {
++	0xfe, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0xc0, 0x01, 0xed, 0xc5, 0xda, 0x44, 0x2e, 0x71,
++	0x9b, 0xce, 0x9a, 0xbe, 0x27, 0x3a, 0xf1, 0x44,
++	0xb4, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0x48, 0x02, 0x5f, 0x41, 0xfa, 0x4e, 0x33, 0x6c,
++	0x78, 0x69, 0x57, 0xa2, 0xa7, 0xc4, 0x93, 0x0a,
++	0x6c, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0x00, 0x26, 0x6e, 0xa1, 0xe4, 0x36, 0x44, 0xa3,
++	0x4d, 0x8d, 0xd1, 0xdc, 0x93, 0xf2, 0xfa, 0x13
++};
++static const u8 enc_output107[] __initconst = {
++	0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x47, 0xc3, 0x27, 0xcc, 0x36, 0x5d, 0x08, 0x87,
++	0x59, 0x09, 0x8c, 0x34, 0x1b, 0x4a, 0xed, 0x03,
++	0xd4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x2b, 0x0b, 0x97, 0x3f, 0x74, 0x5b, 0x28, 0xaa,
++	0xe9, 0x37, 0xf5, 0x9f, 0x18, 0xea, 0xc7, 0x01,
++	0xd4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x2b, 0x0b, 0x97, 0x3f, 0x74, 0x5b, 0x28, 0xaa,
++	0xe9, 0x37, 0xf5, 0x9f, 0x18, 0xea, 0xc7, 0x01,
++	0xd6, 0x8c, 0xe1, 0x74, 0x07, 0x9a, 0xdd, 0x02,
++	0x8d, 0xd0, 0x5c, 0xf8, 0x14, 0x63, 0x04, 0x88
++};
++static const u8 enc_assoc107[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce107[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key107[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input108[] __initconst = {
++	0xb5, 0x13, 0xb0, 0x6a, 0xb9, 0xac, 0x14, 0x43,
++	0x5a, 0xcb, 0x8a, 0xa3, 0xa3, 0x7a, 0xfd, 0xb6,
++	0x54, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
++	0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
++	0x61, 0x95, 0x01, 0x93, 0xb1, 0xbf, 0x03, 0x11,
++	0xff, 0x11, 0x79, 0x89, 0xae, 0xd9, 0xa9, 0x99,
++	0xb0, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
++	0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
++	0xb9, 0xc2, 0x7c, 0x30, 0x28, 0xaa, 0x8d, 0x69,
++	0xef, 0x06, 0xaf, 0xc0, 0xb5, 0x9e, 0xda, 0x8e
++};
++static const u8 enc_output108[] __initconst = {
++	0xb5, 0x78, 0x67, 0x45, 0x3f, 0x66, 0xf4, 0xda,
++	0xf9, 0xe4, 0x74, 0x69, 0x1f, 0x9c, 0x85, 0x15,
++	0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x01, 0x10, 0x13, 0x59, 0x85, 0x1a, 0xd3, 0x24,
++	0xa0, 0xda, 0xe8, 0x8d, 0xc2, 0x43, 0x02, 0x02,
++	0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x01, 0x10, 0x13, 0x59, 0x85, 0x1a, 0xd3, 0x24,
++	0xa0, 0xda, 0xe8, 0x8d, 0xc2, 0x43, 0x02, 0x02,
++	0xaa, 0x48, 0xa3, 0x88, 0x7d, 0x4b, 0x05, 0x96,
++	0x99, 0xc2, 0xfd, 0xf9, 0xc6, 0x78, 0x7e, 0x0a
++};
++static const u8 enc_assoc108[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce108[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key108[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input109[] __initconst = {
++	0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0xd4, 0xf1, 0x09, 0xe8, 0x14, 0xce, 0xa8, 0x5a,
++	0x08, 0xc0, 0x11, 0xd8, 0x50, 0xdd, 0x1d, 0xcb,
++	0xcf, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0x53, 0x40, 0xb8, 0x5a, 0x9a, 0xa0, 0x82, 0x96,
++	0xb7, 0x7a, 0x5f, 0xc3, 0x96, 0x1f, 0x66, 0x0f,
++	0x17, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0x1b, 0x64, 0x89, 0xba, 0x84, 0xd8, 0xf5, 0x59,
++	0x82, 0x9e, 0xd9, 0xbd, 0xa2, 0x29, 0x0f, 0x16
++};
++static const u8 enc_output109[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x53, 0x33, 0xc3, 0xe1, 0xf8, 0xd7, 0x8e, 0xac,
++	0xca, 0x07, 0x07, 0x52, 0x6c, 0xad, 0x01, 0x8c,
++	0xaf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x30, 0x49, 0x70, 0x24, 0x14, 0xb5, 0x99, 0x50,
++	0x26, 0x24, 0xfd, 0xfe, 0x29, 0x31, 0x32, 0x04,
++	0xaf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x30, 0x49, 0x70, 0x24, 0x14, 0xb5, 0x99, 0x50,
++	0x26, 0x24, 0xfd, 0xfe, 0x29, 0x31, 0x32, 0x04,
++	0xb9, 0x36, 0xa8, 0x17, 0xf2, 0x21, 0x1a, 0xf1,
++	0x29, 0xe2, 0xcf, 0x16, 0x0f, 0xd4, 0x2b, 0xcb
++};
++static const u8 enc_assoc109[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce109[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key109[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input110[] __initconst = {
++	0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0xdf, 0x4c, 0x62, 0x03, 0x2d, 0x41, 0x19, 0xb5,
++	0x88, 0x47, 0x7e, 0x99, 0x92, 0x5a, 0x56, 0xd9,
++	0xd6, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0xfa, 0x84, 0xf0, 0x64, 0x55, 0x36, 0x42, 0x1b,
++	0x2b, 0xb9, 0x24, 0x6e, 0xc2, 0x19, 0xed, 0x0b,
++	0x0e, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0xb2, 0xa0, 0xc1, 0x84, 0x4b, 0x4e, 0x35, 0xd4,
++	0x1e, 0x5d, 0xa2, 0x10, 0xf6, 0x2f, 0x84, 0x12
++};
++static const u8 enc_output110[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x58, 0x8e, 0xa8, 0x0a, 0xc1, 0x58, 0x3f, 0x43,
++	0x4a, 0x80, 0x68, 0x13, 0xae, 0x2a, 0x4a, 0x9e,
++	0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x99, 0x8d, 0x38, 0x1a, 0xdb, 0x23, 0x59, 0xdd,
++	0xba, 0xe7, 0x86, 0x53, 0x7d, 0x37, 0xb9, 0x00,
++	0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x99, 0x8d, 0x38, 0x1a, 0xdb, 0x23, 0x59, 0xdd,
++	0xba, 0xe7, 0x86, 0x53, 0x7d, 0x37, 0xb9, 0x00,
++	0x9f, 0x7a, 0xc4, 0x35, 0x1f, 0x6b, 0x91, 0xe6,
++	0x30, 0x97, 0xa7, 0x13, 0x11, 0x5d, 0x05, 0xbe
++};
++static const u8 enc_assoc110[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce110[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key110[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input111[] __initconst = {
++	0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0x13, 0xf8, 0x0a, 0x00, 0x6d, 0xc1, 0xbb, 0xda,
++	0xd6, 0x39, 0xa9, 0x2f, 0xc7, 0xec, 0xa6, 0x55,
++	0xf7, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0x63, 0x48, 0xb8, 0xfd, 0x29, 0xbf, 0x96, 0xd5,
++	0x63, 0xa5, 0x17, 0xe2, 0x7d, 0x7b, 0xfc, 0x0f,
++	0x2f, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0x2b, 0x6c, 0x89, 0x1d, 0x37, 0xc7, 0xe1, 0x1a,
++	0x56, 0x41, 0x91, 0x9c, 0x49, 0x4d, 0x95, 0x16
++};
++static const u8 enc_output111[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x94, 0x3a, 0xc0, 0x09, 0x81, 0xd8, 0x9d, 0x2c,
++	0x14, 0xfe, 0xbf, 0xa5, 0xfb, 0x9c, 0xba, 0x12,
++	0x97, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x41, 0x70, 0x83, 0xa7, 0xaa, 0x8d, 0x13,
++	0xf2, 0xfb, 0xb5, 0xdf, 0xc2, 0x55, 0xa8, 0x04,
++	0x97, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x00, 0x41, 0x70, 0x83, 0xa7, 0xaa, 0x8d, 0x13,
++	0xf2, 0xfb, 0xb5, 0xdf, 0xc2, 0x55, 0xa8, 0x04,
++	0x9a, 0x18, 0xa8, 0x28, 0x07, 0x02, 0x69, 0xf4,
++	0x47, 0x00, 0xd0, 0x09, 0xe7, 0x17, 0x1c, 0xc9
++};
++static const u8 enc_assoc111[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce111[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key111[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input112[] __initconst = {
++	0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0x82, 0xe5, 0x9b, 0x45, 0x82, 0x91, 0x50, 0x38,
++	0xf9, 0x33, 0x81, 0x1e, 0x65, 0x2d, 0xc6, 0x6a,
++	0xfc, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0xb6, 0x71, 0xc8, 0xca, 0xc2, 0x70, 0xc2, 0x65,
++	0xa0, 0xac, 0x2f, 0x53, 0x57, 0x99, 0x88, 0x0a,
++	0x24, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0xfe, 0x55, 0xf9, 0x2a, 0xdc, 0x08, 0xb5, 0xaa,
++	0x95, 0x48, 0xa9, 0x2d, 0x63, 0xaf, 0xe1, 0x13
++};
++static const u8 enc_output112[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x05, 0x27, 0x51, 0x4c, 0x6e, 0x88, 0x76, 0xce,
++	0x3b, 0xf4, 0x97, 0x94, 0x59, 0x5d, 0xda, 0x2d,
++	0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xd5, 0x78, 0x00, 0xb4, 0x4c, 0x65, 0xd9, 0xa3,
++	0x31, 0xf2, 0x8d, 0x6e, 0xe8, 0xb7, 0xdc, 0x01,
++	0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xd5, 0x78, 0x00, 0xb4, 0x4c, 0x65, 0xd9, 0xa3,
++	0x31, 0xf2, 0x8d, 0x6e, 0xe8, 0xb7, 0xdc, 0x01,
++	0xb4, 0x36, 0xa8, 0x2b, 0x93, 0xd5, 0x55, 0xf7,
++	0x43, 0x00, 0xd0, 0x19, 0x9b, 0xa7, 0x18, 0xce
++};
++static const u8 enc_assoc112[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce112[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key112[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input113[] __initconst = {
++	0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0xf1, 0xd1, 0x28, 0x87, 0xb7, 0x21, 0x69, 0x86,
++	0xa1, 0x2d, 0x79, 0x09, 0x8b, 0x6d, 0xe6, 0x0f,
++	0xc0, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0xa7, 0xc7, 0x58, 0x99, 0xf3, 0xe6, 0x0a, 0xf1,
++	0xfc, 0xb6, 0xc7, 0x30, 0x7d, 0x87, 0x59, 0x0f,
++	0x18, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0xef, 0xe3, 0x69, 0x79, 0xed, 0x9e, 0x7d, 0x3e,
++	0xc9, 0x52, 0x41, 0x4e, 0x49, 0xb1, 0x30, 0x16
++};
++static const u8 enc_output113[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x76, 0x13, 0xe2, 0x8e, 0x5b, 0x38, 0x4f, 0x70,
++	0x63, 0xea, 0x6f, 0x83, 0xb7, 0x1d, 0xfa, 0x48,
++	0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xc4, 0xce, 0x90, 0xe7, 0x7d, 0xf3, 0x11, 0x37,
++	0x6d, 0xe8, 0x65, 0x0d, 0xc2, 0xa9, 0x0d, 0x04,
++	0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xc4, 0xce, 0x90, 0xe7, 0x7d, 0xf3, 0x11, 0x37,
++	0x6d, 0xe8, 0x65, 0x0d, 0xc2, 0xa9, 0x0d, 0x04,
++	0xce, 0x54, 0xa8, 0x2e, 0x1f, 0xa9, 0x42, 0xfa,
++	0x3f, 0x00, 0xd0, 0x29, 0x4f, 0x37, 0x15, 0xd3
++};
++static const u8 enc_assoc113[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce113[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key113[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input114[] __initconst = {
++	0xcb, 0xf1, 0xda, 0x9e, 0x0b, 0xa9, 0x37, 0x73,
++	0x74, 0xe6, 0x9e, 0x1c, 0x0e, 0x60, 0x0c, 0xfc,
++	0x34, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
++	0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
++	0xbe, 0x3f, 0xa6, 0x6b, 0x6c, 0xe7, 0x80, 0x8a,
++	0xa3, 0xe4, 0x59, 0x49, 0xf9, 0x44, 0x64, 0x9f,
++	0xd0, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
++	0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
++	0x66, 0x68, 0xdb, 0xc8, 0xf5, 0xf2, 0x0e, 0xf2,
++	0xb3, 0xf3, 0x8f, 0x00, 0xe2, 0x03, 0x17, 0x88
++};
++static const u8 enc_output114[] __initconst = {
++	0xcb, 0x9a, 0x0d, 0xb1, 0x8d, 0x63, 0xd7, 0xea,
++	0xd7, 0xc9, 0x60, 0xd6, 0xb2, 0x86, 0x74, 0x5f,
++	0xb3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xde, 0xba, 0xb4, 0xa1, 0x58, 0x42, 0x50, 0xbf,
++	0xfc, 0x2f, 0xc8, 0x4d, 0x95, 0xde, 0xcf, 0x04,
++	0xb3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xde, 0xba, 0xb4, 0xa1, 0x58, 0x42, 0x50, 0xbf,
++	0xfc, 0x2f, 0xc8, 0x4d, 0x95, 0xde, 0xcf, 0x04,
++	0x23, 0x83, 0xab, 0x0b, 0x79, 0x92, 0x05, 0x69,
++	0x9b, 0x51, 0x0a, 0xa7, 0x09, 0xbf, 0x31, 0xf1
++};
++static const u8 enc_assoc114[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce114[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key114[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input115[] __initconst = {
++	0x8f, 0x27, 0x86, 0x94, 0xc4, 0xe9, 0xda, 0xeb,
++	0xd5, 0x8d, 0x3e, 0x5b, 0x96, 0x6e, 0x8b, 0x68,
++	0x42, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
++	0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
++	0x06, 0x53, 0xe7, 0xa3, 0x31, 0x71, 0x88, 0x33,
++	0xac, 0xc3, 0xb9, 0xad, 0xff, 0x1c, 0x31, 0x98,
++	0xa6, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
++	0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
++	0xde, 0x04, 0x9a, 0x00, 0xa8, 0x64, 0x06, 0x4b,
++	0xbc, 0xd4, 0x6f, 0xe4, 0xe4, 0x5b, 0x42, 0x8f
++};
++static const u8 enc_output115[] __initconst = {
++	0x8f, 0x4c, 0x51, 0xbb, 0x42, 0x23, 0x3a, 0x72,
++	0x76, 0xa2, 0xc0, 0x91, 0x2a, 0x88, 0xf3, 0xcb,
++	0xc5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x66, 0xd6, 0xf5, 0x69, 0x05, 0xd4, 0x58, 0x06,
++	0xf3, 0x08, 0x28, 0xa9, 0x93, 0x86, 0x9a, 0x03,
++	0xc5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x66, 0xd6, 0xf5, 0x69, 0x05, 0xd4, 0x58, 0x06,
++	0xf3, 0x08, 0x28, 0xa9, 0x93, 0x86, 0x9a, 0x03,
++	0x8b, 0xfb, 0xab, 0x17, 0xa9, 0xe0, 0xb8, 0x74,
++	0x8b, 0x51, 0x0a, 0xe7, 0xd9, 0xfd, 0x23, 0x05
++};
++static const u8 enc_assoc115[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce115[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key115[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input116[] __initconst = {
++	0xd5, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0x9a, 0x22, 0xd7, 0x0a, 0x48, 0xe2, 0x4f, 0xdd,
++	0xcd, 0xd4, 0x41, 0x9d, 0xe6, 0x4c, 0x8f, 0x44,
++	0xfc, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0x77, 0xb5, 0xc9, 0x07, 0xd9, 0xc9, 0xe1, 0xea,
++	0x51, 0x85, 0x1a, 0x20, 0x4a, 0xad, 0x9f, 0x0a,
++	0x24, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0x3f, 0x91, 0xf8, 0xe7, 0xc7, 0xb1, 0x96, 0x25,
++	0x64, 0x61, 0x9c, 0x5e, 0x7e, 0x9b, 0xf6, 0x13
++};
++static const u8 enc_output116[] __initconst = {
++	0xd5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x1d, 0xe0, 0x1d, 0x03, 0xa4, 0xfb, 0x69, 0x2b,
++	0x0f, 0x13, 0x57, 0x17, 0xda, 0x3c, 0x93, 0x03,
++	0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x14, 0xbc, 0x01, 0x79, 0x57, 0xdc, 0xfa, 0x2c,
++	0xc0, 0xdb, 0xb8, 0x1d, 0xf5, 0x83, 0xcb, 0x01,
++	0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x14, 0xbc, 0x01, 0x79, 0x57, 0xdc, 0xfa, 0x2c,
++	0xc0, 0xdb, 0xb8, 0x1d, 0xf5, 0x83, 0xcb, 0x01,
++	0x49, 0xbc, 0x6e, 0x9f, 0xc5, 0x1c, 0x4d, 0x50,
++	0x30, 0x36, 0x64, 0x4d, 0x84, 0x27, 0x73, 0xd2
++};
++static const u8 enc_assoc116[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce116[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key116[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input117[] __initconst = {
++	0xdb, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0x75, 0xd5, 0x64, 0x3a, 0xa5, 0xaf, 0x93, 0x4d,
++	0x8c, 0xce, 0x39, 0x2c, 0xc3, 0xee, 0xdb, 0x47,
++	0xc0, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0x60, 0x1b, 0x5a, 0xd2, 0x06, 0x7f, 0x28, 0x06,
++	0x6a, 0x8f, 0x32, 0x81, 0x71, 0x5b, 0xa8, 0x08,
++	0x18, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0x28, 0x3f, 0x6b, 0x32, 0x18, 0x07, 0x5f, 0xc9,
++	0x5f, 0x6b, 0xb4, 0xff, 0x45, 0x6d, 0xc1, 0x11
++};
++static const u8 enc_output117[] __initconst = {
++	0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xf2, 0x17, 0xae, 0x33, 0x49, 0xb6, 0xb5, 0xbb,
++	0x4e, 0x09, 0x2f, 0xa6, 0xff, 0x9e, 0xc7, 0x00,
++	0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x03, 0x12, 0x92, 0xac, 0x88, 0x6a, 0x33, 0xc0,
++	0xfb, 0xd1, 0x90, 0xbc, 0xce, 0x75, 0xfc, 0x03,
++	0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x03, 0x12, 0x92, 0xac, 0x88, 0x6a, 0x33, 0xc0,
++	0xfb, 0xd1, 0x90, 0xbc, 0xce, 0x75, 0xfc, 0x03,
++	0x63, 0xda, 0x6e, 0xa2, 0x51, 0xf0, 0x39, 0x53,
++	0x2c, 0x36, 0x64, 0x5d, 0x38, 0xb7, 0x6f, 0xd7
++};
++static const u8 enc_assoc117[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce117[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key117[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++/* wycheproof - edge case intermediate sums in poly1305 */
++static const u8 enc_input118[] __initconst = {
++	0x93, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
++	0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
++	0x62, 0x48, 0x39, 0x60, 0x42, 0x16, 0xe4, 0x03,
++	0xeb, 0xcc, 0x6a, 0xf5, 0x59, 0xec, 0x8b, 0x43,
++	0x97, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
++	0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
++	0xd8, 0xc8, 0xc3, 0xfa, 0x1a, 0x9e, 0x47, 0x4a,
++	0xbe, 0x52, 0xd0, 0x2c, 0x81, 0x87, 0xe9, 0x0f,
++	0x4f, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
++	0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
++	0x90, 0xec, 0xf2, 0x1a, 0x04, 0xe6, 0x30, 0x85,
++	0x8b, 0xb6, 0x56, 0x52, 0xb5, 0xb1, 0x80, 0x16
++};
++static const u8 enc_output118[] __initconst = {
++	0x93, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xe5, 0x8a, 0xf3, 0x69, 0xae, 0x0f, 0xc2, 0xf5,
++	0x29, 0x0b, 0x7c, 0x7f, 0x65, 0x9c, 0x97, 0x04,
++	0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xbb, 0xc1, 0x0b, 0x84, 0x94, 0x8b, 0x5c, 0x8c,
++	0x2f, 0x0c, 0x72, 0x11, 0x3e, 0xa9, 0xbd, 0x04,
++	0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xbb, 0xc1, 0x0b, 0x84, 0x94, 0x8b, 0x5c, 0x8c,
++	0x2f, 0x0c, 0x72, 0x11, 0x3e, 0xa9, 0xbd, 0x04,
++	0x73, 0xeb, 0x27, 0x24, 0xb5, 0xc4, 0x05, 0xf0,
++	0x4d, 0x00, 0xd0, 0xf1, 0x58, 0x40, 0xa1, 0xc1
++};
++static const u8 enc_assoc118[] __initconst = {
++	0xff, 0xff, 0xff, 0xff
++};
++static const u8 enc_nonce118[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
++};
++static const u8 enc_key118[] __initconst = {
++	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
++	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
++	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
++	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
++};
++
++static const struct chacha20poly1305_testvec
++chacha20poly1305_enc_vectors[] __initconst = {
++	{ enc_input001, enc_output001, enc_assoc001, enc_nonce001, enc_key001,
++	  sizeof(enc_input001), sizeof(enc_assoc001), sizeof(enc_nonce001) },
++	{ enc_input002, enc_output002, enc_assoc002, enc_nonce002, enc_key002,
++	  sizeof(enc_input002), sizeof(enc_assoc002), sizeof(enc_nonce002) },
++	{ enc_input003, enc_output003, enc_assoc003, enc_nonce003, enc_key003,
++	  sizeof(enc_input003), sizeof(enc_assoc003), sizeof(enc_nonce003) },
++	{ enc_input004, enc_output004, enc_assoc004, enc_nonce004, enc_key004,
++	  sizeof(enc_input004), sizeof(enc_assoc004), sizeof(enc_nonce004) },
++	{ enc_input005, enc_output005, enc_assoc005, enc_nonce005, enc_key005,
++	  sizeof(enc_input005), sizeof(enc_assoc005), sizeof(enc_nonce005) },
++	{ enc_input006, enc_output006, enc_assoc006, enc_nonce006, enc_key006,
++	  sizeof(enc_input006), sizeof(enc_assoc006), sizeof(enc_nonce006) },
++	{ enc_input007, enc_output007, enc_assoc007, enc_nonce007, enc_key007,
++	  sizeof(enc_input007), sizeof(enc_assoc007), sizeof(enc_nonce007) },
++	{ enc_input008, enc_output008, enc_assoc008, enc_nonce008, enc_key008,
++	  sizeof(enc_input008), sizeof(enc_assoc008), sizeof(enc_nonce008) },
++	{ enc_input009, enc_output009, enc_assoc009, enc_nonce009, enc_key009,
++	  sizeof(enc_input009), sizeof(enc_assoc009), sizeof(enc_nonce009) },
++	{ enc_input010, enc_output010, enc_assoc010, enc_nonce010, enc_key010,
++	  sizeof(enc_input010), sizeof(enc_assoc010), sizeof(enc_nonce010) },
++	{ enc_input011, enc_output011, enc_assoc011, enc_nonce011, enc_key011,
++	  sizeof(enc_input011), sizeof(enc_assoc011), sizeof(enc_nonce011) },
++	{ enc_input012, enc_output012, enc_assoc012, enc_nonce012, enc_key012,
++	  sizeof(enc_input012), sizeof(enc_assoc012), sizeof(enc_nonce012) },
++	{ enc_input013, enc_output013, enc_assoc013, enc_nonce013, enc_key013,
++	  sizeof(enc_input013), sizeof(enc_assoc013), sizeof(enc_nonce013) },
++	{ enc_input014, enc_output014, enc_assoc014, enc_nonce014, enc_key014,
++	  sizeof(enc_input014), sizeof(enc_assoc014), sizeof(enc_nonce014) },
++	{ enc_input015, enc_output015, enc_assoc015, enc_nonce015, enc_key015,
++	  sizeof(enc_input015), sizeof(enc_assoc015), sizeof(enc_nonce015) },
++	{ enc_input016, enc_output016, enc_assoc016, enc_nonce016, enc_key016,
++	  sizeof(enc_input016), sizeof(enc_assoc016), sizeof(enc_nonce016) },
++	{ enc_input017, enc_output017, enc_assoc017, enc_nonce017, enc_key017,
++	  sizeof(enc_input017), sizeof(enc_assoc017), sizeof(enc_nonce017) },
++	{ enc_input018, enc_output018, enc_assoc018, enc_nonce018, enc_key018,
++	  sizeof(enc_input018), sizeof(enc_assoc018), sizeof(enc_nonce018) },
++	{ enc_input019, enc_output019, enc_assoc019, enc_nonce019, enc_key019,
++	  sizeof(enc_input019), sizeof(enc_assoc019), sizeof(enc_nonce019) },
++	{ enc_input020, enc_output020, enc_assoc020, enc_nonce020, enc_key020,
++	  sizeof(enc_input020), sizeof(enc_assoc020), sizeof(enc_nonce020) },
++	{ enc_input021, enc_output021, enc_assoc021, enc_nonce021, enc_key021,
++	  sizeof(enc_input021), sizeof(enc_assoc021), sizeof(enc_nonce021) },
++	{ enc_input022, enc_output022, enc_assoc022, enc_nonce022, enc_key022,
++	  sizeof(enc_input022), sizeof(enc_assoc022), sizeof(enc_nonce022) },
++	{ enc_input023, enc_output023, enc_assoc023, enc_nonce023, enc_key023,
++	  sizeof(enc_input023), sizeof(enc_assoc023), sizeof(enc_nonce023) },
++	{ enc_input024, enc_output024, enc_assoc024, enc_nonce024, enc_key024,
++	  sizeof(enc_input024), sizeof(enc_assoc024), sizeof(enc_nonce024) },
++	{ enc_input025, enc_output025, enc_assoc025, enc_nonce025, enc_key025,
++	  sizeof(enc_input025), sizeof(enc_assoc025), sizeof(enc_nonce025) },
++	{ enc_input026, enc_output026, enc_assoc026, enc_nonce026, enc_key026,
++	  sizeof(enc_input026), sizeof(enc_assoc026), sizeof(enc_nonce026) },
++	{ enc_input027, enc_output027, enc_assoc027, enc_nonce027, enc_key027,
++	  sizeof(enc_input027), sizeof(enc_assoc027), sizeof(enc_nonce027) },
++	{ enc_input028, enc_output028, enc_assoc028, enc_nonce028, enc_key028,
++	  sizeof(enc_input028), sizeof(enc_assoc028), sizeof(enc_nonce028) },
++	{ enc_input029, enc_output029, enc_assoc029, enc_nonce029, enc_key029,
++	  sizeof(enc_input029), sizeof(enc_assoc029), sizeof(enc_nonce029) },
++	{ enc_input030, enc_output030, enc_assoc030, enc_nonce030, enc_key030,
++	  sizeof(enc_input030), sizeof(enc_assoc030), sizeof(enc_nonce030) },
++	{ enc_input031, enc_output031, enc_assoc031, enc_nonce031, enc_key031,
++	  sizeof(enc_input031), sizeof(enc_assoc031), sizeof(enc_nonce031) },
++	{ enc_input032, enc_output032, enc_assoc032, enc_nonce032, enc_key032,
++	  sizeof(enc_input032), sizeof(enc_assoc032), sizeof(enc_nonce032) },
++	{ enc_input033, enc_output033, enc_assoc033, enc_nonce033, enc_key033,
++	  sizeof(enc_input033), sizeof(enc_assoc033), sizeof(enc_nonce033) },
++	{ enc_input034, enc_output034, enc_assoc034, enc_nonce034, enc_key034,
++	  sizeof(enc_input034), sizeof(enc_assoc034), sizeof(enc_nonce034) },
++	{ enc_input035, enc_output035, enc_assoc035, enc_nonce035, enc_key035,
++	  sizeof(enc_input035), sizeof(enc_assoc035), sizeof(enc_nonce035) },
++	{ enc_input036, enc_output036, enc_assoc036, enc_nonce036, enc_key036,
++	  sizeof(enc_input036), sizeof(enc_assoc036), sizeof(enc_nonce036) },
++	{ enc_input037, enc_output037, enc_assoc037, enc_nonce037, enc_key037,
++	  sizeof(enc_input037), sizeof(enc_assoc037), sizeof(enc_nonce037) },
++	{ enc_input038, enc_output038, enc_assoc038, enc_nonce038, enc_key038,
++	  sizeof(enc_input038), sizeof(enc_assoc038), sizeof(enc_nonce038) },
++	{ enc_input039, enc_output039, enc_assoc039, enc_nonce039, enc_key039,
++	  sizeof(enc_input039), sizeof(enc_assoc039), sizeof(enc_nonce039) },
++	{ enc_input040, enc_output040, enc_assoc040, enc_nonce040, enc_key040,
++	  sizeof(enc_input040), sizeof(enc_assoc040), sizeof(enc_nonce040) },
++	{ enc_input041, enc_output041, enc_assoc041, enc_nonce041, enc_key041,
++	  sizeof(enc_input041), sizeof(enc_assoc041), sizeof(enc_nonce041) },
++	{ enc_input042, enc_output042, enc_assoc042, enc_nonce042, enc_key042,
++	  sizeof(enc_input042), sizeof(enc_assoc042), sizeof(enc_nonce042) },
++	{ enc_input043, enc_output043, enc_assoc043, enc_nonce043, enc_key043,
++	  sizeof(enc_input043), sizeof(enc_assoc043), sizeof(enc_nonce043) },
++	{ enc_input044, enc_output044, enc_assoc044, enc_nonce044, enc_key044,
++	  sizeof(enc_input044), sizeof(enc_assoc044), sizeof(enc_nonce044) },
++	{ enc_input045, enc_output045, enc_assoc045, enc_nonce045, enc_key045,
++	  sizeof(enc_input045), sizeof(enc_assoc045), sizeof(enc_nonce045) },
++	{ enc_input046, enc_output046, enc_assoc046, enc_nonce046, enc_key046,
++	  sizeof(enc_input046), sizeof(enc_assoc046), sizeof(enc_nonce046) },
++	{ enc_input047, enc_output047, enc_assoc047, enc_nonce047, enc_key047,
++	  sizeof(enc_input047), sizeof(enc_assoc047), sizeof(enc_nonce047) },
++	{ enc_input048, enc_output048, enc_assoc048, enc_nonce048, enc_key048,
++	  sizeof(enc_input048), sizeof(enc_assoc048), sizeof(enc_nonce048) },
++	{ enc_input049, enc_output049, enc_assoc049, enc_nonce049, enc_key049,
++	  sizeof(enc_input049), sizeof(enc_assoc049), sizeof(enc_nonce049) },
++	{ enc_input050, enc_output050, enc_assoc050, enc_nonce050, enc_key050,
++	  sizeof(enc_input050), sizeof(enc_assoc050), sizeof(enc_nonce050) },
++	{ enc_input051, enc_output051, enc_assoc051, enc_nonce051, enc_key051,
++	  sizeof(enc_input051), sizeof(enc_assoc051), sizeof(enc_nonce051) },
++	{ enc_input052, enc_output052, enc_assoc052, enc_nonce052, enc_key052,
++	  sizeof(enc_input052), sizeof(enc_assoc052), sizeof(enc_nonce052) },
++	{ enc_input053, enc_output053, enc_assoc053, enc_nonce053, enc_key053,
++	  sizeof(enc_input053), sizeof(enc_assoc053), sizeof(enc_nonce053) },
++	{ enc_input054, enc_output054, enc_assoc054, enc_nonce054, enc_key054,
++	  sizeof(enc_input054), sizeof(enc_assoc054), sizeof(enc_nonce054) },
++	{ enc_input055, enc_output055, enc_assoc055, enc_nonce055, enc_key055,
++	  sizeof(enc_input055), sizeof(enc_assoc055), sizeof(enc_nonce055) },
++	{ enc_input056, enc_output056, enc_assoc056, enc_nonce056, enc_key056,
++	  sizeof(enc_input056), sizeof(enc_assoc056), sizeof(enc_nonce056) },
++	{ enc_input057, enc_output057, enc_assoc057, enc_nonce057, enc_key057,
++	  sizeof(enc_input057), sizeof(enc_assoc057), sizeof(enc_nonce057) },
++	{ enc_input058, enc_output058, enc_assoc058, enc_nonce058, enc_key058,
++	  sizeof(enc_input058), sizeof(enc_assoc058), sizeof(enc_nonce058) },
++	{ enc_input059, enc_output059, enc_assoc059, enc_nonce059, enc_key059,
++	  sizeof(enc_input059), sizeof(enc_assoc059), sizeof(enc_nonce059) },
++	{ enc_input060, enc_output060, enc_assoc060, enc_nonce060, enc_key060,
++	  sizeof(enc_input060), sizeof(enc_assoc060), sizeof(enc_nonce060) },
++	{ enc_input061, enc_output061, enc_assoc061, enc_nonce061, enc_key061,
++	  sizeof(enc_input061), sizeof(enc_assoc061), sizeof(enc_nonce061) },
++	{ enc_input062, enc_output062, enc_assoc062, enc_nonce062, enc_key062,
++	  sizeof(enc_input062), sizeof(enc_assoc062), sizeof(enc_nonce062) },
++	{ enc_input063, enc_output063, enc_assoc063, enc_nonce063, enc_key063,
++	  sizeof(enc_input063), sizeof(enc_assoc063), sizeof(enc_nonce063) },
++	{ enc_input064, enc_output064, enc_assoc064, enc_nonce064, enc_key064,
++	  sizeof(enc_input064), sizeof(enc_assoc064), sizeof(enc_nonce064) },
++	{ enc_input065, enc_output065, enc_assoc065, enc_nonce065, enc_key065,
++	  sizeof(enc_input065), sizeof(enc_assoc065), sizeof(enc_nonce065) },
++	{ enc_input066, enc_output066, enc_assoc066, enc_nonce066, enc_key066,
++	  sizeof(enc_input066), sizeof(enc_assoc066), sizeof(enc_nonce066) },
++	{ enc_input067, enc_output067, enc_assoc067, enc_nonce067, enc_key067,
++	  sizeof(enc_input067), sizeof(enc_assoc067), sizeof(enc_nonce067) },
++	{ enc_input068, enc_output068, enc_assoc068, enc_nonce068, enc_key068,
++	  sizeof(enc_input068), sizeof(enc_assoc068), sizeof(enc_nonce068) },
++	{ enc_input069, enc_output069, enc_assoc069, enc_nonce069, enc_key069,
++	  sizeof(enc_input069), sizeof(enc_assoc069), sizeof(enc_nonce069) },
++	{ enc_input070, enc_output070, enc_assoc070, enc_nonce070, enc_key070,
++	  sizeof(enc_input070), sizeof(enc_assoc070), sizeof(enc_nonce070) },
++	{ enc_input071, enc_output071, enc_assoc071, enc_nonce071, enc_key071,
++	  sizeof(enc_input071), sizeof(enc_assoc071), sizeof(enc_nonce071) },
++	{ enc_input072, enc_output072, enc_assoc072, enc_nonce072, enc_key072,
++	  sizeof(enc_input072), sizeof(enc_assoc072), sizeof(enc_nonce072) },
++	{ enc_input073, enc_output073, enc_assoc073, enc_nonce073, enc_key073,
++	  sizeof(enc_input073), sizeof(enc_assoc073), sizeof(enc_nonce073) },
++	{ enc_input074, enc_output074, enc_assoc074, enc_nonce074, enc_key074,
++	  sizeof(enc_input074), sizeof(enc_assoc074), sizeof(enc_nonce074) },
++	{ enc_input075, enc_output075, enc_assoc075, enc_nonce075, enc_key075,
++	  sizeof(enc_input075), sizeof(enc_assoc075), sizeof(enc_nonce075) },
++	{ enc_input076, enc_output076, enc_assoc076, enc_nonce076, enc_key076,
++	  sizeof(enc_input076), sizeof(enc_assoc076), sizeof(enc_nonce076) },
++	{ enc_input077, enc_output077, enc_assoc077, enc_nonce077, enc_key077,
++	  sizeof(enc_input077), sizeof(enc_assoc077), sizeof(enc_nonce077) },
++	{ enc_input078, enc_output078, enc_assoc078, enc_nonce078, enc_key078,
++	  sizeof(enc_input078), sizeof(enc_assoc078), sizeof(enc_nonce078) },
++	{ enc_input079, enc_output079, enc_assoc079, enc_nonce079, enc_key079,
++	  sizeof(enc_input079), sizeof(enc_assoc079), sizeof(enc_nonce079) },
++	{ enc_input080, enc_output080, enc_assoc080, enc_nonce080, enc_key080,
++	  sizeof(enc_input080), sizeof(enc_assoc080), sizeof(enc_nonce080) },
++	{ enc_input081, enc_output081, enc_assoc081, enc_nonce081, enc_key081,
++	  sizeof(enc_input081), sizeof(enc_assoc081), sizeof(enc_nonce081) },
++	{ enc_input082, enc_output082, enc_assoc082, enc_nonce082, enc_key082,
++	  sizeof(enc_input082), sizeof(enc_assoc082), sizeof(enc_nonce082) },
++	{ enc_input083, enc_output083, enc_assoc083, enc_nonce083, enc_key083,
++	  sizeof(enc_input083), sizeof(enc_assoc083), sizeof(enc_nonce083) },
++	{ enc_input084, enc_output084, enc_assoc084, enc_nonce084, enc_key084,
++	  sizeof(enc_input084), sizeof(enc_assoc084), sizeof(enc_nonce084) },
++	{ enc_input085, enc_output085, enc_assoc085, enc_nonce085, enc_key085,
++	  sizeof(enc_input085), sizeof(enc_assoc085), sizeof(enc_nonce085) },
++	{ enc_input086, enc_output086, enc_assoc086, enc_nonce086, enc_key086,
++	  sizeof(enc_input086), sizeof(enc_assoc086), sizeof(enc_nonce086) },
++	{ enc_input087, enc_output087, enc_assoc087, enc_nonce087, enc_key087,
++	  sizeof(enc_input087), sizeof(enc_assoc087), sizeof(enc_nonce087) },
++	{ enc_input088, enc_output088, enc_assoc088, enc_nonce088, enc_key088,
++	  sizeof(enc_input088), sizeof(enc_assoc088), sizeof(enc_nonce088) },
++	{ enc_input089, enc_output089, enc_assoc089, enc_nonce089, enc_key089,
++	  sizeof(enc_input089), sizeof(enc_assoc089), sizeof(enc_nonce089) },
++	{ enc_input090, enc_output090, enc_assoc090, enc_nonce090, enc_key090,
++	  sizeof(enc_input090), sizeof(enc_assoc090), sizeof(enc_nonce090) },
++	{ enc_input091, enc_output091, enc_assoc091, enc_nonce091, enc_key091,
++	  sizeof(enc_input091), sizeof(enc_assoc091), sizeof(enc_nonce091) },
++	{ enc_input092, enc_output092, enc_assoc092, enc_nonce092, enc_key092,
++	  sizeof(enc_input092), sizeof(enc_assoc092), sizeof(enc_nonce092) },
++	{ enc_input093, enc_output093, enc_assoc093, enc_nonce093, enc_key093,
++	  sizeof(enc_input093), sizeof(enc_assoc093), sizeof(enc_nonce093) },
++	{ enc_input094, enc_output094, enc_assoc094, enc_nonce094, enc_key094,
++	  sizeof(enc_input094), sizeof(enc_assoc094), sizeof(enc_nonce094) },
++	{ enc_input095, enc_output095, enc_assoc095, enc_nonce095, enc_key095,
++	  sizeof(enc_input095), sizeof(enc_assoc095), sizeof(enc_nonce095) },
++	{ enc_input096, enc_output096, enc_assoc096, enc_nonce096, enc_key096,
++	  sizeof(enc_input096), sizeof(enc_assoc096), sizeof(enc_nonce096) },
++	{ enc_input097, enc_output097, enc_assoc097, enc_nonce097, enc_key097,
++	  sizeof(enc_input097), sizeof(enc_assoc097), sizeof(enc_nonce097) },
++	{ enc_input098, enc_output098, enc_assoc098, enc_nonce098, enc_key098,
++	  sizeof(enc_input098), sizeof(enc_assoc098), sizeof(enc_nonce098) },
++	{ enc_input099, enc_output099, enc_assoc099, enc_nonce099, enc_key099,
++	  sizeof(enc_input099), sizeof(enc_assoc099), sizeof(enc_nonce099) },
++	{ enc_input100, enc_output100, enc_assoc100, enc_nonce100, enc_key100,
++	  sizeof(enc_input100), sizeof(enc_assoc100), sizeof(enc_nonce100) },
++	{ enc_input101, enc_output101, enc_assoc101, enc_nonce101, enc_key101,
++	  sizeof(enc_input101), sizeof(enc_assoc101), sizeof(enc_nonce101) },
++	{ enc_input102, enc_output102, enc_assoc102, enc_nonce102, enc_key102,
++	  sizeof(enc_input102), sizeof(enc_assoc102), sizeof(enc_nonce102) },
++	{ enc_input103, enc_output103, enc_assoc103, enc_nonce103, enc_key103,
++	  sizeof(enc_input103), sizeof(enc_assoc103), sizeof(enc_nonce103) },
++	{ enc_input104, enc_output104, enc_assoc104, enc_nonce104, enc_key104,
++	  sizeof(enc_input104), sizeof(enc_assoc104), sizeof(enc_nonce104) },
++	{ enc_input105, enc_output105, enc_assoc105, enc_nonce105, enc_key105,
++	  sizeof(enc_input105), sizeof(enc_assoc105), sizeof(enc_nonce105) },
++	{ enc_input106, enc_output106, enc_assoc106, enc_nonce106, enc_key106,
++	  sizeof(enc_input106), sizeof(enc_assoc106), sizeof(enc_nonce106) },
++	{ enc_input107, enc_output107, enc_assoc107, enc_nonce107, enc_key107,
++	  sizeof(enc_input107), sizeof(enc_assoc107), sizeof(enc_nonce107) },
++	{ enc_input108, enc_output108, enc_assoc108, enc_nonce108, enc_key108,
++	  sizeof(enc_input108), sizeof(enc_assoc108), sizeof(enc_nonce108) },
++	{ enc_input109, enc_output109, enc_assoc109, enc_nonce109, enc_key109,
++	  sizeof(enc_input109), sizeof(enc_assoc109), sizeof(enc_nonce109) },
++	{ enc_input110, enc_output110, enc_assoc110, enc_nonce110, enc_key110,
++	  sizeof(enc_input110), sizeof(enc_assoc110), sizeof(enc_nonce110) },
++	{ enc_input111, enc_output111, enc_assoc111, enc_nonce111, enc_key111,
++	  sizeof(enc_input111), sizeof(enc_assoc111), sizeof(enc_nonce111) },
++	{ enc_input112, enc_output112, enc_assoc112, enc_nonce112, enc_key112,
++	  sizeof(enc_input112), sizeof(enc_assoc112), sizeof(enc_nonce112) },
++	{ enc_input113, enc_output113, enc_assoc113, enc_nonce113, enc_key113,
++	  sizeof(enc_input113), sizeof(enc_assoc113), sizeof(enc_nonce113) },
++	{ enc_input114, enc_output114, enc_assoc114, enc_nonce114, enc_key114,
++	  sizeof(enc_input114), sizeof(enc_assoc114), sizeof(enc_nonce114) },
++	{ enc_input115, enc_output115, enc_assoc115, enc_nonce115, enc_key115,
++	  sizeof(enc_input115), sizeof(enc_assoc115), sizeof(enc_nonce115) },
++	{ enc_input116, enc_output116, enc_assoc116, enc_nonce116, enc_key116,
++	  sizeof(enc_input116), sizeof(enc_assoc116), sizeof(enc_nonce116) },
++	{ enc_input117, enc_output117, enc_assoc117, enc_nonce117, enc_key117,
++	  sizeof(enc_input117), sizeof(enc_assoc117), sizeof(enc_nonce117) },
++	{ enc_input118, enc_output118, enc_assoc118, enc_nonce118, enc_key118,
++	  sizeof(enc_input118), sizeof(enc_assoc118), sizeof(enc_nonce118) }
++};
++
++static const u8 dec_input001[] __initconst = {
++	0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4,
++	0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd,
++	0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89,
++	0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 0x8c, 0xb2,
++	0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee,
++	0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0,
++	0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00,
++	0xd4, 0xf0, 0x3b, 0x7f, 0x35, 0x58, 0x94, 0xcf,
++	0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce,
++	0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81,
++	0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd,
++	0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55,
++	0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61,
++	0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38,
++	0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0,
++	0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4,
++	0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46,
++	0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9,
++	0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e,
++	0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e,
++	0xaf, 0x45, 0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15,
++	0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a,
++	0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea,
++	0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a,
++	0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99,
++	0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e,
++	0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10,
++	0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10,
++	0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94,
++	0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30,
++	0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf,
++	0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29,
++	0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70,
++	0x9b, 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb,
++	0x22, 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f,
++	0x38
++};
++static const u8 dec_output001[] __initconst = {
++	0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74,
++	0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20,
++	0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66,
++	0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
++	0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69,
++	0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20,
++	0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20,
++	0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d,
++	0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e,
++	0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65,
++	0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64,
++	0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
++	0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f,
++	0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64,
++	0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65,
++	0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
++	0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61,
++	0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e,
++	0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69,
++	0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72,
++	0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20,
++	0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65,
++	0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61,
++	0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72,
++	0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
++	0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61,
++	0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20,
++	0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65,
++	0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20,
++	0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20,
++	0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b,
++	0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67,
++	0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80,
++	0x9d
++};
++static const u8 dec_assoc001[] __initconst = {
++	0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x4e, 0x91
++};
++static const u8 dec_nonce001[] __initconst = {
++	0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
++};
++static const u8 dec_key001[] __initconst = {
++	0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
++	0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
++	0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
++	0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
++};
++
++static const u8 dec_input002[] __initconst = {
++	0xea, 0xe0, 0x1e, 0x9e, 0x2c, 0x91, 0xaa, 0xe1,
++	0xdb, 0x5d, 0x99, 0x3f, 0x8a, 0xf7, 0x69, 0x92
++};
++static const u8 dec_output002[] __initconst = { };
++static const u8 dec_assoc002[] __initconst = { };
++static const u8 dec_nonce002[] __initconst = {
++	0xca, 0xbf, 0x33, 0x71, 0x32, 0x45, 0x77, 0x8e
++};
++static const u8 dec_key002[] __initconst = {
++	0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f,
++	0x2d, 0x29, 0x25, 0x76, 0xd5, 0x75, 0x27, 0x86,
++	0x91, 0x9a, 0x27, 0x7a, 0xfb, 0x46, 0xc5, 0xef,
++	0x94, 0x81, 0x79, 0x57, 0x14, 0x59, 0x40, 0x68
++};
++
++static const u8 dec_input003[] __initconst = {
++	0xdd, 0x6b, 0x3b, 0x82, 0xce, 0x5a, 0xbd, 0xd6,
++	0xa9, 0x35, 0x83, 0xd8, 0x8c, 0x3d, 0x85, 0x77
++};
++static const u8 dec_output003[] __initconst = { };
++static const u8 dec_assoc003[] __initconst = {
++	0x33, 0x10, 0x41, 0x12, 0x1f, 0xf3, 0xd2, 0x6b
++};
++static const u8 dec_nonce003[] __initconst = {
++	0x3d, 0x86, 0xb5, 0x6b, 0xc8, 0xa3, 0x1f, 0x1d
++};
++static const u8 dec_key003[] __initconst = {
++	0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88,
++	0x34, 0xd1, 0x13, 0xaf, 0x57, 0xa1, 0xeb, 0x3a,
++	0x2a, 0x80, 0x51, 0x36, 0xec, 0x5b, 0xbc, 0x08,
++	0x93, 0x84, 0x21, 0xb5, 0x13, 0x88, 0x3c, 0x0d
++};
++
++static const u8 dec_input004[] __initconst = {
++	0xb7, 0x1b, 0xb0, 0x73, 0x59, 0xb0, 0x84, 0xb2,
++	0x6d, 0x8e, 0xab, 0x94, 0x31, 0xa1, 0xae, 0xac,
++	0x89
++};
++static const u8 dec_output004[] __initconst = {
++	0xa4
++};
++static const u8 dec_assoc004[] __initconst = {
++	0x6a, 0xe2, 0xad, 0x3f, 0x88, 0x39, 0x5a, 0x40
++};
++static const u8 dec_nonce004[] __initconst = {
++	0xd2, 0x32, 0x1f, 0x29, 0x28, 0xc6, 0xc4, 0xc4
++};
++static const u8 dec_key004[] __initconst = {
++	0x4b, 0x28, 0x4b, 0xa3, 0x7b, 0xbe, 0xe9, 0xf8,
++	0x31, 0x80, 0x82, 0xd7, 0xd8, 0xe8, 0xb5, 0xa1,
++	0xe2, 0x18, 0x18, 0x8a, 0x9c, 0xfa, 0xa3, 0x3d,
++	0x25, 0x71, 0x3e, 0x40, 0xbc, 0x54, 0x7a, 0x3e
++};
++
++static const u8 dec_input005[] __initconst = {
++	0xbf, 0xe1, 0x5b, 0x0b, 0xdb, 0x6b, 0xf5, 0x5e,
++	0x6c, 0x5d, 0x84, 0x44, 0x39, 0x81, 0xc1, 0x9c,
++	0xac
++};
++static const u8 dec_output005[] __initconst = {
++	0x2d
++};
++static const u8 dec_assoc005[] __initconst = { };
++static const u8 dec_nonce005[] __initconst = {
++	0x20, 0x1c, 0xaa, 0x5f, 0x9c, 0xbf, 0x92, 0x30
++};
++static const u8 dec_key005[] __initconst = {
++	0x66, 0xca, 0x9c, 0x23, 0x2a, 0x4b, 0x4b, 0x31,
++	0x0e, 0x92, 0x89, 0x8b, 0xf4, 0x93, 0xc7, 0x87,
++	0x98, 0xa3, 0xd8, 0x39, 0xf8, 0xf4, 0xa7, 0x01,
++	0xc0, 0x2e, 0x0a, 0xa6, 0x7e, 0x5a, 0x78, 0x87
++};
++
++static const u8 dec_input006[] __initconst = {
++	0x8b, 0x06, 0xd3, 0x31, 0xb0, 0x93, 0x45, 0xb1,
++	0x75, 0x6e, 0x26, 0xf9, 0x67, 0xbc, 0x90, 0x15,
++	0x81, 0x2c, 0xb5, 0xf0, 0xc6, 0x2b, 0xc7, 0x8c,
++	0x56, 0xd1, 0xbf, 0x69, 0x6c, 0x07, 0xa0, 0xda,
++	0x65, 0x27, 0xc9, 0x90, 0x3d, 0xef, 0x4b, 0x11,
++	0x0f, 0x19, 0x07, 0xfd, 0x29, 0x92, 0xd9, 0xc8,
++	0xf7, 0x99, 0x2e, 0x4a, 0xd0, 0xb8, 0x2c, 0xdc,
++	0x93, 0xf5, 0x9e, 0x33, 0x78, 0xd1, 0x37, 0xc3,
++	0x66, 0xd7, 0x5e, 0xbc, 0x44, 0xbf, 0x53, 0xa5,
++	0xbc, 0xc4, 0xcb, 0x7b, 0x3a, 0x8e, 0x7f, 0x02,
++	0xbd, 0xbb, 0xe7, 0xca, 0xa6, 0x6c, 0x6b, 0x93,
++	0x21, 0x93, 0x10, 0x61, 0xe7, 0x69, 0xd0, 0x78,
++	0xf3, 0x07, 0x5a, 0x1a, 0x8f, 0x73, 0xaa, 0xb1,
++	0x4e, 0xd3, 0xda, 0x4f, 0xf3, 0x32, 0xe1, 0x66,
++	0x3e, 0x6c, 0xc6, 0x13, 0xba, 0x06, 0x5b, 0xfc,
++	0x6a, 0xe5, 0x6f, 0x60, 0xfb, 0x07, 0x40, 0xb0,
++	0x8c, 0x9d, 0x84, 0x43, 0x6b, 0xc1, 0xf7, 0x8d,
++	0x8d, 0x31, 0xf7, 0x7a, 0x39, 0x4d, 0x8f, 0x9a,
++	0xeb
++};
++static const u8 dec_output006[] __initconst = {
++	0x33, 0x2f, 0x94, 0xc1, 0xa4, 0xef, 0xcc, 0x2a,
++	0x5b, 0xa6, 0xe5, 0x8f, 0x1d, 0x40, 0xf0, 0x92,
++	0x3c, 0xd9, 0x24, 0x11, 0xa9, 0x71, 0xf9, 0x37,
++	0x14, 0x99, 0xfa, 0xbe, 0xe6, 0x80, 0xde, 0x50,
++	0xc9, 0x96, 0xd4, 0xb0, 0xec, 0x9e, 0x17, 0xec,
++	0xd2, 0x5e, 0x72, 0x99, 0xfc, 0x0a, 0xe1, 0xcb,
++	0x48, 0xd2, 0x85, 0xdd, 0x2f, 0x90, 0xe0, 0x66,
++	0x3b, 0xe6, 0x20, 0x74, 0xbe, 0x23, 0x8f, 0xcb,
++	0xb4, 0xe4, 0xda, 0x48, 0x40, 0xa6, 0xd1, 0x1b,
++	0xc7, 0x42, 0xce, 0x2f, 0x0c, 0xa6, 0x85, 0x6e,
++	0x87, 0x37, 0x03, 0xb1, 0x7c, 0x25, 0x96, 0xa3,
++	0x05, 0xd8, 0xb0, 0xf4, 0xed, 0xea, 0xc2, 0xf0,
++	0x31, 0x98, 0x6c, 0xd1, 0x14, 0x25, 0xc0, 0xcb,
++	0x01, 0x74, 0xd0, 0x82, 0xf4, 0x36, 0xf5, 0x41,
++	0xd5, 0xdc, 0xca, 0xc5, 0xbb, 0x98, 0xfe, 0xfc,
++	0x69, 0x21, 0x70, 0xd8, 0xa4, 0x4b, 0xc8, 0xde,
++	0x8f
++};
++static const u8 dec_assoc006[] __initconst = {
++	0x70, 0xd3, 0x33, 0xf3, 0x8b, 0x18, 0x0b
++};
++static const u8 dec_nonce006[] __initconst = {
++	0xdf, 0x51, 0x84, 0x82, 0x42, 0x0c, 0x75, 0x9c
++};
++static const u8 dec_key006[] __initconst = {
++	0x68, 0x7b, 0x8d, 0x8e, 0xe3, 0xc4, 0xdd, 0xae,
++	0xdf, 0x72, 0x7f, 0x53, 0x72, 0x25, 0x1e, 0x78,
++	0x91, 0xcb, 0x69, 0x76, 0x1f, 0x49, 0x93, 0xf9,
++	0x6f, 0x21, 0xcc, 0x39, 0x9c, 0xad, 0xb1, 0x01
++};
++
++static const u8 dec_input007[] __initconst = {
++	0x85, 0x04, 0xc2, 0xed, 0x8d, 0xfd, 0x97, 0x5c,
++	0xd2, 0xb7, 0xe2, 0xc1, 0x6b, 0xa3, 0xba, 0xf8,
++	0xc9, 0x50, 0xc3, 0xc6, 0xa5, 0xe3, 0xa4, 0x7c,
++	0xc3, 0x23, 0x49, 0x5e, 0xa9, 0xb9, 0x32, 0xeb,
++	0x8a, 0x7c, 0xca, 0xe5, 0xec, 0xfb, 0x7c, 0xc0,
++	0xcb, 0x7d, 0xdc, 0x2c, 0x9d, 0x92, 0x55, 0x21,
++	0x0a, 0xc8, 0x43, 0x63, 0x59, 0x0a, 0x31, 0x70,
++	0x82, 0x67, 0x41, 0x03, 0xf8, 0xdf, 0xf2, 0xac,
++	0xa7, 0x02, 0xd4, 0xd5, 0x8a, 0x2d, 0xc8, 0x99,
++	0x19, 0x66, 0xd0, 0xf6, 0x88, 0x2c, 0x77, 0xd9,
++	0xd4, 0x0d, 0x6c, 0xbd, 0x98, 0xde, 0xe7, 0x7f,
++	0xad, 0x7e, 0x8a, 0xfb, 0xe9, 0x4b, 0xe5, 0xf7,
++	0xe5, 0x50, 0xa0, 0x90, 0x3f, 0xd6, 0x22, 0x53,
++	0xe3, 0xfe, 0x1b, 0xcc, 0x79, 0x3b, 0xec, 0x12,
++	0x47, 0x52, 0xa7, 0xd6, 0x04, 0xe3, 0x52, 0xe6,
++	0x93, 0x90, 0x91, 0x32, 0x73, 0x79, 0xb8, 0xd0,
++	0x31, 0xde, 0x1f, 0x9f, 0x2f, 0x05, 0x38, 0x54,
++	0x2f, 0x35, 0x04, 0x39, 0xe0, 0xa7, 0xba, 0xc6,
++	0x52, 0xf6, 0x37, 0x65, 0x4c, 0x07, 0xa9, 0x7e,
++	0xb3, 0x21, 0x6f, 0x74, 0x8c, 0xc9, 0xde, 0xdb,
++	0x65, 0x1b, 0x9b, 0xaa, 0x60, 0xb1, 0x03, 0x30,
++	0x6b, 0xb2, 0x03, 0xc4, 0x1c, 0x04, 0xf8, 0x0f,
++	0x64, 0xaf, 0x46, 0xe4, 0x65, 0x99, 0x49, 0xe2,
++	0xea, 0xce, 0x78, 0x00, 0xd8, 0x8b, 0xd5, 0x2e,
++	0xcf, 0xfc, 0x40, 0x49, 0xe8, 0x58, 0xdc, 0x34,
++	0x9c, 0x8c, 0x61, 0xbf, 0x0a, 0x8e, 0xec, 0x39,
++	0xa9, 0x30, 0x05, 0x5a, 0xd2, 0x56, 0x01, 0xc7,
++	0xda, 0x8f, 0x4e, 0xbb, 0x43, 0xa3, 0x3a, 0xf9,
++	0x15, 0x2a, 0xd0, 0xa0, 0x7a, 0x87, 0x34, 0x82,
++	0xfe, 0x8a, 0xd1, 0x2d, 0x5e, 0xc7, 0xbf, 0x04,
++	0x53, 0x5f, 0x3b, 0x36, 0xd4, 0x25, 0x5c, 0x34,
++	0x7a, 0x8d, 0xd5, 0x05, 0xce, 0x72, 0xca, 0xef,
++	0x7a, 0x4b, 0xbc, 0xb0, 0x10, 0x5c, 0x96, 0x42,
++	0x3a, 0x00, 0x98, 0xcd, 0x15, 0xe8, 0xb7, 0x53
++};
++static const u8 dec_output007[] __initconst = {
++	0x9b, 0x18, 0xdb, 0xdd, 0x9a, 0x0f, 0x3e, 0xa5,
++	0x15, 0x17, 0xde, 0xdf, 0x08, 0x9d, 0x65, 0x0a,
++	0x67, 0x30, 0x12, 0xe2, 0x34, 0x77, 0x4b, 0xc1,
++	0xd9, 0xc6, 0x1f, 0xab, 0xc6, 0x18, 0x50, 0x17,
++	0xa7, 0x9d, 0x3c, 0xa6, 0xc5, 0x35, 0x8c, 0x1c,
++	0xc0, 0xa1, 0x7c, 0x9f, 0x03, 0x89, 0xca, 0xe1,
++	0xe6, 0xe9, 0xd4, 0xd3, 0x88, 0xdb, 0xb4, 0x51,
++	0x9d, 0xec, 0xb4, 0xfc, 0x52, 0xee, 0x6d, 0xf1,
++	0x75, 0x42, 0xc6, 0xfd, 0xbd, 0x7a, 0x8e, 0x86,
++	0xfc, 0x44, 0xb3, 0x4f, 0xf3, 0xea, 0x67, 0x5a,
++	0x41, 0x13, 0xba, 0xb0, 0xdc, 0xe1, 0xd3, 0x2a,
++	0x7c, 0x22, 0xb3, 0xca, 0xac, 0x6a, 0x37, 0x98,
++	0x3e, 0x1d, 0x40, 0x97, 0xf7, 0x9b, 0x1d, 0x36,
++	0x6b, 0xb3, 0x28, 0xbd, 0x60, 0x82, 0x47, 0x34,
++	0xaa, 0x2f, 0x7d, 0xe9, 0xa8, 0x70, 0x81, 0x57,
++	0xd4, 0xb9, 0x77, 0x0a, 0x9d, 0x29, 0xa7, 0x84,
++	0x52, 0x4f, 0xc2, 0x4a, 0x40, 0x3b, 0x3c, 0xd4,
++	0xc9, 0x2a, 0xdb, 0x4a, 0x53, 0xc4, 0xbe, 0x80,
++	0xe9, 0x51, 0x7f, 0x8f, 0xc7, 0xa2, 0xce, 0x82,
++	0x5c, 0x91, 0x1e, 0x74, 0xd9, 0xd0, 0xbd, 0xd5,
++	0xf3, 0xfd, 0xda, 0x4d, 0x25, 0xb4, 0xbb, 0x2d,
++	0xac, 0x2f, 0x3d, 0x71, 0x85, 0x7b, 0xcf, 0x3c,
++	0x7b, 0x3e, 0x0e, 0x22, 0x78, 0x0c, 0x29, 0xbf,
++	0xe4, 0xf4, 0x57, 0xb3, 0xcb, 0x49, 0xa0, 0xfc,
++	0x1e, 0x05, 0x4e, 0x16, 0xbc, 0xd5, 0xa8, 0xa3,
++	0xee, 0x05, 0x35, 0xc6, 0x7c, 0xab, 0x60, 0x14,
++	0x55, 0x1a, 0x8e, 0xc5, 0x88, 0x5d, 0xd5, 0x81,
++	0xc2, 0x81, 0xa5, 0xc4, 0x60, 0xdb, 0xaf, 0x77,
++	0x91, 0xe1, 0xce, 0xa2, 0x7e, 0x7f, 0x42, 0xe3,
++	0xb0, 0x13, 0x1c, 0x1f, 0x25, 0x60, 0x21, 0xe2,
++	0x40, 0x5f, 0x99, 0xb7, 0x73, 0xec, 0x9b, 0x2b,
++	0xf0, 0x65, 0x11, 0xc8, 0xd0, 0x0a, 0x9f, 0xd3
++};
++static const u8 dec_assoc007[] __initconst = { };
++static const u8 dec_nonce007[] __initconst = {
++	0xde, 0x7b, 0xef, 0xc3, 0x65, 0x1b, 0x68, 0xb0
++};
++static const u8 dec_key007[] __initconst = {
++	0x8d, 0xb8, 0x91, 0x48, 0xf0, 0xe7, 0x0a, 0xbd,
++	0xf9, 0x3f, 0xcd, 0xd9, 0xa0, 0x1e, 0x42, 0x4c,
++	0xe7, 0xde, 0x25, 0x3d, 0xa3, 0xd7, 0x05, 0x80,
++	0x8d, 0xf2, 0x82, 0xac, 0x44, 0x16, 0x51, 0x01
++};
++
++static const u8 dec_input008[] __initconst = {
++	0x14, 0xf6, 0x41, 0x37, 0xa6, 0xd4, 0x27, 0xcd,
++	0xdb, 0x06, 0x3e, 0x9a, 0x4e, 0xab, 0xd5, 0xb1,
++	0x1e, 0x6b, 0xd2, 0xbc, 0x11, 0xf4, 0x28, 0x93,
++	0x63, 0x54, 0xef, 0xbb, 0x5e, 0x1d, 0x3a, 0x1d,
++	0x37, 0x3c, 0x0a, 0x6c, 0x1e, 0xc2, 0xd1, 0x2c,
++	0xb5, 0xa3, 0xb5, 0x7b, 0xb8, 0x8f, 0x25, 0xa6,
++	0x1b, 0x61, 0x1c, 0xec, 0x28, 0x58, 0x26, 0xa4,
++	0xa8, 0x33, 0x28, 0x25, 0x5c, 0x45, 0x05, 0xe5,
++	0x6c, 0x99, 0xe5, 0x45, 0xc4, 0xa2, 0x03, 0x84,
++	0x03, 0x73, 0x1e, 0x8c, 0x49, 0xac, 0x20, 0xdd,
++	0x8d, 0xb3, 0xc4, 0xf5, 0xe7, 0x4f, 0xf1, 0xed,
++	0xa1, 0x98, 0xde, 0xa4, 0x96, 0xdd, 0x2f, 0xab,
++	0xab, 0x97, 0xcf, 0x3e, 0xd2, 0x9e, 0xb8, 0x13,
++	0x07, 0x28, 0x29, 0x19, 0xaf, 0xfd, 0xf2, 0x49,
++	0x43, 0xea, 0x49, 0x26, 0x91, 0xc1, 0x07, 0xd6,
++	0xbb, 0x81, 0x75, 0x35, 0x0d, 0x24, 0x7f, 0xc8,
++	0xda, 0xd4, 0xb7, 0xeb, 0xe8, 0x5c, 0x09, 0xa2,
++	0x2f, 0xdc, 0x28, 0x7d, 0x3a, 0x03, 0xfa, 0x94,
++	0xb5, 0x1d, 0x17, 0x99, 0x36, 0xc3, 0x1c, 0x18,
++	0x34, 0xe3, 0x9f, 0xf5, 0x55, 0x7c, 0xb0, 0x60,
++	0x9d, 0xff, 0xac, 0xd4, 0x61, 0xf2, 0xad, 0xf8,
++	0xce, 0xc7, 0xbe, 0x5c, 0xd2, 0x95, 0xa8, 0x4b,
++	0x77, 0x13, 0x19, 0x59, 0x26, 0xc9, 0xb7, 0x8f,
++	0x6a, 0xcb, 0x2d, 0x37, 0x91, 0xea, 0x92, 0x9c,
++	0x94, 0x5b, 0xda, 0x0b, 0xce, 0xfe, 0x30, 0x20,
++	0xf8, 0x51, 0xad, 0xf2, 0xbe, 0xe7, 0xc7, 0xff,
++	0xb3, 0x33, 0x91, 0x6a, 0xc9, 0x1a, 0x41, 0xc9,
++	0x0f, 0xf3, 0x10, 0x0e, 0xfd, 0x53, 0xff, 0x6c,
++	0x16, 0x52, 0xd9, 0xf3, 0xf7, 0x98, 0x2e, 0xc9,
++	0x07, 0x31, 0x2c, 0x0c, 0x72, 0xd7, 0xc5, 0xc6,
++	0x08, 0x2a, 0x7b, 0xda, 0xbd, 0x7e, 0x02, 0xea,
++	0x1a, 0xbb, 0xf2, 0x04, 0x27, 0x61, 0x28, 0x8e,
++	0xf5, 0x04, 0x03, 0x1f, 0x4c, 0x07, 0x55, 0x82,
++	0xec, 0x1e, 0xd7, 0x8b, 0x2f, 0x65, 0x56, 0xd1,
++	0xd9, 0x1e, 0x3c, 0xe9, 0x1f, 0x5e, 0x98, 0x70,
++	0x38, 0x4a, 0x8c, 0x49, 0xc5, 0x43, 0xa0, 0xa1,
++	0x8b, 0x74, 0x9d, 0x4c, 0x62, 0x0d, 0x10, 0x0c,
++	0xf4, 0x6c, 0x8f, 0xe0, 0xaa, 0x9a, 0x8d, 0xb7,
++	0xe0, 0xbe, 0x4c, 0x87, 0xf1, 0x98, 0x2f, 0xcc,
++	0xed, 0xc0, 0x52, 0x29, 0xdc, 0x83, 0xf8, 0xfc,
++	0x2c, 0x0e, 0xa8, 0x51, 0x4d, 0x80, 0x0d, 0xa3,
++	0xfe, 0xd8, 0x37, 0xe7, 0x41, 0x24, 0xfc, 0xfb,
++	0x75, 0xe3, 0x71, 0x7b, 0x57, 0x45, 0xf5, 0x97,
++	0x73, 0x65, 0x63, 0x14, 0x74, 0xb8, 0x82, 0x9f,
++	0xf8, 0x60, 0x2f, 0x8a, 0xf2, 0x4e, 0xf1, 0x39,
++	0xda, 0x33, 0x91, 0xf8, 0x36, 0xe0, 0x8d, 0x3f,
++	0x1f, 0x3b, 0x56, 0xdc, 0xa0, 0x8f, 0x3c, 0x9d,
++	0x71, 0x52, 0xa7, 0xb8, 0xc0, 0xa5, 0xc6, 0xa2,
++	0x73, 0xda, 0xf4, 0x4b, 0x74, 0x5b, 0x00, 0x3d,
++	0x99, 0xd7, 0x96, 0xba, 0xe6, 0xe1, 0xa6, 0x96,
++	0x38, 0xad, 0xb3, 0xc0, 0xd2, 0xba, 0x91, 0x6b,
++	0xf9, 0x19, 0xdd, 0x3b, 0xbe, 0xbe, 0x9c, 0x20,
++	0x50, 0xba, 0xa1, 0xd0, 0xce, 0x11, 0xbd, 0x95,
++	0xd8, 0xd1, 0xdd, 0x33, 0x85, 0x74, 0xdc, 0xdb,
++	0x66, 0x76, 0x44, 0xdc, 0x03, 0x74, 0x48, 0x35,
++	0x98, 0xb1, 0x18, 0x47, 0x94, 0x7d, 0xff, 0x62,
++	0xe4, 0x58, 0x78, 0xab, 0xed, 0x95, 0x36, 0xd9,
++	0x84, 0x91, 0x82, 0x64, 0x41, 0xbb, 0x58, 0xe6,
++	0x1c, 0x20, 0x6d, 0x15, 0x6b, 0x13, 0x96, 0xe8,
++	0x35, 0x7f, 0xdc, 0x40, 0x2c, 0xe9, 0xbc, 0x8a,
++	0x4f, 0x92, 0xec, 0x06, 0x2d, 0x50, 0xdf, 0x93,
++	0x5d, 0x65, 0x5a, 0xa8, 0xfc, 0x20, 0x50, 0x14,
++	0xa9, 0x8a, 0x7e, 0x1d, 0x08, 0x1f, 0xe2, 0x99,
++	0xd0, 0xbe, 0xfb, 0x3a, 0x21, 0x9d, 0xad, 0x86,
++	0x54, 0xfd, 0x0d, 0x98, 0x1c, 0x5a, 0x6f, 0x1f,
++	0x9a, 0x40, 0xcd, 0xa2, 0xff, 0x6a, 0xf1, 0x54
++};
++static const u8 dec_output008[] __initconst = {
++	0xc3, 0x09, 0x94, 0x62, 0xe6, 0x46, 0x2e, 0x10,
++	0xbe, 0x00, 0xe4, 0xfc, 0xf3, 0x40, 0xa3, 0xe2,
++	0x0f, 0xc2, 0x8b, 0x28, 0xdc, 0xba, 0xb4, 0x3c,
++	0xe4, 0x21, 0x58, 0x61, 0xcd, 0x8b, 0xcd, 0xfb,
++	0xac, 0x94, 0xa1, 0x45, 0xf5, 0x1c, 0xe1, 0x12,
++	0xe0, 0x3b, 0x67, 0x21, 0x54, 0x5e, 0x8c, 0xaa,
++	0xcf, 0xdb, 0xb4, 0x51, 0xd4, 0x13, 0xda, 0xe6,
++	0x83, 0x89, 0xb6, 0x92, 0xe9, 0x21, 0x76, 0xa4,
++	0x93, 0x7d, 0x0e, 0xfd, 0x96, 0x36, 0x03, 0x91,
++	0x43, 0x5c, 0x92, 0x49, 0x62, 0x61, 0x7b, 0xeb,
++	0x43, 0x89, 0xb8, 0x12, 0x20, 0x43, 0xd4, 0x47,
++	0x06, 0x84, 0xee, 0x47, 0xe9, 0x8a, 0x73, 0x15,
++	0x0f, 0x72, 0xcf, 0xed, 0xce, 0x96, 0xb2, 0x7f,
++	0x21, 0x45, 0x76, 0xeb, 0x26, 0x28, 0x83, 0x6a,
++	0xad, 0xaa, 0xa6, 0x81, 0xd8, 0x55, 0xb1, 0xa3,
++	0x85, 0xb3, 0x0c, 0xdf, 0xf1, 0x69, 0x2d, 0x97,
++	0x05, 0x2a, 0xbc, 0x7c, 0x7b, 0x25, 0xf8, 0x80,
++	0x9d, 0x39, 0x25, 0xf3, 0x62, 0xf0, 0x66, 0x5e,
++	0xf4, 0xa0, 0xcf, 0xd8, 0xfd, 0x4f, 0xb1, 0x1f,
++	0x60, 0x3a, 0x08, 0x47, 0xaf, 0xe1, 0xf6, 0x10,
++	0x77, 0x09, 0xa7, 0x27, 0x8f, 0x9a, 0x97, 0x5a,
++	0x26, 0xfa, 0xfe, 0x41, 0x32, 0x83, 0x10, 0xe0,
++	0x1d, 0xbf, 0x64, 0x0d, 0xf4, 0x1c, 0x32, 0x35,
++	0xe5, 0x1b, 0x36, 0xef, 0xd4, 0x4a, 0x93, 0x4d,
++	0x00, 0x7c, 0xec, 0x02, 0x07, 0x8b, 0x5d, 0x7d,
++	0x1b, 0x0e, 0xd1, 0xa6, 0xa5, 0x5d, 0x7d, 0x57,
++	0x88, 0xa8, 0xcc, 0x81, 0xb4, 0x86, 0x4e, 0xb4,
++	0x40, 0xe9, 0x1d, 0xc3, 0xb1, 0x24, 0x3e, 0x7f,
++	0xcc, 0x8a, 0x24, 0x9b, 0xdf, 0x6d, 0xf0, 0x39,
++	0x69, 0x3e, 0x4c, 0xc0, 0x96, 0xe4, 0x13, 0xda,
++	0x90, 0xda, 0xf4, 0x95, 0x66, 0x8b, 0x17, 0x17,
++	0xfe, 0x39, 0x43, 0x25, 0xaa, 0xda, 0xa0, 0x43,
++	0x3c, 0xb1, 0x41, 0x02, 0xa3, 0xf0, 0xa7, 0x19,
++	0x59, 0xbc, 0x1d, 0x7d, 0x6c, 0x6d, 0x91, 0x09,
++	0x5c, 0xb7, 0x5b, 0x01, 0xd1, 0x6f, 0x17, 0x21,
++	0x97, 0xbf, 0x89, 0x71, 0xa5, 0xb0, 0x6e, 0x07,
++	0x45, 0xfd, 0x9d, 0xea, 0x07, 0xf6, 0x7a, 0x9f,
++	0x10, 0x18, 0x22, 0x30, 0x73, 0xac, 0xd4, 0x6b,
++	0x72, 0x44, 0xed, 0xd9, 0x19, 0x9b, 0x2d, 0x4a,
++	0x41, 0xdd, 0xd1, 0x85, 0x5e, 0x37, 0x19, 0xed,
++	0xd2, 0x15, 0x8f, 0x5e, 0x91, 0xdb, 0x33, 0xf2,
++	0xe4, 0xdb, 0xff, 0x98, 0xfb, 0xa3, 0xb5, 0xca,
++	0x21, 0x69, 0x08, 0xe7, 0x8a, 0xdf, 0x90, 0xff,
++	0x3e, 0xe9, 0x20, 0x86, 0x3c, 0xe9, 0xfc, 0x0b,
++	0xfe, 0x5c, 0x61, 0xaa, 0x13, 0x92, 0x7f, 0x7b,
++	0xec, 0xe0, 0x6d, 0xa8, 0x23, 0x22, 0xf6, 0x6b,
++	0x77, 0xc4, 0xfe, 0x40, 0x07, 0x3b, 0xb6, 0xf6,
++	0x8e, 0x5f, 0xd4, 0xb9, 0xb7, 0x0f, 0x21, 0x04,
++	0xef, 0x83, 0x63, 0x91, 0x69, 0x40, 0xa3, 0x48,
++	0x5c, 0xd2, 0x60, 0xf9, 0x4f, 0x6c, 0x47, 0x8b,
++	0x3b, 0xb1, 0x9f, 0x8e, 0xee, 0x16, 0x8a, 0x13,
++	0xfc, 0x46, 0x17, 0xc3, 0xc3, 0x32, 0x56, 0xf8,
++	0x3c, 0x85, 0x3a, 0xb6, 0x3e, 0xaa, 0x89, 0x4f,
++	0xb3, 0xdf, 0x38, 0xfd, 0xf1, 0xe4, 0x3a, 0xc0,
++	0xe6, 0x58, 0xb5, 0x8f, 0xc5, 0x29, 0xa2, 0x92,
++	0x4a, 0xb6, 0xa0, 0x34, 0x7f, 0xab, 0xb5, 0x8a,
++	0x90, 0xa1, 0xdb, 0x4d, 0xca, 0xb6, 0x2c, 0x41,
++	0x3c, 0xf7, 0x2b, 0x21, 0xc3, 0xfd, 0xf4, 0x17,
++	0x5c, 0xb5, 0x33, 0x17, 0x68, 0x2b, 0x08, 0x30,
++	0xf3, 0xf7, 0x30, 0x3c, 0x96, 0xe6, 0x6a, 0x20,
++	0x97, 0xe7, 0x4d, 0x10, 0x5f, 0x47, 0x5f, 0x49,
++	0x96, 0x09, 0xf0, 0x27, 0x91, 0xc8, 0xf8, 0x5a,
++	0x2e, 0x79, 0xb5, 0xe2, 0xb8, 0xe8, 0xb9, 0x7b,
++	0xd5, 0x10, 0xcb, 0xff, 0x5d, 0x14, 0x73, 0xf3
++};
++static const u8 dec_assoc008[] __initconst = { };
++static const u8 dec_nonce008[] __initconst = {
++	0x0e, 0x0d, 0x57, 0xbb, 0x7b, 0x40, 0x54, 0x02
++};
++static const u8 dec_key008[] __initconst = {
++	0xf2, 0xaa, 0x4f, 0x99, 0xfd, 0x3e, 0xa8, 0x53,
++	0xc1, 0x44, 0xe9, 0x81, 0x18, 0xdc, 0xf5, 0xf0,
++	0x3e, 0x44, 0x15, 0x59, 0xe0, 0xc5, 0x44, 0x86,
++	0xc3, 0x91, 0xa8, 0x75, 0xc0, 0x12, 0x46, 0xba
++};
++
++static const u8 dec_input009[] __initconst = {
++	0xfd, 0x81, 0x8d, 0xd0, 0x3d, 0xb4, 0xd5, 0xdf,
++	0xd3, 0x42, 0x47, 0x5a, 0x6d, 0x19, 0x27, 0x66,
++	0x4b, 0x2e, 0x0c, 0x27, 0x9c, 0x96, 0x4c, 0x72,
++	0x02, 0xa3, 0x65, 0xc3, 0xb3, 0x6f, 0x2e, 0xbd,
++	0x63, 0x8a, 0x4a, 0x5d, 0x29, 0xa2, 0xd0, 0x28,
++	0x48, 0xc5, 0x3d, 0x98, 0xa3, 0xbc, 0xe0, 0xbe,
++	0x3b, 0x3f, 0xe6, 0x8a, 0xa4, 0x7f, 0x53, 0x06,
++	0xfa, 0x7f, 0x27, 0x76, 0x72, 0x31, 0xa1, 0xf5,
++	0xd6, 0x0c, 0x52, 0x47, 0xba, 0xcd, 0x4f, 0xd7,
++	0xeb, 0x05, 0x48, 0x0d, 0x7c, 0x35, 0x4a, 0x09,
++	0xc9, 0x76, 0x71, 0x02, 0xa3, 0xfb, 0xb7, 0x1a,
++	0x65, 0xb7, 0xed, 0x98, 0xc6, 0x30, 0x8a, 0x00,
++	0xae, 0xa1, 0x31, 0xe5, 0xb5, 0x9e, 0x6d, 0x62,
++	0xda, 0xda, 0x07, 0x0f, 0x38, 0x38, 0xd3, 0xcb,
++	0xc1, 0xb0, 0xad, 0xec, 0x72, 0xec, 0xb1, 0xa2,
++	0x7b, 0x59, 0xf3, 0x3d, 0x2b, 0xef, 0xcd, 0x28,
++	0x5b, 0x83, 0xcc, 0x18, 0x91, 0x88, 0xb0, 0x2e,
++	0xf9, 0x29, 0x31, 0x18, 0xf9, 0x4e, 0xe9, 0x0a,
++	0x91, 0x92, 0x9f, 0xae, 0x2d, 0xad, 0xf4, 0xe6,
++	0x1a, 0xe2, 0xa4, 0xee, 0x47, 0x15, 0xbf, 0x83,
++	0x6e, 0xd7, 0x72, 0x12, 0x3b, 0x2d, 0x24, 0xe9,
++	0xb2, 0x55, 0xcb, 0x3c, 0x10, 0xf0, 0x24, 0x8a,
++	0x4a, 0x02, 0xea, 0x90, 0x25, 0xf0, 0xb4, 0x79,
++	0x3a, 0xef, 0x6e, 0xf5, 0x52, 0xdf, 0xb0, 0x0a,
++	0xcd, 0x24, 0x1c, 0xd3, 0x2e, 0x22, 0x74, 0xea,
++	0x21, 0x6f, 0xe9, 0xbd, 0xc8, 0x3e, 0x36, 0x5b,
++	0x19, 0xf1, 0xca, 0x99, 0x0a, 0xb4, 0xa7, 0x52,
++	0x1a, 0x4e, 0xf2, 0xad, 0x8d, 0x56, 0x85, 0xbb,
++	0x64, 0x89, 0xba, 0x26, 0xf9, 0xc7, 0xe1, 0x89,
++	0x19, 0x22, 0x77, 0xc3, 0xa8, 0xfc, 0xff, 0xad,
++	0xfe, 0xb9, 0x48, 0xae, 0x12, 0x30, 0x9f, 0x19,
++	0xfb, 0x1b, 0xef, 0x14, 0x87, 0x8a, 0x78, 0x71,
++	0xf3, 0xf4, 0xb7, 0x00, 0x9c, 0x1d, 0xb5, 0x3d,
++	0x49, 0x00, 0x0c, 0x06, 0xd4, 0x50, 0xf9, 0x54,
++	0x45, 0xb2, 0x5b, 0x43, 0xdb, 0x6d, 0xcf, 0x1a,
++	0xe9, 0x7a, 0x7a, 0xcf, 0xfc, 0x8a, 0x4e, 0x4d,
++	0x0b, 0x07, 0x63, 0x28, 0xd8, 0xe7, 0x08, 0x95,
++	0xdf, 0xa6, 0x72, 0x93, 0x2e, 0xbb, 0xa0, 0x42,
++	0x89, 0x16, 0xf1, 0xd9, 0x0c, 0xf9, 0xa1, 0x16,
++	0xfd, 0xd9, 0x03, 0xb4, 0x3b, 0x8a, 0xf5, 0xf6,
++	0xe7, 0x6b, 0x2e, 0x8e, 0x4c, 0x3d, 0xe2, 0xaf,
++	0x08, 0x45, 0x03, 0xff, 0x09, 0xb6, 0xeb, 0x2d,
++	0xc6, 0x1b, 0x88, 0x94, 0xac, 0x3e, 0xf1, 0x9f,
++	0x0e, 0x0e, 0x2b, 0xd5, 0x00, 0x4d, 0x3f, 0x3b,
++	0x53, 0xae, 0xaf, 0x1c, 0x33, 0x5f, 0x55, 0x6e,
++	0x8d, 0xaf, 0x05, 0x7a, 0x10, 0x34, 0xc9, 0xf4,
++	0x66, 0xcb, 0x62, 0x12, 0xa6, 0xee, 0xe8, 0x1c,
++	0x5d, 0x12, 0x86, 0xdb, 0x6f, 0x1c, 0x33, 0xc4,
++	0x1c, 0xda, 0x82, 0x2d, 0x3b, 0x59, 0xfe, 0xb1,
++	0xa4, 0x59, 0x41, 0x86, 0xd0, 0xef, 0xae, 0xfb,
++	0xda, 0x6d, 0x11, 0xb8, 0xca, 0xe9, 0x6e, 0xff,
++	0xf7, 0xa9, 0xd9, 0x70, 0x30, 0xfc, 0x53, 0xe2,
++	0xd7, 0xa2, 0x4e, 0xc7, 0x91, 0xd9, 0x07, 0x06,
++	0xaa, 0xdd, 0xb0, 0x59, 0x28, 0x1d, 0x00, 0x66,
++	0xc5, 0x54, 0xc2, 0xfc, 0x06, 0xda, 0x05, 0x90,
++	0x52, 0x1d, 0x37, 0x66, 0xee, 0xf0, 0xb2, 0x55,
++	0x8a, 0x5d, 0xd2, 0x38, 0x86, 0x94, 0x9b, 0xfc,
++	0x10, 0x4c, 0xa1, 0xb9, 0x64, 0x3e, 0x44, 0xb8,
++	0x5f, 0xb0, 0x0c, 0xec, 0xe0, 0xc9, 0xe5, 0x62,
++	0x75, 0x3f, 0x09, 0xd5, 0xf5, 0xd9, 0x26, 0xba,
++	0x9e, 0xd2, 0xf4, 0xb9, 0x48, 0x0a, 0xbc, 0xa2,
++	0xd6, 0x7c, 0x36, 0x11, 0x7d, 0x26, 0x81, 0x89,
++	0xcf, 0xa4, 0xad, 0x73, 0x0e, 0xee, 0xcc, 0x06,
++	0xa9, 0xdb, 0xb1, 0xfd, 0xfb, 0x09, 0x7f, 0x90,
++	0x42, 0x37, 0x2f, 0xe1, 0x9c, 0x0f, 0x6f, 0xcf,
++	0x43, 0xb5, 0xd9, 0x90, 0xe1, 0x85, 0xf5, 0xa8,
++	0xae
++};
++static const u8 dec_output009[] __initconst = {
++	0xe6, 0xc3, 0xdb, 0x63, 0x55, 0x15, 0xe3, 0x5b,
++	0xb7, 0x4b, 0x27, 0x8b, 0x5a, 0xdd, 0xc2, 0xe8,
++	0x3a, 0x6b, 0xd7, 0x81, 0x96, 0x35, 0x97, 0xca,
++	0xd7, 0x68, 0xe8, 0xef, 0xce, 0xab, 0xda, 0x09,
++	0x6e, 0xd6, 0x8e, 0xcb, 0x55, 0xb5, 0xe1, 0xe5,
++	0x57, 0xfd, 0xc4, 0xe3, 0xe0, 0x18, 0x4f, 0x85,
++	0xf5, 0x3f, 0x7e, 0x4b, 0x88, 0xc9, 0x52, 0x44,
++	0x0f, 0xea, 0xaf, 0x1f, 0x71, 0x48, 0x9f, 0x97,
++	0x6d, 0xb9, 0x6f, 0x00, 0xa6, 0xde, 0x2b, 0x77,
++	0x8b, 0x15, 0xad, 0x10, 0xa0, 0x2b, 0x7b, 0x41,
++	0x90, 0x03, 0x2d, 0x69, 0xae, 0xcc, 0x77, 0x7c,
++	0xa5, 0x9d, 0x29, 0x22, 0xc2, 0xea, 0xb4, 0x00,
++	0x1a, 0xd2, 0x7a, 0x98, 0x8a, 0xf9, 0xf7, 0x82,
++	0xb0, 0xab, 0xd8, 0xa6, 0x94, 0x8d, 0x58, 0x2f,
++	0x01, 0x9e, 0x00, 0x20, 0xfc, 0x49, 0xdc, 0x0e,
++	0x03, 0xe8, 0x45, 0x10, 0xd6, 0xa8, 0xda, 0x55,
++	0x10, 0x9a, 0xdf, 0x67, 0x22, 0x8b, 0x43, 0xab,
++	0x00, 0xbb, 0x02, 0xc8, 0xdd, 0x7b, 0x97, 0x17,
++	0xd7, 0x1d, 0x9e, 0x02, 0x5e, 0x48, 0xde, 0x8e,
++	0xcf, 0x99, 0x07, 0x95, 0x92, 0x3c, 0x5f, 0x9f,
++	0xc5, 0x8a, 0xc0, 0x23, 0xaa, 0xd5, 0x8c, 0x82,
++	0x6e, 0x16, 0x92, 0xb1, 0x12, 0x17, 0x07, 0xc3,
++	0xfb, 0x36, 0xf5, 0x6c, 0x35, 0xd6, 0x06, 0x1f,
++	0x9f, 0xa7, 0x94, 0xa2, 0x38, 0x63, 0x9c, 0xb0,
++	0x71, 0xb3, 0xa5, 0xd2, 0xd8, 0xba, 0x9f, 0x08,
++	0x01, 0xb3, 0xff, 0x04, 0x97, 0x73, 0x45, 0x1b,
++	0xd5, 0xa9, 0x9c, 0x80, 0xaf, 0x04, 0x9a, 0x85,
++	0xdb, 0x32, 0x5b, 0x5d, 0x1a, 0xc1, 0x36, 0x28,
++	0x10, 0x79, 0xf1, 0x3c, 0xbf, 0x1a, 0x41, 0x5c,
++	0x4e, 0xdf, 0xb2, 0x7c, 0x79, 0x3b, 0x7a, 0x62,
++	0x3d, 0x4b, 0xc9, 0x9b, 0x2a, 0x2e, 0x7c, 0xa2,
++	0xb1, 0x11, 0x98, 0xa7, 0x34, 0x1a, 0x00, 0xf3,
++	0xd1, 0xbc, 0x18, 0x22, 0xba, 0x02, 0x56, 0x62,
++	0x31, 0x10, 0x11, 0x6d, 0xe0, 0x54, 0x9d, 0x40,
++	0x1f, 0x26, 0x80, 0x41, 0xca, 0x3f, 0x68, 0x0f,
++	0x32, 0x1d, 0x0a, 0x8e, 0x79, 0xd8, 0xa4, 0x1b,
++	0x29, 0x1c, 0x90, 0x8e, 0xc5, 0xe3, 0xb4, 0x91,
++	0x37, 0x9a, 0x97, 0x86, 0x99, 0xd5, 0x09, 0xc5,
++	0xbb, 0xa3, 0x3f, 0x21, 0x29, 0x82, 0x14, 0x5c,
++	0xab, 0x25, 0xfb, 0xf2, 0x4f, 0x58, 0x26, 0xd4,
++	0x83, 0xaa, 0x66, 0x89, 0x67, 0x7e, 0xc0, 0x49,
++	0xe1, 0x11, 0x10, 0x7f, 0x7a, 0xda, 0x29, 0x04,
++	0xff, 0xf0, 0xcb, 0x09, 0x7c, 0x9d, 0xfa, 0x03,
++	0x6f, 0x81, 0x09, 0x31, 0x60, 0xfb, 0x08, 0xfa,
++	0x74, 0xd3, 0x64, 0x44, 0x7c, 0x55, 0x85, 0xec,
++	0x9c, 0x6e, 0x25, 0xb7, 0x6c, 0xc5, 0x37, 0xb6,
++	0x83, 0x87, 0x72, 0x95, 0x8b, 0x9d, 0xe1, 0x69,
++	0x5c, 0x31, 0x95, 0x42, 0xa6, 0x2c, 0xd1, 0x36,
++	0x47, 0x1f, 0xec, 0x54, 0xab, 0xa2, 0x1c, 0xd8,
++	0x00, 0xcc, 0xbc, 0x0d, 0x65, 0xe2, 0x67, 0xbf,
++	0xbc, 0xea, 0xee, 0x9e, 0xe4, 0x36, 0x95, 0xbe,
++	0x73, 0xd9, 0xa6, 0xd9, 0x0f, 0xa0, 0xcc, 0x82,
++	0x76, 0x26, 0xad, 0x5b, 0x58, 0x6c, 0x4e, 0xab,
++	0x29, 0x64, 0xd3, 0xd9, 0xa9, 0x08, 0x8c, 0x1d,
++	0xa1, 0x4f, 0x80, 0xd8, 0x3f, 0x94, 0xfb, 0xd3,
++	0x7b, 0xfc, 0xd1, 0x2b, 0xc3, 0x21, 0xeb, 0xe5,
++	0x1c, 0x84, 0x23, 0x7f, 0x4b, 0xfa, 0xdb, 0x34,
++	0x18, 0xa2, 0xc2, 0xe5, 0x13, 0xfe, 0x6c, 0x49,
++	0x81, 0xd2, 0x73, 0xe7, 0xe2, 0xd7, 0xe4, 0x4f,
++	0x4b, 0x08, 0x6e, 0xb1, 0x12, 0x22, 0x10, 0x9d,
++	0xac, 0x51, 0x1e, 0x17, 0xd9, 0x8a, 0x0b, 0x42,
++	0x88, 0x16, 0x81, 0x37, 0x7c, 0x6a, 0xf7, 0xef,
++	0x2d, 0xe3, 0xd9, 0xf8, 0x5f, 0xe0, 0x53, 0x27,
++	0x74, 0xb9, 0xe2, 0xd6, 0x1c, 0x80, 0x2c, 0x52,
++	0x65
++};
++static const u8 dec_assoc009[] __initconst = {
++	0x5a, 0x27, 0xff, 0xeb, 0xdf, 0x84, 0xb2, 0x9e,
++	0xef
++};
++static const u8 dec_nonce009[] __initconst = {
++	0xef, 0x2d, 0x63, 0xee, 0x6b, 0x80, 0x8b, 0x78
++};
++static const u8 dec_key009[] __initconst = {
++	0xea, 0xbc, 0x56, 0x99, 0xe3, 0x50, 0xff, 0xc5,
++	0xcc, 0x1a, 0xd7, 0xc1, 0x57, 0x72, 0xea, 0x86,
++	0x5b, 0x89, 0x88, 0x61, 0x3d, 0x2f, 0x9b, 0xb2,
++	0xe7, 0x9c, 0xec, 0x74, 0x6e, 0x3e, 0xf4, 0x3b
++};
++
++static const u8 dec_input010[] __initconst = {
++	0xe5, 0x26, 0xa4, 0x3d, 0xbd, 0x33, 0xd0, 0x4b,
++	0x6f, 0x05, 0xa7, 0x6e, 0x12, 0x7a, 0xd2, 0x74,
++	0xa6, 0xdd, 0xbd, 0x95, 0xeb, 0xf9, 0xa4, 0xf1,
++	0x59, 0x93, 0x91, 0x70, 0xd9, 0xfe, 0x9a, 0xcd,
++	0x53, 0x1f, 0x3a, 0xab, 0xa6, 0x7c, 0x9f, 0xa6,
++	0x9e, 0xbd, 0x99, 0xd9, 0xb5, 0x97, 0x44, 0xd5,
++	0x14, 0x48, 0x4d, 0x9d, 0xc0, 0xd0, 0x05, 0x96,
++	0xeb, 0x4c, 0x78, 0x55, 0x09, 0x08, 0x01, 0x02,
++	0x30, 0x90, 0x7b, 0x96, 0x7a, 0x7b, 0x5f, 0x30,
++	0x41, 0x24, 0xce, 0x68, 0x61, 0x49, 0x86, 0x57,
++	0x82, 0xdd, 0x53, 0x1c, 0x51, 0x28, 0x2b, 0x53,
++	0x6e, 0x2d, 0xc2, 0x20, 0x4c, 0xdd, 0x8f, 0x65,
++	0x10, 0x20, 0x50, 0xdd, 0x9d, 0x50, 0xe5, 0x71,
++	0x40, 0x53, 0x69, 0xfc, 0x77, 0x48, 0x11, 0xb9,
++	0xde, 0xa4, 0x8d, 0x58, 0xe4, 0xa6, 0x1a, 0x18,
++	0x47, 0x81, 0x7e, 0xfc, 0xdd, 0xf6, 0xef, 0xce,
++	0x2f, 0x43, 0x68, 0xd6, 0x06, 0xe2, 0x74, 0x6a,
++	0xad, 0x90, 0xf5, 0x37, 0xf3, 0x3d, 0x82, 0x69,
++	0x40, 0xe9, 0x6b, 0xa7, 0x3d, 0xa8, 0x1e, 0xd2,
++	0x02, 0x7c, 0xb7, 0x9b, 0xe4, 0xda, 0x8f, 0x95,
++	0x06, 0xc5, 0xdf, 0x73, 0xa3, 0x20, 0x9a, 0x49,
++	0xde, 0x9c, 0xbc, 0xee, 0x14, 0x3f, 0x81, 0x5e,
++	0xf8, 0x3b, 0x59, 0x3c, 0xe1, 0x68, 0x12, 0x5a,
++	0x3a, 0x76, 0x3a, 0x3f, 0xf7, 0x87, 0x33, 0x0a,
++	0x01, 0xb8, 0xd4, 0xed, 0xb6, 0xbe, 0x94, 0x5e,
++	0x70, 0x40, 0x56, 0x67, 0x1f, 0x50, 0x44, 0x19,
++	0xce, 0x82, 0x70, 0x10, 0x87, 0x13, 0x20, 0x0b,
++	0x4c, 0x5a, 0xb6, 0xf6, 0xa7, 0xae, 0x81, 0x75,
++	0x01, 0x81, 0xe6, 0x4b, 0x57, 0x7c, 0xdd, 0x6d,
++	0xf8, 0x1c, 0x29, 0x32, 0xf7, 0xda, 0x3c, 0x2d,
++	0xf8, 0x9b, 0x25, 0x6e, 0x00, 0xb4, 0xf7, 0x2f,
++	0xf7, 0x04, 0xf7, 0xa1, 0x56, 0xac, 0x4f, 0x1a,
++	0x64, 0xb8, 0x47, 0x55, 0x18, 0x7b, 0x07, 0x4d,
++	0xbd, 0x47, 0x24, 0x80, 0x5d, 0xa2, 0x70, 0xc5,
++	0xdd, 0x8e, 0x82, 0xd4, 0xeb, 0xec, 0xb2, 0x0c,
++	0x39, 0xd2, 0x97, 0xc1, 0xcb, 0xeb, 0xf4, 0x77,
++	0x59, 0xb4, 0x87, 0xef, 0xcb, 0x43, 0x2d, 0x46,
++	0x54, 0xd1, 0xa7, 0xd7, 0x15, 0x99, 0x0a, 0x43,
++	0xa1, 0xe0, 0x99, 0x33, 0x71, 0xc1, 0xed, 0xfe,
++	0x72, 0x46, 0x33, 0x8e, 0x91, 0x08, 0x9f, 0xc8,
++	0x2e, 0xca, 0xfa, 0xdc, 0x59, 0xd5, 0xc3, 0x76,
++	0x84, 0x9f, 0xa3, 0x37, 0x68, 0xc3, 0xf0, 0x47,
++	0x2c, 0x68, 0xdb, 0x5e, 0xc3, 0x49, 0x4c, 0xe8,
++	0x92, 0x85, 0xe2, 0x23, 0xd3, 0x3f, 0xad, 0x32,
++	0xe5, 0x2b, 0x82, 0xd7, 0x8f, 0x99, 0x0a, 0x59,
++	0x5c, 0x45, 0xd9, 0xb4, 0x51, 0x52, 0xc2, 0xae,
++	0xbf, 0x80, 0xcf, 0xc9, 0xc9, 0x51, 0x24, 0x2a,
++	0x3b, 0x3a, 0x4d, 0xae, 0xeb, 0xbd, 0x22, 0xc3,
++	0x0e, 0x0f, 0x59, 0x25, 0x92, 0x17, 0xe9, 0x74,
++	0xc7, 0x8b, 0x70, 0x70, 0x36, 0x55, 0x95, 0x75,
++	0x4b, 0xad, 0x61, 0x2b, 0x09, 0xbc, 0x82, 0xf2,
++	0x6e, 0x94, 0x43, 0xae, 0xc3, 0xd5, 0xcd, 0x8e,
++	0xfe, 0x5b, 0x9a, 0x88, 0x43, 0x01, 0x75, 0xb2,
++	0x23, 0x09, 0xf7, 0x89, 0x83, 0xe7, 0xfa, 0xf9,
++	0xb4, 0x9b, 0xf8, 0xef, 0xbd, 0x1c, 0x92, 0xc1,
++	0xda, 0x7e, 0xfe, 0x05, 0xba, 0x5a, 0xcd, 0x07,
++	0x6a, 0x78, 0x9e, 0x5d, 0xfb, 0x11, 0x2f, 0x79,
++	0x38, 0xb6, 0xc2, 0x5b, 0x6b, 0x51, 0xb4, 0x71,
++	0xdd, 0xf7, 0x2a, 0xe4, 0xf4, 0x72, 0x76, 0xad,
++	0xc2, 0xdd, 0x64, 0x5d, 0x79, 0xb6, 0xf5, 0x7a,
++	0x77, 0x20, 0x05, 0x3d, 0x30, 0x06, 0xd4, 0x4c,
++	0x0a, 0x2c, 0x98, 0x5a, 0xb9, 0xd4, 0x98, 0xa9,
++	0x3f, 0xc6, 0x12, 0xea, 0x3b, 0x4b, 0xc5, 0x79,
++	0x64, 0x63, 0x6b, 0x09, 0x54, 0x3b, 0x14, 0x27,
++	0xba, 0x99, 0x80, 0xc8, 0x72, 0xa8, 0x12, 0x90,
++	0x29, 0xba, 0x40, 0x54, 0x97, 0x2b, 0x7b, 0xfe,
++	0xeb, 0xcd, 0x01, 0x05, 0x44, 0x72, 0xdb, 0x99,
++	0xe4, 0x61, 0xc9, 0x69, 0xd6, 0xb9, 0x28, 0xd1,
++	0x05, 0x3e, 0xf9, 0x0b, 0x49, 0x0a, 0x49, 0xe9,
++	0x8d, 0x0e, 0xa7, 0x4a, 0x0f, 0xaf, 0x32, 0xd0,
++	0xe0, 0xb2, 0x3a, 0x55, 0x58, 0xfe, 0x5c, 0x28,
++	0x70, 0x51, 0x23, 0xb0, 0x7b, 0x6a, 0x5f, 0x1e,
++	0xb8, 0x17, 0xd7, 0x94, 0x15, 0x8f, 0xee, 0x20,
++	0xc7, 0x42, 0x25, 0x3e, 0x9a, 0x14, 0xd7, 0x60,
++	0x72, 0x39, 0x47, 0x48, 0xa9, 0xfe, 0xdd, 0x47,
++	0x0a, 0xb1, 0xe6, 0x60, 0x28, 0x8c, 0x11, 0x68,
++	0xe1, 0xff, 0xd7, 0xce, 0xc8, 0xbe, 0xb3, 0xfe,
++	0x27, 0x30, 0x09, 0x70, 0xd7, 0xfa, 0x02, 0x33,
++	0x3a, 0x61, 0x2e, 0xc7, 0xff, 0xa4, 0x2a, 0xa8,
++	0x6e, 0xb4, 0x79, 0x35, 0x6d, 0x4c, 0x1e, 0x38,
++	0xf8, 0xee, 0xd4, 0x84, 0x4e, 0x6e, 0x28, 0xa7,
++	0xce, 0xc8, 0xc1, 0xcf, 0x80, 0x05, 0xf3, 0x04,
++	0xef, 0xc8, 0x18, 0x28, 0x2e, 0x8d, 0x5e, 0x0c,
++	0xdf, 0xb8, 0x5f, 0x96, 0xe8, 0xc6, 0x9c, 0x2f,
++	0xe5, 0xa6, 0x44, 0xd7, 0xe7, 0x99, 0x44, 0x0c,
++	0xec, 0xd7, 0x05, 0x60, 0x97, 0xbb, 0x74, 0x77,
++	0x58, 0xd5, 0xbb, 0x48, 0xde, 0x5a, 0xb2, 0x54,
++	0x7f, 0x0e, 0x46, 0x70, 0x6a, 0x6f, 0x78, 0xa5,
++	0x08, 0x89, 0x05, 0x4e, 0x7e, 0xa0, 0x69, 0xb4,
++	0x40, 0x60, 0x55, 0x77, 0x75, 0x9b, 0x19, 0xf2,
++	0xd5, 0x13, 0x80, 0x77, 0xf9, 0x4b, 0x3f, 0x1e,
++	0xee, 0xe6, 0x76, 0x84, 0x7b, 0x8c, 0xe5, 0x27,
++	0xa8, 0x0a, 0x91, 0x01, 0x68, 0x71, 0x8a, 0x3f,
++	0x06, 0xab, 0xf6, 0xa9, 0xa5, 0xe6, 0x72, 0x92,
++	0xe4, 0x67, 0xe2, 0xa2, 0x46, 0x35, 0x84, 0x55,
++	0x7d, 0xca, 0xa8, 0x85, 0xd0, 0xf1, 0x3f, 0xbe,
++	0xd7, 0x34, 0x64, 0xfc, 0xae, 0xe3, 0xe4, 0x04,
++	0x9f, 0x66, 0x02, 0xb9, 0x88, 0x10, 0xd9, 0xc4,
++	0x4c, 0x31, 0x43, 0x7a, 0x93, 0xe2, 0x9b, 0x56,
++	0x43, 0x84, 0xdc, 0xdc, 0xde, 0x1d, 0xa4, 0x02,
++	0x0e, 0xc2, 0xef, 0xc3, 0xf8, 0x78, 0xd1, 0xb2,
++	0x6b, 0x63, 0x18, 0xc9, 0xa9, 0xe5, 0x72, 0xd8,
++	0xf3, 0xb9, 0xd1, 0x8a, 0xc7, 0x1a, 0x02, 0x27,
++	0x20, 0x77, 0x10, 0xe5, 0xc8, 0xd4, 0x4a, 0x47,
++	0xe5, 0xdf, 0x5f, 0x01, 0xaa, 0xb0, 0xd4, 0x10,
++	0xbb, 0x69, 0xe3, 0x36, 0xc8, 0xe1, 0x3d, 0x43,
++	0xfb, 0x86, 0xcd, 0xcc, 0xbf, 0xf4, 0x88, 0xe0,
++	0x20, 0xca, 0xb7, 0x1b, 0xf1, 0x2f, 0x5c, 0xee,
++	0xd4, 0xd3, 0xa3, 0xcc, 0xa4, 0x1e, 0x1c, 0x47,
++	0xfb, 0xbf, 0xfc, 0xa2, 0x41, 0x55, 0x9d, 0xf6,
++	0x5a, 0x5e, 0x65, 0x32, 0x34, 0x7b, 0x52, 0x8d,
++	0xd5, 0xd0, 0x20, 0x60, 0x03, 0xab, 0x3f, 0x8c,
++	0xd4, 0x21, 0xea, 0x2a, 0xd9, 0xc4, 0xd0, 0xd3,
++	0x65, 0xd8, 0x7a, 0x13, 0x28, 0x62, 0x32, 0x4b,
++	0x2c, 0x87, 0x93, 0xa8, 0xb4, 0x52, 0x45, 0x09,
++	0x44, 0xec, 0xec, 0xc3, 0x17, 0xdb, 0x9a, 0x4d,
++	0x5c, 0xa9, 0x11, 0xd4, 0x7d, 0xaf, 0x9e, 0xf1,
++	0x2d, 0xb2, 0x66, 0xc5, 0x1d, 0xed, 0xb7, 0xcd,
++	0x0b, 0x25, 0x5e, 0x30, 0x47, 0x3f, 0x40, 0xf4,
++	0xa1, 0xa0, 0x00, 0x94, 0x10, 0xc5, 0x6a, 0x63,
++	0x1a, 0xd5, 0x88, 0x92, 0x8e, 0x82, 0x39, 0x87,
++	0x3c, 0x78, 0x65, 0x58, 0x42, 0x75, 0x5b, 0xdd,
++	0x77, 0x3e, 0x09, 0x4e, 0x76, 0x5b, 0xe6, 0x0e,
++	0x4d, 0x38, 0xb2, 0xc0, 0xb8, 0x95, 0x01, 0x7a,
++	0x10, 0xe0, 0xfb, 0x07, 0xf2, 0xab, 0x2d, 0x8c,
++	0x32, 0xed, 0x2b, 0xc0, 0x46, 0xc2, 0xf5, 0x38,
++	0x83, 0xf0, 0x17, 0xec, 0xc1, 0x20, 0x6a, 0x9a,
++	0x0b, 0x00, 0xa0, 0x98, 0x22, 0x50, 0x23, 0xd5,
++	0x80, 0x6b, 0xf6, 0x1f, 0xc3, 0xcc, 0x97, 0xc9,
++	0x24, 0x9f, 0xf3, 0xaf, 0x43, 0x14, 0xd5, 0xa0
++};
++static const u8 dec_output010[] __initconst = {
++	0x42, 0x93, 0xe4, 0xeb, 0x97, 0xb0, 0x57, 0xbf,
++	0x1a, 0x8b, 0x1f, 0xe4, 0x5f, 0x36, 0x20, 0x3c,
++	0xef, 0x0a, 0xa9, 0x48, 0x5f, 0x5f, 0x37, 0x22,
++	0x3a, 0xde, 0xe3, 0xae, 0xbe, 0xad, 0x07, 0xcc,
++	0xb1, 0xf6, 0xf5, 0xf9, 0x56, 0xdd, 0xe7, 0x16,
++	0x1e, 0x7f, 0xdf, 0x7a, 0x9e, 0x75, 0xb7, 0xc7,
++	0xbe, 0xbe, 0x8a, 0x36, 0x04, 0xc0, 0x10, 0xf4,
++	0x95, 0x20, 0x03, 0xec, 0xdc, 0x05, 0xa1, 0x7d,
++	0xc4, 0xa9, 0x2c, 0x82, 0xd0, 0xbc, 0x8b, 0xc5,
++	0xc7, 0x45, 0x50, 0xf6, 0xa2, 0x1a, 0xb5, 0x46,
++	0x3b, 0x73, 0x02, 0xa6, 0x83, 0x4b, 0x73, 0x82,
++	0x58, 0x5e, 0x3b, 0x65, 0x2f, 0x0e, 0xfd, 0x2b,
++	0x59, 0x16, 0xce, 0xa1, 0x60, 0x9c, 0xe8, 0x3a,
++	0x99, 0xed, 0x8d, 0x5a, 0xcf, 0xf6, 0x83, 0xaf,
++	0xba, 0xd7, 0x73, 0x73, 0x40, 0x97, 0x3d, 0xca,
++	0xef, 0x07, 0x57, 0xe6, 0xd9, 0x70, 0x0e, 0x95,
++	0xae, 0xa6, 0x8d, 0x04, 0xcc, 0xee, 0xf7, 0x09,
++	0x31, 0x77, 0x12, 0xa3, 0x23, 0x97, 0x62, 0xb3,
++	0x7b, 0x32, 0xfb, 0x80, 0x14, 0x48, 0x81, 0xc3,
++	0xe5, 0xea, 0x91, 0x39, 0x52, 0x81, 0xa2, 0x4f,
++	0xe4, 0xb3, 0x09, 0xff, 0xde, 0x5e, 0xe9, 0x58,
++	0x84, 0x6e, 0xf9, 0x3d, 0xdf, 0x25, 0xea, 0xad,
++	0xae, 0xe6, 0x9a, 0xd1, 0x89, 0x55, 0xd3, 0xde,
++	0x6c, 0x52, 0xdb, 0x70, 0xfe, 0x37, 0xce, 0x44,
++	0x0a, 0xa8, 0x25, 0x5f, 0x92, 0xc1, 0x33, 0x4a,
++	0x4f, 0x9b, 0x62, 0x35, 0xff, 0xce, 0xc0, 0xa9,
++	0x60, 0xce, 0x52, 0x00, 0x97, 0x51, 0x35, 0x26,
++	0x2e, 0xb9, 0x36, 0xa9, 0x87, 0x6e, 0x1e, 0xcc,
++	0x91, 0x78, 0x53, 0x98, 0x86, 0x5b, 0x9c, 0x74,
++	0x7d, 0x88, 0x33, 0xe1, 0xdf, 0x37, 0x69, 0x2b,
++	0xbb, 0xf1, 0x4d, 0xf4, 0xd1, 0xf1, 0x39, 0x93,
++	0x17, 0x51, 0x19, 0xe3, 0x19, 0x1e, 0x76, 0x37,
++	0x25, 0xfb, 0x09, 0x27, 0x6a, 0xab, 0x67, 0x6f,
++	0x14, 0x12, 0x64, 0xe7, 0xc4, 0x07, 0xdf, 0x4d,
++	0x17, 0xbb, 0x6d, 0xe0, 0xe9, 0xb9, 0xab, 0xca,
++	0x10, 0x68, 0xaf, 0x7e, 0xb7, 0x33, 0x54, 0x73,
++	0x07, 0x6e, 0xf7, 0x81, 0x97, 0x9c, 0x05, 0x6f,
++	0x84, 0x5f, 0xd2, 0x42, 0xfb, 0x38, 0xcf, 0xd1,
++	0x2f, 0x14, 0x30, 0x88, 0x98, 0x4d, 0x5a, 0xa9,
++	0x76, 0xd5, 0x4f, 0x3e, 0x70, 0x6c, 0x85, 0x76,
++	0xd7, 0x01, 0xa0, 0x1a, 0xc8, 0x4e, 0xaa, 0xac,
++	0x78, 0xfe, 0x46, 0xde, 0x6a, 0x05, 0x46, 0xa7,
++	0x43, 0x0c, 0xb9, 0xde, 0xb9, 0x68, 0xfb, 0xce,
++	0x42, 0x99, 0x07, 0x4d, 0x0b, 0x3b, 0x5a, 0x30,
++	0x35, 0xa8, 0xf9, 0x3a, 0x73, 0xef, 0x0f, 0xdb,
++	0x1e, 0x16, 0x42, 0xc4, 0xba, 0xae, 0x58, 0xaa,
++	0xf8, 0xe5, 0x75, 0x2f, 0x1b, 0x15, 0x5c, 0xfd,
++	0x0a, 0x97, 0xd0, 0xe4, 0x37, 0x83, 0x61, 0x5f,
++	0x43, 0xa6, 0xc7, 0x3f, 0x38, 0x59, 0xe6, 0xeb,
++	0xa3, 0x90, 0xc3, 0xaa, 0xaa, 0x5a, 0xd3, 0x34,
++	0xd4, 0x17, 0xc8, 0x65, 0x3e, 0x57, 0xbc, 0x5e,
++	0xdd, 0x9e, 0xb7, 0xf0, 0x2e, 0x5b, 0xb2, 0x1f,
++	0x8a, 0x08, 0x0d, 0x45, 0x91, 0x0b, 0x29, 0x53,
++	0x4f, 0x4c, 0x5a, 0x73, 0x56, 0xfe, 0xaf, 0x41,
++	0x01, 0x39, 0x0a, 0x24, 0x3c, 0x7e, 0xbe, 0x4e,
++	0x53, 0xf3, 0xeb, 0x06, 0x66, 0x51, 0x28, 0x1d,
++	0xbd, 0x41, 0x0a, 0x01, 0xab, 0x16, 0x47, 0x27,
++	0x47, 0x47, 0xf7, 0xcb, 0x46, 0x0a, 0x70, 0x9e,
++	0x01, 0x9c, 0x09, 0xe1, 0x2a, 0x00, 0x1a, 0xd8,
++	0xd4, 0x79, 0x9d, 0x80, 0x15, 0x8e, 0x53, 0x2a,
++	0x65, 0x83, 0x78, 0x3e, 0x03, 0x00, 0x07, 0x12,
++	0x1f, 0x33, 0x3e, 0x7b, 0x13, 0x37, 0xf1, 0xc3,
++	0xef, 0xb7, 0xc1, 0x20, 0x3c, 0x3e, 0x67, 0x66,
++	0x5d, 0x88, 0xa7, 0x7d, 0x33, 0x50, 0x77, 0xb0,
++	0x28, 0x8e, 0xe7, 0x2c, 0x2e, 0x7a, 0xf4, 0x3c,
++	0x8d, 0x74, 0x83, 0xaf, 0x8e, 0x87, 0x0f, 0xe4,
++	0x50, 0xff, 0x84, 0x5c, 0x47, 0x0c, 0x6a, 0x49,
++	0xbf, 0x42, 0x86, 0x77, 0x15, 0x48, 0xa5, 0x90,
++	0x5d, 0x93, 0xd6, 0x2a, 0x11, 0xd5, 0xd5, 0x11,
++	0xaa, 0xce, 0xe7, 0x6f, 0xa5, 0xb0, 0x09, 0x2c,
++	0x8d, 0xd3, 0x92, 0xf0, 0x5a, 0x2a, 0xda, 0x5b,
++	0x1e, 0xd5, 0x9a, 0xc4, 0xc4, 0xf3, 0x49, 0x74,
++	0x41, 0xca, 0xe8, 0xc1, 0xf8, 0x44, 0xd6, 0x3c,
++	0xae, 0x6c, 0x1d, 0x9a, 0x30, 0x04, 0x4d, 0x27,
++	0x0e, 0xb1, 0x5f, 0x59, 0xa2, 0x24, 0xe8, 0xe1,
++	0x98, 0xc5, 0x6a, 0x4c, 0xfe, 0x41, 0xd2, 0x27,
++	0x42, 0x52, 0xe1, 0xe9, 0x7d, 0x62, 0xe4, 0x88,
++	0x0f, 0xad, 0xb2, 0x70, 0xcb, 0x9d, 0x4c, 0x27,
++	0x2e, 0x76, 0x1e, 0x1a, 0x63, 0x65, 0xf5, 0x3b,
++	0xf8, 0x57, 0x69, 0xeb, 0x5b, 0x38, 0x26, 0x39,
++	0x33, 0x25, 0x45, 0x3e, 0x91, 0xb8, 0xd8, 0xc7,
++	0xd5, 0x42, 0xc0, 0x22, 0x31, 0x74, 0xf4, 0xbc,
++	0x0c, 0x23, 0xf1, 0xca, 0xc1, 0x8d, 0xd7, 0xbe,
++	0xc9, 0x62, 0xe4, 0x08, 0x1a, 0xcf, 0x36, 0xd5,
++	0xfe, 0x55, 0x21, 0x59, 0x91, 0x87, 0x87, 0xdf,
++	0x06, 0xdb, 0xdf, 0x96, 0x45, 0x58, 0xda, 0x05,
++	0xcd, 0x50, 0x4d, 0xd2, 0x7d, 0x05, 0x18, 0x73,
++	0x6a, 0x8d, 0x11, 0x85, 0xa6, 0x88, 0xe8, 0xda,
++	0xe6, 0x30, 0x33, 0xa4, 0x89, 0x31, 0x75, 0xbe,
++	0x69, 0x43, 0x84, 0x43, 0x50, 0x87, 0xdd, 0x71,
++	0x36, 0x83, 0xc3, 0x78, 0x74, 0x24, 0x0a, 0xed,
++	0x7b, 0xdb, 0xa4, 0x24, 0x0b, 0xb9, 0x7e, 0x5d,
++	0xff, 0xde, 0xb1, 0xef, 0x61, 0x5a, 0x45, 0x33,
++	0xf6, 0x17, 0x07, 0x08, 0x98, 0x83, 0x92, 0x0f,
++	0x23, 0x6d, 0xe6, 0xaa, 0x17, 0x54, 0xad, 0x6a,
++	0xc8, 0xdb, 0x26, 0xbe, 0xb8, 0xb6, 0x08, 0xfa,
++	0x68, 0xf1, 0xd7, 0x79, 0x6f, 0x18, 0xb4, 0x9e,
++	0x2d, 0x3f, 0x1b, 0x64, 0xaf, 0x8d, 0x06, 0x0e,
++	0x49, 0x28, 0xe0, 0x5d, 0x45, 0x68, 0x13, 0x87,
++	0xfa, 0xde, 0x40, 0x7b, 0xd2, 0xc3, 0x94, 0xd5,
++	0xe1, 0xd9, 0xc2, 0xaf, 0x55, 0x89, 0xeb, 0xb4,
++	0x12, 0x59, 0xa8, 0xd4, 0xc5, 0x29, 0x66, 0x38,
++	0xe6, 0xac, 0x22, 0x22, 0xd9, 0x64, 0x9b, 0x34,
++	0x0a, 0x32, 0x9f, 0xc2, 0xbf, 0x17, 0x6c, 0x3f,
++	0x71, 0x7a, 0x38, 0x6b, 0x98, 0xfb, 0x49, 0x36,
++	0x89, 0xc9, 0xe2, 0xd6, 0xc7, 0x5d, 0xd0, 0x69,
++	0x5f, 0x23, 0x35, 0xc9, 0x30, 0xe2, 0xfd, 0x44,
++	0x58, 0x39, 0xd7, 0x97, 0xfb, 0x5c, 0x00, 0xd5,
++	0x4f, 0x7a, 0x1a, 0x95, 0x8b, 0x62, 0x4b, 0xce,
++	0xe5, 0x91, 0x21, 0x7b, 0x30, 0x00, 0xd6, 0xdd,
++	0x6d, 0x02, 0x86, 0x49, 0x0f, 0x3c, 0x1a, 0x27,
++	0x3c, 0xd3, 0x0e, 0x71, 0xf2, 0xff, 0xf5, 0x2f,
++	0x87, 0xac, 0x67, 0x59, 0x81, 0xa3, 0xf7, 0xf8,
++	0xd6, 0x11, 0x0c, 0x84, 0xa9, 0x03, 0xee, 0x2a,
++	0xc4, 0xf3, 0x22, 0xab, 0x7c, 0xe2, 0x25, 0xf5,
++	0x67, 0xa3, 0xe4, 0x11, 0xe0, 0x59, 0xb3, 0xca,
++	0x87, 0xa0, 0xae, 0xc9, 0xa6, 0x62, 0x1b, 0x6e,
++	0x4d, 0x02, 0x6b, 0x07, 0x9d, 0xfd, 0xd0, 0x92,
++	0x06, 0xe1, 0xb2, 0x9a, 0x4a, 0x1f, 0x1f, 0x13,
++	0x49, 0x99, 0x97, 0x08, 0xde, 0x7f, 0x98, 0xaf,
++	0x51, 0x98, 0xee, 0x2c, 0xcb, 0xf0, 0x0b, 0xc6,
++	0xb6, 0xb7, 0x2d, 0x9a, 0xb1, 0xac, 0xa6, 0xe3,
++	0x15, 0x77, 0x9d, 0x6b, 0x1a, 0xe4, 0xfc, 0x8b,
++	0xf2, 0x17, 0x59, 0x08, 0x04, 0x58, 0x81, 0x9d,
++	0x1b, 0x1b, 0x69, 0x55, 0xc2, 0xb4, 0x3c, 0x1f,
++	0x50, 0xf1, 0x7f, 0x77, 0x90, 0x4c, 0x66, 0x40,
++	0x5a, 0xc0, 0x33, 0x1f, 0xcb, 0x05, 0x6d, 0x5c,
++	0x06, 0x87, 0x52, 0xa2, 0x8f, 0x26, 0xd5, 0x4f
++};
++static const u8 dec_assoc010[] __initconst = {
++	0xd2, 0xa1, 0x70, 0xdb, 0x7a, 0xf8, 0xfa, 0x27,
++	0xba, 0x73, 0x0f, 0xbf, 0x3d, 0x1e, 0x82, 0xb2
++};
++static const u8 dec_nonce010[] __initconst = {
++	0xdb, 0x92, 0x0f, 0x7f, 0x17, 0x54, 0x0c, 0x30
++};
++static const u8 dec_key010[] __initconst = {
++	0x47, 0x11, 0xeb, 0x86, 0x2b, 0x2c, 0xab, 0x44,
++	0x34, 0xda, 0x7f, 0x57, 0x03, 0x39, 0x0c, 0xaf,
++	0x2c, 0x14, 0xfd, 0x65, 0x23, 0xe9, 0x8e, 0x74,
++	0xd5, 0x08, 0x68, 0x08, 0xe7, 0xb4, 0x72, 0xd7
++};
++
++static const u8 dec_input011[] __initconst = {
++	0x6a, 0xfc, 0x4b, 0x25, 0xdf, 0xc0, 0xe4, 0xe8,
++	0x17, 0x4d, 0x4c, 0xc9, 0x7e, 0xde, 0x3a, 0xcc,
++	0x3c, 0xba, 0x6a, 0x77, 0x47, 0xdb, 0xe3, 0x74,
++	0x7a, 0x4d, 0x5f, 0x8d, 0x37, 0x55, 0x80, 0x73,
++	0x90, 0x66, 0x5d, 0x3a, 0x7d, 0x5d, 0x86, 0x5e,
++	0x8d, 0xfd, 0x83, 0xff, 0x4e, 0x74, 0x6f, 0xf9,
++	0xe6, 0x70, 0x17, 0x70, 0x3e, 0x96, 0xa7, 0x7e,
++	0xcb, 0xab, 0x8f, 0x58, 0x24, 0x9b, 0x01, 0xfd,
++	0xcb, 0xe6, 0x4d, 0x9b, 0xf0, 0x88, 0x94, 0x57,
++	0x66, 0xef, 0x72, 0x4c, 0x42, 0x6e, 0x16, 0x19,
++	0x15, 0xea, 0x70, 0x5b, 0xac, 0x13, 0xdb, 0x9f,
++	0x18, 0xe2, 0x3c, 0x26, 0x97, 0xbc, 0xdc, 0x45,
++	0x8c, 0x6c, 0x24, 0x69, 0x9c, 0xf7, 0x65, 0x1e,
++	0x18, 0x59, 0x31, 0x7c, 0xe4, 0x73, 0xbc, 0x39,
++	0x62, 0xc6, 0x5c, 0x9f, 0xbf, 0xfa, 0x90, 0x03,
++	0xc9, 0x72, 0x26, 0xb6, 0x1b, 0xc2, 0xb7, 0x3f,
++	0xf2, 0x13, 0x77, 0xf2, 0x8d, 0xb9, 0x47, 0xd0,
++	0x53, 0xdd, 0xc8, 0x91, 0x83, 0x8b, 0xb1, 0xce,
++	0xa3, 0xfe, 0xcd, 0xd9, 0xdd, 0x92, 0x7b, 0xdb,
++	0xb8, 0xfb, 0xc9, 0x2d, 0x01, 0x59, 0x39, 0x52,
++	0xad, 0x1b, 0xec, 0xcf, 0xd7, 0x70, 0x13, 0x21,
++	0xf5, 0x47, 0xaa, 0x18, 0x21, 0x5c, 0xc9, 0x9a,
++	0xd2, 0x6b, 0x05, 0x9c, 0x01, 0xa1, 0xda, 0x35,
++	0x5d, 0xb3, 0x70, 0xe6, 0xa9, 0x80, 0x8b, 0x91,
++	0xb7, 0xb3, 0x5f, 0x24, 0x9a, 0xb7, 0xd1, 0x6b,
++	0xa1, 0x1c, 0x50, 0xba, 0x49, 0xe0, 0xee, 0x2e,
++	0x75, 0xac, 0x69, 0xc0, 0xeb, 0x03, 0xdd, 0x19,
++	0xe5, 0xf6, 0x06, 0xdd, 0xc3, 0xd7, 0x2b, 0x07,
++	0x07, 0x30, 0xa7, 0x19, 0x0c, 0xbf, 0xe6, 0x18,
++	0xcc, 0xb1, 0x01, 0x11, 0x85, 0x77, 0x1d, 0x96,
++	0xa7, 0xa3, 0x00, 0x84, 0x02, 0xa2, 0x83, 0x68,
++	0xda, 0x17, 0x27, 0xc8, 0x7f, 0x23, 0xb7, 0xf4,
++	0x13, 0x85, 0xcf, 0xdd, 0x7a, 0x7d, 0x24, 0x57,
++	0xfe, 0x05, 0x93, 0xf5, 0x74, 0xce, 0xed, 0x0c,
++	0x20, 0x98, 0x8d, 0x92, 0x30, 0xa1, 0x29, 0x23,
++	0x1a, 0xa0, 0x4f, 0x69, 0x56, 0x4c, 0xe1, 0xc8,
++	0xce, 0xf6, 0x9a, 0x0c, 0xa4, 0xfa, 0x04, 0xf6,
++	0x62, 0x95, 0xf2, 0xfa, 0xc7, 0x40, 0x68, 0x40,
++	0x8f, 0x41, 0xda, 0xb4, 0x26, 0x6f, 0x70, 0xab,
++	0x40, 0x61, 0xa4, 0x0e, 0x75, 0xfb, 0x86, 0xeb,
++	0x9d, 0x9a, 0x1f, 0xec, 0x76, 0x99, 0xe7, 0xea,
++	0xaa, 0x1e, 0x2d, 0xb5, 0xd4, 0xa6, 0x1a, 0xb8,
++	0x61, 0x0a, 0x1d, 0x16, 0x5b, 0x98, 0xc2, 0x31,
++	0x40, 0xe7, 0x23, 0x1d, 0x66, 0x99, 0xc8, 0xc0,
++	0xd7, 0xce, 0xf3, 0x57, 0x40, 0x04, 0x3f, 0xfc,
++	0xea, 0xb3, 0xfc, 0xd2, 0xd3, 0x99, 0xa4, 0x94,
++	0x69, 0xa0, 0xef, 0xd1, 0x85, 0xb3, 0xa6, 0xb1,
++	0x28, 0xbf, 0x94, 0x67, 0x22, 0xc3, 0x36, 0x46,
++	0xf8, 0xd2, 0x0f, 0x5f, 0xf4, 0x59, 0x80, 0xe6,
++	0x2d, 0x43, 0x08, 0x7d, 0x19, 0x09, 0x97, 0xa7,
++	0x4c, 0x3d, 0x8d, 0xba, 0x65, 0x62, 0xa3, 0x71,
++	0x33, 0x29, 0x62, 0xdb, 0xc1, 0x33, 0x34, 0x1a,
++	0x63, 0x33, 0x16, 0xb6, 0x64, 0x7e, 0xab, 0x33,
++	0xf0, 0xe6, 0x26, 0x68, 0xba, 0x1d, 0x2e, 0x38,
++	0x08, 0xe6, 0x02, 0xd3, 0x25, 0x2c, 0x47, 0x23,
++	0x58, 0x34, 0x0f, 0x9d, 0x63, 0x4f, 0x63, 0xbb,
++	0x7f, 0x3b, 0x34, 0x38, 0xa7, 0xb5, 0x8d, 0x65,
++	0xd9, 0x9f, 0x79, 0x55, 0x3e, 0x4d, 0xe7, 0x73,
++	0xd8, 0xf6, 0x98, 0x97, 0x84, 0x60, 0x9c, 0xc8,
++	0xa9, 0x3c, 0xf6, 0xdc, 0x12, 0x5c, 0xe1, 0xbb,
++	0x0b, 0x8b, 0x98, 0x9c, 0x9d, 0x26, 0x7c, 0x4a,
++	0xe6, 0x46, 0x36, 0x58, 0x21, 0x4a, 0xee, 0xca,
++	0xd7, 0x3b, 0xc2, 0x6c, 0x49, 0x2f, 0xe5, 0xd5,
++	0x03, 0x59, 0x84, 0x53, 0xcb, 0xfe, 0x92, 0x71,
++	0x2e, 0x7c, 0x21, 0xcc, 0x99, 0x85, 0x7f, 0xb8,
++	0x74, 0x90, 0x13, 0x42, 0x3f, 0xe0, 0x6b, 0x1d,
++	0xf2, 0x4d, 0x54, 0xd4, 0xfc, 0x3a, 0x05, 0xe6,
++	0x74, 0xaf, 0xa6, 0xa0, 0x2a, 0x20, 0x23, 0x5d,
++	0x34, 0x5c, 0xd9, 0x3e, 0x4e, 0xfa, 0x93, 0xe7,
++	0xaa, 0xe9, 0x6f, 0x08, 0x43, 0x67, 0x41, 0xc5,
++	0xad, 0xfb, 0x31, 0x95, 0x82, 0x73, 0x32, 0xd8,
++	0xa6, 0xa3, 0xed, 0x0e, 0x2d, 0xf6, 0x5f, 0xfd,
++	0x80, 0xa6, 0x7a, 0xe0, 0xdf, 0x78, 0x15, 0x29,
++	0x74, 0x33, 0xd0, 0x9e, 0x83, 0x86, 0x72, 0x22,
++	0x57, 0x29, 0xb9, 0x9e, 0x5d, 0xd3, 0x1a, 0xb5,
++	0x96, 0x72, 0x41, 0x3d, 0xf1, 0x64, 0x43, 0x67,
++	0xee, 0xaa, 0x5c, 0xd3, 0x9a, 0x96, 0x13, 0x11,
++	0x5d, 0xf3, 0x0c, 0x87, 0x82, 0x1e, 0x41, 0x9e,
++	0xd0, 0x27, 0xd7, 0x54, 0x3b, 0x67, 0x73, 0x09,
++	0x91, 0xe9, 0xd5, 0x36, 0xa7, 0xb5, 0x55, 0xe4,
++	0xf3, 0x21, 0x51, 0x49, 0x22, 0x07, 0x55, 0x4f,
++	0x44, 0x4b, 0xd2, 0x15, 0x93, 0x17, 0x2a, 0xfa,
++	0x4d, 0x4a, 0x57, 0xdb, 0x4c, 0xa6, 0xeb, 0xec,
++	0x53, 0x25, 0x6c, 0x21, 0xed, 0x00, 0x4c, 0x3b,
++	0xca, 0x14, 0x57, 0xa9, 0xd6, 0x6a, 0xcd, 0x8d,
++	0x5e, 0x74, 0xac, 0x72, 0xc1, 0x97, 0xe5, 0x1b,
++	0x45, 0x4e, 0xda, 0xfc, 0xcc, 0x40, 0xe8, 0x48,
++	0x88, 0x0b, 0xa3, 0xe3, 0x8d, 0x83, 0x42, 0xc3,
++	0x23, 0xfd, 0x68, 0xb5, 0x8e, 0xf1, 0x9d, 0x63,
++	0x77, 0xe9, 0xa3, 0x8e, 0x8c, 0x26, 0x6b, 0xbd,
++	0x72, 0x73, 0x35, 0x0c, 0x03, 0xf8, 0x43, 0x78,
++	0x52, 0x71, 0x15, 0x1f, 0x71, 0x5d, 0x6e, 0xed,
++	0xb9, 0xcc, 0x86, 0x30, 0xdb, 0x2b, 0xd3, 0x82,
++	0x88, 0x23, 0x71, 0x90, 0x53, 0x5c, 0xa9, 0x2f,
++	0x76, 0x01, 0xb7, 0x9a, 0xfe, 0x43, 0x55, 0xa3,
++	0x04, 0x9b, 0x0e, 0xe4, 0x59, 0xdf, 0xc9, 0xe9,
++	0xb1, 0xea, 0x29, 0x28, 0x3c, 0x5c, 0xae, 0x72,
++	0x84, 0xb6, 0xc6, 0xeb, 0x0c, 0x27, 0x07, 0x74,
++	0x90, 0x0d, 0x31, 0xb0, 0x00, 0x77, 0xe9, 0x40,
++	0x70, 0x6f, 0x68, 0xa7, 0xfd, 0x06, 0xec, 0x4b,
++	0xc0, 0xb7, 0xac, 0xbc, 0x33, 0xb7, 0x6d, 0x0a,
++	0xbd, 0x12, 0x1b, 0x59, 0xcb, 0xdd, 0x32, 0xf5,
++	0x1d, 0x94, 0x57, 0x76, 0x9e, 0x0c, 0x18, 0x98,
++	0x71, 0xd7, 0x2a, 0xdb, 0x0b, 0x7b, 0xa7, 0x71,
++	0xb7, 0x67, 0x81, 0x23, 0x96, 0xae, 0xb9, 0x7e,
++	0x32, 0x43, 0x92, 0x8a, 0x19, 0xa0, 0xc4, 0xd4,
++	0x3b, 0x57, 0xf9, 0x4a, 0x2c, 0xfb, 0x51, 0x46,
++	0xbb, 0xcb, 0x5d, 0xb3, 0xef, 0x13, 0x93, 0x6e,
++	0x68, 0x42, 0x54, 0x57, 0xd3, 0x6a, 0x3a, 0x8f,
++	0x9d, 0x66, 0xbf, 0xbd, 0x36, 0x23, 0xf5, 0x93,
++	0x83, 0x7b, 0x9c, 0xc0, 0xdd, 0xc5, 0x49, 0xc0,
++	0x64, 0xed, 0x07, 0x12, 0xb3, 0xe6, 0xe4, 0xe5,
++	0x38, 0x95, 0x23, 0xb1, 0xa0, 0x3b, 0x1a, 0x61,
++	0xda, 0x17, 0xac, 0xc3, 0x58, 0xdd, 0x74, 0x64,
++	0x22, 0x11, 0xe8, 0x32, 0x1d, 0x16, 0x93, 0x85,
++	0x99, 0xa5, 0x9c, 0x34, 0x55, 0xb1, 0xe9, 0x20,
++	0x72, 0xc9, 0x28, 0x7b, 0x79, 0x00, 0xa1, 0xa6,
++	0xa3, 0x27, 0x40, 0x18, 0x8a, 0x54, 0xe0, 0xcc,
++	0xe8, 0x4e, 0x8e, 0x43, 0x96, 0xe7, 0x3f, 0xc8,
++	0xe9, 0xb2, 0xf9, 0xc9, 0xda, 0x04, 0x71, 0x50,
++	0x47, 0xe4, 0xaa, 0xce, 0xa2, 0x30, 0xc8, 0xe4,
++	0xac, 0xc7, 0x0d, 0x06, 0x2e, 0xe6, 0xe8, 0x80,
++	0x36, 0x29, 0x9e, 0x01, 0xb8, 0xc3, 0xf0, 0xa0,
++	0x5d, 0x7a, 0xca, 0x4d, 0xa0, 0x57, 0xbd, 0x2a,
++	0x45, 0xa7, 0x7f, 0x9c, 0x93, 0x07, 0x8f, 0x35,
++	0x67, 0x92, 0xe3, 0xe9, 0x7f, 0xa8, 0x61, 0x43,
++	0x9e, 0x25, 0x4f, 0x33, 0x76, 0x13, 0x6e, 0x12,
++	0xb9, 0xdd, 0xa4, 0x7c, 0x08, 0x9f, 0x7c, 0xe7,
++	0x0a, 0x8d, 0x84, 0x06, 0xa4, 0x33, 0x17, 0x34,
++	0x5e, 0x10, 0x7c, 0xc0, 0xa8, 0x3d, 0x1f, 0x42,
++	0x20, 0x51, 0x65, 0x5d, 0x09, 0xc3, 0xaa, 0xc0,
++	0xc8, 0x0d, 0xf0, 0x79, 0xbc, 0x20, 0x1b, 0x95,
++	0xe7, 0x06, 0x7d, 0x47, 0x20, 0x03, 0x1a, 0x74,
++	0xdd, 0xe2, 0xd4, 0xae, 0x38, 0x71, 0x9b, 0xf5,
++	0x80, 0xec, 0x08, 0x4e, 0x56, 0xba, 0x76, 0x12,
++	0x1a, 0xdf, 0x48, 0xf3, 0xae, 0xb3, 0xe6, 0xe6,
++	0xbe, 0xc0, 0x91, 0x2e, 0x01, 0xb3, 0x01, 0x86,
++	0xa2, 0xb9, 0x52, 0xd1, 0x21, 0xae, 0xd4, 0x97,
++	0x1d, 0xef, 0x41, 0x12, 0x95, 0x3d, 0x48, 0x45,
++	0x1c, 0x56, 0x32, 0x8f, 0xb8, 0x43, 0xbb, 0x19,
++	0xf3, 0xca, 0xe9, 0xeb, 0x6d, 0x84, 0xbe, 0x86,
++	0x06, 0xe2, 0x36, 0xb2, 0x62, 0x9d, 0xd3, 0x4c,
++	0x48, 0x18, 0x54, 0x13, 0x4e, 0xcf, 0xfd, 0xba,
++	0x84, 0xb9, 0x30, 0x53, 0xcf, 0xfb, 0xb9, 0x29,
++	0x8f, 0xdc, 0x9f, 0xef, 0x60, 0x0b, 0x64, 0xf6,
++	0x8b, 0xee, 0xa6, 0x91, 0xc2, 0x41, 0x6c, 0xf6,
++	0xfa, 0x79, 0x67, 0x4b, 0xc1, 0x3f, 0xaf, 0x09,
++	0x81, 0xd4, 0x5d, 0xcb, 0x09, 0xdf, 0x36, 0x31,
++	0xc0, 0x14, 0x3c, 0x7c, 0x0e, 0x65, 0x95, 0x99,
++	0x6d, 0xa3, 0xf4, 0xd7, 0x38, 0xee, 0x1a, 0x2b,
++	0x37, 0xe2, 0xa4, 0x3b, 0x4b, 0xd0, 0x65, 0xca,
++	0xf8, 0xc3, 0xe8, 0x15, 0x20, 0xef, 0xf2, 0x00,
++	0xfd, 0x01, 0x09, 0xc5, 0xc8, 0x17, 0x04, 0x93,
++	0xd0, 0x93, 0x03, 0x55, 0xc5, 0xfe, 0x32, 0xa3,
++	0x3e, 0x28, 0x2d, 0x3b, 0x93, 0x8a, 0xcc, 0x07,
++	0x72, 0x80, 0x8b, 0x74, 0x16, 0x24, 0xbb, 0xda,
++	0x94, 0x39, 0x30, 0x8f, 0xb1, 0xcd, 0x4a, 0x90,
++	0x92, 0x7c, 0x14, 0x8f, 0x95, 0x4e, 0xac, 0x9b,
++	0xd8, 0x8f, 0x1a, 0x87, 0xa4, 0x32, 0x27, 0x8a,
++	0xba, 0xf7, 0x41, 0xcf, 0x84, 0x37, 0x19, 0xe6,
++	0x06, 0xf5, 0x0e, 0xcf, 0x36, 0xf5, 0x9e, 0x6c,
++	0xde, 0xbc, 0xff, 0x64, 0x7e, 0x4e, 0x59, 0x57,
++	0x48, 0xfe, 0x14, 0xf7, 0x9c, 0x93, 0x5d, 0x15,
++	0xad, 0xcc, 0x11, 0xb1, 0x17, 0x18, 0xb2, 0x7e,
++	0xcc, 0xab, 0xe9, 0xce, 0x7d, 0x77, 0x5b, 0x51,
++	0x1b, 0x1e, 0x20, 0xa8, 0x32, 0x06, 0x0e, 0x75,
++	0x93, 0xac, 0xdb, 0x35, 0x37, 0x1f, 0xe9, 0x19,
++	0x1d, 0xb4, 0x71, 0x97, 0xd6, 0x4e, 0x2c, 0x08,
++	0xa5, 0x13, 0xf9, 0x0e, 0x7e, 0x78, 0x6e, 0x14,
++	0xe0, 0xa9, 0xb9, 0x96, 0x4c, 0x80, 0x82, 0xba,
++	0x17, 0xb3, 0x9d, 0x69, 0xb0, 0x84, 0x46, 0xff,
++	0xf9, 0x52, 0x79, 0x94, 0x58, 0x3a, 0x62, 0x90,
++	0x15, 0x35, 0x71, 0x10, 0x37, 0xed, 0xa1, 0x8e,
++	0x53, 0x6e, 0xf4, 0x26, 0x57, 0x93, 0x15, 0x93,
++	0xf6, 0x81, 0x2c, 0x5a, 0x10, 0xda, 0x92, 0xad,
++	0x2f, 0xdb, 0x28, 0x31, 0x2d, 0x55, 0x04, 0xd2,
++	0x06, 0x28, 0x8c, 0x1e, 0xdc, 0xea, 0x54, 0xac,
++	0xff, 0xb7, 0x6c, 0x30, 0x15, 0xd4, 0xb4, 0x0d,
++	0x00, 0x93, 0x57, 0xdd, 0xd2, 0x07, 0x07, 0x06,
++	0xd9, 0x43, 0x9b, 0xcd, 0x3a, 0xf4, 0x7d, 0x4c,
++	0x36, 0x5d, 0x23, 0xa2, 0xcc, 0x57, 0x40, 0x91,
++	0xe9, 0x2c, 0x2f, 0x2c, 0xd5, 0x30, 0x9b, 0x17,
++	0xb0, 0xc9, 0xf7, 0xa7, 0x2f, 0xd1, 0x93, 0x20,
++	0x6b, 0xc6, 0xc1, 0xe4, 0x6f, 0xcb, 0xd1, 0xe7,
++	0x09, 0x0f, 0x9e, 0xdc, 0xaa, 0x9f, 0x2f, 0xdf,
++	0x56, 0x9f, 0xd4, 0x33, 0x04, 0xaf, 0xd3, 0x6c,
++	0x58, 0x61, 0xf0, 0x30, 0xec, 0xf2, 0x7f, 0xf2,
++	0x9c, 0xdf, 0x39, 0xbb, 0x6f, 0xa2, 0x8c, 0x7e,
++	0xc4, 0x22, 0x51, 0x71, 0xc0, 0x4d, 0x14, 0x1a,
++	0xc4, 0xcd, 0x04, 0xd9, 0x87, 0x08, 0x50, 0x05,
++	0xcc, 0xaf, 0xf6, 0xf0, 0x8f, 0x92, 0x54, 0x58,
++	0xc2, 0xc7, 0x09, 0x7a, 0x59, 0x02, 0x05, 0xe8,
++	0xb0, 0x86, 0xd9, 0xbf, 0x7b, 0x35, 0x51, 0x4d,
++	0xaf, 0x08, 0x97, 0x2c, 0x65, 0xda, 0x2a, 0x71,
++	0x3a, 0xa8, 0x51, 0xcc, 0xf2, 0x73, 0x27, 0xc3,
++	0xfd, 0x62, 0xcf, 0xe3, 0xb2, 0xca, 0xcb, 0xbe,
++	0x1a, 0x0a, 0xa1, 0x34, 0x7b, 0x77, 0xc4, 0x62,
++	0x68, 0x78, 0x5f, 0x94, 0x07, 0x04, 0x65, 0x16,
++	0x4b, 0x61, 0xcb, 0xff, 0x75, 0x26, 0x50, 0x66,
++	0x1f, 0x6e, 0x93, 0xf8, 0xc5, 0x51, 0xeb, 0xa4,
++	0x4a, 0x48, 0x68, 0x6b, 0xe2, 0x5e, 0x44, 0xb2,
++	0x50, 0x2c, 0x6c, 0xae, 0x79, 0x4e, 0x66, 0x35,
++	0x81, 0x50, 0xac, 0xbc, 0x3f, 0xb1, 0x0c, 0xf3,
++	0x05, 0x3c, 0x4a, 0xa3, 0x6c, 0x2a, 0x79, 0xb4,
++	0xb7, 0xab, 0xca, 0xc7, 0x9b, 0x8e, 0xcd, 0x5f,
++	0x11, 0x03, 0xcb, 0x30, 0xa3, 0xab, 0xda, 0xfe,
++	0x64, 0xb9, 0xbb, 0xd8, 0x5e, 0x3a, 0x1a, 0x56,
++	0xe5, 0x05, 0x48, 0x90, 0x1e, 0x61, 0x69, 0x1b,
++	0x22, 0xe6, 0x1a, 0x3c, 0x75, 0xad, 0x1f, 0x37,
++	0x28, 0xdc, 0xe4, 0x6d, 0xbd, 0x42, 0xdc, 0xd3,
++	0xc8, 0xb6, 0x1c, 0x48, 0xfe, 0x94, 0x77, 0x7f,
++	0xbd, 0x62, 0xac, 0xa3, 0x47, 0x27, 0xcf, 0x5f,
++	0xd9, 0xdb, 0xaf, 0xec, 0xf7, 0x5e, 0xc1, 0xb0,
++	0x9d, 0x01, 0x26, 0x99, 0x7e, 0x8f, 0x03, 0x70,
++	0xb5, 0x42, 0xbe, 0x67, 0x28, 0x1b, 0x7c, 0xbd,
++	0x61, 0x21, 0x97, 0xcc, 0x5c, 0xe1, 0x97, 0x8f,
++	0x8d, 0xde, 0x2b, 0xaa, 0xa7, 0x71, 0x1d, 0x1e,
++	0x02, 0x73, 0x70, 0x58, 0x32, 0x5b, 0x1d, 0x67,
++	0x3d, 0xe0, 0x74, 0x4f, 0x03, 0xf2, 0x70, 0x51,
++	0x79, 0xf1, 0x61, 0x70, 0x15, 0x74, 0x9d, 0x23,
++	0x89, 0xde, 0xac, 0xfd, 0xde, 0xd0, 0x1f, 0xc3,
++	0x87, 0x44, 0x35, 0x4b, 0xe5, 0xb0, 0x60, 0xc5,
++	0x22, 0xe4, 0x9e, 0xca, 0xeb, 0xd5, 0x3a, 0x09,
++	0x45, 0xa4, 0xdb, 0xfa, 0x3f, 0xeb, 0x1b, 0xc7,
++	0xc8, 0x14, 0x99, 0x51, 0x92, 0x10, 0xed, 0xed,
++	0x28, 0xe0, 0xa1, 0xf8, 0x26, 0xcf, 0xcd, 0xcb,
++	0x63, 0xa1, 0x3b, 0xe3, 0xdf, 0x7e, 0xfe, 0xa6,
++	0xf0, 0x81, 0x9a, 0xbf, 0x55, 0xde, 0x54, 0xd5,
++	0x56, 0x60, 0x98, 0x10, 0x68, 0xf4, 0x38, 0x96,
++	0x8e, 0x6f, 0x1d, 0x44, 0x7f, 0xd6, 0x2f, 0xfe,
++	0x55, 0xfb, 0x0c, 0x7e, 0x67, 0xe2, 0x61, 0x44,
++	0xed, 0xf2, 0x35, 0x30, 0x5d, 0xe9, 0xc7, 0xd6,
++	0x6d, 0xe0, 0xa0, 0xed, 0xf3, 0xfc, 0xd8, 0x3e,
++	0x0a, 0x7b, 0xcd, 0xaf, 0x65, 0x68, 0x18, 0xc0,
++	0xec, 0x04, 0x1c, 0x74, 0x6d, 0xe2, 0x6e, 0x79,
++	0xd4, 0x11, 0x2b, 0x62, 0xd5, 0x27, 0xad, 0x4f,
++	0x01, 0x59, 0x73, 0xcc, 0x6a, 0x53, 0xfb, 0x2d,
++	0xd5, 0x4e, 0x99, 0x21, 0x65, 0x4d, 0xf5, 0x82,
++	0xf7, 0xd8, 0x42, 0xce, 0x6f, 0x3d, 0x36, 0x47,
++	0xf1, 0x05, 0x16, 0xe8, 0x1b, 0x6a, 0x8f, 0x93,
++	0xf2, 0x8f, 0x37, 0x40, 0x12, 0x28, 0xa3, 0xe6,
++	0xb9, 0x17, 0x4a, 0x1f, 0xb1, 0xd1, 0x66, 0x69,
++	0x86, 0xc4, 0xfc, 0x97, 0xae, 0x3f, 0x8f, 0x1e,
++	0x2b, 0xdf, 0xcd, 0xf9, 0x3c
++};
++static const u8 dec_output011[] __initconst = {
++	0x7a, 0x57, 0xf2, 0xc7, 0x06, 0x3f, 0x50, 0x7b,
++	0x36, 0x1a, 0x66, 0x5c, 0xb9, 0x0e, 0x5e, 0x3b,
++	0x45, 0x60, 0xbe, 0x9a, 0x31, 0x9f, 0xff, 0x5d,
++	0x66, 0x34, 0xb4, 0xdc, 0xfb, 0x9d, 0x8e, 0xee,
++	0x6a, 0x33, 0xa4, 0x07, 0x3c, 0xf9, 0x4c, 0x30,
++	0xa1, 0x24, 0x52, 0xf9, 0x50, 0x46, 0x88, 0x20,
++	0x02, 0x32, 0x3a, 0x0e, 0x99, 0x63, 0xaf, 0x1f,
++	0x15, 0x28, 0x2a, 0x05, 0xff, 0x57, 0x59, 0x5e,
++	0x18, 0xa1, 0x1f, 0xd0, 0x92, 0x5c, 0x88, 0x66,
++	0x1b, 0x00, 0x64, 0xa5, 0x93, 0x8d, 0x06, 0x46,
++	0xb0, 0x64, 0x8b, 0x8b, 0xef, 0x99, 0x05, 0x35,
++	0x85, 0xb3, 0xf3, 0x33, 0xbb, 0xec, 0x66, 0xb6,
++	0x3d, 0x57, 0x42, 0xe3, 0xb4, 0xc6, 0xaa, 0xb0,
++	0x41, 0x2a, 0xb9, 0x59, 0xa9, 0xf6, 0x3e, 0x15,
++	0x26, 0x12, 0x03, 0x21, 0x4c, 0x74, 0x43, 0x13,
++	0x2a, 0x03, 0x27, 0x09, 0xb4, 0xfb, 0xe7, 0xb7,
++	0x40, 0xff, 0x5e, 0xce, 0x48, 0x9a, 0x60, 0xe3,
++	0x8b, 0x80, 0x8c, 0x38, 0x2d, 0xcb, 0x93, 0x37,
++	0x74, 0x05, 0x52, 0x6f, 0x73, 0x3e, 0xc3, 0xbc,
++	0xca, 0x72, 0x0a, 0xeb, 0xf1, 0x3b, 0xa0, 0x95,
++	0xdc, 0x8a, 0xc4, 0xa9, 0xdc, 0xca, 0x44, 0xd8,
++	0x08, 0x63, 0x6a, 0x36, 0xd3, 0x3c, 0xb8, 0xac,
++	0x46, 0x7d, 0xfd, 0xaa, 0xeb, 0x3e, 0x0f, 0x45,
++	0x8f, 0x49, 0xda, 0x2b, 0xf2, 0x12, 0xbd, 0xaf,
++	0x67, 0x8a, 0x63, 0x48, 0x4b, 0x55, 0x5f, 0x6d,
++	0x8c, 0xb9, 0x76, 0x34, 0x84, 0xae, 0xc2, 0xfc,
++	0x52, 0x64, 0x82, 0xf7, 0xb0, 0x06, 0xf0, 0x45,
++	0x73, 0x12, 0x50, 0x30, 0x72, 0xea, 0x78, 0x9a,
++	0xa8, 0xaf, 0xb5, 0xe3, 0xbb, 0x77, 0x52, 0xec,
++	0x59, 0x84, 0xbf, 0x6b, 0x8f, 0xce, 0x86, 0x5e,
++	0x1f, 0x23, 0xe9, 0xfb, 0x08, 0x86, 0xf7, 0x10,
++	0xb9, 0xf2, 0x44, 0x96, 0x44, 0x63, 0xa9, 0xa8,
++	0x78, 0x00, 0x23, 0xd6, 0xc7, 0xe7, 0x6e, 0x66,
++	0x4f, 0xcc, 0xee, 0x15, 0xb3, 0xbd, 0x1d, 0xa0,
++	0xe5, 0x9c, 0x1b, 0x24, 0x2c, 0x4d, 0x3c, 0x62,
++	0x35, 0x9c, 0x88, 0x59, 0x09, 0xdd, 0x82, 0x1b,
++	0xcf, 0x0a, 0x83, 0x6b, 0x3f, 0xae, 0x03, 0xc4,
++	0xb4, 0xdd, 0x7e, 0x5b, 0x28, 0x76, 0x25, 0x96,
++	0xd9, 0xc9, 0x9d, 0x5f, 0x86, 0xfa, 0xf6, 0xd7,
++	0xd2, 0xe6, 0x76, 0x1d, 0x0f, 0xa1, 0xdc, 0x74,
++	0x05, 0x1b, 0x1d, 0xe0, 0xcd, 0x16, 0xb0, 0xa8,
++	0x8a, 0x34, 0x7b, 0x15, 0x11, 0x77, 0xe5, 0x7b,
++	0x7e, 0x20, 0xf7, 0xda, 0x38, 0xda, 0xce, 0x70,
++	0xe9, 0xf5, 0x6c, 0xd9, 0xbe, 0x0c, 0x4c, 0x95,
++	0x4c, 0xc2, 0x9b, 0x34, 0x55, 0x55, 0xe1, 0xf3,
++	0x46, 0x8e, 0x48, 0x74, 0x14, 0x4f, 0x9d, 0xc9,
++	0xf5, 0xe8, 0x1a, 0xf0, 0x11, 0x4a, 0xc1, 0x8d,
++	0xe0, 0x93, 0xa0, 0xbe, 0x09, 0x1c, 0x2b, 0x4e,
++	0x0f, 0xb2, 0x87, 0x8b, 0x84, 0xfe, 0x92, 0x32,
++	0x14, 0xd7, 0x93, 0xdf, 0xe7, 0x44, 0xbc, 0xc5,
++	0xae, 0x53, 0x69, 0xd8, 0xb3, 0x79, 0x37, 0x80,
++	0xe3, 0x17, 0x5c, 0xec, 0x53, 0x00, 0x9a, 0xe3,
++	0x8e, 0xdc, 0x38, 0xb8, 0x66, 0xf0, 0xd3, 0xad,
++	0x1d, 0x02, 0x96, 0x86, 0x3e, 0x9d, 0x3b, 0x5d,
++	0xa5, 0x7f, 0x21, 0x10, 0xf1, 0x1f, 0x13, 0x20,
++	0xf9, 0x57, 0x87, 0x20, 0xf5, 0x5f, 0xf1, 0x17,
++	0x48, 0x0a, 0x51, 0x5a, 0xcd, 0x19, 0x03, 0xa6,
++	0x5a, 0xd1, 0x12, 0x97, 0xe9, 0x48, 0xe2, 0x1d,
++	0x83, 0x75, 0x50, 0xd9, 0x75, 0x7d, 0x6a, 0x82,
++	0xa1, 0xf9, 0x4e, 0x54, 0x87, 0x89, 0xc9, 0x0c,
++	0xb7, 0x5b, 0x6a, 0x91, 0xc1, 0x9c, 0xb2, 0xa9,
++	0xdc, 0x9a, 0xa4, 0x49, 0x0a, 0x6d, 0x0d, 0xbb,
++	0xde, 0x86, 0x44, 0xdd, 0x5d, 0x89, 0x2b, 0x96,
++	0x0f, 0x23, 0x95, 0xad, 0xcc, 0xa2, 0xb3, 0xb9,
++	0x7e, 0x74, 0x38, 0xba, 0x9f, 0x73, 0xae, 0x5f,
++	0xf8, 0x68, 0xa2, 0xe0, 0xa9, 0xce, 0xbd, 0x40,
++	0xd4, 0x4c, 0x6b, 0xd2, 0x56, 0x62, 0xb0, 0xcc,
++	0x63, 0x7e, 0x5b, 0xd3, 0xae, 0xd1, 0x75, 0xce,
++	0xbb, 0xb4, 0x5b, 0xa8, 0xf8, 0xb4, 0xac, 0x71,
++	0x75, 0xaa, 0xc9, 0x9f, 0xbb, 0x6c, 0xad, 0x0f,
++	0x55, 0x5d, 0xe8, 0x85, 0x7d, 0xf9, 0x21, 0x35,
++	0xea, 0x92, 0x85, 0x2b, 0x00, 0xec, 0x84, 0x90,
++	0x0a, 0x63, 0x96, 0xe4, 0x6b, 0xa9, 0x77, 0xb8,
++	0x91, 0xf8, 0x46, 0x15, 0x72, 0x63, 0x70, 0x01,
++	0x40, 0xa3, 0xa5, 0x76, 0x62, 0x2b, 0xbf, 0xf1,
++	0xe5, 0x8d, 0x9f, 0xa3, 0xfa, 0x9b, 0x03, 0xbe,
++	0xfe, 0x65, 0x6f, 0xa2, 0x29, 0x0d, 0x54, 0xb4,
++	0x71, 0xce, 0xa9, 0xd6, 0x3d, 0x88, 0xf9, 0xaf,
++	0x6b, 0xa8, 0x9e, 0xf4, 0x16, 0x96, 0x36, 0xb9,
++	0x00, 0xdc, 0x10, 0xab, 0xb5, 0x08, 0x31, 0x1f,
++	0x00, 0xb1, 0x3c, 0xd9, 0x38, 0x3e, 0xc6, 0x04,
++	0xa7, 0x4e, 0xe8, 0xae, 0xed, 0x98, 0xc2, 0xf7,
++	0xb9, 0x00, 0x5f, 0x8c, 0x60, 0xd1, 0xe5, 0x15,
++	0xf7, 0xae, 0x1e, 0x84, 0x88, 0xd1, 0xf6, 0xbc,
++	0x3a, 0x89, 0x35, 0x22, 0x83, 0x7c, 0xca, 0xf0,
++	0x33, 0x82, 0x4c, 0x79, 0x3c, 0xfd, 0xb1, 0xae,
++	0x52, 0x62, 0x55, 0xd2, 0x41, 0x60, 0xc6, 0xbb,
++	0xfa, 0x0e, 0x59, 0xd6, 0xa8, 0xfe, 0x5d, 0xed,
++	0x47, 0x3d, 0xe0, 0xea, 0x1f, 0x6e, 0x43, 0x51,
++	0xec, 0x10, 0x52, 0x56, 0x77, 0x42, 0x6b, 0x52,
++	0x87, 0xd8, 0xec, 0xe0, 0xaa, 0x76, 0xa5, 0x84,
++	0x2a, 0x22, 0x24, 0xfd, 0x92, 0x40, 0x88, 0xd5,
++	0x85, 0x1c, 0x1f, 0x6b, 0x47, 0xa0, 0xc4, 0xe4,
++	0xef, 0xf4, 0xea, 0xd7, 0x59, 0xac, 0x2a, 0x9e,
++	0x8c, 0xfa, 0x1f, 0x42, 0x08, 0xfe, 0x4f, 0x74,
++	0xa0, 0x26, 0xf5, 0xb3, 0x84, 0xf6, 0x58, 0x5f,
++	0x26, 0x66, 0x3e, 0xd7, 0xe4, 0x22, 0x91, 0x13,
++	0xc8, 0xac, 0x25, 0x96, 0x23, 0xd8, 0x09, 0xea,
++	0x45, 0x75, 0x23, 0xb8, 0x5f, 0xc2, 0x90, 0x8b,
++	0x09, 0xc4, 0xfc, 0x47, 0x6c, 0x6d, 0x0a, 0xef,
++	0x69, 0xa4, 0x38, 0x19, 0xcf, 0x7d, 0xf9, 0x09,
++	0x73, 0x9b, 0x60, 0x5a, 0xf7, 0x37, 0xb5, 0xfe,
++	0x9f, 0xe3, 0x2b, 0x4c, 0x0d, 0x6e, 0x19, 0xf1,
++	0xd6, 0xc0, 0x70, 0xf3, 0x9d, 0x22, 0x3c, 0xf9,
++	0x49, 0xce, 0x30, 0x8e, 0x44, 0xb5, 0x76, 0x15,
++	0x8f, 0x52, 0xfd, 0xa5, 0x04, 0xb8, 0x55, 0x6a,
++	0x36, 0x59, 0x7c, 0xc4, 0x48, 0xb8, 0xd7, 0xab,
++	0x05, 0x66, 0xe9, 0x5e, 0x21, 0x6f, 0x6b, 0x36,
++	0x29, 0xbb, 0xe9, 0xe3, 0xa2, 0x9a, 0xa8, 0xcd,
++	0x55, 0x25, 0x11, 0xba, 0x5a, 0x58, 0xa0, 0xde,
++	0xae, 0x19, 0x2a, 0x48, 0x5a, 0xff, 0x36, 0xcd,
++	0x6d, 0x16, 0x7a, 0x73, 0x38, 0x46, 0xe5, 0x47,
++	0x59, 0xc8, 0xa2, 0xf6, 0xe2, 0x6c, 0x83, 0xc5,
++	0x36, 0x2c, 0x83, 0x7d, 0xb4, 0x01, 0x05, 0x69,
++	0xe7, 0xaf, 0x5c, 0xc4, 0x64, 0x82, 0x12, 0x21,
++	0xef, 0xf7, 0xd1, 0x7d, 0xb8, 0x8d, 0x8c, 0x98,
++	0x7c, 0x5f, 0x7d, 0x92, 0x88, 0xb9, 0x94, 0x07,
++	0x9c, 0xd8, 0xe9, 0x9c, 0x17, 0x38, 0xe3, 0x57,
++	0x6c, 0xe0, 0xdc, 0xa5, 0x92, 0x42, 0xb3, 0xbd,
++	0x50, 0xa2, 0x7e, 0xb5, 0xb1, 0x52, 0x72, 0x03,
++	0x97, 0xd8, 0xaa, 0x9a, 0x1e, 0x75, 0x41, 0x11,
++	0xa3, 0x4f, 0xcc, 0xd4, 0xe3, 0x73, 0xad, 0x96,
++	0xdc, 0x47, 0x41, 0x9f, 0xb0, 0xbe, 0x79, 0x91,
++	0xf5, 0xb6, 0x18, 0xfe, 0xc2, 0x83, 0x18, 0x7d,
++	0x73, 0xd9, 0x4f, 0x83, 0x84, 0x03, 0xb3, 0xf0,
++	0x77, 0x66, 0x3d, 0x83, 0x63, 0x2e, 0x2c, 0xf9,
++	0xdd, 0xa6, 0x1f, 0x89, 0x82, 0xb8, 0x23, 0x42,
++	0xeb, 0xe2, 0xca, 0x70, 0x82, 0x61, 0x41, 0x0a,
++	0x6d, 0x5f, 0x75, 0xc5, 0xe2, 0xc4, 0x91, 0x18,
++	0x44, 0x22, 0xfa, 0x34, 0x10, 0xf5, 0x20, 0xdc,
++	0xb7, 0xdd, 0x2a, 0x20, 0x77, 0xf5, 0xf9, 0xce,
++	0xdb, 0xa0, 0x0a, 0x52, 0x2a, 0x4e, 0xdd, 0xcc,
++	0x97, 0xdf, 0x05, 0xe4, 0x5e, 0xb7, 0xaa, 0xf0,
++	0xe2, 0x80, 0xff, 0xba, 0x1a, 0x0f, 0xac, 0xdf,
++	0x02, 0x32, 0xe6, 0xf7, 0xc7, 0x17, 0x13, 0xb7,
++	0xfc, 0x98, 0x48, 0x8c, 0x0d, 0x82, 0xc9, 0x80,
++	0x7a, 0xe2, 0x0a, 0xc5, 0xb4, 0xde, 0x7c, 0x3c,
++	0x79, 0x81, 0x0e, 0x28, 0x65, 0x79, 0x67, 0x82,
++	0x69, 0x44, 0x66, 0x09, 0xf7, 0x16, 0x1a, 0xf9,
++	0x7d, 0x80, 0xa1, 0x79, 0x14, 0xa9, 0xc8, 0x20,
++	0xfb, 0xa2, 0x46, 0xbe, 0x08, 0x35, 0x17, 0x58,
++	0xc1, 0x1a, 0xda, 0x2a, 0x6b, 0x2e, 0x1e, 0xe6,
++	0x27, 0x55, 0x7b, 0x19, 0xe2, 0xfb, 0x64, 0xfc,
++	0x5e, 0x15, 0x54, 0x3c, 0xe7, 0xc2, 0x11, 0x50,
++	0x30, 0xb8, 0x72, 0x03, 0x0b, 0x1a, 0x9f, 0x86,
++	0x27, 0x11, 0x5c, 0x06, 0x2b, 0xbd, 0x75, 0x1a,
++	0x0a, 0xda, 0x01, 0xfa, 0x5c, 0x4a, 0xc1, 0x80,
++	0x3a, 0x6e, 0x30, 0xc8, 0x2c, 0xeb, 0x56, 0xec,
++	0x89, 0xfa, 0x35, 0x7b, 0xb2, 0xf0, 0x97, 0x08,
++	0x86, 0x53, 0xbe, 0xbd, 0x40, 0x41, 0x38, 0x1c,
++	0xb4, 0x8b, 0x79, 0x2e, 0x18, 0x96, 0x94, 0xde,
++	0xe8, 0xca, 0xe5, 0x9f, 0x92, 0x9f, 0x15, 0x5d,
++	0x56, 0x60, 0x5c, 0x09, 0xf9, 0x16, 0xf4, 0x17,
++	0x0f, 0xf6, 0x4c, 0xda, 0xe6, 0x67, 0x89, 0x9f,
++	0xca, 0x6c, 0xe7, 0x9b, 0x04, 0x62, 0x0e, 0x26,
++	0xa6, 0x52, 0xbd, 0x29, 0xff, 0xc7, 0xa4, 0x96,
++	0xe6, 0x6a, 0x02, 0xa5, 0x2e, 0x7b, 0xfe, 0x97,
++	0x68, 0x3e, 0x2e, 0x5f, 0x3b, 0x0f, 0x36, 0xd6,
++	0x98, 0x19, 0x59, 0x48, 0xd2, 0xc6, 0xe1, 0x55,
++	0x1a, 0x6e, 0xd6, 0xed, 0x2c, 0xba, 0xc3, 0x9e,
++	0x64, 0xc9, 0x95, 0x86, 0x35, 0x5e, 0x3e, 0x88,
++	0x69, 0x99, 0x4b, 0xee, 0xbe, 0x9a, 0x99, 0xb5,
++	0x6e, 0x58, 0xae, 0xdd, 0x22, 0xdb, 0xdd, 0x6b,
++	0xfc, 0xaf, 0x90, 0xa3, 0x3d, 0xa4, 0xc1, 0x15,
++	0x92, 0x18, 0x8d, 0xd2, 0x4b, 0x7b, 0x06, 0xd1,
++	0x37, 0xb5, 0xe2, 0x7c, 0x2c, 0xf0, 0x25, 0xe4,
++	0x94, 0x2a, 0xbd, 0xe3, 0x82, 0x70, 0x78, 0xa3,
++	0x82, 0x10, 0x5a, 0x90, 0xd7, 0xa4, 0xfa, 0xaf,
++	0x1a, 0x88, 0x59, 0xdc, 0x74, 0x12, 0xb4, 0x8e,
++	0xd7, 0x19, 0x46, 0xf4, 0x84, 0x69, 0x9f, 0xbb,
++	0x70, 0xa8, 0x4c, 0x52, 0x81, 0xa9, 0xff, 0x76,
++	0x1c, 0xae, 0xd8, 0x11, 0x3d, 0x7f, 0x7d, 0xc5,
++	0x12, 0x59, 0x28, 0x18, 0xc2, 0xa2, 0xb7, 0x1c,
++	0x88, 0xf8, 0xd6, 0x1b, 0xa6, 0x7d, 0x9e, 0xde,
++	0x29, 0xf8, 0xed, 0xff, 0xeb, 0x92, 0x24, 0x4f,
++	0x05, 0xaa, 0xd9, 0x49, 0xba, 0x87, 0x59, 0x51,
++	0xc9, 0x20, 0x5c, 0x9b, 0x74, 0xcf, 0x03, 0xd9,
++	0x2d, 0x34, 0xc7, 0x5b, 0xa5, 0x40, 0xb2, 0x99,
++	0xf5, 0xcb, 0xb4, 0xf6, 0xb7, 0x72, 0x4a, 0xd6,
++	0xbd, 0xb0, 0xf3, 0x93, 0xe0, 0x1b, 0xa8, 0x04,
++	0x1e, 0x35, 0xd4, 0x80, 0x20, 0xf4, 0x9c, 0x31,
++	0x6b, 0x45, 0xb9, 0x15, 0xb0, 0x5e, 0xdd, 0x0a,
++	0x33, 0x9c, 0x83, 0xcd, 0x58, 0x89, 0x50, 0x56,
++	0xbb, 0x81, 0x00, 0x91, 0x32, 0xf3, 0x1b, 0x3e,
++	0xcf, 0x45, 0xe1, 0xf9, 0xe1, 0x2c, 0x26, 0x78,
++	0x93, 0x9a, 0x60, 0x46, 0xc9, 0xb5, 0x5e, 0x6a,
++	0x28, 0x92, 0x87, 0x3f, 0x63, 0x7b, 0xdb, 0xf7,
++	0xd0, 0x13, 0x9d, 0x32, 0x40, 0x5e, 0xcf, 0xfb,
++	0x79, 0x68, 0x47, 0x4c, 0xfd, 0x01, 0x17, 0xe6,
++	0x97, 0x93, 0x78, 0xbb, 0xa6, 0x27, 0xa3, 0xe8,
++	0x1a, 0xe8, 0x94, 0x55, 0x7d, 0x08, 0xe5, 0xdc,
++	0x66, 0xa3, 0x69, 0xc8, 0xca, 0xc5, 0xa1, 0x84,
++	0x55, 0xde, 0x08, 0x91, 0x16, 0x3a, 0x0c, 0x86,
++	0xab, 0x27, 0x2b, 0x64, 0x34, 0x02, 0x6c, 0x76,
++	0x8b, 0xc6, 0xaf, 0xcc, 0xe1, 0xd6, 0x8c, 0x2a,
++	0x18, 0x3d, 0xa6, 0x1b, 0x37, 0x75, 0x45, 0x73,
++	0xc2, 0x75, 0xd7, 0x53, 0x78, 0x3a, 0xd6, 0xe8,
++	0x29, 0xd2, 0x4a, 0xa8, 0x1e, 0x82, 0xf6, 0xb6,
++	0x81, 0xde, 0x21, 0xed, 0x2b, 0x56, 0xbb, 0xf2,
++	0xd0, 0x57, 0xc1, 0x7c, 0xd2, 0x6a, 0xd2, 0x56,
++	0xf5, 0x13, 0x5f, 0x1c, 0x6a, 0x0b, 0x74, 0xfb,
++	0xe9, 0xfe, 0x9e, 0xea, 0x95, 0xb2, 0x46, 0xab,
++	0x0a, 0xfc, 0xfd, 0xf3, 0xbb, 0x04, 0x2b, 0x76,
++	0x1b, 0xa4, 0x74, 0xb0, 0xc1, 0x78, 0xc3, 0x69,
++	0xe2, 0xb0, 0x01, 0xe1, 0xde, 0x32, 0x4c, 0x8d,
++	0x1a, 0xb3, 0x38, 0x08, 0xd5, 0xfc, 0x1f, 0xdc,
++	0x0e, 0x2c, 0x9c, 0xb1, 0xa1, 0x63, 0x17, 0x22,
++	0xf5, 0x6c, 0x93, 0x70, 0x74, 0x00, 0xf8, 0x39,
++	0x01, 0x94, 0xd1, 0x32, 0x23, 0x56, 0x5d, 0xa6,
++	0x02, 0x76, 0x76, 0x93, 0xce, 0x2f, 0x19, 0xe9,
++	0x17, 0x52, 0xae, 0x6e, 0x2c, 0x6d, 0x61, 0x7f,
++	0x3b, 0xaa, 0xe0, 0x52, 0x85, 0xc5, 0x65, 0xc1,
++	0xbb, 0x8e, 0x5b, 0x21, 0xd5, 0xc9, 0x78, 0x83,
++	0x07, 0x97, 0x4c, 0x62, 0x61, 0x41, 0xd4, 0xfc,
++	0xc9, 0x39, 0xe3, 0x9b, 0xd0, 0xcc, 0x75, 0xc4,
++	0x97, 0xe6, 0xdd, 0x2a, 0x5f, 0xa6, 0xe8, 0x59,
++	0x6c, 0x98, 0xb9, 0x02, 0xe2, 0xa2, 0xd6, 0x68,
++	0xee, 0x3b, 0x1d, 0xe3, 0x4d, 0x5b, 0x30, 0xef,
++	0x03, 0xf2, 0xeb, 0x18, 0x57, 0x36, 0xe8, 0xa1,
++	0xf4, 0x47, 0xfb, 0xcb, 0x8f, 0xcb, 0xc8, 0xf3,
++	0x4f, 0x74, 0x9d, 0x9d, 0xb1, 0x8d, 0x14, 0x44,
++	0xd9, 0x19, 0xb4, 0x54, 0x4f, 0x75, 0x19, 0x09,
++	0xa0, 0x75, 0xbc, 0x3b, 0x82, 0xc6, 0x3f, 0xb8,
++	0x83, 0x19, 0x6e, 0xd6, 0x37, 0xfe, 0x6e, 0x8a,
++	0x4e, 0xe0, 0x4a, 0xab, 0x7b, 0xc8, 0xb4, 0x1d,
++	0xf4, 0xed, 0x27, 0x03, 0x65, 0xa2, 0xa1, 0xae,
++	0x11, 0xe7, 0x98, 0x78, 0x48, 0x91, 0xd2, 0xd2,
++	0xd4, 0x23, 0x78, 0x50, 0xb1, 0x5b, 0x85, 0x10,
++	0x8d, 0xca, 0x5f, 0x0f, 0x71, 0xae, 0x72, 0x9a,
++	0xf6, 0x25, 0x19, 0x60, 0x06, 0xf7, 0x10, 0x34,
++	0x18, 0x0d, 0xc9, 0x9f, 0x7b, 0x0c, 0x9b, 0x8f,
++	0x91, 0x1b, 0x9f, 0xcd, 0x10, 0xee, 0x75, 0xf9,
++	0x97, 0x66, 0xfc, 0x4d, 0x33, 0x6e, 0x28, 0x2b,
++	0x92, 0x85, 0x4f, 0xab, 0x43, 0x8d, 0x8f, 0x7d,
++	0x86, 0xa7, 0xc7, 0xd8, 0xd3, 0x0b, 0x8b, 0x57,
++	0xb6, 0x1d, 0x95, 0x0d, 0xe9, 0xbc, 0xd9, 0x03,
++	0xd9, 0x10, 0x19, 0xc3, 0x46, 0x63, 0x55, 0x87,
++	0x61, 0x79, 0x6c, 0x95, 0x0e, 0x9c, 0xdd, 0xca,
++	0xc3, 0xf3, 0x64, 0xf0, 0x7d, 0x76, 0xb7, 0x53,
++	0x67, 0x2b, 0x1e, 0x44, 0x56, 0x81, 0xea, 0x8f,
++	0x5c, 0x42, 0x16, 0xb8, 0x28, 0xeb, 0x1b, 0x61,
++	0x10, 0x1e, 0xbf, 0xec, 0xa8
++};
++static const u8 dec_assoc011[] __initconst = {
++	0xd6, 0x31, 0xda, 0x5d, 0x42, 0x5e, 0xd7
++};
++static const u8 dec_nonce011[] __initconst = {
++	0xfd, 0x87, 0xd4, 0xd8, 0x62, 0xfd, 0xec, 0xaa
++};
++static const u8 dec_key011[] __initconst = {
++	0x35, 0x4e, 0xb5, 0x70, 0x50, 0x42, 0x8a, 0x85,
++	0xf2, 0xfb, 0xed, 0x7b, 0xd0, 0x9e, 0x97, 0xca,
++	0xfa, 0x98, 0x66, 0x63, 0xee, 0x37, 0xcc, 0x52,
++	0xfe, 0xd1, 0xdf, 0x95, 0x15, 0x34, 0x29, 0x38
++};
++
++static const u8 dec_input012[] __initconst = {
++	0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3,
++	0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf,
++	0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1,
++	0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f,
++	0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e,
++	0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5,
++	0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b,
++	0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b,
++	0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2,
++	0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1,
++	0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74,
++	0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e,
++	0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae,
++	0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd,
++	0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04,
++	0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55,
++	0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef,
++	0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b,
++	0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74,
++	0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26,
++	0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f,
++	0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64,
++	0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd,
++	0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad,
++	0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b,
++	0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e,
++	0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e,
++	0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0,
++	0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f,
++	0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50,
++	0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97,
++	0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03,
++	0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a,
++	0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15,
++	0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb,
++	0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34,
++	0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47,
++	0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86,
++	0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24,
++	0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c,
++	0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9,
++	0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7,
++	0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48,
++	0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b,
++	0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e,
++	0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61,
++	0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75,
++	0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26,
++	0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74,
++	0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43,
++	0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1,
++	0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79,
++	0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3,
++	0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5,
++	0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9,
++	0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d,
++	0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8,
++	0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26,
++	0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5,
++	0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d,
++	0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29,
++	0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57,
++	0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92,
++	0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9,
++	0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc,
++	0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd,
++	0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57,
++	0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3,
++	0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4,
++	0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c,
++	0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27,
++	0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c,
++	0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5,
++	0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14,
++	0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94,
++	0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b,
++	0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99,
++	0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84,
++	0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a,
++	0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa,
++	0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75,
++	0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74,
++	0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40,
++	0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72,
++	0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f,
++	0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92,
++	0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8,
++	0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c,
++	0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f,
++	0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb,
++	0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a,
++	0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b,
++	0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d,
++	0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c,
++	0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4,
++	0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00,
++	0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b,
++	0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4,
++	0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84,
++	0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba,
++	0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47,
++	0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4,
++	0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88,
++	0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81,
++	0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1,
++	0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a,
++	0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e,
++	0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1,
++	0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07,
++	0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24,
++	0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f,
++	0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a,
++	0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9,
++	0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9,
++	0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51,
++	0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1,
++	0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c,
++	0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53,
++	0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40,
++	0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a,
++	0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2,
++	0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2,
++	0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8,
++	0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07,
++	0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9,
++	0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d,
++	0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde,
++	0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f,
++	0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d,
++	0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d,
++	0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56,
++	0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c,
++	0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3,
++	0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d,
++	0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26,
++	0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10,
++	0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c,
++	0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11,
++	0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf,
++	0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c,
++	0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb,
++	0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79,
++	0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa,
++	0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80,
++	0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08,
++	0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c,
++	0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc,
++	0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab,
++	0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6,
++	0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9,
++	0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7,
++	0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2,
++	0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33,
++	0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2,
++	0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e,
++	0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c,
++	0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b,
++	0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66,
++	0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6,
++	0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44,
++	0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74,
++	0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6,
++	0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f,
++	0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24,
++	0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1,
++	0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2,
++	0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5,
++	0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d,
++	0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0,
++	0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b,
++	0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3,
++	0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0,
++	0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3,
++	0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c,
++	0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b,
++	0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5,
++	0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51,
++	0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71,
++	0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68,
++	0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb,
++	0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e,
++	0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b,
++	0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8,
++	0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb,
++	0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54,
++	0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7,
++	0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff,
++	0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd,
++	0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde,
++	0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c,
++	0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1,
++	0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8,
++	0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14,
++	0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c,
++	0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4,
++	0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06,
++	0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52,
++	0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d,
++	0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c,
++	0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6,
++	0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5,
++	0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f,
++	0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e,
++	0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98,
++	0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8,
++	0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb,
++	0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b,
++	0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79,
++	0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11,
++	0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d,
++	0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10,
++	0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23,
++	0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23,
++	0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90,
++	0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4,
++	0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1,
++	0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7,
++	0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11,
++	0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50,
++	0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8,
++	0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97,
++	0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38,
++	0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f,
++	0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33,
++	0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f,
++	0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75,
++	0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21,
++	0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90,
++	0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8,
++	0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91,
++	0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1,
++	0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f,
++	0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3,
++	0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc,
++	0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a,
++	0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62,
++	0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55,
++	0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23,
++	0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6,
++	0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac,
++	0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12,
++	0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a,
++	0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7,
++	0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec,
++	0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28,
++	0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88,
++	0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4,
++	0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17,
++	0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2,
++	0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33,
++	0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a,
++	0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28,
++	0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62,
++	0x70, 0xcf, 0xd6
++};
++static const u8 dec_output012[] __initconst = {
++	0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0,
++	0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5,
++	0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57,
++	0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff,
++	0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5,
++	0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b,
++	0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46,
++	0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b,
++	0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71,
++	0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0,
++	0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b,
++	0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d,
++	0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f,
++	0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24,
++	0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23,
++	0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e,
++	0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14,
++	0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d,
++	0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb,
++	0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4,
++	0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf,
++	0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e,
++	0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6,
++	0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33,
++	0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb,
++	0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0,
++	0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe,
++	0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00,
++	0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d,
++	0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b,
++	0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50,
++	0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e,
++	0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4,
++	0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28,
++	0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8,
++	0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b,
++	0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86,
++	0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67,
++	0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff,
++	0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59,
++	0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe,
++	0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6,
++	0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e,
++	0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b,
++	0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50,
++	0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39,
++	0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02,
++	0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9,
++	0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a,
++	0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38,
++	0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9,
++	0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65,
++	0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb,
++	0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2,
++	0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae,
++	0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee,
++	0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00,
++	0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c,
++	0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8,
++	0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31,
++	0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68,
++	0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4,
++	0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0,
++	0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11,
++	0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7,
++	0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39,
++	0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1,
++	0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1,
++	0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2,
++	0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66,
++	0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49,
++	0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2,
++	0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5,
++	0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3,
++	0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c,
++	0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa,
++	0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00,
++	0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54,
++	0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87,
++	0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03,
++	0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39,
++	0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40,
++	0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6,
++	0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22,
++	0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5,
++	0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e,
++	0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32,
++	0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53,
++	0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42,
++	0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c,
++	0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68,
++	0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48,
++	0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c,
++	0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce,
++	0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd,
++	0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa,
++	0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69,
++	0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8,
++	0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58,
++	0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0,
++	0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45,
++	0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb,
++	0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33,
++	0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c,
++	0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23,
++	0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80,
++	0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1,
++	0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff,
++	0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24,
++	0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9,
++	0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46,
++	0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8,
++	0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20,
++	0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35,
++	0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63,
++	0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb,
++	0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36,
++	0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a,
++	0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c,
++	0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f,
++	0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02,
++	0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03,
++	0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa,
++	0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16,
++	0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d,
++	0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5,
++	0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7,
++	0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac,
++	0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47,
++	0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3,
++	0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35,
++	0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e,
++	0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6,
++	0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74,
++	0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e,
++	0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a,
++	0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0,
++	0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4,
++	0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8,
++	0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16,
++	0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32,
++	0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65,
++	0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06,
++	0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a,
++	0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7,
++	0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85,
++	0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb,
++	0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46,
++	0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e,
++	0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61,
++	0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb,
++	0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d,
++	0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00,
++	0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5,
++	0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6,
++	0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1,
++	0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a,
++	0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7,
++	0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63,
++	0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38,
++	0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3,
++	0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed,
++	0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49,
++	0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42,
++	0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0,
++	0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f,
++	0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1,
++	0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd,
++	0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d,
++	0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88,
++	0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1,
++	0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25,
++	0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22,
++	0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28,
++	0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f,
++	0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53,
++	0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28,
++	0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8,
++	0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc,
++	0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8,
++	0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb,
++	0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3,
++	0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3,
++	0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac,
++	0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2,
++	0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a,
++	0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad,
++	0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e,
++	0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd,
++	0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf,
++	0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba,
++	0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41,
++	0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91,
++	0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d,
++	0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6,
++	0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf,
++	0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92,
++	0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e,
++	0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72,
++	0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04,
++	0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46,
++	0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55,
++	0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84,
++	0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61,
++	0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d,
++	0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8,
++	0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d,
++	0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87,
++	0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70,
++	0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94,
++	0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f,
++	0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb,
++	0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90,
++	0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31,
++	0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06,
++	0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05,
++	0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7,
++	0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e,
++	0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae,
++	0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2,
++	0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21,
++	0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0,
++	0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d,
++	0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0,
++	0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6,
++	0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5,
++	0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9,
++	0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8,
++	0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57,
++	0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1,
++	0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c,
++	0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b,
++	0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69,
++	0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d,
++	0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d,
++	0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19,
++	0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82,
++	0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20,
++	0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f,
++	0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e,
++	0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f,
++	0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47,
++	0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b,
++	0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4,
++	0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b,
++	0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4,
++	0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9,
++	0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3,
++	0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0,
++	0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16,
++	0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d,
++	0x78, 0xec, 0x00
++};
++static const u8 dec_assoc012[] __initconst = {
++	0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8,
++	0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce,
++	0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c,
++	0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc,
++	0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e,
++	0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f,
++	0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b,
++	0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9
++};
++static const u8 dec_nonce012[] __initconst = {
++	0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06
++};
++static const u8 dec_key012[] __initconst = {
++	0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e,
++	0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d,
++	0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e,
++	0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64
++};
++
++static const u8 dec_input013[] __initconst = {
++	0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3,
++	0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf,
++	0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1,
++	0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f,
++	0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e,
++	0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5,
++	0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b,
++	0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b,
++	0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2,
++	0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1,
++	0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74,
++	0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e,
++	0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae,
++	0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd,
++	0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04,
++	0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55,
++	0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef,
++	0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b,
++	0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74,
++	0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26,
++	0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f,
++	0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64,
++	0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd,
++	0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad,
++	0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b,
++	0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e,
++	0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e,
++	0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0,
++	0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f,
++	0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50,
++	0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97,
++	0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03,
++	0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a,
++	0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15,
++	0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb,
++	0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34,
++	0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47,
++	0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86,
++	0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24,
++	0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c,
++	0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9,
++	0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7,
++	0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48,
++	0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b,
++	0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e,
++	0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61,
++	0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75,
++	0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26,
++	0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74,
++	0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43,
++	0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1,
++	0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79,
++	0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3,
++	0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5,
++	0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9,
++	0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d,
++	0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8,
++	0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26,
++	0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5,
++	0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d,
++	0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29,
++	0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57,
++	0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92,
++	0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9,
++	0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc,
++	0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd,
++	0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57,
++	0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3,
++	0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4,
++	0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c,
++	0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27,
++	0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c,
++	0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5,
++	0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14,
++	0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94,
++	0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b,
++	0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99,
++	0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84,
++	0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a,
++	0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa,
++	0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75,
++	0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74,
++	0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40,
++	0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72,
++	0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f,
++	0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92,
++	0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8,
++	0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c,
++	0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f,
++	0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb,
++	0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a,
++	0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b,
++	0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d,
++	0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c,
++	0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4,
++	0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00,
++	0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b,
++	0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4,
++	0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84,
++	0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba,
++	0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47,
++	0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4,
++	0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88,
++	0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81,
++	0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1,
++	0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a,
++	0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e,
++	0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1,
++	0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07,
++	0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24,
++	0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f,
++	0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a,
++	0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9,
++	0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9,
++	0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51,
++	0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1,
++	0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c,
++	0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53,
++	0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40,
++	0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a,
++	0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2,
++	0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2,
++	0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8,
++	0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07,
++	0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9,
++	0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d,
++	0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde,
++	0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f,
++	0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d,
++	0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d,
++	0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56,
++	0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c,
++	0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3,
++	0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d,
++	0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26,
++	0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10,
++	0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c,
++	0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11,
++	0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf,
++	0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c,
++	0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb,
++	0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79,
++	0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa,
++	0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80,
++	0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08,
++	0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c,
++	0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc,
++	0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab,
++	0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6,
++	0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9,
++	0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7,
++	0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2,
++	0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33,
++	0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2,
++	0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e,
++	0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c,
++	0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b,
++	0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66,
++	0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6,
++	0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44,
++	0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74,
++	0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6,
++	0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f,
++	0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24,
++	0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1,
++	0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2,
++	0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5,
++	0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d,
++	0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0,
++	0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b,
++	0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3,
++	0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0,
++	0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3,
++	0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c,
++	0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b,
++	0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5,
++	0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51,
++	0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71,
++	0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68,
++	0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb,
++	0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e,
++	0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b,
++	0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8,
++	0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb,
++	0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54,
++	0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7,
++	0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff,
++	0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd,
++	0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde,
++	0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c,
++	0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1,
++	0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8,
++	0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14,
++	0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c,
++	0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4,
++	0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06,
++	0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52,
++	0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d,
++	0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c,
++	0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6,
++	0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5,
++	0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f,
++	0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e,
++	0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98,
++	0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8,
++	0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb,
++	0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b,
++	0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79,
++	0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11,
++	0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d,
++	0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10,
++	0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23,
++	0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23,
++	0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90,
++	0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4,
++	0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1,
++	0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7,
++	0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11,
++	0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50,
++	0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8,
++	0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97,
++	0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38,
++	0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f,
++	0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33,
++	0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f,
++	0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75,
++	0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21,
++	0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90,
++	0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8,
++	0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91,
++	0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1,
++	0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f,
++	0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3,
++	0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc,
++	0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a,
++	0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62,
++	0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55,
++	0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23,
++	0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6,
++	0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac,
++	0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12,
++	0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a,
++	0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7,
++	0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec,
++	0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28,
++	0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88,
++	0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4,
++	0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17,
++	0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2,
++	0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33,
++	0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a,
++	0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28,
++	0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62,
++	0x70, 0xcf, 0xd7
++};
++static const u8 dec_output013[] __initconst = {
++	0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0,
++	0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5,
++	0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57,
++	0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff,
++	0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5,
++	0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b,
++	0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46,
++	0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b,
++	0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71,
++	0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0,
++	0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b,
++	0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d,
++	0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f,
++	0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24,
++	0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23,
++	0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e,
++	0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14,
++	0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d,
++	0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb,
++	0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4,
++	0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf,
++	0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e,
++	0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6,
++	0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33,
++	0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb,
++	0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0,
++	0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe,
++	0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00,
++	0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d,
++	0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b,
++	0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50,
++	0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e,
++	0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4,
++	0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28,
++	0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8,
++	0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b,
++	0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86,
++	0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67,
++	0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff,
++	0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59,
++	0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe,
++	0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6,
++	0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e,
++	0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b,
++	0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50,
++	0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39,
++	0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02,
++	0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9,
++	0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a,
++	0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38,
++	0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9,
++	0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65,
++	0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb,
++	0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2,
++	0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae,
++	0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee,
++	0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00,
++	0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c,
++	0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8,
++	0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31,
++	0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68,
++	0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4,
++	0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0,
++	0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11,
++	0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7,
++	0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39,
++	0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1,
++	0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1,
++	0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2,
++	0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66,
++	0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49,
++	0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2,
++	0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5,
++	0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3,
++	0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c,
++	0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa,
++	0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00,
++	0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54,
++	0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87,
++	0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03,
++	0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39,
++	0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40,
++	0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6,
++	0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22,
++	0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5,
++	0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e,
++	0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32,
++	0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53,
++	0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42,
++	0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c,
++	0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68,
++	0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48,
++	0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c,
++	0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce,
++	0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd,
++	0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa,
++	0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69,
++	0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8,
++	0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58,
++	0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0,
++	0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45,
++	0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb,
++	0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33,
++	0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c,
++	0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23,
++	0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80,
++	0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1,
++	0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff,
++	0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24,
++	0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9,
++	0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46,
++	0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8,
++	0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20,
++	0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35,
++	0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63,
++	0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb,
++	0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36,
++	0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a,
++	0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c,
++	0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f,
++	0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02,
++	0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03,
++	0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa,
++	0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16,
++	0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d,
++	0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5,
++	0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7,
++	0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac,
++	0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47,
++	0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3,
++	0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35,
++	0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e,
++	0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6,
++	0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74,
++	0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e,
++	0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a,
++	0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0,
++	0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4,
++	0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8,
++	0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16,
++	0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32,
++	0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65,
++	0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06,
++	0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a,
++	0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7,
++	0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85,
++	0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb,
++	0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46,
++	0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e,
++	0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61,
++	0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb,
++	0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d,
++	0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00,
++	0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5,
++	0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6,
++	0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1,
++	0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a,
++	0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7,
++	0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63,
++	0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38,
++	0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3,
++	0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed,
++	0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49,
++	0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42,
++	0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0,
++	0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f,
++	0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1,
++	0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd,
++	0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d,
++	0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88,
++	0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1,
++	0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25,
++	0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22,
++	0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28,
++	0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f,
++	0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53,
++	0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28,
++	0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8,
++	0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc,
++	0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8,
++	0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb,
++	0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3,
++	0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3,
++	0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac,
++	0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2,
++	0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a,
++	0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad,
++	0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e,
++	0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd,
++	0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf,
++	0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba,
++	0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41,
++	0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91,
++	0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d,
++	0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6,
++	0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf,
++	0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92,
++	0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e,
++	0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72,
++	0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04,
++	0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46,
++	0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55,
++	0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84,
++	0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61,
++	0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d,
++	0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8,
++	0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d,
++	0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87,
++	0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70,
++	0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94,
++	0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f,
++	0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb,
++	0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90,
++	0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31,
++	0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06,
++	0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05,
++	0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7,
++	0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e,
++	0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae,
++	0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2,
++	0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21,
++	0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0,
++	0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d,
++	0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0,
++	0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6,
++	0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5,
++	0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9,
++	0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8,
++	0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57,
++	0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1,
++	0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c,
++	0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b,
++	0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69,
++	0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d,
++	0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d,
++	0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19,
++	0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82,
++	0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20,
++	0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f,
++	0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e,
++	0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f,
++	0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47,
++	0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b,
++	0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4,
++	0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b,
++	0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4,
++	0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9,
++	0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3,
++	0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0,
++	0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16,
++	0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d,
++	0x78, 0xec, 0x00
++};
++static const u8 dec_assoc013[] __initconst = { /* 63 bytes of associated data for decryption vector 013 */
++	0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8,
++	0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce,
++	0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c,
++	0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc,
++	0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e,
++	0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f,
++	0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b,
++	0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9
++};
++static const u8 dec_nonce013[] __initconst = { /* 8 bytes; the selftest loads decryption nonces with get_unaligned_le64() */
++	0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06
++};
++static const u8 dec_key013[] __initconst = { /* 32-byte key for decryption vector 013 */
++	0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e,
++	0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d,
++	0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e,
++	0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64
++};
++
++static const struct chacha20poly1305_testvec
++chacha20poly1305_dec_vectors[] __initconst = { /* 13 decryption vectors walked by chacha20poly1305_selftest() */
++	{ dec_input001, dec_output001, dec_assoc001, dec_nonce001, dec_key001,
++	  sizeof(dec_input001), sizeof(dec_assoc001), sizeof(dec_nonce001) },
++	{ dec_input002, dec_output002, dec_assoc002, dec_nonce002, dec_key002,
++	  sizeof(dec_input002), sizeof(dec_assoc002), sizeof(dec_nonce002) },
++	{ dec_input003, dec_output003, dec_assoc003, dec_nonce003, dec_key003,
++	  sizeof(dec_input003), sizeof(dec_assoc003), sizeof(dec_nonce003) },
++	{ dec_input004, dec_output004, dec_assoc004, dec_nonce004, dec_key004,
++	  sizeof(dec_input004), sizeof(dec_assoc004), sizeof(dec_nonce004) },
++	{ dec_input005, dec_output005, dec_assoc005, dec_nonce005, dec_key005,
++	  sizeof(dec_input005), sizeof(dec_assoc005), sizeof(dec_nonce005) },
++	{ dec_input006, dec_output006, dec_assoc006, dec_nonce006, dec_key006,
++	  sizeof(dec_input006), sizeof(dec_assoc006), sizeof(dec_nonce006) },
++	{ dec_input007, dec_output007, dec_assoc007, dec_nonce007, dec_key007,
++	  sizeof(dec_input007), sizeof(dec_assoc007), sizeof(dec_nonce007) },
++	{ dec_input008, dec_output008, dec_assoc008, dec_nonce008, dec_key008,
++	  sizeof(dec_input008), sizeof(dec_assoc008), sizeof(dec_nonce008) },
++	{ dec_input009, dec_output009, dec_assoc009, dec_nonce009, dec_key009,
++	  sizeof(dec_input009), sizeof(dec_assoc009), sizeof(dec_nonce009) },
++	{ dec_input010, dec_output010, dec_assoc010, dec_nonce010, dec_key010,
++	  sizeof(dec_input010), sizeof(dec_assoc010), sizeof(dec_nonce010) },
++	{ dec_input011, dec_output011, dec_assoc011, dec_nonce011, dec_key011,
++	  sizeof(dec_input011), sizeof(dec_assoc011), sizeof(dec_nonce011) },
++	{ dec_input012, dec_output012, dec_assoc012, dec_nonce012, dec_key012,
++	  sizeof(dec_input012), sizeof(dec_assoc012), sizeof(dec_nonce012) },
++	{ dec_input013, dec_output013, dec_assoc013, dec_nonce013, dec_key013,
++	  sizeof(dec_input013), sizeof(dec_assoc013), sizeof(dec_nonce013),
++	  true } /* only this entry has a 9th initializer; presumably the .failure flag the selftest reads (struct layout not visible here) */
++};
++
++static const u8 xenc_input001[] __initconst = { /* 265-byte plaintext; the bytes decode as text starting "Internet-Drafts are draft documents" */
++	0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74,
++	0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20,
++	0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66,
++	0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
++	0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69,
++	0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20,
++	0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20,
++	0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d,
++	0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e,
++	0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65,
++	0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64,
++	0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
++	0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f,
++	0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64,
++	0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65,
++	0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
++	0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61,
++	0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e,
++	0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69,
++	0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72,
++	0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20,
++	0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65,
++	0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61,
++	0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72,
++	0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
++	0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61,
++	0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20,
++	0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65,
++	0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20,
++	0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20,
++	0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b,
++	0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67,
++	0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80,
++	0x9d
++};
++static const u8 xenc_output001[] __initconst = { /* 281 bytes: 16 more than xenc_input001; the selftest compares ilen + POLY1305_MAC_SIZE bytes */
++	0x1a, 0x6e, 0x3a, 0xd9, 0xfd, 0x41, 0x3f, 0x77,
++	0x54, 0x72, 0x0a, 0x70, 0x9a, 0xa0, 0x29, 0x92,
++	0x2e, 0xed, 0x93, 0xcf, 0x0f, 0x71, 0x88, 0x18,
++	0x7a, 0x9d, 0x2d, 0x24, 0xe0, 0xf5, 0xea, 0x3d,
++	0x55, 0x64, 0xd7, 0xad, 0x2a, 0x1a, 0x1f, 0x7e,
++	0x86, 0x6d, 0xb0, 0xce, 0x80, 0x41, 0x72, 0x86,
++	0x26, 0xee, 0x84, 0xd7, 0xef, 0x82, 0x9e, 0xe2,
++	0x60, 0x9d, 0x5a, 0xfc, 0xf0, 0xe4, 0x19, 0x85,
++	0xea, 0x09, 0xc6, 0xfb, 0xb3, 0xa9, 0x50, 0x09,
++	0xec, 0x5e, 0x11, 0x90, 0xa1, 0xc5, 0x4e, 0x49,
++	0xef, 0x50, 0xd8, 0x8f, 0xe0, 0x78, 0xd7, 0xfd,
++	0xb9, 0x3b, 0xc9, 0xf2, 0x91, 0xc8, 0x25, 0xc8,
++	0xa7, 0x63, 0x60, 0xce, 0x10, 0xcd, 0xc6, 0x7f,
++	0xf8, 0x16, 0xf8, 0xe1, 0x0a, 0xd9, 0xde, 0x79,
++	0x50, 0x33, 0xf2, 0x16, 0x0f, 0x17, 0xba, 0xb8,
++	0x5d, 0xd8, 0xdf, 0x4e, 0x51, 0xa8, 0x39, 0xd0,
++	0x85, 0xca, 0x46, 0x6a, 0x10, 0xa7, 0xa3, 0x88,
++	0xef, 0x79, 0xb9, 0xf8, 0x24, 0xf3, 0xe0, 0x71,
++	0x7b, 0x76, 0x28, 0x46, 0x3a, 0x3a, 0x1b, 0x91,
++	0xb6, 0xd4, 0x3e, 0x23, 0xe5, 0x44, 0x15, 0xbf,
++	0x60, 0x43, 0x9d, 0xa4, 0xbb, 0xd5, 0x5f, 0x89,
++	0xeb, 0xef, 0x8e, 0xfd, 0xdd, 0xb4, 0x0d, 0x46,
++	0xf0, 0x69, 0x23, 0x63, 0xae, 0x94, 0xf5, 0x5e,
++	0xa5, 0xad, 0x13, 0x1c, 0x41, 0x76, 0xe6, 0x90,
++	0xd6, 0x6d, 0xa2, 0x8f, 0x97, 0x4c, 0xa8, 0x0b,
++	0xcf, 0x8d, 0x43, 0x2b, 0x9c, 0x9b, 0xc5, 0x58,
++	0xa5, 0xb6, 0x95, 0x9a, 0xbf, 0x81, 0xc6, 0x54,
++	0xc9, 0x66, 0x0c, 0xe5, 0x4f, 0x6a, 0x53, 0xa1,
++	0xe5, 0x0c, 0xba, 0x31, 0xde, 0x34, 0x64, 0x73,
++	0x8a, 0x3b, 0xbd, 0x92, 0x01, 0xdb, 0x71, 0x69,
++	0xf3, 0x58, 0x99, 0xbc, 0xd1, 0xcb, 0x4a, 0x05,
++	0xe2, 0x58, 0x9c, 0x25, 0x17, 0xcd, 0xdc, 0x83,
++	0xb7, 0xff, 0xfb, 0x09, 0x61, 0xad, 0xbf, 0x13,
++	0x5b, 0x5e, 0xed, 0x46, 0x82, 0x6f, 0x22, 0xd8,
++	0x93, 0xa6, 0x85, 0x5b, 0x40, 0x39, 0x5c, 0xc5,
++	0x9c
++};
++static const u8 xenc_assoc001[] __initconst = { /* 12 bytes of associated data for XChaCha20-Poly1305 encryption vector 001 */
++	0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x4e, 0x91
++};
++static const u8 xenc_nonce001[] __initconst = { /* 24-byte nonce, the byte values 0x00..0x17 in order */
++	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
++	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
++	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
++};
++static const u8 xenc_key001[] __initconst = { /* 32-byte key for XChaCha20-Poly1305 encryption vector 001 */
++	0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
++	0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
++	0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
++	0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
++};
++
++static const struct chacha20poly1305_testvec
++xchacha20poly1305_enc_vectors[] __initconst = { /* single XChaCha20-Poly1305 encryption vector, same initializer order as the other vector tables */
++	{ xenc_input001, xenc_output001, xenc_assoc001, xenc_nonce001, xenc_key001,
++	  sizeof(xenc_input001), sizeof(xenc_assoc001), sizeof(xenc_nonce001) }
++};
++
++static const u8 xdec_input001[] __initconst = { /* 281 bytes, the same byte sequence as xenc_output001 */
++	0x1a, 0x6e, 0x3a, 0xd9, 0xfd, 0x41, 0x3f, 0x77,
++	0x54, 0x72, 0x0a, 0x70, 0x9a, 0xa0, 0x29, 0x92,
++	0x2e, 0xed, 0x93, 0xcf, 0x0f, 0x71, 0x88, 0x18,
++	0x7a, 0x9d, 0x2d, 0x24, 0xe0, 0xf5, 0xea, 0x3d,
++	0x55, 0x64, 0xd7, 0xad, 0x2a, 0x1a, 0x1f, 0x7e,
++	0x86, 0x6d, 0xb0, 0xce, 0x80, 0x41, 0x72, 0x86,
++	0x26, 0xee, 0x84, 0xd7, 0xef, 0x82, 0x9e, 0xe2,
++	0x60, 0x9d, 0x5a, 0xfc, 0xf0, 0xe4, 0x19, 0x85,
++	0xea, 0x09, 0xc6, 0xfb, 0xb3, 0xa9, 0x50, 0x09,
++	0xec, 0x5e, 0x11, 0x90, 0xa1, 0xc5, 0x4e, 0x49,
++	0xef, 0x50, 0xd8, 0x8f, 0xe0, 0x78, 0xd7, 0xfd,
++	0xb9, 0x3b, 0xc9, 0xf2, 0x91, 0xc8, 0x25, 0xc8,
++	0xa7, 0x63, 0x60, 0xce, 0x10, 0xcd, 0xc6, 0x7f,
++	0xf8, 0x16, 0xf8, 0xe1, 0x0a, 0xd9, 0xde, 0x79,
++	0x50, 0x33, 0xf2, 0x16, 0x0f, 0x17, 0xba, 0xb8,
++	0x5d, 0xd8, 0xdf, 0x4e, 0x51, 0xa8, 0x39, 0xd0,
++	0x85, 0xca, 0x46, 0x6a, 0x10, 0xa7, 0xa3, 0x88,
++	0xef, 0x79, 0xb9, 0xf8, 0x24, 0xf3, 0xe0, 0x71,
++	0x7b, 0x76, 0x28, 0x46, 0x3a, 0x3a, 0x1b, 0x91,
++	0xb6, 0xd4, 0x3e, 0x23, 0xe5, 0x44, 0x15, 0xbf,
++	0x60, 0x43, 0x9d, 0xa4, 0xbb, 0xd5, 0x5f, 0x89,
++	0xeb, 0xef, 0x8e, 0xfd, 0xdd, 0xb4, 0x0d, 0x46,
++	0xf0, 0x69, 0x23, 0x63, 0xae, 0x94, 0xf5, 0x5e,
++	0xa5, 0xad, 0x13, 0x1c, 0x41, 0x76, 0xe6, 0x90,
++	0xd6, 0x6d, 0xa2, 0x8f, 0x97, 0x4c, 0xa8, 0x0b,
++	0xcf, 0x8d, 0x43, 0x2b, 0x9c, 0x9b, 0xc5, 0x58,
++	0xa5, 0xb6, 0x95, 0x9a, 0xbf, 0x81, 0xc6, 0x54,
++	0xc9, 0x66, 0x0c, 0xe5, 0x4f, 0x6a, 0x53, 0xa1,
++	0xe5, 0x0c, 0xba, 0x31, 0xde, 0x34, 0x64, 0x73,
++	0x8a, 0x3b, 0xbd, 0x92, 0x01, 0xdb, 0x71, 0x69,
++	0xf3, 0x58, 0x99, 0xbc, 0xd1, 0xcb, 0x4a, 0x05,
++	0xe2, 0x58, 0x9c, 0x25, 0x17, 0xcd, 0xdc, 0x83,
++	0xb7, 0xff, 0xfb, 0x09, 0x61, 0xad, 0xbf, 0x13,
++	0x5b, 0x5e, 0xed, 0x46, 0x82, 0x6f, 0x22, 0xd8,
++	0x93, 0xa6, 0x85, 0x5b, 0x40, 0x39, 0x5c, 0xc5,
++	0x9c
++};
++static const u8 xdec_output001[] __initconst = { /* 265-byte expected plaintext, the same byte sequence as xenc_input001 */
++	0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74,
++	0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20,
++	0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66,
++	0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
++	0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69,
++	0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20,
++	0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20,
++	0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d,
++	0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e,
++	0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65,
++	0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64,
++	0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
++	0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f,
++	0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64,
++	0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65,
++	0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
++	0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61,
++	0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e,
++	0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69,
++	0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72,
++	0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20,
++	0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65,
++	0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61,
++	0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72,
++	0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
++	0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61,
++	0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20,
++	0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65,
++	0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20,
++	0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20,
++	0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b,
++	0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67,
++	0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80,
++	0x9d
++};
++static const u8 xdec_assoc001[] __initconst = { /* 12 bytes of associated data, same values as xenc_assoc001 */
++	0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x4e, 0x91
++};
++static const u8 xdec_nonce001[] __initconst = { /* 24-byte nonce, the byte values 0x00..0x17 in order */
++	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
++	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
++	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
++};
++static const u8 xdec_key001[] __initconst = { /* 32-byte key, same values as xenc_key001 */
++	0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
++	0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
++	0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
++	0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
++};
++
++static const struct chacha20poly1305_testvec
++xchacha20poly1305_dec_vectors[] __initconst = { /* single XChaCha20-Poly1305 decryption vector; no trailing failure initializer is given */
++	{ xdec_input001, xdec_output001, xdec_assoc001, xdec_nonce001, xdec_key001,
++	  sizeof(xdec_input001), sizeof(xdec_assoc001), sizeof(xdec_nonce001) }
++};
++
++static void __init /* selftest-only encrypt path for vectors that carry a 12-byte nonce */
++chacha20poly1305_selftest_encrypt_bignonce(u8 *dst, const u8 *src,
++					   const size_t src_len, const u8 *ad,
++					   const size_t ad_len,
++					   const u8 nonce[12],
++					   const u8 key[CHACHA20POLY1305_KEY_SIZE])
++{
++	simd_context_t simd_context;
++	struct poly1305_ctx poly1305_state;
++	struct chacha20_ctx chacha20_state;
++	union {
++		u8 block0[POLY1305_KEY_SIZE];
++		__le64 lens[2];
++	} b = {{ 0 }}; /* zero-filled; block0 and lens share the same storage */
++
++	simd_get(&simd_context);
++	chacha20_init(&chacha20_state, key, 0); /* third argument is 0; the three nonce words are installed by hand below */
++	chacha20_state.counter[1] = get_unaligned_le32(nonce + 0); /* nonce bytes 0..3, little-endian */
++	chacha20_state.counter[2] = get_unaligned_le32(nonce + 4); /* nonce bytes 4..7 */
++	chacha20_state.counter[3] = get_unaligned_le32(nonce + 8); /* nonce bytes 8..11 */
++	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), /* in place over the still-zero block0 */
++		 &simd_context);
++	poly1305_init(&poly1305_state, b.block0); /* the block just produced is handed over as the Poly1305 key */
++	poly1305_update(&poly1305_state, ad, ad_len, &simd_context);
++	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, /* pad the AD up to a multiple of 16 bytes */
++			&simd_context);
++	chacha20(&chacha20_state, dst, src, src_len, &simd_context); /* src -> dst, continuing the same cipher state */
++	poly1305_update(&poly1305_state, dst, src_len, &simd_context); /* the MAC input is what was written to dst, not src */
++	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, /* pad that data up to a multiple of 16 bytes */
++			&simd_context);
++	b.lens[0] = cpu_to_le64(ad_len); /* reuses the union: both lengths as little-endian 64-bit values */
++	b.lens[1] = cpu_to_le64(src_len);
++	poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
++			&simd_context);
++	poly1305_final(&poly1305_state, dst + src_len, &simd_context); /* result lands directly after the src_len output bytes */
++	simd_put(&simd_context);
++	memzero_explicit(&chacha20_state, sizeof(chacha20_state)); /* wipe the cipher state, which was set up from the key */
++	memzero_explicit(&b, sizeof(b)); /* wipe the Poly1305 key block / length scratch */
++}
++
++/* Route a test vector to the encrypt routine that matches its nonce length. */
++static void __init
++chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len,
++				  const u8 *ad, const size_t ad_len,
++				  const u8 *nonce, const size_t nonce_len,
++				  const u8 key[CHACHA20POLY1305_KEY_SIZE])
++{
++	BUG_ON(nonce_len != 8 && nonce_len != 12);	/* no other nonce size is handled */
++	if (nonce_len == 12)	/* 12-byte nonce: local bignonce helper */
++		chacha20poly1305_selftest_encrypt_bignonce(dst, src, src_len,
++							   ad, ad_len, nonce,
++							   key);
++	else	/* 8-byte nonce: read as a little-endian u64 for the regular API */
++		chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
++					 get_unaligned_le64(nonce), key);
++}
++
++/* A vector flagged as an expected failure passes only if decryption was
++ * rejected; any other vector needs func_ret true and memcmp_result == 0. */
++static bool __init
++decryption_success(bool func_ret, bool expect_failure, int memcmp_result)
++{
++	return expect_failure ? !func_ret : (func_ret && !memcmp_result);
++}
++
++/* Run every (x)chacha20poly1305 test vector; returns true only when all checks pass. */
++static bool __init chacha20poly1305_selftest(void)
++{
++	enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 };	/* size of each scratch buffer */
++	size_t i, j, k, total_len;
++	u8 *computed_output = NULL, *input = NULL;
++	bool success = true, ret;
++	simd_context_t simd_context;
++	struct scatterlist sg_src[3];
++
++	computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
++	input = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
++	if (!computed_output || !input) {
++		pr_err("chacha20poly1305 self-test malloc: FAIL\n");
++		success = false;
++		goto out;	/* whichever allocation succeeded is released at out: */
++	}
++
++	for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {	/* one-shot encryption */
++		memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
++		chacha20poly1305_selftest_encrypt(computed_output,
++					chacha20poly1305_enc_vectors[i].input,
++					chacha20poly1305_enc_vectors[i].ilen,
++					chacha20poly1305_enc_vectors[i].assoc,
++					chacha20poly1305_enc_vectors[i].alen,
++					chacha20poly1305_enc_vectors[i].nonce,
++					chacha20poly1305_enc_vectors[i].nlen,
++					chacha20poly1305_enc_vectors[i].key);
++		if (memcmp(computed_output,
++			   chacha20poly1305_enc_vectors[i].output,
++			   chacha20poly1305_enc_vectors[i].ilen +
++							POLY1305_MAC_SIZE)) {
++			pr_err("chacha20poly1305 encryption self-test %zu: FAIL\n",
++			       i + 1);
++			success = false;
++		}
++	}
++	simd_get(&simd_context);
++	for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {	/* in-place scatterlist encryption */
++		if (chacha20poly1305_enc_vectors[i].nlen != 8)	/* the sg call below takes the nonce as one u64 */
++			continue;
++		memcpy(computed_output, chacha20poly1305_enc_vectors[i].input,
++		       chacha20poly1305_enc_vectors[i].ilen);
++		sg_init_one(sg_src, computed_output,
++			    chacha20poly1305_enc_vectors[i].ilen +
++				POLY1305_MAC_SIZE);
++		ret = chacha20poly1305_encrypt_sg_inplace(sg_src,
++			chacha20poly1305_enc_vectors[i].ilen,
++			chacha20poly1305_enc_vectors[i].assoc,
++			chacha20poly1305_enc_vectors[i].alen,
++			get_unaligned_le64(chacha20poly1305_enc_vectors[i].nonce),
++			chacha20poly1305_enc_vectors[i].key,
++			&simd_context);
++		if (!ret || memcmp(computed_output,
++				   chacha20poly1305_enc_vectors[i].output,
++				   chacha20poly1305_enc_vectors[i].ilen +
++							POLY1305_MAC_SIZE)) {
++			pr_err("chacha20poly1305 sg encryption self-test %zu: FAIL\n",
++			       i + 1);
++			success = false;
++		}
++	}
++	simd_put(&simd_context);
++	for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {	/* one-shot decryption */
++		memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
++		ret = chacha20poly1305_decrypt(computed_output,
++			chacha20poly1305_dec_vectors[i].input,
++			chacha20poly1305_dec_vectors[i].ilen,
++			chacha20poly1305_dec_vectors[i].assoc,
++			chacha20poly1305_dec_vectors[i].alen,
++			get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce),
++			chacha20poly1305_dec_vectors[i].key);
++		if (!decryption_success(ret,
++				chacha20poly1305_dec_vectors[i].failure,
++				memcmp(computed_output,
++				       chacha20poly1305_dec_vectors[i].output,
++				       chacha20poly1305_dec_vectors[i].ilen -
++							POLY1305_MAC_SIZE))) {
++			pr_err("chacha20poly1305 decryption self-test %zu: FAIL\n",
++			       i + 1);
++			success = false;
++		}
++	}
++	simd_get(&simd_context);
++	for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {	/* in-place scatterlist decryption */
++		memcpy(computed_output, chacha20poly1305_dec_vectors[i].input,
++		       chacha20poly1305_dec_vectors[i].ilen);
++		sg_init_one(sg_src, computed_output,
++			    chacha20poly1305_dec_vectors[i].ilen);
++		ret = chacha20poly1305_decrypt_sg_inplace(sg_src,
++			chacha20poly1305_dec_vectors[i].ilen,
++			chacha20poly1305_dec_vectors[i].assoc,
++			chacha20poly1305_dec_vectors[i].alen,
++			get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce),
++			chacha20poly1305_dec_vectors[i].key, &simd_context);
++		if (!decryption_success(ret,
++			chacha20poly1305_dec_vectors[i].failure,
++			memcmp(computed_output, chacha20poly1305_dec_vectors[i].output,
++			       chacha20poly1305_dec_vectors[i].ilen -
++							POLY1305_MAC_SIZE))) {
++			pr_err("chacha20poly1305 sg decryption self-test %zu: FAIL\n",
++			       i + 1);
++			success = false;
++		}
++	}
++	simd_put(&simd_context);
++	for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) {	/* XChaCha20-Poly1305 encryption */
++		memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
++		xchacha20poly1305_encrypt(computed_output,
++					xchacha20poly1305_enc_vectors[i].input,
++					xchacha20poly1305_enc_vectors[i].ilen,
++					xchacha20poly1305_enc_vectors[i].assoc,
++					xchacha20poly1305_enc_vectors[i].alen,
++					xchacha20poly1305_enc_vectors[i].nonce,
++					xchacha20poly1305_enc_vectors[i].key);
++		if (memcmp(computed_output,
++			   xchacha20poly1305_enc_vectors[i].output,
++			   xchacha20poly1305_enc_vectors[i].ilen +
++							POLY1305_MAC_SIZE)) {
++			pr_err("xchacha20poly1305 encryption self-test %zu: FAIL\n",
++			       i + 1);
++			success = false;
++		}
++	}
++	for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_dec_vectors); ++i) {	/* XChaCha20-Poly1305 decryption */
++		memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
++		ret = xchacha20poly1305_decrypt(computed_output,
++					xchacha20poly1305_dec_vectors[i].input,
++					xchacha20poly1305_dec_vectors[i].ilen,
++					xchacha20poly1305_dec_vectors[i].assoc,
++					xchacha20poly1305_dec_vectors[i].alen,
++					xchacha20poly1305_dec_vectors[i].nonce,
++					xchacha20poly1305_dec_vectors[i].key);
++		if (!decryption_success(ret,
++				xchacha20poly1305_dec_vectors[i].failure,
++				memcmp(computed_output,
++				       xchacha20poly1305_dec_vectors[i].output,
++				       xchacha20poly1305_dec_vectors[i].ilen -
++							POLY1305_MAC_SIZE))) {
++			pr_err("xchacha20poly1305 decryption self-test %zu: FAIL\n",
++			       i + 1);
++			success = false;
++		}
++	}
++
++	simd_get(&simd_context);
++	for (total_len = POLY1305_MAC_SIZE; IS_ENABLED(DEBUG_CHACHA20POLY1305_SLOW_CHUNK_TEST)
++	     && total_len <= 1 << 10; ++total_len) {	/* never entered unless the IS_ENABLED() test is true */
++		for (i = 0; i <= total_len; ++i) {
++			for (j = i; j <= total_len; ++j) {
++				sg_init_table(sg_src, 3);	/* split input into three segments at offsets i and j */
++				sg_set_buf(&sg_src[0], input, i);
++				sg_set_buf(&sg_src[1], input + i, j - i);
++				sg_set_buf(&sg_src[2], input + j, total_len - j);
++				memset(computed_output, 0, total_len);
++				memset(input, 0, total_len);
++
++				if (!chacha20poly1305_encrypt_sg_inplace(sg_src,
++					total_len - POLY1305_MAC_SIZE, NULL, 0,
++					0, enc_key001, &simd_context))
++					goto chunkfail;
++				chacha20poly1305_encrypt(computed_output,
++					computed_output,
++					total_len - POLY1305_MAC_SIZE, NULL, 0, 0,
++					enc_key001);
++				if (memcmp(computed_output, input, total_len))	/* chunked and one-shot results must agree */
++					goto chunkfail;
++				if (!chacha20poly1305_decrypt(computed_output,
++					input, total_len, NULL, 0, 0, enc_key001))
++					goto chunkfail;
++				for (k = 0; k < total_len - POLY1305_MAC_SIZE; ++k) {	/* plaintext was all zeroes */
++					if (computed_output[k])
++						goto chunkfail;
++				}
++				if (!chacha20poly1305_decrypt_sg_inplace(sg_src,
++					total_len, NULL, 0, 0, enc_key001,
++					&simd_context))
++					goto chunkfail;
++				for (k = 0; k < total_len - POLY1305_MAC_SIZE; ++k) {
++					if (input[k])
++						goto chunkfail;
++				}
++				continue;	/* every check passed for this split */
++
++			chunkfail:
++				pr_err("chacha20poly1305 chunked self-test %zu/%zu/%zu: FAIL\n",
++				       total_len, i, j);
++				success = false;	/* record the failure, then go on to the next split */
++			}
++		}
++	}
++	simd_put(&simd_context);
++
++out:
++	kfree(computed_output);
++	kfree(input);
++	return success;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/selftest/curve25519.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,1315 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++struct curve25519_test_vector { /* one curve25519 selftest case; all three buffers are CURVE25519_KEY_SIZE bytes */
++	u8 private[CURVE25519_KEY_SIZE];
++	u8 public[CURVE25519_KEY_SIZE];
++	u8 result[CURVE25519_KEY_SIZE]; /* presumably the expected output for private/public -- the consumer is not visible here, confirm */
++	bool valid; /* the entries of curve25519_test_vectors[] that set this false leave .result all zero */
++};
++static const struct curve25519_test_vector curve25519_test_vectors[] __initconst = {
++	{
++		.private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d,
++			     0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45,
++			     0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a,
++			     0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a },
++		.public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4,
++			    0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37,
++			    0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d,
++			    0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f },
++		.result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
++			    0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
++			    0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
++			    0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
++		.valid = true
++	},
++	{
++		.private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b,
++			     0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6,
++			     0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd,
++			     0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb },
++		.public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54,
++			    0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a,
++			    0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4,
++			    0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a },
++		.result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
++			    0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
++			    0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
++			    0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
++		.valid = true
++	},
++	{
++		.private = { 1 },
++		.public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64,
++			    0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d,
++			    0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98,
++			    0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f },
++		.valid = true
++	},
++	{
++		.private = { 1 },
++		.public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f,
++			    0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d,
++			    0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3,
++			    0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 },
++		.valid = true
++	},
++	{
++		.private = { 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
++			     0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
++			     0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
++			     0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 },
++		.public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
++			    0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
++			    0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
++			    0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
++		.result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
++			    0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
++			    0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
++			    0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
++		.valid = true
++	},
++	{
++		.private = { 1, 2, 3, 4 },
++		.public = { 0 },
++		.result = { 0 },
++		.valid = false
++	},
++	{
++		.private = { 2, 4, 6, 8 },
++		.public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
++			    0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
++			    0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
++			    0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 },
++		.result = { 0 },
++		.valid = false
++	},
++	{
++		.private = { 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff,
++			     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f },
++		.result = { 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2,
++			    0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57,
++			    0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05,
++			    0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 },
++		.valid = true
++	},
++	{
++		.private = { 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 },
++		.result = { 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d,
++			    0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12,
++			    0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99,
++			    0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c },
++		.valid = true
++	},
++	/* wycheproof - normal case */
++	{
++		.private = { 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda,
++			     0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66,
++			     0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3,
++			     0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba },
++		.public = { 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5,
++			    0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9,
++			    0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e,
++			    0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a },
++		.result = { 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5,
++			    0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38,
++			    0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e,
++			    0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 },
++		.valid = true
++	},
++	/* wycheproof - public key on twist */
++	{
++		.private = { 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4,
++			     0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5,
++			     0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49,
++			     0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 },
++		.public = { 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5,
++			    0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8,
++			    0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3,
++			    0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 },
++		.result = { 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff,
++			    0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d,
++			    0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe,
++			    0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 },
++		.valid = true
++	},
++	/* wycheproof - public key on twist */
++	{
++		.private = { 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9,
++			     0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39,
++			     0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5,
++			     0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 },
++		.public = { 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f,
++			    0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b,
++			    0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c,
++			    0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 },
++		.result = { 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53,
++			    0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57,
++			    0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0,
++			    0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b },
++		.valid = true
++	},
++	/* wycheproof - public key on twist */
++	{
++		.private = { 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc,
++			     0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d,
++			     0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67,
++			     0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c },
++		.public = { 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97,
++			    0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f,
++			    0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45,
++			    0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a },
++		.result = { 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93,
++			    0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2,
++			    0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44,
++			    0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a },
++		.valid = true
++	},
++	/* wycheproof - public key on twist */
++	{
++		.private = { 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1,
++			     0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95,
++			     0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99,
++			     0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d },
++		.public = { 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27,
++			    0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07,
++			    0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae,
++			    0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c },
++		.result = { 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73,
++			    0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2,
++			    0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f,
++			    0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 },
++		.valid = true
++	},
++	/* wycheproof - public key on twist */
++	{
++		.private = { 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9,
++			     0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd,
++			     0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b,
++			     0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 },
++		.public = { 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5,
++			    0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52,
++			    0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8,
++			    0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 },
++		.result = { 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86,
++			    0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4,
++			    0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6,
++			    0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 },
++		.valid = true
++	},
++	/* wycheproof - public key = 0 */
++	{
++		.private = { 0x20, 0x74, 0x94, 0x03, 0x8f, 0x2b, 0xb8, 0x11,
++			     0xd4, 0x78, 0x05, 0xbc, 0xdf, 0x04, 0xa2, 0xac,
++			     0x58, 0x5a, 0xda, 0x7f, 0x2f, 0x23, 0x38, 0x9b,
++			     0xfd, 0x46, 0x58, 0xf9, 0xdd, 0xd4, 0xde, 0xbc },
++		.public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key = 1 */
++	{
++		.private = { 0x20, 0x2e, 0x89, 0x72, 0xb6, 0x1c, 0x7e, 0x61,
++			     0x93, 0x0e, 0xb9, 0x45, 0x0b, 0x50, 0x70, 0xea,
++			     0xe1, 0xc6, 0x70, 0x47, 0x56, 0x85, 0x54, 0x1f,
++			     0x04, 0x76, 0x21, 0x7e, 0x48, 0x18, 0xcf, 0xab },
++		.public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - edge case on twist */
++	{
++		.private = { 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04,
++			     0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77,
++			     0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90,
++			     0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 },
++		.public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.result = { 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97,
++			    0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9,
++			    0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7,
++			    0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 },
++		.valid = true
++	},
++	/* wycheproof - edge case on twist */
++	{
++		.private = { 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36,
++			     0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd,
++			     0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c,
++			     0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 },
++		.public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.result = { 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e,
++			    0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b,
++			    0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e,
++			    0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 },
++		.valid = true
++	},
++	/* wycheproof - edge case on twist */
++	{
++		.private = { 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed,
++			     0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e,
++			     0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd,
++			     0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 },
++		.public = { 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff,
++			    0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff,
++			    0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00,
++			    0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 },
++		.result = { 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f,
++			    0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1,
++			    0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10,
++			    0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b },
++		.valid = true
++	},
++	/* wycheproof - edge case on twist */
++	{
++		.private = { 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3,
++			     0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d,
++			     0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00,
++			     0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 },
++		.public = { 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00,
++			    0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00,
++			    0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff,
++			    0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f },
++		.result = { 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8,
++			    0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4,
++			    0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70,
++			    0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b },
++		.valid = true
++	},
++	/* wycheproof - edge case on twist */
++	{
++		.private = { 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3,
++			     0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a,
++			     0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e,
++			     0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 },
++		.public = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++			    0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++			    0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
++			    0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f },
++		.result = { 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57,
++			    0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c,
++			    0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59,
++			    0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 },
++		.valid = true
++	},
++	/* wycheproof - edge case on twist */
++	{
++		.private = { 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f,
++			     0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42,
++			     0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9,
++			     0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 },
++		.public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.result = { 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c,
++			    0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5,
++			    0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65,
++			    0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 },
++		.valid = true
++	},
++	/* wycheproof - edge case for public key */
++	{
++		.private = { 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6,
++			     0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4,
++			     0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8,
++			     0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe },
++		.public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.result = { 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7,
++			    0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca,
++			    0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f,
++			    0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 },
++		.valid = true
++	},
++	/* wycheproof - edge case for public key */
++	{
++		.private = { 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa,
++			     0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3,
++			     0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52,
++			     0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 },
++		.public = { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++			    0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++			    0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
++			    0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 },
++		.result = { 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3,
++			    0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e,
++			    0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75,
++			    0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f },
++		.valid = true
++	},
++	/* wycheproof - edge case for public key */
++	{
++		.private = { 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26,
++			     0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea,
++			     0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00,
++			     0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 },
++		.public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 },
++		.result = { 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8,
++			    0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32,
++			    0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87,
++			    0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d },
++		.valid = true
++	},
++	/* wycheproof - edge case for public key */
++	{
++		.private = { 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c,
++			     0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6,
++			     0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb,
++			     0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 },
++		.public = { 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff,
++			    0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff,
++			    0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff,
++			    0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f },
++		.result = { 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85,
++			    0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f,
++			    0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0,
++			    0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d },
++		.valid = true
++	},
++	/* wycheproof - edge case for public key */
++	{
++		.private = { 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38,
++			     0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b,
++			     0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c,
++			     0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb },
++		.public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
++		.result = { 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b,
++			    0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81,
++			    0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3,
++			    0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d },
++		.valid = true
++	},
++	/* wycheproof - edge case for public key */
++	{
++		.private = { 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d,
++			     0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42,
++			     0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98,
++			     0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 },
++		.public = { 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
++			    0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
++			    0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
++			    0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f },
++		.result = { 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c,
++			    0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9,
++			    0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89,
++			    0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 },
++		.valid = true
++	},
++	/* wycheproof - edge case for public key */
++	{
++		.private = { 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29,
++			     0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6,
++			     0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c,
++			     0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f },
++		.public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.result = { 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75,
++			    0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89,
++			    0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c,
++			    0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f },
++		.valid = true
++	},
++	/* wycheproof - public key with low order */
++	{
++		.private = { 0x10, 0x25, 0x5c, 0x92, 0x30, 0xa9, 0x7a, 0x30,
++			     0xa4, 0x58, 0xca, 0x28, 0x4a, 0x62, 0x96, 0x69,
++			     0x29, 0x3a, 0x31, 0x89, 0x0c, 0xda, 0x9d, 0x14,
++			     0x7f, 0xeb, 0xc7, 0xd1, 0xe2, 0x2d, 0x6b, 0xb1 },
++		.public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
++			    0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
++			    0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
++			    0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key with low order */
++	{
++		.private = { 0x78, 0xf1, 0xe8, 0xed, 0xf1, 0x44, 0x81, 0xb3,
++			     0x89, 0x44, 0x8d, 0xac, 0x8f, 0x59, 0xc7, 0x0b,
++			     0x03, 0x8e, 0x7c, 0xf9, 0x2e, 0xf2, 0xc7, 0xef,
++			     0xf5, 0x7a, 0x72, 0x46, 0x6e, 0x11, 0x52, 0x96 },
++		.public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24,
++			    0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b,
++			    0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86,
++			    0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key with low order */
++	{
++		.private = { 0xa0, 0xa0, 0x5a, 0x3e, 0x8f, 0x9f, 0x44, 0x20,
++			     0x4d, 0x5f, 0x80, 0x59, 0xa9, 0x4a, 0xc7, 0xdf,
++			     0xc3, 0x9a, 0x49, 0xac, 0x01, 0x6d, 0xd7, 0x43,
++			     0xdb, 0xfa, 0x43, 0xc5, 0xd6, 0x71, 0xfd, 0x88 },
++		.public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key with low order */
++	{
++		.private = { 0xd0, 0xdb, 0xb3, 0xed, 0x19, 0x06, 0x66, 0x3f,
++			     0x15, 0x42, 0x0a, 0xf3, 0x1f, 0x4e, 0xaf, 0x65,
++			     0x09, 0xd9, 0xa9, 0x94, 0x97, 0x23, 0x50, 0x06,
++			     0x05, 0xad, 0x7c, 0x1c, 0x6e, 0x74, 0x50, 0xa9 },
++		.public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key with low order */
++	{
++		.private = { 0xc0, 0xb1, 0xd0, 0xeb, 0x22, 0xb2, 0x44, 0xfe,
++			     0x32, 0x91, 0x14, 0x00, 0x72, 0xcd, 0xd9, 0xd9,
++			     0x89, 0xb5, 0xf0, 0xec, 0xd9, 0x6c, 0x10, 0x0f,
++			     0xeb, 0x5b, 0xca, 0x24, 0x1c, 0x1d, 0x9f, 0x8f },
++		.public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key with low order */
++	{
++		.private = { 0x48, 0x0b, 0xf4, 0x5f, 0x59, 0x49, 0x42, 0xa8,
++			     0xbc, 0x0f, 0x33, 0x53, 0xc6, 0xe8, 0xb8, 0x85,
++			     0x3d, 0x77, 0xf3, 0x51, 0xf1, 0xc2, 0xca, 0x6c,
++			     0x2d, 0x1a, 0xbf, 0x8a, 0x00, 0xb4, 0x22, 0x9c },
++		.public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key with low order */
++	{
++		.private = { 0x30, 0xf9, 0x93, 0xfc, 0xf8, 0x51, 0x4f, 0xc8,
++			     0x9b, 0xd8, 0xdb, 0x14, 0xcd, 0x43, 0xba, 0x0d,
++			     0x4b, 0x25, 0x30, 0xe7, 0x3c, 0x42, 0x76, 0xa0,
++			     0x5e, 0x1b, 0x14, 0x5d, 0x42, 0x0c, 0xed, 0xb4 },
++		.public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key with low order */
++	{
++		.private = { 0xc0, 0x49, 0x74, 0xb7, 0x58, 0x38, 0x0e, 0x2a,
++			     0x5b, 0x5d, 0xf6, 0xeb, 0x09, 0xbb, 0x2f, 0x6b,
++			     0x34, 0x34, 0xf9, 0x82, 0x72, 0x2a, 0x8e, 0x67,
++			     0x6d, 0x3d, 0xa2, 0x51, 0xd1, 0xb3, 0xde, 0x83 },
++		.public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
++			    0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
++			    0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
++			    0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key with low order */
++	{
++		.private = { 0x50, 0x2a, 0x31, 0x37, 0x3d, 0xb3, 0x24, 0x46,
++			     0x84, 0x2f, 0xe5, 0xad, 0xd3, 0xe0, 0x24, 0x02,
++			     0x2e, 0xa5, 0x4f, 0x27, 0x41, 0x82, 0xaf, 0xc3,
++			     0xd9, 0xf1, 0xbb, 0x3d, 0x39, 0x53, 0x4e, 0xb5 },
++		.public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24,
++			    0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b,
++			    0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86,
++			    0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key with low order */
++	{
++		.private = { 0x90, 0xfa, 0x64, 0x17, 0xb0, 0xe3, 0x70, 0x30,
++			     0xfd, 0x6e, 0x43, 0xef, 0xf2, 0xab, 0xae, 0xf1,
++			     0x4c, 0x67, 0x93, 0x11, 0x7a, 0x03, 0x9c, 0xf6,
++			     0x21, 0x31, 0x8b, 0xa9, 0x0f, 0x4e, 0x98, 0xbe },
++		.public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key with low order */
++	{
++		.private = { 0x78, 0xad, 0x3f, 0x26, 0x02, 0x7f, 0x1c, 0x9f,
++			     0xdd, 0x97, 0x5a, 0x16, 0x13, 0xb9, 0x47, 0x77,
++			     0x9b, 0xad, 0x2c, 0xf2, 0xb7, 0x41, 0xad, 0xe0,
++			     0x18, 0x40, 0x88, 0x5a, 0x30, 0xbb, 0x97, 0x9c },
++		.public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key with low order */
++	{
++		.private = { 0x98, 0xe2, 0x3d, 0xe7, 0xb1, 0xe0, 0x92, 0x6e,
++			     0xd9, 0xc8, 0x7e, 0x7b, 0x14, 0xba, 0xf5, 0x5f,
++			     0x49, 0x7a, 0x1d, 0x70, 0x96, 0xf9, 0x39, 0x77,
++			     0x68, 0x0e, 0x44, 0xdc, 0x1c, 0x7b, 0x7b, 0x8b },
++		.public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = false
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc,
++			     0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1,
++			     0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d,
++			     0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae },
++		.public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.result = { 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09,
++			    0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde,
++			    0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1,
++			    0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81,
++			     0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a,
++			     0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99,
++			     0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d },
++		.public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.result = { 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17,
++			    0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35,
++			    0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55,
++			    0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11,
++			     0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b,
++			     0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9,
++			     0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 },
++		.public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.result = { 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53,
++			    0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e,
++			    0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6,
++			    0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78,
++			     0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2,
++			     0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd,
++			     0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 },
++		.public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.result = { 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb,
++			    0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40,
++			    0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2,
++			    0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9,
++			     0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60,
++			     0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13,
++			     0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 },
++		.public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
++		.result = { 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c,
++			    0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3,
++			    0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65,
++			    0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a,
++			     0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7,
++			     0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11,
++			     0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e },
++		.public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
++		.result = { 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82,
++			    0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4,
++			    0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c,
++			    0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e,
++			     0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a,
++			     0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d,
++			     0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f },
++		.public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
++		.result = { 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2,
++			    0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60,
++			    0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25,
++			    0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb,
++			     0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97,
++			     0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c,
++			     0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 },
++		.public = { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23,
++			    0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8,
++			    0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69,
++			    0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a,
++			     0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23,
++			     0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b,
++			     0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 },
++		.public = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b,
++			    0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44,
++			    0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37,
++			    0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80,
++			     0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d,
++			     0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b,
++			     0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 },
++		.public = { 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63,
++			    0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae,
++			    0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f,
++			    0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0,
++			     0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd,
++			     0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49,
++			     0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 },
++		.public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41,
++			    0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0,
++			    0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf,
++			    0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9,
++			     0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa,
++			     0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5,
++			     0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e },
++		.public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47,
++			    0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3,
++			    0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b,
++			    0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8,
++			     0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98,
++			     0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0,
++			     0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 },
++		.public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0,
++			    0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1,
++			    0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a,
++			    0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02,
++			     0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4,
++			     0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68,
++			     0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d },
++		.public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f,
++			    0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2,
++			    0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95,
++			    0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7,
++			     0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06,
++			     0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9,
++			     0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 },
++		.public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5,
++			    0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0,
++			    0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80,
++			    0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 },
++		.valid = true
++	},
++	/* wycheproof - public key >= p */
++	{
++		.private = { 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd,
++			     0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4,
++			     0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04,
++			     0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 },
++		.public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++		.result = { 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0,
++			    0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac,
++			    0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48,
++			    0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 },
++		.valid = true
++	},
++	/* wycheproof - RFC 7748 */
++	{
++		.private = { 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
++			     0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
++			     0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
++			     0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 },
++		.public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
++			    0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
++			    0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
++			    0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
++		.result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
++			    0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
++			    0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
++			    0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
++		.valid = true
++	},
++	/* wycheproof - RFC 7748 */
++	{
++		.private = { 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c,
++			     0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5,
++			     0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4,
++			     0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d },
++		.public = { 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3,
++			    0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c,
++			    0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e,
++			    0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 },
++		.result = { 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d,
++			    0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8,
++			    0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52,
++			    0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde,
++			    0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8,
++			    0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4,
++			    0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 },
++		.result = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d,
++			    0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64,
++			    0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd,
++			    0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 },
++		.result = { 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8,
++			    0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf,
++			    0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94,
++			    0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d },
++		.result = { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84,
++			    0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62,
++			    0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e,
++			    0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 },
++		.result = { 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8,
++			    0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58,
++			    0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02,
++			    0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 },
++		.result = { 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9,
++			    0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a,
++			    0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44,
++			    0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b },
++		.result = { 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd,
++			    0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22,
++			    0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56,
++			    0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b },
++		.result = { 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53,
++			    0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f,
++			    0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18,
++			    0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f },
++		.result = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55,
++			    0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b,
++			    0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79,
++			    0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f },
++		.result = { 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39,
++			    0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c,
++			    0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb,
++			    0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e },
++		.result = { 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04,
++			    0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10,
++			    0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58,
++			    0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c },
++		.result = { 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3,
++			    0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c,
++			    0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88,
++			    0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 },
++		.result = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a,
++			    0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49,
++			    0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a,
++			    0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 },
++		.valid = true
++	},
++	/* wycheproof - edge case for shared secret */
++	{
++		.private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
++			     0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
++			     0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
++			     0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
++		.public = { 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca,
++			    0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c,
++			    0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb,
++			    0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 },
++		.result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 },
++		.valid = true
++	},
++	/* wycheproof - checking for overflow */
++	{
++		.private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
++			     0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
++			     0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
++			     0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
++		.public = { 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58,
++			    0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7,
++			    0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01,
++			    0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d },
++		.result = { 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d,
++			    0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27,
++			    0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b,
++			    0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 },
++		.valid = true
++	},
++	/* wycheproof - checking for overflow */
++	{
++		.private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
++			     0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
++			     0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
++			     0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
++		.public = { 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26,
++			    0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2,
++			    0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44,
++			    0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e },
++		.result = { 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6,
++			    0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d,
++			    0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e,
++			    0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 },
++		.valid = true
++	},
++	/* wycheproof - checking for overflow */
++	{
++		.private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
++			     0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
++			     0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
++			     0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
++		.public = { 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61,
++			    0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67,
++			    0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e,
++			    0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c },
++		.result = { 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65,
++			    0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce,
++			    0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0,
++			    0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 },
++		.valid = true
++	},
++	/* wycheproof - checking for overflow */
++	{
++		.private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
++			     0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
++			     0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
++			     0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
++		.public = { 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee,
++			    0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d,
++			    0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14,
++			    0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 },
++		.result = { 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e,
++			    0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc,
++			    0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5,
++			    0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b },
++		.valid = true
++	},
++	/* wycheproof - checking for overflow */
++	{
++		.private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
++			     0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
++			     0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
++			     0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
++		.public = { 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4,
++			    0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5,
++			    0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c,
++			    0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 },
++		.result = { 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b,
++			    0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93,
++			    0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f,
++			    0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 },
++		.valid = true
++	},
++	/* wycheproof - private key == -1 (mod order) */
++	{
++		.private = { 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8,
++			     0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68,
++			     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 },
++		.public = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e,
++			    0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57,
++			    0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f,
++			    0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 },
++		.result = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e,
++			    0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57,
++			    0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f,
++			    0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 },
++		.valid = true
++	},
++	/* wycheproof - private key == 1 (mod order) on twist */
++	{
++		.private = { 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef,
++			     0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82,
++			     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++			     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f },
++		.public = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
++			    0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
++			    0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
++			    0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
++		.result = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
++			    0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
++			    0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
++			    0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
++		.valid = true
++	}
++};
++
++static bool __init curve25519_selftest(void) /* curve25519 self-test; returns true only if every check below passes */
++{
++	bool success = true, ret, ret2;
++	size_t i = 0, j;
++	u8 in[CURVE25519_KEY_SIZE];
++	u8 out[CURVE25519_KEY_SIZE], out2[CURVE25519_KEY_SIZE];
++
++	for (i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) { /* pass 1: every entry of the known-answer table */
++		memset(out, 0, CURVE25519_KEY_SIZE); /* presumably so bytes left by the previous vector cannot satisfy the memcmp */
++		ret = curve25519(out, curve25519_test_vectors[i].private,
++				 curve25519_test_vectors[i].public);
++		if (ret != curve25519_test_vectors[i].valid || /* the return flag must equal .valid ... */
++		    memcmp(out, curve25519_test_vectors[i].result, /* ... and the output must equal .result */
++			   CURVE25519_KEY_SIZE)) {
++			pr_err("curve25519 self-test %zu: FAIL\n", i + 1); /* vectors are reported 1-based */
++			success = false; /* no early exit: each failing vector gets its own log line */
++		}
++	}
++
++	for (i = 0; i < 5; ++i) { /* pass 2: five random private keys */
++		get_random_bytes(in, sizeof(in));
++		ret = curve25519_generate_public(out, in);
++		ret2 = curve25519(out2, in, (u8[CURVE25519_KEY_SIZE]){ 9 }); /* same key against the point whose first byte is 9 and the rest 0 */
++		if (ret != ret2 || memcmp(out, out2, CURVE25519_KEY_SIZE)) { /* both routes must agree in flag and in output */
++			pr_err("curve25519 basepoint self-test %zu: FAIL: input - 0x",
++			       i + 1);
++			for (j = CURVE25519_KEY_SIZE; j-- > 0;) /* dump in[] from its last byte down to its first */
++				printk(KERN_CONT "%02x", in[j]);
++			printk(KERN_CONT "\n");
++			success = false;
++		}
++	}
++
++	return success;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/selftest/poly1305.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,1107 @@
++// SPDX-License-Identifier: GPL-2.0 OR MIT
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++struct poly1305_testvec { /* one poly1305 self-test case; the code that consumes it is outside this chunk */
++	const u8 *input, *output, *key; /* presumably message, expected result and key, going by the names - TODO confirm against the test loop */
++	size_t ilen; /* presumably the byte length of input - TODO confirm where the table is built */
++};
++
++/* RFC7539: input01 is the 34-byte ASCII string "Cryptographic Forum Research Group" */
++static const u8 input01[] __initconst = {
++	0x43, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x67, 0x72,
++	0x61, 0x70, 0x68, 0x69, 0x63, 0x20, 0x46, 0x6f,
++	0x72, 0x75, 0x6d, 0x20, 0x52, 0x65, 0x73, 0x65,
++	0x61, 0x72, 0x63, 0x68, 0x20, 0x47, 0x72, 0x6f,
++	0x75, 0x70
++};
++static const u8 output01[] __initconst = { /* 16 bytes */
++	0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6,
++	0xc2, 0x2b, 0x8b, 0xaf, 0x0c, 0x01, 0x27, 0xa9
++};
++static const u8 key01[] __initconst = { /* 32 bytes */
++	0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33,
++	0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8,
++	0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd,
++	0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b
++};
++
++/* "The Poly1305-AES message-authentication code" */
++static const u8 input02[] __initconst = {
++	0xf3, 0xf6
++};
++static const u8 output02[] __initconst = {
++	0xf4, 0xc6, 0x33, 0xc3, 0x04, 0x4f, 0xc1, 0x45,
++	0xf8, 0x4f, 0x33, 0x5c, 0xb8, 0x19, 0x53, 0xde
++};
++static const u8 key02[] __initconst = {
++	0x85, 0x1f, 0xc4, 0x0c, 0x34, 0x67, 0xac, 0x0b,
++	0xe0, 0x5c, 0xc2, 0x04, 0x04, 0xf3, 0xf7, 0x00,
++	0x58, 0x0b, 0x3b, 0x0f, 0x94, 0x47, 0xbb, 0x1e,
++	0x69, 0xd0, 0x95, 0xb5, 0x92, 0x8b, 0x6d, 0xbc
++};
++
++static const u8 input03[] __initconst = { }; /* empty initializer: this case has a zero-length input table */
++static const u8 output03[] __initconst = { /* byte-for-byte the same as the last 16 bytes of key03 */
++	0xdd, 0x3f, 0xab, 0x22, 0x51, 0xf1, 0x1a, 0xc7,
++	0x59, 0xf0, 0x88, 0x71, 0x29, 0xcc, 0x2e, 0xe7
++};
++static const u8 key03[] __initconst = { /* 32 bytes; the second half reappears as output03 */
++	0xa0, 0xf3, 0x08, 0x00, 0x00, 0xf4, 0x64, 0x00,
++	0xd0, 0xc7, 0xe9, 0x07, 0x6c, 0x83, 0x44, 0x03,
++	0xdd, 0x3f, 0xab, 0x22, 0x51, 0xf1, 0x1a, 0xc7,
++	0x59, 0xf0, 0x88, 0x71, 0x29, 0xcc, 0x2e, 0xe7
++};
++
++static const u8 input04[] __initconst = {
++	0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
++	0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
++	0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
++	0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36
++};
++static const u8 output04[] __initconst = {
++	0x0e, 0xe1, 0xc1, 0x6b, 0xb7, 0x3f, 0x0f, 0x4f,
++	0xd1, 0x98, 0x81, 0x75, 0x3c, 0x01, 0xcd, 0xbe
++};
++static const u8 key04[] __initconst = {
++	0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
++	0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
++	0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
++	0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef
++};
++
++static const u8 input05[] __initconst = {
++	0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
++	0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
++	0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
++	0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
++	0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
++	0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
++	0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
++	0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9
++};
++static const u8 output05[] __initconst = {
++	0x51, 0x54, 0xad, 0x0d, 0x2c, 0xb2, 0x6e, 0x01,
++	0x27, 0x4f, 0xc5, 0x11, 0x48, 0x49, 0x1f, 0x1b
++};
++static const u8 key05[] __initconst = {
++	0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
++	0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
++	0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
++	0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57
++};
++
++/* self-generated vectors exercise "significant" input lengths (e.g. 64, 48, 96,
++ * 112 and 128 bytes), such that they are handled by different code paths */
++static const u8 input06[] __initconst = {
++	0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
++	0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
++	0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
++	0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
++	0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
++	0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
++	0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
++	0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf
++};
++static const u8 output06[] __initconst = {
++	0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37,
++	0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66
++};
++static const u8 key06[] __initconst = {
++	0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
++	0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
++	0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
++	0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57
++};
++
++static const u8 input07[] __initconst = {
++	0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
++	0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
++	0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
++	0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
++	0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
++	0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67
++};
++static const u8 output07[] __initconst = {
++	0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2,
++	0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61
++};
++static const u8 key07[] __initconst = {
++	0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
++	0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
++	0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
++	0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57
++};
++
++static const u8 input08[] __initconst = {
++	0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
++	0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
++	0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
++	0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
++	0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
++	0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
++	0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
++	0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
++	0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
++	0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
++	0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
++	0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36
++};
++static const u8 output08[] __initconst = {
++	0xbb, 0xb6, 0x13, 0xb2, 0xb6, 0xd7, 0x53, 0xba,
++	0x07, 0x39, 0x5b, 0x91, 0x6a, 0xae, 0xce, 0x15
++};
++static const u8 key08[] __initconst = {
++	0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
++	0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
++	0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
++	0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57
++};
++
++static const u8 input09[] __initconst = {
++	0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
++	0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
++	0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
++	0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
++	0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
++	0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
++	0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
++	0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
++	0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
++	0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
++	0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
++	0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
++	0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
++	0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24
++};
++static const u8 output09[] __initconst = {
++	0xc7, 0x94, 0xd7, 0x05, 0x7d, 0x17, 0x78, 0xc4,
++	0xbb, 0xee, 0x0a, 0x39, 0xb3, 0xd9, 0x73, 0x42
++};
++static const u8 key09[] __initconst = {
++	0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
++	0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
++	0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
++	0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57
++};
++
++static const u8 input10[] __initconst = {
++	0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
++	0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
++	0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
++	0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
++	0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
++	0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
++	0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
++	0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
++	0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
++	0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
++	0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
++	0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
++	0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
++	0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
++	0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
++	0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36
++};
++static const u8 output10[] __initconst = {
++	0xff, 0xbc, 0xb9, 0xb3, 0x71, 0x42, 0x31, 0x52,
++	0xd7, 0xfc, 0xa5, 0xad, 0x04, 0x2f, 0xba, 0xa9
++};
++static const u8 key10[] __initconst = {
++	0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
++	0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
++	0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
++	0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57
++};
++
++static const u8 input11[] __initconst = {
++	0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
++	0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
++	0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
++	0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
++	0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
++	0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
++	0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
++	0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
++	0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
++	0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
++	0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
++	0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
++	0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
++	0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
++	0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
++	0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36,
++	0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37,
++	0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66
++};
++static const u8 output11[] __initconst = {
++	0x06, 0x9e, 0xd6, 0xb8, 0xef, 0x0f, 0x20, 0x7b,
++	0x3e, 0x24, 0x3b, 0xb1, 0x01, 0x9f, 0xe6, 0x32
++};
++static const u8 key11[] __initconst = {
++	0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
++	0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
++	0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
++	0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57
++};
++
++static const u8 input12[] __initconst = {
++	0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
++	0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
++	0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
++	0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
++	0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
++	0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
++	0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
++	0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
++	0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
++	0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
++	0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
++	0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
++	0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
++	0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
++	0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
++	0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36,
++	0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37,
++	0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66,
++	0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2,
++	0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61
++};
++static const u8 output12[] __initconst = {
++	0xcc, 0xa3, 0x39, 0xd9, 0xa4, 0x5f, 0xa2, 0x36,
++	0x8c, 0x2c, 0x68, 0xb3, 0xa4, 0x17, 0x91, 0x33
++};
++static const u8 key12[] __initconst = {
++	0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
++	0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
++	0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
++	0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57
++};
++
++static const u8 input13[] __initconst = {
++	0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
++	0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
++	0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
++	0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
++	0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
++	0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
++	0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
++	0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
++	0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
++	0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
++	0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
++	0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
++	0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
++	0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
++	0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
++	0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36,
++	0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37,
++	0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66,
++	0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2,
++	0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61,
++	0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
++	0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
++	0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
++	0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
++	0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
++	0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
++	0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
++	0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
++	0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
++	0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
++	0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
++	0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
++	0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
++	0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
++	0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
++	0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36
++};
++static const u8 output13[] __initconst = {
++	0x53, 0xf6, 0xe8, 0x28, 0xa2, 0xf0, 0xfe, 0x0e,
++	0xe8, 0x15, 0xbf, 0x0b, 0xd5, 0x84, 0x1a, 0x34
++};
++static const u8 key13[] __initconst = {
++	0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
++	0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
++	0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
++	0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57
++};
++
++static const u8 input14[] __initconst = {
++	0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
++	0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
++	0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
++	0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
++	0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
++	0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
++	0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
++	0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
++	0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
++	0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
++	0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
++	0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
++	0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
++	0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
++	0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
++	0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36,
++	0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37,
++	0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66,
++	0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2,
++	0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61,
++	0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
++	0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
++	0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
++	0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
++	0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
++	0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
++	0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
++	0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
++	0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
++	0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
++	0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
++	0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
++	0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
++	0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
++	0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
++	0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36,
++	0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37,
++	0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66,
++	0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2,
++	0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61
++};
++static const u8 output14[] __initconst = {
++	0xb8, 0x46, 0xd4, 0x4e, 0x9b, 0xbd, 0x53, 0xce,
++	0xdf, 0xfb, 0xfb, 0xb6, 0xb7, 0xfa, 0x49, 0x33
++};
++static const u8 key14[] __initconst = {
++	0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
++	0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
++	0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
++	0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57
++};
++
++/* 4th power of the key spills to 131th bit in SIMD key setup */
++static const u8 input15[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 output15[] __initconst = {
++	0x07, 0x14, 0x5a, 0x4c, 0x02, 0xfe, 0x5f, 0xa3,
++	0x20, 0x36, 0xde, 0x68, 0xfa, 0xbe, 0x90, 0x66
++};
++static const u8 key15[] __initconst = {
++	0xad, 0x62, 0x81, 0x07, 0xe8, 0x35, 0x1d, 0x0f,
++	0x2c, 0x23, 0x1a, 0x05, 0xdc, 0x4a, 0x41, 0x06,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++
++/* OpenSSL's poly1305_ieee754.c failed this in final stage */
++static const u8 input16[] __initconst = {
++	0x84, 0x23, 0x64, 0xe1, 0x56, 0x33, 0x6c, 0x09,
++	0x98, 0xb9, 0x33, 0xa6, 0x23, 0x77, 0x26, 0x18,
++	0x0d, 0x9e, 0x3f, 0xdc, 0xbd, 0xe4, 0xcd, 0x5d,
++	0x17, 0x08, 0x0f, 0xc3, 0xbe, 0xb4, 0x96, 0x14,
++	0xd7, 0x12, 0x2c, 0x03, 0x74, 0x63, 0xff, 0x10,
++	0x4d, 0x73, 0xf1, 0x9c, 0x12, 0x70, 0x46, 0x28,
++	0xd4, 0x17, 0xc4, 0xc5, 0x4a, 0x3f, 0xe3, 0x0d,
++	0x3c, 0x3d, 0x77, 0x14, 0x38, 0x2d, 0x43, 0xb0,
++	0x38, 0x2a, 0x50, 0xa5, 0xde, 0xe5, 0x4b, 0xe8,
++	0x44, 0xb0, 0x76, 0xe8, 0xdf, 0x88, 0x20, 0x1a,
++	0x1c, 0xd4, 0x3b, 0x90, 0xeb, 0x21, 0x64, 0x3f,
++	0xa9, 0x6f, 0x39, 0xb5, 0x18, 0xaa, 0x83, 0x40,
++	0xc9, 0x42, 0xff, 0x3c, 0x31, 0xba, 0xf7, 0xc9,
++	0xbd, 0xbf, 0x0f, 0x31, 0xae, 0x3f, 0xa0, 0x96,
++	0xbf, 0x8c, 0x63, 0x03, 0x06, 0x09, 0x82, 0x9f,
++	0xe7, 0x2e, 0x17, 0x98, 0x24, 0x89, 0x0b, 0xc8,
++	0xe0, 0x8c, 0x31, 0x5c, 0x1c, 0xce, 0x2a, 0x83,
++	0x14, 0x4d, 0xbb, 0xff, 0x09, 0xf7, 0x4e, 0x3e,
++	0xfc, 0x77, 0x0b, 0x54, 0xd0, 0x98, 0x4a, 0x8f,
++	0x19, 0xb1, 0x47, 0x19, 0xe6, 0x36, 0x35, 0x64,
++	0x1d, 0x6b, 0x1e, 0xed, 0xf6, 0x3e, 0xfb, 0xf0,
++	0x80, 0xe1, 0x78, 0x3d, 0x32, 0x44, 0x54, 0x12,
++	0x11, 0x4c, 0x20, 0xde, 0x0b, 0x83, 0x7a, 0x0d,
++	0xfa, 0x33, 0xd6, 0xb8, 0x28, 0x25, 0xff, 0xf4,
++	0x4c, 0x9a, 0x70, 0xea, 0x54, 0xce, 0x47, 0xf0,
++	0x7d, 0xf6, 0x98, 0xe6, 0xb0, 0x33, 0x23, 0xb5,
++	0x30, 0x79, 0x36, 0x4a, 0x5f, 0xc3, 0xe9, 0xdd,
++	0x03, 0x43, 0x92, 0xbd, 0xde, 0x86, 0xdc, 0xcd,
++	0xda, 0x94, 0x32, 0x1c, 0x5e, 0x44, 0x06, 0x04,
++	0x89, 0x33, 0x6c, 0xb6, 0x5b, 0xf3, 0x98, 0x9c,
++	0x36, 0xf7, 0x28, 0x2c, 0x2f, 0x5d, 0x2b, 0x88,
++	0x2c, 0x17, 0x1e, 0x74
++};
++static const u8 output16[] __initconst = {
++	0xf2, 0x48, 0x31, 0x2e, 0x57, 0x8d, 0x9d, 0x58,
++	0xf8, 0xb7, 0xbb, 0x4d, 0x19, 0x10, 0x54, 0x31
++};
++static const u8 key16[] __initconst = {
++	0x95, 0xd5, 0xc0, 0x05, 0x50, 0x3e, 0x51, 0x0d,
++	0x8c, 0xd0, 0xaa, 0x07, 0x2c, 0x4a, 0x4d, 0x06,
++	0x6e, 0xab, 0xc5, 0x2d, 0x11, 0x65, 0x3d, 0xf4,
++	0x7f, 0xbf, 0x63, 0xab, 0x19, 0x8b, 0xcc, 0x26
++};
++
++/* AVX2 in OpenSSL's poly1305-x86.pl failed this with 176+32 split */
++static const u8 input17[] __initconst = {
++	0x24, 0x8a, 0xc3, 0x10, 0x85, 0xb6, 0xc2, 0xad,
++	0xaa, 0xa3, 0x82, 0x59, 0xa0, 0xd7, 0x19, 0x2c,
++	0x5c, 0x35, 0xd1, 0xbb, 0x4e, 0xf3, 0x9a, 0xd9,
++	0x4c, 0x38, 0xd1, 0xc8, 0x24, 0x79, 0xe2, 0xdd,
++	0x21, 0x59, 0xa0, 0x77, 0x02, 0x4b, 0x05, 0x89,
++	0xbc, 0x8a, 0x20, 0x10, 0x1b, 0x50, 0x6f, 0x0a,
++	0x1a, 0xd0, 0xbb, 0xab, 0x76, 0xe8, 0x3a, 0x83,
++	0xf1, 0xb9, 0x4b, 0xe6, 0xbe, 0xae, 0x74, 0xe8,
++	0x74, 0xca, 0xb6, 0x92, 0xc5, 0x96, 0x3a, 0x75,
++	0x43, 0x6b, 0x77, 0x61, 0x21, 0xec, 0x9f, 0x62,
++	0x39, 0x9a, 0x3e, 0x66, 0xb2, 0xd2, 0x27, 0x07,
++	0xda, 0xe8, 0x19, 0x33, 0xb6, 0x27, 0x7f, 0x3c,
++	0x85, 0x16, 0xbc, 0xbe, 0x26, 0xdb, 0xbd, 0x86,
++	0xf3, 0x73, 0x10, 0x3d, 0x7c, 0xf4, 0xca, 0xd1,
++	0x88, 0x8c, 0x95, 0x21, 0x18, 0xfb, 0xfb, 0xd0,
++	0xd7, 0xb4, 0xbe, 0xdc, 0x4a, 0xe4, 0x93, 0x6a,
++	0xff, 0x91, 0x15, 0x7e, 0x7a, 0xa4, 0x7c, 0x54,
++	0x44, 0x2e, 0xa7, 0x8d, 0x6a, 0xc2, 0x51, 0xd3,
++	0x24, 0xa0, 0xfb, 0xe4, 0x9d, 0x89, 0xcc, 0x35,
++	0x21, 0xb6, 0x6d, 0x16, 0xe9, 0xc6, 0x6a, 0x37,
++	0x09, 0x89, 0x4e, 0x4e, 0xb0, 0xa4, 0xee, 0xdc,
++	0x4a, 0xe1, 0x94, 0x68, 0xe6, 0x6b, 0x81, 0xf2,
++	0x71, 0x35, 0x1b, 0x1d, 0x92, 0x1e, 0xa5, 0x51,
++	0x04, 0x7a, 0xbc, 0xc6, 0xb8, 0x7a, 0x90, 0x1f,
++	0xde, 0x7d, 0xb7, 0x9f, 0xa1, 0x81, 0x8c, 0x11,
++	0x33, 0x6d, 0xbc, 0x07, 0x24, 0x4a, 0x40, 0xeb
++};
++static const u8 output17[] __initconst = {
++	0xbc, 0x93, 0x9b, 0xc5, 0x28, 0x14, 0x80, 0xfa,
++	0x99, 0xc6, 0xd6, 0x8c, 0x25, 0x8e, 0xc4, 0x2f
++};
++static const u8 key17[] __initconst = {
++	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
++	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++
++/* test vectors from Google */
++static const u8 input18[] __initconst = { };
++static const u8 output18[] __initconst = {
++	0x47, 0x10, 0x13, 0x0e, 0x9f, 0x6f, 0xea, 0x8d,
++	0x72, 0x29, 0x38, 0x50, 0xa6, 0x67, 0xd8, 0x6c
++};
++static const u8 key18[] __initconst = {
++	0xc8, 0xaf, 0xaa, 0xc3, 0x31, 0xee, 0x37, 0x2c,
++	0xd6, 0x08, 0x2d, 0xe1, 0x34, 0x94, 0x3b, 0x17,
++	0x47, 0x10, 0x13, 0x0e, 0x9f, 0x6f, 0xea, 0x8d,
++	0x72, 0x29, 0x38, 0x50, 0xa6, 0x67, 0xd8, 0x6c
++};
++
++static const u8 input19[] __initconst = {
++	0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
++	0x72, 0x6c, 0x64, 0x21
++};
++static const u8 output19[] __initconst = {
++	0xa6, 0xf7, 0x45, 0x00, 0x8f, 0x81, 0xc9, 0x16,
++	0xa2, 0x0d, 0xcc, 0x74, 0xee, 0xf2, 0xb2, 0xf0
++};
++static const u8 key19[] __initconst = {
++	0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20,
++	0x33, 0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20,
++	0x6b, 0x65, 0x79, 0x20, 0x66, 0x6f, 0x72, 0x20,
++	0x50, 0x6f, 0x6c, 0x79, 0x31, 0x33, 0x30, 0x35
++};
++
++static const u8 input20[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 output20[] __initconst = {
++	0x49, 0xec, 0x78, 0x09, 0x0e, 0x48, 0x1e, 0xc6,
++	0xc2, 0x6b, 0x33, 0xb9, 0x1c, 0xcc, 0x03, 0x07
++};
++static const u8 key20[] __initconst = {
++	0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20,
++	0x33, 0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20,
++	0x6b, 0x65, 0x79, 0x20, 0x66, 0x6f, 0x72, 0x20,
++	0x50, 0x6f, 0x6c, 0x79, 0x31, 0x33, 0x30, 0x35
++};
++
++static const u8 input21[] __initconst = {
++	0x89, 0xda, 0xb8, 0x0b, 0x77, 0x17, 0xc1, 0xdb,
++	0x5d, 0xb4, 0x37, 0x86, 0x0a, 0x3f, 0x70, 0x21,
++	0x8e, 0x93, 0xe1, 0xb8, 0xf4, 0x61, 0xfb, 0x67,
++	0x7f, 0x16, 0xf3, 0x5f, 0x6f, 0x87, 0xe2, 0xa9,
++	0x1c, 0x99, 0xbc, 0x3a, 0x47, 0xac, 0xe4, 0x76,
++	0x40, 0xcc, 0x95, 0xc3, 0x45, 0xbe, 0x5e, 0xcc,
++	0xa5, 0xa3, 0x52, 0x3c, 0x35, 0xcc, 0x01, 0x89,
++	0x3a, 0xf0, 0xb6, 0x4a, 0x62, 0x03, 0x34, 0x27,
++	0x03, 0x72, 0xec, 0x12, 0x48, 0x2d, 0x1b, 0x1e,
++	0x36, 0x35, 0x61, 0x69, 0x8a, 0x57, 0x8b, 0x35,
++	0x98, 0x03, 0x49, 0x5b, 0xb4, 0xe2, 0xef, 0x19,
++	0x30, 0xb1, 0x7a, 0x51, 0x90, 0xb5, 0x80, 0xf1,
++	0x41, 0x30, 0x0d, 0xf3, 0x0a, 0xdb, 0xec, 0xa2,
++	0x8f, 0x64, 0x27, 0xa8, 0xbc, 0x1a, 0x99, 0x9f,
++	0xd5, 0x1c, 0x55, 0x4a, 0x01, 0x7d, 0x09, 0x5d,
++	0x8c, 0x3e, 0x31, 0x27, 0xda, 0xf9, 0xf5, 0x95
++};
++static const u8 output21[] __initconst = {
++	0xc8, 0x5d, 0x15, 0xed, 0x44, 0xc3, 0x78, 0xd6,
++	0xb0, 0x0e, 0x23, 0x06, 0x4c, 0x7b, 0xcd, 0x51
++};
++static const u8 key21[] __initconst = {
++	0x2d, 0x77, 0x3b, 0xe3, 0x7a, 0xdb, 0x1e, 0x4d,
++	0x68, 0x3b, 0xf0, 0x07, 0x5e, 0x79, 0xc4, 0xee,
++	0x03, 0x79, 0x18, 0x53, 0x5a, 0x7f, 0x99, 0xcc,
++	0xb7, 0x04, 0x0f, 0xb5, 0xf5, 0xf4, 0x3a, 0xea
++};
++
++static const u8 input22[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b,
++	0x17, 0x03, 0x03, 0x02, 0x00, 0x00, 0x00, 0x00,
++	0x06, 0xdb, 0x1f, 0x1f, 0x36, 0x8d, 0x69, 0x6a,
++	0x81, 0x0a, 0x34, 0x9c, 0x0c, 0x71, 0x4c, 0x9a,
++	0x5e, 0x78, 0x50, 0xc2, 0x40, 0x7d, 0x72, 0x1a,
++	0xcd, 0xed, 0x95, 0xe0, 0x18, 0xd7, 0xa8, 0x52,
++	0x66, 0xa6, 0xe1, 0x28, 0x9c, 0xdb, 0x4a, 0xeb,
++	0x18, 0xda, 0x5a, 0xc8, 0xa2, 0xb0, 0x02, 0x6d,
++	0x24, 0xa5, 0x9a, 0xd4, 0x85, 0x22, 0x7f, 0x3e,
++	0xae, 0xdb, 0xb2, 0xe7, 0xe3, 0x5e, 0x1c, 0x66,
++	0xcd, 0x60, 0xf9, 0xab, 0xf7, 0x16, 0xdc, 0xc9,
++	0xac, 0x42, 0x68, 0x2d, 0xd7, 0xda, 0xb2, 0x87,
++	0xa7, 0x02, 0x4c, 0x4e, 0xef, 0xc3, 0x21, 0xcc,
++	0x05, 0x74, 0xe1, 0x67, 0x93, 0xe3, 0x7c, 0xec,
++	0x03, 0xc5, 0xbd, 0xa4, 0x2b, 0x54, 0xc1, 0x14,
++	0xa8, 0x0b, 0x57, 0xaf, 0x26, 0x41, 0x6c, 0x7b,
++	0xe7, 0x42, 0x00, 0x5e, 0x20, 0x85, 0x5c, 0x73,
++	0xe2, 0x1d, 0xc8, 0xe2, 0xed, 0xc9, 0xd4, 0x35,
++	0xcb, 0x6f, 0x60, 0x59, 0x28, 0x00, 0x11, 0xc2,
++	0x70, 0xb7, 0x15, 0x70, 0x05, 0x1c, 0x1c, 0x9b,
++	0x30, 0x52, 0x12, 0x66, 0x20, 0xbc, 0x1e, 0x27,
++	0x30, 0xfa, 0x06, 0x6c, 0x7a, 0x50, 0x9d, 0x53,
++	0xc6, 0x0e, 0x5a, 0xe1, 0xb4, 0x0a, 0xa6, 0xe3,
++	0x9e, 0x49, 0x66, 0x92, 0x28, 0xc9, 0x0e, 0xec,
++	0xb4, 0xa5, 0x0d, 0xb3, 0x2a, 0x50, 0xbc, 0x49,
++	0xe9, 0x0b, 0x4f, 0x4b, 0x35, 0x9a, 0x1d, 0xfd,
++	0x11, 0x74, 0x9c, 0xd3, 0x86, 0x7f, 0xcf, 0x2f,
++	0xb7, 0xbb, 0x6c, 0xd4, 0x73, 0x8f, 0x6a, 0x4a,
++	0xd6, 0xf7, 0xca, 0x50, 0x58, 0xf7, 0x61, 0x88,
++	0x45, 0xaf, 0x9f, 0x02, 0x0f, 0x6c, 0x3b, 0x96,
++	0x7b, 0x8f, 0x4c, 0xd4, 0xa9, 0x1e, 0x28, 0x13,
++	0xb5, 0x07, 0xae, 0x66, 0xf2, 0xd3, 0x5c, 0x18,
++	0x28, 0x4f, 0x72, 0x92, 0x18, 0x60, 0x62, 0xe1,
++	0x0f, 0xd5, 0x51, 0x0d, 0x18, 0x77, 0x53, 0x51,
++	0xef, 0x33, 0x4e, 0x76, 0x34, 0xab, 0x47, 0x43,
++	0xf5, 0xb6, 0x8f, 0x49, 0xad, 0xca, 0xb3, 0x84,
++	0xd3, 0xfd, 0x75, 0xf7, 0x39, 0x0f, 0x40, 0x06,
++	0xef, 0x2a, 0x29, 0x5c, 0x8c, 0x7a, 0x07, 0x6a,
++	0xd5, 0x45, 0x46, 0xcd, 0x25, 0xd2, 0x10, 0x7f,
++	0xbe, 0x14, 0x36, 0xc8, 0x40, 0x92, 0x4a, 0xae,
++	0xbe, 0x5b, 0x37, 0x08, 0x93, 0xcd, 0x63, 0xd1,
++	0x32, 0x5b, 0x86, 0x16, 0xfc, 0x48, 0x10, 0x88,
++	0x6b, 0xc1, 0x52, 0xc5, 0x32, 0x21, 0xb6, 0xdf,
++	0x37, 0x31, 0x19, 0x39, 0x32, 0x55, 0xee, 0x72,
++	0xbc, 0xaa, 0x88, 0x01, 0x74, 0xf1, 0x71, 0x7f,
++	0x91, 0x84, 0xfa, 0x91, 0x64, 0x6f, 0x17, 0xa2,
++	0x4a, 0xc5, 0x5d, 0x16, 0xbf, 0xdd, 0xca, 0x95,
++	0x81, 0xa9, 0x2e, 0xda, 0x47, 0x92, 0x01, 0xf0,
++	0xed, 0xbf, 0x63, 0x36, 0x00, 0xd6, 0x06, 0x6d,
++	0x1a, 0xb3, 0x6d, 0x5d, 0x24, 0x15, 0xd7, 0x13,
++	0x51, 0xbb, 0xcd, 0x60, 0x8a, 0x25, 0x10, 0x8d,
++	0x25, 0x64, 0x19, 0x92, 0xc1, 0xf2, 0x6c, 0x53,
++	0x1c, 0xf9, 0xf9, 0x02, 0x03, 0xbc, 0x4c, 0xc1,
++	0x9f, 0x59, 0x27, 0xd8, 0x34, 0xb0, 0xa4, 0x71,
++	0x16, 0xd3, 0x88, 0x4b, 0xbb, 0x16, 0x4b, 0x8e,
++	0xc8, 0x83, 0xd1, 0xac, 0x83, 0x2e, 0x56, 0xb3,
++	0x91, 0x8a, 0x98, 0x60, 0x1a, 0x08, 0xd1, 0x71,
++	0x88, 0x15, 0x41, 0xd5, 0x94, 0xdb, 0x39, 0x9c,
++	0x6a, 0xe6, 0x15, 0x12, 0x21, 0x74, 0x5a, 0xec,
++	0x81, 0x4c, 0x45, 0xb0, 0xb0, 0x5b, 0x56, 0x54,
++	0x36, 0xfd, 0x6f, 0x13, 0x7a, 0xa1, 0x0a, 0x0c,
++	0x0b, 0x64, 0x37, 0x61, 0xdb, 0xd6, 0xf9, 0xa9,
++	0xdc, 0xb9, 0x9b, 0x1a, 0x6e, 0x69, 0x08, 0x54,
++	0xce, 0x07, 0x69, 0xcd, 0xe3, 0x97, 0x61, 0xd8,
++	0x2f, 0xcd, 0xec, 0x15, 0xf0, 0xd9, 0x2d, 0x7d,
++	0x8e, 0x94, 0xad, 0xe8, 0xeb, 0x83, 0xfb, 0xe0
++};
++static const u8 output22[] __initconst = {
++	0x26, 0x37, 0x40, 0x8f, 0xe1, 0x30, 0x86, 0xea,
++	0x73, 0xf9, 0x71, 0xe3, 0x42, 0x5e, 0x28, 0x20
++};
++static const u8 key22[] __initconst = {
++	0x99, 0xe5, 0x82, 0x2d, 0xd4, 0x17, 0x3c, 0x99,
++	0x5e, 0x3d, 0xae, 0x0d, 0xde, 0xfb, 0x97, 0x74,
++	0x3f, 0xde, 0x3b, 0x08, 0x01, 0x34, 0xb3, 0x9f,
++	0x76, 0xe9, 0xbf, 0x8d, 0x0e, 0x88, 0xd5, 0x46
++};
++
++/* test vectors from Hanno Böck */
++static const u8 input23[] __initconst = {
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0x80, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xc5,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xe3, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xac, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xe6,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x00, 0x00, 0x00,
++	0xaf, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
++	0xcc, 0xcc, 0xff, 0xff, 0xff, 0xf5, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0xff, 0xff, 0xff, 0xe7, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x71, 0x92, 0x05, 0xa8, 0x52, 0x1d,
++	0xfc
++};
++static const u8 output23[] __initconst = {
++	0x85, 0x59, 0xb8, 0x76, 0xec, 0xee, 0xd6, 0x6e,
++	0xb3, 0x77, 0x98, 0xc0, 0x45, 0x7b, 0xaf, 0xf9
++};
++static const u8 key23[] __initconst = {
++	0x7f, 0x1b, 0x02, 0x64, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc
++};
++
++static const u8 input24[] __initconst = {
++	0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
++	0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
++	0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
++	0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x80, 0x02, 0x64
++};
++static const u8 output24[] __initconst = {
++	0x00, 0xbd, 0x12, 0x58, 0x97, 0x8e, 0x20, 0x54,
++	0x44, 0xc9, 0xaa, 0xaa, 0x82, 0x00, 0x6f, 0xed
++};
++static const u8 key24[] __initconst = {
++	0xe0, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
++	0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa
++};
++
++static const u8 input25[] __initconst = {
++	0x02, 0xfc
++};
++static const u8 output25[] __initconst = {
++	0x06, 0x12, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
++	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c
++};
++static const u8 key25[] __initconst = {
++	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
++	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
++	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
++	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c
++};
++
++static const u8 input26[] __initconst = {
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7a, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x5c, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x6e, 0x7b, 0x00, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7a, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x5c,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
++	0x7b, 0x6e, 0x7b, 0x00, 0x13, 0x00, 0x00, 0x00,
++	0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0xf2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x20, 0x00, 0xef, 0xff, 0x00,
++	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00,
++	0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x64, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x00,
++	0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x20, 0x00, 0xef, 0xff, 0x00, 0x09,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x7a, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
++	0x00, 0x09, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc
++};
++static const u8 output26[] __initconst = {
++	0x33, 0x20, 0x5b, 0xbf, 0x9e, 0x9f, 0x8f, 0x72,
++	0x12, 0xab, 0x9e, 0x2a, 0xb9, 0xb7, 0xe4, 0xa5
++};
++static const u8 key26[] __initconst = {
++	0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7b, 0x7b
++};
++
++static const u8 input27[] __initconst = {
++	0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
++	0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
++	0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
++	0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
++	0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
++	0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
++	0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
++	0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
++	0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
++	0x77, 0x77, 0x77, 0x77, 0xff, 0xff, 0xff, 0xe9,
++	0xe9, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac,
++	0xac, 0xac, 0xac, 0xac, 0x00, 0x00, 0xac, 0xac,
++	0xec, 0x01, 0x00, 0xac, 0xac, 0xac, 0x2c, 0xac,
++	0xa2, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac,
++	0xac, 0xac, 0xac, 0xac, 0x64, 0xf2
++};
++static const u8 output27[] __initconst = {
++	0x02, 0xee, 0x7c, 0x8c, 0x54, 0x6d, 0xde, 0xb1,
++	0xa4, 0x67, 0xe4, 0xc3, 0x98, 0x11, 0x58, 0xb9
++};
++static const u8 key27[] __initconst = {
++	0x00, 0x00, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x7f,
++	0x01, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0xcf, 0x77, 0x77, 0x77, 0x77, 0x77,
++	0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77
++};
++
++/* nacl */
++static const u8 input28[] __initconst = {
++	0x8e, 0x99, 0x3b, 0x9f, 0x48, 0x68, 0x12, 0x73,
++	0xc2, 0x96, 0x50, 0xba, 0x32, 0xfc, 0x76, 0xce,
++	0x48, 0x33, 0x2e, 0xa7, 0x16, 0x4d, 0x96, 0xa4,
++	0x47, 0x6f, 0xb8, 0xc5, 0x31, 0xa1, 0x18, 0x6a,
++	0xc0, 0xdf, 0xc1, 0x7c, 0x98, 0xdc, 0xe8, 0x7b,
++	0x4d, 0xa7, 0xf0, 0x11, 0xec, 0x48, 0xc9, 0x72,
++	0x71, 0xd2, 0xc2, 0x0f, 0x9b, 0x92, 0x8f, 0xe2,
++	0x27, 0x0d, 0x6f, 0xb8, 0x63, 0xd5, 0x17, 0x38,
++	0xb4, 0x8e, 0xee, 0xe3, 0x14, 0xa7, 0xcc, 0x8a,
++	0xb9, 0x32, 0x16, 0x45, 0x48, 0xe5, 0x26, 0xae,
++	0x90, 0x22, 0x43, 0x68, 0x51, 0x7a, 0xcf, 0xea,
++	0xbd, 0x6b, 0xb3, 0x73, 0x2b, 0xc0, 0xe9, 0xda,
++	0x99, 0x83, 0x2b, 0x61, 0xca, 0x01, 0xb6, 0xde,
++	0x56, 0x24, 0x4a, 0x9e, 0x88, 0xd5, 0xf9, 0xb3,
++	0x79, 0x73, 0xf6, 0x22, 0xa4, 0x3d, 0x14, 0xa6,
++	0x59, 0x9b, 0x1f, 0x65, 0x4c, 0xb4, 0x5a, 0x74,
++	0xe3, 0x55, 0xa5
++};
++static const u8 output28[] __initconst = {
++	0xf3, 0xff, 0xc7, 0x70, 0x3f, 0x94, 0x00, 0xe5,
++	0x2a, 0x7d, 0xfb, 0x4b, 0x3d, 0x33, 0x05, 0xd9
++};
++static const u8 key28[] __initconst = {
++	0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91,
++	0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25,
++	0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65,
++	0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80
++};
++
++/* wrap 2^130-5 */
++static const u8 input29[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 output29[] __initconst = {
++	0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 key29[] __initconst = {
++	0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++
++/* wrap 2^128
++ *
++ * Test vector 30: input30 is the 16-byte message, output30 the expected
++ * 16-byte MAC and key30 the 32-byte key, as wired up in poly1305_testvecs[].
++ */
++static const u8 input30[] __initconst = {
++	0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 output30[] __initconst = {
++	0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 key30[] __initconst = {
++	0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++
++/* limb carry
++ *
++ * Test vector 31: input31 is the 48-byte message, output31 the expected
++ * 16-byte MAC and key31 the 32-byte key, as wired up in poly1305_testvecs[].
++ */
++static const u8 input31[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 output31[] __initconst = {
++	0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 key31[] __initconst = {
++	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++
++/* 2^130-5
++ *
++ * Test vector 32: input32 is the 48-byte message, output32 the expected
++ * 16-byte MAC (all zero bytes) and key32 the 32-byte key, as wired up in
++ * poly1305_testvecs[].
++ */
++static const u8 input32[] __initconst = {
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xfb, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
++	0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
++	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
++	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01
++};
++static const u8 output32[] __initconst = {
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 key32[] __initconst = {
++	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++
++/* 2^130-6
++ *
++ * Test vector 33: input33 is the 16-byte message, output33 the expected
++ * 16-byte MAC and key33 the 32-byte key, as wired up in poly1305_testvecs[].
++ */
++static const u8 input33[] __initconst = {
++	0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 output33[] __initconst = {
++	0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
++	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
++};
++static const u8 key33[] __initconst = {
++	0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++
++/* 5*H+L reduction intermediate
++ *
++ * Test vector 34: input34 is the 64-byte message, output34 the expected
++ * 16-byte MAC and key34 the 32-byte key, as wired up in poly1305_testvecs[].
++ * It uses the same key and the same first 48 message bytes as vector 35.
++ */
++static const u8 input34[] __initconst = {
++	0xe3, 0x35, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0xb9,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x33, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0x79, 0xcd,
++	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 output34[] __initconst = {
++	0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 key34[] __initconst = {
++	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++
++/* 5*H+L reduction final
++ *
++ * Test vector 35: input35 is the 48-byte message, output35 the expected
++ * 16-byte MAC and key35 the 32-byte key, as wired up in poly1305_testvecs[].
++ */
++static const u8 input35[] __initconst = {
++	0xe3, 0x35, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0xb9,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x33, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0x79, 0xcd,
++	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 output35[] __initconst = {
++	0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++static const u8 key35[] __initconst = {
++	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
++};
++
++/* Table of every Poly1305 test vector walked by poly1305_selftest(). Each
++ * positional initializer lists the message, the expected MAC, the key and
++ * the message length in bytes; these presumably land in the .input, .output,
++ * .key and .ilen members the self-test reads (struct layout is declared
++ * elsewhere -- confirm there).
++ */
++static const struct poly1305_testvec poly1305_testvecs[] __initconst = {
++	{ input01, output01, key01, sizeof(input01) },
++	{ input02, output02, key02, sizeof(input02) },
++	{ input03, output03, key03, sizeof(input03) },
++	{ input04, output04, key04, sizeof(input04) },
++	{ input05, output05, key05, sizeof(input05) },
++	{ input06, output06, key06, sizeof(input06) },
++	{ input07, output07, key07, sizeof(input07) },
++	{ input08, output08, key08, sizeof(input08) },
++	{ input09, output09, key09, sizeof(input09) },
++	{ input10, output10, key10, sizeof(input10) },
++	{ input11, output11, key11, sizeof(input11) },
++	{ input12, output12, key12, sizeof(input12) },
++	{ input13, output13, key13, sizeof(input13) },
++	{ input14, output14, key14, sizeof(input14) },
++	{ input15, output15, key15, sizeof(input15) },
++	{ input16, output16, key16, sizeof(input16) },
++	{ input17, output17, key17, sizeof(input17) },
++	{ input18, output18, key18, sizeof(input18) },
++	{ input19, output19, key19, sizeof(input19) },
++	{ input20, output20, key20, sizeof(input20) },
++	{ input21, output21, key21, sizeof(input21) },
++	{ input22, output22, key22, sizeof(input22) },
++	{ input23, output23, key23, sizeof(input23) },
++	{ input24, output24, key24, sizeof(input24) },
++	{ input25, output25, key25, sizeof(input25) },
++	{ input26, output26, key26, sizeof(input26) },
++	{ input27, output27, key27, sizeof(input27) },
++	{ input28, output28, key28, sizeof(input28) },
++	{ input29, output29, key29, sizeof(input29) },
++	{ input30, output30, key30, sizeof(input30) },
++	{ input31, output31, key31, sizeof(input31) },
++	{ input32, output32, key32, sizeof(input32) },
++	{ input33, output33, key33, sizeof(input33) },
++	{ input34, output34, key34, sizeof(input34) },
++	{ input35, output35, key35, sizeof(input35) }
++};
++
++/* Run every entry of poly1305_testvecs[] through the Poly1305 API.
++ *
++ * Each vector is first hashed with a single update call. Vectors longer than
++ * one byte are then re-hashed for every split point j in [1, ilen - 2], once
++ * with both halves using the SIMD context and once with the second half
++ * forced onto DONT_USE_SIMD. Every mismatch is logged with pr_err().
++ *
++ * Returns true only if all computed MACs matched the expected ones.
++ */
++static bool __init poly1305_selftest(void)
++{
++	simd_context_t simd_context;
++	bool passed = true;
++	size_t vec, split;
++
++	simd_get(&simd_context);
++	for (vec = 0; vec < ARRAY_SIZE(poly1305_testvecs); ++vec) {
++		const struct poly1305_testvec *tv = &poly1305_testvecs[vec];
++		struct poly1305_ctx state;
++		u8 mac[POLY1305_MAC_SIZE];
++
++		/* Pass 1: the whole message in one update. */
++		memset(mac, 0, sizeof(mac));
++		memset(&state, 0, sizeof(state));
++		poly1305_init(&state, tv->key);
++		poly1305_update(&state, tv->input, tv->ilen, &simd_context);
++		poly1305_final(&state, mac, &simd_context);
++		if (memcmp(mac, tv->output, POLY1305_MAC_SIZE) != 0) {
++			pr_err("poly1305 self-test %zu: FAIL\n", vec + 1);
++			passed = false;
++		}
++		simd_relax(&simd_context);
++
++		/* Nothing to split for empty or single-byte messages. */
++		if (tv->ilen <= 1)
++			continue;
++
++		for (split = 1; split < tv->ilen - 1; ++split) {
++			const u8 *tail = tv->input + split;
++
++			/* Pass 2: two updates, both with the SIMD context. */
++			memset(mac, 0, sizeof(mac));
++			memset(&state, 0, sizeof(state));
++			poly1305_init(&state, tv->key);
++			poly1305_update(&state, tv->input, split,
++					&simd_context);
++			poly1305_update(&state, tail, tv->ilen - split,
++					&simd_context);
++			poly1305_final(&state, mac, &simd_context);
++			if (memcmp(mac, tv->output, POLY1305_MAC_SIZE) != 0) {
++				pr_err("poly1305 self-test %zu (split %zu): FAIL\n",
++				       vec + 1, split);
++				passed = false;
++			}
++
++			/* Pass 3: second update with SIMD disabled. */
++			memset(mac, 0, sizeof(mac));
++			memset(&state, 0, sizeof(state));
++			poly1305_init(&state, tv->key);
++			poly1305_update(&state, tv->input, split,
++					&simd_context);
++			poly1305_update(&state, tail, tv->ilen - split,
++					DONT_USE_SIMD);
++			poly1305_final(&state, mac, &simd_context);
++			if (memcmp(mac, tv->output, POLY1305_MAC_SIZE) != 0) {
++				pr_err("poly1305 self-test %zu (split %zu, mixed simd): FAIL\n",
++				       vec + 1, split);
++				passed = false;
++			}
++			simd_relax(&simd_context);
++		}
++	}
++	simd_put(&simd_context);
++
++	return passed;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/device.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,473 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "queueing.h"
++#include "socket.h"
++#include "timers.h"
++#include "device.h"
++#include "ratelimiter.h"
++#include "peer.h"
++#include "messages.h"
++
++#include <linux/module.h>
++#include <linux/rtnetlink.h>
++#include <linux/inet.h>
++#include <linux/netdevice.h>
++#include <linux/inetdevice.h>
++#include <linux/if_arp.h>
++#include <linux/icmp.h>
++#include <linux/suspend.h>
++#include <net/icmp.h>
++#include <net/rtnetlink.h>
++#include <net/ip_tunnels.h>
++#include <net/addrconf.h>
++
++static LIST_HEAD(device_list);
++
++/* ndo_open handler (see netdev_ops): bring the interface up.
++ *
++ * Turns off IPv4 send-redirects for this device and for the "all" devconf of
++ * its namespace, sets the IPv6 address generation mode to NONE when an
++ * inet6_dev exists, opens the listening socket via wg_socket_init(), and then
++ * flushes staged packets for every peer (sending a keepalive to peers that
++ * have a persistent keepalive interval configured).
++ *
++ * Returns 0 on success or the negative error from wg_socket_init().
++ */
++static int wg_open(struct net_device *dev)
++{
++	struct in_device *dev_v4 = __in_dev_get_rtnl(dev);
++#ifndef COMPAT_CANNOT_USE_IN6_DEV_GET
++	struct inet6_dev *dev_v6 = __in6_dev_get(dev);
++#endif
++	struct wg_device *wg = netdev_priv(dev);
++	struct wg_peer *peer;
++	int ret;
++
++	if (dev_v4) {
++		/* At some point we might put this check near the ip_rt_send_
++		 * redirect call of ip_forward in net/ipv4/ip_forward.c, similar
++		 * to the current secpath check.
++		 */
++		IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
++		IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
++	}
++#ifndef COMPAT_CANNOT_USE_IN6_DEV_GET
++	/* The compat switch below only selects which field holds the mode. */
++	if (dev_v6)
++#ifndef COMPAT_CANNOT_USE_DEV_CNF
++		dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
++#else
++		dev_v6->addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
++#endif
++#endif
++
++	ret = wg_socket_init(wg, wg->incoming_port);
++	if (ret < 0)
++		return ret;
++	/* Peer list is walked under device_update_lock. */
++	mutex_lock(&wg->device_update_lock);
++	list_for_each_entry(peer, &wg->peer_list, peer_list) {
++		wg_packet_send_staged_packets(peer);
++		if (peer->persistent_keepalive_interval)
++			wg_packet_send_keepalive(peer);
++	}
++	mutex_unlock(&wg->device_update_lock);
++	return 0;
++}
++
++#ifdef CONFIG_PM_SLEEP
++/* Power-management notifier: before a suspend or hibernation, stop each
++ * peer's zero-key-material timer and clear its handshake and keypairs, for
++ * every device on device_list. Always returns 0.
++ */
++static int wg_pm_notification(struct notifier_block *nb, unsigned long action,
++			      void *data)
++{
++	struct wg_device *device;
++	struct wg_peer *cur;
++
++	/* On systems where suspend/resume is part of normal operation rather
++	 * than a rare event, keys are deliberately left in place.
++	 */
++	if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) || IS_ENABLED(CONFIG_ANDROID))
++		return 0;
++
++	switch (action) {
++	case PM_HIBERNATION_PREPARE:
++	case PM_SUSPEND_PREPARE:
++		break;
++	default:
++		return 0;
++	}
++
++	rtnl_lock();
++	list_for_each_entry(device, &device_list, device_list) {
++		mutex_lock(&device->device_update_lock);
++		list_for_each_entry(cur, &device->peer_list, peer_list) {
++			del_timer(&cur->timer_zero_key_material);
++			wg_noise_handshake_clear(&cur->handshake);
++			wg_noise_keypairs_clear(&cur->keypairs);
++		}
++		mutex_unlock(&device->device_update_lock);
++	}
++	rtnl_unlock();
++	rcu_barrier();
++	return 0;
++}
++
++static struct notifier_block pm_notifier = {
++	.notifier_call = wg_pm_notification
++};
++#endif
++
++/* ndo_stop handler (see netdev_ops): take the interface down.
++ *
++ * For every peer: drop staged packets, stop timers, and clear handshake
++ * state, keypairs and the last-sent-handshake stamp. Afterwards the queued
++ * incoming handshakes are purged and the sockets are released by
++ * reinitialising them to NULL. Always returns 0.
++ */
++static int wg_stop(struct net_device *dev)
++{
++	struct wg_device *device = netdev_priv(dev);
++	struct wg_peer *cur;
++
++	mutex_lock(&device->device_update_lock);
++	list_for_each_entry(cur, &device->peer_list, peer_list) {
++		wg_packet_purge_staged_packets(cur);
++		wg_timers_stop(cur);
++		wg_noise_handshake_clear(&cur->handshake);
++		wg_noise_keypairs_clear(&cur->keypairs);
++		wg_noise_reset_last_sent_handshake(&cur->last_sent_handshake);
++	}
++	mutex_unlock(&device->device_update_lock);
++
++	skb_queue_purge(&device->incoming_handshakes);
++	wg_socket_reinit(device, NULL, NULL);
++
++	return 0;
++}
++
++/* ndo_start_xmit handler (see netdev_ops): queue an outgoing packet for the
++ * peer whose allowed IPs match its destination.
++ *
++ * The packet (or, for GSO packets, each segment) is stamped with the path
++ * MTU, appended to the peer's staged packet queue, and the queue is then
++ * kicked with wg_packet_send_staged_packets().
++ *
++ * Returns NETDEV_TX_OK on success. On failure the skb is freed, tx_errors is
++ * bumped, an ICMP/ICMPv6 unreachable is sent for IPv4/IPv6 packets, and a
++ * negative errno is returned (-EPROTONOSUPPORT, -ENOKEY, -EDESTADDRREQ, or
++ * the skb_gso_segment() error).
++ */
++static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++	struct wg_device *wg = netdev_priv(dev);
++	struct sk_buff_head packets;
++	struct wg_peer *peer;
++	struct sk_buff *next;
++	sa_family_t family;
++	u32 mtu;
++	int ret;
++
++	if (unlikely(!wg_check_packet_protocol(skb))) {
++		ret = -EPROTONOSUPPORT;
++		net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
++		goto err;
++	}
++
++	/* NOTE(review): the lookup presumably returns the peer with a
++	 * reference held, since every later path drops one with
++	 * wg_peer_put() -- confirm in allowedips.c.
++	 */
++	peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb);
++	if (unlikely(!peer)) {
++		ret = -ENOKEY;
++		if (skb->protocol == htons(ETH_P_IP))
++			net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n",
++					    dev->name, &ip_hdr(skb)->daddr);
++		else if (skb->protocol == htons(ETH_P_IPV6))
++			net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n",
++					    dev->name, &ipv6_hdr(skb)->daddr);
++		goto err;
++	}
++
++	family = READ_ONCE(peer->endpoint.addr.sa_family);
++	if (unlikely(family != AF_INET && family != AF_INET6)) {
++		ret = -EDESTADDRREQ;
++		net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n",
++				    dev->name, peer->internal_id);
++		goto err_peer;
++	}
++
++	/* Read the MTU now; the dst is dropped from each skb further down. */
++	mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
++
++	__skb_queue_head_init(&packets);
++	if (!skb_is_gso(skb)) {
++		skb_mark_not_on_list(skb);
++	} else {
++		struct sk_buff *segs = skb_gso_segment(skb, 0);
++
++		if (unlikely(IS_ERR(segs))) {
++			ret = PTR_ERR(segs);
++			goto err_peer;
++		}
++		/* The original is replaced by its list of segments. */
++		dev_kfree_skb(skb);
++		skb = segs;
++	}
++
++	/* Collect the packet(s) on a private list before taking the lock. */
++	skb_list_walk_safe(skb, skb, next) {
++		skb_mark_not_on_list(skb);
++
++		skb = skb_share_check(skb, GFP_ATOMIC);
++		if (unlikely(!skb))
++			continue;
++
++		/* We only need to keep the original dst around for icmp,
++		 * so at this point we're in a position to drop it.
++		 */
++		skb_dst_drop(skb);
++
++		PACKET_CB(skb)->mtu = mtu;
++
++		__skb_queue_tail(&packets, skb);
++	}
++
++	spin_lock_bh(&peer->staged_packet_queue.lock);
++	/* If the queue is getting too big, we start removing the oldest packets
++	 * until it's small again. We do this before adding the new packet, so
++	 * we don't remove GSO segments that are in excess.
++	 */
++	while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) {
++		dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue));
++		++dev->stats.tx_dropped;
++	}
++	skb_queue_splice_tail(&packets, &peer->staged_packet_queue);
++	spin_unlock_bh(&peer->staged_packet_queue.lock);
++
++	wg_packet_send_staged_packets(peer);
++
++	wg_peer_put(peer);
++	return NETDEV_TX_OK;
++
++err_peer:
++	wg_peer_put(peer);
++err:
++	/* Both error labels are only reached while skb is still the original
++	 * packet handed in by the caller.
++	 */
++	++dev->stats.tx_errors;
++	if (skb->protocol == htons(ETH_P_IP))
++		icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
++	else if (skb->protocol == htons(ETH_P_IPV6))
++		icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
++	kfree_skb(skb);
++	return ret;
++}
++
++/* Net device callbacks for WireGuard interfaces. The address of this table
++ * is also what wg_netdevice_notification() compares against to recognise a
++ * WireGuard device.
++ */
++static const struct net_device_ops netdev_ops = {
++	.ndo_open		= wg_open,
++	.ndo_stop		= wg_stop,
++	.ndo_start_xmit		= wg_xmit,
++	.ndo_get_stats64	= ip_tunnel_get_stats64
++};
++
++/* priv_destructor for WireGuard devices (installed at the end of
++ * wg_newlink()): tear down everything wg_newlink() set up and free the
++ * net_device.
++ *
++ * The device is unlinked from device_list under the RTNL lock; the rest runs
++ * under device_update_lock: sockets are released, peers removed, workqueues
++ * destroyed, crypt queues freed, and -- after an rcu_barrier() -- the
++ * remaining per-device allocations are released and the static identity is
++ * zeroed.
++ */
++static void wg_destruct(struct net_device *dev)
++{
++	struct wg_device *wg = netdev_priv(dev);
++
++	rtnl_lock();
++	list_del(&wg->device_list);
++	rtnl_unlock();
++	mutex_lock(&wg->device_update_lock);
++	wg->incoming_port = 0;
++	wg_socket_reinit(wg, NULL, NULL);
++	/* The final references are cleared in the below calls to destroy_workqueue. */
++	wg_peer_remove_all(wg);
++	destroy_workqueue(wg->handshake_receive_wq);
++	destroy_workqueue(wg->handshake_send_wq);
++	destroy_workqueue(wg->packet_crypt_wq);
++	wg_packet_queue_free(&wg->decrypt_queue, true);
++	wg_packet_queue_free(&wg->encrypt_queue, true);
++	rcu_barrier(); /* Wait for all the peers to be actually freed. */
++	wg_ratelimiter_uninit();
++	/* Wipe the key material held in static_identity before freeing. */
++	memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
++	skb_queue_purge(&wg->incoming_handshakes);
++	free_percpu(dev->tstats);
++	free_percpu(wg->incoming_handshakes_worker);
++	/* Reference taken in wg_netdevice_notification(), if any. */
++	if (wg->have_creating_net_ref)
++		put_net(wg->creating_net);
++	kvfree(wg->index_hashtable);
++	kvfree(wg->peer_hashtable);
++	mutex_unlock(&wg->device_update_lock);
++
++	pr_debug("%s: Interface deleted\n", dev->name);
++	free_netdev(dev);
++}
++
++/* Device type attached to every WireGuard netdev by wg_setup(). */
++static const struct device_type device_type = { .name = KBUILD_MODNAME };
++
++/* rtnl_link_ops .setup callback: initialise a freshly allocated net_device
++ * as a WireGuard interface.
++ *
++ * Configures it as a header-less, address-less point-to-point device
++ * (ARPHRD_NONE, IFF_POINTOPOINT | IFF_NOARP), advertises the offload
++ * features in WG_NETDEV_FEATURES, derives the default and maximum MTU from
++ * the encapsulation overhead, and zeroes the private wg_device.
++ */
++static void wg_setup(struct net_device *dev)
++{
++	struct wg_device *wg = netdev_priv(dev);
++	enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
++				    NETIF_F_SG | NETIF_F_GSO |
++				    NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
++	/* Bytes added per packet: message overhead + UDP header + the larger
++	 * of the IPv4/IPv6 headers.
++	 */
++	const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) +
++			     max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
++
++	dev->netdev_ops = &netdev_ops;
++	dev->hard_header_len = 0;
++	dev->addr_len = 0;
++	dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
++	dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE);
++	dev->type = ARPHRD_NONE;
++	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
++#ifndef COMPAT_CANNOT_USE_IFF_NO_QUEUE
++	dev->priv_flags |= IFF_NO_QUEUE;
++#else
++	dev->tx_queue_len = 0;
++#endif
++	dev->features |= NETIF_F_LLTX;
++	dev->features |= WG_NETDEV_FEATURES;
++	dev->hw_features |= WG_NETDEV_FEATURES;
++	dev->hw_enc_features |= WG_NETDEV_FEATURES;
++	dev->mtu = ETH_DATA_LEN - overhead;
++#ifndef COMPAT_CANNOT_USE_MAX_MTU
++	dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead;
++#endif
++
++	SET_NETDEV_DEVTYPE(dev, &device_type);
++
++	/* We need to keep the dst around in case of icmp replies. */
++	netif_keep_dst(dev);
++
++	/* Start from a fully zeroed private area; only the back-pointer is set
++	 * here, the rest is initialised in wg_newlink().
++	 */
++	memset(wg, 0, sizeof(*wg));
++	wg->dev = dev;
++}
++
++/* rtnl_link_ops .newlink callback: allocate the per-device resources and
++ * register the net_device.
++ *
++ * Initialises locks, lists and the cookie checker, then allocates in order:
++ * the peer and index hashtables, per-cpu stats, the per-cpu handshake
++ * worker, three workqueues, the encrypt and decrypt queues and the
++ * ratelimiter, and finally calls register_netdevice().
++ *
++ * Returns the result of register_netdevice() on success. On any failure the
++ * resources acquired so far are released in reverse order through the err_*
++ * labels and a negative errno is returned (-ENOMEM for the plain allocation
++ * failures, otherwise the failing helper's return value).
++ */
++static int wg_newlink(struct net *src_net, struct net_device *dev,
++		      struct nlattr *tb[], struct nlattr *data[],
++		      struct netlink_ext_ack *extack)
++{
++	struct wg_device *wg = netdev_priv(dev);
++	int ret = -ENOMEM;
++
++	wg->creating_net = src_net;
++	init_rwsem(&wg->static_identity.lock);
++	mutex_init(&wg->socket_update_lock);
++	mutex_init(&wg->device_update_lock);
++	skb_queue_head_init(&wg->incoming_handshakes);
++	wg_allowedips_init(&wg->peer_allowedips);
++	wg_cookie_checker_init(&wg->cookie_checker, wg);
++	INIT_LIST_HEAD(&wg->peer_list);
++	wg->device_update_gen = 1;
++
++	wg->peer_hashtable = wg_pubkey_hashtable_alloc();
++	if (!wg->peer_hashtable)
++		return ret;
++
++	wg->index_hashtable = wg_index_hashtable_alloc();
++	if (!wg->index_hashtable)
++		goto err_free_peer_hashtable;
++
++	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
++	if (!dev->tstats)
++		goto err_free_index_hashtable;
++
++	wg->incoming_handshakes_worker =
++		wg_packet_percpu_multicore_worker_alloc(
++				wg_packet_handshake_receive_worker, wg);
++	if (!wg->incoming_handshakes_worker)
++		goto err_free_tstats;
++
++	/* Note: the receive and send handshake workqueues share the same
++	 * "wg-kex-%s" name format.
++	 */
++	wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
++			WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
++	if (!wg->handshake_receive_wq)
++		goto err_free_incoming_handshakes;
++
++	wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
++			WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
++	if (!wg->handshake_send_wq)
++		goto err_destroy_handshake_receive;
++
++	wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s",
++			WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name);
++	if (!wg->packet_crypt_wq)
++		goto err_destroy_handshake_send;
++
++	/* From here on, ret carries the failing helper's own error code. */
++	ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker,
++				   true, MAX_QUEUED_PACKETS);
++	if (ret < 0)
++		goto err_destroy_packet_crypt;
++
++	ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker,
++				   true, MAX_QUEUED_PACKETS);
++	if (ret < 0)
++		goto err_free_encrypt_queue;
++
++	ret = wg_ratelimiter_init();
++	if (ret < 0)
++		goto err_free_decrypt_queue;
++
++	ret = register_netdevice(dev);
++	if (ret < 0)
++		goto err_uninit_ratelimiter;
++
++	list_add(&wg->device_list, &device_list);
++
++	/* We wait until the end to assign priv_destructor, so that
++	 * register_netdevice doesn't call it for us if it fails.
++	 */
++	dev->priv_destructor = wg_destruct;
++
++	pr_debug("%s: Interface created\n", dev->name);
++	return ret;
++
++	/* Unwind ladder: each label releases one resource and falls through
++	 * to the labels for everything acquired before it.
++	 */
++err_uninit_ratelimiter:
++	wg_ratelimiter_uninit();
++err_free_decrypt_queue:
++	wg_packet_queue_free(&wg->decrypt_queue, true);
++err_free_encrypt_queue:
++	wg_packet_queue_free(&wg->encrypt_queue, true);
++err_destroy_packet_crypt:
++	destroy_workqueue(wg->packet_crypt_wq);
++err_destroy_handshake_send:
++	destroy_workqueue(wg->handshake_send_wq);
++err_destroy_handshake_receive:
++	destroy_workqueue(wg->handshake_receive_wq);
++err_free_incoming_handshakes:
++	free_percpu(wg->incoming_handshakes_worker);
++err_free_tstats:
++	free_percpu(dev->tstats);
++err_free_index_hashtable:
++	kvfree(wg->index_hashtable);
++err_free_peer_hashtable:
++	kvfree(wg->peer_hashtable);
++	return ret;
++}
++
++/* rtnetlink link type for WireGuard, registered in wg_device_init(). The
++ * kind string is KBUILD_MODNAME, which lookup_interface() in netlink.c also
++ * matches against.
++ */
++static struct rtnl_link_ops link_ops __read_mostly = {
++	.kind			= KBUILD_MODNAME,
++	.priv_size		= sizeof(struct wg_device),
++	.setup			= wg_setup,
++	.newlink		= wg_newlink,
++};
++
++/* Netdevice notifier: on NETDEV_REGISTER of a WireGuard device, hold a
++ * reference on the creating namespace only while the device lives in a
++ * different namespace, and drop it again once the device is back in the
++ * creating one. Always returns 0.
++ */
++static int wg_netdevice_notification(struct notifier_block *nb,
++				     unsigned long action, void *data)
++{
++	struct netdev_notifier_info *info = data;
++	struct net_device *dev = info->dev;
++	struct wg_device *wg = netdev_priv(dev);
++	bool in_creating_net;
++
++	ASSERT_RTNL();
++
++	if (action != NETDEV_REGISTER)
++		return 0;
++	/* Only devices driven by our own netdev_ops are of interest. */
++	if (dev->netdev_ops != &netdev_ops)
++		return 0;
++
++	in_creating_net = dev_net(dev) == wg->creating_net;
++	if (in_creating_net && wg->have_creating_net_ref) {
++		put_net(wg->creating_net);
++		wg->have_creating_net_ref = false;
++	} else if (!in_creating_net && !wg->have_creating_net_ref) {
++		wg->have_creating_net_ref = true;
++		get_net(wg->creating_net);
++	}
++
++	return 0;
++}
++
++static struct notifier_block netdevice_notifier = {
++	.notifier_call = wg_netdevice_notification
++};
++
++/* Module-init helper: register the PM notifier (only with CONFIG_PM_SLEEP),
++ * the netdevice notifier and the rtnetlink link type, in that order.
++ *
++ * Returns 0 on success; on failure, whatever was already registered is
++ * unregistered again and the failing call's error is returned.
++ */
++int __init wg_device_init(void)
++{
++	int ret;
++
++#ifdef CONFIG_PM_SLEEP
++	ret = register_pm_notifier(&pm_notifier);
++	if (ret)
++		return ret;
++#endif
++
++	ret = register_netdevice_notifier(&netdevice_notifier);
++	if (ret)
++		goto error_pm;
++
++	ret = rtnl_link_register(&link_ops);
++	if (ret)
++		goto error_netdevice;
++
++	return 0;
++
++error_netdevice:
++	unregister_netdevice_notifier(&netdevice_notifier);
++error_pm:
++	/* The label stays outside the #ifdef because it is jumped to
++	 * regardless of CONFIG_PM_SLEEP.
++	 */
++#ifdef CONFIG_PM_SLEEP
++	unregister_pm_notifier(&pm_notifier);
++#endif
++	return ret;
++}
++
++/* Undo wg_device_init() in reverse order of registration, then wait for
++ * outstanding RCU callbacks with rcu_barrier().
++ */
++void wg_device_uninit(void)
++{
++	rtnl_link_unregister(&link_ops);
++	unregister_netdevice_notifier(&netdevice_notifier);
++#ifdef CONFIG_PM_SLEEP
++	unregister_pm_notifier(&pm_notifier);
++#endif
++	rcu_barrier();
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/main.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,68 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "version.h"
++#include "device.h"
++#include "noise.h"
++#include "queueing.h"
++#include "ratelimiter.h"
++#include "netlink.h"
++#include "uapi/wireguard.h"
++#include "crypto/zinc.h"
++
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/genetlink.h>
++#include <net/rtnetlink.h>
++
++/* Module entry point.
++ *
++ * Initialises the bundled crypto primitives one after another (stopping at
++ * the first non-zero return), runs the WireGuard self-tests in DEBUG builds,
++ * then brings up the noise layer, the device layer and the generic netlink
++ * interface.
++ *
++ * Returns 0 on success, the failing initialiser's error code, or
++ * -ENOTRECOVERABLE when a DEBUG self-test fails.
++ */
++static int __init mod_init(void)
++{
++	int ret;
++
++	ret = chacha20_mod_init();
++	if (ret)
++		return ret;
++	ret = poly1305_mod_init();
++	if (ret)
++		return ret;
++	ret = chacha20poly1305_mod_init();
++	if (ret)
++		return ret;
++	ret = blake2s_mod_init();
++	if (ret)
++		return ret;
++	ret = curve25519_mod_init();
++	if (ret)
++		return ret;
++
++#ifdef DEBUG
++	if (!wg_allowedips_selftest())
++		return -ENOTRECOVERABLE;
++	if (!wg_packet_counter_selftest())
++		return -ENOTRECOVERABLE;
++	if (!wg_ratelimiter_selftest())
++		return -ENOTRECOVERABLE;
++#endif
++	wg_noise_init();
++
++	ret = wg_device_init();
++	if (ret < 0)
++		return ret;
++
++	ret = wg_genetlink_init();
++	if (ret < 0) {
++		/* Roll back the device layer registered just above. */
++		wg_device_uninit();
++		return ret;
++	}
++
++	pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n");
++	pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.\n");
++
++	return 0;
++}
++
++/* Module exit point: tear down the generic netlink interface first, then
++ * the device layer (the reverse of the order used in mod_init()).
++ */
++static void __exit mod_exit(void)
++{
++	wg_genetlink_uninit();
++	wg_device_uninit();
++}
++
++/* Module registration and metadata. The two aliases are keyed on the
++ * rtnetlink link kind (KBUILD_MODNAME) and the generic netlink family name
++ * (WG_GENL_NAME).
++ */
++module_init(mod_init);
++module_exit(mod_exit);
++MODULE_LICENSE("GPL v2");
++MODULE_DESCRIPTION("WireGuard secure network tunnel");
++MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
++MODULE_VERSION(WIREGUARD_VERSION);
++MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME);
++MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME);
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/netlink.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,654 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "netlink.h"
++#include "device.h"
++#include "peer.h"
++#include "socket.h"
++#include "queueing.h"
++#include "messages.h"
++#include "uapi/wireguard.h"
++#include <linux/if.h>
++#include <net/genetlink.h>
++#include <net/sock.h>
++#include <crypto/algapi.h>
++
++/* Tentative declaration; wg_get_device_dump() needs the family when building
++ * message headers. The initialised definition is presumably further down in
++ * this file.
++ */
++static struct genl_family genl_family;
++
++/* Validation policy for top-level WGDEVICE_A_* attributes. Both key
++ * attributes must be exactly NOISE_PUBLIC_KEY_LEN bytes.
++ */
++static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = {
++	[WGDEVICE_A_IFINDEX]		= { .type = NLA_U32 },
++	[WGDEVICE_A_IFNAME]		= { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
++	[WGDEVICE_A_PRIVATE_KEY]	= { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
++	[WGDEVICE_A_PUBLIC_KEY]		= { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
++	[WGDEVICE_A_FLAGS]		= { .type = NLA_U32 },
++	[WGDEVICE_A_LISTEN_PORT]	= { .type = NLA_U16 },
++	[WGDEVICE_A_FWMARK]		= { .type = NLA_U32 },
++	[WGDEVICE_A_PEERS]		= { .type = NLA_NESTED }
++};
++
++/* Validation policy for WGPEER_A_* attributes. The endpoint only has a
++ * minimum length (sizeof(struct sockaddr)), so longer socket addresses pass
++ * this check.
++ */
++static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = {
++	[WGPEER_A_PUBLIC_KEY]				= { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
++	[WGPEER_A_PRESHARED_KEY]			= { .type = NLA_EXACT_LEN, .len = NOISE_SYMMETRIC_KEY_LEN },
++	[WGPEER_A_FLAGS]				= { .type = NLA_U32 },
++	[WGPEER_A_ENDPOINT]				= { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) },
++	[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]	= { .type = NLA_U16 },
++	[WGPEER_A_LAST_HANDSHAKE_TIME]			= { .type = NLA_EXACT_LEN, .len = sizeof(struct __kernel_timespec) },
++	[WGPEER_A_RX_BYTES]				= { .type = NLA_U64 },
++	[WGPEER_A_TX_BYTES]				= { .type = NLA_U64 },
++	[WGPEER_A_ALLOWEDIPS]				= { .type = NLA_NESTED },
++	[WGPEER_A_PROTOCOL_VERSION]			= { .type = NLA_U32 }
++};
++
++/* Validation policy for WGALLOWEDIP_A_* attributes. The address only has a
++ * minimum length (sizeof(struct in_addr)), so longer addresses pass this
++ * check.
++ */
++static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = {
++	[WGALLOWEDIP_A_FAMILY]		= { .type = NLA_U16 },
++	[WGALLOWEDIP_A_IPADDR]		= { .type = NLA_MIN_LEN, .len = sizeof(struct in_addr) },
++	[WGALLOWEDIP_A_CIDR_MASK]	= { .type = NLA_U8 }
++};
++
++/* Resolve the WireGuard device addressed by a netlink request.
++ *
++ * Exactly one of WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME must be present.
++ * The device is looked up in the namespace of the requesting socket and must
++ * have been created through an rtnl link type whose kind is KBUILD_MODNAME.
++ *
++ * Returns the device's private wg_device with the reference taken by
++ * dev_get_by_index()/dev_get_by_name() still held, or ERR_PTR(-EBADR),
++ * ERR_PTR(-ENODEV) or ERR_PTR(-EOPNOTSUPP).
++ */
++static struct wg_device *lookup_interface(struct nlattr **attrs,
++					  struct sk_buff *skb)
++{
++	struct nlattr *by_index = attrs[WGDEVICE_A_IFINDEX];
++	struct nlattr *by_name = attrs[WGDEVICE_A_IFNAME];
++	const struct rtnl_link_ops *ops;
++	struct net_device *dev;
++	struct net *net;
++
++	/* Both present or both absent is rejected. */
++	if (!by_index == !by_name)
++		return ERR_PTR(-EBADR);
++
++	net = sock_net(skb->sk);
++	dev = by_index ? dev_get_by_index(net, nla_get_u32(by_index)) :
++			 dev_get_by_name(net, nla_data(by_name));
++	if (!dev)
++		return ERR_PTR(-ENODEV);
++
++	ops = dev->rtnl_link_ops;
++	if (ops && ops->kind && !strcmp(ops->kind, KBUILD_MODNAME))
++		return netdev_priv(dev);
++
++	dev_put(dev);
++	return ERR_PTR(-EOPNOTSUPP);
++}
++
++/* Append one allowed-IP entry to skb as a nested attribute (nest type 0)
++ * holding the CIDR mask, the address family and the raw address bytes. The
++ * address is sizeof(struct in6_addr) bytes for AF_INET6 and
++ * sizeof(struct in_addr) bytes for any other family.
++ *
++ * Returns 0 on success or -EMSGSIZE when skb has no room; in that case the
++ * partially written nest is cancelled.
++ */
++static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr,
++			  int family)
++{
++	struct nlattr *nest = nla_nest_start(skb, 0);
++	size_t addr_len;
++
++	if (!nest)
++		return -EMSGSIZE;
++
++	if (family == AF_INET6)
++		addr_len = sizeof(struct in6_addr);
++	else
++		addr_len = sizeof(struct in_addr);
++
++	if (nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr))
++		goto cancel;
++	if (nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family))
++		goto cancel;
++	if (nla_put(skb, WGALLOWEDIP_A_IPADDR, addr_len, ip))
++		goto cancel;
++
++	nla_nest_end(skb, nest);
++	return 0;
++
++cancel:
++	nla_nest_cancel(skb, nest);
++	return -EMSGSIZE;
++}
++
++/* Resume state of a multi-part WG_CMD_GET_DEVICE dump; it lives in the
++ * netlink callback's args area (see DUMP_CTX()).
++ */
++struct dump_ctx {
++	/* Device being dumped; set by wg_get_device_start(). */
++	struct wg_device *wg;
++	/* Last peer fully emitted by the previous dump call, or NULL. */
++	struct wg_peer *next_peer;
++	/* Snapshot of peer_allowedips.seq taken when a peer's allowed-IP
++	 * listing began; 0 when no listing is in progress.
++	 */
++	u64 allowedips_seq;
++	/* Allowed-IP node to resume from within the current peer, or NULL. */
++	struct allowedips_node *next_allowedip;
++};
++
++/* View the callback's args storage as a struct dump_ctx. */
++#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args)
++
++/* Emit one peer as a nested attribute (nest type 0) into skb.
++ *
++ * The public key is always written. When ctx->next_allowedip is NULL (i.e.
++ * this peer is not being resumed) the preshared key, last handshake time,
++ * keepalive interval, byte counters, protocol version and endpoint are
++ * written as well. Allowed IPs are then appended starting either from the
++ * first list entry or from ctx->next_allowedip.
++ *
++ * Returns 0 when the peer was emitted completely (ctx resume state is
++ * reset). Returns -EMSGSIZE when skb ran out of room: if that happened while
++ * listing allowed IPs, the partial nests are kept and the position is saved
++ * in ctx->next_allowedip so the next call can continue; otherwise the whole
++ * peer nest is cancelled.
++ */
++static int
++get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx)
++{
++
++	struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0);
++	struct allowedips_node *allowedips_node = ctx->next_allowedip;
++	bool fail;
++
++	if (!peer_nest)
++		return -EMSGSIZE;
++
++	down_read(&peer->handshake.lock);
++	fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN,
++		       peer->handshake.remote_static);
++	up_read(&peer->handshake.lock);
++	if (fail)
++		goto err;
++
++	/* Full peer details are only sent on the first pass for this peer. */
++	if (!allowedips_node) {
++		const struct __kernel_timespec last_handshake = {
++			.tv_sec = peer->walltime_last_handshake.tv_sec,
++			.tv_nsec = peer->walltime_last_handshake.tv_nsec
++		};
++
++		down_read(&peer->handshake.lock);
++		fail = nla_put(skb, WGPEER_A_PRESHARED_KEY,
++			       NOISE_SYMMETRIC_KEY_LEN,
++			       peer->handshake.preshared_key);
++		up_read(&peer->handshake.lock);
++		if (fail)
++			goto err;
++
++		if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME,
++			    sizeof(last_handshake), &last_handshake) ||
++		    nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
++				peer->persistent_keepalive_interval) ||
++		    nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes,
++				      WGPEER_A_UNSPEC) ||
++		    nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes,
++				      WGPEER_A_UNSPEC) ||
++		    nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1))
++			goto err;
++
++		/* fail is known to be false here, so a peer without an
++		 * AF_INET/AF_INET6 endpoint simply emits no endpoint attribute.
++		 */
++		read_lock_bh(&peer->endpoint_lock);
++		if (peer->endpoint.addr.sa_family == AF_INET)
++			fail = nla_put(skb, WGPEER_A_ENDPOINT,
++				       sizeof(peer->endpoint.addr4),
++				       &peer->endpoint.addr4);
++		else if (peer->endpoint.addr.sa_family == AF_INET6)
++			fail = nla_put(skb, WGPEER_A_ENDPOINT,
++				       sizeof(peer->endpoint.addr6),
++				       &peer->endpoint.addr6);
++		read_unlock_bh(&peer->endpoint_lock);
++		if (fail)
++			goto err;
++		allowedips_node =
++			list_first_entry_or_null(&peer->allowedips_list,
++					struct allowedips_node, peer_list);
++	}
++	if (!allowedips_node)
++		goto no_allowedips;
++	/* Remember the allowed-IPs sequence number when listing starts; if it
++	 * has changed by the time a listing is resumed, the remaining allowed
++	 * IPs of this peer are skipped.
++	 */
++	if (!ctx->allowedips_seq)
++		ctx->allowedips_seq = peer->device->peer_allowedips.seq;
++	else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq)
++		goto no_allowedips;
++
++	allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS);
++	if (!allowedips_nest)
++		goto err;
++
++	list_for_each_entry_from(allowedips_node, &peer->allowedips_list,
++				 peer_list) {
++		u8 cidr, ip[16] __aligned(__alignof(u64));
++		int family;
++
++		family = wg_allowedips_read_node(allowedips_node, ip, &cidr);
++		if (get_allowedips(skb, ip, cidr, family)) {
++			/* Out of room: close the nests around what was
++			 * written and record where to resume.
++			 */
++			nla_nest_end(skb, allowedips_nest);
++			nla_nest_end(skb, peer_nest);
++			ctx->next_allowedip = allowedips_node;
++			return -EMSGSIZE;
++		}
++	}
++	nla_nest_end(skb, allowedips_nest);
++no_allowedips:
++	nla_nest_end(skb, peer_nest);
++	ctx->next_allowedip = NULL;
++	ctx->allowedips_seq = 0;
++	return 0;
++err:
++	nla_nest_cancel(skb, peer_nest);
++	return -EMSGSIZE;
++}
++
++/* Dump start hook: resolve the requested interface and stash it in the dump
++ * context for wg_get_device_dump().
++ *
++ * Returns 0 on success or the error encoded by lookup_interface().
++ */
++static int wg_get_device_start(struct netlink_callback *cb)
++{
++	struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
++	struct dump_ctx *ctx = DUMP_CTX(cb);
++	struct wg_device *found;
++
++	found = lookup_interface(attrs, cb->skb);
++	if (IS_ERR(found))
++		return PTR_ERR(found);
++
++	ctx->wg = found;
++	return 0;
++}
++
++/* Dump callback for WG_CMD_GET_DEVICE: write one message of the multi-part
++ * dump into skb.
++ *
++ * The first message (ctx->next_peer == NULL) carries the device attributes
++ * (listen port, fwmark, ifindex, ifname and, if an identity is set, the
++ * private and public keys). Every message then carries as many peers as fit,
++ * continuing after ctx->next_peer.
++ *
++ * Returns 0 when the dump is complete, skb->len when more messages are
++ * needed, or -EMSGSIZE when not even the header/device part fit (the
++ * message is cancelled in that case).
++ */
++static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb)
++{
++	struct wg_peer *peer, *next_peer_cursor;
++	struct dump_ctx *ctx = DUMP_CTX(cb);
++	struct wg_device *wg = ctx->wg;
++	struct nlattr *peers_nest;
++	int ret = -EMSGSIZE;
++	bool done = true;
++	void *hdr;
++
++	rtnl_lock();
++	mutex_lock(&wg->device_update_lock);
++	/* device_update_gen serves as the dump's consistency sequence. */
++	cb->seq = wg->device_update_gen;
++	next_peer_cursor = ctx->next_peer;
++
++	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
++			  &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE);
++	if (!hdr)
++		goto out;
++	genl_dump_check_consistent(cb, hdr);
++
++	if (!ctx->next_peer) {
++		if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT,
++				wg->incoming_port) ||
++		    nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) ||
++		    nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) ||
++		    nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name))
++			goto out;
++
++		down_read(&wg->static_identity.lock);
++		if (wg->static_identity.has_identity) {
++			if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY,
++				    NOISE_PUBLIC_KEY_LEN,
++				    wg->static_identity.static_private) ||
++			    nla_put(skb, WGDEVICE_A_PUBLIC_KEY,
++				    NOISE_PUBLIC_KEY_LEN,
++				    wg->static_identity.static_public)) {
++				up_read(&wg->static_identity.lock);
++				goto out;
++			}
++		}
++		up_read(&wg->static_identity.lock);
++	}
++
++	peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS);
++	if (!peers_nest)
++		goto out;
++	/* From here on the message is considered valid, even if empty. */
++	ret = 0;
++	/* If the last cursor was removed via list_del_init in peer_remove, then
++	 * we just treat this the same as there being no more peers left. The
++	 * reason is that seq_nr should indicate to userspace that this isn't a
++	 * coherent dump anyway, so they'll try again.
++	 */
++	if (list_empty(&wg->peer_list) ||
++	    (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) {
++		nla_nest_cancel(skb, peers_nest);
++		goto out;
++	}
++	lockdep_assert_held(&wg->device_update_lock);
++	peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list);
++	list_for_each_entry_continue(peer, &wg->peer_list, peer_list) {
++		if (get_peer(peer, skb, ctx)) {
++			done = false;
++			break;
++		}
++		/* Only peers that were emitted completely advance the cursor. */
++		next_peer_cursor = peer;
++	}
++	nla_nest_end(skb, peers_nest);
++
++out:
++	/* Take a reference on the new cursor (if the dump continues) before
++	 * dropping the one held on the previous cursor.
++	 */
++	if (!ret && !done && next_peer_cursor)
++		wg_peer_get(next_peer_cursor);
++	wg_peer_put(ctx->next_peer);
++	mutex_unlock(&wg->device_update_lock);
++	rtnl_unlock();
++
++	if (ret) {
++		genlmsg_cancel(skb, hdr);
++		return ret;
++	}
++	genlmsg_end(skb, hdr);
++	if (done) {
++		ctx->next_peer = NULL;
++		return 0;
++	}
++	ctx->next_peer = next_peer_cursor;
++	return skb->len;
++
++	/* At this point, we can't really deal ourselves with safely zeroing out
++	 * the private key material after usage. This will need an additional API
++	 * in the kernel for marking skbs as zero_on_free.
++	 */
++}
++
++static int wg_get_device_done(struct netlink_callback *cb)
++{
++	struct wg_device *wg = DUMP_CTX(cb)->wg;
++
++	if (wg)
++		dev_put(wg->dev);	/* set by wg_get_device_start */
++	wg_peer_put(DUMP_CTX(cb)->next_peer);
++	return 0;
++}
++
++static int set_port(struct wg_device *wg, u16 port)
++{
++	struct wg_peer *cur;
++
++	if (port == wg->incoming_port)
++		return 0;
++	list_for_each_entry(cur, &wg->peer_list, peer_list)
++		wg_socket_clear_peer_endpoint_src(cur);
++	/* Only a running interface goes through wg_socket_init right away. */
++	if (netif_running(wg->dev))
++		return wg_socket_init(wg, port);
++	wg->incoming_port = port;
++	return 0;
++}
++
++static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs)
++{
++	int ret = -EINVAL;
++	u16 family;
++	u8 cidr;
++
++	if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] ||
++	    !attrs[WGALLOWEDIP_A_CIDR_MASK])
++		return ret;
++	family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]);
++	cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]);
++
++	if (family == AF_INET && cidr <= 32 &&
++	    nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr))
++		ret = wg_allowedips_insert_v4(
++			&peer->device->peer_allowedips,
++			nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer,
++			&peer->device->device_update_lock);
++	else if (family == AF_INET6 && cidr <= 128 &&
++		 nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr))
++		ret = wg_allowedips_insert_v6(
++			&peer->device->peer_allowedips,
++			nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer,
++			&peer->device->device_update_lock);
++
++	return ret;
++}
++
++static int set_peer(struct wg_device *wg, struct nlattr **attrs)
++{
++	u8 *public_key = NULL, *preshared_key = NULL;
++	struct wg_peer *peer = NULL;
++	u32 flags = 0;
++	int ret;
++
++	ret = -EINVAL;
++	if (attrs[WGPEER_A_PUBLIC_KEY] &&
++	    nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN)
++		public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]);
++	else
++		goto out;
++	if (attrs[WGPEER_A_PRESHARED_KEY] &&
++	    nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN)
++		preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]);
++
++	if (attrs[WGPEER_A_FLAGS])
++		flags = nla_get_u32(attrs[WGPEER_A_FLAGS]);
++	ret = -EOPNOTSUPP;
++	if (flags & ~__WGPEER_F_ALL)
++		goto out;
++
++	ret = -EPFNOSUPPORT;
++	if (attrs[WGPEER_A_PROTOCOL_VERSION]) {
++		if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1)
++			goto out;
++	}
++
++	peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
++					  nla_data(attrs[WGPEER_A_PUBLIC_KEY]));
++	ret = 0;
++	if (!peer) { /* Peer doesn't exist yet. Add a new one. */
++		if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY))
++			goto out;
++
++		/* The peer is new, so there aren't allowed IPs to remove. */
++		flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS;
++
++		down_read(&wg->static_identity.lock);
++		if (wg->static_identity.has_identity &&
++		    !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]),
++			    wg->static_identity.static_public,
++			    NOISE_PUBLIC_KEY_LEN)) {
++			/* We silently ignore peers that have the same public
++			 * key as the device. The reason we do it silently is
++			 * that we'd like for people to be able to reuse the
++			 * same set of API calls across peers.
++			 */
++			up_read(&wg->static_identity.lock);
++			ret = 0;
++			goto out;
++		}
++		up_read(&wg->static_identity.lock);
++
++		peer = wg_peer_create(wg, public_key, preshared_key);
++		if (IS_ERR(peer)) {
++			ret = PTR_ERR(peer);
++			peer = NULL;
++			goto out;
++		}
++		/* Take additional reference, as though we've just been
++		 * looked up.
++		 */
++		wg_peer_get(peer);
++	}
++
++	if (flags & WGPEER_F_REMOVE_ME) {
++		wg_peer_remove(peer);
++		goto out;
++	}
++
++	if (preshared_key) {
++		down_write(&peer->handshake.lock);
++		memcpy(&peer->handshake.preshared_key, preshared_key,
++		       NOISE_SYMMETRIC_KEY_LEN);
++		up_write(&peer->handshake.lock);
++	}
++
++	if (attrs[WGPEER_A_ENDPOINT]) {
++		struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]);
++		size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]);
++
++		if ((len == sizeof(struct sockaddr_in) &&
++		     addr->sa_family == AF_INET) ||
++		    (len == sizeof(struct sockaddr_in6) &&
++		     addr->sa_family == AF_INET6)) {
++			struct endpoint endpoint = { { { 0 } } };
++
++			memcpy(&endpoint.addr, addr, len);
++			wg_socket_set_peer_endpoint(peer, &endpoint);
++		}
++	}
++
++	if (flags & WGPEER_F_REPLACE_ALLOWEDIPS)
++		wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer,
++					     &wg->device_update_lock);
++
++	if (attrs[WGPEER_A_ALLOWEDIPS]) {
++		struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1];
++		int rem;
++
++		nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) {
++			ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX,
++					       attr, allowedip_policy, NULL);
++			if (ret < 0)
++				goto out;
++			ret = set_allowedip(peer, allowedip);
++			if (ret < 0)
++				goto out;
++		}
++	}
++
++	if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) {
++		const u16 persistent_keepalive_interval = nla_get_u16(
++				attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]);
++		const bool send_keepalive =
++			!peer->persistent_keepalive_interval &&
++			persistent_keepalive_interval &&
++			netif_running(wg->dev);
++
++		peer->persistent_keepalive_interval = persistent_keepalive_interval;
++		if (send_keepalive)
++			wg_packet_send_keepalive(peer);
++	}
++
++	if (netif_running(wg->dev))
++		wg_packet_send_staged_packets(peer);
++
++out:
++	wg_peer_put(peer);
++	if (attrs[WGPEER_A_PRESHARED_KEY])
++		memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]),
++				 nla_len(attrs[WGPEER_A_PRESHARED_KEY]));
++	return ret;
++}
++
++static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
++{
++	struct wg_device *wg = lookup_interface(info->attrs, skb);
++	u32 flags = 0;
++	int ret;
++
++	if (IS_ERR(wg)) {
++		ret = PTR_ERR(wg);
++		goto out_nodev;
++	}
++
++	rtnl_lock();
++	mutex_lock(&wg->device_update_lock);
++
++	if (info->attrs[WGDEVICE_A_FLAGS])
++		flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]);
++	ret = -EOPNOTSUPP;
++	if (flags & ~__WGDEVICE_F_ALL)
++		goto out;
++
++	ret = -EPERM;
++	if ((info->attrs[WGDEVICE_A_LISTEN_PORT] ||
++	     info->attrs[WGDEVICE_A_FWMARK]) &&
++	    !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN))
++		goto out;
++
++	++wg->device_update_gen;
++
++	if (info->attrs[WGDEVICE_A_FWMARK]) {
++		struct wg_peer *peer;
++
++		wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]);
++		list_for_each_entry(peer, &wg->peer_list, peer_list)
++			wg_socket_clear_peer_endpoint_src(peer);
++	}
++
++	if (info->attrs[WGDEVICE_A_LISTEN_PORT]) {
++		ret = set_port(wg,
++			nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT]));
++		if (ret)
++			goto out;
++	}
++
++	if (flags & WGDEVICE_F_REPLACE_PEERS)
++		wg_peer_remove_all(wg);
++
++	if (info->attrs[WGDEVICE_A_PRIVATE_KEY] &&
++	    nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) ==
++		    NOISE_PUBLIC_KEY_LEN) {
++		u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]);
++		u8 public_key[NOISE_PUBLIC_KEY_LEN];
++		struct wg_peer *peer, *temp;
++
++		if (!crypto_memneq(wg->static_identity.static_private,
++				   private_key, NOISE_PUBLIC_KEY_LEN))
++			goto skip_set_private_key;
++
++		/* We remove before setting, to prevent race, which means doing
++		 * two 25519-genpub ops.
++		 */
++		if (curve25519_generate_public(public_key, private_key)) {
++			peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
++							  public_key);
++			if (peer) {
++				wg_peer_put(peer);
++				wg_peer_remove(peer);
++			}
++		}
++
++		down_write(&wg->static_identity.lock);
++		wg_noise_set_static_identity_private_key(&wg->static_identity,
++							 private_key);
++		list_for_each_entry_safe(peer, temp, &wg->peer_list,
++					 peer_list) {
++			wg_noise_precompute_static_static(peer);
++			wg_noise_expire_current_peer_keypairs(peer);
++		}
++		wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
++		up_write(&wg->static_identity.lock);
++	}
++skip_set_private_key:
++
++	if (info->attrs[WGDEVICE_A_PEERS]) {
++		struct nlattr *attr, *peer[WGPEER_A_MAX + 1];
++		int rem;
++
++		nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) {
++			ret = nla_parse_nested(peer, WGPEER_A_MAX, attr,
++					       peer_policy, NULL);
++			if (ret < 0)
++				goto out;
++			ret = set_peer(wg, peer);
++			if (ret < 0)
++				goto out;
++		}
++	}
++	ret = 0;
++
++out:
++	mutex_unlock(&wg->device_update_lock);
++	rtnl_unlock();
++	dev_put(wg->dev);
++out_nodev:
++	if (info->attrs[WGDEVICE_A_PRIVATE_KEY])
++		memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]),
++				 nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]));
++	return ret;
++}
++
++#ifndef COMPAT_CANNOT_USE_CONST_GENL_OPS
++static const
++#else
++static
++#endif
++struct genl_ops genl_ops[] = {
++	{
++		.cmd = WG_CMD_GET_DEVICE,
++#ifndef COMPAT_CANNOT_USE_NETLINK_START
++		.start = wg_get_device_start,
++#endif
++		.dumpit = wg_get_device_dump,
++		.done = wg_get_device_done,
++#ifdef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY
++		.policy = device_policy,
++#endif
++		.flags = GENL_UNS_ADMIN_PERM
++	}, {
++		.cmd = WG_CMD_SET_DEVICE,
++		.doit = wg_set_device,
++#ifdef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY
++		.policy = device_policy,
++#endif
++		.flags = GENL_UNS_ADMIN_PERM
++	}
++};
++
++static struct genl_family genl_family
++#ifndef COMPAT_CANNOT_USE_GENL_NOPS
++__ro_after_init = {
++	.ops = genl_ops,
++	.n_ops = ARRAY_SIZE(genl_ops),
++#else
++= {
++#endif
++	.name = WG_GENL_NAME,
++	.version = WG_GENL_VERSION,
++	.maxattr = WGDEVICE_A_MAX,
++	.module = THIS_MODULE,
++#ifndef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY
++	.policy = device_policy,
++#endif
++	.netnsok = true
++};
++
++int __init wg_genetlink_init(void)
++{
++	return genl_register_family(&genl_family);
++}
++
++void __exit wg_genetlink_uninit(void)
++{
++	genl_unregister_family(&genl_family);
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/noise.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,837 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "noise.h"
++#include "device.h"
++#include "peer.h"
++#include "messages.h"
++#include "queueing.h"
++#include "peerlookup.h"
++
++#include <linux/rcupdate.h>
++#include <linux/slab.h>
++#include <linux/bitmap.h>
++#include <linux/scatterlist.h>
++#include <linux/highmem.h>
++#include <crypto/algapi.h>
++
++/* This implements Noise_IKpsk2:
++ *
++ * <- s
++ * ******
++ * -> e, es, s, ss, {t}
++ * <- e, ee, se, psk, {}
++ */
++
++static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s";
++static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@zx2c4.com";
++static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init;
++static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init;
++static atomic64_t keypair_counter = ATOMIC64_INIT(0);
++
++void __init wg_noise_init(void)
++{
++	struct blake2s_state blake;
++
++	blake2s(handshake_init_chaining_key, handshake_name, NULL,
++		NOISE_HASH_LEN, sizeof(handshake_name), 0);
++	blake2s_init(&blake, NOISE_HASH_LEN);
++	blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN);
++	blake2s_update(&blake, identifier_name, sizeof(identifier_name));
++	blake2s_final(&blake, handshake_init_hash);
++}
++
++/* Must hold peer->handshake.static_identity->lock */
++void wg_noise_precompute_static_static(struct wg_peer *peer)
++{
++	down_write(&peer->handshake.lock);
++	if (!peer->handshake.static_identity->has_identity ||
++	    !curve25519(peer->handshake.precomputed_static_static,
++			peer->handshake.static_identity->static_private,
++			peer->handshake.remote_static))
++		memset(peer->handshake.precomputed_static_static, 0,
++		       NOISE_PUBLIC_KEY_LEN);
++	up_write(&peer->handshake.lock);
++}
++
++void wg_noise_handshake_init(struct noise_handshake *handshake,
++			     struct noise_static_identity *static_identity,
++			     const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
++			     const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
++			     struct wg_peer *peer)
++{
++	memset(handshake, 0, sizeof(*handshake));
++	init_rwsem(&handshake->lock);
++	handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE;
++	handshake->entry.peer = peer;
++	memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN);
++	if (peer_preshared_key)
++		memcpy(handshake->preshared_key, peer_preshared_key,
++		       NOISE_SYMMETRIC_KEY_LEN);
++	handshake->static_identity = static_identity;
++	handshake->state = HANDSHAKE_ZEROED;
++	wg_noise_precompute_static_static(peer);
++}
++
++static void handshake_zero(struct noise_handshake *handshake)
++{
++	memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN);
++	memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN);
++	memset(&handshake->hash, 0, NOISE_HASH_LEN);
++	memset(&handshake->chaining_key, 0, NOISE_HASH_LEN);
++	handshake->remote_index = 0;
++	handshake->state = HANDSHAKE_ZEROED;
++}
++
++void wg_noise_handshake_clear(struct noise_handshake *handshake)
++{
++	/* Remove the index entry and zero the state under a single write lock
++	 * hold: create_initiation inserts the entry under this same lock.
++	 */
++	down_write(&handshake->lock);
++	wg_index_hashtable_remove(
++			handshake->entry.peer->device->index_hashtable,
++			&handshake->entry);
++	handshake_zero(handshake);
++	up_write(&handshake->lock);
++}
++
++static struct noise_keypair *keypair_create(struct wg_peer *peer)
++{
++	struct noise_keypair *kp = kzalloc(sizeof(*kp), GFP_KERNEL);
++
++	if (likely(kp)) {
++		kp->internal_id = atomic64_inc_return(&keypair_counter);
++		kp->entry.type = INDEX_HASHTABLE_KEYPAIR;
++		kp->entry.peer = peer;
++		kref_init(&kp->refcount);
++	}
++	return kp;	/* NULL when the allocation failed */
++}
++
++static void keypair_free_rcu(struct rcu_head *rcu)
++{
++	kzfree(container_of(rcu, struct noise_keypair, rcu));
++}
++
++static void keypair_free_kref(struct kref *kref)
++{
++	struct noise_keypair *keypair =
++		container_of(kref, struct noise_keypair, refcount);
++
++	net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n",
++			    keypair->entry.peer->device->dev->name,
++			    keypair->internal_id,
++			    keypair->entry.peer->internal_id);
++	wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable,
++				  &keypair->entry);
++	call_rcu(&keypair->rcu, keypair_free_rcu);
++}
++
++void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now)
++{
++	if (unlikely(!keypair))
++		return;
++	if (unlikely(unreference_now))
++		wg_index_hashtable_remove(
++			keypair->entry.peer->device->index_hashtable,
++			&keypair->entry);
++	kref_put(&keypair->refcount, keypair_free_kref);
++}
++
++struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair)
++{
++	RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
++		"Taking noise keypair reference without holding the RCU BH read lock");
++	if (likely(keypair && kref_get_unless_zero(&keypair->refcount)))
++		return keypair;
++	return NULL;	/* no keypair given, or its refcount already hit zero */
++}
++
++void wg_noise_keypairs_clear(struct noise_keypairs *keypairs)
++{
++	struct noise_keypair *old;
++
++	spin_lock_bh(&keypairs->keypair_update_lock);
++
++	/* We zero the next_keypair before zeroing the others, so that
++	 * wg_noise_received_with_keypair returns early before subsequent ones
++	 * are zeroed.
++	 */
++	old = rcu_dereference_protected(keypairs->next_keypair,
++		lockdep_is_held(&keypairs->keypair_update_lock));
++	RCU_INIT_POINTER(keypairs->next_keypair, NULL);
++	wg_noise_keypair_put(old, true);
++
++	old = rcu_dereference_protected(keypairs->previous_keypair,
++		lockdep_is_held(&keypairs->keypair_update_lock));
++	RCU_INIT_POINTER(keypairs->previous_keypair, NULL);
++	wg_noise_keypair_put(old, true);
++
++	old = rcu_dereference_protected(keypairs->current_keypair,
++		lockdep_is_held(&keypairs->keypair_update_lock));
++	RCU_INIT_POINTER(keypairs->current_keypair, NULL);
++	wg_noise_keypair_put(old, true);
++
++	spin_unlock_bh(&keypairs->keypair_update_lock);
++}
++
++void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer)
++{
++	struct noise_keypair *keypair;
++
++	wg_noise_handshake_clear(&peer->handshake);
++	wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
++
++	spin_lock_bh(&peer->keypairs.keypair_update_lock);
++	keypair = rcu_dereference_protected(peer->keypairs.next_keypair,
++			lockdep_is_held(&peer->keypairs.keypair_update_lock));
++	if (keypair)
++		keypair->sending.is_valid = false;
++	keypair = rcu_dereference_protected(peer->keypairs.current_keypair,
++			lockdep_is_held(&peer->keypairs.keypair_update_lock));
++	if (keypair)
++		keypair->sending.is_valid = false;
++	spin_unlock_bh(&peer->keypairs.keypair_update_lock);
++}
++
++static void add_new_keypair(struct noise_keypairs *keypairs,
++			    struct noise_keypair *new_keypair)
++{
++	struct noise_keypair *previous_keypair, *next_keypair, *current_keypair;
++
++	spin_lock_bh(&keypairs->keypair_update_lock);
++	previous_keypair = rcu_dereference_protected(keypairs->previous_keypair,
++		lockdep_is_held(&keypairs->keypair_update_lock));
++	next_keypair = rcu_dereference_protected(keypairs->next_keypair,
++		lockdep_is_held(&keypairs->keypair_update_lock));
++	current_keypair = rcu_dereference_protected(keypairs->current_keypair,
++		lockdep_is_held(&keypairs->keypair_update_lock));
++	if (new_keypair->i_am_the_initiator) {
++		/* If we're the initiator, it means we've sent a handshake, and
++		 * received a confirmation response, which means this new
++		 * keypair can now be used.
++		 */
++		if (next_keypair) {
++			/* If there already was a next keypair pending, we
++			 * demote it to be the previous keypair, and free the
++			 * existing current. Note that this means KCI can result
++			 * in this transition. It would perhaps be more sound to
++			 * always just get rid of the unused next keypair
++			 * instead of putting it in the previous slot, but this
++			 * might be a bit less robust. Something to think about
++			 * for the future.
++			 */
++			RCU_INIT_POINTER(keypairs->next_keypair, NULL);
++			rcu_assign_pointer(keypairs->previous_keypair,
++					   next_keypair);
++			wg_noise_keypair_put(current_keypair, true);
++		} else /* If there wasn't an existing next keypair, we replace
++			* the previous with the current one.
++			*/
++			rcu_assign_pointer(keypairs->previous_keypair,
++					   current_keypair);
++		/* At this point we can get rid of the old previous keypair, and
++		 * set up the new keypair.
++		 */
++		wg_noise_keypair_put(previous_keypair, true);
++		rcu_assign_pointer(keypairs->current_keypair, new_keypair);
++	} else {
++		/* If we're the responder, it means we can't use the new keypair
++		 * until we receive confirmation via the first data packet, so
++		 * we get rid of the existing previous one, the possibly
++		 * existing next one, and slide in the new next one.
++		 */
++		rcu_assign_pointer(keypairs->next_keypair, new_keypair);
++		wg_noise_keypair_put(next_keypair, true);
++		RCU_INIT_POINTER(keypairs->previous_keypair, NULL);
++		wg_noise_keypair_put(previous_keypair, true);
++	}
++	spin_unlock_bh(&keypairs->keypair_update_lock);
++}
++
++bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs,
++				    struct noise_keypair *received_keypair)
++{
++	struct noise_keypair *old_keypair;
++	bool key_is_new;
++
++	/* We first check without taking the spinlock. */
++	key_is_new = received_keypair ==
++		     rcu_access_pointer(keypairs->next_keypair);
++	if (likely(!key_is_new))
++		return false;
++
++	spin_lock_bh(&keypairs->keypair_update_lock);
++	/* After locking, we double check that things didn't change from
++	 * beneath us.
++	 */
++	if (unlikely(received_keypair !=
++		    rcu_dereference_protected(keypairs->next_keypair,
++			    lockdep_is_held(&keypairs->keypair_update_lock)))) {
++		spin_unlock_bh(&keypairs->keypair_update_lock);
++		return false;
++	}
++
++	/* When we've finally received the confirmation, we slide the next
++	 * into the current, the current into the previous, and get rid of
++	 * the old previous.
++	 */
++	old_keypair = rcu_dereference_protected(keypairs->previous_keypair,
++		lockdep_is_held(&keypairs->keypair_update_lock));
++	rcu_assign_pointer(keypairs->previous_keypair,
++		rcu_dereference_protected(keypairs->current_keypair,
++			lockdep_is_held(&keypairs->keypair_update_lock)));
++	wg_noise_keypair_put(old_keypair, true);
++	rcu_assign_pointer(keypairs->current_keypair, received_keypair);
++	RCU_INIT_POINTER(keypairs->next_keypair, NULL);
++
++	spin_unlock_bh(&keypairs->keypair_update_lock);
++	return true;
++}
++
++/* Must hold static_identity->lock */
++void wg_noise_set_static_identity_private_key(
++	struct noise_static_identity *static_identity,
++	const u8 private_key[NOISE_PUBLIC_KEY_LEN])
++{
++	memcpy(static_identity->static_private, private_key,
++	       NOISE_PUBLIC_KEY_LEN);
++	curve25519_clamp_secret(static_identity->static_private);
++	static_identity->has_identity = curve25519_generate_public(
++		static_identity->static_public, private_key);
++}
++
++/* This is Hugo Krawczyk's HKDF:
++ *  - https://eprint.iacr.org/2010/264.pdf
++ *  - https://tools.ietf.org/html/rfc5869
++ */
++static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
++		size_t first_len, size_t second_len, size_t third_len,
++		size_t data_len, const u8 chaining_key[NOISE_HASH_LEN])
++{
++	u8 output[BLAKE2S_HASH_SIZE + 1];
++	u8 secret[BLAKE2S_HASH_SIZE];
++
++	WARN_ON(IS_ENABLED(DEBUG) &&
++		(first_len > BLAKE2S_HASH_SIZE ||
++		 second_len > BLAKE2S_HASH_SIZE ||
++		 third_len > BLAKE2S_HASH_SIZE ||
++		 ((second_len || second_dst || third_len || third_dst) &&
++		  (!first_len || !first_dst)) ||
++		 ((third_len || third_dst) && (!second_len || !second_dst))));
++
++	/* Extract entropy from data into secret */
++	blake2s_hmac(secret, data, chaining_key, BLAKE2S_HASH_SIZE, data_len,
++		     NOISE_HASH_LEN);
++
++	if (!first_dst || !first_len)
++		goto out;
++
++	/* Expand first key: key = secret, data = 0x1 */
++	output[0] = 1;
++	blake2s_hmac(output, output, secret, BLAKE2S_HASH_SIZE, 1,
++		     BLAKE2S_HASH_SIZE);
++	memcpy(first_dst, output, first_len);
++
++	if (!second_dst || !second_len)
++		goto out;
++
++	/* Expand second key: key = secret, data = first-key || 0x2 */
++	output[BLAKE2S_HASH_SIZE] = 2;
++	blake2s_hmac(output, output, secret, BLAKE2S_HASH_SIZE,
++		     BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE);
++	memcpy(second_dst, output, second_len);
++
++	if (!third_dst || !third_len)
++		goto out;
++
++	/* Expand third key: key = secret, data = second-key || 0x3 */
++	output[BLAKE2S_HASH_SIZE] = 3;
++	blake2s_hmac(output, output, secret, BLAKE2S_HASH_SIZE,
++		     BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE);
++	memcpy(third_dst, output, third_len);
++
++out:
++	/* Clear sensitive data from stack */
++	memzero_explicit(secret, BLAKE2S_HASH_SIZE);
++	memzero_explicit(output, BLAKE2S_HASH_SIZE + 1);
++}
++
++static void symmetric_key_init(struct noise_symmetric_key *key)
++{
++	spin_lock_init(&key->counter.receive.lock);
++	atomic64_set(&key->counter.counter, 0);
++	memset(key->counter.receive.backtrack, 0,
++	       sizeof(key->counter.receive.backtrack));
++	key->birthdate = ktime_get_coarse_boottime_ns();
++	key->is_valid = true;
++}
++
++static void derive_keys(struct noise_symmetric_key *first_dst,
++			struct noise_symmetric_key *second_dst,
++			const u8 chaining_key[NOISE_HASH_LEN])
++{
++	kdf(first_dst->key, second_dst->key, NULL, NULL,
++	    NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0,
++	    chaining_key);
++	symmetric_key_init(first_dst);
++	symmetric_key_init(second_dst);
++}
++
++static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN],
++				u8 key[NOISE_SYMMETRIC_KEY_LEN],
++				const u8 private[NOISE_PUBLIC_KEY_LEN],
++				const u8 public[NOISE_PUBLIC_KEY_LEN])
++{
++	u8 dh_calculation[NOISE_PUBLIC_KEY_LEN];
++
++	if (unlikely(!curve25519(dh_calculation, private, public)))
++		return false;
++	kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN,
++	    NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key);
++	memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN);
++	return true;
++}
++
++static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN],
++					    u8 key[NOISE_SYMMETRIC_KEY_LEN],
++					    const u8 precomputed[NOISE_PUBLIC_KEY_LEN])
++{
++	static u8 zero_point[NOISE_PUBLIC_KEY_LEN];
++	if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN)))
++		return false;
++	kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN,
++	    NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
++	    chaining_key);
++	return true;
++}
++
++static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
++{
++	struct blake2s_state blake;
++
++	blake2s_init(&blake, NOISE_HASH_LEN);
++	blake2s_update(&blake, hash, NOISE_HASH_LEN);
++	blake2s_update(&blake, src, src_len);
++	blake2s_final(&blake, hash);
++}
++
++static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN],
++		    u8 key[NOISE_SYMMETRIC_KEY_LEN],
++		    const u8 psk[NOISE_SYMMETRIC_KEY_LEN])
++{
++	u8 temp_hash[NOISE_HASH_LEN];
++
++	kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN,
++	    NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key);
++	mix_hash(hash, temp_hash, NOISE_HASH_LEN);
++	memzero_explicit(temp_hash, NOISE_HASH_LEN);
++}
++
++static void handshake_init(u8 chaining_key[NOISE_HASH_LEN],
++			   u8 hash[NOISE_HASH_LEN],
++			   const u8 remote_static[NOISE_PUBLIC_KEY_LEN])
++{
++	memcpy(hash, handshake_init_hash, NOISE_HASH_LEN);
++	memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN);
++	mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN);
++}
++
++static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext,
++			    size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN],
++			    u8 hash[NOISE_HASH_LEN])
++{
++	chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash,
++				 NOISE_HASH_LEN,
++				 0 /* Always zero for Noise_IK */, key);
++	mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len));
++}
++
++static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext,
++			    size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN],
++			    u8 hash[NOISE_HASH_LEN])
++{
++	if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len,
++				      hash, NOISE_HASH_LEN,
++				      0 /* Always zero for Noise_IK */, key))
++		return false;
++	mix_hash(hash, src_ciphertext, src_len);
++	return true;
++}
++
++static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN],
++			      const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN],
++			      u8 chaining_key[NOISE_HASH_LEN],
++			      u8 hash[NOISE_HASH_LEN])
++{
++	if (ephemeral_dst != ephemeral_src)
++		memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
++	mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
++	kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0,
++	    NOISE_PUBLIC_KEY_LEN, chaining_key);
++}
++
++static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN])
++{
++	struct timespec64 now;
++
++	ktime_get_real_ts64(&now);
++
++	/* In order to prevent some sort of infoleak from precise timers, we
++	 * align the nanoseconds part down to a multiple of the largest power
++	 * of two not exceeding NSEC_PER_SEC / INITIATIONS_PER_SECOND, the
++	 * shortest interval between initiations that is accepted anyway.
++	 */
++	now.tv_nsec = ALIGN_DOWN(now.tv_nsec,
++		rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND));
++
++	/* https://cr.yp.to/libtai/tai64.html */
++	*(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec);
++	*(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec);
++}
++
++bool
++wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
++				     struct noise_handshake *handshake)
++{
++	u8 timestamp[NOISE_TIMESTAMP_LEN];
++	u8 key[NOISE_SYMMETRIC_KEY_LEN];
++	bool ret = false;
++
++	/* We need to wait for crng _before_ taking any locks, since
++	 * curve25519_generate_secret uses get_random_bytes_wait.
++	 */
++	wait_for_random_bytes();
++
++	down_read(&handshake->static_identity->lock);
++	down_write(&handshake->lock);
++
++	if (unlikely(!handshake->static_identity->has_identity))
++		goto out;
++
++	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION);
++
++	handshake_init(handshake->chaining_key, handshake->hash,
++		       handshake->remote_static);
++
++	/* e */
++	curve25519_generate_secret(handshake->ephemeral_private);
++	if (!curve25519_generate_public(dst->unencrypted_ephemeral,
++					handshake->ephemeral_private))
++		goto out;
++	message_ephemeral(dst->unencrypted_ephemeral,
++			  dst->unencrypted_ephemeral, handshake->chaining_key,
++			  handshake->hash);
++
++	/* es */
++	if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private,
++		    handshake->remote_static))
++		goto out;
++
++	/* s */
++	message_encrypt(dst->encrypted_static,
++			handshake->static_identity->static_public,
++			NOISE_PUBLIC_KEY_LEN, key, handshake->hash);
++
++	/* ss */
++	if (!mix_precomputed_dh(handshake->chaining_key, key,
++				handshake->precomputed_static_static))
++		goto out;
++
++	/* {t} */
++	tai64n_now(timestamp);
++	message_encrypt(dst->encrypted_timestamp, timestamp,
++			NOISE_TIMESTAMP_LEN, key, handshake->hash);
++
++	dst->sender_index = wg_index_hashtable_insert(
++		handshake->entry.peer->device->index_hashtable,
++		&handshake->entry);
++
++	handshake->state = HANDSHAKE_CREATED_INITIATION;
++	ret = true;
++
++out:
++	up_write(&handshake->lock);
++	up_read(&handshake->static_identity->lock);
++	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
++	return ret;
++}
++
++struct wg_peer *
++wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
++				      struct wg_device *wg)
++{
++	struct wg_peer *peer = NULL, *ret_peer = NULL;
++	struct noise_handshake *handshake;
++	bool replay_attack, flood_attack;
++	u8 key[NOISE_SYMMETRIC_KEY_LEN];
++	u8 chaining_key[NOISE_HASH_LEN];
++	u8 hash[NOISE_HASH_LEN];
++	u8 s[NOISE_PUBLIC_KEY_LEN];
++	u8 e[NOISE_PUBLIC_KEY_LEN];
++	u8 t[NOISE_TIMESTAMP_LEN];
++	u64 initiation_consumption;
++
++	down_read(&wg->static_identity.lock);
++	if (unlikely(!wg->static_identity.has_identity))
++		goto out;
++
++	handshake_init(chaining_key, hash, wg->static_identity.static_public);
++
++	/* e */
++	message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
++
++	/* es */
++	if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e))
++		goto out;
++
++	/* s */
++	if (!message_decrypt(s, src->encrypted_static,
++			     sizeof(src->encrypted_static), key, hash))
++		goto out;
++
++	/* Lookup which peer we're actually talking to */
++	peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s);
++	if (!peer)
++		goto out;
++	handshake = &peer->handshake;
++
++	/* ss */
++	if (!mix_precomputed_dh(chaining_key, key,
++				handshake->precomputed_static_static))
++	    goto out;
++
++	/* {t} */
++	if (!message_decrypt(t, src->encrypted_timestamp,
++			     sizeof(src->encrypted_timestamp), key, hash))
++		goto out;
++
++	down_read(&handshake->lock);
++	replay_attack = memcmp(t, handshake->latest_timestamp,
++			       NOISE_TIMESTAMP_LEN) <= 0;
++	flood_attack = (s64)handshake->last_initiation_consumption +
++			       NSEC_PER_SEC / INITIATIONS_PER_SECOND >
++		       (s64)ktime_get_coarse_boottime_ns();
++	up_read(&handshake->lock);
++	if (replay_attack || flood_attack)
++		goto out;
++
++	/* Success! Copy everything to peer */
++	down_write(&handshake->lock);
++	memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
++	if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0)
++		memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN);
++	memcpy(handshake->hash, hash, NOISE_HASH_LEN);
++	memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
++	handshake->remote_index = src->sender_index;
++	if ((s64)(handshake->last_initiation_consumption -
++	    (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0)
++		handshake->last_initiation_consumption = initiation_consumption;
++	handshake->state = HANDSHAKE_CONSUMED_INITIATION;
++	up_write(&handshake->lock);
++	ret_peer = peer;
++
++out:
++	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
++	memzero_explicit(hash, NOISE_HASH_LEN);
++	memzero_explicit(chaining_key, NOISE_HASH_LEN);
++	up_read(&wg->static_identity.lock);
++	if (!ret_peer)
++		wg_peer_put(peer);
++	return ret_peer;
++}
++
++bool wg_noise_handshake_create_response(struct message_handshake_response *dst,
++					struct noise_handshake *handshake)
++{
++	u8 key[NOISE_SYMMETRIC_KEY_LEN];
++	bool ret = false;
++
++	/* We need to wait for crng _before_ taking any locks, since
++	 * curve25519_generate_secret uses get_random_bytes_wait.
++	 */
++	wait_for_random_bytes();
++
++	down_read(&handshake->static_identity->lock);
++	down_write(&handshake->lock);
++
++	if (handshake->state != HANDSHAKE_CONSUMED_INITIATION)
++		goto out;
++
++	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE);
++	dst->receiver_index = handshake->remote_index;
++
++	/* e */
++	curve25519_generate_secret(handshake->ephemeral_private);
++	if (!curve25519_generate_public(dst->unencrypted_ephemeral,
++					handshake->ephemeral_private))
++		goto out;
++	message_ephemeral(dst->unencrypted_ephemeral,
++			  dst->unencrypted_ephemeral, handshake->chaining_key,
++			  handshake->hash);
++
++	/* ee */
++	if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private,
++		    handshake->remote_ephemeral))
++		goto out;
++
++	/* se */
++	if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private,
++		    handshake->remote_static))
++		goto out;
++
++	/* psk */
++	mix_psk(handshake->chaining_key, handshake->hash, key,
++		handshake->preshared_key);
++
++	/* {} */
++	message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash);
++
++	dst->sender_index = wg_index_hashtable_insert(
++		handshake->entry.peer->device->index_hashtable,
++		&handshake->entry);
++
++	handshake->state = HANDSHAKE_CREATED_RESPONSE;
++	ret = true;
++
++out:
++	up_write(&handshake->lock);
++	up_read(&handshake->static_identity->lock);
++	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
++	return ret;
++}
++
++struct wg_peer *
++wg_noise_handshake_consume_response(struct message_handshake_response *src,
++				    struct wg_device *wg)
++{
++	enum noise_handshake_state state = HANDSHAKE_ZEROED;
++	struct wg_peer *peer = NULL, *ret_peer = NULL;
++	struct noise_handshake *handshake;
++	u8 key[NOISE_SYMMETRIC_KEY_LEN];
++	u8 hash[NOISE_HASH_LEN];
++	u8 chaining_key[NOISE_HASH_LEN];
++	u8 e[NOISE_PUBLIC_KEY_LEN];
++	u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
++	u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];
++
++	down_read(&wg->static_identity.lock);
++	if (unlikely(!wg->static_identity.has_identity))
++		goto out;
++
++	handshake = (struct noise_handshake *)wg_index_hashtable_lookup(
++		wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE,
++		src->receiver_index, &peer);
++	if (unlikely(!handshake))
++		goto out;
++
++	/* Snapshot all handshake fields, PSK included, under its lock. */
++	down_read(&handshake->lock);
++	state = handshake->state;
++	memcpy(hash, handshake->hash, NOISE_HASH_LEN);
++	memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN);
++	memcpy(ephemeral_private, handshake->ephemeral_private,
++	       NOISE_PUBLIC_KEY_LEN);
++	memcpy(preshared_key, handshake->preshared_key,
++	       NOISE_SYMMETRIC_KEY_LEN);
++	up_read(&handshake->lock);
++
++	if (state != HANDSHAKE_CREATED_INITIATION)
++		goto fail;
++
++	/* e */
++	message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
++
++	/* ee */
++	if (!mix_dh(chaining_key, NULL, ephemeral_private, e))
++		goto fail;
++
++	/* se */
++	if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e))
++		goto fail;
++
++	/* psk */
++	mix_psk(chaining_key, hash, key, preshared_key);
++
++	/* {} */
++	if (!message_decrypt(NULL, src->encrypted_nothing,
++			     sizeof(src->encrypted_nothing), key, hash))
++		goto fail;
++
++	/* Success! Copy everything to peer */
++	down_write(&handshake->lock);
++	/* Re-check the state now that we hold the lock exclusively. */
++	if (handshake->state != state) {
++		up_write(&handshake->lock);
++		goto fail;
++	}
++	memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
++	memcpy(handshake->hash, hash, NOISE_HASH_LEN);
++	memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
++	handshake->remote_index = src->sender_index;
++	handshake->state = HANDSHAKE_CONSUMED_RESPONSE;
++	up_write(&handshake->lock);
++	ret_peer = peer;
++	goto out;
++
++fail:
++	wg_peer_put(peer);
++out:
++	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
++	memzero_explicit(hash, NOISE_HASH_LEN);
++	memzero_explicit(chaining_key, NOISE_HASH_LEN);
++	memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN);
++	memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN);
++	up_read(&wg->static_identity.lock);
++	return ret_peer;
++}
++
++bool wg_noise_handshake_begin_session(struct noise_handshake *handshake,
++				      struct noise_keypairs *keypairs)
++{
++	struct noise_keypair *new_keypair;
++	bool ret = false;
++
++	down_write(&handshake->lock);
++	if (handshake->state != HANDSHAKE_CREATED_RESPONSE &&
++	    handshake->state != HANDSHAKE_CONSUMED_RESPONSE)
++		goto out;
++
++	new_keypair = keypair_create(handshake->entry.peer);
++	if (!new_keypair)
++		goto out;
++	new_keypair->i_am_the_initiator = handshake->state ==
++					  HANDSHAKE_CONSUMED_RESPONSE;
++	new_keypair->remote_index = handshake->remote_index;
++
++	if (new_keypair->i_am_the_initiator)
++		derive_keys(&new_keypair->sending, &new_keypair->receiving,
++			    handshake->chaining_key);
++	else
++		derive_keys(&new_keypair->receiving, &new_keypair->sending,
++			    handshake->chaining_key);
++
++	handshake_zero(handshake);
++	rcu_read_lock_bh();
++	if (likely(!READ_ONCE(container_of(handshake, struct wg_peer,
++					   handshake)->is_dead))) {
++		add_new_keypair(keypairs, new_keypair);
++		net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n",
++				    handshake->entry.peer->device->dev->name,
++				    new_keypair->internal_id,
++				    handshake->entry.peer->internal_id);
++		ret = wg_index_hashtable_replace(
++			handshake->entry.peer->device->index_hashtable,
++			&handshake->entry, &new_keypair->entry);
++	} else {
++		kzfree(new_keypair);
++	}
++	rcu_read_unlock_bh();
++
++out:
++	up_write(&handshake->lock);
++	return ret;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/peer.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,237 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "peer.h"
++#include "device.h"
++#include "queueing.h"
++#include "timers.h"
++#include "peerlookup.h"
++#include "noise.h"
++
++#include <linux/kref.h>
++#include <linux/lockdep.h>
++#include <linux/rcupdate.h>
++#include <linux/list.h>
++
++static atomic64_t peer_counter = ATOMIC64_INIT(0);
++
++struct wg_peer *wg_peer_create(struct wg_device *wg,
++			       const u8 public_key[NOISE_PUBLIC_KEY_LEN],
++			       const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN])
++{
++	struct wg_peer *peer;
++	int ret = -ENOMEM;
++
++	lockdep_assert_held(&wg->device_update_lock);
++
++	if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
++		return ERR_PTR(ret);
++
++	peer = kzalloc(sizeof(*peer), GFP_KERNEL);
++	if (unlikely(!peer))
++		return ERR_PTR(ret);
++	peer->device = wg;
++
++	wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
++				public_key, preshared_key, peer);
++	if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
++		goto err_1;
++	if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
++				 MAX_QUEUED_PACKETS))
++		goto err_2;
++	if (wg_packet_queue_init(&peer->rx_queue, NULL, false,
++				 MAX_QUEUED_PACKETS))
++		goto err_3;
++
++	peer->internal_id = atomic64_inc_return(&peer_counter);
++	peer->serial_work_cpu = nr_cpumask_bits;
++	wg_cookie_init(&peer->latest_cookie);
++	wg_timers_init(peer);
++	wg_cookie_checker_precompute_peer_keys(peer);
++	spin_lock_init(&peer->keypairs.keypair_update_lock);
++	INIT_WORK(&peer->transmit_handshake_work,
++		  wg_packet_handshake_send_worker);
++	rwlock_init(&peer->endpoint_lock);
++	kref_init(&peer->refcount);
++	skb_queue_head_init(&peer->staged_packet_queue);
++	wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
++	set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state);
++	netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll,
++		       NAPI_POLL_WEIGHT);
++	napi_enable(&peer->napi);
++	list_add_tail(&peer->peer_list, &wg->peer_list);
++	INIT_LIST_HEAD(&peer->allowedips_list);
++	wg_pubkey_hashtable_add(wg->peer_hashtable, peer);
++	++wg->num_peers;
++	pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
++	return peer;
++
++err_3:
++	wg_packet_queue_free(&peer->tx_queue, false);
++err_2:
++	dst_cache_destroy(&peer->endpoint_cache);
++err_1:
++	kzfree(peer); /* handshake may already hold key material */
++	return ERR_PTR(ret);
++}
++
++struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer)
++{
++	RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
++			 "Taking peer reference without holding the RCU read lock");
++	if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount)))
++		return NULL;
++	return peer;
++}
++
++static void peer_make_dead(struct wg_peer *peer)
++{
++	/* Remove from configuration-time lookup structures. */
++	list_del_init(&peer->peer_list);
++	wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer,
++				     &peer->device->device_update_lock);
++	wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer);
++
++	/* Mark as dead, so that we don't allow jumping contexts after. */
++	WRITE_ONCE(peer->is_dead, true);
++
++	/* The caller must now synchronize_rcu() for this to take effect. */
++}
++
++static void peer_remove_after_dead(struct wg_peer *peer)
++{
++	WARN_ON(!peer->is_dead);
++
++	/* No more keypairs can be created for this peer, since is_dead protects
++	 * add_new_keypair, so we can now destroy existing ones.
++	 */
++	wg_noise_keypairs_clear(&peer->keypairs);
++
++	/* Destroy all ongoing timers that were in-flight at the beginning of
++	 * this function.
++	 */
++	wg_timers_stop(peer);
++
++	/* The transition between packet encryption/decryption queues isn't
++	 * guarded by is_dead, but each reference's life is strictly bounded by
++	 * two generations: once for parallel crypto and once for serial
++	 * ingestion, so we can simply flush twice, and be sure that we no
++	 * longer have references inside these queues.
++	 */
++
++	/* a) For encrypt/decrypt. */
++	flush_workqueue(peer->device->packet_crypt_wq);
++	/* b.1) For send (but not receive, since that's napi). */
++	flush_workqueue(peer->device->packet_crypt_wq);
++	/* b.2.1) For receive (but not send, since that's wq). */
++	napi_disable(&peer->napi);
++	/* b.2.2) It's now safe to remove the napi struct, which must be done
++	 * here from process context.
++	 */
++	netif_napi_del(&peer->napi);
++
++	/* Ensure any workstructs we own (like transmit_handshake_work or
++	 * clear_peer_work) no longer are in use.
++	 */
++	flush_workqueue(peer->device->handshake_send_wq);
++
++	/* After the above flushes, a peer might still be active in a few
++	 * different contexts: 1) from xmit(), before hitting is_dead and
++	 * returning, 2) from wg_packet_consume_data(), before hitting is_dead
++	 * and returning, 3) from wg_receive_handshake_packet() after a point
++	 * where it has processed an incoming handshake packet, but where
++	 * all calls to pass it off to timers fails because of is_dead. We won't
++	 * have new references in (1) eventually, because we're removed from
++	 * allowedips; we won't have new references in (2) eventually, because
++	 * wg_index_hashtable_lookup will always return NULL, since we removed
++	 * all existing keypairs and no more can be created; we won't have new
++	 * references in (3) eventually, because we're removed from the pubkey
++	 * hash table, which allows for a maximum of one handshake response,
++	 * via the still-uncleared index hashtable entry, but not more than one,
++	 * and in wg_cookie_message_consume, the lookup eventually gets a peer
++	 * with a refcount of zero, so no new reference is taken.
++	 */
++
++	--peer->device->num_peers;
++	wg_peer_put(peer);
++}
++
++/* We have a separate "remove" function to make sure that all active places
++ * where a peer is currently operating will eventually come to an end and not
++ * pass their reference onto another context.
++ */
++void wg_peer_remove(struct wg_peer *peer)
++{
++	if (unlikely(!peer))
++		return;
++	lockdep_assert_held(&peer->device->device_update_lock);
++
++	peer_make_dead(peer);
++	synchronize_rcu();
++	peer_remove_after_dead(peer);
++}
++
++void wg_peer_remove_all(struct wg_device *wg)
++{
++	struct wg_peer *peer, *temp;
++	LIST_HEAD(dead_peers);
++
++	lockdep_assert_held(&wg->device_update_lock);
++
++	/* Avoid having to traverse individually for each one. */
++	wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock);
++
++	list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) {
++		peer_make_dead(peer);
++		list_add_tail(&peer->peer_list, &dead_peers);
++	}
++	synchronize_rcu();
++	list_for_each_entry_safe(peer, temp, &dead_peers, peer_list)
++		peer_remove_after_dead(peer);
++}
++
++static void rcu_release(struct rcu_head *rcu)
++{
++	struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu);
++
++	dst_cache_destroy(&peer->endpoint_cache);
++	wg_packet_queue_free(&peer->rx_queue, false);
++	wg_packet_queue_free(&peer->tx_queue, false);
++
++	/* The final zeroing takes care of clearing any remaining handshake key
++	 * material and other potentially sensitive information.
++	 */
++	kzfree(peer);
++}
++
++static void kref_release(struct kref *refcount)
++{
++	struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount);
++
++	pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n",
++		 peer->device->dev->name, peer->internal_id,
++		 &peer->endpoint.addr);
++
++	/* Remove ourself from dynamic runtime lookup structures, now that the
++	 * last reference is gone.
++	 */
++	wg_index_hashtable_remove(peer->device->index_hashtable,
++				  &peer->handshake.entry);
++
++	/* Remove any lingering packets that didn't have a chance to be
++	 * transmitted.
++	 */
++	wg_packet_purge_staged_packets(peer);
++
++	/* Free the memory used. */
++	call_rcu(&peer->rcu, rcu_release);
++}
++
++void wg_peer_put(struct wg_peer *peer)
++{
++	if (unlikely(!peer))
++		return;
++	kref_put(&peer->refcount, kref_release);
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/peerlookup.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,221 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "peerlookup.h"
++#include "peer.h"
++#include "noise.h"
++
++static struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table,
++					const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
++{
++	/* siphash gives us a secure 64bit number based on a random key. Since
++	 * the bits are uniformly distributed, we can then mask off to get the
++	 * bits we need.
++	 */
++	const u64 hash = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key);
++
++	return &table->hashtable[hash & (HASH_SIZE(table->hashtable) - 1)];
++}
++
++struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void)
++{
++	struct pubkey_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL);
++
++	if (!table)
++		return NULL;
++
++	get_random_bytes(&table->key, sizeof(table->key));
++	hash_init(table->hashtable);
++	mutex_init(&table->lock);
++	return table;
++}
++
++void wg_pubkey_hashtable_add(struct pubkey_hashtable *table,
++			     struct wg_peer *peer)
++{
++	mutex_lock(&table->lock);
++	hlist_add_head_rcu(&peer->pubkey_hash,
++			   pubkey_bucket(table, peer->handshake.remote_static));
++	mutex_unlock(&table->lock);
++}
++
++void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table,
++				struct wg_peer *peer)
++{
++	mutex_lock(&table->lock);
++	hlist_del_init_rcu(&peer->pubkey_hash);
++	mutex_unlock(&table->lock);
++}
++
++/* Returns a strong reference to a peer */
++struct wg_peer *
++wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table,
++			   const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
++{
++	struct wg_peer *iter_peer, *peer = NULL;
++
++	rcu_read_lock_bh();
++	hlist_for_each_entry_rcu_bh(iter_peer, pubkey_bucket(table, pubkey),
++				    pubkey_hash) {
++		if (!memcmp(pubkey, iter_peer->handshake.remote_static,
++			    NOISE_PUBLIC_KEY_LEN)) {
++			peer = iter_peer;
++			break;
++		}
++	}
++	peer = wg_peer_get_maybe_zero(peer);
++	rcu_read_unlock_bh();
++	return peer;
++}
++
++static struct hlist_head *index_bucket(struct index_hashtable *table,
++				       const __le32 index)
++{
++	/* Since the indices are random and thus all bits are uniformly
++	 * distributed, we can find its bucket simply by masking.
++	 */
++	return &table->hashtable[(__force u32)index &
++				 (HASH_SIZE(table->hashtable) - 1)];
++}
++
++struct index_hashtable *wg_index_hashtable_alloc(void)
++{
++	struct index_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL);
++
++	if (!table)
++		return NULL;
++
++	hash_init(table->hashtable);
++	spin_lock_init(&table->lock);
++	return table;
++}
++
++/* At the moment, we limit ourselves to 2^20 total peers, which generally might
++ * amount to 2^20*3 items in this hashtable. The algorithm below works by
++ * picking a random number and testing it. We can see that these limits mean we
++ * usually succeed pretty quickly:
++ *
++ * >>> def calculation(tries, size):
++ * ...     return (size / 2**32)**(tries - 1) *  (1 - (size / 2**32))
++ * ...
++ * >>> calculation(1, 2**20 * 3)
++ * 0.999267578125
++ * >>> calculation(2, 2**20 * 3)
++ * 0.0007318854331970215
++ * >>> calculation(3, 2**20 * 3)
++ * 5.360489012673497e-07
++ * >>> calculation(4, 2**20 * 3)
++ * 3.9261394135792216e-10
++ *
++ * At the moment, we don't do any masking, so this algorithm isn't exactly
++ * constant time in either the random guessing or in the hash list lookup. We
++ * could require a minimum of 3 tries, which would successfully mask the
++ * guessing. This would not, however, help with the growing hash lengths, which
++ * is another thing to consider moving forward.
++ */
++
++__le32 wg_index_hashtable_insert(struct index_hashtable *table,
++				 struct index_hashtable_entry *entry)
++{
++	struct index_hashtable_entry *existing_entry;
++
++	spin_lock_bh(&table->lock);
++	hlist_del_init_rcu(&entry->index_hash);
++	spin_unlock_bh(&table->lock);
++
++	rcu_read_lock_bh();
++
++search_unused_slot:
++	/* First we try to find an unused slot, randomly, while unlocked. */
++	entry->index = (__force __le32)get_random_u32();
++	hlist_for_each_entry_rcu_bh(existing_entry,
++				    index_bucket(table, entry->index),
++				    index_hash) {
++		if (existing_entry->index == entry->index)
++			/* If it's already in use, we continue searching. */
++			goto search_unused_slot;
++	}
++
++	/* Once we've found an unused slot, we lock it, and then double-check
++	 * that nobody else stole it from us.
++	 */
++	spin_lock_bh(&table->lock);
++	hlist_for_each_entry_rcu_bh(existing_entry,
++				    index_bucket(table, entry->index),
++				    index_hash) {
++		if (existing_entry->index == entry->index) {
++			spin_unlock_bh(&table->lock);
++			/* If it was stolen, we start over. */
++			goto search_unused_slot;
++		}
++	}
++	/* Otherwise, we know we have it exclusively (since we're locked),
++	 * so we insert.
++	 */
++	hlist_add_head_rcu(&entry->index_hash,
++			   index_bucket(table, entry->index));
++	spin_unlock_bh(&table->lock);
++
++	rcu_read_unlock_bh();
++
++	return entry->index;
++}
++
++bool wg_index_hashtable_replace(struct index_hashtable *table,
++				struct index_hashtable_entry *old,
++				struct index_hashtable_entry *new)
++{
++	bool ret;
++
++	/* Test under the lock so old cannot be unhashed behind our back. */
++	spin_lock_bh(&table->lock);
++	ret = !hlist_unhashed(&old->index_hash);
++	if (likely(ret)) {
++		new->index = old->index;
++		hlist_replace_rcu(&old->index_hash, &new->index_hash);
++		/* old's node is NULLed and old may be reinserted later, so an
++		 * RCU lookup may end early or hop buckets; packet is dropped.
++		 */
++		INIT_HLIST_NODE(&old->index_hash);
++	}
++	spin_unlock_bh(&table->lock);
++	return ret;
++}
++
++void wg_index_hashtable_remove(struct index_hashtable *table,
++			       struct index_hashtable_entry *entry)
++{
++	spin_lock_bh(&table->lock);
++	hlist_del_init_rcu(&entry->index_hash);
++	spin_unlock_bh(&table->lock);
++}
++
++/* Returns a strong reference to an entry->peer */
++struct index_hashtable_entry *
++wg_index_hashtable_lookup(struct index_hashtable *table,
++			  const enum index_hashtable_type type_mask,
++			  const __le32 index, struct wg_peer **peer)
++{
++	struct index_hashtable_entry *iter_entry, *entry = NULL;
++
++	rcu_read_lock_bh();
++	hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index),
++				    index_hash) {
++		if (iter_entry->index == index) {
++			if (likely(iter_entry->type & type_mask))
++				entry = iter_entry;
++			break;
++		}
++	}
++	if (likely(entry)) {
++		entry->peer = wg_peer_get_maybe_zero(entry->peer);
++		if (likely(entry->peer))
++			*peer = entry->peer;
++		else
++			entry = NULL;
++	}
++	rcu_read_unlock_bh();
++	return entry;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/queueing.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,53 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "queueing.h"
++
++struct multicore_worker __percpu *
++wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
++{
++	int cpu;
++	struct multicore_worker __percpu *worker =
++		alloc_percpu(struct multicore_worker);
++
++	if (!worker)
++		return NULL;
++
++	for_each_possible_cpu(cpu) {
++		per_cpu_ptr(worker, cpu)->ptr = ptr;
++		INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function);
++	}
++	return worker;
++}
++
++int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
++			 bool multicore, unsigned int len)
++{
++	int ret;
++
++	memset(queue, 0, sizeof(*queue));
++	ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
++	if (ret)
++		return ret;
++	if (function && multicore) {
++		queue->worker = wg_packet_percpu_multicore_worker_alloc(
++			function, queue);
++		if (!queue->worker) { /* don't leak the ring on failure */
++			ptr_ring_cleanup(&queue->ring, NULL);
++			return -ENOMEM;
++		}
++	} else if (function) {
++		INIT_WORK(&queue->work, function);
++	}
++	return 0;
++}
++
++void wg_packet_queue_free(struct crypt_queue *queue, bool multicore)
++{
++	if (multicore)
++		free_percpu(queue->worker);
++	WARN_ON(!__ptr_ring_empty(&queue->ring));
++	ptr_ring_cleanup(&queue->ring, NULL);
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/ratelimiter.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,235 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifdef COMPAT_CANNOT_DEPRECIATE_BH_RCU
++/* We normally alias all non-_bh functions to the _bh ones in the compat layer,
++ * but that's not appropriate here, where we actually do want non-_bh ones.
++ */
++#undef synchronize_rcu
++#define synchronize_rcu old_synchronize_rcu
++#undef call_rcu
++#define call_rcu old_call_rcu
++#undef rcu_barrier
++#define rcu_barrier old_rcu_barrier
++#endif
++
++#include "ratelimiter.h"
++#include <linux/siphash.h>
++#include <linux/mm.h>
++#include <linux/slab.h>
++#include <net/ip.h>
++
++static struct kmem_cache *entry_cache;
++static hsiphash_key_t key;
++static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock");
++static DEFINE_MUTEX(init_lock);
++static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */
++static atomic_t total_entries = ATOMIC_INIT(0);
++static unsigned int max_entries, table_size;
++static void wg_ratelimiter_gc_entries(struct work_struct *);
++static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries);
++static struct hlist_head *table_v4;
++#if IS_ENABLED(CONFIG_IPV6)
++static struct hlist_head *table_v6;
++#endif
++
++struct ratelimiter_entry {
++	u64 last_time_ns, tokens, ip;
++	void *net;
++	spinlock_t lock;
++	struct hlist_node hash;
++	struct rcu_head rcu;
++};
++
++enum {
++	PACKETS_PER_SECOND = 20,
++	PACKETS_BURSTABLE = 5,
++	PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND,
++	TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE
++};
++
++static void entry_free(struct rcu_head *rcu)
++{
++	kmem_cache_free(entry_cache,
++			container_of(rcu, struct ratelimiter_entry, rcu));
++	atomic_dec(&total_entries);
++}
++
++static void entry_uninit(struct ratelimiter_entry *entry)
++{
++	hlist_del_rcu(&entry->hash);
++	call_rcu(&entry->rcu, entry_free);
++}
++
++/* Calling this function with a NULL work uninits all entries. */
++static void wg_ratelimiter_gc_entries(struct work_struct *work)
++{
++	const u64 now = ktime_get_coarse_boottime_ns();
++	struct ratelimiter_entry *entry;
++	struct hlist_node *temp;
++	unsigned int i;
++
++	for (i = 0; i < table_size; ++i) {
++		spin_lock(&table_lock);
++		hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) {
++			if (unlikely(!work) ||
++			    now - entry->last_time_ns > NSEC_PER_SEC)
++				entry_uninit(entry);
++		}
++#if IS_ENABLED(CONFIG_IPV6)
++		hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) {
++			if (unlikely(!work) ||
++			    now - entry->last_time_ns > NSEC_PER_SEC)
++				entry_uninit(entry);
++		}
++#endif
++		spin_unlock(&table_lock);
++		if (likely(work))
++			cond_resched();
++	}
++	if (likely(work))
++		queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
++}
++
++bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net)
++{
++	/* We only take the bottom half of the net pointer, so that we can hash
++	 * 3 words in the end. This way, siphash's len param fits into the final
++	 * u32, and we don't incur an extra round.
++	 */
++	const u32 net_word = (unsigned long)net;
++	struct ratelimiter_entry *entry;
++	struct hlist_head *bucket;
++	u64 ip;
++
++	if (skb->protocol == htons(ETH_P_IP)) {
++		ip = (u64 __force)ip_hdr(skb)->saddr;
++		bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) &
++				   (table_size - 1)];
++	}
++#if IS_ENABLED(CONFIG_IPV6)
++	else if (skb->protocol == htons(ETH_P_IPV6)) {
++		/* Only use 64 bits, so as to ratelimit the whole /64. */
++		memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip));
++		bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) &
++				   (table_size - 1)];
++	}
++#endif
++	else
++		return false;
++	rcu_read_lock();
++	hlist_for_each_entry_rcu(entry, bucket, hash) {
++		if (entry->net == net && entry->ip == ip) {
++			u64 now, tokens;
++			bool ret;
++			/* Quasi-inspired by nft_limit.c, but this is actually a
++			 * slightly different algorithm. Namely, we incorporate
++			 * the burst as part of the maximum tokens, rather than
++			 * as part of the rate.
++			 */
++			spin_lock(&entry->lock);
++			now = ktime_get_coarse_boottime_ns();
++			tokens = min_t(u64, TOKEN_MAX,
++				       entry->tokens + now -
++					       entry->last_time_ns);
++			entry->last_time_ns = now;
++			ret = tokens >= PACKET_COST;
++			entry->tokens = ret ? tokens - PACKET_COST : tokens;
++			spin_unlock(&entry->lock);
++			rcu_read_unlock();
++			return ret;
++		}
++	}
++	rcu_read_unlock();
++
++	if (atomic_inc_return(&total_entries) > max_entries)
++		goto err_oom;
++
++	entry = kmem_cache_alloc(entry_cache, GFP_KERNEL);
++	if (unlikely(!entry))
++		goto err_oom;
++
++	entry->net = net;
++	entry->ip = ip;
++	INIT_HLIST_NODE(&entry->hash);
++	spin_lock_init(&entry->lock);
++	entry->last_time_ns = ktime_get_coarse_boottime_ns();
++	entry->tokens = TOKEN_MAX - PACKET_COST;
++	spin_lock(&table_lock);
++	hlist_add_head_rcu(&entry->hash, bucket);
++	spin_unlock(&table_lock);
++	return true;
++
++err_oom:
++	atomic_dec(&total_entries);
++	return false;
++}
++
++int wg_ratelimiter_init(void)
++{
++	mutex_lock(&init_lock);
++	if (++init_refcnt != 1)
++		goto out;
++
++	entry_cache = KMEM_CACHE(ratelimiter_entry, 0);
++	if (!entry_cache)
++		goto err;
++
++	/* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,
++	 * but what it shares in common is that it uses a massive hashtable. So,
++	 * we borrow their wisdom about good table sizes on different systems
++	 * dependent on RAM. This calculation here comes from there.
++	 */
++	table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 :
++		max_t(unsigned long, 16, roundup_pow_of_two(
++			(totalram_pages() << PAGE_SHIFT) /
++			(1U << 14) / sizeof(struct hlist_head)));
++	max_entries = table_size * 8;
++
++	table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL);
++	if (unlikely(!table_v4))
++		goto err_kmemcache;
++
++#if IS_ENABLED(CONFIG_IPV6)
++	table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL);
++	if (unlikely(!table_v6)) {
++		kvfree(table_v4);
++		goto err_kmemcache;
++	}
++#endif
++
++	queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
++	get_random_bytes(&key, sizeof(key));
++out:
++	mutex_unlock(&init_lock);
++	return 0;
++
++err_kmemcache:
++	kmem_cache_destroy(entry_cache);
++err:
++	--init_refcnt;
++	mutex_unlock(&init_lock);
++	return -ENOMEM;
++}
++
++void wg_ratelimiter_uninit(void)
++{
++	mutex_lock(&init_lock);
++	if (!init_refcnt || --init_refcnt) /* never set up, or users remain */
++		goto out;
++
++	cancel_delayed_work_sync(&gc_work); /* stop the periodic GC first */
++	wg_ratelimiter_gc_entries(NULL); /* presumably NULL = drop all; confirm */
++	rcu_barrier(); /* let queued RCU callbacks finish before freeing */
++	kvfree(table_v4);
++#if IS_ENABLED(CONFIG_IPV6)
++	kvfree(table_v6);
++#endif
++	kmem_cache_destroy(entry_cache);
++out:
++	mutex_unlock(&init_lock);
++}
++
++#include "selftest/ratelimiter.c"
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/receive.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,607 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "queueing.h"
++#include "device.h"
++#include "peer.h"
++#include "timers.h"
++#include "messages.h"
++#include "cookie.h"
++#include "socket.h"
++
++#include <linux/simd.h>
++#include <linux/ip.h>
++#include <linux/ipv6.h>
++#include <linux/udp.h>
++#include <net/ip_tunnels.h>
++
++/* Must be called with bh disabled; counts one received packet of len bytes. */
++static void update_rx_stats(struct wg_peer *peer, size_t len)
++{
++	struct pcpu_sw_netstats *tstats =
++		get_cpu_ptr(peer->device->dev->tstats);
++
++	u64_stats_update_begin(&tstats->syncp);
++	++tstats->rx_packets;
++	tstats->rx_bytes += len;
++	peer->rx_bytes += len; /* per-peer total, kept next to the device stats */
++	u64_stats_update_end(&tstats->syncp);
++	put_cpu_ptr(tstats); /* pairs with get_cpu_ptr() above */
++}
++
++#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type)
++
++static size_t validate_header_len(struct sk_buff *skb)
++{
++	if (unlikely(skb->len < sizeof(struct message_header)))
++		return 0; /* too short to hold a message header */
++	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) &&
++	    skb->len >= MESSAGE_MINIMUM_LENGTH)
++		return sizeof(struct message_data); /* header only; payload varies */
++	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) &&
++	    skb->len == sizeof(struct message_handshake_initiation))
++		return sizeof(struct message_handshake_initiation);
++	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) &&
++	    skb->len == sizeof(struct message_handshake_response))
++		return sizeof(struct message_handshake_response);
++	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) &&
++	    skb->len == sizeof(struct message_handshake_cookie))
++		return sizeof(struct message_handshake_cookie);
++	return 0; /* unknown type, or length does not fit the type */
++}
++
++static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg)
++{
++	size_t data_offset, data_len, header_len;
++	struct udphdr *udp;
++
++	if (unlikely(!wg_check_packet_protocol(skb) ||
++		     skb_transport_header(skb) < skb->head ||
++		     (skb_transport_header(skb) + sizeof(struct udphdr)) >
++			     skb_tail_pointer(skb)))
++		return -EINVAL; /* Bogus IP header */
++	udp = udp_hdr(skb);
++	data_offset = (u8 *)udp - skb->data; /* offset of the UDP header */
++	if (unlikely(data_offset > U16_MAX ||
++		     data_offset + sizeof(struct udphdr) > skb->len))
++		/* Packet has offset at impossible location or isn't big enough
++		 * to have UDP fields.
++		 */
++		return -EINVAL;
++	data_len = ntohs(udp->len); /* UDP header plus payload, as claimed */
++	if (unlikely(data_len < sizeof(struct udphdr) ||
++		     data_len > skb->len - data_offset))
++		/* UDP packet is reporting too small of a size or lying about
++		 * its size.
++		 */
++		return -EINVAL;
++	data_len -= sizeof(struct udphdr); /* now the UDP payload length */
++	data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data;
++	if (unlikely(!pskb_may_pull(skb,
++				data_offset + sizeof(struct message_header)) ||
++		     pskb_trim(skb, data_len + data_offset) < 0))
++		return -EINVAL;
++	skb_pull(skb, data_offset); /* skb->data now at the UDP payload */
++	if (unlikely(skb->len != data_len))
++		/* Final len does not agree with calculated len */
++		return -EINVAL;
++	header_len = validate_header_len(skb);
++	if (unlikely(!header_len))
++		return -EINVAL;
++	__skb_push(skb, data_offset); /* back up so the pull check covers hdrs */
++	if (unlikely(!pskb_may_pull(skb, data_offset + header_len)))
++		return -EINVAL;
++	__skb_pull(skb, data_offset); /* leave skb->data at the message again */
++	return 0; /* 0 on success; every rejection above is -EINVAL */
++}
++
++static void wg_receive_handshake_packet(struct wg_device *wg,
++					struct sk_buff *skb)
++{
++	enum cookie_mac_state mac_state;
++	struct wg_peer *peer = NULL;
++	/* This is global, so that our load calculation applies to the whole
++	 * system. We don't care about races with it at all.
++	 */
++	static u64 last_under_load;
++	bool packet_needs_cookie;
++	bool under_load;
++
++	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) {
++		net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n",
++					wg->dev->name, skb);
++		wg_cookie_message_consume(
++			(struct message_handshake_cookie *)skb->data, wg);
++		return; /* cookie replies skip the MAC check and stats below */
++	}
++
++	under_load = skb_queue_len(&wg->incoming_handshakes) >=
++		     MAX_QUEUED_INCOMING_HANDSHAKES / 8;
++	if (under_load) {
++		last_under_load = ktime_get_coarse_boottime_ns();
++	} else if (last_under_load) {
++		under_load = !wg_birthdate_has_expired(last_under_load, 1);
++		if (!under_load)
++			last_under_load = 0; /* load episode is over */
++	}
++	mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb,
++					      under_load);
++	if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) ||
++	    (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) {
++		packet_needs_cookie = false;
++	} else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) {
++		packet_needs_cookie = true; /* answer with a cookie, not a session */
++	} else {
++		net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n",
++					wg->dev->name, skb);
++		return;
++	}
++
++	switch (SKB_TYPE_LE32(skb)) {
++	case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): {
++		struct message_handshake_initiation *message =
++			(struct message_handshake_initiation *)skb->data;
++
++		if (packet_needs_cookie) {
++			wg_packet_send_handshake_cookie(wg, skb,
++							message->sender_index);
++			return;
++		}
++		peer = wg_noise_handshake_consume_initiation(message, wg);
++		if (unlikely(!peer)) {
++			net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n",
++						wg->dev->name, skb);
++			return;
++		}
++		wg_socket_set_peer_endpoint_from_skb(peer, skb);
++		net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n",
++				    wg->dev->name, peer->internal_id,
++				    &peer->endpoint.addr);
++		wg_packet_send_handshake_response(peer);
++		break;
++	}
++	case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): {
++		struct message_handshake_response *message =
++			(struct message_handshake_response *)skb->data;
++
++		if (packet_needs_cookie) {
++			wg_packet_send_handshake_cookie(wg, skb,
++							message->sender_index);
++			return;
++		}
++		peer = wg_noise_handshake_consume_response(message, wg);
++		if (unlikely(!peer)) {
++			net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n",
++						wg->dev->name, skb);
++			return;
++		}
++		wg_socket_set_peer_endpoint_from_skb(peer, skb);
++		net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n",
++				    wg->dev->name, peer->internal_id,
++				    &peer->endpoint.addr);
++		if (wg_noise_handshake_begin_session(&peer->handshake,
++						     &peer->keypairs)) {
++			wg_timers_session_derived(peer);
++			wg_timers_handshake_complete(peer);
++			/* Calling this function will either send any existing
++			 * packets in the queue and not send a keepalive, which
++			 * is the best case, or, if there's nothing in the
++			 * queue, it will send a keepalive, in order to give
++			 * immediate confirmation of the session.
++			 */
++			wg_packet_send_keepalive(peer);
++		}
++		break;
++	}
++	}
++
++	if (unlikely(!peer)) { /* switch matched neither handshake type */
++		WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n");
++		return;
++	}
++
++	local_bh_disable(); /* update_rx_stats() requires bh disabled */
++	update_rx_stats(peer, skb->len);
++	local_bh_enable();
++
++	wg_timers_any_authenticated_packet_received(peer);
++	wg_timers_any_authenticated_packet_traversal(peer);
++	wg_peer_put(peer); /* presumably drops the ref from consume_*(); confirm */
++}
++
++void wg_packet_handshake_receive_worker(struct work_struct *work)
++{
++	struct wg_device *wg = container_of(work, struct multicore_worker,
++					    work)->ptr;
++	struct sk_buff *skb;
++
++	while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
++		wg_receive_handshake_packet(wg, skb);
++		dev_kfree_skb(skb); /* freed here on every path */
++		cond_resched(); /* give the scheduler a chance between packets */
++	}
++}
++
++static void keep_key_fresh(struct wg_peer *peer)
++{
++	struct noise_keypair *keypair;
++	bool send = false;
++
++	if (peer->sent_lastminute_handshake)
++		return; /* already requested one; flag is set below */
++
++	rcu_read_lock_bh();
++	keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
++	if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
++	    keypair->i_am_the_initiator &&
++	    unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
++			REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT)))
++		send = true; /* only decide here; send after the RCU section */
++	rcu_read_unlock_bh();
++
++	if (send) {
++		peer->sent_lastminute_handshake = true;
++		wg_packet_send_queued_handshake_initiation(peer, false);
++	}
++}
++
++static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key,
++			   simd_context_t *simd_context)
++{
++	struct scatterlist sg[MAX_SKB_FRAGS + 8];
++	struct sk_buff *trailer;
++	unsigned int offset;
++	int num_frags;
++
++	if (unlikely(!key))
++		return false;
++
++	if (unlikely(!READ_ONCE(key->is_valid) ||
++		  wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) ||
++		  key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) {
++		WRITE_ONCE(key->is_valid, false); /* mark the key unusable */
++		return false;
++	}
++
++	PACKET_CB(skb)->nonce =
++		le64_to_cpu(((struct message_data *)skb->data)->counter);
++
++	/* We ensure that the network header is part of the packet before we
++	 * call skb_cow_data, so that there's no chance that data is removed
++	 * from the skb, so that later we can extract the original endpoint.
++	 */
++	offset = skb->data - skb_network_header(skb);
++	skb_push(skb, offset);
++	num_frags = skb_cow_data(skb, 0, &trailer);
++	offset += sizeof(struct message_data); /* also skip the data header */
++	skb_pull(skb, offset); /* skb->data now at the encrypted payload */
++	if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
++		return false;
++
++	sg_init_table(sg, num_frags);
++	if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0)
++		return false;
++
++	if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0,
++						 PACKET_CB(skb)->nonce, key->key,
++						 simd_context))
++		return false; /* decryption/authentication failed */
++
++	/* Another ugly situation of pushing and pulling the header so as to
++	 * keep endpoint information intact.
++	 */
++	skb_push(skb, offset);
++	if (pskb_trim(skb, skb->len - noise_encrypted_len(0)))
++		return false;
++	skb_pull(skb, offset);
++
++	return true; /* skb->data now points at the decrypted payload */
++}
++
++/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */
++static bool counter_validate(union noise_counter *counter, u64 their_counter)
++{
++	unsigned long index, index_current, top, i;
++	bool ret = false;
++
++	spin_lock_bh(&counter->receive.lock);
++
++	if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 ||
++		     their_counter >= REJECT_AFTER_MESSAGES))
++		goto out;
++
++	++their_counter; /* receive.counter stores the highest nonce seen + 1 */
++
++	if (unlikely((COUNTER_WINDOW_SIZE + their_counter) <
++		     counter->receive.counter))
++		goto out; /* behind the replay window: reject */
++
++	index = their_counter >> ilog2(BITS_PER_LONG); /* word holding the bit */
++
++	if (likely(their_counter > counter->receive.counter)) {
++		index_current = counter->receive.counter >> ilog2(BITS_PER_LONG);
++		top = min_t(unsigned long, index - index_current,
++			    COUNTER_BITS_TOTAL / BITS_PER_LONG);
++		for (i = 1; i <= top; ++i) /* zero the words we advance past */
++			counter->receive.backtrack[(i + index_current) &
++				((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0;
++		counter->receive.counter = their_counter;
++	}
++
++	index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; /* wrap into the ring */
++	ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1),
++				&counter->receive.backtrack[index]);
++
++out:
++	spin_unlock_bh(&counter->receive.lock);
++	return ret; /* true only if this counter was not seen before */
++}
++
++#include "selftest/counter.c"
++
++static void wg_packet_consume_data_done(struct wg_peer *peer,
++					struct sk_buff *skb,
++					struct endpoint *endpoint)
++{
++	struct net_device *dev = peer->device->dev;
++	unsigned int len, len_before_trim;
++	struct wg_peer *routed_peer;
++
++	wg_socket_set_peer_endpoint(peer, endpoint);
++
++	if (unlikely(wg_noise_received_with_keypair(&peer->keypairs,
++						    PACKET_CB(skb)->keypair))) {
++		wg_timers_handshake_complete(peer);
++		wg_packet_send_staged_packets(peer);
++	}
++
++	keep_key_fresh(peer);
++
++	wg_timers_any_authenticated_packet_received(peer);
++	wg_timers_any_authenticated_packet_traversal(peer);
++
++	/* A packet with length 0 is a keepalive packet */
++	if (unlikely(!skb->len)) {
++		update_rx_stats(peer, message_data_len(0));
++		net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n",
++				    dev->name, peer->internal_id,
++				    &peer->endpoint.addr);
++		goto packet_processed;
++	}
++
++	wg_timers_data_received(peer);
++
++	if (unlikely(skb_network_header(skb) < skb->head))
++		goto dishonest_packet_size;
++	if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) &&
++		       (ip_hdr(skb)->version == 4 ||
++			(ip_hdr(skb)->version == 6 &&
++			 pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))))))
++		goto dishonest_packet_type;
++
++	skb->dev = dev;
++	/* We've already verified the Poly1305 auth tag, which means this packet
++	 * was not modified in transit. We can therefore tell the networking
++	 * stack that all checksums of every layer of encapsulation have already
++	 * been checked "by the hardware" and therefore is unnecessary to check
++	 * again in software.
++	 */
++	skb->ip_summed = CHECKSUM_UNNECESSARY;
++#ifndef COMPAT_CANNOT_USE_CSUM_LEVEL
++	skb->csum_level = ~0; /* All levels */
++#endif
++	skb->protocol = wg_examine_packet_protocol(skb);
++	if (skb->protocol == htons(ETH_P_IP)) {
++		len = ntohs(ip_hdr(skb)->tot_len);
++		if (unlikely(len < sizeof(struct iphdr)))
++			goto dishonest_packet_size;
++		if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) /* outer CE -> inner */
++			IP_ECN_set_ce(ip_hdr(skb));
++	} else if (skb->protocol == htons(ETH_P_IPV6)) {
++		len = ntohs(ipv6_hdr(skb)->payload_len) +
++		      sizeof(struct ipv6hdr);
++		if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) /* outer CE -> inner */
++			IP6_ECN_set_ce(skb, ipv6_hdr(skb));
++	} else {
++		goto dishonest_packet_type;
++	}
++
++	if (unlikely(len > skb->len)) /* inner header claims more than we have */
++		goto dishonest_packet_size;
++	len_before_trim = skb->len; /* stats count the pre-trim length */
++	if (unlikely(pskb_trim(skb, len)))
++		goto packet_processed; /* trim failed: freed, no error counter */
++
++	routed_peer = wg_allowedips_lookup_src(&peer->device->peer_allowedips,
++					       skb);
++	wg_peer_put(routed_peer); /* We don't need the extra reference. */
++
++	if (unlikely(routed_peer != peer)) /* pointer compare only, no deref */
++		goto dishonest_packet_peer;
++
++	if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) {
++		++dev->stats.rx_dropped;
++		net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n",
++				    dev->name, peer->internal_id,
++				    &peer->endpoint.addr);
++	} else {
++		update_rx_stats(peer, message_data_len(len_before_trim));
++	}
++	return; /* skb was handed to GRO above; it is not freed here */
++
++dishonest_packet_peer:
++	net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n",
++				dev->name, skb, peer->internal_id,
++				&peer->endpoint.addr);
++	++dev->stats.rx_errors;
++	++dev->stats.rx_frame_errors;
++	goto packet_processed;
++dishonest_packet_type:
++	net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n",
++			    dev->name, peer->internal_id, &peer->endpoint.addr);
++	++dev->stats.rx_errors;
++	++dev->stats.rx_frame_errors;
++	goto packet_processed;
++dishonest_packet_size:
++	net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n",
++			    dev->name, peer->internal_id, &peer->endpoint.addr);
++	++dev->stats.rx_errors;
++	++dev->stats.rx_length_errors;
++	goto packet_processed;
++packet_processed: /* common exit for every path that keeps the skb */
++	dev_kfree_skb(skb);
++}
++
++int wg_packet_rx_poll(struct napi_struct *napi, int budget)
++{
++	struct wg_peer *peer = container_of(napi, struct wg_peer, napi);
++	struct crypt_queue *queue = &peer->rx_queue;
++	struct noise_keypair *keypair;
++	struct endpoint endpoint;
++	enum packet_state state;
++	struct sk_buff *skb;
++	int work_done = 0;
++	bool free;
++
++	if (unlikely(budget <= 0))
++		return 0;
++
++	while ((skb = __ptr_ring_peek(&queue->ring)) != NULL &&
++	       (state = atomic_read_acquire(&PACKET_CB(skb)->state)) !=
++		       PACKET_STATE_UNCRYPTED) {
++		__ptr_ring_discard_one(&queue->ring); /* head is finished: pop */
++		peer = PACKET_PEER(skb);
++		keypair = PACKET_CB(skb)->keypair;
++		free = true; /* cleared only when the skb is passed on */
++
++		if (unlikely(state != PACKET_STATE_CRYPTED))
++			goto next; /* e.g. DEAD, set when decryption failed */
++
++		if (unlikely(!counter_validate(&keypair->receiving.counter,
++					       PACKET_CB(skb)->nonce))) {
++			net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n",
++					    peer->device->dev->name,
++					    PACKET_CB(skb)->nonce,
++					    keypair->receiving.counter.receive.counter);
++			goto next;
++		}
++
++		if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb)))
++			goto next;
++
++		wg_reset_packet(skb);
++		wg_packet_consume_data_done(peer, skb, &endpoint);
++		free = false; /* consume_data_done() frees or forwards the skb */
++
++next:
++		wg_noise_keypair_put(keypair, false);
++		wg_peer_put(peer);
++		if (unlikely(free))
++			dev_kfree_skb(skb);
++
++		if (++work_done >= budget) /* dropped packets count as work too */
++			break;
++	}
++
++	if (work_done < budget)
++		napi_complete_done(napi, work_done);
++
++	return work_done;
++}
++
++void wg_packet_decrypt_worker(struct work_struct *work)
++{
++	struct crypt_queue *queue = container_of(work, struct multicore_worker,
++						 work)->ptr;
++	simd_context_t simd_context;
++	struct sk_buff *skb;
++
++	simd_get(&simd_context);
++	while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
++		enum packet_state state = likely(decrypt_packet(skb,
++					   &PACKET_CB(skb)->keypair->receiving,
++					   &simd_context)) ?
++				PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
++		wg_queue_enqueue_per_peer_napi(skb, state); /* DEAD on failure */
++		simd_relax(&simd_context);
++	}
++
++	simd_put(&simd_context); /* pairs with simd_get() above */
++}
++
++static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb)
++{
++	__le32 idx = ((struct message_data *)skb->data)->key_idx;
++	struct wg_peer *peer = NULL;
++	int ret;
++
++	rcu_read_lock_bh();
++	PACKET_CB(skb)->keypair =
++		(struct noise_keypair *)wg_index_hashtable_lookup(
++			wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx,
++			&peer);
++	if (unlikely(!wg_noise_keypair_get(PACKET_CB(skb)->keypair)))
++		goto err_keypair; /* no keypair ref was taken: skip the put */
++
++	if (unlikely(READ_ONCE(peer->is_dead)))
++		goto err;
++
++	ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue,
++						   &peer->rx_queue, skb,
++						   wg->packet_crypt_wq,
++						   &wg->decrypt_queue.last_cpu);
++	if (unlikely(ret == -EPIPE)) /* mark DEAD and hand to the NAPI path */
++		wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD);
++	if (likely(!ret || ret == -EPIPE)) {
++		rcu_read_unlock_bh();
++		return; /* skb is queued; refs are dropped in wg_packet_rx_poll() */
++	}
++err:
++	wg_noise_keypair_put(PACKET_CB(skb)->keypair, false);
++err_keypair:
++	rcu_read_unlock_bh();
++	wg_peer_put(peer); /* may still be NULL here; confirm put is NULL-safe */
++	dev_kfree_skb(skb);
++}
++
++void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
++{
++	if (unlikely(prepare_skb_header(skb, wg) < 0))
++		goto err;
++	switch (SKB_TYPE_LE32(skb)) {
++	case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION):
++	case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
++	case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
++		int cpu;
++
++		if (skb_queue_len(&wg->incoming_handshakes) >
++			    MAX_QUEUED_INCOMING_HANDSHAKES ||
++		    unlikely(!rng_is_initialized())) {
++			net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n",
++						wg->dev->name, skb);
++			goto err;
++		}
++		skb_queue_tail(&wg->incoming_handshakes, skb);
++		/* Kick the per-CPU handshake work item on the next online CPU;
++		 * presumably it runs wg_packet_handshake_receive_worker(), which
++		 * drains incoming_handshakes. TODO confirm where it is bound.
++		 */
++		cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu);
++		queue_work_on(cpu, wg->handshake_receive_wq,
++			&per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work);
++		break;
++	}
++	case cpu_to_le32(MESSAGE_DATA):
++		PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
++		wg_packet_consume_data(wg, skb); /* takes over the skb */
++		break;
++	default:
++		WARN(1, "Non-exhaustive parsing of packet header lead to unknown packet type!\n");
++		goto err;
++	}
++	return; /* skb was queued or consumed above */
++
++err:
++	dev_kfree_skb(skb);
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/selftest/allowedips.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,683 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This contains some basic static unit tests for the allowedips data structure.
++ * It also has two additional modes that are disabled and meant to be used by
++ * folks directly playing with this file. If you define the macro
++ * DEBUG_PRINT_TRIE_GRAPHVIZ to be 1, then every time there's a full tree in
++ * memory, it will be printed out as KERN_DEBUG in a format that can be passed
++ * to graphviz (the dot command) to visualize it. If you define the macro
++ * DEBUG_RANDOM_TRIE to be 1, then there will be an extremely costly set of
++ * randomized tests done against a trivial implementation, which may take
++ * upwards of a half-hour to complete. There's no set of users who should be
++ * enabling these, and the only developers that should go anywhere near these
++ * knobs are the ones who are reading this comment.
++ */
++
++#ifdef DEBUG
++
++#include <linux/siphash.h>
++
++static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits,
++					      u8 cidr)
++{
++	swap_endian(dst, src, bits);
++	memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8); /* tail */
++	if (cidr) /* mask the low bits of the last prefix byte */
++		dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
++}
++
++static __init void print_node(struct allowedips_node *node, u8 bits)
++{
++	char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
++	char *fmt_declaration = KERN_DEBUG
++		"\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
++	char *style = "dotted"; /* becomes "bold" when the node has a peer */
++	u8 ip1[16], ip2[16];
++	u32 color = 0;
++
++	if (bits == 32) { /* IPv4: print addresses with %pI4 */
++		fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
++		fmt_declaration = KERN_DEBUG
++			"\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
++	} else if (bits == 128) { /* IPv6: print addresses with %pI6 */
++		fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
++		fmt_declaration = KERN_DEBUG
++			"\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
++	}
++	if (node->peer) {
++		hsiphash_key_t key = { { 0 } };
++
++		memcpy(&key, &node->peer, sizeof(node->peer)); /* key = pointer */
++		color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 |
++			hsiphash_1u32(0xbabecafe, &key) % 200 << 8 |
++			hsiphash_1u32(0xabad1dea, &key) % 200;
++		style = "bold";
++	}
++	swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr);
++	printk(fmt_declaration, ip1, node->cidr, style, color);
++	if (node->bit[0]) {
++		swap_endian_and_apply_cidr(ip2,
++				rcu_dereference_raw(node->bit[0])->bits, bits,
++				node->cidr);
++		printk(fmt_connection, ip1, node->cidr, ip2,
++		       rcu_dereference_raw(node->bit[0])->cidr);
++		print_node(rcu_dereference_raw(node->bit[0]), bits); /* recurse */
++	}
++	if (node->bit[1]) {
++		swap_endian_and_apply_cidr(ip2,
++				rcu_dereference_raw(node->bit[1])->bits,
++				bits, node->cidr);
++		printk(fmt_connection, ip1, node->cidr, ip2,
++		       rcu_dereference_raw(node->bit[1])->cidr);
++		print_node(rcu_dereference_raw(node->bit[1]), bits); /* recurse */
++	}
++}
++
++static __init void print_tree(struct allowedips_node __rcu *top, u8 bits)
++{
++	printk(KERN_DEBUG "digraph trie {\n"); /* open the graphviz digraph */
++	print_node(rcu_dereference_raw(top), bits);
++	printk(KERN_DEBUG "}\n");
++}
++
++enum {
++	NUM_PEERS = 2000, /* peers allocated by randomized_test() */
++	NUM_RAND_ROUTES = 400, /* random base routes per address family */
++	NUM_MUTATED_ROUTES = 100, /* mutated routes added per base route */
++	NUM_QUERIES = NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30
++};
++
++struct horrible_allowedips {
++	struct hlist_head head; /* flat list of horrible_allowedips_node */
++};
++
++struct horrible_allowedips_node {
++	struct hlist_node table; /* link in horrible_allowedips.head */
++	union nf_inet_addr ip; /* stored pre-masked by horrible_mask_self() */
++	union nf_inet_addr mask;
++	u8 ip_version; /* 4 or 6 */
++	void *value; /* what a matching lookup returns */
++};
++
++static __init void horrible_allowedips_init(struct horrible_allowedips *table)
++{
++	INIT_HLIST_HEAD(&table->head); /* start with an empty list */
++}
++
++static __init void horrible_allowedips_free(struct horrible_allowedips *table)
++{
++	struct horrible_allowedips_node *node;
++	struct hlist_node *h;
++
++	hlist_for_each_entry_safe(node, h, &table->head, table) { /* del-safe */
++		hlist_del(&node->table);
++		kfree(node); /* node->value is not freed here */
++	}
++}
++
++static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr)
++{
++	union nf_inet_addr mask;
++
++	memset(&mask, 0x00, 128 / 8); /* clear all 16 bytes */
++	memset(&mask, 0xff, cidr / 8); /* whole bytes covered by the prefix */
++	if (cidr % 32) /* rewrite the partial 32-bit word in network order */
++		mask.all[cidr / 32] = (__force u32)htonl(
++			(0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL);
++	return mask;
++}
++
++static __init inline u8 horrible_mask_to_cidr(union nf_inet_addr subnet)
++{
++	return hweight32(subnet.all[0]) + hweight32(subnet.all[1]) +
++	       hweight32(subnet.all[2]) + hweight32(subnet.all[3]); /* set bits */
++}
++
++static __init inline void
++horrible_mask_self(struct horrible_allowedips_node *node)
++{
++	if (node->ip_version == 4) { /* keep only the network part of ip */
++		node->ip.ip &= node->mask.ip;
++	} else if (node->ip_version == 6) { /* same, over four 32-bit words */
++		node->ip.ip6[0] &= node->mask.ip6[0];
++		node->ip.ip6[1] &= node->mask.ip6[1];
++		node->ip.ip6[2] &= node->mask.ip6[2];
++		node->ip.ip6[3] &= node->mask.ip6[3];
++	}
++}
++
++static __init inline bool
++horrible_match_v4(const struct horrible_allowedips_node *node,
++		  struct in_addr *ip)
++{
++	return (ip->s_addr & node->mask.ip) == node->ip.ip; /* ip in subnet? */
++}
++
++static __init inline bool
++horrible_match_v6(const struct horrible_allowedips_node *node,
++		  struct in6_addr *ip)
++{
++	return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == /* all 4 words */
++		       node->ip.ip6[0] &&
++	       (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) ==
++		       node->ip.ip6[1] &&
++	       (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) ==
++		       node->ip.ip6[2] &&
++	       (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3];
++}
++
++static __init void
++horrible_insert_ordered(struct horrible_allowedips *table,
++			struct horrible_allowedips_node *node)
++{
++	struct horrible_allowedips_node *other = NULL, *where = NULL;
++	u8 my_cidr = horrible_mask_to_cidr(node->mask);
++
++	hlist_for_each_entry(other, &table->head, table) {
++		if (!memcmp(&other->mask, &node->mask,
++			    sizeof(union nf_inet_addr)) &&
++		    !memcmp(&other->ip, &node->ip,
++			    sizeof(union nf_inet_addr)) &&
++		    other->ip_version == node->ip_version) {
++			other->value = node->value; /* same route: replace value */
++			kfree(node); /* the new node is consumed either way */
++			return;
++		}
++		where = other;
++		if (horrible_mask_to_cidr(other->mask) <= my_cidr)
++			break; /* found the first entry that is not more specific */
++	}
++	if (!other && !where) /* loop never ran: the list was empty */
++		hlist_add_head(&node->table, &table->head);
++	else if (!other) /* ran off the end: append after the last node */
++		hlist_add_behind(&node->table, &where->table);
++	else /* stopped early: go in front, keeping longer prefixes first */
++		hlist_add_before(&node->table, &where->table);
++}
++
++static __init int
++horrible_allowedips_insert_v4(struct horrible_allowedips *table,
++			      struct in_addr *ip, u8 cidr, void *value)
++{
++	struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
++							GFP_KERNEL);
++
++	if (unlikely(!node))
++		return -ENOMEM; /* the only failure mode */
++	node->ip.in = *ip;
++	node->mask = horrible_cidr_to_mask(cidr);
++	node->ip_version = 4;
++	node->value = value;
++	horrible_mask_self(node); /* store only the network part */
++	horrible_insert_ordered(table, node); /* links or frees node */
++	return 0;
++}
++
++static __init int
++horrible_allowedips_insert_v6(struct horrible_allowedips *table,
++			      struct in6_addr *ip, u8 cidr, void *value)
++{
++	struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
++							GFP_KERNEL);
++
++	if (unlikely(!node))
++		return -ENOMEM; /* the only failure mode */
++	node->ip.in6 = *ip;
++	node->mask = horrible_cidr_to_mask(cidr);
++	node->ip_version = 6;
++	node->value = value;
++	horrible_mask_self(node); /* store only the network part */
++	horrible_insert_ordered(table, node); /* links or frees node */
++	return 0;
++}
++
++static __init void *
++horrible_allowedips_lookup_v4(struct horrible_allowedips *table,
++			      struct in_addr *ip)
++{
++	struct horrible_allowedips_node *node;
++	void *ret = NULL; /* returned when nothing matches */
++
++	hlist_for_each_entry(node, &table->head, table) {
++		if (node->ip_version != 4)
++			continue;
++		if (horrible_match_v4(node, ip)) {
++			ret = node->value; /* first match in list order wins */
++			break;
++		}
++	}
++	return ret;
++}
++
++static __init void *
++horrible_allowedips_lookup_v6(struct horrible_allowedips *table,
++			      struct in6_addr *ip)
++{
++	struct horrible_allowedips_node *node;
++	void *ret = NULL; /* returned when nothing matches */
++
++	hlist_for_each_entry(node, &table->head, table) {
++		if (node->ip_version != 6)
++			continue;
++		if (horrible_match_v6(node, ip)) {
++			ret = node->value; /* first match in list order wins */
++			break;
++		}
++	}
++	return ret;
++}
++
++static __init bool randomized_test(void)
++{
++	unsigned int i, j, k, mutate_amount, cidr;
++	u8 ip[16], mutate_mask[16], mutated[16];
++	struct wg_peer **peers, *peer;
++	struct horrible_allowedips h; /* reference implementation */
++	DEFINE_MUTEX(mutex);
++	struct allowedips t; /* implementation under test */
++	bool ret = false;
++
++	mutex_init(&mutex);
++
++	wg_allowedips_init(&t);
++	horrible_allowedips_init(&h);
++
++	peers = kcalloc(NUM_PEERS, sizeof(*peers), GFP_KERNEL);
++	if (unlikely(!peers)) {
++		pr_err("allowedips random self-test malloc: FAIL\n");
++		goto free;
++	}
++	for (i = 0; i < NUM_PEERS; ++i) {
++		peers[i] = kzalloc(sizeof(*peers[i]), GFP_KERNEL);
++		if (unlikely(!peers[i])) {
++			pr_err("allowedips random self-test malloc: FAIL\n");
++			goto free;
++		}
++		kref_init(&peers[i]->refcount);
++	}
++
++	mutex_lock(&mutex);
++
++	for (i = 0; i < NUM_RAND_ROUTES; ++i) { /* IPv4: same data in t and h */
++		prandom_bytes(ip, 4);
++		cidr = prandom_u32_max(32) + 1;
++		peer = peers[prandom_u32_max(NUM_PEERS)];
++		if (wg_allowedips_insert_v4(&t, (struct in_addr *)ip, cidr,
++					    peer, &mutex) < 0) {
++			pr_err("allowedips random self-test malloc: FAIL\n");
++			goto free_locked;
++		}
++		if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip,
++						  cidr, peer) < 0) {
++			pr_err("allowedips random self-test malloc: FAIL\n");
++			goto free_locked;
++		}
++		for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
++			memcpy(mutated, ip, 4);
++			prandom_bytes(mutate_mask, 4);
++			mutate_amount = prandom_u32_max(32);
++			for (k = 0; k < mutate_amount / 8; ++k)
++				mutate_mask[k] = 0xff;
++			mutate_mask[k] = 0xff
++					 << ((8 - (mutate_amount % 8)) % 8);
++			for (; k < 4; ++k) /* NOTE(review): same k; clears byte above */
++				mutate_mask[k] = 0;
++			for (k = 0; k < 4; ++k) /* keep masked bits, randomize rest */
++				mutated[k] = (mutated[k] & mutate_mask[k]) |
++					     (~mutate_mask[k] &
++					      prandom_u32_max(256));
++			cidr = prandom_u32_max(32) + 1;
++			peer = peers[prandom_u32_max(NUM_PEERS)];
++			if (wg_allowedips_insert_v4(&t,
++						    (struct in_addr *)mutated,
++						    cidr, peer, &mutex) < 0) {
++				pr_err("allowedips random malloc: FAIL\n");
++				goto free_locked;
++			}
++			if (horrible_allowedips_insert_v4(&h,
++				(struct in_addr *)mutated, cidr, peer)) {
++				pr_err("allowedips random self-test malloc: FAIL\n");
++				goto free_locked;
++			}
++		}
++	}
++
++	for (i = 0; i < NUM_RAND_ROUTES; ++i) { /* IPv6: same data in t and h */
++		prandom_bytes(ip, 16);
++		cidr = prandom_u32_max(128) + 1;
++		peer = peers[prandom_u32_max(NUM_PEERS)];
++		if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr,
++					    peer, &mutex) < 0) {
++			pr_err("allowedips random self-test malloc: FAIL\n");
++			goto free_locked;
++		}
++		if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip,
++						  cidr, peer) < 0) {
++			pr_err("allowedips random self-test malloc: FAIL\n");
++			goto free_locked;
++		}
++		for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
++			memcpy(mutated, ip, 16);
++			prandom_bytes(mutate_mask, 16);
++			mutate_amount = prandom_u32_max(128);
++			for (k = 0; k < mutate_amount / 8; ++k)
++				mutate_mask[k] = 0xff;
++			mutate_mask[k] = 0xff
++					 << ((8 - (mutate_amount % 8)) % 8);
++			for (; k < 4; ++k) /* NOTE(review): bound 4, mask is 16 bytes */
++				mutate_mask[k] = 0;
++			for (k = 0; k < 4; ++k) /* NOTE(review): only bytes 0-3 change */
++				mutated[k] = (mutated[k] & mutate_mask[k]) |
++					     (~mutate_mask[k] &
++					      prandom_u32_max(256));
++			cidr = prandom_u32_max(128) + 1;
++			peer = peers[prandom_u32_max(NUM_PEERS)];
++			if (wg_allowedips_insert_v6(&t,
++						    (struct in6_addr *)mutated,
++						    cidr, peer, &mutex) < 0) {
++				pr_err("allowedips random self-test malloc: FAIL\n");
++				goto free_locked;
++			}
++			if (horrible_allowedips_insert_v6(
++				    &h, (struct in6_addr *)mutated, cidr,
++				    peer)) {
++				pr_err("allowedips random self-test malloc: FAIL\n");
++				goto free_locked;
++			}
++		}
++	}
++
++	mutex_unlock(&mutex);
++
++	if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) {
++		print_tree(t.root4, 32);
++		print_tree(t.root6, 128);
++	}
++
++	for (i = 0; i < NUM_QUERIES; ++i) { /* both tables must agree (v4) */
++		prandom_bytes(ip, 4);
++		if (lookup(t.root4, 32, ip) !=
++		    horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
++			pr_err("allowedips random self-test: FAIL\n");
++			goto free;
++		}
++	}
++
++	for (i = 0; i < NUM_QUERIES; ++i) { /* both tables must agree (v6) */
++		prandom_bytes(ip, 16);
++		if (lookup(t.root6, 128, ip) !=
++		    horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
++			pr_err("allowedips random self-test: FAIL\n");
++			goto free;
++		}
++	}
++	ret = true;
++
++free: /* entered with the mutex not held */
++	mutex_lock(&mutex);
++free_locked: /* entered with the mutex already held */
++	wg_allowedips_free(&t, &mutex);
++	mutex_unlock(&mutex);
++	horrible_allowedips_free(&h);
++	if (peers) {
++		for (i = 0; i < NUM_PEERS; ++i)
++			kfree(peers[i]); /* unallocated slots are NULL (kcalloc) */
++	}
++	kfree(peers);
++	return ret; /* true only if every lookup comparison matched */
++}
++
++static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d)
++{
++	static struct in_addr ip;
++	const u8 octets[4] = { a, b, c, d };
++
++	/* Lay the octets out in address order in the shared static buffer;
++	 * each call overwrites the result of the previous one.
++	 */
++	memcpy(&ip, octets, sizeof(octets));
++	return &ip;
++}
++
++static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d)
++{
++	static struct in6_addr ip; /* shared: overwritten by every call */
++	const u32 words[4] = { a, b, c, d };
++	__be32 *out = (__be32 *)&ip;
++	unsigned int n;
++
++	for (n = 0; n < ARRAY_SIZE(words); ++n)
++		out[n] = cpu_to_be32(words[n]); /* host order -> big endian */
++	return &ip;
++}
++
++static __init struct wg_peer *init_peer(void)
++{
++	struct wg_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL);
++
++	if (peer) { /* on allocation failure NULL is handed to the caller */
++		kref_init(&peer->refcount);
++		INIT_LIST_HEAD(&peer->allowedips_list);
++	}
++	return peer;
++}
++
++#define insert(version, mem, ipa, ipb, ipc, ipd, cidr)                       \
++	wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \
++					cidr, mem, &mutex) /* uses the caller's t and mutex */
++
++#define maybe_fail() do {                                               \
++		++i; /* count every check, pass or fail */              \
++		if (!_s) { /* _s is declared by the invoking macro */   \
++			pr_info("allowedips self-test %zu: FAIL\n", i); \
++			success = false;                                \
++		}                                                       \
++	} while (0)
++
++#define test(version, mem, ipa, ipb, ipc, ipd) do {                          \
++		bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \
++				 ip##version(ipa, ipb, ipc, ipd)) == (mem);  \
++		maybe_fail(); /* passes iff lookup() returned mem */         \
++	} while (0)
++
++#define test_negative(version, mem, ipa, ipb, ipc, ipd) do {                 \
++		bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \
++				 ip##version(ipa, ipb, ipc, ipd)) != (mem);  \
++		maybe_fail(); /* passes iff lookup() did not return mem */   \
++	} while (0)
++
++#define test_boolean(cond) do {   \
++		bool _s = (cond); /* cond is evaluated once */ \
++		maybe_fail();     \
++	} while (0)
++
++bool __init wg_allowedips_selftest(void)
++{
++	bool found_a = false, found_b = false, found_c = false, found_d = false,
++	     found_e = false, found_other = false;
++	struct wg_peer *a = init_peer(), *b = init_peer(), *c = init_peer(),
++		       *d = init_peer(), *e = init_peer(), *f = init_peer(),
++		       *g = init_peer(), *h = init_peer();
++	struct allowedips_node *iter_node;
++	bool success = false;
++	struct allowedips t;
++	DEFINE_MUTEX(mutex);
++	struct in6_addr ip;
++	size_t i = 0, count = 0, bit; /* i numbers the checks for maybe_fail() */
++	__be64 part;
++
++	mutex_init(&mutex);
++	mutex_lock(&mutex);
++	wg_allowedips_init(&t);
++
++	if (!a || !b || !c || !d || !e || !f || !g || !h) {
++		pr_err("allowedips self-test malloc: FAIL\n");
++		goto free;
++	}
++
++	insert(4, a, 192, 168, 4, 0, 24);
++	insert(4, b, 192, 168, 4, 4, 32);
++	insert(4, c, 192, 168, 0, 0, 16);
++	insert(4, d, 192, 95, 5, 64, 27);
++	/* replaces previous entry, and maskself is required */
++	insert(4, c, 192, 95, 5, 65, 27);
++	insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
++	insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64);
++	insert(4, e, 0, 0, 0, 0, 0);
++	insert(6, e, 0, 0, 0, 0, 0);
++	/* replaces previous entry */
++	insert(6, f, 0, 0, 0, 0, 0);
++	insert(6, g, 0x24046800, 0, 0, 0, 32);
++	/* maskself is required */
++	insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64);
++	insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128);
++	insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128);
++	insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98);
++	insert(4, g, 64, 15, 112, 0, 20);
++	/* maskself is required */
++	insert(4, h, 64, 15, 123, 211, 25);
++	insert(4, a, 10, 0, 0, 0, 25);
++	insert(4, b, 10, 0, 0, 128, 25);
++	insert(4, a, 10, 1, 0, 0, 30);
++	insert(4, b, 10, 1, 0, 4, 30);
++	insert(4, c, 10, 1, 0, 8, 29);
++	insert(4, d, 10, 1, 0, 16, 29);
++
++	if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) {
++		print_tree(t.root4, 32);
++		print_tree(t.root6, 128);
++	}
++
++	success = true; /* any failing check below flips this back to false */
++
++	test(4, a, 192, 168, 4, 20);
++	test(4, a, 192, 168, 4, 0);
++	test(4, b, 192, 168, 4, 4);
++	test(4, c, 192, 168, 200, 182);
++	test(4, c, 192, 95, 5, 68);
++	test(4, e, 192, 95, 5, 96);
++	test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543);
++	test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee);
++	test(6, f, 0x26075300, 0x60006b01, 0, 0);
++	test(6, g, 0x24046800, 0x40040806, 0, 0x1006);
++	test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678);
++	test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678);
++	test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678);
++	test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678);
++	test(6, h, 0x24046800, 0x40040800, 0, 0);
++	test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010);
++	test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef);
++	test(4, g, 64, 15, 116, 26);
++	test(4, g, 64, 15, 127, 3);
++	test(4, g, 64, 15, 123, 1);
++	test(4, h, 64, 15, 123, 128);
++	test(4, h, 64, 15, 123, 129);
++	test(4, a, 10, 0, 0, 52);
++	test(4, b, 10, 0, 0, 220);
++	test(4, a, 10, 1, 0, 2);
++	test(4, b, 10, 1, 0, 6);
++	test(4, c, 10, 1, 0, 10);
++	test(4, d, 10, 1, 0, 20);
++
++	insert(4, a, 1, 0, 0, 0, 32);
++	insert(4, a, 64, 0, 0, 0, 32);
++	insert(4, a, 128, 0, 0, 0, 32);
++	insert(4, a, 192, 0, 0, 0, 32);
++	insert(4, a, 255, 0, 0, 0, 32);
++	wg_allowedips_remove_by_peer(&t, a, &mutex);
++	test_negative(4, a, 1, 0, 0, 0);
++	test_negative(4, a, 64, 0, 0, 0);
++	test_negative(4, a, 128, 0, 0, 0);
++	test_negative(4, a, 192, 0, 0, 0);
++	test_negative(4, a, 255, 0, 0, 0);
++
++	wg_allowedips_free(&t, &mutex);
++	wg_allowedips_init(&t);
++	insert(4, a, 192, 168, 0, 0, 16);
++	insert(4, a, 192, 168, 0, 0, 24);
++	wg_allowedips_remove_by_peer(&t, a, &mutex);
++	test_negative(4, a, 192, 168, 0, 1);
++
++	/* These will hit the WARN_ON(len >= 128) in free_node if something
++	 * goes wrong.
++	 */
++	for (bit = 0; bit < 128; ++bit) { /* own index: i keeps counting checks */
++		part = cpu_to_be64(~(1LLU << (bit % 64)));
++		memset(&ip, 0xff, 16);
++		memcpy((u8 *)&ip + (bit < 64) * 8, &part, 8);
++		wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
++	}
++
++	wg_allowedips_free(&t, &mutex);
++
++	wg_allowedips_init(&t);
++	insert(4, a, 192, 95, 5, 93, 27);
++	insert(6, a, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
++	insert(4, a, 10, 1, 0, 20, 29);
++	insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 83);
++	insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 21);
++	list_for_each_entry(iter_node, &a->allowedips_list, peer_list) {
++		u8 cidr, ip[16] __aligned(__alignof(u64));
++		int family = wg_allowedips_read_node(iter_node, ip, &cidr);
++
++		count++;
++
++		if (cidr == 27 && family == AF_INET &&
++		    !memcmp(ip, ip4(192, 95, 5, 64), sizeof(struct in_addr)))
++			found_a = true;
++		else if (cidr == 128 && family == AF_INET6 &&
++			 !memcmp(ip, ip6(0x26075300, 0x60006b00, 0, 0xc05f0543),
++				 sizeof(struct in6_addr)))
++			found_b = true;
++		else if (cidr == 29 && family == AF_INET &&
++			 !memcmp(ip, ip4(10, 1, 0, 16), sizeof(struct in_addr)))
++			found_c = true;
++		else if (cidr == 83 && family == AF_INET6 &&
++			 !memcmp(ip, ip6(0x26075300, 0x6d8a6bf8, 0xdab1e000, 0),
++				 sizeof(struct in6_addr)))
++			found_d = true;
++		else if (cidr == 21 && family == AF_INET6 &&
++			 !memcmp(ip, ip6(0x26075000, 0, 0, 0),
++				 sizeof(struct in6_addr)))
++			found_e = true;
++		else
++			found_other = true;
++	}
++	test_boolean(count == 5);
++	test_boolean(found_a);
++	test_boolean(found_b);
++	test_boolean(found_c);
++	test_boolean(found_d);
++	test_boolean(found_e);
++	test_boolean(!found_other);
++
++	if (IS_ENABLED(DEBUG_RANDOM_TRIE) && success)
++		success = randomized_test();
++
++	if (success)
++		pr_info("allowedips self-tests: pass\n");
++
++free: /* also reached directly when a peer allocation failed */
++	wg_allowedips_free(&t, &mutex);
++	kfree(a);
++	kfree(b);
++	kfree(c);
++	kfree(d);
++	kfree(e);
++	kfree(f);
++	kfree(g);
++	kfree(h);
++	mutex_unlock(&mutex);
++
++	return success;
++}
++
++#undef test_negative
++#undef test
++#undef remove
++#undef insert
++#undef init_peer
++
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/selftest/counter.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,104 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifdef DEBUG
++bool __init wg_packet_counter_selftest(void)
++{
++	unsigned int test_num = 0, i;
++	union noise_counter counter;
++	bool success = true;
++
++#define T_INIT do {                                               \
++		memset(&counter, 0, sizeof(union noise_counter)); \
++		spin_lock_init(&counter.receive.lock);            \
++	} while (0)
++#define T_LIM (COUNTER_WINDOW_SIZE + 1)
++#define T(n, v) do {                                                  \
++		++test_num;                                           \
++		if (counter_validate(&counter, n) != (v)) {           \
++			pr_err("nonce counter self-test %u: FAIL\n",  \
++			       test_num);                             \
++			success = false;                              \
++		}                                                     \
++	} while (0)
++
++	T_INIT; /* fixed sequence; the numbers in comments match test_num */
++	/*  1 */ T(0, true);
++	/*  2 */ T(1, true);
++	/*  3 */ T(1, false);
++	/*  4 */ T(9, true);
++	/*  5 */ T(8, true);
++	/*  6 */ T(7, true);
++	/*  7 */ T(7, false);
++	/*  8 */ T(T_LIM, true);
++	/*  9 */ T(T_LIM - 1, true);
++	/* 10 */ T(T_LIM - 1, false);
++	/* 11 */ T(T_LIM - 2, true);
++	/* 12 */ T(2, true);
++	/* 13 */ T(2, false);
++	/* 14 */ T(T_LIM + 16, true);
++	/* 15 */ T(3, false);
++	/* 16 */ T(T_LIM + 16, false);
++	/* 17 */ T(T_LIM * 4, true);
++	/* 18 */ T(T_LIM * 4 - (T_LIM - 1), true);
++	/* 19 */ T(10, false);
++	/* 20 */ T(T_LIM * 4 - T_LIM, false);
++	/* 21 */ T(T_LIM * 4 - (T_LIM + 1), false);
++	/* 22 */ T(T_LIM * 4 - (T_LIM - 2), true);
++	/* 23 */ T(T_LIM * 4 + 1 - T_LIM, false);
++	/* 24 */ T(0, false);
++	/* 25 */ T(REJECT_AFTER_MESSAGES, false);
++	/* 26 */ T(REJECT_AFTER_MESSAGES - 1, true);
++	/* 27 */ T(REJECT_AFTER_MESSAGES, false);
++	/* 28 */ T(REJECT_AFTER_MESSAGES - 1, false);
++	/* 29 */ T(REJECT_AFTER_MESSAGES - 2, true);
++	/* 30 */ T(REJECT_AFTER_MESSAGES + 1, false);
++	/* 31 */ T(REJECT_AFTER_MESSAGES + 2, false);
++	/* 32 */ T(REJECT_AFTER_MESSAGES - 2, false);
++	/* 33 */ T(REJECT_AFTER_MESSAGES - 3, true);
++	/* 34 */ T(0, false);
++
++	T_INIT; /* ascending 1..COUNTER_WINDOW_SIZE, then 0 accepted exactly once */
++	for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i)
++		T(i, true);
++	T(0, true);
++	T(0, false);
++
++	T_INIT; /* ascending 2..COUNTER_WINDOW_SIZE + 1, then 1 accepted, 0 rejected */
++	for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i)
++		T(i, true);
++	T(1, true);
++	T(0, false);
++
++	T_INIT; /* descending COUNTER_WINDOW_SIZE..0, all accepted */
++	for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0;)
++		T(i, true);
++
++	T_INIT; /* descending COUNTER_WINDOW_SIZE + 1..1, then 0 rejected */
++	for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1;)
++		T(i, true);
++	T(0, false);
++
++	T_INIT; /* descending COUNTER_WINDOW_SIZE..1, then one above accepted, 0 rejected */
++	for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;)
++		T(i, true);
++	T(COUNTER_WINDOW_SIZE + 1, true);
++	T(0, false);
++
++	T_INIT; /* descending COUNTER_WINDOW_SIZE..1, then 0 and one above both accepted */
++	for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;)
++		T(i, true);
++	T(0, true);
++	T(COUNTER_WINDOW_SIZE + 1, true);
++
++#undef T
++#undef T_LIM
++#undef T_INIT
++
++	if (success)
++		pr_info("nonce counter self-tests: pass\n");
++	return success;
++}
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/selftest/ratelimiter.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,226 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifdef DEBUG
++
++#include <linux/jiffies.h>
++
++static const struct {
++	bool result; /* expected wg_ratelimiter_allow() verdict at this step */
++	unsigned int msec_to_sleep_before; /* sleep before the step; 0 = none */
++} expected_results[] __initconst = {
++	[0 ... PACKETS_BURSTABLE - 1] = { true, 0 },
++	[PACKETS_BURSTABLE] = { false, 0 },
++	[PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND },
++	[PACKETS_BURSTABLE + 2] = { false, 0 },
++	[PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 },
++	[PACKETS_BURSTABLE + 4] = { true, 0 },
++	[PACKETS_BURSTABLE + 5] = { false, 0 }
++};
++
++static __init unsigned int maximum_jiffies_at_index(int index)
++{
++	unsigned int budget_ms = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3;
++	int step = index; /* sleeps are summed from entry index down to 0 */
++
++	while (step >= 0)
++		budget_ms += expected_results[step--].msec_to_sleep_before;
++	return msecs_to_jiffies(budget_ms);
++}
++
++static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4,
++			       struct sk_buff *skb6, struct ipv6hdr *hdr6,
++			       int *test)
++{
++	unsigned long loop_start_time;
++	int i;
++
++	wg_ratelimiter_gc_entries(NULL);
++	rcu_barrier();
++	loop_start_time = jiffies; /* every deadline below is relative to this */
++
++	for (i = 0; i < ARRAY_SIZE(expected_results); ++i) {
++		if (expected_results[i].msec_to_sleep_before)
++			msleep(expected_results[i].msec_to_sleep_before);
++
++		if (time_is_before_jiffies(loop_start_time +
++					   maximum_jiffies_at_index(i)))
++			return -ETIMEDOUT; /* running late; the caller retries */
++		if (wg_ratelimiter_allow(skb4, &init_net) !=
++					expected_results[i].result)
++			return -EXFULL;
++		++(*test);
++
++		hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1); /* other source */
++		if (time_is_before_jiffies(loop_start_time +
++					   maximum_jiffies_at_index(i)))
++			return -ETIMEDOUT;
++		if (!wg_ratelimiter_allow(skb4, &init_net)) /* must be allowed */
++			return -EXFULL;
++		++(*test);
++
++		hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1); /* restore source */
++
++#if IS_ENABLED(CONFIG_IPV6)
++		hdr6->saddr.in6_u.u6_addr32[2] = htonl(i);
++		hdr6->saddr.in6_u.u6_addr32[3] = htonl(i);
++		if (time_is_before_jiffies(loop_start_time +
++					   maximum_jiffies_at_index(i)))
++			return -ETIMEDOUT;
++		if (wg_ratelimiter_allow(skb6, &init_net) !=
++					expected_results[i].result)
++			return -EXFULL;
++		++(*test);
++
++		hdr6->saddr.in6_u.u6_addr32[0] =
++			htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1);
++		if (time_is_before_jiffies(loop_start_time +
++					   maximum_jiffies_at_index(i)))
++			return -ETIMEDOUT;
++		if (!wg_ratelimiter_allow(skb6, &init_net)) /* must be allowed */
++			return -EXFULL;
++		++(*test);
++
++		hdr6->saddr.in6_u.u6_addr32[0] =
++			htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1);
++
++		if (time_is_before_jiffies(loop_start_time +
++					   maximum_jiffies_at_index(i)))
++			return -ETIMEDOUT;
++#endif
++	}
++	return 0; /* every step matched expected_results within its deadline */
++}
++
++static __init int capacity_test(struct sk_buff *skb4, struct iphdr *hdr4,
++				int *test)
++{
++	int src;
++
++	wg_ratelimiter_gc_entries(NULL);
++	rcu_barrier();
++
++	if (atomic_read(&total_entries) != 0)
++		return -EXFULL;
++	++(*test);
++
++	/* Sources 0..max_entries - 1 must pass; only the last is refused. */
++	for (src = 0; src <= max_entries; ++src, ++(*test)) {
++		hdr4->saddr = htonl(src);
++		if (wg_ratelimiter_allow(skb4, &init_net) != (src != max_entries))
++			return -EXFULL;
++	}
++	return 0;
++}
++
++bool __init wg_ratelimiter_selftest(void)
++{
++	enum { TRIALS_BEFORE_GIVING_UP = 5000 };
++	bool success = false;
++	int test = 0, trials; /* test counts completed steps for the FAIL message */
++	struct sk_buff *skb4, *skb6 = NULL; /* skb6/hdr6 stay NULL without IPv6 */
++	struct iphdr *hdr4;
++	struct ipv6hdr *hdr6 = NULL;
++
++	if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN))
++		return true; /* not run under these configs; reported as success */
++
++	BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0);
++
++	if (wg_ratelimiter_init()) /* init three times; each failure undoes the earlier ones */
++		goto out;
++	++test;
++	if (wg_ratelimiter_init()) {
++		wg_ratelimiter_uninit();
++		goto out;
++	}
++	++test;
++	if (wg_ratelimiter_init()) {
++		wg_ratelimiter_uninit();
++		wg_ratelimiter_uninit();
++		goto out;
++	}
++	++test;
++
++	skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL);
++	if (unlikely(!skb4))
++		goto err_nofree;
++	skb4->protocol = htons(ETH_P_IP);
++	hdr4 = (struct iphdr *)skb_put(skb4, sizeof(*hdr4));
++	hdr4->saddr = htonl(8182);
++	skb_reset_network_header(skb4);
++	++test;
++
++#if IS_ENABLED(CONFIG_IPV6)
++	skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL);
++	if (unlikely(!skb6)) {
++		kfree_skb(skb4);
++		goto err_nofree;
++	}
++	skb6->protocol = htons(ETH_P_IPV6);
++	hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(*hdr6));
++	hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212);
++	hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188);
++	skb_reset_network_header(skb6);
++	++test;
++#endif
++
++	for (trials = TRIALS_BEFORE_GIVING_UP;;) { /* retried only on -ETIMEDOUT */
++		int test_count = 0, ret;
++
++		ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
++		if (ret == -ETIMEDOUT) {
++			if (!trials--) {
++				test += test_count;
++				goto err;
++			}
++			msleep(500);
++			continue;
++		} else if (ret < 0) {
++			test += test_count;
++			goto err;
++		} else {
++			test += test_count;
++			break;
++		}
++	}
++
++	for (trials = TRIALS_BEFORE_GIVING_UP;;) { /* retried on any failure */
++		int test_count = 0;
++
++		if (capacity_test(skb4, hdr4, &test_count) < 0) {
++			if (!trials--) {
++				test += test_count;
++				goto err;
++			}
++			msleep(50);
++			continue;
++		}
++		test += test_count;
++		break;
++	}
++
++	success = true;
++
++err:
++	kfree_skb(skb4);
++#if IS_ENABLED(CONFIG_IPV6)
++	kfree_skb(skb6);
++#endif
++err_nofree:
++	wg_ratelimiter_uninit();
++	wg_ratelimiter_uninit();
++	wg_ratelimiter_uninit();
++	/* Uninit one extra time to check underflow detection. */
++	wg_ratelimiter_uninit();
++out:
++	if (success)
++		pr_info("ratelimiter self-tests: pass\n");
++	else
++		pr_err("ratelimiter self-test %d: FAIL\n", test);
++
++	return success;
++}
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/send.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,427 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "queueing.h"
++#include "timers.h"
++#include "device.h"
++#include "peer.h"
++#include "socket.h"
++#include "messages.h"
++#include "cookie.h"
++
++#include <linux/simd.h>
++#include <linux/uio.h>
++#include <linux/inetdevice.h>
++#include <linux/socket.h>
++#include <net/ip_tunnels.h>
++#include <net/udp.h>
++#include <net/sock.h>
++
++static void wg_packet_send_handshake_initiation(struct wg_peer *peer)
++{
++	struct message_handshake_initiation packet;
++
++	/* Rate limit: nothing is sent until REKEY_TIMEOUT has expired. */
++	if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
++				      REKEY_TIMEOUT))
++		return;
++
++	atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
++	net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n",
++			    peer->device->dev->name, peer->internal_id,
++			    &peer->endpoint.addr);
++
++	if (!wg_noise_handshake_create_initiation(&packet, &peer->handshake))
++		return;
++	wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
++	wg_timers_any_authenticated_packet_traversal(peer);
++	wg_timers_any_authenticated_packet_sent(peer);
++	atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
++	wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet),
++				      HANDSHAKE_DSCP);
++	wg_timers_handshake_initiated(peer);
++}
++
++void wg_packet_handshake_send_worker(struct work_struct *work)
++{
++	struct wg_peer *peer;
++
++	peer = container_of(work, struct wg_peer, transmit_handshake_work);
++	wg_packet_send_handshake_initiation(peer);
++	wg_peer_put(peer); /* pairs with the wg_peer_get() done when queueing */
++}
++
++void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
++						bool is_retry)
++{
++	bool queued;
++
++	if (!is_retry)
++		peer->timer_handshake_attempts = 0;
++
++	rcu_read_lock_bh();
++	/* The function being queued checks last_sent_handshake as well;
++	 * testing it here too just avoids queueing work that is not
++	 * strictly necessary. Nothing is queued for a peer marked dead
++	 * either.
++	 */
++	if (wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
++				     REKEY_TIMEOUT) &&
++	    likely(!READ_ONCE(peer->is_dead))) {
++		/* The worker drops this reference once it has run. */
++		wg_peer_get(peer);
++		queued = queue_work(peer->device->handshake_send_wq,
++				    &peer->transmit_handshake_work);
++		/* If the work was already queued, the extra reference taken
++		 * above has to be dropped here.
++		 */
++		if (!queued)
++			wg_peer_put(peer);
++	}
++	rcu_read_unlock_bh();
++}
++
++void wg_packet_send_handshake_response(struct wg_peer *peer)
++{
++	struct message_handshake_response packet;
++
++	atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
++	net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n",
++			    peer->device->dev->name, peer->internal_id,
++			    &peer->endpoint.addr);
++
++	if (!wg_noise_handshake_create_response(&packet, &peer->handshake))
++		return;
++	wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
++
++	/* The response only goes out if a session could be started. */
++	if (!wg_noise_handshake_begin_session(&peer->handshake,
++					      &peer->keypairs))
++		return;
++	wg_timers_session_derived(peer);
++	wg_timers_any_authenticated_packet_traversal(peer);
++	wg_timers_any_authenticated_packet_sent(peer);
++	atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
++	wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet),
++				      HANDSHAKE_DSCP);
++}
++
++void wg_packet_send_handshake_cookie(struct wg_device *wg,
++				     struct sk_buff *initiating_skb,
++				     __le32 sender_index)
++{
++	struct message_handshake_cookie packet; /* filled in below, then sent */
++
++	net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n",
++				wg->dev->name, initiating_skb);
++	wg_cookie_message_create(&packet, initiating_skb, sender_index,
++				 &wg->cookie_checker);
++	wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet,
++					      sizeof(packet)); /* reply to initiating_skb */
++}
++
++static void keep_key_fresh(struct wg_peer *peer)
++{
++	struct noise_keypair *keypair;
++	bool send = false; /* decided under RCU, acted on after unlocking */
++
++	rcu_read_lock_bh();
++	keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
++	if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
++	    (unlikely(atomic64_read(&keypair->sending.counter.counter) >
++		      REKEY_AFTER_MESSAGES) || /* send counter above the limit */
++	     (keypair->i_am_the_initiator &&
++	      unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
++						REKEY_AFTER_TIME))))) /* initiator only: age limit */
++		send = true;
++	rcu_read_unlock_bh();
++
++	if (send)
++		wg_packet_send_queued_handshake_initiation(peer, false);
++}
++
++static unsigned int calculate_skb_padding(struct sk_buff *skb)
++{
++	unsigned int tail = skb->len, target;
++
++	if (likely(PACKET_CB(skb)->mtu)) {
++		/* The networking layer may hand us a packet larger than the
++		 * MTU. Reducing the length modulo the MTU first keeps the
++		 * final subtraction from wrapping when the target is clamped
++		 * to the MTU. That is rare, hence the unlikely().
++		 */
++		if (unlikely(tail > PACKET_CB(skb)->mtu))
++			tail %= PACKET_CB(skb)->mtu;
++		target = min(PACKET_CB(skb)->mtu,
++			     ALIGN(tail, MESSAGE_PADDING_MULTIPLE));
++	} else {
++		/* No MTU recorded: just round up to the padding multiple. */
++		target = ALIGN(tail, MESSAGE_PADDING_MULTIPLE);
++	}
++	return target - tail;
++}
++
++static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair,
++			   simd_context_t *simd_context)
++{
++	unsigned int padding_len, plaintext_len, trailer_len;
++	struct scatterlist sg[MAX_SKB_FRAGS + 8];
++	struct message_data *header;
++	struct sk_buff *trailer;
++	int num_frags;
++
++	/* Calculate lengths. */
++	padding_len = calculate_skb_padding(skb);
++	trailer_len = padding_len + noise_encrypted_len(0);
++	plaintext_len = skb->len + padding_len;
++
++	/* Expand data section to have room for padding and auth tag. */
++	num_frags = skb_cow_data(skb, trailer_len, &trailer);
++	if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
++		return false; /* skb_cow_data() failed or sg[] is too small */
++
++	/* Set the padding to zeros, and make sure it and the auth tag are part
++	 * of the skb.
++	 */
++	memset(skb_tail_pointer(trailer), 0, padding_len);
++
++	/* Expand head section to have room for our header and the network
++	 * stack's headers.
++	 */
++	if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0))
++		return false;
++
++	/* Finalize checksum calculation for the inner packet, if required. */
++	if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL &&
++		     skb_checksum_help(skb)))
++		return false;
++
++	/* Only after checksumming can we safely add on the padding at the end
++	 * and the header.
++	 */
++	skb_set_inner_network_header(skb, 0);
++	header = (struct message_data *)skb_push(skb, sizeof(*header));
++	header->header.type = cpu_to_le32(MESSAGE_DATA);
++	header->key_idx = keypair->remote_index;
++	header->counter = cpu_to_le64(PACKET_CB(skb)->nonce);
++	pskb_put(skb, trailer, trailer_len);
++
++	/* Now we can encrypt the scattergather segments */
++	sg_init_table(sg, num_frags);
++	if (skb_to_sgvec(skb, sg, sizeof(struct message_data),
++			 noise_encrypted_len(plaintext_len)) <= 0)
++		return false; /* the region after the header could not be mapped */
++	return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0,
++						   PACKET_CB(skb)->nonce,
++						   keypair->sending.key,
++						   simd_context);
++}
++
++void wg_packet_send_keepalive(struct wg_peer *peer)
++{
++	struct sk_buff *keepalive;
++
++	if (!skb_queue_empty(&peer->staged_packet_queue))
++		goto flush; /* something is staged already; just send that */
++	keepalive = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
++			      GFP_ATOMIC);
++	if (unlikely(!keepalive))
++		return;
++	skb_reserve(keepalive, DATA_PACKET_HEAD_ROOM);
++	keepalive->dev = peer->device->dev;
++	PACKET_CB(keepalive)->mtu = keepalive->dev->mtu;
++	skb_queue_tail(&peer->staged_packet_queue, keepalive);
++	net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n",
++			    peer->device->dev->name, peer->internal_id,
++			    &peer->endpoint.addr);
++flush:
++	wg_packet_send_staged_packets(peer);
++}
++
++static void wg_packet_create_data_done(struct sk_buff *first,
++				       struct wg_peer *peer)
++{
++	struct sk_buff *skb, *next;
++	bool is_keepalive, data_sent = false;
++
++	wg_timers_any_authenticated_packet_traversal(peer);
++	wg_timers_any_authenticated_packet_sent(peer);
++	skb_list_walk_safe(first, skb, next) {
++		is_keepalive = skb->len == message_data_len(0); /* presumably a zero-payload message - confirm */
++		if (likely(!wg_socket_send_skb_to_peer(peer, skb,
++				PACKET_CB(skb)->ds) && !is_keepalive))
++			data_sent = true; /* send returned 0 for a non-keepalive */
++	}
++
++	if (likely(data_sent))
++		wg_timers_data_sent(peer);
++
++	keep_key_fresh(peer); /* may queue a new handshake initiation */
++}
++
++void wg_packet_tx_worker(struct work_struct *work)
++{
++	struct crypt_queue *queue = container_of(work, struct crypt_queue,
++						 work);
++	struct noise_keypair *keypair;
++	enum packet_state state;
++	struct sk_buff *first;
++	struct wg_peer *peer;
++
++	while ((first = __ptr_ring_peek(&queue->ring)) != NULL &&
++	       (state = atomic_read_acquire(&PACKET_CB(first)->state)) !=
++		       PACKET_STATE_UNCRYPTED) { /* stop at the first entry still UNCRYPTED */
++		__ptr_ring_discard_one(&queue->ring);
++		peer = PACKET_PEER(first);
++		keypair = PACKET_CB(first)->keypair;
++
++		if (likely(state == PACKET_STATE_CRYPTED))
++			wg_packet_create_data_done(first, peer);
++		else
++			kfree_skb_list(first); /* any other state: free the whole list */
++
++		wg_noise_keypair_put(keypair, false);
++		wg_peer_put(peer);
++	}
++}
++
++void wg_packet_encrypt_worker(struct work_struct *work)
++{
++	struct crypt_queue *queue = container_of(work, struct multicore_worker,
++						 work)->ptr;
++	struct sk_buff *first, *skb, *next;
++	simd_context_t simd_context;
++
++	simd_get(&simd_context);
++	while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {
++		enum packet_state state = PACKET_STATE_CRYPTED;
++
++		skb_list_walk_safe(first, skb, next) {
++			if (likely(encrypt_packet(skb,
++						  PACKET_CB(first)->keypair,
++						  &simd_context))) {
++				wg_reset_packet(skb);
++			} else {
++				state = PACKET_STATE_DEAD; /* one failure marks the whole list */
++				break;
++			}
++		}
++		wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
++					  state); /* state is attached to the list head */
++
++		simd_relax(&simd_context);
++	}
++	simd_put(&simd_context);
++}
++
++static void wg_packet_create_data(struct sk_buff *first)
++{
++	struct wg_peer *peer = PACKET_PEER(first);
++	struct wg_device *wg = peer->device;
++	int ret = -EINVAL; /* stays -EINVAL when the peer is already dead */
++
++	rcu_read_lock_bh();
++	if (unlikely(READ_ONCE(peer->is_dead)))
++		goto err;
++
++	ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue,
++						   &peer->tx_queue, first,
++						   wg->packet_crypt_wq,
++						   &wg->encrypt_queue.last_cpu);
++	if (unlikely(ret == -EPIPE))
++		wg_queue_enqueue_per_peer(&peer->tx_queue, first,
++					  PACKET_STATE_DEAD);
++err:
++	rcu_read_unlock_bh();
++	if (likely(!ret || ret == -EPIPE))
++		return; /* in both cases first was handed to a queue above */
++	wg_noise_keypair_put(PACKET_CB(first)->keypair, false);
++	wg_peer_put(peer);
++	kfree_skb_list(first);
++}
++
++void wg_packet_purge_staged_packets(struct wg_peer *peer)
++{
++	spin_lock_bh(&peer->staged_packet_queue.lock);
++	peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen; /* count before purging */
++	__skb_queue_purge(&peer->staged_packet_queue);
++	spin_unlock_bh(&peer->staged_packet_queue.lock);
++}
++
++void wg_packet_send_staged_packets(struct wg_peer *peer)
++{
++	struct noise_symmetric_key *key;
++	struct noise_keypair *keypair;
++	struct sk_buff_head packets;
++	struct sk_buff *skb;
++
++	/* Steal the current queue into our local one. */
++	__skb_queue_head_init(&packets);
++	spin_lock_bh(&peer->staged_packet_queue.lock);
++	skb_queue_splice_init(&peer->staged_packet_queue, &packets);
++	spin_unlock_bh(&peer->staged_packet_queue.lock);
++	if (unlikely(skb_queue_empty(&packets)))
++		return;
++
++	/* First we make sure we have a valid reference to a valid key. */
++	rcu_read_lock_bh();
++	keypair = wg_noise_keypair_get(
++		rcu_dereference_bh(peer->keypairs.current_keypair));
++	rcu_read_unlock_bh();
++	if (unlikely(!keypair))
++		goto out_nokey;
++	key = &keypair->sending;
++	if (unlikely(!READ_ONCE(key->is_valid)))
++		goto out_nokey;
++	if (unlikely(wg_birthdate_has_expired(key->birthdate,
++					      REJECT_AFTER_TIME)))
++		goto out_invalid;
++
++	/* After we know we have a somewhat valid key, we now try to assign
++	 * nonces to all of the packets in the queue. If we can't assign nonces
++	 * for all of them, we just consider it a failure and wait for the next
++	 * handshake.
++	 */
++	skb_queue_walk(&packets, skb) {
++		/* 0 for no outer TOS: no leak. TODO: at some later point, we
++		 * might consider using flowi->tos as outer instead.
++		 */
++		PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb);
++		PACKET_CB(skb)->nonce =
++				atomic64_inc_return(&key->counter.counter) - 1;
++		if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES))
++			goto out_invalid;
++	}
++
++	packets.prev->next = NULL; /* last skb's next = NULL: a NULL-terminated list */
++	wg_peer_get(keypair->entry.peer); /* put by wg_packet_tx_worker() or on enqueue failure */
++	PACKET_CB(packets.next)->keypair = keypair;
++	wg_packet_create_data(packets.next);
++	return;
++
++out_invalid:
++	WRITE_ONCE(key->is_valid, false);
++out_nokey:
++	wg_noise_keypair_put(keypair, false);
++
++	/* We orphan the packets if we're waiting on a handshake, so that they
++	 * don't block a socket's pool.
++	 */
++	skb_queue_walk(&packets, skb)
++		skb_orphan(skb);
++	/* Then we put them back on the top of the queue. We're not too
++	 * concerned about accidentally getting things a little out of order if
++	 * packets are being added really fast, because this queue is for before
++	 * packets can even be sent and it's small anyway.
++	 */
++	spin_lock_bh(&peer->staged_packet_queue.lock);
++	skb_queue_splice(&packets, &peer->staged_packet_queue);
++	spin_unlock_bh(&peer->staged_packet_queue.lock);
++
++	/* If we're exiting because there's something wrong with the key, it
++	 * means we should initiate a new handshake.
++	 */
++	wg_packet_send_queued_handshake_initiation(peer, false);
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/socket.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,437 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "device.h"
++#include "peer.h"
++#include "socket.h"
++#include "queueing.h"
++#include "messages.h"
++
++#include <linux/ctype.h>
++#include <linux/net.h>
++#include <linux/if_vlan.h>
++#include <linux/if_ether.h>
++#include <linux/inetdevice.h>
++#include <net/udp_tunnel.h>
++#include <net/ipv6.h>
++/* Transmit @skb to @endpoint via wg->sock4; @skb is freed on any error path. */
++static int send4(struct wg_device *wg, struct sk_buff *skb,
++		 struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
++{
++	struct flowi4 fl = {
++		.saddr = endpoint->src4.s_addr,
++		.daddr = endpoint->addr4.sin_addr.s_addr,
++		.fl4_dport = endpoint->addr4.sin_port,
++		.flowi4_mark = wg->fwmark,
++		.flowi4_proto = IPPROTO_UDP
++	};
++	struct rtable *rt = NULL;
++	struct sock *sock;
++	int ret = 0;
++
++	skb_mark_not_on_list(skb);
++	skb->dev = wg->dev;
++	skb->mark = wg->fwmark;
++
++	rcu_read_lock_bh();
++	sock = rcu_dereference_bh(wg->sock4);
++	/* No IPv4 socket is currently installed on the device. */
++	if (unlikely(!sock)) {
++		ret = -ENONET;
++		goto err;
++	}
++
++	fl.fl4_sport = inet_sk(sock)->inet_sport;
++	/* @cache is optional; the reply path passes NULL to skip caching. */
++	if (cache)
++		rt = dst_cache_get_ip4(cache, &fl.saddr);
++	/* Nothing cached: check the source address, then look up a route. */
++	if (!rt) {
++		security_sk_classify_flow(sock, flowi4_to_flowi(&fl));
++		if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0,
++						fl.saddr, RT_SCOPE_HOST))) {
++			endpoint->src4.s_addr = 0;
++			*(__force __be32 *)&endpoint->src_if4 = 0;
++			fl.saddr = 0;
++			if (cache)
++				dst_cache_reset(cache);
++		}
++		rt = ip_route_output_flow(sock_net(sock), &fl, sock);
++		if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) &&
++			     PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) &&
++			     rt->dst.dev->ifindex != endpoint->src_if4)))) {
++			endpoint->src4.s_addr = 0;
++			*(__force __be32 *)&endpoint->src_if4 = 0;
++			fl.saddr = 0; /* retry below without a source hint */
++			if (cache)
++				dst_cache_reset(cache);
++			if (!IS_ERR(rt))
++				ip_rt_put(rt);
++			rt = ip_route_output_flow(sock_net(sock), &fl, sock);
++		}
++		if (unlikely(IS_ERR(rt))) {
++			ret = PTR_ERR(rt);
++			net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
++					    wg->dev->name, &endpoint->addr, ret);
++			goto err;
++		} else if (unlikely(rt->dst.dev == skb->dev)) {
++			ip_rt_put(rt);
++			ret = -ELOOP;
++			net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
++					    wg->dev->name, &endpoint->addr);
++			goto err;
++		}
++		if (cache)
++			dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
++	}
++
++	skb->ignore_df = 1;
++	udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds,
++			    ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport,
++			    fl.fl4_dport, false, false);
++	goto out;
++
++err:
++	kfree_skb(skb);
++out:
++	rcu_read_unlock_bh();
++	return ret;
++}
++
++/* Send @skb to @endpoint over the device's IPv6 UDP socket. @skb is always
++ * consumed, even on failure or when IPv6 is compiled out. Returns 0 or -errno.
++ */
++static int send6(struct wg_device *wg, struct sk_buff *skb,
++		 struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
++{
++#if IS_ENABLED(CONFIG_IPV6)
++	struct flowi6 fl = {
++		.saddr = endpoint->src6,
++		.daddr = endpoint->addr6.sin6_addr,
++		.fl6_dport = endpoint->addr6.sin6_port,
++		.flowi6_mark = wg->fwmark,
++		.flowi6_oif = endpoint->addr6.sin6_scope_id,
++		.flowi6_proto = IPPROTO_UDP
++		/* TODO: addr->sin6_flowinfo */
++	};
++	struct dst_entry *dst = NULL;
++	struct sock *sock;
++	int ret = 0;
++
++	skb_mark_not_on_list(skb);
++	skb->dev = wg->dev;
++	skb->mark = wg->fwmark;
++
++	rcu_read_lock_bh();
++	sock = rcu_dereference_bh(wg->sock6);
++	if (unlikely(!sock)) {
++		ret = -ENONET;
++		goto err;
++	}
++	fl.fl6_sport = inet_sk(sock)->inet_sport;
++	if (cache)
++		dst = dst_cache_get_ip6(cache, &fl.saddr);
++	/* Nothing cached: check the source address, then look up a route. */
++	if (!dst) {
++		security_sk_classify_flow(sock, flowi6_to_flowi(&fl));
++		if (unlikely(!ipv6_addr_any(&fl.saddr) &&
++			     !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) {
++			endpoint->src6 = fl.saddr = in6addr_any;
++			if (cache)
++				dst_cache_reset(cache);
++		}
++		dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl,
++						      NULL);
++		if (unlikely(IS_ERR(dst))) {
++			ret = PTR_ERR(dst);
++			net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
++					    wg->dev->name, &endpoint->addr, ret);
++			goto err;
++		} else if (unlikely(dst->dev == skb->dev)) {
++			dst_release(dst);
++			ret = -ELOOP;
++			net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
++					    wg->dev->name, &endpoint->addr);
++			goto err;
++		}
++		if (cache)
++			dst_cache_set_ip6(cache, dst, &fl.saddr);
++	}
++
++	skb->ignore_df = 1;
++	udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds,
++			     ip6_dst_hoplimit(dst), 0, fl.fl6_sport,
++			     fl.fl6_dport, false);
++	goto out;
++err:
++	kfree_skb(skb);
++out:
++	rcu_read_unlock_bh();
++	return ret;
++#else
++	kfree_skb(skb); /* callers never free after send6(): don't leak it */
++	return -EAFNOSUPPORT;
++#endif
++}
++/* Send @skb to @peer's current endpoint and count its bytes if that worked. */
++int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds)
++{
++	size_t skb_len = skb->len; /* saved: skb is handed off below */
++	int ret = -EAFNOSUPPORT;
++
++	read_lock_bh(&peer->endpoint_lock);
++	if (peer->endpoint.addr.sa_family == AF_INET)
++		ret = send4(peer->device, skb, &peer->endpoint, ds,
++			    &peer->endpoint_cache);
++	else if (peer->endpoint.addr.sa_family == AF_INET6)
++		ret = send6(peer->device, skb, &peer->endpoint, ds,
++			    &peer->endpoint_cache);
++	else
++		dev_kfree_skb(skb); /* endpoint has no usable address family */
++	if (likely(!ret))
++		peer->tx_bytes += skb_len;
++	read_unlock_bh(&peer->endpoint_lock);
++
++	return ret;
++}
++/* Copy @buffer (@len bytes) into a new skb and send it to @peer's endpoint. */
++int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer,
++				  size_t len, u8 ds)
++{
++	struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
++
++	if (unlikely(!skb))
++		return -ENOMEM;
++	/* Leave SKB_HEADER_LEN bytes of headroom in front of the payload. */
++	skb_reserve(skb, SKB_HEADER_LEN);
++	skb_set_inner_network_header(skb, 0);
++	skb_put_data(skb, buffer, len);
++	return wg_socket_send_skb_to_peer(peer, skb, ds);
++}
++/* Send @buffer to the sender of @in_skb, using an uncached, one-off route. */
++int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg,
++					  struct sk_buff *in_skb, void *buffer,
++					  size_t len)
++{
++	int ret = 0;
++	struct sk_buff *skb;
++	struct endpoint endpoint;
++
++	if (unlikely(!in_skb))
++		return -EINVAL;
++	ret = wg_socket_endpoint_from_skb(&endpoint, in_skb);
++	if (unlikely(ret < 0))
++		return ret;
++	/* The reply goes into its own skb; @in_skb is only read, never sent. */
++	skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
++	if (unlikely(!skb))
++		return -ENOMEM;
++	skb_reserve(skb, SKB_HEADER_LEN);
++	skb_set_inner_network_header(skb, 0);
++	skb_put_data(skb, buffer, len);
++
++	if (endpoint.addr.sa_family == AF_INET)
++		ret = send4(wg, skb, &endpoint, 0, NULL);
++	else if (endpoint.addr.sa_family == AF_INET6)
++		ret = send6(wg, skb, &endpoint, 0, NULL);
++	/* No other possibilities if the endpoint is valid, which it is,
++	 * as we checked above.
++	 */
++
++	return ret;
++}
++/* Build the endpoint for replying to @skb; -EINVAL if not IPv4 or IPv6. */
++int wg_socket_endpoint_from_skb(struct endpoint *endpoint,
++				const struct sk_buff *skb)
++{
++	memset(endpoint, 0, sizeof(*endpoint));
++	if (skb->protocol == htons(ETH_P_IP)) {
++		endpoint->addr4.sin_family = AF_INET;
++		endpoint->addr4.sin_port = udp_hdr(skb)->source;
++		endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
++		endpoint->src4.s_addr = ip_hdr(skb)->daddr; /* our side */
++		endpoint->src_if4 = skb->skb_iif;
++	} else if (skb->protocol == htons(ETH_P_IPV6)) {
++		endpoint->addr6.sin6_family = AF_INET6;
++		endpoint->addr6.sin6_port = udp_hdr(skb)->source;
++		endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr;
++		endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id(
++			&ipv6_hdr(skb)->saddr, skb->skb_iif);
++		endpoint->src6 = ipv6_hdr(skb)->daddr; /* our side */
++	} else {
++		return -EINVAL;
++	}
++	return 0;
++}
++/* True if @a and @b are the same IPv4 or IPv6 endpoint, or are both unset. */
++static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b)
++{
++	return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET &&
++		a->addr4.sin_port == b->addr4.sin_port &&
++		a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr &&
++		a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) ||
++	       (a->addr.sa_family == AF_INET6 &&
++		b->addr.sa_family == AF_INET6 &&
++		a->addr6.sin6_port == b->addr6.sin6_port &&
++		ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) &&
++		a->addr6.sin6_scope_id == b->addr6.sin6_scope_id &&
++		ipv6_addr_equal(&a->src6, &b->src6)) ||
++	       unlikely(!a->addr.sa_family && !b->addr.sa_family);
++}
++/* Adopt @endpoint for @peer if it differs, and reset the peer's dst cache. */
++void wg_socket_set_peer_endpoint(struct wg_peer *peer,
++				 const struct endpoint *endpoint)
++{
++	/* First we check unlocked, in order to optimize, since it's pretty rare
++	 * that an endpoint will change. If we happen to be mid-write, and two
++	 * CPUs wind up writing the same thing or something slightly different,
++	 * it doesn't really matter much either.
++	 */
++	if (endpoint_eq(endpoint, &peer->endpoint))
++		return;
++	write_lock_bh(&peer->endpoint_lock);
++	if (endpoint->addr.sa_family == AF_INET) {
++		peer->endpoint.addr4 = endpoint->addr4;
++		peer->endpoint.src4 = endpoint->src4;
++		peer->endpoint.src_if4 = endpoint->src_if4;
++	} else if (endpoint->addr.sa_family == AF_INET6) {
++		peer->endpoint.addr6 = endpoint->addr6;
++		peer->endpoint.src6 = endpoint->src6;
++	} else {
++		goto out; /* unknown family: leave the peer untouched */
++	}
++	dst_cache_reset(&peer->endpoint_cache);
++out:
++	write_unlock_bh(&peer->endpoint_lock);
++}
++/* Point @peer at the source of @skb, provided an endpoint can be derived. */
++void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer,
++					  const struct sk_buff *skb)
++{
++	struct endpoint endpoint;
++
++	if (!wg_socket_endpoint_from_skb(&endpoint, skb))
++		wg_socket_set_peer_endpoint(peer, &endpoint);
++}
++/* Zero @peer's stored source address (src6) and reset its dst cache. */
++void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer)
++{
++	write_lock_bh(&peer->endpoint_lock);
++	memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6));
++	dst_cache_reset(&peer->endpoint_cache);
++	write_unlock_bh(&peer->endpoint_lock);
++}
++/* encap_rcv hook for our UDP sockets: hand @skb to the wg device or drop it. */
++static int wg_receive(struct sock *sk, struct sk_buff *skb)
++{
++	struct wg_device *wg;
++
++	if (unlikely(!sk))
++		goto err;
++	wg = sk->sk_user_data; /* set to the device in wg_socket_init() */
++	if (unlikely(!wg))
++		goto err;
++	skb_mark_not_on_list(skb);
++	wg_packet_receive(wg, skb);
++	return 0;
++
++err:
++	kfree_skb(skb);
++	return 0;
++}
++/* Tear down one of the device's tunnel sockets; a NULL @sock is ignored. */
++static void sock_free(struct sock *sock)
++{
++	if (unlikely(!sock))
++		return;
++	sk_clear_memalloc(sock); /* pairs with sk_set_memalloc() */
++	udp_tunnel_sock_release(sock->sk_socket);
++}
++/* Socket options shared by the IPv4 and IPv6 tunnel sockets. */
++static void set_sock_opts(struct socket *sock)
++{
++	sock->sk->sk_allocation = GFP_ATOMIC;
++	sock->sk->sk_sndbuf = INT_MAX;
++	sk_set_memalloc(sock->sk);
++}
++/* Create and install the device's UDP socket(s) bound to @port (host order). */
++int wg_socket_init(struct wg_device *wg, u16 port)
++{
++	int ret;
++	struct udp_tunnel_sock_cfg cfg = {
++		.sk_user_data = wg,
++		.encap_type = 1,
++		.encap_rcv = wg_receive
++	};
++	struct socket *new4 = NULL, *new6 = NULL;
++	struct udp_port_cfg port4 = {
++		.family = AF_INET,
++		.local_ip.s_addr = htonl(INADDR_ANY),
++		.local_udp_port = htons(port),
++		.use_udp_checksums = true
++	};
++#if IS_ENABLED(CONFIG_IPV6)
++	int retries = 0;
++	struct udp_port_cfg port6 = {
++		.family = AF_INET6,
++		.local_ip6 = IN6ADDR_ANY_INIT,
++		.use_udp6_tx_checksums = true,
++		.use_udp6_rx_checksums = true,
++		.ipv6_v6only = true
++	};
++#endif
++
++#if IS_ENABLED(CONFIG_IPV6)
++retry:
++#endif
++
++	ret = udp_sock_create(wg->creating_net, &port4, &new4);
++	if (ret < 0) {
++		pr_err("%s: Could not create IPv4 socket\n", wg->dev->name);
++		return ret;
++	}
++	set_sock_opts(new4);
++	setup_udp_tunnel_sock(wg->creating_net, new4, &cfg);
++	/* IPv6 must bind the same port that the IPv4 socket ended up with. */
++#if IS_ENABLED(CONFIG_IPV6)
++	if (ipv6_mod_enabled()) {
++		port6.local_udp_port = inet_sk(new4->sk)->inet_sport;
++		ret = udp_sock_create(wg->creating_net, &port6, &new6);
++		if (ret < 0) {
++			udp_tunnel_sock_release(new4);
++			if (ret == -EADDRINUSE && !port && retries++ < 100)
++				goto retry; /* let v4 pick another port */
++			pr_err("%s: Could not create IPv6 socket\n",
++			       wg->dev->name);
++			return ret;
++		}
++		set_sock_opts(new6);
++		setup_udp_tunnel_sock(wg->creating_net, new6, &cfg);
++	}
++#endif
++
++	wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL);
++	return 0;
++}
++/* Install @new4/@new6 (either may be NULL) and release the sockets replaced. */
++void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
++		      struct sock *new6)
++{
++	struct sock *old4, *old6;
++
++	mutex_lock(&wg->socket_update_lock);
++	old4 = rcu_dereference_protected(wg->sock4,
++				lockdep_is_held(&wg->socket_update_lock));
++	old6 = rcu_dereference_protected(wg->sock6,
++				lockdep_is_held(&wg->socket_update_lock));
++	rcu_assign_pointer(wg->sock4, new4);
++	rcu_assign_pointer(wg->sock6, new6);
++	if (new4)
++		wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
++	mutex_unlock(&wg->socket_update_lock);
++	synchronize_rcu(); /* wait out RCU readers of the old sockets */
++	sock_free(old4);
++	sock_free(old6);
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/timers.c	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,243 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include "timers.h"
++#include "device.h"
++#include "peer.h"
++#include "queueing.h"
++#include "socket.h"
++
++/*
++ * - Timer for retransmitting the handshake if we don't hear back within
++ * `REKEY_TIMEOUT` seconds (plus jitter).
++ *
++ * - Timer for sending an empty packet if we have received a packet but have
++ * not sent one ourselves for `KEEPALIVE_TIMEOUT` seconds.
++ *
++ * - Timer for initiating a new handshake if we have sent a packet but have not
++ * received one (even an empty one) for `KEEPALIVE_TIMEOUT + REKEY_TIMEOUT`
++ * seconds (plus jitter).
++ *
++ * - Timer for zeroing out all ephemeral keys after `REJECT_AFTER_TIME * 3`
++ * seconds if no new keys have been received.
++ *
++ * - Timer for, if enabled, sending an empty authenticated packet every
++ * user-specified number of seconds.
++ */
++/* (Re)arm @timer only if the device is up and @peer is not marked dead. */
++static inline void mod_peer_timer(struct wg_peer *peer,
++				  struct timer_list *timer,
++				  unsigned long expires)
++{
++	rcu_read_lock_bh();
++	if (likely(netif_running(peer->device->dev) &&
++		   !READ_ONCE(peer->is_dead)))
++		mod_timer(timer, expires);
++	rcu_read_unlock_bh();
++}
++/* Timer callback: retry the handshake initiation, or give up after too many. */
++static void wg_expired_retransmit_handshake(struct timer_list *timer)
++{
++	struct wg_peer *peer = from_timer(peer, timer,
++					  timer_retransmit_handshake);
++
++	if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) {
++		pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n",
++			 peer->device->dev->name, peer->internal_id,
++			 &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2);
++
++		del_timer(&peer->timer_send_keepalive);
++		/* We drop all packets without a keypair and don't try again,
++		 * if we try unsuccessfully for too long to make a handshake.
++		 */
++		wg_packet_purge_staged_packets(peer);
++
++		/* We set a timer for destroying any residue that might be left
++		 * of a partial exchange.
++		 */
++		if (!timer_pending(&peer->timer_zero_key_material))
++			mod_peer_timer(peer, &peer->timer_zero_key_material,
++				       jiffies + REJECT_AFTER_TIME * 3 * HZ);
++	} else {
++		++peer->timer_handshake_attempts;
++		pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n",
++			 peer->device->dev->name, peer->internal_id,
++			 &peer->endpoint.addr, REKEY_TIMEOUT,
++			 peer->timer_handshake_attempts + 1);
++
++		/* We clear the endpoint's source address, in case it is the
++		 * cause of trouble.
++		 */
++		wg_socket_clear_peer_endpoint_src(peer);
++
++		wg_packet_send_queued_handshake_initiation(peer, true);
++	}
++}
++/* Timer callback: send a keepalive and re-arm if another one was requested. */
++static void wg_expired_send_keepalive(struct timer_list *timer)
++{
++	struct wg_peer *peer = from_timer(peer, timer, timer_send_keepalive);
++
++	wg_packet_send_keepalive(peer);
++	if (peer->timer_need_another_keepalive) {
++		peer->timer_need_another_keepalive = false;
++		mod_peer_timer(peer, &peer->timer_send_keepalive,
++			       jiffies + KEEPALIVE_TIMEOUT * HZ);
++	}
++}
++/* Timer callback: the peer went quiet, so start a fresh handshake. */
++static void wg_expired_new_handshake(struct timer_list *timer)
++{
++	struct wg_peer *peer = from_timer(peer, timer, timer_new_handshake);
++
++	pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n",
++		 peer->device->dev->name, peer->internal_id,
++		 &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT);
++	/* We clear the endpoint's source address, in case it is the cause of
++	 * trouble.
++	 */
++	wg_socket_clear_peer_endpoint_src(peer);
++	wg_packet_send_queued_handshake_initiation(peer, false);
++}
++/* Timer callback: queue clear_peer_work, which does the actual key wiping. */
++static void wg_expired_zero_key_material(struct timer_list *timer)
++{
++	struct wg_peer *peer = from_timer(peer, timer, timer_zero_key_material);
++
++	rcu_read_lock_bh();
++	if (!READ_ONCE(peer->is_dead)) {
++		wg_peer_get(peer); /* reference held for the queued work */
++		if (!queue_work(peer->device->handshake_send_wq,
++				&peer->clear_peer_work))
++			/* If the work was already on the queue, we want to drop
++			 * the extra reference.
++			 */
++			wg_peer_put(peer);
++	}
++	rcu_read_unlock_bh();
++}
++/* Work item: clear handshake and keypair state, then drop the queuer's ref. */
++static void wg_queued_expired_zero_key_material(struct work_struct *work)
++{
++	struct wg_peer *peer = container_of(work, struct wg_peer,
++					    clear_peer_work);
++
++	pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n",
++		 peer->device->dev->name, peer->internal_id,
++		 &peer->endpoint.addr, REJECT_AFTER_TIME * 3);
++	wg_noise_handshake_clear(&peer->handshake);
++	wg_noise_keypairs_clear(&peer->keypairs);
++	wg_peer_put(peer);
++}
++/* Timer callback: send a keepalive if persistent keepalives are still on. */
++static void wg_expired_send_persistent_keepalive(struct timer_list *timer)
++{
++	struct wg_peer *peer = from_timer(peer, timer,
++					  timer_persistent_keepalive);
++
++	if (likely(peer->persistent_keepalive_interval))
++		wg_packet_send_keepalive(peer);
++}
++
++/* Call after sending authenticated data; arms timer_new_handshake if idle. */
++void wg_timers_data_sent(struct wg_peer *peer)
++{
++	if (!timer_pending(&peer->timer_new_handshake))
++		mod_peer_timer(peer, &peer->timer_new_handshake,
++			jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ +
++			prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES));
++}
++
++/* Call after receiving authenticated data; schedules a keepalive in reply. */
++void wg_timers_data_received(struct wg_peer *peer)
++{
++	if (likely(netif_running(peer->device->dev))) {
++		if (!timer_pending(&peer->timer_send_keepalive))
++			mod_peer_timer(peer, &peer->timer_send_keepalive,
++				       jiffies + KEEPALIVE_TIMEOUT * HZ);
++		else /* already pending: have the callback re-arm once */
++			peer->timer_need_another_keepalive = true;
++	}
++}
++
++/* Should be called after any type of authenticated packet is sent, whether
++ * keepalive, data, or handshake. Cancels the pending keepalive timer.
++ */
++void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer)
++{
++	del_timer(&peer->timer_send_keepalive);
++}
++
++/* Should be called after any type of authenticated packet is received, whether
++ * keepalive, data, or handshake. Cancels the pending new-handshake timer.
++ */
++void wg_timers_any_authenticated_packet_received(struct wg_peer *peer)
++{
++	del_timer(&peer->timer_new_handshake);
++}
++
++/* Call after sending a handshake initiation; arms the retransmit timer. */
++void wg_timers_handshake_initiated(struct wg_peer *peer)
++{
++	mod_peer_timer(peer, &peer->timer_retransmit_handshake,
++		       jiffies + REKEY_TIMEOUT * HZ +
++		       prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES));
++}
++
++/* Should be called after a handshake response message is received and processed
++ * or when getting key confirmation via the first data message.
++ */
++void wg_timers_handshake_complete(struct wg_peer *peer)
++{
++	del_timer(&peer->timer_retransmit_handshake);
++	peer->timer_handshake_attempts = 0; /* restart the retry count */
++	peer->sent_lastminute_handshake = false;
++	ktime_get_real_ts64(&peer->walltime_last_handshake);
++}
++
++/* Should be called after an ephemeral key is created, which is before sending a
++ * handshake response or after receiving one. Re-arms timer_zero_key_material.
++ */
++void wg_timers_session_derived(struct wg_peer *peer)
++{
++	mod_peer_timer(peer, &peer->timer_zero_key_material,
++		       jiffies + REJECT_AFTER_TIME * 3 * HZ);
++}
++
++/* Should be called before a packet with authentication, whether
++ * keepalive, data, or handshake, is sent, or after one is received.
++ */
++void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer)
++{
++	if (peer->persistent_keepalive_interval)
++		mod_peer_timer(peer, &peer->timer_persistent_keepalive,
++			jiffies + peer->persistent_keepalive_interval * HZ);
++}
++/* Set up @peer's timers and clear_peer_work, and reset the timer state. */
++void wg_timers_init(struct wg_peer *peer)
++{
++	timer_setup(&peer->timer_retransmit_handshake,
++		    wg_expired_retransmit_handshake, 0);
++	timer_setup(&peer->timer_send_keepalive, wg_expired_send_keepalive, 0);
++	timer_setup(&peer->timer_new_handshake, wg_expired_new_handshake, 0);
++	timer_setup(&peer->timer_zero_key_material,
++		    wg_expired_zero_key_material, 0);
++	timer_setup(&peer->timer_persistent_keepalive,
++		    wg_expired_send_persistent_keepalive, 0);
++	INIT_WORK(&peer->clear_peer_work, wg_queued_expired_zero_key_material);
++	peer->timer_handshake_attempts = 0;
++	peer->sent_lastminute_handshake = false;
++	peer->timer_need_another_keepalive = false;
++}
++/* Synchronously cancel all of @peer's timers and flush clear_peer_work. */
++void wg_timers_stop(struct wg_peer *peer)
++{
++	del_timer_sync(&peer->timer_retransmit_handshake);
++	del_timer_sync(&peer->timer_send_keepalive);
++	del_timer_sync(&peer->timer_new_handshake);
++	del_timer_sync(&peer->timer_zero_key_material);
++	del_timer_sync(&peer->timer_persistent_keepalive);
++	flush_work(&peer->clear_peer_work);
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/allowedips.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,59 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_ALLOWEDIPS_H
++#define _WG_ALLOWEDIPS_H
++
++#include <linux/mutex.h>
++#include <linux/ip.h>
++#include <linux/ipv6.h>
++
++struct wg_peer;
++/* One node of an allowedips table, reachable from root4/root6 below. */
++struct allowedips_node {
++	struct wg_peer __rcu *peer;
++	struct allowedips_node __rcu *bit[2];
++	/* While it may seem scandalous that we waste space for v4,
++	 * we're alloc'ing to the nearest power of 2 anyway, so this
++	 * doesn't actually make a difference.
++	 */
++	u8 bits[16] __aligned(__alignof(u64));
++	u8 cidr, bit_at_a, bit_at_b, bitlen;
++
++	/* Keep rarely used list at bottom to be beyond cache line. */
++	union { /* the two members share storage */
++		struct list_head peer_list;
++		struct rcu_head rcu;
++	};
++};
++/* Per-table state: separate RCU-annotated roots for IPv4 and IPv6. */
++struct allowedips {
++	struct allowedips_node __rcu *root4;
++	struct allowedips_node __rcu *root6;
++	u64 seq; /* NOTE(review): presumably a change counter — confirm */
++};
++
++void wg_allowedips_init(struct allowedips *table);
++void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);
++int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
++			    u8 cidr, struct wg_peer *peer, struct mutex *lock);
++int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
++			    u8 cidr, struct wg_peer *peer, struct mutex *lock);
++void wg_allowedips_remove_by_peer(struct allowedips *table,
++				  struct wg_peer *peer, struct mutex *lock);
++/* The ip input pointer should be __aligned(__alignof(u64)). */
++int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr);
++
++/* These return a strong reference to a peer: */
++struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
++					 struct sk_buff *skb);
++struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
++					 struct sk_buff *skb);
++
++#ifdef DEBUG
++bool wg_allowedips_selftest(void);
++#endif
++
++#endif /* _WG_ALLOWEDIPS_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/checksum/checksum_partial_compat.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,208 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#include <net/route.h>
++#include <net/esp.h>
++#include <net/ip.h>
++#include <net/ipv6.h>
++#include <net/ip6_checksum.h>
++
++#define IP6_MF          0x0001
++#define IP6_OFFSET      0xFFF8
++static inline int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, unsigned int max)
++{ /* Ensure >= @len linear bytes by linearizing up to @max; 0 or -errno. */
++	if (skb_headlen(skb) >= len)
++		return 0; /* enough is linear already */
++	if (max > skb->len)
++		max = skb->len; /* cannot pull more than the packet holds */
++	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
++		return -ENOMEM;
++	if (skb_headlen(skb) < len)
++		return -EPROTO; /* packet is shorter than @len */
++	return 0;
++}
++#define MAX_IP_HDR_LEN 128 /* linearize at most this much for IPv4 */
++static inline int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
++{ /* IPv4: set the partial-checksum offsets for unfragmented TCP or UDP. */
++	unsigned int off;
++	bool fragment;
++	int err;
++	fragment = false;
++	err = skb_maybe_pull_tail(skb, sizeof(struct iphdr), MAX_IP_HDR_LEN);
++	if (err < 0)
++		goto out;
++	if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
++		fragment = true;
++	off = ip_hdrlen(skb);
++	err = -EPROTO; /* default result for fragments and other protocols */
++	if (fragment)
++		goto out;
++	switch (ip_hdr(skb)->protocol) {
++	case IPPROTO_TCP:
++		err = skb_maybe_pull_tail(skb,
++					  off + sizeof(struct tcphdr),
++					  MAX_IP_HDR_LEN);
++		if (err < 0)
++			goto out;
++
++		if (!skb_partial_csum_set(skb, off,
++					  offsetof(struct tcphdr, check))) {
++			err = -EPROTO;
++			goto out;
++		}
++		/* If asked, seed the field from the pseudo-header (inverted). */
++		if (recalculate)
++			tcp_hdr(skb)->check =
++				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
++						   ip_hdr(skb)->daddr,
++						   skb->len - off,
++						   IPPROTO_TCP, 0);
++		break;
++	case IPPROTO_UDP:
++		err = skb_maybe_pull_tail(skb,
++					  off + sizeof(struct udphdr),
++					  MAX_IP_HDR_LEN);
++		if (err < 0)
++			goto out;
++
++		if (!skb_partial_csum_set(skb, off,
++					  offsetof(struct udphdr, check))) {
++			err = -EPROTO;
++			goto out;
++		}
++		/* If asked, seed the field from the pseudo-header (inverted). */
++		if (recalculate)
++			udp_hdr(skb)->check =
++				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
++						   ip_hdr(skb)->daddr,
++						   skb->len - off,
++						   IPPROTO_UDP, 0);
++		break;
++	default:
++		goto out;
++	}
++	err = 0;
++out:
++	return err;
++}
++#define MAX_IPV6_HDR_LEN 256 /* linearize at most this much for IPv6 */
++#define OPT_HDR(type, skb, off) \
++	(type *)(skb_network_header(skb) + (off))
++static inline int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
++{ /* IPv6: walk extension headers, then set TCP/UDP partial-checksum offsets. */
++	int err;
++	u8 nexthdr;
++	unsigned int off;
++	unsigned int len;
++	bool fragment;
++	bool done;
++	fragment = false;
++	done = false;
++	off = sizeof(struct ipv6hdr);
++	err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
++	if (err < 0)
++		goto out;
++	nexthdr = ipv6_hdr(skb)->nexthdr;
++	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
++	while (off <= len && !done) {
++		switch (nexthdr) {
++		case IPPROTO_DSTOPTS:
++		case IPPROTO_HOPOPTS:
++		case IPPROTO_ROUTING: {
++			struct ipv6_opt_hdr *hp;
++
++			err = skb_maybe_pull_tail(skb, off + sizeof(struct ipv6_opt_hdr), MAX_IPV6_HDR_LEN);
++			if (err < 0)
++				goto out;
++			hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
++			nexthdr = hp->nexthdr;
++			off += ipv6_optlen(hp);
++			break;
++		}
++		case IPPROTO_FRAGMENT: {
++			struct frag_hdr *hp;
++			err = skb_maybe_pull_tail(skb, off + sizeof(struct frag_hdr), MAX_IPV6_HDR_LEN);
++			if (err < 0)
++				goto out;
++			hp = OPT_HDR(struct frag_hdr, skb, off);
++			if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
++				fragment = true;
++			nexthdr = hp->nexthdr;
++			off += sizeof(struct frag_hdr);
++			break;
++		}
++		default:
++			done = true; /* first non-extension header reached */
++			break;
++		}
++	}
++	err = -EPROTO;
++	if (!done || fragment) /* ran past the payload, or a fragment */
++		goto out;
++	switch (nexthdr) {
++		case IPPROTO_TCP:
++			err = skb_maybe_pull_tail(skb,
++						  off + sizeof(struct tcphdr),
++						  MAX_IPV6_HDR_LEN);
++			if (err < 0)
++				goto out;
++
++			if (!skb_partial_csum_set(skb, off,
++						  offsetof(struct tcphdr, check))) {
++				err = -EPROTO;
++				goto out;
++			}
++			/* If asked, seed from the pseudo-header (inverted). */
++			if (recalculate)
++				tcp_hdr(skb)->check =
++					~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
++							 &ipv6_hdr(skb)->daddr,
++							 skb->len - off,
++							 IPPROTO_TCP, 0);
++			break;
++		case IPPROTO_UDP:
++			err = skb_maybe_pull_tail(skb,
++						  off + sizeof(struct udphdr),
++						  MAX_IPV6_HDR_LEN);
++			if (err < 0)
++				goto out;
++
++			if (!skb_partial_csum_set(skb, off,
++						  offsetof(struct udphdr, check))) {
++				err = -EPROTO;
++				goto out;
++			}
++			/* If asked, seed from the pseudo-header (inverted). */
++			if (recalculate)
++				udp_hdr(skb)->check =
++					~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
++							 &ipv6_hdr(skb)->daddr,
++							 skb->len - off,
++							 IPPROTO_UDP, 0);
++			break;
++		default:
++			goto out;
++	}
++	err = 0;
++out:
++	return err;
++}
++static inline int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
++{ /* Dispatch on skb->protocol; anything but IPv4/IPv6 yields -EPROTO. */
++	int err;
++	switch (skb->protocol) {
++	case htons(ETH_P_IP):
++		err = skb_checksum_setup_ip(skb, recalculate);
++		break;
++
++	case htons(ETH_P_IPV6):
++		err = skb_checksum_setup_ipv6(skb, recalculate);
++		break;
++	default:
++		err = -EPROTO;
++		break;
++	}
++	return err;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/compat-asm.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,78 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_COMPATASM_H
++#define _WG_COMPATASM_H
++
++#include <linux/linkage.h>
++#include <linux/kconfig.h>
++#include <linux/version.h>
++
++/* PaX compatibility */
++#if defined(RAP_PLUGIN)
++#undef ENTRY
++#define ENTRY RAP_ENTRY
++#endif
++/* ARM kernels before 3.17: define ret and its conditional ret<cc> variants. */
++#if defined(__LINUX_ARM_ARCH__) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
++	.irp	c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
++	.macro	ret\c, reg
++#if __LINUX_ARM_ARCH__ < 6
++	mov\c	pc, \reg
++#else
++	.ifeqs	"\reg", "lr"
++	bx\c	\reg
++	.else
++	mov\c	pc, \reg
++	.endif
++#endif
++	.endm
++	.endr
++#endif
++/* ARM before 3.15. NOTE(review): presumably assembler.h defines push/pull. */
++#if defined(__LINUX_ARM_ARCH__) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
++#include <asm/assembler.h>
++#define lspush push
++#define lspull pull
++#undef push
++#undef pull
++#endif
++/* Kernels before 5.5: spell SYM_FUNC_START/END as ENTRY/ENDPROC. */
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0)
++#define SYM_FUNC_START ENTRY
++#define SYM_FUNC_END ENDPROC
++#endif
++/* Kernels 5.5+: give every assembly routine below a zinc_ symbol prefix. */
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
++#define blake2s_compress_ssse3 zinc_blake2s_compress_ssse3
++#define blake2s_compress_avx512 zinc_blake2s_compress_avx512
++#define poly1305_init_arm zinc_poly1305_init_arm
++#define poly1305_blocks_arm zinc_poly1305_blocks_arm
++#define poly1305_emit_arm zinc_poly1305_emit_arm
++#define poly1305_blocks_neon zinc_poly1305_blocks_neon
++#define poly1305_emit_neon zinc_poly1305_emit_neon
++#define poly1305_init_mips zinc_poly1305_init_mips
++#define poly1305_blocks_mips zinc_poly1305_blocks_mips
++#define poly1305_emit_mips zinc_poly1305_emit_mips
++#define poly1305_init_x86_64 zinc_poly1305_init_x86_64
++#define poly1305_blocks_x86_64 zinc_poly1305_blocks_x86_64
++#define poly1305_emit_x86_64 zinc_poly1305_emit_x86_64
++#define poly1305_emit_avx zinc_poly1305_emit_avx
++#define poly1305_blocks_avx zinc_poly1305_blocks_avx
++#define poly1305_blocks_avx2 zinc_poly1305_blocks_avx2
++#define poly1305_blocks_avx512 zinc_poly1305_blocks_avx512
++#define curve25519_neon zinc_curve25519_neon
++#define hchacha20_ssse3 zinc_hchacha20_ssse3
++#define chacha20_ssse3 zinc_chacha20_ssse3
++#define chacha20_avx2 zinc_chacha20_avx2
++#define chacha20_avx512 zinc_chacha20_avx512
++#define chacha20_avx512vl zinc_chacha20_avx512vl
++#define chacha20_mips zinc_chacha20_mips
++#define chacha20_arm zinc_chacha20_arm
++#define hchacha20_arm zinc_hchacha20_arm
++#define chacha20_neon zinc_chacha20_neon
++#endif
++
++#endif /* _WG_COMPATASM_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/compat.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,1070 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_COMPAT_H
++#define _WG_COMPAT_H
++
++#include <linux/kconfig.h>
++#include <linux/version.h>
++#include <linux/types.h>
++#include <generated/utsrelease.h>
++
++#ifdef RHEL_MAJOR /* map RHEL_MAJOR/RHEL_MINOR onto the ISRHEL* switches tested further down */
++#if RHEL_MAJOR == 7
++#define ISRHEL7
++#elif RHEL_MAJOR == 8
++#define ISRHEL8
++#ifdef RHEL_MINOR
++#if RHEL_MINOR == 2
++#define ISRHEL82 /* only ever defined together with ISRHEL8 */
++#endif /* RHEL_MINOR == 2 */
++#endif /* RHEL_MINOR */
++#endif /* RHEL_MAJOR == 7 / == 8 */
++#endif /* RHEL_MAJOR */
++#ifdef UTS_UBUNTU_RELEASE_ABI
++#if LINUX_VERSION_CODE == KERNEL_VERSION(3, 13, 11)
++#define ISUBUNTU1404
++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)
++#define ISUBUNTU1604
++#endif
++#endif
++#ifdef CONFIG_SUSE_KERNEL
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
++#define ISOPENSUSE42
++#endif
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)
++#define ISOPENSUSE15
++#endif
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
++#error "WireGuard requires Linux >= 3.10"
++#endif
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
++#error "WireGuard has been merged into Linux >= 5.6 and therefore this compatibility module is no longer required."
++#endif
++
++#if defined(ISRHEL7)
++#include <linux/skbuff.h>
++#define headers_end headers_start
++#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
++#define headers_start data
++#define headers_end data
++#endif
++
++#include <linux/cache.h>
++#include <linux/init.h>
++#ifndef __ro_after_init
++#define __ro_after_init __read_mostly
++#endif
++
++#include <linux/compiler.h>
++#ifndef READ_ONCE
++#define READ_ONCE ACCESS_ONCE
++#endif
++#ifndef WRITE_ONCE
++#ifdef ACCESS_ONCE_RW
++#define WRITE_ONCE(p, v) (ACCESS_ONCE_RW(p) = (v))
++#else
++#define WRITE_ONCE(p, v) (ACCESS_ONCE(p) = (v))
++#endif
++#endif
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
++#include "udp_tunnel/udp_tunnel_partial_compat.h"
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(DEBUG) && defined(net_dbg_ratelimited)
++#undef net_dbg_ratelimited
++#define net_dbg_ratelimited(fmt, ...) do { if (0) no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
++#include <linux/rcupdate.h>
++#ifndef RCU_LOCKDEP_WARN
++#define RCU_LOCKDEP_WARN(cond, message) rcu_lockdep_assert(!(cond), message)
++#endif
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(ISRHEL7)
++#define ipv6_dst_lookup(a, b, c, d) ipv6_dst_lookup(b, c, d)
++#endif
++
++#if (LINUX_VERSION_CODE == KERNEL_VERSION(4, 4, 0) || \
++    (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 5) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)) || \
++    (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 17) && LINUX_VERSION_CODE > KERNEL_VERSION(3, 19, 0)) || \
++    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 27) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || \
++    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 8) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)) || \
++    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 40) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || \
++    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 54))) && !defined(ISUBUNTU1404) && !defined(ISRHEL7)
++#include <linux/if.h>
++#include <net/ip_tunnels.h>
++#define IP6_ECN_set_ce(a, b) IP6_ECN_set_ce(b)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && IS_ENABLED(CONFIG_IPV6) && !defined(ISRHEL7)
++#include <net/ipv6.h>
++struct ipv6_stub_type {
++	void *udpv6_encap_enable;
++	int (*ipv6_dst_lookup)(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6);
++};
++static const struct ipv6_stub_type ipv6_stub_impl = {
++	.udpv6_encap_enable = (void *)1,
++	.ipv6_dst_lookup = ip6_dst_lookup
++};
++static const struct ipv6_stub_type *ipv6_stub = &ipv6_stub_impl;
++#endif
++
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) && IS_ENABLED(CONFIG_IPV6) && !defined(ISOPENSUSE42) && !defined(ISRHEL7)
++#include <net/addrconf.h>
++static inline bool ipv6_mod_enabled(void)
++{
++	return ipv6_stub != NULL && ipv6_stub->udpv6_encap_enable != NULL;
++}
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) && !defined(ISRHEL7)
++#include <linux/skbuff.h>
++static inline void skb_reset_tc(struct sk_buff *skb)
++{
++#ifdef CONFIG_NET_CLS_ACT
++	skb->tc_verd = 0;
++#endif
++}
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
++#include <linux/random.h>
++#include <linux/siphash.h>
++static inline u32 __compat_get_random_u32(void)
++{
++	static siphash_key_t key;
++	static u32 counter = 0;
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
++	static bool has_seeded = false;
++	if (unlikely(!has_seeded)) {
++		get_random_bytes(&key, sizeof(key));
++		has_seeded = true;
++	}
++#else
++	get_random_once(&key, sizeof(key));
++#endif
++	return siphash_2u32(counter++, get_random_int(), &key);
++}
++#define get_random_u32 __compat_get_random_u32
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) && !defined(ISRHEL7)
++static inline void netif_keep_dst(struct net_device *dev)
++{
++	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
++}
++#define COMPAT_CANNOT_USE_CSUM_LEVEL
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && !defined(ISRHEL7)
++#include <linux/netdevice.h>
++#ifndef netdev_alloc_pcpu_stats
++#define pcpu_sw_netstats pcpu_tstats
++#endif
++#ifndef netdev_alloc_pcpu_stats
++#define netdev_alloc_pcpu_stats alloc_percpu
++#endif
++#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && !defined(ISRHEL7)
++#include <linux/netdevice.h>
++#ifndef netdev_alloc_pcpu_stats
++#define netdev_alloc_pcpu_stats(type)					\
++({									\
++	typeof(type) __percpu *pcpu_stats = alloc_percpu(type);		\
++	if (pcpu_stats)	{						\
++		int __cpu;						\
++		for_each_possible_cpu(__cpu) {				\
++			typeof(type) *stat;				\
++			stat = per_cpu_ptr(pcpu_stats, __cpu);		\
++			u64_stats_init(&stat->syncp);			\
++		}							\
++	}								\
++	pcpu_stats;							\
++})
++#endif
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && !defined(ISRHEL7)
++#include "checksum/checksum_partial_compat.h"
++static inline void *__compat_pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
++{
++	if (tail != skb) {
++		skb->data_len += len;
++		skb->len += len;
++	}
++	return skb_put(tail, len);
++}
++#define pskb_put __compat_pskb_put
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) && !defined(ISRHEL7)
++#include <net/xfrm.h>
++static inline void skb_scrub_packet(struct sk_buff *skb, bool xnet)
++{
++#ifdef CONFIG_CAVIUM_OCTEON_IPFWD_OFFLOAD
++	memset(&skb->cvm_info, 0, sizeof(skb->cvm_info));
++	skb->cvm_reserved = 0;
++#endif
++	skb->tstamp.tv64 = 0;
++	skb->pkt_type = PACKET_HOST;
++	skb->skb_iif = 0;
++	skb_dst_drop(skb);
++	secpath_reset(skb);
++	nf_reset(skb);
++	nf_reset_trace(skb);
++	if (!xnet)
++		return;
++	skb_orphan(skb);
++	skb->mark = 0;
++}
++#endif
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) || defined(ISUBUNTU1404)) && !defined(ISRHEL7)
++#include <linux/random.h>
++static inline u32 __compat_prandom_u32_max(u32 ep_ro)
++{
++	return (u32)(((u64)prandom_u32() * ep_ro) >> 32);
++}
++#define prandom_u32_max __compat_prandom_u32_max
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 75) && !defined(ISRHEL7)
++#ifndef U8_MAX
++#define U8_MAX ((u8)~0U)
++#endif
++#ifndef S8_MAX
++#define S8_MAX ((s8)(U8_MAX >> 1))
++#endif
++#ifndef S8_MIN
++#define S8_MIN ((s8)(-S8_MAX - 1))
++#endif
++#ifndef U16_MAX
++#define U16_MAX ((u16)~0U)
++#endif
++#ifndef S16_MAX
++#define S16_MAX ((s16)(U16_MAX >> 1))
++#endif
++#ifndef S16_MIN
++#define S16_MIN ((s16)(-S16_MAX - 1))
++#endif
++#ifndef U32_MAX
++#define U32_MAX ((u32)~0U)
++#endif
++#ifndef S32_MAX
++#define S32_MAX ((s32)(U32_MAX >> 1))
++#endif
++#ifndef S32_MIN
++#define S32_MIN ((s32)(-S32_MAX - 1))
++#endif
++#ifndef U64_MAX
++#define U64_MAX ((u64)~0ULL)
++#endif
++#ifndef S64_MAX
++#define S64_MAX ((s64)(U64_MAX >> 1))
++#endif
++#ifndef S64_MIN
++#define S64_MIN ((s64)(-S64_MAX - 1))
++#endif
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 60) && !defined(ISRHEL7)
++/* Making this static may very well invalidate its usefulness,
++ * but so it goes with compat code. */
++static inline void memzero_explicit(void *s, size_t count)
++{
++	memset(s, 0, count);
++	barrier();
++}
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && !defined(ISRHEL7)
++static const struct in6_addr __compat_in6addr_any = IN6ADDR_ANY_INIT;
++#define in6addr_any __compat_in6addr_any
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) && !defined(ISOPENSUSE15)
++#include <linux/completion.h>
++#include <linux/random.h>
++#include <linux/errno.h>
++struct rng_initializer {
++	struct completion done;
++	struct random_ready_callback cb;
++};
++static inline void rng_initialized_callback(struct random_ready_callback *cb)
++{
++	complete(&container_of(cb, struct rng_initializer, cb)->done);
++}
++static inline int wait_for_random_bytes(void)
++{
++	static bool rng_is_initialized = false;
++	int ret;
++	if (unlikely(!rng_is_initialized)) {
++		struct rng_initializer rng = {
++			.done = COMPLETION_INITIALIZER(rng.done),
++			.cb = { .owner = THIS_MODULE, .func = rng_initialized_callback }
++		};
++		ret = add_random_ready_callback(&rng.cb);
++		if (!ret) {
++			ret = wait_for_completion_interruptible(&rng.done);
++			if (ret) {
++				del_random_ready_callback(&rng.cb);
++				return ret;
++			}
++		} else if (ret != -EALREADY)
++			return ret;
++		rng_is_initialized = true;
++	}
++	return 0;
++}
++#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
++/* This is a disaster. Without this API, we really have no way of
++ * knowing if it's initialized. We just return that it has and hope
++ * for the best... */
++static inline int wait_for_random_bytes(void)
++{
++	return 0;
++}
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) && !defined(ISRHEL8)
++#include <linux/random.h>
++#include <linux/slab.h>
++struct rng_is_initialized_callback {
++	struct random_ready_callback cb;
++	atomic_t *rng_state;
++};
++static inline void rng_is_initialized_callback(struct random_ready_callback *cb)
++{
++	struct rng_is_initialized_callback *rdy = container_of(cb, struct rng_is_initialized_callback, cb);
++	atomic_set(rdy->rng_state, 2);
++	kfree(rdy);
++}
++static inline bool rng_is_initialized(void)
++{
++	static atomic_t rng_state = ATOMIC_INIT(0);
++
++	if (atomic_read(&rng_state) == 2)
++		return true;
++
++	if (atomic_cmpxchg(&rng_state, 0, 1) == 0) {
++		int ret;
++		struct rng_is_initialized_callback *rdy = kmalloc(sizeof(*rdy), GFP_ATOMIC);
++		if (!rdy) {
++			atomic_set(&rng_state, 0);
++			return false;
++		}
++		rdy->cb.owner = THIS_MODULE;
++		rdy->cb.func = rng_is_initialized_callback;
++		rdy->rng_state = &rng_state;
++		ret = add_random_ready_callback(&rdy->cb);
++		if (ret)
++			kfree(rdy);
++		if (ret == -EALREADY) {
++			atomic_set(&rng_state, 2);
++			return true;
++		} else if (ret)
++			atomic_set(&rng_state, 0);
++		return false;
++	}
++	return false;
++}
++#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
++/* This is a disaster. Without this API, we really have no way of
++ * knowing if it's initialized. We just return that it has and hope
++ * for the best... */
++static inline bool rng_is_initialized(void)
++{
++	return true;
++}
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && !defined(ISOPENSUSE15)
++static inline int get_random_bytes_wait(void *buf, int nbytes) /* fills buf only after wait_for_random_bytes() returned 0 */
++{
++	int err = wait_for_random_bytes();
++
++	if (likely(!err))
++		get_random_bytes(buf, nbytes);
++	return err; /* 0 on success, otherwise the error from the wait; buf is untouched then */
++}
++#endif /* < 4.13.0 && !ISOPENSUSE15 */
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) && !defined(ISRHEL7)
++#define system_power_efficient_wq system_unbound_wq
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 0)
++#include <linux/ktime.h>
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
++#include <linux/hrtimer.h>
++#ifndef ktime_get_real_ts64
++#define timespec64 timespec /* alias the 64-bit names onto the older timespec API */
++#define ktime_get_real_ts64 ktime_get_real_ts
++#endif /* !ktime_get_real_ts64 */
++#else /* >= 3.17.0 */
++#include <linux/timekeeping.h>
++#endif /* < 3.17.0 */
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
++static inline u64 __compat_jiffies64_to_nsecs(u64 j) /* scales the jiffies count j to nanoseconds */
++{
++#if !(NSEC_PER_SEC % HZ)
++	return (NSEC_PER_SEC / HZ) * j; /* HZ divides NSEC_PER_SEC evenly: a single multiply suffices */
++#else
++	return div_u64(j * HZ_TO_USEC_NUM, HZ_TO_USEC_DEN) * 1000; /* presumably converts to microseconds first, then scales by 1000 */
++#endif
++}
++#define jiffies64_to_nsecs __compat_jiffies64_to_nsecs
++#endif /* < 4.11.0 */
++static inline u64 ktime_get_coarse_boottime_ns(void) /* returns a ktime converted with ktime_to_ns(); the source is picked per kernel version */
++{
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
++	return ktime_to_ns(ktime_get_boottime());
++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 12) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 53)
++	return ktime_to_ns(ktime_mono_to_any(ns_to_ktime(jiffies64_to_nsecs(get_jiffies_64())), TK_OFFS_BOOT)); /* built from the jiffies counter plus the TK_OFFS_BOOT offset */
++#else
++	return ktime_to_ns(ktime_get_coarse_boottime());
++#endif
++}
++#endif /* < 5.3.0 */
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
++#include <linux/inetdevice.h>
++static inline __be32 __compat_confirm_addr_indev(struct in_device *in_dev, __be32 dst,  __be32 local, int scope)
++{
++	int same = 0;
++	__be32 addr = 0;
++	for_ifa(in_dev) {
++		if (!addr && (local == ifa->ifa_local || !local) && ifa->ifa_scope <= scope) {
++			addr = ifa->ifa_local;
++			if (same)
++				break;
++		}
++		if (!same) {
++			same = (!local || inet_ifa_match(local, ifa)) && (!dst || inet_ifa_match(dst, ifa));
++			if (same && addr) {
++				if (local || !dst)
++					break;
++				if (inet_ifa_match(addr, ifa))
++					break;
++				if (ifa->ifa_scope <= scope) {
++					addr = ifa->ifa_local;
++					break;
++				}
++				same = 0;
++			}
++		}
++	} endfor_ifa(in_dev);
++	return same ? addr : 0;
++}
++static inline __be32 __compat_inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope)
++{
++	__be32 addr = 0;
++	struct net_device *dev;
++	if (in_dev)
++		return __compat_confirm_addr_indev(in_dev, dst, local, scope);
++	rcu_read_lock();
++	for_each_netdev_rcu(net, dev) {
++		in_dev = __in_dev_get_rcu(dev);
++		if (in_dev) {
++			addr = __compat_confirm_addr_indev(in_dev, dst, local, scope);
++			if (addr)
++				break;
++		}
++	}
++	rcu_read_unlock();
++	return addr;
++}
++#define inet_confirm_addr __compat_inet_confirm_addr
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
++#include <linux/vmalloc.h>
++#include <linux/mm.h>
++#include <linux/slab.h>
++static inline void *__compat_kvmalloc(size_t size, gfp_t flags)
++{
++	gfp_t kmalloc_flags = flags;
++	void *ret;
++	if (size > PAGE_SIZE) {
++		kmalloc_flags |= __GFP_NOWARN;
++		if (!(kmalloc_flags & __GFP_REPEAT) || (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
++			kmalloc_flags |= __GFP_NORETRY;
++	}
++	ret = kmalloc(size, kmalloc_flags);
++	if (ret || size <= PAGE_SIZE)
++		return ret;
++	return __vmalloc(size, flags, PAGE_KERNEL);
++}
++static inline void *__compat_kvzalloc(size_t size, gfp_t flags)
++{
++	return __compat_kvmalloc(size, flags | __GFP_ZERO);
++}
++#define kvmalloc __compat_kvmalloc
++#define kvzalloc __compat_kvzalloc
++#endif
++
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 41)) && !defined(ISUBUNTU1404)
++#include <linux/vmalloc.h>
++#include <linux/mm.h>
++static inline void __compat_kvfree(const void *addr)
++{
++	if (is_vmalloc_addr(addr))
++		vfree(addr);
++	else
++		kfree(addr);
++}
++#define kvfree __compat_kvfree
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 9)
++#include <linux/netdevice.h>
++#define priv_destructor destructor
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && !defined(ISOPENSUSE15)
++#define wg_newlink(a,b,c,d,e) wg_newlink(a,b,c,d)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
++#include <net/netlink.h>
++#include <net/genetlink.h>
++#define nlmsg_parse(a, b, c, d, e, f) nlmsg_parse(a, b, c, d, e)
++#define nla_parse_nested(a, b, c, d, e) nla_parse_nested(a, b, c, d)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) && !defined(ISRHEL7)
++static inline struct nlattr **genl_family_attrbuf(const struct genl_family *family)
++{
++	return family->attrbuf;
++}
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
++#define PTR_ERR_OR_ZERO(p) PTR_RET(p)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)
++#include <net/netlink.h>
++#define nla_put_u64_64bit(a, b, c, d) nla_put_u64(a, b, c)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
++#include <net/genetlink.h>
++#ifndef GENL_UNS_ADMIN_PERM
++#define GENL_UNS_ADMIN_PERM GENL_ADMIN_PERM
++#endif
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) && !defined(ISRHEL7)
++#include <net/genetlink.h>
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
++#define genl_register_family(a) genl_register_family_with_ops(a, genl_ops, ARRAY_SIZE(genl_ops)) /* relies on an array named genl_ops being visible at the call site */
++#define COMPAT_CANNOT_USE_CONST_GENL_OPS
++#else /* 3.13.0 <= version < 4.10.0 */
++#define genl_register_family(a) genl_register_family_with_ops(a, genl_ops) /* same reliance on genl_ops, without the explicit element count */
++#endif /* < 3.13.0 */
++#define COMPAT_CANNOT_USE_GENL_NOPS
++#endif /* < 4.10.0 && !ISRHEL7 */
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 2) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 16) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 65) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 101) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 84)
++#define __COMPAT_NETLINK_DUMP_BLOCK { \
++	int ret; \
++	skb->end -= nlmsg_total_size(sizeof(int)); \
++	ret = wg_get_device_dump_real(skb, cb); \
++	skb->end += nlmsg_total_size(sizeof(int)); \
++	return ret; \
++}
++#define __COMPAT_NETLINK_DUMP_OVERRIDE /* tested by the #elif below: the dump entry point must be wrapped even when no start emulation is needed */
++#else /* no skb->end adjustment on these kernels */
++#define __COMPAT_NETLINK_DUMP_BLOCK return wg_get_device_dump_real(skb, cb); /* plain call-through; uses skb and cb from the enclosing wrapper */
++#endif /* kernels that shrink skb->end around the real dump */
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 8) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 25) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 87)
++#define wg_get_device_dump(a, b) wg_get_device_dump_real(a, b); \
++static int wg_get_device_dump(a, b) { \
++	struct wg_device *wg = (struct wg_device *)cb->args[0]; \
++	if (!wg) { \
++		int ret = wg_get_device_start(cb); \
++		if (ret) \
++			return ret; \
++	} \
++	__COMPAT_NETLINK_DUMP_BLOCK \
++} \
++static int wg_get_device_dump_real(a, b)
++#define COMPAT_CANNOT_USE_NETLINK_START /* the wrapper above calls wg_get_device_start(cb) itself while cb->args[0] is still NULL */
++#elif defined(__COMPAT_NETLINK_DUMP_OVERRIDE) /* wrapper without the start call: only the dump block is interposed */
++#define wg_get_device_dump(a, b) wg_get_device_dump_real(a, b); \
++static int wg_get_device_dump(a, b) { \
++	__COMPAT_NETLINK_DUMP_BLOCK \
++} \
++static int wg_get_device_dump_real(a, b)
++#endif /* wg_get_device_dump wrapper; in both variants the function body written after the macro use becomes wg_get_device_dump_real() */
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
++#define COMPAT_CANNOT_USE_IN6_DEV_GET
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
++#define COMPAT_CANNOT_USE_IFF_NO_QUEUE
++#endif
++
++#if defined(CONFIG_X86_64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
++#include <asm/user.h>
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
++#include <asm/xsave.h>
++#include <asm/xcr.h>
++static inline int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) /* feature_name is accepted for signature compatibility and never written */
++{
++	return boot_cpu_has(X86_FEATURE_XSAVE) && (xgetbv(XCR_XFEATURE_ENABLED_MASK) & xfeatures_needed) == xfeatures_needed; /* every requested bit must be enabled; a plain '&' accepted any single one */
++}
++#endif
++#ifndef XFEATURE_MASK_YMM
++#define XFEATURE_MASK_YMM XSTATE_YMM
++#endif
++#ifndef XFEATURE_MASK_SSE
++#define XFEATURE_MASK_SSE XSTATE_SSE
++#endif
++#ifndef XSTATE_AVX512
++#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
++#endif
++#ifndef XFEATURE_MASK_AVX512
++#define XFEATURE_MASK_AVX512 XSTATE_AVX512
++#endif
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && defined(CONFIG_X86_64)
++/* This is incredibly dumb and reckless, but as it turns out, there's
++ * not really hardware Linux runs properly on that supports F but not BW
++ * and VL, so in practice this isn't so bad. Plus, this is compat layer,
++ * so the bar remains fairly low.
++ */
++#include <asm/cpufeature.h>
++#ifndef X86_FEATURE_AVX512BW
++#define X86_FEATURE_AVX512BW X86_FEATURE_AVX512F
++#endif
++#ifndef X86_FEATURE_AVX512VL
++#define X86_FEATURE_AVX512VL X86_FEATURE_AVX512F
++#endif
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0)
++struct __compat_dummy_container { char dev; };
++#define netdev_notifier_info net_device *)data); __attribute((unused)) char __compat_dummy_variable = ((struct __compat_dummy_container
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) /* map timer_setup()/from_timer() onto setup_timer()/container_of() */
++#define timer_setup(a, b, c) setup_timer(a, ((void (*)(unsigned long))(b)), ((unsigned long)(a))) /* c is dropped; the timer pointer doubles as the callback argument; a and b are parenthesized under the casts so expression arguments bind correctly */
++#define from_timer(var, callback_timer, timer_fieldname) container_of(callback_timer, typeof(*(var)), timer_fieldname) /* recovers the enclosing object from the timer pointer passed to the callback */
++#endif /* < 4.14.0 */
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 3)
++#define COMPAT_CANNOT_USE_AVX512
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
++#include <net/genetlink.h>
++#define genl_dump_check_consistent(a, b) genl_dump_check_consistent(a, b, &genl_family)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && !defined(ISRHEL7) && !defined(ISOPENSUSE15)
++static inline void *skb_put_data(struct sk_buff *skb, const void *data, unsigned int len) /* copies len bytes of data to the pointer skb_put() returns */
++{
++	void *dest = skb_put(skb, len);
++
++	return memcpy(dest, data, len); /* memcpy() returns its destination, i.e. the skb_put() pointer */
++}
++#endif /* < 4.13.0 && !ISRHEL7 && !ISOPENSUSE15 */
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) && !defined(ISRHEL7)
++#define napi_complete_done(n, work_done) napi_complete(n)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)
++#include <linux/netdevice.h>
++/* NAPI_STATE_SCHED gets set by netif_napi_add anyway, so this is safe.
++ * Also, kernels without NAPI_STATE_NO_BUSY_POLL don't have a call to
++ * napi_hash_add inside of netif_napi_add.
++ */
++#define NAPI_STATE_NO_BUSY_POLL NAPI_STATE_SCHED
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
++#include <linux/atomic.h>
++#ifndef atomic_read_acquire
++#define atomic_read_acquire(v) ({ int __compat_p1 = atomic_read(v); smp_rmb(); __compat_p1; }) /* read first, then smp_rmb(); yields the value read */
++#endif /* !atomic_read_acquire */
++#ifndef atomic_set_release
++#define atomic_set_release(v, i) ({ smp_wmb(); atomic_set(v, i); }) /* smp_wmb() first, then the store */
++#endif /* !atomic_set_release */
++#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) /* 3.14.0 <= version < 4.3.0: built on smp_load_acquire()/smp_store_release() instead */
++#include <linux/atomic.h>
++#ifndef atomic_read_acquire
++#define atomic_read_acquire(v) smp_load_acquire(&(v)->counter)
++#endif /* !atomic_read_acquire */
++#ifndef atomic_set_release
++#define atomic_set_release(v, i) smp_store_release(&(v)->counter, (i))
++#endif /* !atomic_set_release */
++#endif /* < 3.14.0 / < 4.3.0 */
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0)
++static inline void le32_to_cpu_array(u32 *buf, unsigned int words) /* applies __le32_to_cpus() in place to each of the words entries of buf */
++{
++	u32 *const end = buf + words;
++
++	for (; buf != end; ++buf)
++		__le32_to_cpus(buf);
++}
++static inline void cpu_to_le32_array(u32 *buf, unsigned int words) /* applies __cpu_to_le32s() in place to each of the words entries of buf */
++{
++	u32 *const end = buf + words;
++
++	for (; buf != end; ++buf)
++		__cpu_to_le32s(buf);
++}
++#endif /* < 4.17.0 */
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
++#include <crypto/algapi.h>
++static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2,
++				  unsigned int size)
++{
++	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
++	    __builtin_constant_p(size) &&
++	    (size % sizeof(unsigned long)) == 0) {
++		unsigned long *d = (unsigned long *)dst;
++		unsigned long *s1 = (unsigned long *)src1;
++		unsigned long *s2 = (unsigned long *)src2;
++
++		while (size > 0) {
++			*d++ = *s1++ ^ *s2++;
++			size -= sizeof(unsigned long);
++		}
++	} else {
++		if (unlikely(dst != src1))
++			memmove(dst, src1, size);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
++		crypto_xor(dst, src2, size);
++#else
++		__crypto_xor(dst, src2, size);
++#endif
++	}
++}
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
++#define read_cpuid_part() read_cpuid_part_number()
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) && !defined(ISRHEL7)
++#define hlist_add_behind(a, b) hlist_add_after(b, a)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0)
++#define totalram_pages() totalram_pages
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0)
++struct __kernel_timespec {
++	int64_t tv_sec, tv_nsec;
++};
++#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0)
++#include <linux/time64.h>
++#ifdef __kernel_timespec
++#undef __kernel_timespec
++struct __kernel_timespec {
++	int64_t tv_sec, tv_nsec;
++};
++#endif
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
++#include <linux/kernel.h>
++#ifndef ALIGN_DOWN
++#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a))
++#endif
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) && !defined(ISRHEL82)
++#include <linux/skbuff.h>
++#define skb_probe_transport_header(a) skb_probe_transport_header(a, 0)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) && !defined(ISRHEL7)
++#define ignore_df local_df
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) && !defined(ISRHEL82)
++/* Note that all intentional uses of the non-_bh variety need to explicitly
++ * undef these, conditionalized on COMPAT_CANNOT_DEPRECIATE_BH_RCU.
++ */
++#include <linux/rcupdate.h>
++static __always_inline void old_synchronize_rcu(void)
++{
++	synchronize_rcu();
++}
++static __always_inline void old_call_rcu(void *a, void *b)
++{
++	call_rcu(a, b);
++}
++static __always_inline void old_rcu_barrier(void)
++{
++	rcu_barrier();
++}
++#ifdef synchronize_rcu
++#undef synchronize_rcu
++#endif
++#ifdef call_rcu
++#undef call_rcu
++#endif
++#ifdef rcu_barrier
++#undef rcu_barrier
++#endif
++#define synchronize_rcu synchronize_rcu_bh
++#define call_rcu call_rcu_bh
++#define rcu_barrier rcu_barrier_bh
++#define COMPAT_CANNOT_DEPRECIATE_BH_RCU
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 10) && !defined(ISRHEL8)
++static inline void skb_mark_not_on_list(struct sk_buff *skb)
++{
++	skb->next = NULL;
++}
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 20, 0) && !defined(ISRHEL8)
++#define NLA_EXACT_LEN NLA_UNSPEC
++#endif
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) && !defined(ISRHEL82)
++#define NLA_MIN_LEN NLA_UNSPEC
++#define COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) && defined(__aarch64__)
++#define cpu_have_named_feature(name) (elf_hwcap & (HWCAP_ ## name))
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)
++#include <linux/stddef.h>
++#ifndef offsetofend
++#define offsetofend(TYPE, MEMBER) (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
++#endif
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0)
++#define genl_dumpit_info(cb) ({ \
++	struct { struct nlattr **attrs; } *a = (void *)((u8 *)cb->args + offsetofend(struct dump_ctx, next_allowedip)); \
++	BUILD_BUG_ON(sizeof(cb->args) < offsetofend(struct dump_ctx, next_allowedip) + sizeof(*a)); \
++	a->attrs = genl_family_attrbuf(&genl_family); \
++	if (nlmsg_parse(cb->nlh, GENL_HDRLEN + genl_family.hdrsize, a->attrs, genl_family.maxattr, device_policy, NULL) < 0) \
++		memset(a->attrs, 0, (genl_family.maxattr + 1) * sizeof(struct nlattr *)); \
++	a; \
++})
++#endif
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 5) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 18) && !defined(ISRHEL82))
++#define ipv6_dst_lookup_flow(a, b, c, d) ipv6_dst_lookup(a, b, &dst, c) + (void *)0 ?: dst
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0)
++#include <linux/skbuff.h>
++#ifndef skb_list_walk_safe
++#define skb_list_walk_safe(first, skb, next)                                   \
++	for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb);      \
++	     (skb) = (next), (next) = (skb) ? (skb)->next : NULL)
++#endif
++#endif
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
++#define blake2s_init zinc_blake2s_init
++#define blake2s_init_key zinc_blake2s_init_key
++#define blake2s_update zinc_blake2s_update
++#define blake2s_final zinc_blake2s_final
++#define blake2s_hmac zinc_blake2s_hmac
++#define chacha20 zinc_chacha20
++#define hchacha20 zinc_hchacha20
++#define chacha20poly1305_encrypt zinc_chacha20poly1305_encrypt
++#define chacha20poly1305_encrypt_sg_inplace zinc_chacha20poly1305_encrypt_sg_inplace
++#define chacha20poly1305_decrypt zinc_chacha20poly1305_decrypt
++#define chacha20poly1305_decrypt_sg_inplace zinc_chacha20poly1305_decrypt_sg_inplace
++#define xchacha20poly1305_encrypt zinc_xchacha20poly1305_encrypt
++#define xchacha20poly1305_decrypt zinc_xchacha20poly1305_decrypt
++#define curve25519 zinc_curve25519
++#define curve25519_generate_secret zinc_curve25519_generate_secret
++#define curve25519_generate_public zinc_curve25519_generate_public
++#define poly1305_init zinc_poly1305_init
++#define poly1305_update zinc_poly1305_update
++#define poly1305_final zinc_poly1305_final
++#define blake2s_compress_ssse3 zinc_blake2s_compress_ssse3
++#define blake2s_compress_avx512 zinc_blake2s_compress_avx512
++#define poly1305_init_arm zinc_poly1305_init_arm
++#define poly1305_blocks_arm zinc_poly1305_blocks_arm
++#define poly1305_emit_arm zinc_poly1305_emit_arm
++#define poly1305_blocks_neon zinc_poly1305_blocks_neon
++#define poly1305_emit_neon zinc_poly1305_emit_neon
++#define poly1305_init_mips zinc_poly1305_init_mips
++#define poly1305_blocks_mips zinc_poly1305_blocks_mips
++#define poly1305_emit_mips zinc_poly1305_emit_mips
++#define poly1305_init_x86_64 zinc_poly1305_init_x86_64
++#define poly1305_blocks_x86_64 zinc_poly1305_blocks_x86_64
++#define poly1305_emit_x86_64 zinc_poly1305_emit_x86_64
++#define poly1305_emit_avx zinc_poly1305_emit_avx
++#define poly1305_blocks_avx zinc_poly1305_blocks_avx
++#define poly1305_blocks_avx2 zinc_poly1305_blocks_avx2
++#define poly1305_blocks_avx512 zinc_poly1305_blocks_avx512
++#define curve25519_neon zinc_curve25519_neon
++#define hchacha20_ssse3 zinc_hchacha20_ssse3
++#define chacha20_ssse3 zinc_chacha20_ssse3
++#define chacha20_avx2 zinc_chacha20_avx2
++#define chacha20_avx512 zinc_chacha20_avx512
++#define chacha20_avx512vl zinc_chacha20_avx512vl
++#define chacha20_mips zinc_chacha20_mips
++#define chacha20_arm zinc_chacha20_arm
++#define hchacha20_arm zinc_hchacha20_arm
++#define chacha20_neon zinc_chacha20_neon
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) && !defined(ISRHEL7)
++#include <linux/skbuff.h>
++static inline int skb_ensure_writable(struct sk_buff *skb, int write_len)
++{
++	if (!pskb_may_pull(skb, write_len))
++		return -ENOMEM;
++
++	if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
++		return 0;
++
++	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
++}
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0)
++#if IS_ENABLED(CONFIG_NF_NAT)
++#include <linux/ip.h>
++#include <linux/icmpv6.h>
++#include <net/ipv6.h>
++#include <net/icmp.h>
++#include <net/netfilter/nf_conntrack.h>
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0)
++#include <net/netfilter/nf_nat_core.h>
++#endif
++static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
++{
++	struct sk_buff *cloned_skb = NULL; /* stays NULL unless skb_in had to be cloned */
++	enum ip_conntrack_info ctinfo;
++	struct nf_conn *ct;
++	__be32 orig_ip; /* saddr saved so it can be restored after icmp_send() */
++
++	ct = nf_ct_get(skb_in, &ctinfo);
++	if (!ct || !(ct->status & IPS_SRC_NAT)) { /* no conntrack entry, or not source-NATed */
++		icmp_send(skb_in, type, code, info); /* nothing to rewrite: send as-is */
++		return;
++	}
++
++	if (skb_shared(skb_in)) /* never rewrite a shared skb; operate on a clone */
++		skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
++
++	if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
++	    (skb_network_header(skb_in) + sizeof(struct iphdr)) >
++	    skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
++	    skb_network_offset(skb_in) + sizeof(struct iphdr))))
++		goto out; /* clone failed, IPv4 header not inside the buffer, or not writable */
++
++	orig_ip = ip_hdr(skb_in)->saddr;
++	ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip; /* presumably the pre-NAT source -- confirm */
++	icmp_send(skb_in, type, code, info);
++	ip_hdr(skb_in)->saddr = orig_ip; /* leave the packet as we found it */
++out:
++	consume_skb(cloned_skb); /* cloned_skb is NULL when no clone was made */
++}
++static inline void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
++{
++	struct sk_buff *cloned_skb = NULL; /* stays NULL unless skb_in had to be cloned */
++	enum ip_conntrack_info ctinfo;
++	struct in6_addr orig_ip; /* saddr saved so it can be restored after icmpv6_send() */
++	struct nf_conn *ct;
++
++	ct = nf_ct_get(skb_in, &ctinfo);
++	if (!ct || !(ct->status & IPS_SRC_NAT)) { /* no conntrack entry, or not source-NATed */
++		icmpv6_send(skb_in, type, code, info); /* nothing to rewrite: send as-is */
++		return;
++	}
++
++	if (skb_shared(skb_in)) /* never rewrite a shared skb; operate on a clone */
++		skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
++
++	if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
++	    (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) >
++	    skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
++	    skb_network_offset(skb_in) + sizeof(struct ipv6hdr))))
++		goto out; /* clone failed, IPv6 header not inside the buffer, or not writable */
++
++	orig_ip = ipv6_hdr(skb_in)->saddr;
++	ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6; /* presumably the pre-NAT source -- confirm */
++	icmpv6_send(skb_in, type, code, info);
++	ipv6_hdr(skb_in)->saddr = orig_ip; /* leave the packet as we found it */
++out:
++	consume_skb(cloned_skb); /* cloned_skb is NULL when no clone was made */
++}
++#else
++#define icmp_ndo_send icmp_send
++#define icmpv6_ndo_send icmpv6_send
++#endif
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
++#define COMPAT_CANNOT_USE_MAX_MTU
++#endif
++
++#if defined(ISUBUNTU1604)
++#include <linux/siphash.h>
++#ifndef _WG_LINUX_SIPHASH_H
++#define hsiphash_2u32 siphash_2u32
++#define hsiphash_3u32 siphash_3u32
++#define hsiphash_key_t siphash_key_t
++#endif
++#endif
++
++#ifdef CONFIG_VE
++#include <linux/netdev_features.h>
++#ifdef NETIF_F_VIRTUAL
++#undef NETIF_F_LLTX
++#define NETIF_F_LLTX (__NETIF_F(LLTX) | __NETIF_F(VIRTUAL))
++#endif
++#endif
++
++/* https://github.com/ClangBuiltLinux/linux/issues/7 */
++#if defined( __clang__) && (!defined(CONFIG_CLANG_VERSION) || CONFIG_CLANG_VERSION < 80000)
++#include <linux/bug.h>
++#undef BUILD_BUG_ON
++#define BUILD_BUG_ON(x)
++#endif
++
++/* PaX compatibility */
++#ifdef CONSTIFY_PLUGIN
++#include <linux/cache.h>
++#undef __read_mostly
++#define __read_mostly
++#endif
++#if (defined(RAP_PLUGIN) || defined(CONFIG_CFI_CLANG)) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
++#include <linux/timer.h>
++#define wg_expired_retransmit_handshake(a) wg_expired_retransmit_handshake(unsigned long timer)
++#define wg_expired_send_keepalive(a) wg_expired_send_keepalive(unsigned long timer)
++#define wg_expired_new_handshake(a) wg_expired_new_handshake(unsigned long timer)
++#define wg_expired_zero_key_material(a) wg_expired_zero_key_material(unsigned long timer)
++#define wg_expired_send_persistent_keepalive(a) wg_expired_send_persistent_keepalive(unsigned long timer)
++#undef timer_setup
++#define timer_setup(a, b, c) setup_timer(a, ((void (*)(unsigned long))b), ((unsigned long)a))
++#undef from_timer
++#define from_timer(var, callback_timer, timer_fieldname) container_of((struct timer_list *)callback_timer, typeof(*var), timer_fieldname)
++#endif
++
++#endif /* _WG_COMPAT_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/dst_cache/include/net/dst_cache.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,97 @@
++#ifndef _WG_NET_DST_CACHE_H
++#define _WG_NET_DST_CACHE_H
++
++#include <linux/jiffies.h>
++#include <net/dst.h>
++#if IS_ENABLED(CONFIG_IPV6)
++#include <net/ip6_fib.h>
++#endif
++
++struct dst_cache {
++	struct dst_cache_pcpu __percpu *cache;
++	unsigned long reset_ts;
++};
++
++/**
++ *	dst_cache_get - perform cache lookup
++ *	@dst_cache: the cache
++ *
++ *	The caller should use dst_cache_get_ip4() if it needs to retrieve the
++ *	source address to be used when xmitting to the cached dst.
++ *	local BH must be disabled.
++ */
++struct dst_entry *dst_cache_get(struct dst_cache *dst_cache);
++
++/**
++ *	dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address
++ *	@dst_cache: the cache
++ *	@saddr: return value for the retrieved source address
++ *
++ *	local BH must be disabled.
++ */
++struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr);
++
++/**
++ *	dst_cache_set_ip4 - store the ipv4 dst into the cache
++ *	@dst_cache: the cache
++ *	@dst: the entry to be cached
++ *	@saddr: the source address to be stored inside the cache
++ *
++ *	local BH must be disabled.
++ */
++void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
++		       __be32 saddr);
++
++#if IS_ENABLED(CONFIG_IPV6)
++
++/**
++ *	dst_cache_set_ip6 - store the ipv6 dst into the cache
++ *	@dst_cache: the cache
++ *	@dst: the entry to be cached
++ *	@saddr: the source address to be stored inside the cache
++ *
++ *	local BH must be disabled.
++ */
++void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
++		       const struct in6_addr *saddr);
++
++/**
++ *	dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
++ *	@dst_cache: the cache
++ *	@saddr: return value for the retrieved source address
++ *
++ *	local BH must be disabled.
++ */
++struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
++				    struct in6_addr *saddr);
++#endif
++
++/**
++ *	dst_cache_reset - invalidate the cache contents
++ *	@dst_cache: the cache
++ *
++ *	This does not free the cached dst, to avoid races and contention;
++ *	the dst will be freed on a later cache lookup.
++ */
++static inline void dst_cache_reset(struct dst_cache *dst_cache)
++{
++	dst_cache->reset_ts = jiffies; /* only the reset timestamp is updated here */
++}
++
++/**
++ *	dst_cache_init - initialize the cache, allocating the required storage
++ *	@dst_cache: the cache
++ *	@gfp: allocation flags
++ */
++int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp);
++
++/**
++ *	dst_cache_destroy - empty the cache and free the allocated storage
++ *	@dst_cache: the cache
++ *
++ *	No synchronization is enforced: it must be called only when the cache
++ *	is unused.
++ */
++void dst_cache_destroy(struct dst_cache *dst_cache);
++
++#endif /* _WG_NET_DST_CACHE_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/fpu-x86/include/asm/fpu/api.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1 @@
++#include <asm/i387.h>
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/intel-family-x86/include/asm/intel-family.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,73 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_X86_INTEL_FAMILY_H
++#define _ASM_X86_INTEL_FAMILY_H
++
++/*
++ * "Big Core" Processors (Branded as Core, Xeon, etc...)
++ *
++ * The "_X" parts are generally the EP and EX Xeons, or the
++ * "Extreme" ones, like Broadwell-E.
++ *
++ * Things ending in "2" are usually because we have no better
++ * name for them.  There's no processor called "SILVERMONT2".
++ */
++
++#define INTEL_FAM6_CORE_YONAH		0x0E
++
++#define INTEL_FAM6_CORE2_MEROM		0x0F
++#define INTEL_FAM6_CORE2_MEROM_L	0x16
++#define INTEL_FAM6_CORE2_PENRYN		0x17
++#define INTEL_FAM6_CORE2_DUNNINGTON	0x1D
++
++#define INTEL_FAM6_NEHALEM		0x1E
++#define INTEL_FAM6_NEHALEM_G		0x1F /* Auburndale / Havendale */
++#define INTEL_FAM6_NEHALEM_EP		0x1A
++#define INTEL_FAM6_NEHALEM_EX		0x2E
++
++#define INTEL_FAM6_WESTMERE		0x25
++#define INTEL_FAM6_WESTMERE_EP		0x2C
++#define INTEL_FAM6_WESTMERE_EX		0x2F
++
++#define INTEL_FAM6_SANDYBRIDGE		0x2A
++#define INTEL_FAM6_SANDYBRIDGE_X	0x2D
++#define INTEL_FAM6_IVYBRIDGE		0x3A
++#define INTEL_FAM6_IVYBRIDGE_X		0x3E
++
++#define INTEL_FAM6_HASWELL_CORE		0x3C
++#define INTEL_FAM6_HASWELL_X		0x3F
++#define INTEL_FAM6_HASWELL_ULT		0x45
++#define INTEL_FAM6_HASWELL_GT3E		0x46
++
++#define INTEL_FAM6_BROADWELL_CORE	0x3D
++#define INTEL_FAM6_BROADWELL_GT3E	0x47
++#define INTEL_FAM6_BROADWELL_X		0x4F
++#define INTEL_FAM6_BROADWELL_XEON_D	0x56
++
++#define INTEL_FAM6_SKYLAKE_MOBILE	0x4E
++#define INTEL_FAM6_SKYLAKE_DESKTOP	0x5E
++#define INTEL_FAM6_SKYLAKE_X		0x55
++#define INTEL_FAM6_KABYLAKE_MOBILE	0x8E
++#define INTEL_FAM6_KABYLAKE_DESKTOP	0x9E
++
++/* "Small Core" Processors (Atom) */
++
++#define INTEL_FAM6_ATOM_PINEVIEW	0x1C
++#define INTEL_FAM6_ATOM_LINCROFT	0x26
++#define INTEL_FAM6_ATOM_PENWELL		0x27
++#define INTEL_FAM6_ATOM_CLOVERVIEW	0x35
++#define INTEL_FAM6_ATOM_CEDARVIEW	0x36
++#define INTEL_FAM6_ATOM_SILVERMONT1	0x37 /* BayTrail/BYT / Valleyview */
++#define INTEL_FAM6_ATOM_SILVERMONT2	0x4D /* Avoton/Rangely */
++#define INTEL_FAM6_ATOM_AIRMONT		0x4C /* CherryTrail / Braswell */
++#define INTEL_FAM6_ATOM_MERRIFIELD	0x4A /* Tangier */
++#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Anniedale */
++#define INTEL_FAM6_ATOM_GOLDMONT	0x5C
++#define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
++#define INTEL_FAM6_ATOM_GEMINI_LAKE	0x7A
++
++/* Xeon Phi */
++
++#define INTEL_FAM6_XEON_PHI_KNL		0x57 /* Knights Landing */
++#define INTEL_FAM6_XEON_PHI_KNM		0x85 /* Knights Mill */
++
++#endif /* _ASM_X86_INTEL_FAMILY_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/memneq/include.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,5 @@
++extern noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size);
++static inline int crypto_memneq(const void *a, const void *b, size_t size)
++{
++	return __crypto_memneq(a, b, size) != 0UL ? 1 : 0;
++}
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/neon-arm/include/asm/neon.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,7 @@
++#ifndef _ARCH_ARM_ASM_NEON
++#define _ARCH_ARM_ASM_NEON
++#define kernel_neon_begin() \
++	BUILD_BUG_ON_MSG(1, "This kernel does not support ARM NEON")
++#define kernel_neon_end() \
++	BUILD_BUG_ON_MSG(1, "This kernel does not support ARM NEON")
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/ptr_ring/include/linux/ptr_ring.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,640 @@
++/*
++ *	Definitions for the 'struct ptr_ring' datastructure.
++ *
++ *	Author:
++ *		Michael S. Tsirkin <mst@redhat.com>
++ *
++ *	Copyright (C) 2016 Red Hat, Inc.
++ *
++ *	This program is free software; you can redistribute it and/or modify it
++ *	under the terms of the GNU General Public License as published by the
++ *	Free Software Foundation; either version 2 of the License, or (at your
++ *	option) any later version.
++ *
++ *	This is a limited-size FIFO maintaining pointers in FIFO order, with
++ *	one CPU producing entries and another consuming entries from a FIFO.
++ *
++ *	This implementation tries to minimize cache-contention when there is a
++ *	single producer and a single consumer CPU.
++ */
++
++#ifndef _LINUX_PTR_RING_H
++#define _LINUX_PTR_RING_H 1
++
++#ifdef __KERNEL__
++#include <linux/spinlock.h>
++#include <linux/cache.h>
++#include <linux/types.h>
++#include <linux/compiler.h>
++#include <linux/cache.h>
++#include <linux/slab.h>
++#include <asm/errno.h>
++#endif
++
++struct ptr_ring {
++	int producer ____cacheline_aligned_in_smp;
++	spinlock_t producer_lock;
++	int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
++	int consumer_tail; /* next entry to invalidate */
++	spinlock_t consumer_lock;
++	/* Shared consumer/producer data */
++	/* Read-only by both the producer and the consumer */
++	int size ____cacheline_aligned_in_smp; /* max entries in queue */
++	int batch; /* number of entries to consume in a batch */
++	void **queue;
++};
++
++/* Note: callers invoking this in a loop must use a compiler barrier,
++ * for example cpu_relax().  If ring is ever resized, callers must hold
++ * producer_lock - see e.g. ptr_ring_full.  Otherwise, if callers don't hold
++ * producer_lock, the next call to __ptr_ring_produce may fail.
++ */
++static inline bool __ptr_ring_full(struct ptr_ring *r)
++{
++	return r->queue[r->producer];
++}
++
++static inline bool ptr_ring_full(struct ptr_ring *r)
++{
++	bool ret;
++
++	spin_lock(&r->producer_lock);
++	ret = __ptr_ring_full(r);
++	spin_unlock(&r->producer_lock);
++
++	return ret;
++}
++
++static inline bool ptr_ring_full_irq(struct ptr_ring *r)
++{
++	bool ret;
++
++	spin_lock_irq(&r->producer_lock);
++	ret = __ptr_ring_full(r);
++	spin_unlock_irq(&r->producer_lock);
++
++	return ret;
++}
++
++static inline bool ptr_ring_full_any(struct ptr_ring *r)
++{
++	unsigned long flags;
++	bool ret;
++
++	spin_lock_irqsave(&r->producer_lock, flags);
++	ret = __ptr_ring_full(r);
++	spin_unlock_irqrestore(&r->producer_lock, flags);
++
++	return ret;
++}
++
++static inline bool ptr_ring_full_bh(struct ptr_ring *r)
++{
++	bool ret;
++
++	spin_lock_bh(&r->producer_lock);
++	ret = __ptr_ring_full(r);
++	spin_unlock_bh(&r->producer_lock);
++
++	return ret;
++}
++
++/* Note: callers invoking this in a loop must use a compiler barrier,
++ * for example cpu_relax(). Callers must hold producer_lock.
++ */
++static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
++{
++	if (unlikely(!r->size) || r->queue[r->producer]) /* zero-sized ring, or slot still occupied */
++		return -ENOSPC;
++	smp_wmb(); /* make the data behind ptr visible before the pointer is published */
++	r->queue[r->producer++] = ptr;
++	if (unlikely(r->producer >= r->size)) /* wrap around at the end of the ring */
++		r->producer = 0;
++	return 0;
++}
++
++/*
++ * Note: resize (below) nests producer lock within consumer lock, so if you
++ * consume in interrupt or BH context, you must disable interrupts/BH when
++ * calling this.
++ */
++static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
++{
++	int ret;
++
++	spin_lock(&r->producer_lock);
++	ret = __ptr_ring_produce(r, ptr);
++	spin_unlock(&r->producer_lock);
++
++	return ret;
++}
++
++static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
++{
++	int ret;
++
++	spin_lock_irq(&r->producer_lock);
++	ret = __ptr_ring_produce(r, ptr);
++	spin_unlock_irq(&r->producer_lock);
++
++	return ret;
++}
++
++static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
++{
++	unsigned long flags;
++	int ret;
++
++	spin_lock_irqsave(&r->producer_lock, flags);
++	ret = __ptr_ring_produce(r, ptr);
++	spin_unlock_irqrestore(&r->producer_lock, flags);
++
++	return ret;
++}
++
++static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
++{
++	int ret;
++
++	spin_lock_bh(&r->producer_lock);
++	ret = __ptr_ring_produce(r, ptr);
++	spin_unlock_bh(&r->producer_lock);
++
++	return ret;
++}
++
++/* Note: callers invoking this in a loop must use a compiler barrier,
++ * for example cpu_relax(). Callers must take consumer_lock
++ * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL.
++ * If ring is never resized, and if the pointer is merely
++ * tested, there's no need to take the lock - see e.g.  __ptr_ring_empty.
++ */
++static inline void *__ptr_ring_peek(struct ptr_ring *r)
++{
++	if (likely(r->size))
++		return r->queue[r->consumer_head];
++	return NULL;
++}
++
++/* Note: callers invoking this in a loop must use a compiler barrier,
++ * for example cpu_relax(). Callers must take consumer_lock
++ * if the ring is ever resized - see e.g. ptr_ring_empty.
++ */
++static inline bool __ptr_ring_empty(struct ptr_ring *r)
++{
++	return !__ptr_ring_peek(r);
++}
++
++static inline bool ptr_ring_empty(struct ptr_ring *r)
++{
++	bool ret;
++
++	spin_lock(&r->consumer_lock);
++	ret = __ptr_ring_empty(r);
++	spin_unlock(&r->consumer_lock);
++
++	return ret;
++}
++
++static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
++{
++	bool ret;
++
++	spin_lock_irq(&r->consumer_lock);
++	ret = __ptr_ring_empty(r);
++	spin_unlock_irq(&r->consumer_lock);
++
++	return ret;
++}
++
++static inline bool ptr_ring_empty_any(struct ptr_ring *r)
++{
++	unsigned long flags;
++	bool ret;
++
++	spin_lock_irqsave(&r->consumer_lock, flags);
++	ret = __ptr_ring_empty(r);
++	spin_unlock_irqrestore(&r->consumer_lock, flags);
++
++	return ret;
++}
++
++static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
++{
++	bool ret;
++
++	spin_lock_bh(&r->consumer_lock);
++	ret = __ptr_ring_empty(r);
++	spin_unlock_bh(&r->consumer_lock);
++
++	return ret;
++}
++
++/* Must only be called after __ptr_ring_peek returned !NULL */
++static inline void __ptr_ring_discard_one(struct ptr_ring *r)
++{
++	/* Fundamentally, what we want to do is update consumer
++	 * index and zero out the entry so producer can reuse it.
++	 * Doing it naively at each consume would be as simple as:
++	 *       r->queue[r->consumer++] = NULL;
++	 *       if (unlikely(r->consumer >= r->size))
++	 *               r->consumer = 0;
++	 * but that is suboptimal when the ring is full as producer is writing
++	 * out new entries in the same cache line.  Defer these updates until a
++	 * batch of entries has been consumed.
++	 */
++	int head = r->consumer_head++;
++
++	/* Once we have processed enough entries invalidate them in
++	 * the ring all at once so producer can reuse their space in the ring.
++	 * We also do this when we reach end of the ring - not mandatory
++	 * but helps keep the implementation simple.
++	 */
++	if (unlikely(r->consumer_head - r->consumer_tail >= r->batch ||
++		     r->consumer_head >= r->size)) {
++		/* Zero out entries in the reverse order: this way we touch the
++		 * cache line that producer might currently be reading the last;
++		 * producer won't make progress and touch other cache lines
++		 * besides the first one until we write out all entries.
++		 */
++		while (likely(head >= r->consumer_tail))
++			r->queue[head--] = NULL;
++		r->consumer_tail = r->consumer_head;
++	}
++	if (unlikely(r->consumer_head >= r->size)) {
++		r->consumer_head = 0;
++		r->consumer_tail = 0;
++	}
++}
++
++static inline void *__ptr_ring_consume(struct ptr_ring *r)
++{
++	void *ptr;
++
++	ptr = __ptr_ring_peek(r);
++	if (ptr)
++		__ptr_ring_discard_one(r);
++
++	return ptr;
++}
++
++static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
++					     void **array, int n)
++{
++	void *ptr;
++	int i;
++
++	for (i = 0; i < n; i++) {
++		ptr = __ptr_ring_consume(r);
++		if (!ptr)
++			break;
++		array[i] = ptr;
++	}
++
++	return i;
++}
++
++/*
++ * Note: resize (below) nests producer lock within consumer lock, so if you
++ * call this in interrupt or BH context, you must disable interrupts/BH when
++ * producing.
++ */
++static inline void *ptr_ring_consume(struct ptr_ring *r)
++{
++	void *ptr;
++
++	spin_lock(&r->consumer_lock);
++	ptr = __ptr_ring_consume(r);
++	spin_unlock(&r->consumer_lock);
++
++	return ptr;
++}
++
++static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
++{
++	void *ptr;
++
++	spin_lock_irq(&r->consumer_lock);
++	ptr = __ptr_ring_consume(r);
++	spin_unlock_irq(&r->consumer_lock);
++
++	return ptr;
++}
++
++static inline void *ptr_ring_consume_any(struct ptr_ring *r)
++{
++	unsigned long flags;
++	void *ptr;
++
++	spin_lock_irqsave(&r->consumer_lock, flags);
++	ptr = __ptr_ring_consume(r);
++	spin_unlock_irqrestore(&r->consumer_lock, flags);
++
++	return ptr;
++}
++
++static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
++{
++	void *ptr;
++
++	spin_lock_bh(&r->consumer_lock);
++	ptr = __ptr_ring_consume(r);
++	spin_unlock_bh(&r->consumer_lock);
++
++	return ptr;
++}
++
++static inline int ptr_ring_consume_batched(struct ptr_ring *r,
++					   void **array, int n)
++{
++	int ret;
++
++	spin_lock(&r->consumer_lock);
++	ret = __ptr_ring_consume_batched(r, array, n);
++	spin_unlock(&r->consumer_lock);
++
++	return ret;
++}
++
++static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
++					       void **array, int n)
++{
++	int ret;
++
++	spin_lock_irq(&r->consumer_lock);
++	ret = __ptr_ring_consume_batched(r, array, n);
++	spin_unlock_irq(&r->consumer_lock);
++
++	return ret;
++}
++
++static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
++					       void **array, int n)
++{
++	unsigned long flags;
++	int ret;
++
++	spin_lock_irqsave(&r->consumer_lock, flags);
++	ret = __ptr_ring_consume_batched(r, array, n);
++	spin_unlock_irqrestore(&r->consumer_lock, flags);
++
++	return ret;
++}
++
++static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
++					      void **array, int n)
++{
++	int ret;
++
++	spin_lock_bh(&r->consumer_lock);
++	ret = __ptr_ring_consume_batched(r, array, n);
++	spin_unlock_bh(&r->consumer_lock);
++
++	return ret;
++}
++
++/* Cast to structure type and call a function without discarding from FIFO.
++ * Function must return a value.
++ * Callers must take consumer_lock.
++ */
++#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r)))
++
++#define PTR_RING_PEEK_CALL(r, f) ({ \
++	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
++	\
++	spin_lock(&(r)->consumer_lock); \
++	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
++	spin_unlock(&(r)->consumer_lock); \
++	__PTR_RING_PEEK_CALL_v; \
++})
++
++#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
++	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
++	\
++	spin_lock_irq(&(r)->consumer_lock); \
++	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
++	spin_unlock_irq(&(r)->consumer_lock); \
++	__PTR_RING_PEEK_CALL_v; \
++})
++
++#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
++	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
++	\
++	spin_lock_bh(&(r)->consumer_lock); \
++	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
++	spin_unlock_bh(&(r)->consumer_lock); \
++	__PTR_RING_PEEK_CALL_v; \
++})
++
++#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
++	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
++	unsigned long __PTR_RING_PEEK_CALL_f;\
++	\
++	spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
++	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
++	spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
++	__PTR_RING_PEEK_CALL_v; \
++})
++
++static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
++{
++	return kcalloc(size, sizeof(void *), gfp);
++}
++
++static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
++{
++	r->size = size;
++	r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
++	/* We need to set batch at least to 1 to make logic
++	 * in __ptr_ring_discard_one work correctly.
++	 * Batching too much (because ring is small) would cause a lot of
++	 * burstiness. Needs tuning, for now disable batching.
++	 */
++	if (r->batch > r->size / 2 || !r->batch)
++		r->batch = 1;
++}
++
++static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
++{
++	r->queue = __ptr_ring_init_queue_alloc(size, gfp);
++	if (!r->queue)
++		return -ENOMEM;
++
++	__ptr_ring_set_size(r, size);
++	r->producer = r->consumer_head = r->consumer_tail = 0;
++	spin_lock_init(&r->producer_lock);
++	spin_lock_init(&r->consumer_lock);
++
++	return 0;
++}
++
++/*
++ * Return entries into ring. Destroy entries that don't fit.
++ *
++ * Note: this is expected to be a rare slow path operation.
++ *
++ * Note: producer lock is nested within consumer lock, so if you
++ * resize you must make sure all uses nest correctly.
++ * In particular if you consume ring in interrupt or BH context, you must
++ * disable interrupts/BH when doing so.
++ */
++static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
++				      void (*destroy)(void *))
++{
++	unsigned long flags;
++	int head;
++
++	spin_lock_irqsave(&r->consumer_lock, flags);
++	spin_lock(&r->producer_lock);
++
++	if (!r->size)
++		goto done;
++
++	/*
++	 * Clean out buffered entries (for simplicity). This way following code
++	 * can test entries for NULL and if not assume they are valid.
++	 */
++	head = r->consumer_head - 1;
++	while (likely(head >= r->consumer_tail))
++		r->queue[head--] = NULL;
++	r->consumer_tail = r->consumer_head;
++
++	/*
++	 * Go over entries in batch, start moving head back and copy entries.
++	 * Stop when we run into previously unconsumed entries.
++	 */
++	while (n) {
++		head = r->consumer_head - 1;
++		if (head < 0)
++			head = r->size - 1;
++		if (r->queue[head]) {
++			/* This batch entry will have to be destroyed. */
++			goto done;
++		}
++		r->queue[head] = batch[--n];
++		r->consumer_tail = r->consumer_head = head;
++	}
++
++done:
++	/* Destroy all entries left in the batch. */
++	while (n)
++		destroy(batch[--n]);
++	spin_unlock(&r->producer_lock);
++	spin_unlock_irqrestore(&r->consumer_lock, flags);
++}
++
++static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
++					   int size, gfp_t gfp,
++					   void (*destroy)(void *))
++{
++	int producer = 0; /* next free slot in the new queue */
++	void **old;
++	void *ptr;
++
++	while ((ptr = __ptr_ring_consume(r))) /* drain every entry of the old ring */
++		if (producer < size)
++			queue[producer++] = ptr;
++		else if (destroy) /* new queue is full: drop what does not fit */
++			destroy(ptr);
++
++	__ptr_ring_set_size(r, size);
++	r->producer = producer >= size ? 0 : producer; /* wrap: index must stay below size */
++	r->consumer_head = 0;
++	r->consumer_tail = 0;
++	old = r->queue;
++	r->queue = queue;
++
++	return old; /* caller frees the old queue array */
++}
++
++/*
++ * Note: producer lock is nested within consumer lock, so if you
++ * resize you must make sure all uses nest correctly.
++ * In particular if you consume ring in interrupt or BH context, you must
++ * disable interrupts/BH when doing so.
++ */
++static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
++				  void (*destroy)(void *))
++{
++	unsigned long flags;
++	void **queue = __ptr_ring_init_queue_alloc(size, gfp);
++	void **old;
++
++	if (!queue)
++		return -ENOMEM;
++
++	spin_lock_irqsave(&(r)->consumer_lock, flags);
++	spin_lock(&(r)->producer_lock);
++
++	old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);
++
++	spin_unlock(&(r)->producer_lock);
++	spin_unlock_irqrestore(&(r)->consumer_lock, flags);
++
++	kfree(old);
++
++	return 0;
++}
++
++/*
++ * Note: producer lock is nested within consumer lock, so if you
++ * resize you must make sure all uses nest correctly.
++ * In particular if you consume ring in interrupt or BH context, you must
++ * disable interrupts/BH when doing so.
++ */
++static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
++					   unsigned int nrings,
++					   int size,
++					   gfp_t gfp, void (*destroy)(void *))
++{
++	unsigned long flags;
++	void ***queues;
++	int i;
++
++	queues = kmalloc_array(nrings, sizeof(*queues), gfp);
++	if (!queues)
++		goto noqueues;
++
++	for (i = 0; i < nrings; ++i) {
++		queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
++		if (!queues[i])
++			goto nomem;
++	}
++
++	for (i = 0; i < nrings; ++i) {
++		spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
++		spin_lock(&(rings[i])->producer_lock);
++		queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
++						  size, gfp, destroy);
++		spin_unlock(&(rings[i])->producer_lock);
++		spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
++	}
++
++	for (i = 0; i < nrings; ++i)
++		kfree(queues[i]);
++
++	kfree(queues);
++
++	return 0;
++
++nomem:
++	while (--i >= 0)
++		kfree(queues[i]);
++
++	kfree(queues);
++
++noqueues:
++	return -ENOMEM;
++}
++
++static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
++{
++	void *ptr;
++
++	if (destroy)
++		while ((ptr = ptr_ring_consume(r)))
++			destroy(ptr);
++	kfree(r->queue);
++}
++
++#endif /* _LINUX_PTR_RING_H  */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/simd-asm/include/asm/simd.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,21 @@
++#ifndef _COMPAT_ASM_SIMD_H
++#define _COMPAT_ASM_SIMD_H
++
++#if defined(CONFIG_X86_64)
++#include <asm/fpu/api.h>
++#endif
++
++static __must_check inline bool may_use_simd(void)
++{
++#if defined(CONFIG_X86_64)
++	return irq_fpu_usable();
++#elif defined(CONFIG_ARM64) && defined(CONFIG_KERNEL_MODE_NEON)
++	return true;
++#elif defined(CONFIG_ARM) && defined(CONFIG_KERNEL_MODE_NEON)
++	return !in_nmi() && !in_irq() && !in_serving_softirq();
++#else
++	return false;
++#endif
++}
++
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/simd/include/linux/simd.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,70 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_SIMD_H
++#define _WG_SIMD_H
++
++#include <linux/sched.h>
++#include <asm/simd.h>
++#if defined(CONFIG_X86_64)
++#include <linux/version.h>
++#include <asm/fpu/api.h>
++#elif defined(CONFIG_KERNEL_MODE_NEON)
++#include <asm/neon.h>
++#endif
++
++typedef enum {
++	HAVE_NO_SIMD = 1 << 0,
++	HAVE_FULL_SIMD = 1 << 1,
++	HAVE_SIMD_IN_USE = 1 << 31
++} simd_context_t;
++
++#define DONT_USE_SIMD ((simd_context_t []){ HAVE_NO_SIMD })
++
++static inline void simd_get(simd_context_t *ctx)
++{
++	*ctx = !IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
++}
++
++static inline void simd_put(simd_context_t *ctx)
++{
++#if defined(CONFIG_X86_64)
++	if (*ctx & HAVE_SIMD_IN_USE) /* only end a section that simd_use() actually began */
++		kernel_fpu_end();
++#elif defined(CONFIG_KERNEL_MODE_NEON)
++	if (*ctx & HAVE_SIMD_IN_USE) /* same rule for the NEON variant */
++		kernel_neon_end();
++#endif
++	*ctx = HAVE_NO_SIMD; /* simd_use() returns false until the next simd_get() */
++}
++
++static inline bool simd_relax(simd_context_t *ctx)
++{
++#ifdef CONFIG_PREEMPT
++	if ((*ctx & HAVE_SIMD_IN_USE) && need_resched()) { /* SIMD held while a reschedule is pending */
++		simd_put(ctx); /* end the SIMD section ... */
++		simd_get(ctx); /* ... and re-evaluate whether SIMD may be used */
++		return true; /* HAVE_SIMD_IN_USE is now clear: caller must simd_use() again */
++	}
++#endif
++	return false; /* context left untouched */
++}
++
++static __must_check inline bool simd_use(simd_context_t *ctx)
++{
++	if (!(*ctx & HAVE_FULL_SIMD)) /* simd_get() did not grant SIMD for this context */
++		return false;
++	if (*ctx & HAVE_SIMD_IN_USE) /* section already begun: do not begin it twice */
++		return true;
++#if defined(CONFIG_X86_64)
++	kernel_fpu_begin();
++#elif defined(CONFIG_KERNEL_MODE_NEON)
++	kernel_neon_begin();
++#endif
++	*ctx |= HAVE_SIMD_IN_USE; /* tells simd_put() that an end call is owed */
++	return true;
++}
++
++#endif /* _WG_SIMD_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/siphash/include/linux/siphash.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,140 @@
++/* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * This file is provided under a dual BSD/GPLv2 license.
++ *
++ * SipHash: a fast short-input PRF
++ * https://131002.net/siphash/
++ *
++ * This implementation is specifically for SipHash2-4 for a secure PRF
++ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
++ * hashtables.
++ */
++
++#ifndef _WG_LINUX_SIPHASH_H
++#define _WG_LINUX_SIPHASH_H
++
++#include <linux/types.h>
++#include <linux/kernel.h>
++
++#define SIPHASH_ALIGNMENT __alignof__(u64)
++typedef struct {
++	u64 key[2];
++} siphash_key_t;
++
++u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
++#endif
++
++u64 siphash_1u64(const u64 a, const siphash_key_t *key);
++u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
++u64 siphash_3u64(const u64 a, const u64 b, const u64 c,
++		 const siphash_key_t *key);
++u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d,
++		 const siphash_key_t *key);
++u64 siphash_1u32(const u32 a, const siphash_key_t *key);
++u64 siphash_3u32(const u32 a, const u32 b, const u32 c,
++		 const siphash_key_t *key);
++
++static inline u64 siphash_2u32(const u32 a, const u32 b,
++			       const siphash_key_t *key)
++{
++	return siphash_1u64((u64)b << 32 | a, key);
++}
++static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c,
++			       const u32 d, const siphash_key_t *key)
++{
++	return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key);
++}
++
++
++static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
++				     const siphash_key_t *key)
++{
++	if (__builtin_constant_p(len) && len == 4)
++		return siphash_1u32(le32_to_cpup((const __le32 *)data), key);
++	if (__builtin_constant_p(len) && len == 8)
++		return siphash_1u64(le64_to_cpu(data[0]), key);
++	if (__builtin_constant_p(len) && len == 16)
++		return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
++				    key);
++	if (__builtin_constant_p(len) && len == 24)
++		return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
++				    le64_to_cpu(data[2]), key);
++	if (__builtin_constant_p(len) && len == 32)
++		return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
++				    le64_to_cpu(data[2]), le64_to_cpu(data[3]),
++				    key);
++	return __siphash_aligned(data, len, key);
++}
++
++/**
++ * siphash - compute 64-bit siphash PRF value
++ * @data: buffer to hash
++ * @len: size of @data
++ * @key: the siphash key
++ */
++static inline u64 siphash(const void *data, size_t len,
++			  const siphash_key_t *key)
++{
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++	if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
++		return __siphash_unaligned(data, len, key);
++#endif
++	return ___siphash_aligned(data, len, key);
++}
++
++#define HSIPHASH_ALIGNMENT __alignof__(unsigned long)
++typedef struct {
++	unsigned long key[2];
++} hsiphash_key_t;
++
++u32 __hsiphash_aligned(const void *data, size_t len,
++		       const hsiphash_key_t *key);
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++u32 __hsiphash_unaligned(const void *data, size_t len,
++			 const hsiphash_key_t *key);
++#endif
++
++u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
++u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
++u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c,
++		  const hsiphash_key_t *key);
++u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d,
++		  const hsiphash_key_t *key);
++
++static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
++				      const hsiphash_key_t *key)
++{
++	if (__builtin_constant_p(len) && len == 4)
++		return hsiphash_1u32(le32_to_cpu(data[0]), key);
++	if (__builtin_constant_p(len) && len == 8)
++		return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
++				     key);
++	if (__builtin_constant_p(len) && len == 12)
++		return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
++				     le32_to_cpu(data[2]), key);
++	if (__builtin_constant_p(len) && len == 16)
++		return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
++				     le32_to_cpu(data[2]), le32_to_cpu(data[3]),
++				     key);
++	return __hsiphash_aligned(data, len, key);
++}
++
++/**
++ * hsiphash - compute 32-bit hsiphash PRF value
++ * @data: buffer to hash
++ * @len: size of @data
++ * @key: the hsiphash key
++ */
++static inline u32 hsiphash(const void *data, size_t len,
++			   const hsiphash_key_t *key)
++{
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++	if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
++		return __hsiphash_unaligned(data, len, key);
++#endif
++	return ___hsiphash_aligned(data, len, key);
++}
++
++#endif /* _WG_LINUX_SIPHASH_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/udp_tunnel/include/net/udp_tunnel.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,94 @@
++#ifndef _WG_NET_UDP_TUNNEL_H
++#define _WG_NET_UDP_TUNNEL_H
++
++#include <net/ip_tunnels.h>
++#include <net/udp.h>
++
++#if IS_ENABLED(CONFIG_IPV6)
++#include <net/ipv6.h>
++#include <net/addrconf.h>
++#endif
++
++struct udp_port_cfg {
++	u8			family;
++
++	/* Used only for kernel-created sockets */
++	union {
++		struct in_addr		local_ip;
++#if IS_ENABLED(CONFIG_IPV6)
++		struct in6_addr		local_ip6;
++#endif
++	};
++
++	union {
++		struct in_addr		peer_ip;
++#if IS_ENABLED(CONFIG_IPV6)
++		struct in6_addr		peer_ip6;
++#endif
++	};
++
++	__be16			local_udp_port;
++	__be16			peer_udp_port;
++	unsigned int		use_udp_checksums:1,
++				use_udp6_tx_checksums:1,
++				use_udp6_rx_checksums:1,
++				ipv6_v6only:1;
++};
++
++int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
++		     struct socket **sockp);
++
++#if IS_ENABLED(CONFIG_IPV6)
++int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
++		     struct socket **sockp);
++#else
++static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
++				   struct socket **sockp)
++{
++	return -EPFNOSUPPORT; /* IPv6 is compiled out: *sockp is never written, so do not report success */
++}
++#endif
++
++static inline int udp_sock_create(struct net *net,
++				  struct udp_port_cfg *cfg,
++				  struct socket **sockp)
++{
++	if (cfg->family == AF_INET)
++		return udp_sock_create4(net, cfg, sockp);
++
++	if (cfg->family == AF_INET6)
++		return udp_sock_create6(net, cfg, sockp);
++
++	return -EPFNOSUPPORT;
++}
++
++typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
++
++struct udp_tunnel_sock_cfg {
++	void *sk_user_data;
++	__u8  encap_type;
++	udp_tunnel_encap_rcv_t encap_rcv;
++};
++
++/* Setup the given (UDP) sock to receive UDP encapsulated packets */
++void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
++			   struct udp_tunnel_sock_cfg *sock_cfg);
++
++/* Transmit the skb using UDP encapsulation. */
++void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
++			 __be32 src, __be32 dst, __u8 tos, __u8 ttl,
++			 __be16 df, __be16 src_port, __be16 dst_port,
++			 bool xnet, bool nocheck);
++
++#if IS_ENABLED(CONFIG_IPV6)
++int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
++			 struct sk_buff *skb,
++			 struct net_device *dev, struct in6_addr *saddr,
++			 struct in6_addr *daddr,
++			 __u8 prio, __u8 ttl, __be32 label,
++			 __be16 src_port, __be16 dst_port, bool nocheck);
++#endif
++
++void udp_tunnel_sock_release(struct socket *sock);
++
++#endif /* _WG_NET_UDP_TUNNEL_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/udp_tunnel/udp_tunnel_partial_compat.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,226 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
++#define udp_sock_create4 udp_sock_create
++#define udp_sock_create6 udp_sock_create
++#include <linux/socket.h>
++#include <linux/if.h>
++#include <linux/in.h>
++#include <net/ip_tunnels.h>
++#include <net/udp.h>
++#include <net/inet_common.h>
++#if IS_ENABLED(CONFIG_IPV6)
++#include <linux/in6.h>
++#include <net/ipv6.h>
++#include <net/addrconf.h>
++#include <net/ip6_checksum.h>
++#include <net/ip6_tunnel.h>
++#endif
++static inline void __compat_fake_destructor(struct sk_buff *skb)
++{
++}
++typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
++struct udp_tunnel_sock_cfg {
++        void *sk_user_data;
++        __u8  encap_type;
++        udp_tunnel_encap_rcv_t encap_rcv;
++};
++/* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. */
++static udp_tunnel_encap_rcv_t encap_rcv = NULL;
++static void __compat_sk_data_ready(struct sock *sk)
++{
++	struct sk_buff *skb;
++	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
++		skb_orphan(skb);
++		sk_mem_reclaim(sk);
++		encap_rcv(sk, skb);
++	}
++}
++static inline void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
++                           struct udp_tunnel_sock_cfg *cfg)
++{
++	struct sock *sk = sock->sk;
++	inet_sk(sk)->mc_loop = 0;
++	encap_rcv = cfg->encap_rcv;
++	rcu_assign_sk_user_data(sk, cfg->sk_user_data);
++	sk->sk_data_ready = __compat_sk_data_ready;
++}
++static inline void udp_tunnel_sock_release(struct socket *sock)
++{
++	rcu_assign_sk_user_data(sock->sk, NULL);
++	kernel_sock_shutdown(sock, SHUT_RDWR);
++	sk_release_kernel(sock->sk);
++}
++static inline int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
++                        struct sk_buff *skb, __be32 src, __be32 dst,
++                        __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
++                        __be16 dst_port, bool xnet)
++{
++	struct udphdr *uh;
++	__skb_push(skb, sizeof(*uh));
++	skb_reset_transport_header(skb);
++	uh = udp_hdr(skb);
++	uh->dest = dst_port;
++	uh->source = src_port;
++	uh->len = htons(skb->len);
++	udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
++	return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
++			     tos, ttl, df, xnet);
++}
++#if IS_ENABLED(CONFIG_IPV6)
++static inline int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
++                         struct sk_buff *skb, struct net_device *dev,
++                         struct in6_addr *saddr, struct in6_addr *daddr,
++                         __u8 prio, __u8 ttl, __be16 src_port,
++                         __be16 dst_port)
++{
++	struct udphdr *uh;
++	struct ipv6hdr *ip6h;
++	struct sock *sk = sock->sk;
++	__skb_push(skb, sizeof(*uh)); /* prepend the UDP header */
++	skb_reset_transport_header(skb);
++	uh = udp_hdr(skb);
++	uh->dest = dst_port;
++	uh->source = src_port;
++	uh->len = htons(skb->len); /* skb->len now includes the UDP header pushed above */
++	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
++	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
++			    | IPSKB_REROUTED);
++	skb_dst_set(skb, dst);
++	udp6_set_csum(udp_get_no_check6_tx(sk), skb, saddr, daddr,
++	              skb->len); /* checksum over the same addresses written into ip6h below, not the socket's */
++	__skb_push(skb, sizeof(*ip6h)); /* prepend the IPv6 header */
++	skb_reset_network_header(skb);
++	ip6h		  = ipv6_hdr(skb);
++	ip6_flow_hdr(ip6h, prio, htonl(0));
++	ip6h->payload_len = htons(skb->len);
++	ip6h->nexthdr     = IPPROTO_UDP;
++	ip6h->hop_limit   = ttl;
++	ip6h->daddr	  = *daddr;
++	ip6h->saddr	  = *saddr;
++	ip6tunnel_xmit(skb, dev);
++	return 0;
++}
++#endif
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
++#include <linux/in.h>
++#include <linux/in6.h>
++#include <linux/udp.h>
++#include <linux/skbuff.h>
++#include <linux/if.h>
++#include <net/udp_tunnel.h>
++#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; ret__ = udp_tunnel_xmit_skb((b)->sk_socket, a, c, d, e, f, g, h, i, j, k); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0)
++#if IS_ENABLED(CONFIG_IPV6)
++#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) udp_tunnel6_xmit_skb((b)->sk_socket, a, c, d, e, f, g, h, j, k) /* args i and l are not forwarded; no trailing ';' so the call site supplies it */
++#endif
++#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
++#include <linux/if.h>
++#include <net/udp_tunnel.h>
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
++static inline void __compat_fake_destructor(struct sk_buff *skb)
++{
++}
++#endif
++#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; if (!(c)->destructor) (c)->destructor = __compat_fake_destructor; if (!(c)->sk) (c)->sk = (b); ret__ = udp_tunnel_xmit_skb(a, c, d, e, f, g, h, i, j, k, l); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0)
++#if IS_ENABLED(CONFIG_IPV6)
++#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { if (!(c)->destructor) (c)->destructor = __compat_fake_destructor; if (!(c)->sk) (c)->sk = (b); udp_tunnel6_xmit_skb(a, c, d, e, f, g, h, j, k, l); } while(0)
++#endif
++#else
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
++#include <linux/if.h>
++#include <net/udp_tunnel.h>
++#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__ = udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
++#include <linux/if.h>
++#include <net/udp_tunnel.h>
++#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__ = udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l); iptunnel_xmit_stats(ret__, &dev__->stats, dev__->tstats); } while (0)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
++#include <linux/if.h>
++#include <net/udp_tunnel.h>
++#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, j, k, l)
++#endif
++
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
++#include <linux/skbuff.h>
++#include <linux/if.h>
++#include <net/udp_tunnel.h>
++struct __compat_udp_port_cfg {
++	u8 family;
++	union {
++		struct in_addr local_ip;
++#if IS_ENABLED(CONFIG_IPV6)
++		struct in6_addr local_ip6;
++#endif
++	};
++	union {
++		struct in_addr peer_ip;
++#if IS_ENABLED(CONFIG_IPV6)
++		struct in6_addr peer_ip6;
++#endif
++	};
++	__be16 local_udp_port;
++	__be16 peer_udp_port;
++	unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, ipv6_v6only:1;
++};
++static inline int __maybe_unused __compat_udp_sock_create(struct net *net, struct __compat_udp_port_cfg *cfg, struct socket **sockp)
++{
++	struct udp_port_cfg old_cfg = { /* the kernel's own pre-4.3 struct, which has no ipv6_v6only member */
++		.family = cfg->family,
++		.local_ip = cfg->local_ip,
++#if IS_ENABLED(CONFIG_IPV6)
++		.local_ip6 = cfg->local_ip6,
++#endif
++		.peer_ip = cfg->peer_ip,
++#if IS_ENABLED(CONFIG_IPV6)
++		.peer_ip6 = cfg->peer_ip6,
++#endif
++		.local_udp_port = cfg->local_udp_port,
++		.peer_udp_port = cfg->peer_udp_port,
++		.use_udp_checksums = cfg->use_udp_checksums,
++		.use_udp6_tx_checksums = cfg->use_udp6_tx_checksums,
++		.use_udp6_rx_checksums = cfg->use_udp6_rx_checksums
++	};
++	if (cfg->family == AF_INET)
++		return udp_sock_create4(net, &old_cfg, sockp);
++
++#if IS_ENABLED(CONFIG_IPV6)
++	if (cfg->family == AF_INET6) {
++		int ret;
++		int old_bindv6only;
++		struct net *nobns;
++
++		if (cfg->ipv6_v6only) {
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
++			nobns = &init_net;
++#else
++			nobns = net;
++#endif
++			/* udp_port_cfg only learned of ipv6_v6only in 4.3, so as a horrible hack
++			 * we temporarily set the bindv6only sysctl to something that makes
++			 * sock_create set the right option for us. This is inherently racy! */
++			old_bindv6only = nobns->ipv6.sysctl.bindv6only;
++			nobns->ipv6.sysctl.bindv6only = 1;
++		}
++		ret = udp_sock_create6(net, &old_cfg, sockp);
++		if (cfg->ipv6_v6only)
++			nobns->ipv6.sysctl.bindv6only = old_bindv6only; /* put the saved sysctl value back */
++		return ret;
++	}
++#endif
++	return -EPFNOSUPPORT;
++}
++#define udp_port_cfg __compat_udp_port_cfg
++#define udp_sock_create(a, b, c) __compat_udp_sock_create(a, b, c)
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/cookie.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,59 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_COOKIE_H
++#define _WG_COOKIE_H
++
++#include "messages.h"
++#include <linux/rwsem.h>
++
++struct wg_peer;
++
++struct cookie_checker {
++	u8 secret[NOISE_HASH_LEN];
++	u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN];
++	u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
++	u64 secret_birthdate;
++	struct rw_semaphore secret_lock;
++	struct wg_device *device;
++};
++
++struct cookie {
++	u64 birthdate;
++	bool is_valid;
++	u8 cookie[COOKIE_LEN];
++	bool have_sent_mac1;
++	u8 last_mac1_sent[COOKIE_LEN];
++	u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN];
++	u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
++	struct rw_semaphore lock;
++};
++
++enum cookie_mac_state {
++	INVALID_MAC,
++	VALID_MAC_BUT_NO_COOKIE,
++	VALID_MAC_WITH_COOKIE_BUT_RATELIMITED,
++	VALID_MAC_WITH_COOKIE
++};
++
++void wg_cookie_checker_init(struct cookie_checker *checker,
++			    struct wg_device *wg);
++void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker);
++void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer);
++void wg_cookie_init(struct cookie *cookie);
++
++enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
++						struct sk_buff *skb,
++						bool check_cookie);
++void wg_cookie_add_mac_to_packet(void *message, size_t len,
++				 struct wg_peer *peer);
++
++void wg_cookie_message_create(struct message_handshake_cookie *src,
++			      struct sk_buff *skb, __le32 index,
++			      struct cookie_checker *checker);
++void wg_cookie_message_consume(struct message_handshake_cookie *src,
++			       struct wg_device *wg);
++
++#endif /* _WG_COOKIE_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/include/zinc/blake2s.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,56 @@
++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _ZINC_BLAKE2S_H
++#define _ZINC_BLAKE2S_H
++
++#include <linux/types.h>
++#include <linux/kernel.h>
++#include <asm/bug.h>
++
++enum blake2s_lengths {
++	BLAKE2S_BLOCK_SIZE = 64,
++	BLAKE2S_HASH_SIZE = 32,
++	BLAKE2S_KEY_SIZE = 32
++};
++
++struct blake2s_state {
++	u32 h[8];
++	u32 t[2];
++	u32 f[2];
++	u8 buf[BLAKE2S_BLOCK_SIZE];
++	unsigned int buflen;
++	unsigned int outlen;
++};
++
++void blake2s_init(struct blake2s_state *state, const size_t outlen);
++void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
++		      const void *key, const size_t keylen);
++void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen);
++void blake2s_final(struct blake2s_state *state, u8 *out);
++
++static inline void blake2s(u8 *out, const u8 *in, const u8 *key,
++			   const size_t outlen, const size_t inlen,
++			   const size_t keylen)
++{
++	struct blake2s_state state;
++
++	WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
++		outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
++		(!key && keylen)));
++
++	if (keylen)
++		blake2s_init_key(&state, outlen, key, keylen);
++	else
++		blake2s_init(&state, outlen);
++
++	blake2s_update(&state, in, inlen);
++	blake2s_final(&state, out);
++}
++
++void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen,
++		  const size_t inlen, const size_t keylen);
++
++#endif /* _ZINC_BLAKE2S_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/include/zinc/chacha20.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,70 @@
++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _ZINC_CHACHA20_H
++#define _ZINC_CHACHA20_H
++
++#include <asm/unaligned.h>
++#include <linux/simd.h>
++#include <linux/kernel.h>
++#include <linux/types.h>
++
++enum chacha20_lengths {
++	CHACHA20_NONCE_SIZE = 16,
++	CHACHA20_KEY_SIZE = 32,
++	CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(u32),
++	CHACHA20_BLOCK_SIZE = 64,
++	CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(u32),
++	HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE,
++	HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE
++};
++
++enum chacha20_constants { /* expand 32-byte k */
++	CHACHA20_CONSTANT_EXPA = 0x61707865U,
++	CHACHA20_CONSTANT_ND_3 = 0x3320646eU,
++	CHACHA20_CONSTANT_2_BY = 0x79622d32U,
++	CHACHA20_CONSTANT_TE_K = 0x6b206574U
++};
++
++struct chacha20_ctx {
++	union {
++		u32 state[16];
++		struct {
++			u32 constant[4];
++			u32 key[8];
++			u32 counter[4];
++		};
++	};
++};
++
++static inline void chacha20_init(struct chacha20_ctx *ctx,
++				 const u8 key[CHACHA20_KEY_SIZE],
++				 const u64 nonce)
++{
++	ctx->constant[0] = CHACHA20_CONSTANT_EXPA;
++	ctx->constant[1] = CHACHA20_CONSTANT_ND_3;
++	ctx->constant[2] = CHACHA20_CONSTANT_2_BY;
++	ctx->constant[3] = CHACHA20_CONSTANT_TE_K;
++	ctx->key[0] = get_unaligned_le32(key + 0);
++	ctx->key[1] = get_unaligned_le32(key + 4);
++	ctx->key[2] = get_unaligned_le32(key + 8);
++	ctx->key[3] = get_unaligned_le32(key + 12);
++	ctx->key[4] = get_unaligned_le32(key + 16);
++	ctx->key[5] = get_unaligned_le32(key + 20);
++	ctx->key[6] = get_unaligned_le32(key + 24);
++	ctx->key[7] = get_unaligned_le32(key + 28);
++	ctx->counter[0] = 0;
++	ctx->counter[1] = 0;
++	ctx->counter[2] = nonce & U32_MAX;
++	ctx->counter[3] = nonce >> 32;
++}
++void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len,
++	      simd_context_t *simd_context);
++
++void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS],
++	       const u8 nonce[HCHACHA20_NONCE_SIZE],
++	       const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context);
++
++#endif /* _ZINC_CHACHA20_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/include/zinc/chacha20poly1305.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,50 @@
++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _ZINC_CHACHA20POLY1305_H
++#define _ZINC_CHACHA20POLY1305_H
++
++#include <linux/simd.h>
++#include <linux/types.h>
++
++struct scatterlist;
++
++enum chacha20poly1305_lengths {
++	XCHACHA20POLY1305_NONCE_SIZE = 24,
++	CHACHA20POLY1305_KEY_SIZE = 32,
++	CHACHA20POLY1305_AUTHTAG_SIZE = 16
++};
++
++void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
++			      const u8 *ad, const size_t ad_len,
++			      const u64 nonce,
++			      const u8 key[CHACHA20POLY1305_KEY_SIZE]);
++
++bool __must_check chacha20poly1305_encrypt_sg_inplace(
++	struct scatterlist *src, const size_t src_len, const u8 *ad,
++	const size_t ad_len, const u64 nonce,
++	const u8 key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context);
++
++bool __must_check
++chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
++			 const u8 *ad, const size_t ad_len, const u64 nonce,
++			 const u8 key[CHACHA20POLY1305_KEY_SIZE]);
++
++bool __must_check chacha20poly1305_decrypt_sg_inplace(
++	struct scatterlist *src, size_t src_len, const u8 *ad,
++	const size_t ad_len, const u64 nonce,
++	const u8 key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context);
++
++void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
++			       const u8 *ad, const size_t ad_len,
++			       const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
++			       const u8 key[CHACHA20POLY1305_KEY_SIZE]);
++
++bool __must_check xchacha20poly1305_decrypt(
++	u8 *dst, const u8 *src, const size_t src_len, const u8 *ad,
++	const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
++	const u8 key[CHACHA20POLY1305_KEY_SIZE]);
++
++#endif /* _ZINC_CHACHA20POLY1305_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/include/zinc/curve25519.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,28 @@
++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _ZINC_CURVE25519_H
++#define _ZINC_CURVE25519_H
++
++#include <linux/types.h>
++
++enum curve25519_lengths {
++	CURVE25519_KEY_SIZE = 32
++};
++
++bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
++			     const u8 secret[CURVE25519_KEY_SIZE],
++			     const u8 basepoint[CURVE25519_KEY_SIZE]);
++void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE]);
++bool __must_check curve25519_generate_public(
++	u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]);
++
++static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE])
++{
++	secret[0] &= 248; /* clear the three lowest bits of the first byte */
++	secret[31] = (secret[31] & 127) | 64; /* last byte: clear bit 7, set bit 6 */
++}
++
++#endif /* _ZINC_CURVE25519_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/include/zinc/poly1305.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,31 @@
++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _ZINC_POLY1305_H
++#define _ZINC_POLY1305_H
++
++#include <linux/simd.h>
++#include <linux/types.h>
++
++enum poly1305_lengths {
++	POLY1305_BLOCK_SIZE = 16,
++	POLY1305_KEY_SIZE = 32,
++	POLY1305_MAC_SIZE = 16
++};
++
++struct poly1305_ctx {
++	u8 opaque[24 * sizeof(u64)];
++	u32 nonce[4];
++	u8 data[POLY1305_BLOCK_SIZE];
++	size_t num;
++} __aligned(8);
++
++void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE]);
++void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len,
++		     simd_context_t *simd_context);
++void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
++		    simd_context_t *simd_context);
++
++#endif /* _ZINC_POLY1305_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,15 @@
++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_ZINC_H
++#define _WG_ZINC_H
++
++int chacha20_mod_init(void);
++int poly1305_mod_init(void);
++int chacha20poly1305_mod_init(void);
++int blake2s_mod_init(void);
++int curve25519_mod_init(void);
++
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/selftest/run.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,48 @@
++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _ZINC_SELFTEST_RUN_H
++#define _ZINC_SELFTEST_RUN_H
++
++#include <linux/kernel.h>
++#include <linux/printk.h>
++#include <linux/bug.h>
++
++static inline bool selftest_run(const char *name, bool (*selftest)(void),
++				bool *const nobs[], unsigned int nobs_len)
++{
++	unsigned long set = 0, subset = 0, largest_subset = 0;
++	unsigned int i;
++
++	BUILD_BUG_ON(!__builtin_constant_p(nobs_len) ||
++		     nobs_len >= BITS_PER_LONG);
++
++	if (!IS_ENABLED(CONFIG_ZINC_SELFTEST))
++		return true; /* self-tests disabled: report success without running anything */
++
++	for (i = 0; i < nobs_len; ++i)
++		set |= ((unsigned long)*nobs[i]) << i; /* bit i = value of *nobs[i] on entry */
++
++	do {
++		for (i = 0; i < nobs_len; ++i)
++			*nobs[i] = BIT(i) & subset; /* turn on exactly the flags in this subset */
++		if (selftest())
++			largest_subset = max(subset, largest_subset);
++		else
++			pr_err("%s self-test combination 0x%lx: FAIL\n", name,
++			       subset);
++		subset = (subset - set) & set; /* next sub-mask of set; becomes 0 again after set itself */
++	} while (subset);
++
++	for (i = 0; i < nobs_len; ++i)
++		*nobs[i] = BIT(i) & largest_subset; /* leave the flags at the numerically largest passing subset */
++
++	if (largest_subset == set)
++		pr_info("%s self-tests: pass\n", name);
++
++	return !WARN_ON(largest_subset != set);
++}
++
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/device.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,65 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_DEVICE_H
++#define _WG_DEVICE_H
++
++#include "noise.h"
++#include "allowedips.h"
++#include "peerlookup.h"
++#include "cookie.h"
++
++#include <linux/types.h>
++#include <linux/netdevice.h>
++#include <linux/workqueue.h>
++#include <linux/mutex.h>
++#include <linux/net.h>
++#include <linux/ptr_ring.h>
++
++struct wg_device;
++
++struct multicore_worker {
++	void *ptr;
++	struct work_struct work;
++};
++
++struct crypt_queue {
++	struct ptr_ring ring;
++	union {
++		struct {
++			struct multicore_worker __percpu *worker;
++			int last_cpu;
++		};
++		struct work_struct work;
++	};
++};
++
++struct wg_device {
++	struct net_device *dev;
++	struct crypt_queue encrypt_queue, decrypt_queue;
++	struct sock __rcu *sock4, *sock6;
++	struct net *creating_net;
++	struct noise_static_identity static_identity;
++	struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
++	struct workqueue_struct *packet_crypt_wq;
++	struct sk_buff_head incoming_handshakes;
++	int incoming_handshake_cpu;
++	struct multicore_worker __percpu *incoming_handshakes_worker;
++	struct cookie_checker cookie_checker;
++	struct pubkey_hashtable *peer_hashtable;
++	struct index_hashtable *index_hashtable;
++	struct allowedips peer_allowedips;
++	struct mutex device_update_lock, socket_update_lock;
++	struct list_head device_list, peer_list;
++	unsigned int num_peers, device_update_gen;
++	u32 fwmark;
++	u16 incoming_port;
++	bool have_creating_net_ref;
++};
++
++int wg_device_init(void);
++void wg_device_uninit(void);
++
++#endif /* _WG_DEVICE_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/messages.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,128 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_MESSAGES_H
++#define _WG_MESSAGES_H
++
++#include <zinc/curve25519.h>
++#include <zinc/chacha20poly1305.h>
++#include <zinc/blake2s.h>
++
++#include <linux/kernel.h>
++#include <linux/param.h>
++#include <linux/skbuff.h>
++
++enum noise_lengths {
++	NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE,
++	NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE,
++	NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32),
++	NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE,
++	NOISE_HASH_LEN = BLAKE2S_HASH_SIZE
++};
++
++#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN)
++
++enum cookie_values {
++	COOKIE_SECRET_MAX_AGE = 2 * 60,
++	COOKIE_SECRET_LATENCY = 5,
++	COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE,
++	COOKIE_LEN = 16
++};
++
++enum counter_values {
++	COUNTER_BITS_TOTAL = 2048,
++	COUNTER_REDUNDANT_BITS = BITS_PER_LONG,
++	COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS
++};
++
++enum limits {
++	REKEY_AFTER_MESSAGES = 1ULL << 60,
++	REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1,
++	REKEY_TIMEOUT = 5,
++	REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3,
++	REKEY_AFTER_TIME = 120,
++	REJECT_AFTER_TIME = 180,
++	INITIATIONS_PER_SECOND = 50,
++	MAX_PEERS_PER_DEVICE = 1U << 20,
++	KEEPALIVE_TIMEOUT = 10,
++	MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT,
++	MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */
++	MAX_STAGED_PACKETS = 128,
++	MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */
++};
++
++enum message_type {
++	MESSAGE_INVALID = 0,
++	MESSAGE_HANDSHAKE_INITIATION = 1,
++	MESSAGE_HANDSHAKE_RESPONSE = 2,
++	MESSAGE_HANDSHAKE_COOKIE = 3,
++	MESSAGE_DATA = 4
++};
++
++struct message_header {
++	/* The actual layout of this that we want is:
++	 * u8 type
++	 * u8 reserved_zero[3]
++	 *
++	 * But it turns out that by encoding this as little endian,
++	 * we achieve the same thing, and it makes checking faster.
++	 */
++	__le32 type;
++};
++
++struct message_macs {
++	u8 mac1[COOKIE_LEN];
++	u8 mac2[COOKIE_LEN];
++};
++
++struct message_handshake_initiation {
++	struct message_header header;
++	__le32 sender_index;
++	u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
++	u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)];
++	u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)];
++	struct message_macs macs;
++};
++
++struct message_handshake_response {
++	struct message_header header;
++	__le32 sender_index;
++	__le32 receiver_index;
++	u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
++	u8 encrypted_nothing[noise_encrypted_len(0)];
++	struct message_macs macs;
++};
++
++struct message_handshake_cookie {
++	struct message_header header;
++	__le32 receiver_index;
++	u8 nonce[COOKIE_NONCE_LEN];
++	u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)];
++};
++
++struct message_data {
++	struct message_header header;
++	__le32 key_idx;
++	__le64 counter;
++	u8 encrypted_data[];
++};
++
++#define message_data_len(plain_len) \
++	(noise_encrypted_len(plain_len) + sizeof(struct message_data))
++
++enum message_alignments {
++	MESSAGE_PADDING_MULTIPLE = 16,
++	MESSAGE_MINIMUM_LENGTH = message_data_len(0)
++};
++
++#define SKB_HEADER_LEN                                       \
++	(max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + \
++	 sizeof(struct udphdr) + NET_SKB_PAD)
++#define DATA_PACKET_HEAD_ROOM \
++	ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4)
++
++enum { HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */ };
++
++#endif /* _WG_MESSAGES_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/netlink.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,12 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_NETLINK_H
++#define _WG_NETLINK_H
++
++int wg_genetlink_init(void);
++void wg_genetlink_uninit(void);
++
++#endif /* _WG_NETLINK_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/noise.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,137 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++#ifndef _WG_NOISE_H
++#define _WG_NOISE_H
++
++#include "messages.h"
++#include "peerlookup.h"
++
++#include <linux/types.h>
++#include <linux/spinlock.h>
++#include <linux/atomic.h>
++#include <linux/rwsem.h>
++#include <linux/mutex.h>
++#include <linux/kref.h>
++
++union noise_counter {
++	struct {
++		u64 counter;
++		unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG];
++		spinlock_t lock;
++	} receive;
++	atomic64_t counter;
++};
++
++struct noise_symmetric_key {
++	u8 key[NOISE_SYMMETRIC_KEY_LEN];
++	union noise_counter counter;
++	u64 birthdate;
++	bool is_valid;
++};
++
++struct noise_keypair {
++	struct index_hashtable_entry entry;
++	struct noise_symmetric_key sending;
++	struct noise_symmetric_key receiving;
++	__le32 remote_index;
++	bool i_am_the_initiator;
++	struct kref refcount;
++	struct rcu_head rcu;
++	u64 internal_id;
++};
++
++struct noise_keypairs {
++	struct noise_keypair __rcu *current_keypair;
++	struct noise_keypair __rcu *previous_keypair;
++	struct noise_keypair __rcu *next_keypair;
++	spinlock_t keypair_update_lock;
++};
++
++struct noise_static_identity {
++	u8 static_public[NOISE_PUBLIC_KEY_LEN];
++	u8 static_private[NOISE_PUBLIC_KEY_LEN];
++	struct rw_semaphore lock;
++	bool has_identity;
++};
++
++enum noise_handshake_state {
++	HANDSHAKE_ZEROED,
++	HANDSHAKE_CREATED_INITIATION,
++	HANDSHAKE_CONSUMED_INITIATION,
++	HANDSHAKE_CREATED_RESPONSE,
++	HANDSHAKE_CONSUMED_RESPONSE
++};
++
++struct noise_handshake {
++	struct index_hashtable_entry entry;
++
++	enum noise_handshake_state state;
++	u64 last_initiation_consumption;
++
++	struct noise_static_identity *static_identity;
++
++	u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
++	u8 remote_static[NOISE_PUBLIC_KEY_LEN];
++	u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN];
++	u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN];
++
++	u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];
++
++	u8 hash[NOISE_HASH_LEN];
++	u8 chaining_key[NOISE_HASH_LEN];
++
++	u8 latest_timestamp[NOISE_TIMESTAMP_LEN];
++	__le32 remote_index;
++
++	/* Protects all members except those immutable after handshake init:
++	 * remote_static, precomputed_static_static, static_identity.
++	 */
++	struct rw_semaphore lock;
++};
++
++struct wg_device;
++
++void wg_noise_init(void);
++void wg_noise_handshake_init(struct noise_handshake *handshake,
++			     struct noise_static_identity *static_identity,
++			     const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
++			     const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
++			     struct wg_peer *peer);
++void wg_noise_handshake_clear(struct noise_handshake *handshake);
++static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns)
++{
++	atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() -
++				       (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC);
++}
++
++void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now);
++struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair);
++void wg_noise_keypairs_clear(struct noise_keypairs *keypairs);
++bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs,
++				    struct noise_keypair *received_keypair);
++void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer);
++
++void wg_noise_set_static_identity_private_key(
++	struct noise_static_identity *static_identity,
++	const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
++void wg_noise_precompute_static_static(struct wg_peer *peer);
++
++bool
++wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
++				     struct noise_handshake *handshake);
++struct wg_peer *
++wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
++				      struct wg_device *wg);
++
++bool wg_noise_handshake_create_response(struct message_handshake_response *dst,
++					struct noise_handshake *handshake);
++struct wg_peer *
++wg_noise_handshake_consume_response(struct message_handshake_response *src,
++				    struct wg_device *wg);
++
++bool wg_noise_handshake_begin_session(struct noise_handshake *handshake,
++				      struct noise_keypairs *keypairs);
++
++#endif /* _WG_NOISE_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/peer.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,83 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_PEER_H
++#define _WG_PEER_H
++
++#include "device.h"
++#include "noise.h"
++#include "cookie.h"
++
++#include <linux/types.h>
++#include <linux/netfilter.h>
++#include <linux/spinlock.h>
++#include <linux/kref.h>
++#include <net/dst_cache.h>
++
++struct wg_device;
++
++struct endpoint {
++	union {
++		struct sockaddr addr;
++		struct sockaddr_in addr4;
++		struct sockaddr_in6 addr6;
++	};
++	union {
++		struct {
++			struct in_addr src4;
++			/* Essentially the same as addr6.sin6_scope_id */
++			int src_if4;
++		};
++		struct in6_addr src6;
++	};
++};
++
++struct wg_peer {
++	struct wg_device *device;
++	struct crypt_queue tx_queue, rx_queue;
++	struct sk_buff_head staged_packet_queue;
++	int serial_work_cpu;
++	struct noise_keypairs keypairs;
++	struct endpoint endpoint;
++	struct dst_cache endpoint_cache;
++	rwlock_t endpoint_lock;
++	struct noise_handshake handshake;
++	atomic64_t last_sent_handshake;
++	struct work_struct transmit_handshake_work, clear_peer_work;
++	struct cookie latest_cookie;
++	struct hlist_node pubkey_hash;
++	u64 rx_bytes, tx_bytes;
++	struct timer_list timer_retransmit_handshake, timer_send_keepalive;
++	struct timer_list timer_new_handshake, timer_zero_key_material;
++	struct timer_list timer_persistent_keepalive;
++	unsigned int timer_handshake_attempts;
++	u16 persistent_keepalive_interval;
++	bool timer_need_another_keepalive;
++	bool sent_lastminute_handshake;
++	struct timespec64 walltime_last_handshake;
++	struct kref refcount;
++	struct rcu_head rcu;
++	struct list_head peer_list;
++	struct list_head allowedips_list;
++	u64 internal_id;
++	struct napi_struct napi;
++	bool is_dead;
++};
++
++struct wg_peer *wg_peer_create(struct wg_device *wg,
++			       const u8 public_key[NOISE_PUBLIC_KEY_LEN],
++			       const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]);
++
++struct wg_peer *__must_check wg_peer_get_maybe_zero(struct wg_peer *peer);
++static inline struct wg_peer *wg_peer_get(struct wg_peer *peer)
++{
++	kref_get(&peer->refcount);
++	return peer;
++}
++void wg_peer_put(struct wg_peer *peer);
++void wg_peer_remove(struct wg_peer *peer);
++void wg_peer_remove_all(struct wg_device *wg);
++
++#endif /* _WG_PEER_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/peerlookup.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,64 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_PEERLOOKUP_H
++#define _WG_PEERLOOKUP_H
++
++#include "messages.h"
++
++#include <linux/hashtable.h>
++#include <linux/mutex.h>
++#include <linux/siphash.h>
++
++struct wg_peer;
++
++struct pubkey_hashtable {
++	/* TODO: move to rhashtable */
++	DECLARE_HASHTABLE(hashtable, 11);
++	siphash_key_t key;
++	struct mutex lock;
++};
++
++struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void);
++void wg_pubkey_hashtable_add(struct pubkey_hashtable *table,
++			     struct wg_peer *peer);
++void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table,
++				struct wg_peer *peer);
++struct wg_peer *
++wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table,
++			   const u8 pubkey[NOISE_PUBLIC_KEY_LEN]);
++
++struct index_hashtable {
++	/* TODO: move to rhashtable */
++	DECLARE_HASHTABLE(hashtable, 13);
++	spinlock_t lock;
++};
++
++enum index_hashtable_type {
++	INDEX_HASHTABLE_HANDSHAKE = 1U << 0,
++	INDEX_HASHTABLE_KEYPAIR = 1U << 1
++};
++
++struct index_hashtable_entry {
++	struct wg_peer *peer;
++	struct hlist_node index_hash;
++	enum index_hashtable_type type;
++	__le32 index;
++};
++
++struct index_hashtable *wg_index_hashtable_alloc(void);
++__le32 wg_index_hashtable_insert(struct index_hashtable *table,
++				 struct index_hashtable_entry *entry);
++bool wg_index_hashtable_replace(struct index_hashtable *table,
++				struct index_hashtable_entry *old,
++				struct index_hashtable_entry *new);
++void wg_index_hashtable_remove(struct index_hashtable *table,
++			       struct index_hashtable_entry *entry);
++struct index_hashtable_entry *
++wg_index_hashtable_lookup(struct index_hashtable *table,
++			  const enum index_hashtable_type type_mask,
++			  const __le32 index, struct wg_peer **peer);
++
++#endif /* _WG_PEERLOOKUP_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/queueing.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,203 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_QUEUEING_H
++#define _WG_QUEUEING_H
++
++#include "peer.h"
++#include <linux/types.h>
++#include <linux/skbuff.h>
++#include <linux/ip.h>
++#include <linux/ipv6.h>
++
++struct wg_device;
++struct wg_peer;
++struct multicore_worker;
++struct crypt_queue;
++struct sk_buff;
++
++/* queueing.c APIs: */
++int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
++			 bool multicore, unsigned int len);
++void wg_packet_queue_free(struct crypt_queue *queue, bool multicore);
++struct multicore_worker __percpu *
++wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
++
++/* receive.c APIs: */
++void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb);
++void wg_packet_handshake_receive_worker(struct work_struct *work);
++/* NAPI poll function: */
++int wg_packet_rx_poll(struct napi_struct *napi, int budget);
++/* Workqueue worker: */
++void wg_packet_decrypt_worker(struct work_struct *work);
++
++/* send.c APIs: */
++void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
++						bool is_retry);
++void wg_packet_send_handshake_response(struct wg_peer *peer);
++void wg_packet_send_handshake_cookie(struct wg_device *wg,
++				     struct sk_buff *initiating_skb,
++				     __le32 sender_index);
++void wg_packet_send_keepalive(struct wg_peer *peer);
++void wg_packet_purge_staged_packets(struct wg_peer *peer);
++void wg_packet_send_staged_packets(struct wg_peer *peer);
++/* Workqueue workers: */
++void wg_packet_handshake_send_worker(struct work_struct *work);
++void wg_packet_tx_worker(struct work_struct *work);
++void wg_packet_encrypt_worker(struct work_struct *work);
++
++enum packet_state {
++	PACKET_STATE_UNCRYPTED,
++	PACKET_STATE_CRYPTED,
++	PACKET_STATE_DEAD
++};
++
++struct packet_cb {
++	u64 nonce;
++	struct noise_keypair *keypair;
++	atomic_t state;
++	u32 mtu;
++	u8 ds;
++};
++
++#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb))
++#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
++
++/* Returns either the correct skb->protocol value, or 0 if invalid. */
++static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb)
++{
++	if (skb_network_header(skb) >= skb->head &&
++	    (skb_network_header(skb) + sizeof(struct iphdr)) <=
++		    skb_tail_pointer(skb) &&
++	    ip_hdr(skb)->version == 4)
++		return htons(ETH_P_IP);
++	if (skb_network_header(skb) >= skb->head &&
++	    (skb_network_header(skb) + sizeof(struct ipv6hdr)) <=
++		    skb_tail_pointer(skb) &&
++	    ipv6_hdr(skb)->version == 6)
++		return htons(ETH_P_IPV6);
++	return 0;
++}
++
++static inline bool wg_check_packet_protocol(struct sk_buff *skb)
++{
++	__be16 real_protocol = wg_examine_packet_protocol(skb);
++	return real_protocol && skb->protocol == real_protocol;
++}
++
++static inline void wg_reset_packet(struct sk_buff *skb)
++{
++	const int pfmemalloc = skb->pfmemalloc; /* put back after the memset */
++
++	skb_scrub_packet(skb, true);
++	memset(&skb->headers_start, 0, /* zero [headers_start, headers_end) */
++	       offsetof(struct sk_buff, headers_end) -
++		       offsetof(struct sk_buff, headers_start));
++	skb->pfmemalloc = pfmemalloc;
++	skb->queue_mapping = 0;
++	skb->nohdr = 0;
++	skb->peeked = 0;
++	skb->mac_len = 0;
++	skb->dev = NULL;
++#ifdef CONFIG_NET_SCHED
++	skb->tc_index = 0;
++	skb_reset_tc(skb);
++#endif /* CONFIG_NET_SCHED */
++	skb->hdr_len = skb_headroom(skb);
++	skb_reset_mac_header(skb);
++	skb_reset_network_header(skb);
++	skb_reset_transport_header(skb);
++	skb_probe_transport_header(skb);
++	skb_reset_inner_headers(skb);
++}
++
++static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
++{
++	unsigned int cpu = *stored_cpu, cpu_index, i; /* try the cached CPU */
++
++	if (unlikely(cpu == nr_cpumask_bits ||
++		     !cpumask_test_cpu(cpu, cpu_online_mask))) {
++		cpu_index = id % cpumask_weight(cpu_online_mask); /* slot for id */
++		cpu = cpumask_first(cpu_online_mask);
++		for (i = 0; i < cpu_index; ++i) /* step to online CPU #cpu_index */
++			cpu = cpumask_next(cpu, cpu_online_mask);
++		*stored_cpu = cpu; /* cache the pick for later calls */
++	}
++	return cpu;
++}
++
++/* This function is racy, in the sense that next is unlocked, so it could return
++ * the same CPU twice. A race-free version of this would be to instead store an
++ * atomic sequence number, do an increment-and-return, and then iterate through
++ * every possible CPU until we get to that index -- choose_cpu. However that's
++ * a bit slower, and it doesn't seem like this potential race actually
++ * introduces any performance loss, so we live with it.
++ */
++static inline int wg_cpumask_next_online(int *next)
++{
++	int cpu = *next;
++
++	while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
++		cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
++	*next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
++	return cpu;
++}
++
++static inline int wg_queue_enqueue_per_device_and_peer(
++	struct crypt_queue *device_queue, struct crypt_queue *peer_queue,
++	struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
++{
++	int cpu;
++
++	atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED);
++	/* We first queue this up for the peer ingestion, but the consumer
++	 * will wait for the state to become CRYPTED or DEAD before handling it.
++	 */
++	if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
++		return -ENOSPC; /* skb was not queued anywhere */
++	/* Then we queue it up in the device queue, which consumes the
++	 * packet as soon as it can.
++	 */
++	cpu = wg_cpumask_next_online(next_cpu);
++	if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
++		return -EPIPE; /* NOTE: skb is already in peer_queue here */
++	queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
++	return 0;
++}
++
++static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
++					     struct sk_buff *skb,
++					     enum packet_state state)
++{
++	/* We take a reference, because as soon as we call atomic_set, the
++	 * peer can be freed from below us.
++	 */
++	struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
++
++	atomic_set_release(&PACKET_CB(skb)->state, state);
++	queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu,
++					       peer->internal_id),
++		      peer->device->packet_crypt_wq, &queue->work);
++	wg_peer_put(peer);
++}
++
++static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb,
++						  enum packet_state state)
++{
++	/* We take a reference, because as soon as we call atomic_set, the
++	 * peer can be freed from below us.
++	 */
++	struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
++
++	atomic_set_release(&PACKET_CB(skb)->state, state);
++	napi_schedule(&peer->napi);
++	wg_peer_put(peer);
++}
++
++#ifdef DEBUG
++bool wg_packet_counter_selftest(void);
++#endif
++
++#endif /* _WG_QUEUEING_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/ratelimiter.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,19 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_RATELIMITER_H
++#define _WG_RATELIMITER_H
++
++#include <linux/skbuff.h>
++
++int wg_ratelimiter_init(void);
++void wg_ratelimiter_uninit(void);
++bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net);
++
++#ifdef DEBUG
++bool wg_ratelimiter_selftest(void);
++#endif
++
++#endif /* _WG_RATELIMITER_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/socket.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,44 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_SOCKET_H
++#define _WG_SOCKET_H
++
++#include <linux/netdevice.h>
++#include <linux/udp.h>
++#include <linux/if_vlan.h>
++#include <linux/if_ether.h>
++
++int wg_socket_init(struct wg_device *wg, u16 port);
++void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
++		      struct sock *new6);
++int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *data,
++				  size_t len, u8 ds);
++int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb,
++			       u8 ds);
++int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg,
++					  struct sk_buff *in_skb,
++					  void *out_buffer, size_t len);
++
++int wg_socket_endpoint_from_skb(struct endpoint *endpoint,
++				const struct sk_buff *skb);
++void wg_socket_set_peer_endpoint(struct wg_peer *peer,
++				 const struct endpoint *endpoint);
++void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer,
++					  const struct sk_buff *skb);
++void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer);
++
++#if defined(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG)
++#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) do {                       \
++		struct endpoint __endpoint;                                    \
++		wg_socket_endpoint_from_skb(&__endpoint, skb);                 \
++		net_dbg_ratelimited(fmt, dev, &__endpoint.addr,                \
++				    ##__VA_ARGS__);                            \
++	} while (0)
++#else /* no debug output: same (fmt, dev, skb, ...) signature, no-op body */
++#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) do { } while (0)
++#endif /* CONFIG_DYNAMIC_DEBUG || DEBUG */
++
++#endif /* _WG_SOCKET_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/timers.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,31 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#ifndef _WG_TIMERS_H
++#define _WG_TIMERS_H
++
++#include <linux/ktime.h>
++
++struct wg_peer;
++
++void wg_timers_init(struct wg_peer *peer);
++void wg_timers_stop(struct wg_peer *peer);
++void wg_timers_data_sent(struct wg_peer *peer);
++void wg_timers_data_received(struct wg_peer *peer);
++void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer);
++void wg_timers_any_authenticated_packet_received(struct wg_peer *peer);
++void wg_timers_handshake_initiated(struct wg_peer *peer);
++void wg_timers_handshake_complete(struct wg_peer *peer);
++void wg_timers_session_derived(struct wg_peer *peer);
++void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer);
++
++static inline bool wg_birthdate_has_expired(u64 birthday_nanoseconds,
++					    u64 expiration_seconds)
++{
++	return (s64)(birthday_nanoseconds + expiration_seconds * NSEC_PER_SEC)
++		<= (s64)ktime_get_coarse_boottime_ns();
++}
++
++#endif /* _WG_TIMERS_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/uapi/wireguard.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,196 @@
++/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * Documentation
++ * =============
++ *
++ * The below enums and macros are for interfacing with WireGuard, using generic
++ * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two
++ * methods: get and set. Note that while they share many common attributes,
++ * these two functions actually accept a slightly different set of inputs and
++ * outputs.
++ *
++ * WG_CMD_GET_DEVICE
++ * -----------------
++ *
++ * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain
++ * one but not both of:
++ *
++ *    WGDEVICE_A_IFINDEX: NLA_U32
++ *    WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
++ *
++ * The kernel will then return several messages (NLM_F_MULTI) containing the
++ * following tree of nested items:
++ *
++ *    WGDEVICE_A_IFINDEX: NLA_U32
++ *    WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
++ *    WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
++ *    WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
++ *    WGDEVICE_A_LISTEN_PORT: NLA_U16
++ *    WGDEVICE_A_FWMARK: NLA_U32
++ *    WGDEVICE_A_PEERS: NLA_NESTED
++ *        0: NLA_NESTED
++ *            WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
++ *            WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
++ *            WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6
++ *            WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16
++ *            WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec
++ *            WGPEER_A_RX_BYTES: NLA_U64
++ *            WGPEER_A_TX_BYTES: NLA_U64
++ *            WGPEER_A_ALLOWEDIPS: NLA_NESTED
++ *                0: NLA_NESTED
++ *                    WGALLOWEDIP_A_FAMILY: NLA_U16
++ *                    WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr
++ *                    WGALLOWEDIP_A_CIDR_MASK: NLA_U8
++ *                0: NLA_NESTED
++ *                    ...
++ *                0: NLA_NESTED
++ *                    ...
++ *                ...
++ *            WGPEER_A_PROTOCOL_VERSION: NLA_U32
++ *        0: NLA_NESTED
++ *            ...
++ *        ...
++ *
++ * It is possible that all of the allowed IPs of a single peer will not
++ * fit within a single netlink message. In that case, the same peer will
++ * be written in the following message, except it will only contain
++ * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several
++ * times in a row for the same peer. It is then up to the receiver to
++ * coalesce adjacent peers. Likewise, it is possible that all peers will
++ * not fit within a single message. So, subsequent peers will be sent
++ * in following messages, except those will only contain WGDEVICE_A_IFNAME
++ * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these
++ * messages to form the complete list of peers.
++ *
++ * Since this is an NLM_F_DUMP command, the final message will always be
++ * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message
++ * contains an integer error code. It is either zero or a negative error
++ * code corresponding to the errno.
++ *
++ * WG_CMD_SET_DEVICE
++ * -----------------
++ *
++ * May only be called via NLM_F_REQUEST. The command should contain the
++ * following tree of nested items, containing one but not both of
++ * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME:
++ *
++ *    WGDEVICE_A_IFINDEX: NLA_U32
++ *    WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
++ *    WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current
++ *                      peers should be removed prior to adding the list below.
++ *    WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove
++ *    WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly
++ *    WGDEVICE_A_FWMARK: NLA_U32, 0 to disable
++ *    WGDEVICE_A_PEERS: NLA_NESTED
++ *        0: NLA_NESTED
++ *            WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN
++ *            WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the
++ *                            specified peer should not exist at the end of the
++ *                            operation, rather than added/updated and/or
++ *                            WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed
++ *                            IPs of this peer should be removed prior to adding
++ *                            the list below and/or WGPEER_F_UPDATE_ONLY if the
++ *                            peer should only be set if it already exists.
++ *            WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove
++ *            WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6
++ *            WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable
++ *            WGPEER_A_ALLOWEDIPS: NLA_NESTED
++ *                0: NLA_NESTED
++ *                    WGALLOWEDIP_A_FAMILY: NLA_U16
++ *                    WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr
++ *                    WGALLOWEDIP_A_CIDR_MASK: NLA_U8
++ *                0: NLA_NESTED
++ *                    ...
++ *                0: NLA_NESTED
++ *                    ...
++ *                ...
++ *            WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at
++ *                                       all by most users of this API, as the
++ *                                       most recent protocol will be used when
++ *                                       this is unset. Otherwise, must be set
++ *                                       to 1.
++ *        0: NLA_NESTED
++ *            ...
++ *        ...
++ *
++ * It is possible that the amount of configuration data exceeds that of
++ * the maximum message length accepted by the kernel. In that case, several
++ * messages should be sent one after another, with each successive one
++ * filling in information not contained in the prior. Note that if
++ * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably
++ * should not be specified in fragments that come after, so that the list
++ * of peers is only cleared the first time but appended after. Likewise for
++ * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message
++ * of a peer, it likely should not be specified in subsequent fragments.
++ *
++ * If an error occurs, NLMSG_ERROR will reply containing an errno.
++ */
++
++#ifndef _WG_UAPI_WIREGUARD_H
++#define _WG_UAPI_WIREGUARD_H
++
++#define WG_GENL_NAME "wireguard"
++#define WG_GENL_VERSION 1
++
++#define WG_KEY_LEN 32
++
++enum wg_cmd {
++	WG_CMD_GET_DEVICE,
++	WG_CMD_SET_DEVICE,
++	__WG_CMD_MAX
++};
++#define WG_CMD_MAX (__WG_CMD_MAX - 1)
++
++enum wgdevice_flag {
++	WGDEVICE_F_REPLACE_PEERS = 1U << 0,
++	__WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS
++};
++enum wgdevice_attribute {
++	WGDEVICE_A_UNSPEC,
++	WGDEVICE_A_IFINDEX,
++	WGDEVICE_A_IFNAME,
++	WGDEVICE_A_PRIVATE_KEY,
++	WGDEVICE_A_PUBLIC_KEY,
++	WGDEVICE_A_FLAGS,
++	WGDEVICE_A_LISTEN_PORT,
++	WGDEVICE_A_FWMARK,
++	WGDEVICE_A_PEERS,
++	__WGDEVICE_A_LAST
++};
++#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1)
++
++enum wgpeer_flag {
++	WGPEER_F_REMOVE_ME = 1U << 0,
++	WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1,
++	WGPEER_F_UPDATE_ONLY = 1U << 2,
++	__WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS |
++			 WGPEER_F_UPDATE_ONLY
++};
++enum wgpeer_attribute {
++	WGPEER_A_UNSPEC,
++	WGPEER_A_PUBLIC_KEY,
++	WGPEER_A_PRESHARED_KEY,
++	WGPEER_A_FLAGS,
++	WGPEER_A_ENDPOINT,
++	WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
++	WGPEER_A_LAST_HANDSHAKE_TIME,
++	WGPEER_A_RX_BYTES,
++	WGPEER_A_TX_BYTES,
++	WGPEER_A_ALLOWEDIPS,
++	WGPEER_A_PROTOCOL_VERSION,
++	__WGPEER_A_LAST
++};
++#define WGPEER_A_MAX (__WGPEER_A_LAST - 1)
++
++enum wgallowedip_attribute {
++	WGALLOWEDIP_A_UNSPEC,
++	WGALLOWEDIP_A_FAMILY,
++	WGALLOWEDIP_A_IPADDR,
++	WGALLOWEDIP_A_CIDR_MASK,
++	__WGALLOWEDIP_A_LAST
++};
++#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1)
++
++#endif /* _WG_UAPI_WIREGUARD_H */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/version.h	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,3 @@
++#ifndef WIREGUARD_VERSION
++#define WIREGUARD_VERSION "0.0.20200318"
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/blake2s/blake2s-x86_64.S	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,258 @@
++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
++ */
++
++#include <linux/linkage.h>
++
++.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
++.align 32
++IV:	.octa 0xA54FF53A3C6EF372BB67AE856A09E667
++	.octa 0x5BE0CD191F83D9AB9B05688C510E527F
++.section .rodata.cst16.ROT16, "aM", @progbits, 16
++.align 16
++ROT16:	.octa 0x0D0C0F0E09080B0A0504070601000302
++.section .rodata.cst16.ROR328, "aM", @progbits, 16
++.align 16
++ROR328:	.octa 0x0C0F0E0D080B0A090407060500030201
++.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160
++.align 64
++SIGMA:
++.byte  0,  2,  4,  6,  1,  3,  5,  7, 14,  8, 10, 12, 15,  9, 11, 13
++.byte 14,  4,  9, 13, 10,  8, 15,  6,  5,  1,  0, 11,  3, 12,  2,  7
++.byte 11, 12,  5, 15,  8,  0,  2, 13,  9, 10,  3,  7,  4, 14,  6,  1
++.byte  7,  3, 13, 11,  9,  1, 12, 14, 15,  2,  5,  4,  8,  6, 10,  0
++.byte  9,  5,  2, 10,  0,  7,  4, 15,  3, 14, 11,  6, 13,  1, 12,  8
++.byte  2,  6,  0,  8, 12, 10, 11,  3,  1,  4,  7, 15,  9, 13,  5, 14
++.byte 12,  1, 14,  4,  5, 15, 13, 10,  8,  0,  6,  9, 11,  7,  3,  2
++.byte 13,  7, 12,  3, 11, 14,  1,  9,  2,  5, 15,  8, 10,  0,  4,  6
++.byte  6, 14, 11,  0, 15,  9,  3,  8, 10, 12, 13,  1,  5,  2,  7,  4
++.byte 10,  8,  7,  1,  2,  4,  6,  5, 13, 15,  9,  3,  0, 11, 14, 12
++#ifdef CONFIG_AS_AVX512
++.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
++.align 64
++SIGMA2:
++.long  0,  2,  4,  6,  1,  3,  5,  7, 14,  8, 10, 12, 15,  9, 11, 13
++.long  8,  2, 13, 15, 10,  9, 12,  3,  6,  4,  0, 14,  5, 11,  1,  7
++.long 11, 13,  8,  6,  5, 10, 14,  3,  2,  4, 12, 15,  1,  0,  7,  9
++.long 11, 10,  7,  0,  8, 15,  1, 13,  3,  6,  2, 12,  4, 14,  9,  5
++.long  4, 10,  9, 14, 15,  0, 11,  8,  1,  7,  3, 13,  2,  5,  6, 12
++.long  2, 11,  4, 15, 14,  3, 10,  8, 13,  6,  5,  7,  0, 12,  1,  9
++.long  4,  8, 15,  9, 14, 11, 13,  5,  3,  2,  1, 12,  6, 10,  7,  0
++.long  6, 13,  0, 14, 12,  2,  1, 11, 15,  4,  5,  8,  7,  9,  3, 10
++.long 15,  5,  4, 13, 10,  7,  3, 11, 12,  2,  0,  6,  9,  8,  1, 14
++.long  8,  7, 14, 11, 13, 15,  0, 12, 10,  4,  5,  6,  3,  2,  1,  9
++#endif /* CONFIG_AS_AVX512 */
++
++.text
++#ifdef CONFIG_AS_SSSE3
++SYM_FUNC_START(blake2s_compress_ssse3)
++	testq		%rdx,%rdx	/* rdx = number of 64-byte blocks at rsi */
++	je		.Lendofloop	/* zero blocks: leave the state untouched */
++	movdqu		(%rdi),%xmm0	/* xmm0 = state bytes 0x00-0x0f at rdi */
++	movdqu		0x10(%rdi),%xmm1	/* xmm1 = state bytes 0x10-0x1f */
++	movdqa		ROT16(%rip),%xmm12	/* pshufb mask: rotate each dword by 16 */
++	movdqa		ROR328(%rip),%xmm13	/* pshufb mask: rotate each dword right by 8 */
++	movdqu		0x20(%rdi),%xmm14	/* xmm14 = state bytes 0x20-0x2f */
++	movq		%rcx,%xmm15	/* xmm15 = rcx, added to xmm14 once per block */
++	leaq		SIGMA+0xa0(%rip),%r8	/* r8 = end of SIGMA (10 rows * 16 bytes) */
++	jmp		.Lbeginofloop
++	.align		32
++.Lbeginofloop:
++	movdqa		%xmm0,%xmm10	/* keep the incoming state for the final xor */
++	movdqa		%xmm1,%xmm11
++	paddq		%xmm15,%xmm14	/* low qword of xmm14 += rcx */
++	movdqa		IV(%rip),%xmm2	/* xmm2 = first 16 bytes of IV */
++	movdqa		%xmm14,%xmm3
++	pxor		IV+0x10(%rip),%xmm3	/* xmm3 = xmm14 ^ second 16 bytes of IV */
++	leaq		SIGMA(%rip),%rcx	/* rcx is reused as the SIGMA row cursor */
++.Lroundloop:
++	movzbl		(%rcx),%eax	/* SIGMA bytes 0-3 select four message dwords */
++	movd		(%rsi,%rax,4),%xmm4
++	movzbl		0x1(%rcx),%eax
++	movd		(%rsi,%rax,4),%xmm5
++	movzbl		0x2(%rcx),%eax
++	movd		(%rsi,%rax,4),%xmm6
++	movzbl		0x3(%rcx),%eax
++	movd		(%rsi,%rax,4),%xmm7
++	punpckldq	%xmm5,%xmm4	/* pack the four dwords into xmm4 */
++	punpckldq	%xmm7,%xmm6
++	punpcklqdq	%xmm6,%xmm4
++	paddd		%xmm4,%xmm0	/* xmm0 += message words + xmm1 */
++	paddd		%xmm1,%xmm0
++	pxor		%xmm0,%xmm3
++	pshufb		%xmm12,%xmm3	/* xmm3 = (xmm3 ^ xmm0) rotated by 16 */
++	paddd		%xmm3,%xmm2
++	pxor		%xmm2,%xmm1
++	movdqa		%xmm1,%xmm8	/* xmm1 = (xmm1 ^ xmm2) rotated right by 12 */
++	psrld		$0xc,%xmm1
++	pslld		$0x14,%xmm8
++	por		%xmm8,%xmm1
++	movzbl		0x4(%rcx),%eax	/* SIGMA bytes 4-7 -> xmm5 */
++	movd		(%rsi,%rax,4),%xmm5
++	movzbl		0x5(%rcx),%eax
++	movd		(%rsi,%rax,4),%xmm6
++	movzbl		0x6(%rcx),%eax
++	movd		(%rsi,%rax,4),%xmm7
++	movzbl		0x7(%rcx),%eax
++	movd		(%rsi,%rax,4),%xmm4
++	punpckldq	%xmm6,%xmm5
++	punpckldq	%xmm4,%xmm7
++	punpcklqdq	%xmm7,%xmm5
++	paddd		%xmm5,%xmm0
++	paddd		%xmm1,%xmm0
++	pxor		%xmm0,%xmm3
++	pshufb		%xmm13,%xmm3	/* xmm3 = (xmm3 ^ xmm0) rotated right by 8 */
++	paddd		%xmm3,%xmm2
++	pxor		%xmm2,%xmm1
++	movdqa		%xmm1,%xmm8	/* xmm1 = (xmm1 ^ xmm2) rotated right by 7 */
++	psrld		$0x7,%xmm1
++	pslld		$0x19,%xmm8
++	por		%xmm8,%xmm1
++	pshufd		$0x93,%xmm0,%xmm0	/* rotate lanes of xmm0/xmm3/xmm2 against xmm1 */
++	pshufd		$0x4e,%xmm3,%xmm3	/* 0x4e swaps the two 64-bit halves */
++	pshufd		$0x39,%xmm2,%xmm2
++	movzbl		0x8(%rcx),%eax	/* SIGMA bytes 8-11 -> xmm6 */
++	movd		(%rsi,%rax,4),%xmm6
++	movzbl		0x9(%rcx),%eax
++	movd		(%rsi,%rax,4),%xmm7
++	movzbl		0xa(%rcx),%eax
++	movd		(%rsi,%rax,4),%xmm4
++	movzbl		0xb(%rcx),%eax
++	movd		(%rsi,%rax,4),%xmm5
++	punpckldq	%xmm7,%xmm6
++	punpckldq	%xmm5,%xmm4
++	punpcklqdq	%xmm4,%xmm6
++	paddd		%xmm6,%xmm0
++	paddd		%xmm1,%xmm0
++	pxor		%xmm0,%xmm3
++	pshufb		%xmm12,%xmm3	/* rotate by 16 */
++	paddd		%xmm3,%xmm2
++	pxor		%xmm2,%xmm1
++	movdqa		%xmm1,%xmm8	/* rotate right by 12 */
++	psrld		$0xc,%xmm1
++	pslld		$0x14,%xmm8
++	por		%xmm8,%xmm1
++	movzbl		0xc(%rcx),%eax	/* SIGMA bytes 12-15 -> xmm7 */
++	movd		(%rsi,%rax,4),%xmm7
++	movzbl		0xd(%rcx),%eax
++	movd		(%rsi,%rax,4),%xmm4
++	movzbl		0xe(%rcx),%eax
++	movd		(%rsi,%rax,4),%xmm5
++	movzbl		0xf(%rcx),%eax
++	movd		(%rsi,%rax,4),%xmm6
++	punpckldq	%xmm4,%xmm7
++	punpckldq	%xmm6,%xmm5
++	punpcklqdq	%xmm5,%xmm7
++	paddd		%xmm7,%xmm0
++	paddd		%xmm1,%xmm0
++	pxor		%xmm0,%xmm3
++	pshufb		%xmm13,%xmm3	/* rotate right by 8 */
++	paddd		%xmm3,%xmm2
++	pxor		%xmm2,%xmm1
++	movdqa		%xmm1,%xmm8	/* rotate right by 7 */
++	psrld		$0x7,%xmm1
++	pslld		$0x19,%xmm8
++	por		%xmm8,%xmm1
++	pshufd		$0x39,%xmm0,%xmm0	/* undo the lane rotation applied mid-round */
++	pshufd		$0x4e,%xmm3,%xmm3
++	pshufd		$0x93,%xmm2,%xmm2
++	addq		$0x10,%rcx	/* advance to the next 16-byte SIGMA row */
++	cmpq		%r8,%rcx
++	jnz		.Lroundloop	/* loop until all 10 rows are consumed */
++	pxor		%xmm2,%xmm0	/* new state = xmm0^xmm2^old, xmm1^xmm3^old */
++	pxor		%xmm3,%xmm1
++	pxor		%xmm10,%xmm0
++	pxor		%xmm11,%xmm1
++	addq		$0x40,%rsi	/* next 64-byte input block */
++	decq		%rdx
++	jnz		.Lbeginofloop
++	movdqu		%xmm0,(%rdi)	/* write back all 0x30 state bytes */
++	movdqu		%xmm1,0x10(%rdi)
++	movdqu		%xmm14,0x20(%rdi)
++.Lendofloop:
++	ret
++SYM_FUNC_END(blake2s_compress_ssse3)
++#endif /* CONFIG_AS_SSSE3 */
++
++#ifdef CONFIG_AS_AVX512
++SYM_FUNC_START(blake2s_compress_avx512)	/* %rdi = state, %rsi = message, %rdx = number of 64-byte blocks, %rcx = counter increment */
++	vmovdqu		(%rdi),%xmm0	/* NOTE(review): no %rdx == 0 guard here, unlike the SSSE3 routine; presumably callers pass at least one block -- confirm */
++	vmovdqu		0x10(%rdi),%xmm1	/* xmm1 = state bytes 16..31 */
++	vmovdqu		0x20(%rdi),%xmm4	/* xmm4 = state bytes 32..47 */
++	vmovq		%rcx,%xmm5	/* xmm5 = increment in the low qword, high qword zero */
++	vmovdqa		IV(%rip),%xmm14	/* keep both IV halves in registers across blocks */
++	vmovdqa		IV+16(%rip),%xmm15
++	jmp		.Lblake2s_compress_avx512_mainloop
++.align 32
++.Lblake2s_compress_avx512_mainloop:
++	vmovdqa		%xmm0,%xmm10	/* keep the incoming xmm0/xmm1 for the final xor */
++	vmovdqa		%xmm1,%xmm11
++	vpaddq		%xmm5,%xmm4,%xmm4	/* 64-bit add of the increment into the low qword of xmm4 */
++	vmovdqa		%xmm14,%xmm2	/* xmm2 = IV bytes 0..15 */
++	vpxor		%xmm15,%xmm4,%xmm3	/* xmm3 = xmm4 ^ IV bytes 16..31 */
++	vmovdqu		(%rsi),%ymm6	/* ymm6:ymm7 = the whole 64-byte message block */
++	vmovdqu		0x20(%rsi),%ymm7
++	addq		$0x40,%rsi	/* point at the next block */
++	leaq		SIGMA2(%rip),%rax	/* %rax = cursor into the SIGMA2 index vectors */
++	movb		$0xa,%cl	/* 10 rounds; %rcx was already copied to xmm5 */
++.Lblake2s_compress_avx512_roundloop:
++	addq		$0x40,%rax	/* 64 bytes of indices are consumed per round */
++	vmovdqa		-0x40(%rax),%ymm8
++	vmovdqa		-0x20(%rax),%ymm9
++	vpermi2d	%ymm7,%ymm6,%ymm8	/* pick message words by index from the ymm6/ymm7 pair */
++	vpermi2d	%ymm7,%ymm6,%ymm9
++	vmovdqa		%ymm8,%ymm6	/* the permuted words become the source for the next round's permutation */
++	vmovdqa		%ymm9,%ymm7
++	vpaddd		%xmm8,%xmm0,%xmm0	/* xmm0 += low four words of ymm8 */
++	vpaddd		%xmm1,%xmm0,%xmm0
++	vpxor		%xmm0,%xmm3,%xmm3
++	vprord		$0x10,%xmm3,%xmm3	/* rotate each dword right by 16 */
++	vpaddd		%xmm3,%xmm2,%xmm2
++	vpxor		%xmm2,%xmm1,%xmm1
++	vprord		$0xc,%xmm1,%xmm1	/* rotate right by 12 */
++	vextracti128	$0x1,%ymm8,%xmm8	/* xmm8 = high four words of ymm8 */
++	vpaddd		%xmm8,%xmm0,%xmm0
++	vpaddd		%xmm1,%xmm0,%xmm0
++	vpxor		%xmm0,%xmm3,%xmm3
++	vprord		$0x8,%xmm3,%xmm3	/* rotate right by 8 */
++	vpaddd		%xmm3,%xmm2,%xmm2
++	vpxor		%xmm2,%xmm1,%xmm1
++	vprord		$0x7,%xmm1,%xmm1	/* rotate right by 7 */
++	vpshufd		$0x93,%xmm0,%xmm0	/* rotate the dword lanes of xmm0/xmm3/xmm2; undone at the end of the round */
++	vpshufd		$0x4e,%xmm3,%xmm3
++	vpshufd		$0x39,%xmm2,%xmm2
++	vpaddd		%xmm9,%xmm0,%xmm0	/* second half: same sequence using the words in ymm9 */
++	vpaddd		%xmm1,%xmm0,%xmm0
++	vpxor		%xmm0,%xmm3,%xmm3
++	vprord		$0x10,%xmm3,%xmm3
++	vpaddd		%xmm3,%xmm2,%xmm2
++	vpxor		%xmm2,%xmm1,%xmm1
++	vprord		$0xc,%xmm1,%xmm1
++	vextracti128	$0x1,%ymm9,%xmm9	/* xmm9 = high four words of ymm9 */
++	vpaddd		%xmm9,%xmm0,%xmm0
++	vpaddd		%xmm1,%xmm0,%xmm0
++	vpxor		%xmm0,%xmm3,%xmm3
++	vprord		$0x8,%xmm3,%xmm3
++	vpaddd		%xmm3,%xmm2,%xmm2
++	vpxor		%xmm2,%xmm1,%xmm1
++	vprord		$0x7,%xmm1,%xmm1
++	vpshufd		$0x39,%xmm0,%xmm0	/* inverse lane rotations: restore the original lane order */
++	vpshufd		$0x4e,%xmm3,%xmm3
++	vpshufd		$0x93,%xmm2,%xmm2
++	decb		%cl
++	jne		.Lblake2s_compress_avx512_roundloop
++	vpxor		%xmm10,%xmm0,%xmm0	/* fold: xmm0 ^= saved xmm0 ^ xmm2, xmm1 ^= saved xmm1 ^ xmm3 */
++	vpxor		%xmm11,%xmm1,%xmm1
++	vpxor		%xmm2,%xmm0,%xmm0
++	vpxor		%xmm3,%xmm1,%xmm1
++	decq		%rdx	/* one block done; with %rdx == 0 on entry this would wrap instead of stopping */
++	jne		.Lblake2s_compress_avx512_mainloop
++	vmovdqu		%xmm0,(%rdi)	/* write the updated 48 bytes back to the state */
++	vmovdqu		%xmm1,0x10(%rdi)
++	vmovdqu		%xmm4,0x20(%rdi)
++	vzeroupper	/* ymm registers were used above; clear the upper halves before returning */
++	retq
++SYM_FUNC_END(blake2s_compress_avx512)
++#endif /* CONFIG_AS_AVX512 */
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/chacha20/chacha20-mips.S	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,424 @@
++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
++/*
++ * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#define MASK_U32		0x3c
++#define CHACHA20_BLOCK_SIZE	64
++#define STACK_SIZE		32
++
++#define X0	$t0
++#define X1	$t1
++#define X2	$t2
++#define X3	$t3
++#define X4	$t4
++#define X5	$t5
++#define X6	$t6
++#define X7	$t7
++#define X8	$t8
++#define X9	$t9
++#define X10	$v1
++#define X11	$s6
++#define X12	$s5
++#define X13	$s4
++#define X14	$s3
++#define X15	$s2
++/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
++#define T0	$s1
++#define T1	$s0
++#define T(n)	T ## n
++#define X(n)	X ## n
++
++/* Input arguments */
++#define STATE		$a0
++#define OUT		$a1
++#define IN		$a2
++#define BYTES		$a3
++
++/* Output argument */
++/* NONCE[0] is kept in a register and not in memory.
++ * We don't want to touch original value in memory.
++ * Must be incremented every loop iteration.
++ */
++#define NONCE_0		$v0
++
++/* SAVED_X and SAVED_CA are set in the jump table.
++ * Use regs which are overwritten on exit so we don't leak clear data.
++ * They are used to handle the last bytes, which are not a multiple of 4.
++ */
++#define SAVED_X		X15
++#define SAVED_CA	$s7
++
++#define IS_UNALIGNED	$s7
++
++#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
++#define MSB 0
++#define LSB 3
++#define ROTx rotl
++#define ROTR(n) rotr n, 24
++#define	CPU_TO_LE32(n) \
++	wsbh	n; \
++	rotr	n, 16;
++#else
++#define MSB 3
++#define LSB 0
++#define ROTx rotr
++#define CPU_TO_LE32(n)
++#define ROTR(n)
++#endif
++
++#define FOR_EACH_WORD(x) \
++	x( 0); \
++	x( 1); \
++	x( 2); \
++	x( 3); \
++	x( 4); \
++	x( 5); \
++	x( 6); \
++	x( 7); \
++	x( 8); \
++	x( 9); \
++	x(10); \
++	x(11); \
++	x(12); \
++	x(13); \
++	x(14); \
++	x(15);
++
++#define FOR_EACH_WORD_REV(x) \
++	x(15); \
++	x(14); \
++	x(13); \
++	x(12); \
++	x(11); \
++	x(10); \
++	x( 9); \
++	x( 8); \
++	x( 7); \
++	x( 6); \
++	x( 5); \
++	x( 4); \
++	x( 3); \
++	x( 2); \
++	x( 1); \
++	x( 0);
++
++#define PLUS_ONE_0	 1
++#define PLUS_ONE_1	 2
++#define PLUS_ONE_2	 3
++#define PLUS_ONE_3	 4
++#define PLUS_ONE_4	 5
++#define PLUS_ONE_5	 6
++#define PLUS_ONE_6	 7
++#define PLUS_ONE_7	 8
++#define PLUS_ONE_8	 9
++#define PLUS_ONE_9	10
++#define PLUS_ONE_10	11
++#define PLUS_ONE_11	12
++#define PLUS_ONE_12	13
++#define PLUS_ONE_13	14
++#define PLUS_ONE_14	15
++#define PLUS_ONE_15	16
++#define PLUS_ONE(x)	PLUS_ONE_ ## x
++#define _CONCAT3(a,b,c)	a ## b ## c
++#define CONCAT3(a,b,c)	_CONCAT3(a,b,c)
++
++#define STORE_UNALIGNED(x) \
++CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
++	.if (x != 12); \
++		lw	T0, (x*4)(STATE); \
++	.endif; \
++	lwl	T1, (x*4)+MSB ## (IN); \
++	lwr	T1, (x*4)+LSB ## (IN); \
++	.if (x == 12); \
++		addu	X ## x, NONCE_0; \
++	.else; \
++		addu	X ## x, T0; \
++	.endif; \
++	CPU_TO_LE32(X ## x); \
++	xor	X ## x, T1; \
++	swl	X ## x, (x*4)+MSB ## (OUT); \
++	swr	X ## x, (x*4)+LSB ## (OUT);
++
++#define STORE_ALIGNED(x) \
++CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
++	.if (x != 12); \
++		lw	T0, (x*4)(STATE); \
++	.endif; \
++	lw	T1, (x*4) ## (IN); \
++	.if (x == 12); \
++		addu	X ## x, NONCE_0; \
++	.else; \
++		addu	X ## x, T0; \
++	.endif; \
++	CPU_TO_LE32(X ## x); \
++	xor	X ## x, T1; \
++	sw	X ## x, (x*4) ## (OUT);
++
++/* Jump table macro.
++ * Used for setup and handling the last bytes, which are not multiple of 4.
++ * X15 is free to store Xn
++ * Every jumptable entry must be equal in size.
++ */
++#define JMPTBL_ALIGNED(x) \
++.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
++	.set	noreorder; \
++	b	.Lchacha20_mips_xor_aligned_ ## x ## _b; \
++	.if (x == 12); \
++		addu	SAVED_X, X ## x, NONCE_0; \
++	.else; \
++		addu	SAVED_X, X ## x, SAVED_CA; \
++	.endif; \
++	.set	reorder
++
++#define JMPTBL_UNALIGNED(x) \
++.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
++	.set	noreorder; \
++	b	.Lchacha20_mips_xor_unaligned_ ## x ## _b; \
++	.if (x == 12); \
++		addu	SAVED_X, X ## x, NONCE_0; \
++	.else; \
++		addu	SAVED_X, X ## x, SAVED_CA; \
++	.endif; \
++	.set	reorder
++
++#define AXR(A, B, C, D,  K, L, M, N,  V, W, Y, Z,  S) \
++	addu	X(A), X(K); \
++	addu	X(B), X(L); \
++	addu	X(C), X(M); \
++	addu	X(D), X(N); \
++	xor	X(V), X(A); \
++	xor	X(W), X(B); \
++	xor	X(Y), X(C); \
++	xor	X(Z), X(D); \
++	rotl	X(V), S;    \
++	rotl	X(W), S;    \
++	rotl	X(Y), S;    \
++	rotl	X(Z), S;
++
++.text
++.set	reorder
++.set	noat	/* $at is used below as an ordinary register (round counter and byte offset) */
++.globl	chacha20_mips
++.ent	chacha20_mips
++chacha20_mips:
++	.frame	$sp, STACK_SIZE, $ra
++
++	addiu	$sp, -STACK_SIZE	/* 32 bytes: save area for s0-s7 */
++
++	/* Return bytes = 0. */
++	beqz	BYTES, .Lchacha20_mips_end
++
++	lw	NONCE_0, 48(STATE)	/* state word 12 lives in a register from here on */
++
++	/* Save s0-s7 */
++	sw	$s0,  0($sp)
++	sw	$s1,  4($sp)
++	sw	$s2,  8($sp)
++	sw	$s3, 12($sp)
++	sw	$s4, 16($sp)
++	sw	$s5, 20($sp)
++	sw	$s6, 24($sp)
++	sw	$s7, 28($sp)
++
++	/* Test IN or OUT is unaligned.
++	 * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
++	 */
++	or	IS_UNALIGNED, IN, OUT
++	andi	IS_UNALIGNED, 0x3
++
++	/* Set number of rounds */
++	li	$at, 20
++
++	b	.Lchacha20_rounds_start
++
++.align 4
++.Loop_chacha20_rounds:
++	addiu	IN,  CHACHA20_BLOCK_SIZE	/* a full block was just written: advance both pointers and the counter */
++	addiu	OUT, CHACHA20_BLOCK_SIZE
++	addiu	NONCE_0, 1
++
++.Lchacha20_rounds_start:
++	lw	X0,  0(STATE)	/* reload the working copy X0-X15 from the unmodified state */
++	lw	X1,  4(STATE)
++	lw	X2,  8(STATE)
++	lw	X3,  12(STATE)
++
++	lw	X4,  16(STATE)
++	lw	X5,  20(STATE)
++	lw	X6,  24(STATE)
++	lw	X7,  28(STATE)
++	lw	X8,  32(STATE)
++	lw	X9,  36(STATE)
++	lw	X10, 40(STATE)
++	lw	X11, 44(STATE)
++
++	move	X12, NONCE_0	/* word 12 comes from the register copy, not from memory */
++	lw	X13, 52(STATE)
++	lw	X14, 56(STATE)
++	lw	X15, 60(STATE)
++
++.Loop_chacha20_xor_rounds:
++	addiu	$at, -2	/* the counter drops by 2 per pass of eight AXR steps: 10 passes for 20 */
++	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
++	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
++	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
++	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
++	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
++	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
++	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
++	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
++	bnez	$at, .Loop_chacha20_xor_rounds
++
++	addiu	BYTES, -(CHACHA20_BLOCK_SIZE)	/* BYTES now holds what remains after this block; negative means a partial block */
++
++	/* Is data src/dst unaligned? Jump */
++	bnez	IS_UNALIGNED, .Loop_chacha20_unaligned
++
++	/* Set number rounds here to fill delayslot. */
++	li	$at, 20
++
++	/* BYTES < 0, it has no full block. */
++	bltz	BYTES, .Lchacha20_mips_no_full_block_aligned
++
++	FOR_EACH_WORD_REV(STORE_ALIGNED)	/* words 15 down to 0; each step carries an entry label used by the jump table */
++
++	/* BYTES > 0? Loop again. */
++	bgtz	BYTES, .Loop_chacha20_rounds
++
++	/* Place this here to fill delay slot */
++	addiu	NONCE_0, 1
++
++	/* BYTES < 0? Handle last bytes */
++	bltz	BYTES, .Lchacha20_mips_xor_bytes
++
++.Lchacha20_mips_xor_done:
++	/* Restore used registers */
++	lw	$s0,  0($sp)
++	lw	$s1,  4($sp)
++	lw	$s2,  8($sp)
++	lw	$s3, 12($sp)
++	lw	$s4, 16($sp)
++	lw	$s5, 20($sp)
++	lw	$s6, 24($sp)
++	lw	$s7, 28($sp)
++
++	/* Write NONCE_0 back to right location in state */
++	sw	NONCE_0, 48(STATE)
++
++.Lchacha20_mips_end:
++	addiu	$sp, STACK_SIZE
++	jr	$ra
++
++.Lchacha20_mips_no_full_block_aligned:
++	/* Restore the offset on BYTES */
++	addiu	BYTES, CHACHA20_BLOCK_SIZE
++
++	/* Get number of full WORDS */
++	andi	$at, BYTES, MASK_U32	/* $at = byte length of the full words (a multiple of 4) */
++
++	/* Load upper half of jump table addr */
++	lui	T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)
++
++	/* Calculate lower half jump table offset */
++	ins	T0, $at, 1, 6	/* places $at << 1 in the low bits: 8 bytes (branch + delay slot) per table entry */
++
++	/* Add offset to STATE */
++	addu	T1, STATE, $at
++
++	/* Add lower half jump table addr */
++	addiu	T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)
++
++	/* Read value from STATE */
++	lw	SAVED_CA, 0(T1)	/* state word at the index just past the last full word */
++
++	/* Store remaining bytecounter as negative value */
++	subu	BYTES, $at, BYTES	/* BYTES = -(number of trailing bytes), i.e. 0 to -3 */
++
++	jr	T0
++
++	/* Jump table */
++	FOR_EACH_WORD(JMPTBL_ALIGNED)
++
++
++.Loop_chacha20_unaligned:
++	/* Set number rounds here to fill delayslot. */
++	li	$at, 20
++
++	/* BYTES < 0, it has no full block. */
++	bltz	BYTES, .Lchacha20_mips_no_full_block_unaligned
++
++	FOR_EACH_WORD_REV(STORE_UNALIGNED)	/* same as the aligned store, using lwl/lwr and swl/swr */
++
++	/* BYTES > 0? Loop again. */
++	bgtz	BYTES, .Loop_chacha20_rounds
++
++	/* Write NONCE_0 back to right location in state */
++	sw	NONCE_0, 48(STATE)	/* stored again, after the increment, at .Lchacha20_mips_xor_done */
++
++	.set noreorder
++	/* Fall through to byte handling */
++	bgez	BYTES, .Lchacha20_mips_xor_done
++.Lchacha20_mips_xor_unaligned_0_b:
++.Lchacha20_mips_xor_aligned_0_b:
++	/* Place this here to fill delay slot */
++	addiu	NONCE_0, 1	/* also the landing point of jump table entry 0 (no full words) */
++	.set reorder
++
++.Lchacha20_mips_xor_bytes:
++	addu	IN, $at	/* step over the full words already processed */
++	addu	OUT, $at
++	/* First byte */
++	lbu	T1, 0(IN)
++	addiu	$at, BYTES, 1	/* zero when exactly one trailing byte was left */
++	CPU_TO_LE32(SAVED_X)
++	ROTR(SAVED_X)
++	xor	T1, SAVED_X
++	sb	T1, 0(OUT)
++	beqz	$at, .Lchacha20_mips_xor_done
++	/* Second byte */
++	lbu	T1, 1(IN)
++	addiu	$at, BYTES, 2	/* zero when exactly two trailing bytes were left */
++	ROTx	SAVED_X, 8
++	xor	T1, SAVED_X
++	sb	T1, 1(OUT)
++	beqz	$at, .Lchacha20_mips_xor_done
++	/* Third byte */
++	lbu	T1, 2(IN)
++	ROTx	SAVED_X, 8
++	xor	T1, SAVED_X
++	sb	T1, 2(OUT)
++	b	.Lchacha20_mips_xor_done
++
++.Lchacha20_mips_no_full_block_unaligned:
++	/* Restore the offset on BYTES */
++	addiu	BYTES, CHACHA20_BLOCK_SIZE
++
++	/* Get number of full WORDS */
++	andi	$at, BYTES, MASK_U32	/* $at = byte length of the full words (a multiple of 4) */
++
++	/* Load upper half of jump table addr */
++	lui	T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)
++
++	/* Calculate lower half jump table offset */
++	ins	T0, $at, 1, 6	/* places $at << 1 in the low bits: 8 bytes (branch + delay slot) per table entry */
++
++	/* Add offset to STATE */
++	addu	T1, STATE, $at
++
++	/* Add lower half jump table addr */
++	addiu	T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)
++
++	/* Read value from STATE */
++	lw	SAVED_CA, 0(T1)	/* state word at the index just past the last full word */
++
++	/* Store remaining bytecounter as negative value */
++	subu	BYTES, $at, BYTES	/* BYTES = -(number of trailing bytes), i.e. 0 to -3 */
++
++	jr	T0
++
++	/* Jump table */
++	FOR_EACH_WORD(JMPTBL_UNALIGNED)
++.end chacha20_mips
++.set at
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/chacha20/chacha20-unrolled-arm.S	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,461 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2018 Google, Inc.
++ */
++
++#include <linux/linkage.h>
++#include <asm/assembler.h>
++
++/*
++ * Design notes:
++ *
++ * 16 registers would be needed to hold the state matrix, but only 14 are
++ * available because 'sp' and 'pc' cannot be used.  So we spill the elements
++ * (x8, x9) to the stack and swap them out with (x10, x11).  This adds one
++ * 'ldrd' and one 'strd' instruction per round.
++ *
++ * All rotates are performed using the implicit rotate operand accepted by the
++ * 'add' and 'eor' instructions.  This is faster than using explicit rotate
++ * instructions.  To make this work, we allow the values in the second and last
++ * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
++ * wrong rotation amount.  The rotation amount is then fixed up just in time
++ * when the values are used.  'brot' is the number of bits the values in row 'b'
++ * need to be rotated right to arrive at the correct values, and 'drot'
++ * similarly for row 'd'.  (brot, drot) start out as (0, 0) but we make it such
++ * that they end up as (25, 24) after every round.
++ */
++
++	// ChaCha state registers
++	X0	.req	r0
++	X1	.req	r1
++	X2	.req	r2
++	X3	.req	r3
++	X4	.req	r4
++	X5	.req	r5
++	X6	.req	r6
++	X7	.req	r7
++	X8_X10	.req	r8	// shared by x8 and x10
++	X9_X11	.req	r9	// shared by x9 and x11
++	X12	.req	r10
++	X13	.req	r11
++	X14	.req	r12
++	X15	.req	r14
++
++.Lexpand_32byte_k:
++	// "expand 32-byte k"
++	.word	0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
++
++#ifdef __thumb2__
++#  define adrl adr
++#endif
++
++.macro __rev		out, in,  t0, t1, t2
++.if __LINUX_ARM_ARCH__ >= 6
++	rev		\out, \in	// single-instruction byte reversal
++.else
++	lsl		\t0, \in, #24	// t0 = byte 0 moved to byte 3
++	and		\t1, \in, #0xff00	// t1 = byte 1 in place
++	and		\t2, \in, #0xff0000	// t2 = byte 2 in place
++	orr		\out, \t0, \in, lsr #24	// out = byte 0 at the top, byte 3 at the bottom
++	orr		\out, \out, \t1, lsl #8	// byte 1 moves up to byte 2
++	orr		\out, \out, \t2, lsr #8	// byte 2 moves down to byte 1
++.endif
++.endm
++
++.macro _le32_bswap	x,  t0, t1, t2
++#ifdef __ARMEB__
++	__rev		\x, \x,  \t0, \t1, \t2	// byte-swap x in place; emitted only when __ARMEB__ is defined
++#endif
++.endm
++
++.macro _le32_bswap_4x	a, b, c, d,  t0, t1, t2
++	_le32_bswap	\a,  \t0, \t1, \t2	// apply _le32_bswap to each of the four registers, sharing the temporaries
++	_le32_bswap	\b,  \t0, \t1, \t2
++	_le32_bswap	\c,  \t0, \t1, \t2
++	_le32_bswap	\d,  \t0, \t1, \t2
++.endm
++
++.macro __ldrd		a, b, src, offset
++#if __LINUX_ARM_ARCH__ >= 6
++	ldrd		\a, \b, [\src, #\offset]	// load the register pair with one doubleword access
++#else
++	ldr		\a, [\src, #\offset]	// otherwise two word loads, at offset and offset + 4
++	ldr		\b, [\src, #\offset + 4]
++#endif
++.endm
++
++.macro __strd		a, b, dst, offset
++#if __LINUX_ARM_ARCH__ >= 6
++	strd		\a, \b, [\dst, #\offset]	// store the register pair with one doubleword access
++#else
++	str		\a, [\dst, #\offset]	// otherwise two word stores, at offset and offset + 4
++	str		\b, [\dst, #\offset + 4]
++#endif
++.endm
++
++.macro _halfround	a1, b1, c1, d1,  a2, b2, c2, d2
++	// Two quarter-rounds, (a1,b1,c1,d1) and (a2,b2,c2,d2), interleaved. 'brot' and 'drot' are assembly-time symbols: the right-rotation still owed on the incoming b and d values.
++	// a += b; d ^= a; d = rol(d, 16);
++	add		\a1, \a1, \b1, ror #brot
++	add		\a2, \a2, \b2, ror #brot
++	eor		\d1, \a1, \d1, ror #drot
++	eor		\d2, \a2, \d2, ror #drot
++	// drot == 32 - 16 == 16
++
++	// c += d; b ^= c; b = rol(b, 12);
++	add		\c1, \c1, \d1, ror #16
++	add		\c2, \c2, \d2, ror #16
++	eor		\b1, \c1, \b1, ror #brot
++	eor		\b2, \c2, \b2, ror #brot
++	// brot == 32 - 12 == 20
++
++	// a += b; d ^= a; d = rol(d, 8);
++	add		\a1, \a1, \b1, ror #20
++	add		\a2, \a2, \b2, ror #20
++	eor		\d1, \a1, \d1, ror #16
++	eor		\d2, \a2, \d2, ror #16
++	// drot == 32 - 8 == 24
++
++	// c += d; b ^= c; b = rol(b, 7);
++	add		\c1, \c1, \d1, ror #24
++	add		\c2, \c2, \d2, ror #24
++	eor		\b1, \c1, \b1, ror #20
++	eor		\b2, \c2, \b2, ror #20
++	// brot == 32 - 7 == 25
++.endm
++
++.macro _doubleround
++	// One column round then one diagonal round. x8-x9 and x10-x11 take turns in r8-r9, parked in the stack slots at sp+0 and sp+8.
++	// column round
++
++	// quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
++	_halfround	X0, X4, X8_X10, X12,  X1, X5, X9_X11, X13
++
++	// save (x8, x9); restore (x10, x11)
++	__strd		X8_X10, X9_X11, sp, 0
++	__ldrd		X8_X10, X9_X11, sp, 8
++
++	// quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
++	_halfround	X2, X6, X8_X10, X14,  X3, X7, X9_X11, X15
++
++	.set brot, 25	// from here on rows b and d carry the rotations left pending by _halfround
++	.set drot, 24
++
++	// diagonal round
++
++	// quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
++	_halfround	X0, X5, X8_X10, X15,  X1, X6, X9_X11, X12
++
++	// save (x10, x11); restore (x8, x9)
++	__strd		X8_X10, X9_X11, sp, 8
++	__ldrd		X8_X10, X9_X11, sp, 0
++
++	// quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
++	_halfround	X2, X7, X8_X10, X13,  X3, X4, X9_X11, X14
++.endm
++
++.macro _chacha_permute	nrounds
++	.set brot, 0	// the state arrives with no rotation pending on rows b and d
++	.set drot, 0
++	.rept \nrounds / 2	// every _doubleround accounts for two rounds
++	 _doubleround
++	.endr
++.endm
++
++.macro _chacha		nrounds
++	// Per block: permute, add the original state, XOR the result with IN into OUT, then bump the block counter and repeat until LEN is used up.
++.Lnext_block\@:
++	// Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
++	// Registers contain x0-x9,x12-x15.
++
++	// Do the core ChaCha permutation to update x0-x15.
++	_chacha_permute	\nrounds
++
++	add		sp, #8	// drop the unused0-unused1 slots
++	// Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
++	// Registers contain x0-x9,x12-x15.
++	// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
++
++	// Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
++	push		{X8_X10, X9_X11, X12, X13, X14, X15}
++
++	// Load (OUT, IN, LEN).
++	ldr		r14, [sp, #96]
++	ldr		r12, [sp, #100]
++	ldr		r11, [sp, #104]
++
++	orr		r10, r14, r12	// OUT | IN, for the alignment test below
++
++	// Use slow path if fewer than 64 bytes remain.
++	cmp		r11, #64
++	blt		.Lxor_slowpath\@
++
++	// Use slow path if IN and/or OUT isn't 4-byte aligned.  Needed even on
++	// ARMv6+, since ldmia and stmia (used below) still require alignment.
++	tst		r10, #3
++	bne		.Lxor_slowpath\@
++
++	// Fast path: XOR 64 bytes of aligned data.
++
++	// Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
++	// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
++	// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
++
++	// x0-x3
++	__ldrd		r8, r9, sp, 32
++	__ldrd		r10, r11, sp, 40
++	add		X0, X0, r8
++	add		X1, X1, r9
++	add		X2, X2, r10
++	add		X3, X3, r11
++	_le32_bswap_4x	X0, X1, X2, X3,  r8, r9, r10
++	ldmia		r12!, {r8-r11}
++	eor		X0, X0, r8
++	eor		X1, X1, r9
++	eor		X2, X2, r10
++	eor		X3, X3, r11
++	stmia		r14!, {X0-X3}
++
++	// x4-x7
++	__ldrd		r8, r9, sp, 48
++	__ldrd		r10, r11, sp, 56
++	add		X4, r8, X4, ror #brot
++	add		X5, r9, X5, ror #brot
++	ldmia		r12!, {X0-X3}
++	add		X6, r10, X6, ror #brot
++	add		X7, r11, X7, ror #brot
++	_le32_bswap_4x	X4, X5, X6, X7,  r8, r9, r10
++	eor		X4, X4, X0
++	eor		X5, X5, X1
++	eor		X6, X6, X2
++	eor		X7, X7, X3
++	stmia		r14!, {X4-X7}
++
++	// x8-x15
++	pop		{r0-r7}			// (x8-x9,x12-x15,x10-x11)
++	__ldrd		r8, r9, sp, 32
++	__ldrd		r10, r11, sp, 40
++	add		r0, r0, r8		// x8
++	add		r1, r1, r9		// x9
++	add		r6, r6, r10		// x10
++	add		r7, r7, r11		// x11
++	_le32_bswap_4x	r0, r1, r6, r7,  r8, r9, r10
++	ldmia		r12!, {r8-r11}
++	eor		r0, r0, r8		// x8
++	eor		r1, r1, r9		// x9
++	eor		r6, r6, r10		// x10
++	eor		r7, r7, r11		// x11
++	stmia		r14!, {r0,r1,r6,r7}
++	ldmia		r12!, {r0,r1,r6,r7}
++	__ldrd		r8, r9, sp, 48
++	__ldrd		r10, r11, sp, 56
++	add		r2, r8, r2, ror #drot	// x12
++	add		r3, r9, r3, ror #drot	// x13
++	add		r4, r10, r4, ror #drot	// x14
++	add		r5, r11, r5, ror #drot	// x15
++	_le32_bswap_4x	r2, r3, r4, r5,  r9, r10, r11
++	  ldr		r9, [sp, #72]		// load LEN
++	eor		r2, r2, r0		// x12
++	eor		r3, r3, r1		// x13
++	eor		r4, r4, r6		// x14
++	eor		r5, r5, r7		// x15
++	  subs		r9, #64			// decrement and check LEN
++	stmia		r14!, {r2-r5}
++
++	beq		.Ldone\@
++
++.Lprepare_for_next_block\@:
++
++	// Stack: x0-x15 OUT IN LEN
++
++	// Increment block counter (x12)
++	add		r8, #1
++
++	// Store updated (OUT, IN, LEN)
++	str		r14, [sp, #64]
++	str		r12, [sp, #68]
++	str		r9, [sp, #72]
++
++	  mov		r14, sp
++
++	// Store updated block counter (x12)
++	str		r8, [sp, #48]
++
++	  sub		sp, #16
++
++	// Reload state and do next block
++	ldmia		r14!, {r0-r11}		// load x0-x11
++	__strd		r10, r11, sp, 8		// store x10-x11 before state
++	ldmia		r14, {r10-r12,r14}	// load x12-x15
++	b		.Lnext_block\@
++
++.Lxor_slowpath\@:
++	// Slow path: < 64 bytes remaining, or unaligned input or output buffer.
++	// We handle it by storing the 64 bytes of keystream to the stack, then
++	// XOR-ing the needed portion with the data.
++
++	// Allocate keystream buffer
++	sub		sp, #64
++	mov		r14, sp
++
++	// Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
++	// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
++	// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
++
++	// Save keystream for x0-x3
++	__ldrd		r8, r9, sp, 96
++	__ldrd		r10, r11, sp, 104
++	add		X0, X0, r8
++	add		X1, X1, r9
++	add		X2, X2, r10
++	add		X3, X3, r11
++	_le32_bswap_4x	X0, X1, X2, X3,  r8, r9, r10
++	stmia		r14!, {X0-X3}
++
++	// Save keystream for x4-x7
++	__ldrd		r8, r9, sp, 112
++	__ldrd		r10, r11, sp, 120
++	add		X4, r8, X4, ror #brot
++	add		X5, r9, X5, ror #brot
++	add		X6, r10, X6, ror #brot
++	add		X7, r11, X7, ror #brot
++	_le32_bswap_4x	X4, X5, X6, X7,  r8, r9, r10
++	  add		r8, sp, #64	// r8 = address of the saved (x8-x9,x12-x15,x10-x11) just above the keystream buffer
++	stmia		r14!, {X4-X7}
++
++	// Save keystream for x8-x15
++	ldm		r8, {r0-r7}		// (x8-x9,x12-x15,x10-x11)
++	__ldrd		r8, r9, sp, 128
++	__ldrd		r10, r11, sp, 136
++	add		r0, r0, r8		// x8
++	add		r1, r1, r9		// x9
++	add		r6, r6, r10		// x10
++	add		r7, r7, r11		// x11
++	_le32_bswap_4x	r0, r1, r6, r7,  r8, r9, r10
++	stmia		r14!, {r0,r1,r6,r7}
++	__ldrd		r8, r9, sp, 144
++	__ldrd		r10, r11, sp, 152
++	add		r2, r8, r2, ror #drot	// x12
++	add		r3, r9, r3, ror #drot	// x13
++	add		r4, r10, r4, ror #drot	// x14
++	add		r5, r11, r5, ror #drot	// x15
++	_le32_bswap_4x	r2, r3, r4, r5,  r9, r10, r11
++	stmia		r14, {r2-r5}
++
++	// Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
++	// Registers: r8 is block counter, r12 is IN.
++
++	ldr		r9, [sp, #168]		// LEN
++	ldr		r14, [sp, #160]		// OUT
++	cmp		r9, #64
++	  mov		r0, sp	// r0 = start of the keystream buffer
++	movle		r1, r9	// r1 = min(LEN, 64)
++	movgt		r1, #64
++	// r1 is number of bytes to XOR, in range [1, 64]
++
++.if __LINUX_ARM_ARCH__ < 6
++	orr		r2, r12, r14
++	tst		r2, #3			// IN or OUT misaligned?
++	bne		.Lxor_next_byte\@
++.endif
++
++	// XOR a word at a time
++.rept 16
++	subs		r1, #4
++	blt		.Lxor_words_done\@
++	ldr		r2, [r12], #4
++	ldr		r3, [r0], #4
++	eor		r2, r2, r3
++	str		r2, [r14], #4
++.endr
++	b		.Lxor_slowpath_done\@
++.Lxor_words_done\@:
++	ands		r1, r1, #3	// bytes left over after the last whole word
++	beq		.Lxor_slowpath_done\@
++
++	// XOR a byte at a time
++.Lxor_next_byte\@:
++	ldrb		r2, [r12], #1
++	ldrb		r3, [r0], #1
++	eor		r2, r2, r3
++	strb		r2, [r14], #1
++	subs		r1, #1
++	bne		.Lxor_next_byte\@
++
++.Lxor_slowpath_done\@:
++	subs		r9, #64
++	add		sp, #96	// free the keystream buffer (64) and the 32 saved bytes; plain add keeps the flags from subs for the bgt
++	bgt		.Lprepare_for_next_block\@
++
++.Ldone\@:
++.endm	// _chacha
++
++/*
++ * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
++ *		     const u32 iv[4]);
++ */
++SYM_FUNC_START(chacha20_arm)
++	cmp		r2, #0			// len == 0?
++	reteq		lr
++
++	push		{r0-r2,r4-r11,lr}	// 12 words = 48 bytes; r0-r2 become the OUT, IN, LEN stack slots
++
++	// Push state x0-x15 onto stack.
++	// Also store an extra copy of x10-x11 just before the state.
++
++	ldr		r4, [sp, #48]		// iv
++	mov		r0, sp	// r0 walks downward as the state is written with stmdb
++	sub		sp, #80	// 64 bytes of state plus 16 bytes below it (two unused words and the x10-x11 copy)
++
++	// iv: x12-x15
++	ldm		r4, {X12,X13,X14,X15}
++	stmdb		r0!, {X12,X13,X14,X15}
++
++	// key: x4-x11
++	__ldrd		X8_X10, X9_X11, r3, 24	// key words 6-7, i.e. x10-x11
++	__strd		X8_X10, X9_X11, sp, 8	// the extra copy just below the state
++	stmdb		r0!, {X8_X10, X9_X11}
++	ldm		r3, {X4-X9_X11}	// key words 0-5, i.e. x4-x9
++	stmdb		r0!, {X4-X9_X11}
++
++	// constants: x0-x3
++	adrl		X3, .Lexpand_32byte_k
++	ldm		X3, {X0-X3}
++	__strd		X0, X1, sp, 16	// the state proper starts at sp+16
++	__strd		X2, X3, sp, 24
++
++	_chacha		20
++
++	add		sp, #76	// skip the 64-byte state and the 12 bytes of saved r0-r2
++	pop		{r4-r11, pc}
++SYM_FUNC_END(chacha20_arm)
++
++/*
++ * void hchacha20_arm(const u32 state[16], u32 out[8]);
++ */
++SYM_FUNC_START(hchacha20_arm)
++	push		{r1,r4-r11,lr}	// r1 ('out') is saved at the lowest address so it can be popped on its own later
++
++	mov		r14, r0
++	ldmia		r14!, {r0-r11}		// load x0-x11
++	push		{r10-r11}		// store x10-x11 to stack
++	ldm		r14, {r10-r12,r14}	// load x12-x15
++	sub		sp, #8	// reserve the sp+0 slot that _doubleround uses to park x8-x9
++
++	_chacha_permute	20
++
++	// Skip over (unused0-unused1, x10-x11)
++	add		sp, #16
++
++	// Fix up rotations of x12-x15
++	ror		X12, X12, #drot
++	ror		X13, X13, #drot
++	  pop		{r4}			// load 'out'
++	ror		X14, X14, #drot
++	ror		X15, X15, #drot
++
++	// Store (x0-x3,x12-x15) to 'out'
++	stm		r4, {X0,X1,X2,X3,X12,X13,X14,X15}
++
++	pop		{r4-r11,pc}
++SYM_FUNC_END(hchacha20_arm)
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/curve25519/curve25519-arm.S	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,2064 @@
++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
++/*
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ *
++ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
++ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
++ * manually reworked for use in kernel space.
++ */
++
++#if defined(CONFIG_KERNEL_MODE_NEON) && !defined(__ARMEB__)
++#include <linux/linkage.h>
++
++.text
++.fpu neon
++.arch armv7-a
++.align 4
++
++SYM_FUNC_START(curve25519_neon)
++	push		{r4-r11, lr}
++	mov		ip, sp
++	sub		r3, sp, #704
++	and		r3, r3, #0xfffffff0
++	mov		sp, r3
++	movw		r4, #0
++	movw		r5, #254
++	vmov.i32	q0, #1
++	vshr.u64	q1, q0, #7
++	vshr.u64	q0, q0, #8
++	vmov.i32	d4, #19
++	vmov.i32	d5, #38
++	add		r6, sp, #480
++	vst1.8		{d2-d3}, [r6, : 128]!
++	vst1.8		{d0-d1}, [r6, : 128]!
++	vst1.8		{d4-d5}, [r6, : 128]
++	add		r6, r3, #0
++	vmov.i32	q2, #0
++	vst1.8		{d4-d5}, [r6, : 128]!
++	vst1.8		{d4-d5}, [r6, : 128]!
++	vst1.8		d4, [r6, : 64]
++	add		r6, r3, #0
++	movw		r7, #960
++	sub		r7, r7, #2
++	neg		r7, r7
++	sub		r7, r7, r7, LSL #7
++	str		r7, [r6]
++	add		r6, sp, #672
++	vld1.8		{d4-d5}, [r1]!
++	vld1.8		{d6-d7}, [r1]
++	vst1.8		{d4-d5}, [r6, : 128]!
++	vst1.8		{d6-d7}, [r6, : 128]
++	sub		r1, r6, #16
++	ldrb		r6, [r1]
++	and		r6, r6, #248
++	strb		r6, [r1]
++	ldrb		r6, [r1, #31]
++	and		r6, r6, #127
++	orr		r6, r6, #64
++	strb		r6, [r1, #31]
++	vmov.i64	q2, #0xffffffff
++	vshr.u64	q3, q2, #7
++	vshr.u64	q2, q2, #6
++	vld1.8		{d8}, [r2]
++	vld1.8		{d10}, [r2]
++	add		r2, r2, #6
++	vld1.8		{d12}, [r2]
++	vld1.8		{d14}, [r2]
++	add		r2, r2, #6
++	vld1.8		{d16}, [r2]
++	add		r2, r2, #4
++	vld1.8		{d18}, [r2]
++	vld1.8		{d20}, [r2]
++	add		r2, r2, #6
++	vld1.8		{d22}, [r2]
++	add		r2, r2, #2
++	vld1.8		{d24}, [r2]
++	vld1.8		{d26}, [r2]
++	vshr.u64	q5, q5, #26
++	vshr.u64	q6, q6, #3
++	vshr.u64	q7, q7, #29
++	vshr.u64	q8, q8, #6
++	vshr.u64	q10, q10, #25
++	vshr.u64	q11, q11, #3
++	vshr.u64	q12, q12, #12
++	vshr.u64	q13, q13, #38
++	vand		q4, q4, q2
++	vand		q6, q6, q2
++	vand		q8, q8, q2
++	vand		q10, q10, q2
++	vand		q2, q12, q2
++	vand		q5, q5, q3
++	vand		q7, q7, q3
++	vand		q9, q9, q3
++	vand		q11, q11, q3
++	vand		q3, q13, q3
++	add		r2, r3, #48
++	vadd.i64	q12, q4, q1
++	vadd.i64	q13, q10, q1
++	vshr.s64	q12, q12, #26
++	vshr.s64	q13, q13, #26
++	vadd.i64	q5, q5, q12
++	vshl.i64	q12, q12, #26
++	vadd.i64	q14, q5, q0
++	vadd.i64	q11, q11, q13
++	vshl.i64	q13, q13, #26
++	vadd.i64	q15, q11, q0
++	vsub.i64	q4, q4, q12
++	vshr.s64	q12, q14, #25
++	vsub.i64	q10, q10, q13
++	vshr.s64	q13, q15, #25
++	vadd.i64	q6, q6, q12
++	vshl.i64	q12, q12, #25
++	vadd.i64	q14, q6, q1
++	vadd.i64	q2, q2, q13
++	vsub.i64	q5, q5, q12
++	vshr.s64	q12, q14, #26
++	vshl.i64	q13, q13, #25
++	vadd.i64	q14, q2, q1
++	vadd.i64	q7, q7, q12
++	vshl.i64	q12, q12, #26
++	vadd.i64	q15, q7, q0
++	vsub.i64	q11, q11, q13
++	vshr.s64	q13, q14, #26
++	vsub.i64	q6, q6, q12
++	vshr.s64	q12, q15, #25
++	vadd.i64	q3, q3, q13
++	vshl.i64	q13, q13, #26
++	vadd.i64	q14, q3, q0
++	vadd.i64	q8, q8, q12
++	vshl.i64	q12, q12, #25
++	vadd.i64	q15, q8, q1
++	add		r2, r2, #8
++	vsub.i64	q2, q2, q13
++	vshr.s64	q13, q14, #25
++	vsub.i64	q7, q7, q12
++	vshr.s64	q12, q15, #26
++	vadd.i64	q14, q13, q13
++	vadd.i64	q9, q9, q12
++	vtrn.32		d12, d14
++	vshl.i64	q12, q12, #26
++	vtrn.32		d13, d15
++	vadd.i64	q0, q9, q0
++	vadd.i64	q4, q4, q14
++	vst1.8		d12, [r2, : 64]!
++	vshl.i64	q6, q13, #4
++	vsub.i64	q7, q8, q12
++	vshr.s64	q0, q0, #25
++	vadd.i64	q4, q4, q6
++	vadd.i64	q6, q10, q0
++	vshl.i64	q0, q0, #25
++	vadd.i64	q8, q6, q1
++	vadd.i64	q4, q4, q13
++	vshl.i64	q10, q13, #25
++	vadd.i64	q1, q4, q1
++	vsub.i64	q0, q9, q0
++	vshr.s64	q8, q8, #26
++	vsub.i64	q3, q3, q10
++	vtrn.32		d14, d0
++	vshr.s64	q1, q1, #26
++	vtrn.32		d15, d1
++	vadd.i64	q0, q11, q8
++	vst1.8		d14, [r2, : 64]
++	vshl.i64	q7, q8, #26
++	vadd.i64	q5, q5, q1
++	vtrn.32		d4, d6
++	vshl.i64	q1, q1, #26
++	vtrn.32		d5, d7
++	vsub.i64	q3, q6, q7
++	add		r2, r2, #16
++	vsub.i64	q1, q4, q1
++	vst1.8		d4, [r2, : 64]
++	vtrn.32		d6, d0
++	vtrn.32		d7, d1
++	sub		r2, r2, #8
++	vtrn.32		d2, d10
++	vtrn.32		d3, d11
++	vst1.8		d6, [r2, : 64]
++	sub		r2, r2, #24
++	vst1.8		d2, [r2, : 64]
++	add		r2, r3, #96
++	vmov.i32	q0, #0
++	vmov.i64	d2, #0xff
++	vmov.i64	d3, #0
++	vshr.u32	q1, q1, #7
++	vst1.8		{d2-d3}, [r2, : 128]!
++	vst1.8		{d0-d1}, [r2, : 128]!
++	vst1.8		d0, [r2, : 64]
++	add		r2, r3, #144
++	vmov.i32	q0, #0
++	vst1.8		{d0-d1}, [r2, : 128]!
++	vst1.8		{d0-d1}, [r2, : 128]!
++	vst1.8		d0, [r2, : 64]
++	add		r2, r3, #240
++	vmov.i32	q0, #0
++	vmov.i64	d2, #0xff
++	vmov.i64	d3, #0
++	vshr.u32	q1, q1, #7
++	vst1.8		{d2-d3}, [r2, : 128]!
++	vst1.8		{d0-d1}, [r2, : 128]!
++	vst1.8		d0, [r2, : 64]
++	add		r2, r3, #48
++	add		r6, r3, #192
++	vld1.8		{d0-d1}, [r2, : 128]!
++	vld1.8		{d2-d3}, [r2, : 128]!
++	vld1.8		{d4}, [r2, : 64]
++	vst1.8		{d0-d1}, [r6, : 128]!
++	vst1.8		{d2-d3}, [r6, : 128]!
++	vst1.8		d4, [r6, : 64]
++.Lmainloop:
++	mov		r2, r5, LSR #3
++	and		r6, r5, #7
++	ldrb		r2, [r1, r2]
++	mov		r2, r2, LSR r6
++	and		r2, r2, #1
++	str		r5, [sp, #456]
++	eor		r4, r4, r2
++	str		r2, [sp, #460]
++	neg		r2, r4
++	add		r4, r3, #96
++	add		r5, r3, #192
++	add		r6, r3, #144
++	vld1.8		{d8-d9}, [r4, : 128]!
++	add		r7, r3, #240
++	vld1.8		{d10-d11}, [r5, : 128]!
++	veor		q6, q4, q5
++	vld1.8		{d14-d15}, [r6, : 128]!
++	vdup.i32	q8, r2
++	vld1.8		{d18-d19}, [r7, : 128]!
++	veor		q10, q7, q9
++	vld1.8		{d22-d23}, [r4, : 128]!
++	vand		q6, q6, q8
++	vld1.8		{d24-d25}, [r5, : 128]!
++	vand		q10, q10, q8
++	vld1.8		{d26-d27}, [r6, : 128]!
++	veor		q4, q4, q6
++	vld1.8		{d28-d29}, [r7, : 128]!
++	veor		q5, q5, q6
++	vld1.8		{d0}, [r4, : 64]
++	veor		q6, q7, q10
++	vld1.8		{d2}, [r5, : 64]
++	veor		q7, q9, q10
++	vld1.8		{d4}, [r6, : 64]
++	veor		q9, q11, q12
++	vld1.8		{d6}, [r7, : 64]
++	veor		q10, q0, q1
++	sub		r2, r4, #32
++	vand		q9, q9, q8
++	sub		r4, r5, #32
++	vand		q10, q10, q8
++	sub		r5, r6, #32
++	veor		q11, q11, q9
++	sub		r6, r7, #32
++	veor		q0, q0, q10
++	veor		q9, q12, q9
++	veor		q1, q1, q10
++	veor		q10, q13, q14
++	veor		q12, q2, q3
++	vand		q10, q10, q8
++	vand		q8, q12, q8
++	veor		q12, q13, q10
++	veor		q2, q2, q8
++	veor		q10, q14, q10
++	veor		q3, q3, q8
++	vadd.i32	q8, q4, q6
++	vsub.i32	q4, q4, q6
++	vst1.8		{d16-d17}, [r2, : 128]!
++	vadd.i32	q6, q11, q12
++	vst1.8		{d8-d9}, [r5, : 128]!
++	vsub.i32	q4, q11, q12
++	vst1.8		{d12-d13}, [r2, : 128]!
++	vadd.i32	q6, q0, q2
++	vst1.8		{d8-d9}, [r5, : 128]!
++	vsub.i32	q0, q0, q2
++	vst1.8		d12, [r2, : 64]
++	vadd.i32	q2, q5, q7
++	vst1.8		d0, [r5, : 64]
++	vsub.i32	q0, q5, q7
++	vst1.8		{d4-d5}, [r4, : 128]!
++	vadd.i32	q2, q9, q10
++	vst1.8		{d0-d1}, [r6, : 128]!
++	vsub.i32	q0, q9, q10
++	vst1.8		{d4-d5}, [r4, : 128]!
++	vadd.i32	q2, q1, q3
++	vst1.8		{d0-d1}, [r6, : 128]!
++	vsub.i32	q0, q1, q3
++	vst1.8		d4, [r4, : 64]
++	vst1.8		d0, [r6, : 64]
++	add		r2, sp, #512
++	add		r4, r3, #96
++	add		r5, r3, #144
++	vld1.8		{d0-d1}, [r2, : 128]
++	vld1.8		{d2-d3}, [r4, : 128]!
++	vld1.8		{d4-d5}, [r5, : 128]!
++	vzip.i32	q1, q2
++	vld1.8		{d6-d7}, [r4, : 128]!
++	vld1.8		{d8-d9}, [r5, : 128]!
++	vshl.i32	q5, q1, #1
++	vzip.i32	q3, q4
++	vshl.i32	q6, q2, #1
++	vld1.8		{d14}, [r4, : 64]
++	vshl.i32	q8, q3, #1
++	vld1.8		{d15}, [r5, : 64]
++	vshl.i32	q9, q4, #1
++	vmul.i32	d21, d7, d1
++	vtrn.32		d14, d15
++	vmul.i32	q11, q4, q0
++	vmul.i32	q0, q7, q0
++	vmull.s32	q12, d2, d2
++	vmlal.s32	q12, d11, d1
++	vmlal.s32	q12, d12, d0
++	vmlal.s32	q12, d13, d23
++	vmlal.s32	q12, d16, d22
++	vmlal.s32	q12, d7, d21
++	vmull.s32	q10, d2, d11
++	vmlal.s32	q10, d4, d1
++	vmlal.s32	q10, d13, d0
++	vmlal.s32	q10, d6, d23
++	vmlal.s32	q10, d17, d22
++	vmull.s32	q13, d10, d4
++	vmlal.s32	q13, d11, d3
++	vmlal.s32	q13, d13, d1
++	vmlal.s32	q13, d16, d0
++	vmlal.s32	q13, d17, d23
++	vmlal.s32	q13, d8, d22
++	vmull.s32	q1, d10, d5
++	vmlal.s32	q1, d11, d4
++	vmlal.s32	q1, d6, d1
++	vmlal.s32	q1, d17, d0
++	vmlal.s32	q1, d8, d23
++	vmull.s32	q14, d10, d6
++	vmlal.s32	q14, d11, d13
++	vmlal.s32	q14, d4, d4
++	vmlal.s32	q14, d17, d1
++	vmlal.s32	q14, d18, d0
++	vmlal.s32	q14, d9, d23
++	vmull.s32	q11, d10, d7
++	vmlal.s32	q11, d11, d6
++	vmlal.s32	q11, d12, d5
++	vmlal.s32	q11, d8, d1
++	vmlal.s32	q11, d19, d0
++	vmull.s32	q15, d10, d8
++	vmlal.s32	q15, d11, d17
++	vmlal.s32	q15, d12, d6
++	vmlal.s32	q15, d13, d5
++	vmlal.s32	q15, d19, d1
++	vmlal.s32	q15, d14, d0
++	vmull.s32	q2, d10, d9
++	vmlal.s32	q2, d11, d8
++	vmlal.s32	q2, d12, d7
++	vmlal.s32	q2, d13, d6
++	vmlal.s32	q2, d14, d1
++	vmull.s32	q0, d15, d1
++	vmlal.s32	q0, d10, d14
++	vmlal.s32	q0, d11, d19
++	vmlal.s32	q0, d12, d8
++	vmlal.s32	q0, d13, d17
++	vmlal.s32	q0, d6, d6
++	add		r2, sp, #480
++	vld1.8		{d18-d19}, [r2, : 128]!
++	vmull.s32	q3, d16, d7
++	vmlal.s32	q3, d10, d15
++	vmlal.s32	q3, d11, d14
++	vmlal.s32	q3, d12, d9
++	vmlal.s32	q3, d13, d8
++	vld1.8		{d8-d9}, [r2, : 128]
++	vadd.i64	q5, q12, q9
++	vadd.i64	q6, q15, q9
++	vshr.s64	q5, q5, #26
++	vshr.s64	q6, q6, #26
++	vadd.i64	q7, q10, q5
++	vshl.i64	q5, q5, #26
++	vadd.i64	q8, q7, q4
++	vadd.i64	q2, q2, q6
++	vshl.i64	q6, q6, #26
++	vadd.i64	q10, q2, q4
++	vsub.i64	q5, q12, q5
++	vshr.s64	q8, q8, #25
++	vsub.i64	q6, q15, q6
++	vshr.s64	q10, q10, #25
++	vadd.i64	q12, q13, q8
++	vshl.i64	q8, q8, #25
++	vadd.i64	q13, q12, q9
++	vadd.i64	q0, q0, q10
++	vsub.i64	q7, q7, q8
++	vshr.s64	q8, q13, #26
++	vshl.i64	q10, q10, #25
++	vadd.i64	q13, q0, q9
++	vadd.i64	q1, q1, q8
++	vshl.i64	q8, q8, #26
++	vadd.i64	q15, q1, q4
++	vsub.i64	q2, q2, q10
++	vshr.s64	q10, q13, #26
++	vsub.i64	q8, q12, q8
++	vshr.s64	q12, q15, #25
++	vadd.i64	q3, q3, q10
++	vshl.i64	q10, q10, #26
++	vadd.i64	q13, q3, q4
++	vadd.i64	q14, q14, q12
++	add		r2, r3, #288
++	vshl.i64	q12, q12, #25
++	add		r4, r3, #336
++	vadd.i64	q15, q14, q9
++	add		r2, r2, #8
++	vsub.i64	q0, q0, q10
++	add		r4, r4, #8
++	vshr.s64	q10, q13, #25
++	vsub.i64	q1, q1, q12
++	vshr.s64	q12, q15, #26
++	vadd.i64	q13, q10, q10
++	vadd.i64	q11, q11, q12
++	vtrn.32		d16, d2
++	vshl.i64	q12, q12, #26
++	vtrn.32		d17, d3
++	vadd.i64	q1, q11, q4
++	vadd.i64	q4, q5, q13
++	vst1.8		d16, [r2, : 64]!
++	vshl.i64	q5, q10, #4
++	vst1.8		d17, [r4, : 64]!
++	vsub.i64	q8, q14, q12
++	vshr.s64	q1, q1, #25
++	vadd.i64	q4, q4, q5
++	vadd.i64	q5, q6, q1
++	vshl.i64	q1, q1, #25
++	vadd.i64	q6, q5, q9
++	vadd.i64	q4, q4, q10
++	vshl.i64	q10, q10, #25
++	vadd.i64	q9, q4, q9
++	vsub.i64	q1, q11, q1
++	vshr.s64	q6, q6, #26
++	vsub.i64	q3, q3, q10
++	vtrn.32		d16, d2
++	vshr.s64	q9, q9, #26
++	vtrn.32		d17, d3
++	vadd.i64	q1, q2, q6
++	vst1.8		d16, [r2, : 64]
++	vshl.i64	q2, q6, #26
++	vst1.8		d17, [r4, : 64]
++	vadd.i64	q6, q7, q9
++	vtrn.32		d0, d6
++	vshl.i64	q7, q9, #26
++	vtrn.32		d1, d7
++	vsub.i64	q2, q5, q2
++	add		r2, r2, #16
++	vsub.i64	q3, q4, q7
++	vst1.8		d0, [r2, : 64]
++	add		r4, r4, #16
++	vst1.8		d1, [r4, : 64]
++	vtrn.32		d4, d2
++	vtrn.32		d5, d3
++	sub		r2, r2, #8
++	sub		r4, r4, #8
++	vtrn.32		d6, d12
++	vtrn.32		d7, d13
++	vst1.8		d4, [r2, : 64]
++	vst1.8		d5, [r4, : 64]
++	sub		r2, r2, #24
++	sub		r4, r4, #24
++	vst1.8		d6, [r2, : 64]
++	vst1.8		d7, [r4, : 64]
++	add		r2, r3, #240
++	add		r4, r3, #96
++	vld1.8		{d0-d1}, [r4, : 128]!
++	vld1.8		{d2-d3}, [r4, : 128]!
++	vld1.8		{d4}, [r4, : 64]
++	add		r4, r3, #144
++	vld1.8		{d6-d7}, [r4, : 128]!
++	vtrn.32		q0, q3
++	vld1.8		{d8-d9}, [r4, : 128]!
++	vshl.i32	q5, q0, #4
++	vtrn.32		q1, q4
++	vshl.i32	q6, q3, #4
++	vadd.i32	q5, q5, q0
++	vadd.i32	q6, q6, q3
++	vshl.i32	q7, q1, #4
++	vld1.8		{d5}, [r4, : 64]
++	vshl.i32	q8, q4, #4
++	vtrn.32		d4, d5
++	vadd.i32	q7, q7, q1
++	vadd.i32	q8, q8, q4
++	vld1.8		{d18-d19}, [r2, : 128]!
++	vshl.i32	q10, q2, #4
++	vld1.8		{d22-d23}, [r2, : 128]!
++	vadd.i32	q10, q10, q2
++	vld1.8		{d24}, [r2, : 64]
++	vadd.i32	q5, q5, q0
++	add		r2, r3, #192
++	vld1.8		{d26-d27}, [r2, : 128]!
++	vadd.i32	q6, q6, q3
++	vld1.8		{d28-d29}, [r2, : 128]!
++	vadd.i32	q8, q8, q4
++	vld1.8		{d25}, [r2, : 64]
++	vadd.i32	q10, q10, q2
++	vtrn.32		q9, q13
++	vadd.i32	q7, q7, q1
++	vadd.i32	q5, q5, q0
++	vtrn.32		q11, q14
++	vadd.i32	q6, q6, q3
++	add		r2, sp, #528
++	vadd.i32	q10, q10, q2
++	vtrn.32		d24, d25
++	vst1.8		{d12-d13}, [r2, : 128]!
++	vshl.i32	q6, q13, #1
++	vst1.8		{d20-d21}, [r2, : 128]!
++	vshl.i32	q10, q14, #1
++	vst1.8		{d12-d13}, [r2, : 128]!
++	vshl.i32	q15, q12, #1
++	vadd.i32	q8, q8, q4
++	vext.32		d10, d31, d30, #0
++	vadd.i32	q7, q7, q1
++	vst1.8		{d16-d17}, [r2, : 128]!
++	vmull.s32	q8, d18, d5
++	vmlal.s32	q8, d26, d4
++	vmlal.s32	q8, d19, d9
++	vmlal.s32	q8, d27, d3
++	vmlal.s32	q8, d22, d8
++	vmlal.s32	q8, d28, d2
++	vmlal.s32	q8, d23, d7
++	vmlal.s32	q8, d29, d1
++	vmlal.s32	q8, d24, d6
++	vmlal.s32	q8, d25, d0
++	vst1.8		{d14-d15}, [r2, : 128]!
++	vmull.s32	q2, d18, d4
++	vmlal.s32	q2, d12, d9
++	vmlal.s32	q2, d13, d8
++	vmlal.s32	q2, d19, d3
++	vmlal.s32	q2, d22, d2
++	vmlal.s32	q2, d23, d1
++	vmlal.s32	q2, d24, d0
++	vst1.8		{d20-d21}, [r2, : 128]!
++	vmull.s32	q7, d18, d9
++	vmlal.s32	q7, d26, d3
++	vmlal.s32	q7, d19, d8
++	vmlal.s32	q7, d27, d2
++	vmlal.s32	q7, d22, d7
++	vmlal.s32	q7, d28, d1
++	vmlal.s32	q7, d23, d6
++	vmlal.s32	q7, d29, d0
++	vst1.8		{d10-d11}, [r2, : 128]!
++	vmull.s32	q5, d18, d3
++	vmlal.s32	q5, d19, d2
++	vmlal.s32	q5, d22, d1
++	vmlal.s32	q5, d23, d0
++	vmlal.s32	q5, d12, d8
++	vst1.8		{d16-d17}, [r2, : 128]
++	vmull.s32	q4, d18, d8
++	vmlal.s32	q4, d26, d2
++	vmlal.s32	q4, d19, d7
++	vmlal.s32	q4, d27, d1
++	vmlal.s32	q4, d22, d6
++	vmlal.s32	q4, d28, d0
++	vmull.s32	q8, d18, d7
++	vmlal.s32	q8, d26, d1
++	vmlal.s32	q8, d19, d6
++	vmlal.s32	q8, d27, d0
++	add		r2, sp, #544
++	vld1.8		{d20-d21}, [r2, : 128]
++	vmlal.s32	q7, d24, d21
++	vmlal.s32	q7, d25, d20
++	vmlal.s32	q4, d23, d21
++	vmlal.s32	q4, d29, d20
++	vmlal.s32	q8, d22, d21
++	vmlal.s32	q8, d28, d20
++	vmlal.s32	q5, d24, d20
++	vst1.8		{d14-d15}, [r2, : 128]
++	vmull.s32	q7, d18, d6
++	vmlal.s32	q7, d26, d0
++	add		r2, sp, #624
++	vld1.8		{d30-d31}, [r2, : 128]
++	vmlal.s32	q2, d30, d21
++	vmlal.s32	q7, d19, d21
++	vmlal.s32	q7, d27, d20
++	add		r2, sp, #592
++	vld1.8		{d26-d27}, [r2, : 128]
++	vmlal.s32	q4, d25, d27
++	vmlal.s32	q8, d29, d27
++	vmlal.s32	q8, d25, d26
++	vmlal.s32	q7, d28, d27
++	vmlal.s32	q7, d29, d26
++	add		r2, sp, #576
++	vld1.8		{d28-d29}, [r2, : 128]
++	vmlal.s32	q4, d24, d29
++	vmlal.s32	q8, d23, d29
++	vmlal.s32	q8, d24, d28
++	vmlal.s32	q7, d22, d29
++	vmlal.s32	q7, d23, d28
++	vst1.8		{d8-d9}, [r2, : 128]
++	add		r2, sp, #528
++	vld1.8		{d8-d9}, [r2, : 128]
++	vmlal.s32	q7, d24, d9
++	vmlal.s32	q7, d25, d31
++	vmull.s32	q1, d18, d2
++	vmlal.s32	q1, d19, d1
++	vmlal.s32	q1, d22, d0
++	vmlal.s32	q1, d24, d27
++	vmlal.s32	q1, d23, d20
++	vmlal.s32	q1, d12, d7
++	vmlal.s32	q1, d13, d6
++	vmull.s32	q6, d18, d1
++	vmlal.s32	q6, d19, d0
++	vmlal.s32	q6, d23, d27
++	vmlal.s32	q6, d22, d20
++	vmlal.s32	q6, d24, d26
++	vmull.s32	q0, d18, d0
++	vmlal.s32	q0, d22, d27
++	vmlal.s32	q0, d23, d26
++	vmlal.s32	q0, d24, d31
++	vmlal.s32	q0, d19, d20
++	add		r2, sp, #608
++	vld1.8		{d18-d19}, [r2, : 128]
++	vmlal.s32	q2, d18, d7
++	vmlal.s32	q5, d18, d6
++	vmlal.s32	q1, d18, d21
++	vmlal.s32	q0, d18, d28
++	vmlal.s32	q6, d18, d29
++	vmlal.s32	q2, d19, d6
++	vmlal.s32	q5, d19, d21
++	vmlal.s32	q1, d19, d29
++	vmlal.s32	q0, d19, d9
++	vmlal.s32	q6, d19, d28
++	add		r2, sp, #560
++	vld1.8		{d18-d19}, [r2, : 128]
++	add		r2, sp, #480
++	vld1.8		{d22-d23}, [r2, : 128]
++	vmlal.s32	q5, d19, d7
++	vmlal.s32	q0, d18, d21
++	vmlal.s32	q0, d19, d29
++	vmlal.s32	q6, d18, d6
++	add		r2, sp, #496
++	vld1.8		{d6-d7}, [r2, : 128]
++	vmlal.s32	q6, d19, d21
++	add		r2, sp, #544
++	vld1.8		{d18-d19}, [r2, : 128]
++	vmlal.s32	q0, d30, d8
++	add		r2, sp, #640
++	vld1.8		{d20-d21}, [r2, : 128]
++	vmlal.s32	q5, d30, d29
++	add		r2, sp, #576
++	vld1.8		{d24-d25}, [r2, : 128]
++	vmlal.s32	q1, d30, d28
++	vadd.i64	q13, q0, q11
++	vadd.i64	q14, q5, q11
++	vmlal.s32	q6, d30, d9
++	vshr.s64	q4, q13, #26
++	vshr.s64	q13, q14, #26
++	vadd.i64	q7, q7, q4
++	vshl.i64	q4, q4, #26
++	vadd.i64	q14, q7, q3
++	vadd.i64	q9, q9, q13
++	vshl.i64	q13, q13, #26
++	vadd.i64	q15, q9, q3
++	vsub.i64	q0, q0, q4
++	vshr.s64	q4, q14, #25
++	vsub.i64	q5, q5, q13
++	vshr.s64	q13, q15, #25
++	vadd.i64	q6, q6, q4
++	vshl.i64	q4, q4, #25
++	vadd.i64	q14, q6, q11
++	vadd.i64	q2, q2, q13
++	vsub.i64	q4, q7, q4
++	vshr.s64	q7, q14, #26
++	vshl.i64	q13, q13, #25
++	vadd.i64	q14, q2, q11
++	vadd.i64	q8, q8, q7
++	vshl.i64	q7, q7, #26
++	vadd.i64	q15, q8, q3
++	vsub.i64	q9, q9, q13
++	vshr.s64	q13, q14, #26
++	vsub.i64	q6, q6, q7
++	vshr.s64	q7, q15, #25
++	vadd.i64	q10, q10, q13
++	vshl.i64	q13, q13, #26
++	vadd.i64	q14, q10, q3
++	vadd.i64	q1, q1, q7
++	add		r2, r3, #144
++	vshl.i64	q7, q7, #25
++	add		r4, r3, #96
++	vadd.i64	q15, q1, q11
++	add		r2, r2, #8
++	vsub.i64	q2, q2, q13
++	add		r4, r4, #8
++	vshr.s64	q13, q14, #25
++	vsub.i64	q7, q8, q7
++	vshr.s64	q8, q15, #26
++	vadd.i64	q14, q13, q13
++	vadd.i64	q12, q12, q8
++	vtrn.32		d12, d14
++	vshl.i64	q8, q8, #26
++	vtrn.32		d13, d15
++	vadd.i64	q3, q12, q3
++	vadd.i64	q0, q0, q14
++	vst1.8		d12, [r2, : 64]!
++	vshl.i64	q7, q13, #4
++	vst1.8		d13, [r4, : 64]!
++	vsub.i64	q1, q1, q8
++	vshr.s64	q3, q3, #25
++	vadd.i64	q0, q0, q7
++	vadd.i64	q5, q5, q3
++	vshl.i64	q3, q3, #25
++	vadd.i64	q6, q5, q11
++	vadd.i64	q0, q0, q13
++	vshl.i64	q7, q13, #25
++	vadd.i64	q8, q0, q11
++	vsub.i64	q3, q12, q3
++	vshr.s64	q6, q6, #26
++	vsub.i64	q7, q10, q7
++	vtrn.32		d2, d6
++	vshr.s64	q8, q8, #26
++	vtrn.32		d3, d7
++	vadd.i64	q3, q9, q6
++	vst1.8		d2, [r2, : 64]
++	vshl.i64	q6, q6, #26
++	vst1.8		d3, [r4, : 64]
++	vadd.i64	q1, q4, q8
++	vtrn.32		d4, d14
++	vshl.i64	q4, q8, #26
++	vtrn.32		d5, d15
++	vsub.i64	q5, q5, q6
++	add		r2, r2, #16
++	vsub.i64	q0, q0, q4
++	vst1.8		d4, [r2, : 64]
++	add		r4, r4, #16
++	vst1.8		d5, [r4, : 64]
++	vtrn.32		d10, d6
++	vtrn.32		d11, d7
++	sub		r2, r2, #8
++	sub		r4, r4, #8
++	vtrn.32		d0, d2
++	vtrn.32		d1, d3
++	vst1.8		d10, [r2, : 64]
++	vst1.8		d11, [r4, : 64]
++	sub		r2, r2, #24
++	sub		r4, r4, #24
++	vst1.8		d0, [r2, : 64]
++	vst1.8		d1, [r4, : 64]
++	add		r2, r3, #288
++	add		r4, r3, #336
++	vld1.8		{d0-d1}, [r2, : 128]!
++	vld1.8		{d2-d3}, [r4, : 128]!
++	vsub.i32	q0, q0, q1
++	vld1.8		{d2-d3}, [r2, : 128]!
++	vld1.8		{d4-d5}, [r4, : 128]!
++	vsub.i32	q1, q1, q2
++	add		r5, r3, #240
++	vld1.8		{d4}, [r2, : 64]
++	vld1.8		{d6}, [r4, : 64]
++	vsub.i32	q2, q2, q3
++	vst1.8		{d0-d1}, [r5, : 128]!
++	vst1.8		{d2-d3}, [r5, : 128]!
++	vst1.8		d4, [r5, : 64]
++	add		r2, r3, #144
++	add		r4, r3, #96
++	add		r5, r3, #144
++	add		r6, r3, #192
++	vld1.8		{d0-d1}, [r2, : 128]!
++	vld1.8		{d2-d3}, [r4, : 128]!
++	vsub.i32	q2, q0, q1
++	vadd.i32	q0, q0, q1
++	vld1.8		{d2-d3}, [r2, : 128]!
++	vld1.8		{d6-d7}, [r4, : 128]!
++	vsub.i32	q4, q1, q3
++	vadd.i32	q1, q1, q3
++	vld1.8		{d6}, [r2, : 64]
++	vld1.8		{d10}, [r4, : 64]
++	vsub.i32	q6, q3, q5
++	vadd.i32	q3, q3, q5
++	vst1.8		{d4-d5}, [r5, : 128]!
++	vst1.8		{d0-d1}, [r6, : 128]!
++	vst1.8		{d8-d9}, [r5, : 128]!
++	vst1.8		{d2-d3}, [r6, : 128]!
++	vst1.8		d12, [r5, : 64]
++	vst1.8		d6, [r6, : 64]
++	add		r2, r3, #0
++	add		r4, r3, #240
++	vld1.8		{d0-d1}, [r4, : 128]!
++	vld1.8		{d2-d3}, [r4, : 128]!
++	vld1.8		{d4}, [r4, : 64]
++	add		r4, r3, #336
++	vld1.8		{d6-d7}, [r4, : 128]!
++	vtrn.32		q0, q3
++	vld1.8		{d8-d9}, [r4, : 128]!
++	vshl.i32	q5, q0, #4
++	vtrn.32		q1, q4
++	vshl.i32	q6, q3, #4
++	vadd.i32	q5, q5, q0
++	vadd.i32	q6, q6, q3
++	vshl.i32	q7, q1, #4
++	vld1.8		{d5}, [r4, : 64]
++	vshl.i32	q8, q4, #4
++	vtrn.32		d4, d5
++	vadd.i32	q7, q7, q1
++	vadd.i32	q8, q8, q4
++	vld1.8		{d18-d19}, [r2, : 128]!
++	vshl.i32	q10, q2, #4
++	vld1.8		{d22-d23}, [r2, : 128]!
++	vadd.i32	q10, q10, q2
++	vld1.8		{d24}, [r2, : 64]
++	vadd.i32	q5, q5, q0
++	add		r2, r3, #288
++	vld1.8		{d26-d27}, [r2, : 128]!
++	vadd.i32	q6, q6, q3
++	vld1.8		{d28-d29}, [r2, : 128]!
++	vadd.i32	q8, q8, q4
++	vld1.8		{d25}, [r2, : 64]
++	vadd.i32	q10, q10, q2
++	vtrn.32		q9, q13
++	vadd.i32	q7, q7, q1
++	vadd.i32	q5, q5, q0
++	vtrn.32		q11, q14
++	vadd.i32	q6, q6, q3
++	add		r2, sp, #528
++	vadd.i32	q10, q10, q2
++	vtrn.32		d24, d25
++	vst1.8		{d12-d13}, [r2, : 128]!
++	vshl.i32	q6, q13, #1
++	vst1.8		{d20-d21}, [r2, : 128]!
++	vshl.i32	q10, q14, #1
++	vst1.8		{d12-d13}, [r2, : 128]!
++	vshl.i32	q15, q12, #1
++	vadd.i32	q8, q8, q4
++	vext.32		d10, d31, d30, #0
++	vadd.i32	q7, q7, q1
++	vst1.8		{d16-d17}, [r2, : 128]!
++	vmull.s32	q8, d18, d5
++	vmlal.s32	q8, d26, d4
++	vmlal.s32	q8, d19, d9
++	vmlal.s32	q8, d27, d3
++	vmlal.s32	q8, d22, d8
++	vmlal.s32	q8, d28, d2
++	vmlal.s32	q8, d23, d7
++	vmlal.s32	q8, d29, d1
++	vmlal.s32	q8, d24, d6
++	vmlal.s32	q8, d25, d0
++	vst1.8		{d14-d15}, [r2, : 128]!
++	vmull.s32	q2, d18, d4
++	vmlal.s32	q2, d12, d9
++	vmlal.s32	q2, d13, d8
++	vmlal.s32	q2, d19, d3
++	vmlal.s32	q2, d22, d2
++	vmlal.s32	q2, d23, d1
++	vmlal.s32	q2, d24, d0
++	vst1.8		{d20-d21}, [r2, : 128]!
++	vmull.s32	q7, d18, d9
++	vmlal.s32	q7, d26, d3
++	vmlal.s32	q7, d19, d8
++	vmlal.s32	q7, d27, d2
++	vmlal.s32	q7, d22, d7
++	vmlal.s32	q7, d28, d1
++	vmlal.s32	q7, d23, d6
++	vmlal.s32	q7, d29, d0
++	vst1.8		{d10-d11}, [r2, : 128]!
++	vmull.s32	q5, d18, d3
++	vmlal.s32	q5, d19, d2
++	vmlal.s32	q5, d22, d1
++	vmlal.s32	q5, d23, d0
++	vmlal.s32	q5, d12, d8
++	vst1.8		{d16-d17}, [r2, : 128]!
++	vmull.s32	q4, d18, d8
++	vmlal.s32	q4, d26, d2
++	vmlal.s32	q4, d19, d7
++	vmlal.s32	q4, d27, d1
++	vmlal.s32	q4, d22, d6
++	vmlal.s32	q4, d28, d0
++	vmull.s32	q8, d18, d7
++	vmlal.s32	q8, d26, d1
++	vmlal.s32	q8, d19, d6
++	vmlal.s32	q8, d27, d0
++	add		r2, sp, #544
++	vld1.8		{d20-d21}, [r2, : 128]
++	vmlal.s32	q7, d24, d21
++	vmlal.s32	q7, d25, d20
++	vmlal.s32	q4, d23, d21
++	vmlal.s32	q4, d29, d20
++	vmlal.s32	q8, d22, d21
++	vmlal.s32	q8, d28, d20
++	vmlal.s32	q5, d24, d20
++	vst1.8		{d14-d15}, [r2, : 128]
++	vmull.s32	q7, d18, d6
++	vmlal.s32	q7, d26, d0
++	add		r2, sp, #624
++	vld1.8		{d30-d31}, [r2, : 128]
++	vmlal.s32	q2, d30, d21
++	vmlal.s32	q7, d19, d21
++	vmlal.s32	q7, d27, d20
++	add		r2, sp, #592
++	vld1.8		{d26-d27}, [r2, : 128]
++	vmlal.s32	q4, d25, d27
++	vmlal.s32	q8, d29, d27
++	vmlal.s32	q8, d25, d26
++	vmlal.s32	q7, d28, d27
++	vmlal.s32	q7, d29, d26
++	add		r2, sp, #576
++	vld1.8		{d28-d29}, [r2, : 128]
++	vmlal.s32	q4, d24, d29
++	vmlal.s32	q8, d23, d29
++	vmlal.s32	q8, d24, d28
++	vmlal.s32	q7, d22, d29
++	vmlal.s32	q7, d23, d28
++	vst1.8		{d8-d9}, [r2, : 128]
++	add		r2, sp, #528
++	vld1.8		{d8-d9}, [r2, : 128]
++	vmlal.s32	q7, d24, d9
++	vmlal.s32	q7, d25, d31
++	vmull.s32	q1, d18, d2
++	vmlal.s32	q1, d19, d1
++	vmlal.s32	q1, d22, d0
++	vmlal.s32	q1, d24, d27
++	vmlal.s32	q1, d23, d20
++	vmlal.s32	q1, d12, d7
++	vmlal.s32	q1, d13, d6
++	vmull.s32	q6, d18, d1
++	vmlal.s32	q6, d19, d0
++	vmlal.s32	q6, d23, d27
++	vmlal.s32	q6, d22, d20
++	vmlal.s32	q6, d24, d26
++	vmull.s32	q0, d18, d0
++	vmlal.s32	q0, d22, d27
++	vmlal.s32	q0, d23, d26
++	vmlal.s32	q0, d24, d31
++	vmlal.s32	q0, d19, d20
++	add		r2, sp, #608
++	vld1.8		{d18-d19}, [r2, : 128]
++	vmlal.s32	q2, d18, d7
++	vmlal.s32	q5, d18, d6
++	vmlal.s32	q1, d18, d21
++	vmlal.s32	q0, d18, d28
++	vmlal.s32	q6, d18, d29
++	vmlal.s32	q2, d19, d6
++	vmlal.s32	q5, d19, d21
++	vmlal.s32	q1, d19, d29
++	vmlal.s32	q0, d19, d9
++	vmlal.s32	q6, d19, d28
++	add		r2, sp, #560
++	vld1.8		{d18-d19}, [r2, : 128]
++	add		r2, sp, #480
++	vld1.8		{d22-d23}, [r2, : 128]
++	vmlal.s32	q5, d19, d7
++	vmlal.s32	q0, d18, d21
++	vmlal.s32	q0, d19, d29
++	vmlal.s32	q6, d18, d6
++	add		r2, sp, #496
++	vld1.8		{d6-d7}, [r2, : 128]
++	vmlal.s32	q6, d19, d21
++	add		r2, sp, #544
++	vld1.8		{d18-d19}, [r2, : 128]
++	vmlal.s32	q0, d30, d8
++	add		r2, sp, #640
++	vld1.8		{d20-d21}, [r2, : 128]
++	vmlal.s32	q5, d30, d29
++	add		r2, sp, #576
++	vld1.8		{d24-d25}, [r2, : 128]
++	vmlal.s32	q1, d30, d28
++	vadd.i64	q13, q0, q11
++	vadd.i64	q14, q5, q11
++	vmlal.s32	q6, d30, d9
++	vshr.s64	q4, q13, #26
++	vshr.s64	q13, q14, #26
++	vadd.i64	q7, q7, q4
++	vshl.i64	q4, q4, #26
++	vadd.i64	q14, q7, q3
++	vadd.i64	q9, q9, q13
++	vshl.i64	q13, q13, #26
++	vadd.i64	q15, q9, q3
++	vsub.i64	q0, q0, q4
++	vshr.s64	q4, q14, #25
++	vsub.i64	q5, q5, q13
++	vshr.s64	q13, q15, #25
++	vadd.i64	q6, q6, q4
++	vshl.i64	q4, q4, #25
++	vadd.i64	q14, q6, q11
++	vadd.i64	q2, q2, q13
++	vsub.i64	q4, q7, q4
++	vshr.s64	q7, q14, #26
++	vshl.i64	q13, q13, #25
++	vadd.i64	q14, q2, q11
++	vadd.i64	q8, q8, q7
++	vshl.i64	q7, q7, #26
++	vadd.i64	q15, q8, q3
++	vsub.i64	q9, q9, q13
++	vshr.s64	q13, q14, #26
++	vsub.i64	q6, q6, q7
++	vshr.s64	q7, q15, #25
++	vadd.i64	q10, q10, q13
++	vshl.i64	q13, q13, #26
++	vadd.i64	q14, q10, q3
++	vadd.i64	q1, q1, q7
++	add		r2, r3, #288
++	vshl.i64	q7, q7, #25
++	add		r4, r3, #96
++	vadd.i64	q15, q1, q11
++	add		r2, r2, #8
++	vsub.i64	q2, q2, q13
++	add		r4, r4, #8
++	vshr.s64	q13, q14, #25
++	vsub.i64	q7, q8, q7
++	vshr.s64	q8, q15, #26
++	vadd.i64	q14, q13, q13
++	vadd.i64	q12, q12, q8
++	vtrn.32		d12, d14
++	vshl.i64	q8, q8, #26
++	vtrn.32		d13, d15
++	vadd.i64	q3, q12, q3
++	vadd.i64	q0, q0, q14
++	vst1.8		d12, [r2, : 64]!
++	vshl.i64	q7, q13, #4
++	vst1.8		d13, [r4, : 64]!
++	vsub.i64	q1, q1, q8
++	vshr.s64	q3, q3, #25
++	vadd.i64	q0, q0, q7
++	vadd.i64	q5, q5, q3
++	vshl.i64	q3, q3, #25
++	vadd.i64	q6, q5, q11
++	vadd.i64	q0, q0, q13
++	vshl.i64	q7, q13, #25
++	vadd.i64	q8, q0, q11
++	vsub.i64	q3, q12, q3
++	vshr.s64	q6, q6, #26
++	vsub.i64	q7, q10, q7
++	vtrn.32		d2, d6
++	vshr.s64	q8, q8, #26
++	vtrn.32		d3, d7
++	vadd.i64	q3, q9, q6
++	vst1.8		d2, [r2, : 64]
++	vshl.i64	q6, q6, #26
++	vst1.8		d3, [r4, : 64]
++	vadd.i64	q1, q4, q8
++	vtrn.32		d4, d14
++	vshl.i64	q4, q8, #26
++	vtrn.32		d5, d15
++	vsub.i64	q5, q5, q6
++	add		r2, r2, #16
++	vsub.i64	q0, q0, q4
++	vst1.8		d4, [r2, : 64]
++	add		r4, r4, #16
++	vst1.8		d5, [r4, : 64]
++	vtrn.32		d10, d6
++	vtrn.32		d11, d7
++	sub		r2, r2, #8
++	sub		r4, r4, #8
++	vtrn.32		d0, d2
++	vtrn.32		d1, d3
++	vst1.8		d10, [r2, : 64]
++	vst1.8		d11, [r4, : 64]
++	sub		r2, r2, #24
++	sub		r4, r4, #24
++	vst1.8		d0, [r2, : 64]
++	vst1.8		d1, [r4, : 64]
++	add		r2, sp, #512
++	add		r4, r3, #144
++	add		r5, r3, #192
++	vld1.8		{d0-d1}, [r2, : 128]
++	vld1.8		{d2-d3}, [r4, : 128]!
++	vld1.8		{d4-d5}, [r5, : 128]!
++	vzip.i32	q1, q2
++	vld1.8		{d6-d7}, [r4, : 128]!
++	vld1.8		{d8-d9}, [r5, : 128]!
++	vshl.i32	q5, q1, #1
++	vzip.i32	q3, q4
++	vshl.i32	q6, q2, #1
++	vld1.8		{d14}, [r4, : 64]
++	vshl.i32	q8, q3, #1
++	vld1.8		{d15}, [r5, : 64]
++	vshl.i32	q9, q4, #1
++	vmul.i32	d21, d7, d1
++	vtrn.32		d14, d15
++	vmul.i32	q11, q4, q0
++	vmul.i32	q0, q7, q0
++	vmull.s32	q12, d2, d2
++	vmlal.s32	q12, d11, d1
++	vmlal.s32	q12, d12, d0
++	vmlal.s32	q12, d13, d23
++	vmlal.s32	q12, d16, d22
++	vmlal.s32	q12, d7, d21
++	vmull.s32	q10, d2, d11
++	vmlal.s32	q10, d4, d1
++	vmlal.s32	q10, d13, d0
++	vmlal.s32	q10, d6, d23
++	vmlal.s32	q10, d17, d22
++	vmull.s32	q13, d10, d4
++	vmlal.s32	q13, d11, d3
++	vmlal.s32	q13, d13, d1
++	vmlal.s32	q13, d16, d0
++	vmlal.s32	q13, d17, d23
++	vmlal.s32	q13, d8, d22
++	vmull.s32	q1, d10, d5
++	vmlal.s32	q1, d11, d4
++	vmlal.s32	q1, d6, d1
++	vmlal.s32	q1, d17, d0
++	vmlal.s32	q1, d8, d23
++	vmull.s32	q14, d10, d6
++	vmlal.s32	q14, d11, d13
++	vmlal.s32	q14, d4, d4
++	vmlal.s32	q14, d17, d1
++	vmlal.s32	q14, d18, d0
++	vmlal.s32	q14, d9, d23
++	vmull.s32	q11, d10, d7
++	vmlal.s32	q11, d11, d6
++	vmlal.s32	q11, d12, d5
++	vmlal.s32	q11, d8, d1
++	vmlal.s32	q11, d19, d0
++	vmull.s32	q15, d10, d8
++	vmlal.s32	q15, d11, d17
++	vmlal.s32	q15, d12, d6
++	vmlal.s32	q15, d13, d5
++	vmlal.s32	q15, d19, d1
++	vmlal.s32	q15, d14, d0
++	vmull.s32	q2, d10, d9
++	vmlal.s32	q2, d11, d8
++	vmlal.s32	q2, d12, d7
++	vmlal.s32	q2, d13, d6
++	vmlal.s32	q2, d14, d1
++	vmull.s32	q0, d15, d1
++	vmlal.s32	q0, d10, d14
++	vmlal.s32	q0, d11, d19
++	vmlal.s32	q0, d12, d8
++	vmlal.s32	q0, d13, d17
++	vmlal.s32	q0, d6, d6
++	add		r2, sp, #480
++	vld1.8		{d18-d19}, [r2, : 128]!
++	vmull.s32	q3, d16, d7
++	vmlal.s32	q3, d10, d15
++	vmlal.s32	q3, d11, d14
++	vmlal.s32	q3, d12, d9
++	vmlal.s32	q3, d13, d8
++	vld1.8		{d8-d9}, [r2, : 128]
++	vadd.i64	q5, q12, q9
++	vadd.i64	q6, q15, q9
++	vshr.s64	q5, q5, #26
++	vshr.s64	q6, q6, #26
++	vadd.i64	q7, q10, q5
++	vshl.i64	q5, q5, #26
++	vadd.i64	q8, q7, q4
++	vadd.i64	q2, q2, q6
++	vshl.i64	q6, q6, #26
++	vadd.i64	q10, q2, q4
++	vsub.i64	q5, q12, q5
++	vshr.s64	q8, q8, #25
++	vsub.i64	q6, q15, q6
++	vshr.s64	q10, q10, #25
++	vadd.i64	q12, q13, q8
++	vshl.i64	q8, q8, #25
++	vadd.i64	q13, q12, q9
++	vadd.i64	q0, q0, q10
++	vsub.i64	q7, q7, q8
++	vshr.s64	q8, q13, #26
++	vshl.i64	q10, q10, #25
++	vadd.i64	q13, q0, q9
++	vadd.i64	q1, q1, q8
++	vshl.i64	q8, q8, #26
++	vadd.i64	q15, q1, q4
++	vsub.i64	q2, q2, q10
++	vshr.s64	q10, q13, #26
++	vsub.i64	q8, q12, q8
++	vshr.s64	q12, q15, #25
++	vadd.i64	q3, q3, q10
++	vshl.i64	q10, q10, #26
++	vadd.i64	q13, q3, q4
++	vadd.i64	q14, q14, q12
++	add		r2, r3, #144
++	vshl.i64	q12, q12, #25
++	add		r4, r3, #192
++	vadd.i64	q15, q14, q9
++	add		r2, r2, #8
++	vsub.i64	q0, q0, q10
++	add		r4, r4, #8
++	vshr.s64	q10, q13, #25
++	vsub.i64	q1, q1, q12
++	vshr.s64	q12, q15, #26
++	vadd.i64	q13, q10, q10
++	vadd.i64	q11, q11, q12
++	vtrn.32		d16, d2
++	vshl.i64	q12, q12, #26
++	vtrn.32		d17, d3
++	vadd.i64	q1, q11, q4
++	vadd.i64	q4, q5, q13
++	vst1.8		d16, [r2, : 64]!
++	vshl.i64	q5, q10, #4
++	vst1.8		d17, [r4, : 64]!
++	vsub.i64	q8, q14, q12
++	vshr.s64	q1, q1, #25
++	vadd.i64	q4, q4, q5
++	vadd.i64	q5, q6, q1
++	vshl.i64	q1, q1, #25
++	vadd.i64	q6, q5, q9
++	vadd.i64	q4, q4, q10
++	vshl.i64	q10, q10, #25
++	vadd.i64	q9, q4, q9
++	vsub.i64	q1, q11, q1
++	vshr.s64	q6, q6, #26
++	vsub.i64	q3, q3, q10
++	vtrn.32		d16, d2
++	vshr.s64	q9, q9, #26
++	vtrn.32		d17, d3
++	vadd.i64	q1, q2, q6
++	vst1.8		d16, [r2, : 64]
++	vshl.i64	q2, q6, #26
++	vst1.8		d17, [r4, : 64]
++	vadd.i64	q6, q7, q9
++	vtrn.32		d0, d6
++	vshl.i64	q7, q9, #26
++	vtrn.32		d1, d7
++	vsub.i64	q2, q5, q2
++	add		r2, r2, #16
++	vsub.i64	q3, q4, q7
++	vst1.8		d0, [r2, : 64]
++	add		r4, r4, #16
++	vst1.8		d1, [r4, : 64]
++	vtrn.32		d4, d2
++	vtrn.32		d5, d3
++	sub		r2, r2, #8
++	sub		r4, r4, #8
++	vtrn.32		d6, d12
++	vtrn.32		d7, d13
++	vst1.8		d4, [r2, : 64]
++	vst1.8		d5, [r4, : 64]
++	sub		r2, r2, #24
++	sub		r4, r4, #24
++	vst1.8		d6, [r2, : 64]
++	vst1.8		d7, [r4, : 64]
++	add		r2, r3, #336
++	add		r4, r3, #288
++	vld1.8		{d0-d1}, [r2, : 128]!
++	vld1.8		{d2-d3}, [r4, : 128]!
++	vadd.i32	q0, q0, q1
++	vld1.8		{d2-d3}, [r2, : 128]!
++	vld1.8		{d4-d5}, [r4, : 128]!
++	vadd.i32	q1, q1, q2
++	add		r5, r3, #288
++	vld1.8		{d4}, [r2, : 64]
++	vld1.8		{d6}, [r4, : 64]
++	vadd.i32	q2, q2, q3
++	vst1.8		{d0-d1}, [r5, : 128]!
++	vst1.8		{d2-d3}, [r5, : 128]!
++	vst1.8		d4, [r5, : 64]
++	add		r2, r3, #48
++	add		r4, r3, #144
++	vld1.8		{d0-d1}, [r4, : 128]!
++	vld1.8		{d2-d3}, [r4, : 128]!
++	vld1.8		{d4}, [r4, : 64]
++	add		r4, r3, #288
++	vld1.8		{d6-d7}, [r4, : 128]!
++	vtrn.32		q0, q3
++	vld1.8		{d8-d9}, [r4, : 128]!
++	vshl.i32	q5, q0, #4
++	vtrn.32		q1, q4
++	vshl.i32	q6, q3, #4
++	vadd.i32	q5, q5, q0
++	vadd.i32	q6, q6, q3
++	vshl.i32	q7, q1, #4
++	vld1.8		{d5}, [r4, : 64]
++	vshl.i32	q8, q4, #4
++	vtrn.32		d4, d5
++	vadd.i32	q7, q7, q1
++	vadd.i32	q8, q8, q4
++	vld1.8		{d18-d19}, [r2, : 128]!
++	vshl.i32	q10, q2, #4
++	vld1.8		{d22-d23}, [r2, : 128]!
++	vadd.i32	q10, q10, q2
++	vld1.8		{d24}, [r2, : 64]
++	vadd.i32	q5, q5, q0
++	add		r2, r3, #240
++	vld1.8		{d26-d27}, [r2, : 128]!
++	vadd.i32	q6, q6, q3
++	vld1.8		{d28-d29}, [r2, : 128]!
++	vadd.i32	q8, q8, q4
++	vld1.8		{d25}, [r2, : 64]
++	vadd.i32	q10, q10, q2
++	vtrn.32		q9, q13
++	vadd.i32	q7, q7, q1
++	vadd.i32	q5, q5, q0
++	vtrn.32		q11, q14
++	vadd.i32	q6, q6, q3
++	add		r2, sp, #528
++	vadd.i32	q10, q10, q2
++	vtrn.32		d24, d25
++	vst1.8		{d12-d13}, [r2, : 128]!
++	vshl.i32	q6, q13, #1
++	vst1.8		{d20-d21}, [r2, : 128]!
++	vshl.i32	q10, q14, #1
++	vst1.8		{d12-d13}, [r2, : 128]!
++	vshl.i32	q15, q12, #1
++	vadd.i32	q8, q8, q4
++	vext.32		d10, d31, d30, #0
++	vadd.i32	q7, q7, q1
++	vst1.8		{d16-d17}, [r2, : 128]!
++	vmull.s32	q8, d18, d5
++	vmlal.s32	q8, d26, d4
++	vmlal.s32	q8, d19, d9
++	vmlal.s32	q8, d27, d3
++	vmlal.s32	q8, d22, d8
++	vmlal.s32	q8, d28, d2
++	vmlal.s32	q8, d23, d7
++	vmlal.s32	q8, d29, d1
++	vmlal.s32	q8, d24, d6
++	vmlal.s32	q8, d25, d0
++	vst1.8		{d14-d15}, [r2, : 128]!
++	vmull.s32	q2, d18, d4
++	vmlal.s32	q2, d12, d9
++	vmlal.s32	q2, d13, d8
++	vmlal.s32	q2, d19, d3
++	vmlal.s32	q2, d22, d2
++	vmlal.s32	q2, d23, d1
++	vmlal.s32	q2, d24, d0
++	vst1.8		{d20-d21}, [r2, : 128]!
++	vmull.s32	q7, d18, d9
++	vmlal.s32	q7, d26, d3
++	vmlal.s32	q7, d19, d8
++	vmlal.s32	q7, d27, d2
++	vmlal.s32	q7, d22, d7
++	vmlal.s32	q7, d28, d1
++	vmlal.s32	q7, d23, d6
++	vmlal.s32	q7, d29, d0
++	vst1.8		{d10-d11}, [r2, : 128]!
++	vmull.s32	q5, d18, d3
++	vmlal.s32	q5, d19, d2
++	vmlal.s32	q5, d22, d1
++	vmlal.s32	q5, d23, d0
++	vmlal.s32	q5, d12, d8
++	vst1.8		{d16-d17}, [r2, : 128]!
++	vmull.s32	q4, d18, d8
++	vmlal.s32	q4, d26, d2
++	vmlal.s32	q4, d19, d7
++	vmlal.s32	q4, d27, d1
++	vmlal.s32	q4, d22, d6
++	vmlal.s32	q4, d28, d0
++	vmull.s32	q8, d18, d7
++	vmlal.s32	q8, d26, d1
++	vmlal.s32	q8, d19, d6
++	vmlal.s32	q8, d27, d0
++	add		r2, sp, #544
++	vld1.8		{d20-d21}, [r2, : 128]
++	vmlal.s32	q7, d24, d21
++	vmlal.s32	q7, d25, d20
++	vmlal.s32	q4, d23, d21
++	vmlal.s32	q4, d29, d20
++	vmlal.s32	q8, d22, d21
++	vmlal.s32	q8, d28, d20
++	vmlal.s32	q5, d24, d20
++	vst1.8		{d14-d15}, [r2, : 128]
++	vmull.s32	q7, d18, d6
++	vmlal.s32	q7, d26, d0
++	add		r2, sp, #624
++	vld1.8		{d30-d31}, [r2, : 128]
++	vmlal.s32	q2, d30, d21
++	vmlal.s32	q7, d19, d21
++	vmlal.s32	q7, d27, d20
++	add		r2, sp, #592
++	vld1.8		{d26-d27}, [r2, : 128]
++	vmlal.s32	q4, d25, d27
++	vmlal.s32	q8, d29, d27
++	vmlal.s32	q8, d25, d26
++	vmlal.s32	q7, d28, d27
++	vmlal.s32	q7, d29, d26
++	add		r2, sp, #576
++	vld1.8		{d28-d29}, [r2, : 128]
++	vmlal.s32	q4, d24, d29
++	vmlal.s32	q8, d23, d29
++	vmlal.s32	q8, d24, d28
++	vmlal.s32	q7, d22, d29
++	vmlal.s32	q7, d23, d28
++	vst1.8		{d8-d9}, [r2, : 128]
++	add		r2, sp, #528
++	vld1.8		{d8-d9}, [r2, : 128]
++	vmlal.s32	q7, d24, d9
++	vmlal.s32	q7, d25, d31
++	vmull.s32	q1, d18, d2
++	vmlal.s32	q1, d19, d1
++	vmlal.s32	q1, d22, d0
++	vmlal.s32	q1, d24, d27
++	vmlal.s32	q1, d23, d20
++	vmlal.s32	q1, d12, d7
++	vmlal.s32	q1, d13, d6
++	vmull.s32	q6, d18, d1
++	vmlal.s32	q6, d19, d0
++	vmlal.s32	q6, d23, d27
++	vmlal.s32	q6, d22, d20
++	vmlal.s32	q6, d24, d26
++	vmull.s32	q0, d18, d0
++	vmlal.s32	q0, d22, d27
++	vmlal.s32	q0, d23, d26
++	vmlal.s32	q0, d24, d31
++	vmlal.s32	q0, d19, d20
++	add		r2, sp, #608
++	vld1.8		{d18-d19}, [r2, : 128]
++	vmlal.s32	q2, d18, d7
++	vmlal.s32	q5, d18, d6
++	vmlal.s32	q1, d18, d21
++	vmlal.s32	q0, d18, d28
++	vmlal.s32	q6, d18, d29
++	vmlal.s32	q2, d19, d6
++	vmlal.s32	q5, d19, d21
++	vmlal.s32	q1, d19, d29
++	vmlal.s32	q0, d19, d9
++	vmlal.s32	q6, d19, d28
++	add		r2, sp, #560
++	vld1.8		{d18-d19}, [r2, : 128]
++	add		r2, sp, #480
++	vld1.8		{d22-d23}, [r2, : 128]
++	vmlal.s32	q5, d19, d7
++	vmlal.s32	q0, d18, d21
++	vmlal.s32	q0, d19, d29
++	vmlal.s32	q6, d18, d6
++	add		r2, sp, #496
++	vld1.8		{d6-d7}, [r2, : 128]
++	vmlal.s32	q6, d19, d21
++	add		r2, sp, #544
++	vld1.8		{d18-d19}, [r2, : 128]
++	vmlal.s32	q0, d30, d8
++	add		r2, sp, #640
++	vld1.8		{d20-d21}, [r2, : 128]
++	vmlal.s32	q5, d30, d29
++	add		r2, sp, #576
++	vld1.8		{d24-d25}, [r2, : 128]
++	vmlal.s32	q1, d30, d28
++	vadd.i64	q13, q0, q11
++	vadd.i64	q14, q5, q11
++	vmlal.s32	q6, d30, d9
++	vshr.s64	q4, q13, #26
++	vshr.s64	q13, q14, #26
++	vadd.i64	q7, q7, q4
++	vshl.i64	q4, q4, #26
++	vadd.i64	q14, q7, q3
++	vadd.i64	q9, q9, q13
++	vshl.i64	q13, q13, #26
++	vadd.i64	q15, q9, q3
++	vsub.i64	q0, q0, q4
++	vshr.s64	q4, q14, #25
++	vsub.i64	q5, q5, q13
++	vshr.s64	q13, q15, #25
++	vadd.i64	q6, q6, q4
++	vshl.i64	q4, q4, #25
++	vadd.i64	q14, q6, q11
++	vadd.i64	q2, q2, q13
++	vsub.i64	q4, q7, q4
++	vshr.s64	q7, q14, #26
++	vshl.i64	q13, q13, #25
++	vadd.i64	q14, q2, q11
++	vadd.i64	q8, q8, q7
++	vshl.i64	q7, q7, #26
++	vadd.i64	q15, q8, q3
++	vsub.i64	q9, q9, q13
++	vshr.s64	q13, q14, #26
++	vsub.i64	q6, q6, q7
++	vshr.s64	q7, q15, #25
++	vadd.i64	q10, q10, q13
++	vshl.i64	q13, q13, #26
++	vadd.i64	q14, q10, q3
++	vadd.i64	q1, q1, q7
++	add		r2, r3, #240
++	vshl.i64	q7, q7, #25
++	add		r4, r3, #144
++	vadd.i64	q15, q1, q11
++	add		r2, r2, #8
++	vsub.i64	q2, q2, q13
++	add		r4, r4, #8
++	vshr.s64	q13, q14, #25
++	vsub.i64	q7, q8, q7
++	vshr.s64	q8, q15, #26
++	vadd.i64	q14, q13, q13
++	vadd.i64	q12, q12, q8
++	vtrn.32		d12, d14
++	vshl.i64	q8, q8, #26
++	vtrn.32		d13, d15
++	vadd.i64	q3, q12, q3
++	vadd.i64	q0, q0, q14
++	vst1.8		d12, [r2, : 64]!
++	vshl.i64	q7, q13, #4
++	vst1.8		d13, [r4, : 64]!
++	vsub.i64	q1, q1, q8
++	vshr.s64	q3, q3, #25
++	vadd.i64	q0, q0, q7
++	vadd.i64	q5, q5, q3
++	vshl.i64	q3, q3, #25
++	vadd.i64	q6, q5, q11
++	vadd.i64	q0, q0, q13
++	vshl.i64	q7, q13, #25
++	vadd.i64	q8, q0, q11
++	vsub.i64	q3, q12, q3
++	vshr.s64	q6, q6, #26
++	vsub.i64	q7, q10, q7
++	vtrn.32		d2, d6
++	vshr.s64	q8, q8, #26
++	vtrn.32		d3, d7
++	vadd.i64	q3, q9, q6
++	vst1.8		d2, [r2, : 64]
++	vshl.i64	q6, q6, #26
++	vst1.8		d3, [r4, : 64]
++	vadd.i64	q1, q4, q8
++	vtrn.32		d4, d14
++	vshl.i64	q4, q8, #26
++	vtrn.32		d5, d15
++	vsub.i64	q5, q5, q6
++	add		r2, r2, #16
++	vsub.i64	q0, q0, q4
++	vst1.8		d4, [r2, : 64]
++	add		r4, r4, #16
++	vst1.8		d5, [r4, : 64]
++	vtrn.32		d10, d6
++	vtrn.32		d11, d7
++	sub		r2, r2, #8
++	sub		r4, r4, #8
++	vtrn.32		d0, d2
++	vtrn.32		d1, d3
++	vst1.8		d10, [r2, : 64]
++	vst1.8		d11, [r4, : 64]
++	sub		r2, r2, #24
++	sub		r4, r4, #24
++	vst1.8		d0, [r2, : 64]
++	vst1.8		d1, [r4, : 64]
++	ldr		r2, [sp, #456]
++	ldr		r4, [sp, #460]
++	subs		r5, r2, #1
++	bge		.Lmainloop
++	add		r1, r3, #144
++	add		r2, r3, #336
++	vld1.8		{d0-d1}, [r1, : 128]!
++	vld1.8		{d2-d3}, [r1, : 128]!
++	vld1.8		{d4}, [r1, : 64]
++	vst1.8		{d0-d1}, [r2, : 128]!
++	vst1.8		{d2-d3}, [r2, : 128]!
++	vst1.8		d4, [r2, : 64]
++	movw		r1, #0
++.Linvertloop:
++	add		r2, r3, #144
++	movw		r4, #0
++	movw		r5, #2
++	cmp		r1, #1
++	moveq		r5, #1
++	addeq		r2, r3, #336
++	addeq		r4, r3, #48
++	cmp		r1, #2
++	moveq		r5, #1
++	addeq		r2, r3, #48
++	cmp		r1, #3
++	moveq		r5, #5
++	addeq		r4, r3, #336
++	cmp		r1, #4
++	moveq		r5, #10
++	cmp		r1, #5
++	moveq		r5, #20
++	cmp		r1, #6
++	moveq		r5, #10
++	addeq		r2, r3, #336
++	addeq		r4, r3, #336
++	cmp		r1, #7
++	moveq		r5, #50
++	cmp		r1, #8
++	moveq		r5, #100
++	cmp		r1, #9
++	moveq		r5, #50
++	addeq		r2, r3, #336
++	cmp		r1, #10
++	moveq		r5, #5
++	addeq		r2, r3, #48
++	cmp		r1, #11
++	moveq		r5, #0
++	addeq		r2, r3, #96
++	add		r6, r3, #144
++	add		r7, r3, #288
++	vld1.8		{d0-d1}, [r6, : 128]!
++	vld1.8		{d2-d3}, [r6, : 128]!
++	vld1.8		{d4}, [r6, : 64]
++	vst1.8		{d0-d1}, [r7, : 128]!
++	vst1.8		{d2-d3}, [r7, : 128]!
++	vst1.8		d4, [r7, : 64]
++	cmp		r5, #0
++	beq		.Lskipsquaringloop
++.Lsquaringloop:
++	add		r6, r3, #288
++	add		r7, r3, #288
++	add		r8, r3, #288
++	vmov.i32	q0, #19
++	vmov.i32	q1, #0
++	vmov.i32	q2, #1
++	vzip.i32	q1, q2
++	vld1.8		{d4-d5}, [r7, : 128]!
++	vld1.8		{d6-d7}, [r7, : 128]!
++	vld1.8		{d9}, [r7, : 64]
++	vld1.8		{d10-d11}, [r6, : 128]!
++	add		r7, sp, #384
++	vld1.8		{d12-d13}, [r6, : 128]!
++	vmul.i32	q7, q2, q0
++	vld1.8		{d8}, [r6, : 64]
++	vext.32		d17, d11, d10, #1
++	vmul.i32	q9, q3, q0
++	vext.32		d16, d10, d8, #1
++	vshl.u32	q10, q5, q1
++	vext.32		d22, d14, d4, #1
++	vext.32		d24, d18, d6, #1
++	vshl.u32	q13, q6, q1
++	vshl.u32	d28, d8, d2
++	vrev64.i32	d22, d22
++	vmul.i32	d1, d9, d1
++	vrev64.i32	d24, d24
++	vext.32		d29, d8, d13, #1
++	vext.32		d0, d1, d9, #1
++	vrev64.i32	d0, d0
++	vext.32		d2, d9, d1, #1
++	vext.32		d23, d15, d5, #1
++	vmull.s32	q4, d20, d4
++	vrev64.i32	d23, d23
++	vmlal.s32	q4, d21, d1
++	vrev64.i32	d2, d2
++	vmlal.s32	q4, d26, d19
++	vext.32		d3, d5, d15, #1
++	vmlal.s32	q4, d27, d18
++	vrev64.i32	d3, d3
++	vmlal.s32	q4, d28, d15
++	vext.32		d14, d12, d11, #1
++	vmull.s32	q5, d16, d23
++	vext.32		d15, d13, d12, #1
++	vmlal.s32	q5, d17, d4
++	vst1.8		d8, [r7, : 64]!
++	vmlal.s32	q5, d14, d1
++	vext.32		d12, d9, d8, #0
++	vmlal.s32	q5, d15, d19
++	vmov.i64	d13, #0
++	vmlal.s32	q5, d29, d18
++	vext.32		d25, d19, d7, #1
++	vmlal.s32	q6, d20, d5
++	vrev64.i32	d25, d25
++	vmlal.s32	q6, d21, d4
++	vst1.8		d11, [r7, : 64]!
++	vmlal.s32	q6, d26, d1
++	vext.32		d9, d10, d10, #0
++	vmlal.s32	q6, d27, d19
++	vmov.i64	d8, #0
++	vmlal.s32	q6, d28, d18
++	vmlal.s32	q4, d16, d24
++	vmlal.s32	q4, d17, d5
++	vmlal.s32	q4, d14, d4
++	vst1.8		d12, [r7, : 64]!
++	vmlal.s32	q4, d15, d1
++	vext.32		d10, d13, d12, #0
++	vmlal.s32	q4, d29, d19
++	vmov.i64	d11, #0
++	vmlal.s32	q5, d20, d6
++	vmlal.s32	q5, d21, d5
++	vmlal.s32	q5, d26, d4
++	vext.32		d13, d8, d8, #0
++	vmlal.s32	q5, d27, d1
++	vmov.i64	d12, #0
++	vmlal.s32	q5, d28, d19
++	vst1.8		d9, [r7, : 64]!
++	vmlal.s32	q6, d16, d25
++	vmlal.s32	q6, d17, d6
++	vst1.8		d10, [r7, : 64]
++	vmlal.s32	q6, d14, d5
++	vext.32		d8, d11, d10, #0
++	vmlal.s32	q6, d15, d4
++	vmov.i64	d9, #0
++	vmlal.s32	q6, d29, d1
++	vmlal.s32	q4, d20, d7
++	vmlal.s32	q4, d21, d6
++	vmlal.s32	q4, d26, d5
++	vext.32		d11, d12, d12, #0
++	vmlal.s32	q4, d27, d4
++	vmov.i64	d10, #0
++	vmlal.s32	q4, d28, d1
++	vmlal.s32	q5, d16, d0
++	sub		r6, r7, #32
++	vmlal.s32	q5, d17, d7
++	vmlal.s32	q5, d14, d6
++	vext.32		d30, d9, d8, #0
++	vmlal.s32	q5, d15, d5
++	vld1.8		{d31}, [r6, : 64]!
++	vmlal.s32	q5, d29, d4
++	vmlal.s32	q15, d20, d0
++	vext.32		d0, d6, d18, #1
++	vmlal.s32	q15, d21, d25
++	vrev64.i32	d0, d0
++	vmlal.s32	q15, d26, d24
++	vext.32		d1, d7, d19, #1
++	vext.32		d7, d10, d10, #0
++	vmlal.s32	q15, d27, d23
++	vrev64.i32	d1, d1
++	vld1.8		{d6}, [r6, : 64]
++	vmlal.s32	q15, d28, d22
++	vmlal.s32	q3, d16, d4
++	add		r6, r6, #24
++	vmlal.s32	q3, d17, d2
++	vext.32		d4, d31, d30, #0
++	vmov		d17, d11
++	vmlal.s32	q3, d14, d1
++	vext.32		d11, d13, d13, #0
++	vext.32		d13, d30, d30, #0
++	vmlal.s32	q3, d15, d0
++	vext.32		d1, d8, d8, #0
++	vmlal.s32	q3, d29, d3
++	vld1.8		{d5}, [r6, : 64]
++	sub		r6, r6, #16
++	vext.32		d10, d6, d6, #0
++	vmov.i32	q1, #0xffffffff
++	vshl.i64	q4, q1, #25
++	add		r7, sp, #480
++	vld1.8		{d14-d15}, [r7, : 128]
++	vadd.i64	q9, q2, q7
++	vshl.i64	q1, q1, #26
++	vshr.s64	q10, q9, #26
++	vld1.8		{d0}, [r6, : 64]!
++	vadd.i64	q5, q5, q10
++	vand		q9, q9, q1
++	vld1.8		{d16}, [r6, : 64]!
++	add		r6, sp, #496
++	vld1.8		{d20-d21}, [r6, : 128]
++	vadd.i64	q11, q5, q10
++	vsub.i64	q2, q2, q9
++	vshr.s64	q9, q11, #25
++	vext.32		d12, d5, d4, #0
++	vand		q11, q11, q4
++	vadd.i64	q0, q0, q9
++	vmov		d19, d7
++	vadd.i64	q3, q0, q7
++	vsub.i64	q5, q5, q11
++	vshr.s64	q11, q3, #26
++	vext.32		d18, d11, d10, #0
++	vand		q3, q3, q1
++	vadd.i64	q8, q8, q11
++	vadd.i64	q11, q8, q10
++	vsub.i64	q0, q0, q3
++	vshr.s64	q3, q11, #25
++	vand		q11, q11, q4
++	vadd.i64	q3, q6, q3
++	vadd.i64	q6, q3, q7
++	vsub.i64	q8, q8, q11
++	vshr.s64	q11, q6, #26
++	vand		q6, q6, q1
++	vadd.i64	q9, q9, q11
++	vadd.i64	d25, d19, d21
++	vsub.i64	q3, q3, q6
++	vshr.s64	d23, d25, #25
++	vand		q4, q12, q4
++	vadd.i64	d21, d23, d23
++	vshl.i64	d25, d23, #4
++	vadd.i64	d21, d21, d23
++	vadd.i64	d25, d25, d21
++	vadd.i64	d4, d4, d25
++	vzip.i32	q0, q8
++	vadd.i64	d12, d4, d14
++	add		r6, r8, #8
++	vst1.8		d0, [r6, : 64]
++	vsub.i64	d19, d19, d9
++	add		r6, r6, #16
++	vst1.8		d16, [r6, : 64]
++	vshr.s64	d22, d12, #26
++	vand		q0, q6, q1
++	vadd.i64	d10, d10, d22
++	vzip.i32	q3, q9
++	vsub.i64	d4, d4, d0
++	sub		r6, r6, #8
++	vst1.8		d6, [r6, : 64]
++	add		r6, r6, #16
++	vst1.8		d18, [r6, : 64]
++	vzip.i32	q2, q5
++	sub		r6, r6, #32
++	vst1.8		d4, [r6, : 64]
++	subs		r5, r5, #1
++	bhi		.Lsquaringloop
++.Lskipsquaringloop:
++	mov		r2, r2
++	add		r5, r3, #288
++	add		r6, r3, #144
++	vmov.i32	q0, #19
++	vmov.i32	q1, #0
++	vmov.i32	q2, #1
++	vzip.i32	q1, q2
++	vld1.8		{d4-d5}, [r5, : 128]!
++	vld1.8		{d6-d7}, [r5, : 128]!
++	vld1.8		{d9}, [r5, : 64]
++	vld1.8		{d10-d11}, [r2, : 128]!
++	add		r5, sp, #384
++	vld1.8		{d12-d13}, [r2, : 128]!
++	vmul.i32	q7, q2, q0
++	vld1.8		{d8}, [r2, : 64]
++	vext.32		d17, d11, d10, #1
++	vmul.i32	q9, q3, q0
++	vext.32		d16, d10, d8, #1
++	vshl.u32	q10, q5, q1
++	vext.32		d22, d14, d4, #1
++	vext.32		d24, d18, d6, #1
++	vshl.u32	q13, q6, q1
++	vshl.u32	d28, d8, d2
++	vrev64.i32	d22, d22
++	vmul.i32	d1, d9, d1
++	vrev64.i32	d24, d24
++	vext.32		d29, d8, d13, #1
++	vext.32		d0, d1, d9, #1
++	vrev64.i32	d0, d0
++	vext.32		d2, d9, d1, #1
++	vext.32		d23, d15, d5, #1
++	vmull.s32	q4, d20, d4
++	vrev64.i32	d23, d23
++	vmlal.s32	q4, d21, d1
++	vrev64.i32	d2, d2
++	vmlal.s32	q4, d26, d19
++	vext.32		d3, d5, d15, #1
++	vmlal.s32	q4, d27, d18
++	vrev64.i32	d3, d3
++	vmlal.s32	q4, d28, d15
++	vext.32		d14, d12, d11, #1
++	vmull.s32	q5, d16, d23
++	vext.32		d15, d13, d12, #1
++	vmlal.s32	q5, d17, d4
++	vst1.8		d8, [r5, : 64]!
++	vmlal.s32	q5, d14, d1
++	vext.32		d12, d9, d8, #0
++	vmlal.s32	q5, d15, d19
++	vmov.i64	d13, #0
++	vmlal.s32	q5, d29, d18
++	vext.32		d25, d19, d7, #1
++	vmlal.s32	q6, d20, d5
++	vrev64.i32	d25, d25
++	vmlal.s32	q6, d21, d4
++	vst1.8		d11, [r5, : 64]!
++	vmlal.s32	q6, d26, d1
++	vext.32		d9, d10, d10, #0
++	vmlal.s32	q6, d27, d19
++	vmov.i64	d8, #0
++	vmlal.s32	q6, d28, d18
++	vmlal.s32	q4, d16, d24
++	vmlal.s32	q4, d17, d5
++	vmlal.s32	q4, d14, d4
++	vst1.8		d12, [r5, : 64]!
++	vmlal.s32	q4, d15, d1
++	vext.32		d10, d13, d12, #0
++	vmlal.s32	q4, d29, d19
++	vmov.i64	d11, #0
++	vmlal.s32	q5, d20, d6
++	vmlal.s32	q5, d21, d5
++	vmlal.s32	q5, d26, d4
++	vext.32		d13, d8, d8, #0
++	vmlal.s32	q5, d27, d1
++	vmov.i64	d12, #0
++	vmlal.s32	q5, d28, d19
++	vst1.8		d9, [r5, : 64]!
++	vmlal.s32	q6, d16, d25
++	vmlal.s32	q6, d17, d6
++	vst1.8		d10, [r5, : 64]
++	vmlal.s32	q6, d14, d5
++	vext.32		d8, d11, d10, #0
++	vmlal.s32	q6, d15, d4
++	vmov.i64	d9, #0
++	vmlal.s32	q6, d29, d1
++	vmlal.s32	q4, d20, d7
++	vmlal.s32	q4, d21, d6
++	vmlal.s32	q4, d26, d5
++	vext.32		d11, d12, d12, #0
++	vmlal.s32	q4, d27, d4
++	vmov.i64	d10, #0
++	vmlal.s32	q4, d28, d1
++	vmlal.s32	q5, d16, d0
++	sub		r2, r5, #32
++	vmlal.s32	q5, d17, d7
++	vmlal.s32	q5, d14, d6
++	vext.32		d30, d9, d8, #0
++	vmlal.s32	q5, d15, d5
++	vld1.8		{d31}, [r2, : 64]!
++	vmlal.s32	q5, d29, d4
++	vmlal.s32	q15, d20, d0
++	vext.32		d0, d6, d18, #1
++	vmlal.s32	q15, d21, d25
++	vrev64.i32	d0, d0
++	vmlal.s32	q15, d26, d24
++	vext.32		d1, d7, d19, #1
++	vext.32		d7, d10, d10, #0
++	vmlal.s32	q15, d27, d23
++	vrev64.i32	d1, d1
++	vld1.8		{d6}, [r2, : 64]
++	vmlal.s32	q15, d28, d22
++	vmlal.s32	q3, d16, d4
++	add		r2, r2, #24
++	vmlal.s32	q3, d17, d2
++	vext.32		d4, d31, d30, #0
++	vmov		d17, d11
++	vmlal.s32	q3, d14, d1
++	vext.32		d11, d13, d13, #0
++	vext.32		d13, d30, d30, #0
++	vmlal.s32	q3, d15, d0
++	vext.32		d1, d8, d8, #0
++	vmlal.s32	q3, d29, d3
++	vld1.8		{d5}, [r2, : 64]
++	sub		r2, r2, #16
++	vext.32		d10, d6, d6, #0
++	vmov.i32	q1, #0xffffffff
++	vshl.i64	q4, q1, #25
++	add		r5, sp, #480
++	vld1.8		{d14-d15}, [r5, : 128]
++	vadd.i64	q9, q2, q7
++	vshl.i64	q1, q1, #26
++	vshr.s64	q10, q9, #26
++	vld1.8		{d0}, [r2, : 64]!
++	vadd.i64	q5, q5, q10
++	vand		q9, q9, q1
++	vld1.8		{d16}, [r2, : 64]!
++	add		r2, sp, #496
++	vld1.8		{d20-d21}, [r2, : 128]
++	vadd.i64	q11, q5, q10
++	vsub.i64	q2, q2, q9
++	vshr.s64	q9, q11, #25
++	vext.32		d12, d5, d4, #0
++	vand		q11, q11, q4
++	vadd.i64	q0, q0, q9
++	vmov		d19, d7
++	vadd.i64	q3, q0, q7
++	vsub.i64	q5, q5, q11
++	vshr.s64	q11, q3, #26
++	vext.32		d18, d11, d10, #0
++	vand		q3, q3, q1
++	vadd.i64	q8, q8, q11
++	vadd.i64	q11, q8, q10
++	vsub.i64	q0, q0, q3
++	vshr.s64	q3, q11, #25
++	vand		q11, q11, q4
++	vadd.i64	q3, q6, q3
++	vadd.i64	q6, q3, q7
++	vsub.i64	q8, q8, q11
++	vshr.s64	q11, q6, #26
++	vand		q6, q6, q1
++	vadd.i64	q9, q9, q11
++	vadd.i64	d25, d19, d21
++	vsub.i64	q3, q3, q6
++	vshr.s64	d23, d25, #25
++	vand		q4, q12, q4
++	vadd.i64	d21, d23, d23
++	vshl.i64	d25, d23, #4
++	vadd.i64	d21, d21, d23
++	vadd.i64	d25, d25, d21
++	vadd.i64	d4, d4, d25
++	vzip.i32	q0, q8
++	vadd.i64	d12, d4, d14
++	add		r2, r6, #8
++	vst1.8		d0, [r2, : 64]
++	vsub.i64	d19, d19, d9
++	add		r2, r2, #16
++	vst1.8		d16, [r2, : 64]
++	vshr.s64	d22, d12, #26
++	vand		q0, q6, q1
++	vadd.i64	d10, d10, d22
++	vzip.i32	q3, q9
++	vsub.i64	d4, d4, d0
++	sub		r2, r2, #8
++	vst1.8		d6, [r2, : 64]
++	add		r2, r2, #16
++	vst1.8		d18, [r2, : 64]
++	vzip.i32	q2, q5
++	sub		r2, r2, #32
++	vst1.8		d4, [r2, : 64]
++	cmp		r4, #0
++	beq		.Lskippostcopy
++	add		r2, r3, #144
++	mov		r4, r4
++	vld1.8		{d0-d1}, [r2, : 128]!
++	vld1.8		{d2-d3}, [r2, : 128]!
++	vld1.8		{d4}, [r2, : 64]
++	vst1.8		{d0-d1}, [r4, : 128]!
++	vst1.8		{d2-d3}, [r4, : 128]!
++	vst1.8		d4, [r4, : 64]
++.Lskippostcopy:
++	cmp		r1, #1
++	bne		.Lskipfinalcopy
++	add		r2, r3, #288
++	add		r4, r3, #144
++	vld1.8		{d0-d1}, [r2, : 128]!
++	vld1.8		{d2-d3}, [r2, : 128]!
++	vld1.8		{d4}, [r2, : 64]
++	vst1.8		{d0-d1}, [r4, : 128]!
++	vst1.8		{d2-d3}, [r4, : 128]!
++	vst1.8		d4, [r4, : 64]
++.Lskipfinalcopy:
++	add		r1, r1, #1
++	cmp		r1, #12
++	blo		.Linvertloop
++	add		r1, r3, #144
++	ldr		r2, [r1], #4
++	ldr		r3, [r1], #4
++	ldr		r4, [r1], #4
++	ldr		r5, [r1], #4
++	ldr		r6, [r1], #4
++	ldr		r7, [r1], #4
++	ldr		r8, [r1], #4
++	ldr		r9, [r1], #4
++	ldr		r10, [r1], #4
++	ldr		r1, [r1]
++	add		r11, r1, r1, LSL #4
++	add		r11, r11, r1, LSL #1
++	add		r11, r11, #16777216
++	mov		r11, r11, ASR #25
++	add		r11, r11, r2
++	mov		r11, r11, ASR #26
++	add		r11, r11, r3
++	mov		r11, r11, ASR #25
++	add		r11, r11, r4
++	mov		r11, r11, ASR #26
++	add		r11, r11, r5
++	mov		r11, r11, ASR #25
++	add		r11, r11, r6
++	mov		r11, r11, ASR #26
++	add		r11, r11, r7
++	mov		r11, r11, ASR #25
++	add		r11, r11, r8
++	mov		r11, r11, ASR #26
++	add		r11, r11, r9
++	mov		r11, r11, ASR #25
++	add		r11, r11, r10
++	mov		r11, r11, ASR #26
++	add		r11, r11, r1
++	mov		r11, r11, ASR #25
++	add		r2, r2, r11
++	add		r2, r2, r11, LSL #1
++	add		r2, r2, r11, LSL #4
++	mov		r11, r2, ASR #26
++	add		r3, r3, r11
++	sub		r2, r2, r11, LSL #26
++	mov		r11, r3, ASR #25
++	add		r4, r4, r11
++	sub		r3, r3, r11, LSL #25
++	mov		r11, r4, ASR #26
++	add		r5, r5, r11
++	sub		r4, r4, r11, LSL #26
++	mov		r11, r5, ASR #25
++	add		r6, r6, r11
++	sub		r5, r5, r11, LSL #25
++	mov		r11, r6, ASR #26
++	add		r7, r7, r11
++	sub		r6, r6, r11, LSL #26
++	mov		r11, r7, ASR #25
++	add		r8, r8, r11
++	sub		r7, r7, r11, LSL #25
++	mov		r11, r8, ASR #26
++	add		r9, r9, r11
++	sub		r8, r8, r11, LSL #26
++	mov		r11, r9, ASR #25
++	add		r10, r10, r11
++	sub		r9, r9, r11, LSL #25
++	mov		r11, r10, ASR #26
++	add		r1, r1, r11
++	sub		r10, r10, r11, LSL #26
++	mov		r11, r1, ASR #25
++	sub		r1, r1, r11, LSL #25
++	add		r2, r2, r3, LSL #26
++	mov		r3, r3, LSR #6
++	add		r3, r3, r4, LSL #19
++	mov		r4, r4, LSR #13
++	add		r4, r4, r5, LSL #13
++	mov		r5, r5, LSR #19
++	add		r5, r5, r6, LSL #6
++	add		r6, r7, r8, LSL #25
++	mov		r7, r8, LSR #7
++	add		r7, r7, r9, LSL #19
++	mov		r8, r9, LSR #13
++	add		r8, r8, r10, LSL #12
++	mov		r9, r10, LSR #20
++	add		r1, r9, r1, LSL #6
++	str		r2, [r0]
++	str		r3, [r0, #4]
++	str		r4, [r0, #8]
++	str		r5, [r0, #12]
++	str		r6, [r0, #16]
++	str		r7, [r0, #20]
++	str		r8, [r0, #24]
++	str		r1, [r0, #28]
++	movw		r0, #0
++	mov		sp, ip
++	pop		{r4-r11, pc}
++SYM_FUNC_END(curve25519_neon)
++#endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/poly1305/poly1305-mips.S	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,407 @@
++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
++/*
++ * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com> All Rights Reserved.
++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++ */
++
++#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
++#define MSB 0	/* byte offset used with lwl/swl */
++#define LSB 3	/* byte offset used with lwr/swr */
++#else
++#define MSB 3
++#define LSB 0
++#endif
++
++#define POLY1305_BLOCK_SIZE 16	/* src advances by this much per loop iteration */
++.text
++#define H0 $t0	/* H0..H4: accumulator words, kept at PTR_POLY1305_H(0..4) between calls */
++#define H1 $t1
++#define H2 $t2
++#define H3 $t3
++#define H4 $t4
++/* R0..R3: key words read from PTR_POLY1305_R(0..3) */
++#define R0 $t5
++#define R1 $t6
++#define R2 $t7
++#define R3 $t8
++/* O0..O4: working words of the current block (input words added to H0..H4) */
++#define O0 $s0
++#define O1 $s4
++#define O2 $v1
++#define O3 $t9
++#define O4 $s5
++/* S1..S3: Rx + (Rx >> 2), computed once before the block loop */
++#define S1 $s1
++#define S2 $s2
++#define S3 $s3
++
++#define SC $at	/* holds 1 in poly1305_blocks_mips, the select flag in poly1305_emit_mips */
++#define CA $v0	/* carry word */
++
++/* Input arguments */
++#define poly	$a0
++#define src	$a1
++#define srclen	$a2
++#define hibit	$a3
++
++/* Layout of the opaque state buffer addressed through $a0 (byte offsets):
++ * R[0..3] at 0..12, CA at 16, H[0..4] at 20..36
++ */
++#define PTR_POLY1305_R(n) ( 0 + (n*4)) ## ($a0)
++#define PTR_POLY1305_CA   (16        ) ## ($a0)
++#define PTR_POLY1305_H(n) (20 + (n*4)) ## ($a0)
++
++#define POLY1305_BLOCK_SIZE 16	/* repeats the identical definition above */
++#define POLY1305_STACK_SIZE 32	/* frame size; offsets 0..20 hold the saved $s0-$s5 */
++
++.set	noat	/* SC is $at: keep the assembler from using it as its own temporary */
++.align	4
++.globl	poly1305_blocks_mips
++.ent	poly1305_blocks_mips
++poly1305_blocks_mips:	/* absorb the whole 16-byte blocks of src[0..srclen) into the state at poly */
++	.frame	$sp, POLY1305_STACK_SIZE, $ra
++	/* srclen &= 0xFFFFFFF0: only whole 16-byte blocks are processed */
++	ins	srclen, $zero, 0, 4
++
++	addiu	$sp, -(POLY1305_STACK_SIZE)	/* released again at .Lpoly1305_blocks_mips_end on both paths */
++
++	/* nothing to do when less than one whole block was passed */
++	beqz	srclen, .Lpoly1305_blocks_mips_end
++
++	/* Calculate last round based on src address pointer.
++	 * last round src ptr (srclen) = src + (srclen & 0xFFFFFFF0)
++	 */
++	addu	srclen, src
++
++	lw	R0, PTR_POLY1305_R(0)
++	lw	R1, PTR_POLY1305_R(1)
++	lw	R2, PTR_POLY1305_R(2)
++	lw	R3, PTR_POLY1305_R(3)
++
++	/* save $s0-$s5, which back O0, O1, O4 and S1..S3 below */
++	sw	$s0, 0($sp)
++	sw	$s1, 4($sp)
++	sw	$s2, 8($sp)
++	sw	$s3, 12($sp)
++	sw	$s4, 16($sp)
++	sw	$s5, 20($sp)
++
++	/* load Hx and Carry */
++	lw	CA, PTR_POLY1305_CA
++	lw	H0, PTR_POLY1305_H(0)
++	lw	H1, PTR_POLY1305_H(1)
++	lw	H2, PTR_POLY1305_H(2)
++	lw	H3, PTR_POLY1305_H(3)
++	lw	H4, PTR_POLY1305_H(4)
++
++	/* Sx = Rx + (Rx >> 2) */
++	srl	S1, R1, 2
++	srl	S2, R2, 2
++	srl	S3, R3, 2
++	addu	S1, R1
++	addu	S2, R2
++	addu	S3, R3
++
++	addiu	SC, $zero, 1	/* SC = 1, so that "maddu CA, SC" adds CA into HI:LO */
++
++.Lpoly1305_loop:
++	lwl	O0, 0+MSB(src)
++	lwl	O1, 4+MSB(src)
++	lwl	O2, 8+MSB(src)
++	lwl	O3,12+MSB(src)
++	lwr	O0, 0+LSB(src)
++	lwr	O1, 4+LSB(src)
++	lwr	O2, 8+LSB(src)
++	lwr	O3,12+LSB(src)
++	/* on big-endian, byte-swap each input word (wsbh swaps within halfwords, rotr 16 swaps the halfwords) */
++#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
++	wsbh	O0
++	wsbh	O1
++	wsbh	O2
++	wsbh	O3
++	rotr	O0, 16
++	rotr	O1, 16
++	rotr	O2, 16
++	rotr	O3, 16
++#endif
++
++	/* h0 = (u32)(d0 = (u64)h0 + inp[0] + c 'Carry_previous cycle'); */
++	addu	H0, CA
++	sltu	CA, H0, CA
++	addu	O0, H0
++	sltu	H0, O0, H0
++	addu	CA, H0
++
++	/* h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + inp[4]); */
++	addu	H1, CA
++	sltu	CA, H1, CA
++	addu	O1, H1
++	sltu	H1, O1, H1
++	addu	CA, H1
++
++	/* h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + inp[8]); */
++	addu	H2, CA
++	sltu	CA, H2, CA
++	addu	O2, H2
++	sltu	H2, O2, H2
++	addu	CA, H2
++
++	/* h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + inp[12]); */
++	addu	H3, CA
++	sltu	CA, H3, CA
++	addu	O3, H3
++	sltu	H3, O3, H3
++	addu	CA, H3
++
++	/* h4 += (u32)(d3 >> 32) + padbit; */
++	addu	H4, hibit
++	addu	O4, H4, CA
++
++	/* D0 = O0*R0 + O1*S3 + O2*S2 + O3*S1; H0 = low word, CA = high word */
++	multu	O0, R0
++	maddu	O1, S3
++	maddu	O2, S2
++	maddu	O3, S1
++	mfhi	CA
++	mflo	H0
++
++	/* D1 = O0*R1 + O1*R0 + O2*S3 + O3*S2 + O4*S1 + CA; H1 = low word, CA = high word */
++	multu	O0, R1
++	maddu	O1, R0
++	maddu	O2, S3
++	maddu	O3, S2
++	maddu	O4, S1
++	maddu	CA, SC
++	mfhi	CA
++	mflo	H1
++
++	/* D2 = O0*R2 + O1*R1 + O2*R0 + O3*S3 + O4*S2 + CA; H2 = low word, CA = high word */
++	multu	O0, R2
++	maddu	O1, R1
++	maddu	O2, R0
++	maddu	O3, S3
++	maddu	O4, S2
++	maddu	CA, SC
++	mfhi	CA
++	mflo	H2
++
++	/* D4 = O4 * R0; NOTE(review): mul presumably may clobber HI/LO, so it sits between two HI/LO sequences - confirm before moving it */
++	mul	H4, O4, R0
++
++	/* D3 = O0*R3 + O1*R2 + O2*R1 + O3*R0 + O4*S3 + CA; H3 = low word, CA = high word */
++	multu	O0, R3
++	maddu	O1, R2
++	maddu	O2, R1
++	maddu	O3, R0
++	maddu	O4, S3
++	maddu	CA, SC
++	mfhi	CA
++	mflo	H3
++
++	addiu	src, POLY1305_BLOCK_SIZE
++
++	/* h4 += (u32)(d3 >> 32); */
++	addu	O4, H4, CA
++	/* h4 &= 3 */
++	andi	H4, O4, 3
++	/* c = (h4 >> 2) + (h4 & ~3U); */
++	srl	CA, O4, 2
++	ins	O4, $zero, 0, 2
++	/* i.e. c = 5 * (h4 >> 2); it is added to H0 at the top of the next iteration or stored as CA */
++	addu	CA, O4
++
++	/* loop until src reaches the end pointer computed above */
++	bne	src, srclen, .Lpoly1305_loop
++
++	/* restore the used save registers. */
++	lw	$s0, 0($sp)
++	lw	$s1, 4($sp)
++	lw	$s2, 8($sp)
++	lw	$s3, 12($sp)
++	lw	$s4, 16($sp)
++	lw	$s5, 20($sp)
++
++	/* store Hx and Carry */
++	sw	CA, PTR_POLY1305_CA
++	sw	H0, PTR_POLY1305_H(0)
++	sw	H1, PTR_POLY1305_H(1)
++	sw	H2, PTR_POLY1305_H(2)
++	sw	H3, PTR_POLY1305_H(3)
++	sw	H4, PTR_POLY1305_H(4)
++
++.Lpoly1305_blocks_mips_end:
++	addiu	$sp, POLY1305_STACK_SIZE
++
++	/* Jump Back */
++	jr	$ra
++.end poly1305_blocks_mips
++.set at
++
++/* poly1305_emit_mips: reduce h, add the nonce, write 16 bytes. Input arguments CTX=$a0, MAC=$a1, NONCE=$a2 */
++#define MAC	$a1
++#define NONCE	$a2
++/* G0..G4 hold h + 5, later the nonce words; they reuse the registers named R0..R3 and O3 above */
++#define G0	$t5
++#define G1	$t6
++#define G2	$t7
++#define G3	$t8
++#define G4	$t9
++
++.set	noat	/* SC is $at and is written below */
++.align	4
++.globl	poly1305_emit_mips
++.ent	poly1305_emit_mips
++poly1305_emit_mips:
++	/* load Hx and Carry */
++	lw	CA, PTR_POLY1305_CA
++	lw	H0, PTR_POLY1305_H(0)
++	lw	H1, PTR_POLY1305_H(1)
++	lw	H2, PTR_POLY1305_H(2)
++	lw	H3, PTR_POLY1305_H(3)
++	lw	H4, PTR_POLY1305_H(4)
++
++	/* Add left over carry */
++	addu	H0, CA
++	sltu	CA, H0, CA
++	addu	H1, CA
++	sltu	CA, H1, CA
++	addu	H2, CA
++	sltu	CA, H2, CA
++	addu	H3, CA
++	sltu	CA, H3, CA
++	addu	H4, CA
++
++	/* compare to modulus by computing h + -p */
++	addiu	G0, H0, 5
++	sltu	CA, G0, H0
++	addu	G1, H1, CA
++	sltu	CA, G1, H1
++	addu	G2, H2, CA
++	sltu	CA, G2, H2
++	addu	G3, H3, CA
++	sltu	CA, G3, H3
++	addu	G4, H4, CA
++
++	srl	SC, G4, 2	/* SC != 0 when h + 5 has a bit set at or above bit 2 of its top word */
++
++	/* if there was carry into 131st bit, h3:h0 = g3:g0 */
++	movn	H0, G0, SC
++	movn	H1, G1, SC
++	movn	H2, G2, SC
++	movn	H3, G3, SC
++	/* load the four nonce words into G0..G3; they are not byte-swapped here */
++	lwl	G0, 0+MSB(NONCE)
++	lwl	G1, 4+MSB(NONCE)
++	lwl	G2, 8+MSB(NONCE)
++	lwl	G3,12+MSB(NONCE)
++	lwr	G0, 0+LSB(NONCE)
++	lwr	G1, 4+LSB(NONCE)
++	lwr	G2, 8+LSB(NONCE)
++	lwr	G3,12+LSB(NONCE)
++
++	/* mac = (h + nonce) % (2^128) */
++	addu	H0, G0
++	sltu	CA, H0, G0
++
++	/* H1 */
++	addu	H1, CA
++	sltu	CA, H1, CA
++	addu	H1, G1
++	sltu	G1, H1, G1
++	addu	CA, G1
++
++	/* H2 */
++	addu	H2, CA
++	sltu	CA, H2, CA
++	addu	H2, G2
++	sltu	G2, H2, G2
++	addu	CA, G2
++
++	/* H3: the carry out of this word is dropped, which is the "% (2^128)" */
++	addu	H3, CA
++	addu	H3, G3
++
++#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
++	wsbh	H0
++	wsbh	H1
++	wsbh	H2
++	wsbh	H3
++	rotr	H0, 16
++	rotr	H1, 16
++	rotr	H2, 16
++	rotr	H3, 16
++#endif
++
++	/* store MAC (byte-swapped above on big-endian, so the byte order in memory is the same on both) */
++	swl	H0, 0+MSB(MAC)
++	swl	H1, 4+MSB(MAC)
++	swl	H2, 8+MSB(MAC)
++	swl	H3,12+MSB(MAC)
++	swr	H0, 0+LSB(MAC)
++	swr	H1, 4+LSB(MAC)
++	swr	H2, 8+LSB(MAC)
++	swr	H3,12+LSB(MAC)
++
++	jr	$ra
++.end poly1305_emit_mips	/* NOTE(review): unlike poly1305_blocks_mips, no ".set at" follows; noat stays in effect below */
++
++#define PR0 $t0	/* PR0..PR3: the four 32-bit words read from KEY */
++#define PR1 $t1
++#define PR2 $t2
++#define PR3 $t3
++#define PT0 $t4	/* mask constant 0x0ffffffc */
++
++/* poly1305_init_mips: clear Hx and Carry, then store the masked key words as Rx. Input arguments CTX=$a0, KEY=$a1 */
++
++.align	4
++.globl	poly1305_init_mips
++.ent	poly1305_init_mips
++poly1305_init_mips:
++	lwl	PR0, 0+MSB($a1)
++	lwl	PR1, 4+MSB($a1)
++	lwl	PR2, 8+MSB($a1)
++	lwl	PR3,12+MSB($a1)
++	lwr	PR0, 0+LSB($a1)
++	lwr	PR1, 4+LSB($a1)
++	lwr	PR2, 8+LSB($a1)
++	lwr	PR3,12+LSB($a1)
++
++	/* clear Hx and Carry */
++	sw	$zero, PTR_POLY1305_CA
++	sw	$zero, PTR_POLY1305_H(0)
++	sw	$zero, PTR_POLY1305_H(1)
++	sw	$zero, PTR_POLY1305_H(2)
++	sw	$zero, PTR_POLY1305_H(3)
++	sw	$zero, PTR_POLY1305_H(4)
++
++#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
++	wsbh	PR0
++	wsbh	PR1
++	wsbh	PR2
++	wsbh	PR3
++	rotr	PR0, 16
++	rotr	PR1, 16
++	rotr	PR2, 16
++	rotr	PR3, 16
++#endif
++
++	lui	PT0, 0x0FFF
++	ori	PT0, 0xFFFC	/* PT0 = 0x0ffffffc */
++
++	/* AND 0x0fffffff (ext keeps the low 28 bits) */
++	ext	PR0, PR0, 0, (32-4)
++
++	/* AND 0x0ffffffc; */
++	and	PR1, PT0
++	and	PR2, PT0
++	and	PR3, PT0
++
++	/* store Rx */
++	sw	PR0, PTR_POLY1305_R(0)
++	sw	PR1, PTR_POLY1305_R(1)
++	sw	PR2, PTR_POLY1305_R(2)
++	sw	PR3, PTR_POLY1305_R(3)
++
++	/* Jump Back  */
++	jr	$ra
++.end poly1305_init_mips
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/chacha20/chacha20-arm64.pl	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,1163 @@
++#!/usr/bin/env perl
++# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
++#
++# This code is taken from the OpenSSL project but the author, Andy Polyakov,
++# has relicensed it under the licenses specified in the SPDX header above.
++# The original headers, including the original license headers, are
++# included below for completeness.
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# June 2015
++#
++# ChaCha20 for ARMv8.
++#
++# Performance in cycles per byte out of large buffer.
++#
++#			IALU/gcc-4.9    3xNEON+1xIALU	6xNEON+2xIALU(*)
++#
++# Apple A7		5.50/+49%       3.33            1.70
++# Cortex-A53		8.40/+80%       4.72		4.72(**)
++# Cortex-A57		8.06/+43%       4.90            4.43(***)
++# Denver		4.50/+82%       2.63		2.67(**)
++# X-Gene		9.50/+46%       8.82		8.89(**)
++# Mongoose		8.00/+44%	3.64		3.25(***)
++# Kryo			8.17/+50%	4.83		4.65(***)
++#
++# (*)	since no non-Apple processor exhibits significantly better
++#	performance, the code path is #ifdef __APPLE__-ed;
++# (**)	it's expected that doubling interleave factor doesn't help
++#	all processors, only those with higher NEON latency and
++#	higher instruction issue rate;
++# (***)	expected improvement was actually higher;
++
++$flavour=shift;	# first argument: the flavour, or the output file if it looks like "name.ext"
++if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
++else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }	# else: the next argument that looks like "name.ext"
++# Any flavour other than "void" pipes the generated text through arm-xlate.pl; otherwise it is written to $output directly.
++if ($flavour && $flavour ne "void") {
++    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
++    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
++    die "can't locate arm-xlate.pl";
++
++    open STDOUT,"| \"$^X\" $xlate $flavour $output" or die "can't call $xlate: $!";	# fail loudly instead of emitting nothing
++} else {
++    open STDOUT,">$output" or die "can't open $output: $!";	# a failed reopen would otherwise silently discard the output
++}
++
++sub AUTOLOAD()		# thunk [simplified] x86-style perlasm: a call to any undefined &op(...) appends one instruction line to $code
++{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;	# drop the package prefix; the first "_" becomes "." (add_32 -> add.32)
++  my $arg = pop;
++    $arg = "#$arg" if ($arg*1 eq $arg);	# a purely numeric last operand is written as an immediate "#n"
++    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
++}
++
++my ($out,$inp,$len,$key,$ctr) = map("x$_",(0..4));	# x0..x4, in this order
++
++my @x=map("x$_",(5..17,19..21));	# 16 registers indexed 0..15 by ROUND; x18 is left out
++my @d=map("x$_",(22..28,30));	# 8 registers, each holding two 32-bit words packed into 64 bits; x29 is left out
++
++sub ROUND {
++my ($a0,$b0,$c0,$d0)=@_;	# @x indices of the first quarter-round
++my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));	# each index stepped by one within its group of four, wrapping
++my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));
++my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));
++    # Returns 48 perlasm call strings for the caller to eval: four quarter-rounds interleaved step by step. First: a += b; d ^= a; d = ror(d,16)
++    (
++	"&add_32	(@x[$a0],@x[$a0],@x[$b0])",
++	 "&add_32	(@x[$a1],@x[$a1],@x[$b1])",
++	  "&add_32	(@x[$a2],@x[$a2],@x[$b2])",
++	   "&add_32	(@x[$a3],@x[$a3],@x[$b3])",
++	"&eor_32	(@x[$d0],@x[$d0],@x[$a0])",
++	 "&eor_32	(@x[$d1],@x[$d1],@x[$a1])",
++	  "&eor_32	(@x[$d2],@x[$d2],@x[$a2])",
++	   "&eor_32	(@x[$d3],@x[$d3],@x[$a3])",
++	"&ror_32	(@x[$d0],@x[$d0],16)",
++	 "&ror_32	(@x[$d1],@x[$d1],16)",
++	  "&ror_32	(@x[$d2],@x[$d2],16)",
++	   "&ror_32	(@x[$d3],@x[$d3],16)",
++	# c += d; b ^= c; b = ror(b,20), i.e. rotate left by 12
++	"&add_32	(@x[$c0],@x[$c0],@x[$d0])",
++	 "&add_32	(@x[$c1],@x[$c1],@x[$d1])",
++	  "&add_32	(@x[$c2],@x[$c2],@x[$d2])",
++	   "&add_32	(@x[$c3],@x[$c3],@x[$d3])",
++	"&eor_32	(@x[$b0],@x[$b0],@x[$c0])",
++	 "&eor_32	(@x[$b1],@x[$b1],@x[$c1])",
++	  "&eor_32	(@x[$b2],@x[$b2],@x[$c2])",
++	   "&eor_32	(@x[$b3],@x[$b3],@x[$c3])",
++	"&ror_32	(@x[$b0],@x[$b0],20)",
++	 "&ror_32	(@x[$b1],@x[$b1],20)",
++	  "&ror_32	(@x[$b2],@x[$b2],20)",
++	   "&ror_32	(@x[$b3],@x[$b3],20)",
++	# a += b; d ^= a; d = ror(d,24), i.e. rotate left by 8
++	"&add_32	(@x[$a0],@x[$a0],@x[$b0])",
++	 "&add_32	(@x[$a1],@x[$a1],@x[$b1])",
++	  "&add_32	(@x[$a2],@x[$a2],@x[$b2])",
++	   "&add_32	(@x[$a3],@x[$a3],@x[$b3])",
++	"&eor_32	(@x[$d0],@x[$d0],@x[$a0])",
++	 "&eor_32	(@x[$d1],@x[$d1],@x[$a1])",
++	  "&eor_32	(@x[$d2],@x[$d2],@x[$a2])",
++	   "&eor_32	(@x[$d3],@x[$d3],@x[$a3])",
++	"&ror_32	(@x[$d0],@x[$d0],24)",
++	 "&ror_32	(@x[$d1],@x[$d1],24)",
++	  "&ror_32	(@x[$d2],@x[$d2],24)",
++	   "&ror_32	(@x[$d3],@x[$d3],24)",
++	# c += d; b ^= c; b = ror(b,25), i.e. rotate left by 7
++	"&add_32	(@x[$c0],@x[$c0],@x[$d0])",
++	 "&add_32	(@x[$c1],@x[$c1],@x[$d1])",
++	  "&add_32	(@x[$c2],@x[$c2],@x[$d2])",
++	   "&add_32	(@x[$c3],@x[$c3],@x[$d3])",
++	"&eor_32	(@x[$b0],@x[$b0],@x[$c0])",
++	 "&eor_32	(@x[$b1],@x[$b1],@x[$c1])",
++	  "&eor_32	(@x[$b2],@x[$b2],@x[$c2])",
++	   "&eor_32	(@x[$b3],@x[$b3],@x[$c3])",
++	"&ror_32	(@x[$b0],@x[$b0],25)",
++	 "&ror_32	(@x[$b1],@x[$b1],25)",
++	  "&ror_32	(@x[$b2],@x[$b2],25)",
++	   "&ror_32	(@x[$b3],@x[$b3],25)"
++    );
++}
++
++$code.=<<___;
++#ifndef	__KERNEL__
++# include "arm_arch.h"
++.extern	OPENSSL_armcap_P
++#else
++# define ChaCha20_ctr32 chacha20_arm
++# define ChaCha20_neon  chacha20_neon
++#endif
++
++.text
++
++.align	5
++.Lsigma:
++.quad	0x3320646e61707865,0x6b20657479622d32		// endian-neutral
++.Lone:
++.long	1,0,0,0
++#ifndef	__KERNEL__
++.LOPENSSL_armcap_P:
++# ifdef	__ILP32__
++.long	OPENSSL_armcap_P-.
++# else
++.quad	OPENSSL_armcap_P-.
++# endif
++#endif
++
++.globl	ChaCha20_ctr32
++.type	ChaCha20_ctr32,%function
++.align	5
++ChaCha20_ctr32:
++	cbz	$len,.Labort
++#ifndef	__KERNEL__
++	adr	@x[0],.LOPENSSL_armcap_P
++	cmp	$len,#192
++	b.lo	.Lshort
++# ifdef	__ILP32__
++	ldrsw	@x[1],[@x[0]]
++# else
++	ldr	@x[1],[@x[0]]
++# endif
++	ldr	w17,[@x[1],@x[0]]
++	tst	w17,#ARMV7_NEON
++	b.ne	ChaCha20_neon
++
++.Lshort:
++#endif
++	stp	x29,x30,[sp,#-96]!
++	add	x29,sp,#0
++
++	adr	@x[0],.Lsigma
++	stp	x19,x20,[sp,#16]
++	stp	x21,x22,[sp,#32]
++	stp	x23,x24,[sp,#48]
++	stp	x25,x26,[sp,#64]
++	stp	x27,x28,[sp,#80]
++	sub	sp,sp,#64
++
++	ldp	@d[0],@d[1],[@x[0]]		// load sigma
++	ldp	@d[2],@d[3],[$key]		// load key
++	ldp	@d[4],@d[5],[$key,#16]
++	ldp	@d[6],@d[7],[$ctr]		// load counter
++#ifdef	__AARCH64EB__
++	ror	@d[2],@d[2],#32
++	ror	@d[3],@d[3],#32
++	ror	@d[4],@d[4],#32
++	ror	@d[5],@d[5],#32
++	ror	@d[6],@d[6],#32
++	ror	@d[7],@d[7],#32
++#endif
++
++.Loop_outer:
++	mov.32	@x[0],@d[0]			// unpack key block
++	lsr	@x[1],@d[0],#32
++	mov.32	@x[2],@d[1]
++	lsr	@x[3],@d[1],#32
++	mov.32	@x[4],@d[2]
++	lsr	@x[5],@d[2],#32
++	mov.32	@x[6],@d[3]
++	lsr	@x[7],@d[3],#32
++	mov.32	@x[8],@d[4]
++	lsr	@x[9],@d[4],#32
++	mov.32	@x[10],@d[5]
++	lsr	@x[11],@d[5],#32
++	mov.32	@x[12],@d[6]
++	lsr	@x[13],@d[6],#32
++	mov.32	@x[14],@d[7]
++	lsr	@x[15],@d[7],#32
++
++	mov	$ctr,#10
++	subs	$len,$len,#64
++.Loop:
++	sub	$ctr,$ctr,#1
++___
++	foreach (&ROUND(0, 4, 8,12)) { eval; }	# quarter-rounds on @x indices (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15); each eval appends one line to $code
++	foreach (&ROUND(0, 5,10,15)) { eval; }	# quarter-rounds on @x indices (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14)
++$code.=<<___;
++	cbnz	$ctr,.Loop
++
++	add.32	@x[0],@x[0],@d[0]		// accumulate key block
++	add	@x[1],@x[1],@d[0],lsr#32
++	add.32	@x[2],@x[2],@d[1]
++	add	@x[3],@x[3],@d[1],lsr#32
++	add.32	@x[4],@x[4],@d[2]
++	add	@x[5],@x[5],@d[2],lsr#32
++	add.32	@x[6],@x[6],@d[3]
++	add	@x[7],@x[7],@d[3],lsr#32
++	add.32	@x[8],@x[8],@d[4]
++	add	@x[9],@x[9],@d[4],lsr#32
++	add.32	@x[10],@x[10],@d[5]
++	add	@x[11],@x[11],@d[5],lsr#32
++	add.32	@x[12],@x[12],@d[6]
++	add	@x[13],@x[13],@d[6],lsr#32
++	add.32	@x[14],@x[14],@d[7]
++	add	@x[15],@x[15],@d[7],lsr#32
++
++	b.lo	.Ltail
++
++	add	@x[0],@x[0],@x[1],lsl#32	// pack
++	add	@x[2],@x[2],@x[3],lsl#32
++	ldp	@x[1],@x[3],[$inp,#0]		// load input
++	add	@x[4],@x[4],@x[5],lsl#32
++	add	@x[6],@x[6],@x[7],lsl#32
++	ldp	@x[5],@x[7],[$inp,#16]
++	add	@x[8],@x[8],@x[9],lsl#32
++	add	@x[10],@x[10],@x[11],lsl#32
++	ldp	@x[9],@x[11],[$inp,#32]
++	add	@x[12],@x[12],@x[13],lsl#32
++	add	@x[14],@x[14],@x[15],lsl#32
++	ldp	@x[13],@x[15],[$inp,#48]
++	add	$inp,$inp,#64
++#ifdef	__AARCH64EB__
++	rev	@x[0],@x[0]
++	rev	@x[2],@x[2]
++	rev	@x[4],@x[4]
++	rev	@x[6],@x[6]
++	rev	@x[8],@x[8]
++	rev	@x[10],@x[10]
++	rev	@x[12],@x[12]
++	rev	@x[14],@x[14]
++#endif
++	eor	@x[0],@x[0],@x[1]
++	eor	@x[2],@x[2],@x[3]
++	eor	@x[4],@x[4],@x[5]
++	eor	@x[6],@x[6],@x[7]
++	eor	@x[8],@x[8],@x[9]
++	eor	@x[10],@x[10],@x[11]
++	eor	@x[12],@x[12],@x[13]
++	eor	@x[14],@x[14],@x[15]
++
++	stp	@x[0],@x[2],[$out,#0]		// store output
++	 add	@d[6],@d[6],#1			// increment counter
++	stp	@x[4],@x[6],[$out,#16]
++	stp	@x[8],@x[10],[$out,#32]
++	stp	@x[12],@x[14],[$out,#48]
++	add	$out,$out,#64
++
++	b.hi	.Loop_outer
++
++	ldp	x19,x20,[x29,#16]
++	add	sp,sp,#64
++	ldp	x21,x22,[x29,#32]
++	ldp	x23,x24,[x29,#48]
++	ldp	x25,x26,[x29,#64]
++	ldp	x27,x28,[x29,#80]
++	ldp	x29,x30,[sp],#96
++.Labort:
++	ret
++
++.align	4
++.Ltail:
++	add	$len,$len,#64
++.Less_than_64:
++	sub	$out,$out,#1
++	add	$inp,$inp,$len
++	add	$out,$out,$len
++	add	$ctr,sp,$len
++	neg	$len,$len
++
++	add	@x[0],@x[0],@x[1],lsl#32	// pack
++	add	@x[2],@x[2],@x[3],lsl#32
++	add	@x[4],@x[4],@x[5],lsl#32
++	add	@x[6],@x[6],@x[7],lsl#32
++	add	@x[8],@x[8],@x[9],lsl#32
++	add	@x[10],@x[10],@x[11],lsl#32
++	add	@x[12],@x[12],@x[13],lsl#32
++	add	@x[14],@x[14],@x[15],lsl#32
++#ifdef	__AARCH64EB__
++	rev	@x[0],@x[0]
++	rev	@x[2],@x[2]
++	rev	@x[4],@x[4]
++	rev	@x[6],@x[6]
++	rev	@x[8],@x[8]
++	rev	@x[10],@x[10]
++	rev	@x[12],@x[12]
++	rev	@x[14],@x[14]
++#endif
++	stp	@x[0],@x[2],[sp,#0]
++	stp	@x[4],@x[6],[sp,#16]
++	stp	@x[8],@x[10],[sp,#32]
++	stp	@x[12],@x[14],[sp,#48]
++
++.Loop_tail:
++	ldrb	w10,[$inp,$len]
++	ldrb	w11,[$ctr,$len]
++	add	$len,$len,#1
++	eor	w10,w10,w11
++	strb	w10,[$out,$len]
++	cbnz	$len,.Loop_tail
++
++	stp	xzr,xzr,[sp,#0]
++	stp	xzr,xzr,[sp,#16]
++	stp	xzr,xzr,[sp,#32]
++	stp	xzr,xzr,[sp,#48]
++
++	ldp	x19,x20,[x29,#16]
++	add	sp,sp,#64
++	ldp	x21,x22,[x29,#32]
++	ldp	x23,x24,[x29,#48]
++	ldp	x25,x26,[x29,#64]
++	ldp	x27,x28,[x29,#80]
++	ldp	x29,x30,[sp],#96
++	ret
++.size	ChaCha20_ctr32,.-ChaCha20_ctr32
++___
++
++{{{
++my ($A0,$B0,$C0,$D0,$A1,$B1,$C1,$D1,$A2,$B2,$C2,$D2,$T0,$T1,$T2,$T3) =
++    map("v$_.4s",(0..7,16..23));	# v0-v7 and v16-v23 as four 32-bit lanes each; v8-v15 are skipped
++my (@K)=map("v$_.4s",(24..30));	# v24-v30
++my $ONE="v31.4s";	# v31
++
++sub NEONROUND {
++my $odd = pop;	# last argument, 0 or 1: selects the ext amounts used at the end
++my ($a,$b,$c,$d,$t)=@_;	# four state vectors and one temporary, as register names
++	# Returns 18 perlasm call strings for the caller to eval.
++	(
++	"&add		('$a','$a','$b')",
++	"&eor		('$d','$d','$a')",
++	"&rev32_16	('$d','$d')",		# vrot ($d,16)
++	# ushr + sli with shifts adding up to 32 form a rotate: here b = (b ^ c) rotated left by 12
++	"&add		('$c','$c','$d')",
++	"&eor		('$t','$b','$c')",
++	"&ushr		('$b','$t',20)",
++	"&sli		('$b','$t',12)",
++	# d = (d ^ a) rotated left by 8
++	"&add		('$a','$a','$b')",
++	"&eor		('$t','$d','$a')",
++	"&ushr		('$d','$t',24)",
++	"&sli		('$d','$t',8)",
++	# b = (b ^ c) rotated left by 7
++	"&add		('$c','$c','$d')",
++	"&eor		('$t','$b','$c')",
++	"&ushr		('$b','$t',25)",
++	"&sli		('$b','$t',7)",
++	# rotate whole vectors a, d and c by 4/12, 8 and 12/4 bytes; an even and an odd call together add up to 16 bytes each
++	"&ext		('$a','$a','$a',$odd?4:12)",
++	"&ext		('$d','$d','$d',8)",
++	"&ext		('$c','$c','$c',$odd?12:4)"
++	);
++}
++# ChaCha20_neon, part 1: each outer iteration produces 256 bytes, one block in general registers and three in NEON registers, with the two instruction streams interleaved.
++$code.=<<___;	# prologue, sigma/key/counter load, per-iteration state setup, round-loop header
++#if !defined(__KERNEL__) || defined(CONFIG_KERNEL_MODE_NEON)
++#ifdef __KERNEL__
++.globl  ChaCha20_neon
++.type   ChaCha20_neon,%function
++#endif
++.type	ChaCha20_neon,%function
++.align	5
++ChaCha20_neon:
++	stp	x29,x30,[sp,#-96]!
++	add	x29,sp,#0
++
++	adr	@x[0],.Lsigma
++	stp	x19,x20,[sp,#16]
++	stp	x21,x22,[sp,#32]
++	stp	x23,x24,[sp,#48]
++	stp	x25,x26,[sp,#64]
++	stp	x27,x28,[sp,#80]
++#ifdef	__APPLE__
++	cmp	$len,#512
++	b.hs	.L512_or_more_neon
++#endif
++
++	sub	sp,sp,#64
++
++	ldp	@d[0],@d[1],[@x[0]]		// load sigma
++	ld1	{@K[0]},[@x[0]],#16
++	ldp	@d[2],@d[3],[$key]		// load key
++	ldp	@d[4],@d[5],[$key,#16]
++	ld1	{@K[1],@K[2]},[$key]
++	ldp	@d[6],@d[7],[$ctr]		// load counter
++	ld1	{@K[3]},[$ctr]
++	ld1	{$ONE},[@x[0]]
++#ifdef	__AARCH64EB__
++	rev64	@K[0],@K[0]
++	ror	@d[2],@d[2],#32
++	ror	@d[3],@d[3],#32
++	ror	@d[4],@d[4],#32
++	ror	@d[5],@d[5],#32
++	ror	@d[6],@d[6],#32
++	ror	@d[7],@d[7],#32
++#endif
++	add	@K[3],@K[3],$ONE		// += 1
++	add	@K[4],@K[3],$ONE
++	add	@K[5],@K[4],$ONE
++	shl	$ONE,$ONE,#2			// 1 -> 4
++
++.Loop_outer_neon:
++	mov.32	@x[0],@d[0]			// unpack key block
++	lsr	@x[1],@d[0],#32
++	 mov	$A0,@K[0]
++	mov.32	@x[2],@d[1]
++	lsr	@x[3],@d[1],#32
++	 mov	$A1,@K[0]
++	mov.32	@x[4],@d[2]
++	lsr	@x[5],@d[2],#32
++	 mov	$A2,@K[0]
++	mov.32	@x[6],@d[3]
++	 mov	$B0,@K[1]
++	lsr	@x[7],@d[3],#32
++	 mov	$B1,@K[1]
++	mov.32	@x[8],@d[4]
++	 mov	$B2,@K[1]
++	lsr	@x[9],@d[4],#32
++	 mov	$D0,@K[3]
++	mov.32	@x[10],@d[5]
++	 mov	$D1,@K[4]
++	lsr	@x[11],@d[5],#32
++	 mov	$D2,@K[5]
++	mov.32	@x[12],@d[6]
++	 mov	$C0,@K[2]
++	lsr	@x[13],@d[6],#32
++	 mov	$C1,@K[2]
++	mov.32	@x[14],@d[7]
++	 mov	$C2,@K[2]
++	lsr	@x[15],@d[7],#32
++
++	mov	$ctr,#10
++	subs	$len,$len,#256
++.Loop_neon:
++	sub	$ctr,$ctr,#1
++___
++	my @thread0=&NEONROUND($A0,$B0,$C0,$D0,$T0,0);	# parity-0 round, NEON state 0
++	my @thread1=&NEONROUND($A1,$B1,$C1,$D1,$T1,0);	# parity-0 round, NEON state 1
++	my @thread2=&NEONROUND($A2,$B2,$C2,$D2,$T2,0);	# parity-0 round, NEON state 2
++	my @thread3=&ROUND(0,4,8,12);			# ROUND is defined earlier in the file; presumably the general-register round
++
++	foreach (@thread0) {				# emit in lock-step: every NEON op is followed by one op from @thread3
++		eval;			eval(shift(@thread3));
++		eval(shift(@thread1));	eval(shift(@thread3));
++		eval(shift(@thread2));	eval(shift(@thread3));
++	}
++# Second half of the double round: same weave with parity 1 and the (0,5,10,15) index set.
++	@thread0=&NEONROUND($A0,$B0,$C0,$D0,$T0,1);
++	@thread1=&NEONROUND($A1,$B1,$C1,$D1,$T1,1);
++	@thread2=&NEONROUND($A2,$B2,$C2,$D2,$T2,1);
++	@thread3=&ROUND(0,5,10,15);
++
++	foreach (@thread0) {				# the emission order here is the instruction schedule; do not reorder
++		eval;			eval(shift(@thread3));
++		eval(shift(@thread1));	eval(shift(@thread3));
++		eval(shift(@thread2));	eval(shift(@thread3));
++	}
++$code.=<<___;	# ChaCha20_neon, part 2: add the key block, xor with input, store; then the sub-256-byte tail and the epilogue
++	cbnz	$ctr,.Loop_neon
++
++	add.32	@x[0],@x[0],@d[0]		// accumulate key block
++	 add	$A0,$A0,@K[0]
++	add	@x[1],@x[1],@d[0],lsr#32
++	 add	$A1,$A1,@K[0]
++	add.32	@x[2],@x[2],@d[1]
++	 add	$A2,$A2,@K[0]
++	add	@x[3],@x[3],@d[1],lsr#32
++	 add	$C0,$C0,@K[2]
++	add.32	@x[4],@x[4],@d[2]
++	 add	$C1,$C1,@K[2]
++	add	@x[5],@x[5],@d[2],lsr#32
++	 add	$C2,$C2,@K[2]
++	add.32	@x[6],@x[6],@d[3]
++	 add	$D0,$D0,@K[3]
++	add	@x[7],@x[7],@d[3],lsr#32
++	add.32	@x[8],@x[8],@d[4]
++	 add	$D1,$D1,@K[4]
++	add	@x[9],@x[9],@d[4],lsr#32
++	add.32	@x[10],@x[10],@d[5]
++	 add	$D2,$D2,@K[5]
++	add	@x[11],@x[11],@d[5],lsr#32
++	add.32	@x[12],@x[12],@d[6]
++	 add	$B0,$B0,@K[1]
++	add	@x[13],@x[13],@d[6],lsr#32
++	add.32	@x[14],@x[14],@d[7]
++	 add	$B1,$B1,@K[1]
++	add	@x[15],@x[15],@d[7],lsr#32
++	 add	$B2,$B2,@K[1]
++
++	b.lo	.Ltail_neon
++
++	add	@x[0],@x[0],@x[1],lsl#32	// pack
++	add	@x[2],@x[2],@x[3],lsl#32
++	ldp	@x[1],@x[3],[$inp,#0]		// load input
++	add	@x[4],@x[4],@x[5],lsl#32
++	add	@x[6],@x[6],@x[7],lsl#32
++	ldp	@x[5],@x[7],[$inp,#16]
++	add	@x[8],@x[8],@x[9],lsl#32
++	add	@x[10],@x[10],@x[11],lsl#32
++	ldp	@x[9],@x[11],[$inp,#32]
++	add	@x[12],@x[12],@x[13],lsl#32
++	add	@x[14],@x[14],@x[15],lsl#32
++	ldp	@x[13],@x[15],[$inp,#48]
++	add	$inp,$inp,#64
++#ifdef	__AARCH64EB__
++	rev	@x[0],@x[0]
++	rev	@x[2],@x[2]
++	rev	@x[4],@x[4]
++	rev	@x[6],@x[6]
++	rev	@x[8],@x[8]
++	rev	@x[10],@x[10]
++	rev	@x[12],@x[12]
++	rev	@x[14],@x[14]
++#endif
++	ld1.8	{$T0-$T3},[$inp],#64
++	eor	@x[0],@x[0],@x[1]
++	eor	@x[2],@x[2],@x[3]
++	eor	@x[4],@x[4],@x[5]
++	eor	@x[6],@x[6],@x[7]
++	eor	@x[8],@x[8],@x[9]
++	 eor	$A0,$A0,$T0
++	eor	@x[10],@x[10],@x[11]
++	 eor	$B0,$B0,$T1
++	eor	@x[12],@x[12],@x[13]
++	 eor	$C0,$C0,$T2
++	eor	@x[14],@x[14],@x[15]
++	 eor	$D0,$D0,$T3
++	 ld1.8	{$T0-$T3},[$inp],#64
++
++	stp	@x[0],@x[2],[$out,#0]		// store output
++	 add	@d[6],@d[6],#4			// increment counter
++	stp	@x[4],@x[6],[$out,#16]
++	 add	@K[3],@K[3],$ONE		// += 4
++	stp	@x[8],@x[10],[$out,#32]
++	 add	@K[4],@K[4],$ONE
++	stp	@x[12],@x[14],[$out,#48]
++	 add	@K[5],@K[5],$ONE
++	add	$out,$out,#64
++
++	st1.8	{$A0-$D0},[$out],#64
++	ld1.8	{$A0-$D0},[$inp],#64
++
++	eor	$A1,$A1,$T0
++	eor	$B1,$B1,$T1
++	eor	$C1,$C1,$T2
++	eor	$D1,$D1,$T3
++	st1.8	{$A1-$D1},[$out],#64
++
++	eor	$A2,$A2,$A0
++	eor	$B2,$B2,$B0
++	eor	$C2,$C2,$C0
++	eor	$D2,$D2,$D0
++	st1.8	{$A2-$D2},[$out],#64
++
++	b.hi	.Loop_outer_neon
++
++	ldp	x19,x20,[x29,#16]
++	add	sp,sp,#64
++	ldp	x21,x22,[x29,#32]
++	ldp	x23,x24,[x29,#48]
++	ldp	x25,x26,[x29,#64]
++	ldp	x27,x28,[x29,#80]
++	ldp	x29,x30,[sp],#96
++	ret
++
++.Ltail_neon:
++	add	$len,$len,#256
++	cmp	$len,#64
++	b.lo	.Less_than_64
++
++	add	@x[0],@x[0],@x[1],lsl#32	// pack
++	add	@x[2],@x[2],@x[3],lsl#32
++	ldp	@x[1],@x[3],[$inp,#0]		// load input
++	add	@x[4],@x[4],@x[5],lsl#32
++	add	@x[6],@x[6],@x[7],lsl#32
++	ldp	@x[5],@x[7],[$inp,#16]
++	add	@x[8],@x[8],@x[9],lsl#32
++	add	@x[10],@x[10],@x[11],lsl#32
++	ldp	@x[9],@x[11],[$inp,#32]
++	add	@x[12],@x[12],@x[13],lsl#32
++	add	@x[14],@x[14],@x[15],lsl#32
++	ldp	@x[13],@x[15],[$inp,#48]
++	add	$inp,$inp,#64
++#ifdef	__AARCH64EB__
++	rev	@x[0],@x[0]
++	rev	@x[2],@x[2]
++	rev	@x[4],@x[4]
++	rev	@x[6],@x[6]
++	rev	@x[8],@x[8]
++	rev	@x[10],@x[10]
++	rev	@x[12],@x[12]
++	rev	@x[14],@x[14]
++#endif
++	eor	@x[0],@x[0],@x[1]
++	eor	@x[2],@x[2],@x[3]
++	eor	@x[4],@x[4],@x[5]
++	eor	@x[6],@x[6],@x[7]
++	eor	@x[8],@x[8],@x[9]
++	eor	@x[10],@x[10],@x[11]
++	eor	@x[12],@x[12],@x[13]
++	eor	@x[14],@x[14],@x[15]
++
++	stp	@x[0],@x[2],[$out,#0]		// store output
++	 add	@d[6],@d[6],#4			// increment counter
++	stp	@x[4],@x[6],[$out,#16]
++	stp	@x[8],@x[10],[$out,#32]
++	stp	@x[12],@x[14],[$out,#48]
++	add	$out,$out,#64
++	b.eq	.Ldone_neon
++	sub	$len,$len,#64
++	cmp	$len,#64
++	b.lo	.Less_than_128
++
++	ld1.8	{$T0-$T3},[$inp],#64
++	eor	$A0,$A0,$T0
++	eor	$B0,$B0,$T1
++	eor	$C0,$C0,$T2
++	eor	$D0,$D0,$T3
++	st1.8	{$A0-$D0},[$out],#64
++	b.eq	.Ldone_neon
++	sub	$len,$len,#64
++	cmp	$len,#64
++	b.lo	.Less_than_192
++
++	ld1.8	{$T0-$T3},[$inp],#64
++	eor	$A1,$A1,$T0
++	eor	$B1,$B1,$T1
++	eor	$C1,$C1,$T2
++	eor	$D1,$D1,$T3
++	st1.8	{$A1-$D1},[$out],#64
++	b.eq	.Ldone_neon
++	sub	$len,$len,#64
++
++	st1.8	{$A2-$D2},[sp]
++	b	.Last_neon
++
++.Less_than_128:
++	st1.8	{$A0-$D0},[sp]
++	b	.Last_neon
++.Less_than_192:
++	st1.8	{$A1-$D1},[sp]
++	b	.Last_neon
++
++.align	4
++.Last_neon:
++	sub	$out,$out,#1
++	add	$inp,$inp,$len
++	add	$out,$out,$len
++	add	$ctr,sp,$len
++	neg	$len,$len
++
++.Loop_tail_neon:
++	ldrb	w10,[$inp,$len]
++	ldrb	w11,[$ctr,$len]
++	add	$len,$len,#1
++	eor	w10,w10,w11
++	strb	w10,[$out,$len]
++	cbnz	$len,.Loop_tail_neon
++
++	stp	xzr,xzr,[sp,#0]
++	stp	xzr,xzr,[sp,#16]
++	stp	xzr,xzr,[sp,#32]
++	stp	xzr,xzr,[sp,#48]
++
++.Ldone_neon:
++	ldp	x19,x20,[x29,#16]
++	add	sp,sp,#64
++	ldp	x21,x22,[x29,#32]
++	ldp	x23,x24,[x29,#48]
++	ldp	x25,x26,[x29,#64]
++	ldp	x27,x28,[x29,#80]
++	ldp	x29,x30,[sp],#96
++	ret
++.size	ChaCha20_neon,.-ChaCha20_neon
++___
++{	# lexical scope for the 512-bytes-per-iteration path; everything it emits sits inside #ifdef __APPLE__
++my ($T0,$T1,$T2,$T3,$T4,$T5)=@K;	# temporaries share registers with @K[0..5], hence the reloads of @K from the stack later on
++my ($A0,$B0,$C0,$D0,$A1,$B1,$C1,$D1,$A2,$B2,$C2,$D2,	# six NEON states ...
++    $A3,$B3,$C3,$D3,$A4,$B4,$C4,$D4,$A5,$B5,$C5,$D5) = map("v$_.4s",(0..23));	# ... occupying v0-v23
++# ChaCha20_512_neon, part 1: prologue, off-load of the key block to the stack, d8-d15 save, state setup for six NEON blocks plus the first scalar block.
++$code.=<<___;
++#ifdef	__APPLE__
++.type	ChaCha20_512_neon,%function
++.align	5
++ChaCha20_512_neon:
++	stp	x29,x30,[sp,#-96]!
++	add	x29,sp,#0
++
++	adr	@x[0],.Lsigma
++	stp	x19,x20,[sp,#16]
++	stp	x21,x22,[sp,#32]
++	stp	x23,x24,[sp,#48]
++	stp	x25,x26,[sp,#64]
++	stp	x27,x28,[sp,#80]
++
++.L512_or_more_neon:
++	sub	sp,sp,#128+64
++
++	ldp	@d[0],@d[1],[@x[0]]		// load sigma
++	ld1	{@K[0]},[@x[0]],#16
++	ldp	@d[2],@d[3],[$key]		// load key
++	ldp	@d[4],@d[5],[$key,#16]
++	ld1	{@K[1],@K[2]},[$key]
++	ldp	@d[6],@d[7],[$ctr]		// load counter
++	ld1	{@K[3]},[$ctr]
++	ld1	{$ONE},[@x[0]]
++# ifdef	__AARCH64EB__
++	rev64	@K[0],@K[0]
++	ror	@d[2],@d[2],#32
++	ror	@d[3],@d[3],#32
++	ror	@d[4],@d[4],#32
++	ror	@d[5],@d[5],#32
++	ror	@d[6],@d[6],#32
++	ror	@d[7],@d[7],#32
++# endif
++	add	@K[3],@K[3],$ONE		// += 1
++	stp	@K[0],@K[1],[sp,#0]		// off-load key block, invariant part
++	add	@K[3],@K[3],$ONE		// not typo
++	str	@K[2],[sp,#32]
++	add	@K[4],@K[3],$ONE
++	add	@K[5],@K[4],$ONE
++	add	@K[6],@K[5],$ONE
++	shl	$ONE,$ONE,#2			// 1 -> 4
++
++	stp	d8,d9,[sp,#128+0]		// meet ABI requirements
++	stp	d10,d11,[sp,#128+16]
++	stp	d12,d13,[sp,#128+32]
++	stp	d14,d15,[sp,#128+48]
++
++	sub	$len,$len,#512			// not typo
++
++.Loop_outer_512_neon:
++	 mov	$A0,@K[0]
++	 mov	$A1,@K[0]
++	 mov	$A2,@K[0]
++	 mov	$A3,@K[0]
++	 mov	$A4,@K[0]
++	 mov	$A5,@K[0]
++	 mov	$B0,@K[1]
++	mov.32	@x[0],@d[0]			// unpack key block
++	 mov	$B1,@K[1]
++	lsr	@x[1],@d[0],#32
++	 mov	$B2,@K[1]
++	mov.32	@x[2],@d[1]
++	 mov	$B3,@K[1]
++	lsr	@x[3],@d[1],#32
++	 mov	$B4,@K[1]
++	mov.32	@x[4],@d[2]
++	 mov	$B5,@K[1]
++	lsr	@x[5],@d[2],#32
++	 mov	$D0,@K[3]
++	mov.32	@x[6],@d[3]
++	 mov	$D1,@K[4]
++	lsr	@x[7],@d[3],#32
++	 mov	$D2,@K[5]
++	mov.32	@x[8],@d[4]
++	 mov	$D3,@K[6]
++	lsr	@x[9],@d[4],#32
++	 mov	$C0,@K[2]
++	mov.32	@x[10],@d[5]
++	 mov	$C1,@K[2]
++	lsr	@x[11],@d[5],#32
++	 add	$D4,$D0,$ONE			// +4
++	mov.32	@x[12],@d[6]
++	 add	$D5,$D1,$ONE			// +4
++	lsr	@x[13],@d[6],#32
++	 mov	$C2,@K[2]
++	mov.32	@x[14],@d[7]
++	 mov	$C3,@K[2]
++	lsr	@x[15],@d[7],#32
++	 mov	$C4,@K[2]
++	 stp	@K[3],@K[4],[sp,#48]		// off-load key block, variable part
++	 mov	$C5,@K[2]
++	 str	@K[5],[sp,#80]
++
++	mov	$ctr,#5
++	subs	$len,$len,#512
++.Loop_upper_neon:
++	sub	$ctr,$ctr,#1
++___
++	# Body of .Loop_upper_neon.  Each of the two passes below emits one
++	# NEON round for all six vector states and, woven in after every
++	# NEON instruction, one instruction of a complete scalar double
++	# round (@thread67).  The emission order is the instruction
++	# schedule, so the order of the eval calls must not change.
++	my @thread0=&NEONROUND($A0,$B0,$C0,$D0,$T0,0);
++	my @thread1=&NEONROUND($A1,$B1,$C1,$D1,$T1,0);
++	my @thread2=&NEONROUND($A2,$B2,$C2,$D2,$T2,0);
++	my @thread3=&NEONROUND($A3,$B3,$C3,$D3,$T3,0);
++	my @thread4=&NEONROUND($A4,$B4,$C4,$D4,$T4,0);
++	my @thread5=&NEONROUND($A5,$B5,$C5,$D5,$T5,0);
++	my @thread67=(&ROUND(0,4,8,12),&ROUND(0,5,10,15));
++
++	# If @thread67 runs out before the loop ends, shift returns undef
++	# and eval of undef emits nothing, so the two streams do not have
++	# to be equally long.
++	foreach (@thread0) {
++		eval;			eval(shift(@thread67));
++		eval(shift(@thread1));	eval(shift(@thread67));
++		eval(shift(@thread2));	eval(shift(@thread67));
++		eval(shift(@thread3));	eval(shift(@thread67));
++		eval(shift(@thread4));	eval(shift(@thread67));
++		eval(shift(@thread5));	eval(shift(@thread67));
++	}
++
++	# Second pass: parity-1 NEON round, again paired with a full
++	# scalar double round.
++	@thread0=&NEONROUND($A0,$B0,$C0,$D0,$T0,1);
++	@thread1=&NEONROUND($A1,$B1,$C1,$D1,$T1,1);
++	@thread2=&NEONROUND($A2,$B2,$C2,$D2,$T2,1);
++	@thread3=&NEONROUND($A3,$B3,$C3,$D3,$T3,1);
++	@thread4=&NEONROUND($A4,$B4,$C4,$D4,$T4,1);
++	@thread5=&NEONROUND($A5,$B5,$C5,$D5,$T5,1);
++	@thread67=(&ROUND(0,4,8,12),&ROUND(0,5,10,15));
++
++	foreach (@thread0) {
++		eval;			eval(shift(@thread67));
++		eval(shift(@thread1));	eval(shift(@thread67));
++		eval(shift(@thread2));	eval(shift(@thread67));
++		eval(shift(@thread3));	eval(shift(@thread67));
++		eval(shift(@thread4));	eval(shift(@thread67));
++		eval(shift(@thread5));	eval(shift(@thread67));
++	}
++$code.=<<___;	# ChaCha20_512_neon, part 2: finish and store the first scalar block, bump the scalar counter by 1, reload scalar state for the second block
++	cbnz	$ctr,.Loop_upper_neon
++
++	add.32	@x[0],@x[0],@d[0]		// accumulate key block
++	add	@x[1],@x[1],@d[0],lsr#32
++	add.32	@x[2],@x[2],@d[1]
++	add	@x[3],@x[3],@d[1],lsr#32
++	add.32	@x[4],@x[4],@d[2]
++	add	@x[5],@x[5],@d[2],lsr#32
++	add.32	@x[6],@x[6],@d[3]
++	add	@x[7],@x[7],@d[3],lsr#32
++	add.32	@x[8],@x[8],@d[4]
++	add	@x[9],@x[9],@d[4],lsr#32
++	add.32	@x[10],@x[10],@d[5]
++	add	@x[11],@x[11],@d[5],lsr#32
++	add.32	@x[12],@x[12],@d[6]
++	add	@x[13],@x[13],@d[6],lsr#32
++	add.32	@x[14],@x[14],@d[7]
++	add	@x[15],@x[15],@d[7],lsr#32
++
++	add	@x[0],@x[0],@x[1],lsl#32	// pack
++	add	@x[2],@x[2],@x[3],lsl#32
++	ldp	@x[1],@x[3],[$inp,#0]		// load input
++	add	@x[4],@x[4],@x[5],lsl#32
++	add	@x[6],@x[6],@x[7],lsl#32
++	ldp	@x[5],@x[7],[$inp,#16]
++	add	@x[8],@x[8],@x[9],lsl#32
++	add	@x[10],@x[10],@x[11],lsl#32
++	ldp	@x[9],@x[11],[$inp,#32]
++	add	@x[12],@x[12],@x[13],lsl#32
++	add	@x[14],@x[14],@x[15],lsl#32
++	ldp	@x[13],@x[15],[$inp,#48]
++	add	$inp,$inp,#64
++# ifdef	__AARCH64EB__
++	rev	@x[0],@x[0]
++	rev	@x[2],@x[2]
++	rev	@x[4],@x[4]
++	rev	@x[6],@x[6]
++	rev	@x[8],@x[8]
++	rev	@x[10],@x[10]
++	rev	@x[12],@x[12]
++	rev	@x[14],@x[14]
++# endif
++	eor	@x[0],@x[0],@x[1]
++	eor	@x[2],@x[2],@x[3]
++	eor	@x[4],@x[4],@x[5]
++	eor	@x[6],@x[6],@x[7]
++	eor	@x[8],@x[8],@x[9]
++	eor	@x[10],@x[10],@x[11]
++	eor	@x[12],@x[12],@x[13]
++	eor	@x[14],@x[14],@x[15]
++
++	 stp	@x[0],@x[2],[$out,#0]		// store output
++	 add	@d[6],@d[6],#1			// increment counter
++	mov.32	@x[0],@d[0]			// unpack key block
++	lsr	@x[1],@d[0],#32
++	 stp	@x[4],@x[6],[$out,#16]
++	mov.32	@x[2],@d[1]
++	lsr	@x[3],@d[1],#32
++	 stp	@x[8],@x[10],[$out,#32]
++	mov.32	@x[4],@d[2]
++	lsr	@x[5],@d[2],#32
++	 stp	@x[12],@x[14],[$out,#48]
++	 add	$out,$out,#64
++	mov.32	@x[6],@d[3]
++	lsr	@x[7],@d[3],#32
++	mov.32	@x[8],@d[4]
++	lsr	@x[9],@d[4],#32
++	mov.32	@x[10],@d[5]
++	lsr	@x[11],@d[5],#32
++	mov.32	@x[12],@d[6]
++	lsr	@x[13],@d[6],#32
++	mov.32	@x[14],@d[7]
++	lsr	@x[15],@d[7],#32
++
++	mov	$ctr,#5
++.Loop_lower_neon:
++	sub	$ctr,$ctr,#1
++___
++	@thread0=&NEONROUND($A0,$B0,$C0,$D0,$T0,0);	# body of .Loop_lower_neon: parity-0 NEON round for all six states
++	@thread1=&NEONROUND($A1,$B1,$C1,$D1,$T1,0);
++	@thread2=&NEONROUND($A2,$B2,$C2,$D2,$T2,0);
++	@thread3=&NEONROUND($A3,$B3,$C3,$D3,$T3,0);
++	@thread4=&NEONROUND($A4,$B4,$C4,$D4,$T4,0);
++	@thread5=&NEONROUND($A5,$B5,$C5,$D5,$T5,0);
++	@thread67=(&ROUND(0,4,8,12),&ROUND(0,5,10,15));	# a full scalar double round to weave in
++
++	foreach (@thread0) {				# one scalar op after every NEON op; order is the schedule
++		eval;			eval(shift(@thread67));
++		eval(shift(@thread1));	eval(shift(@thread67));
++		eval(shift(@thread2));	eval(shift(@thread67));
++		eval(shift(@thread3));	eval(shift(@thread67));
++		eval(shift(@thread4));	eval(shift(@thread67));
++		eval(shift(@thread5));	eval(shift(@thread67));
++	}
++# Parity-1 NEON round, paired with another full scalar double round.
++	@thread0=&NEONROUND($A0,$B0,$C0,$D0,$T0,1);
++	@thread1=&NEONROUND($A1,$B1,$C1,$D1,$T1,1);
++	@thread2=&NEONROUND($A2,$B2,$C2,$D2,$T2,1);
++	@thread3=&NEONROUND($A3,$B3,$C3,$D3,$T3,1);
++	@thread4=&NEONROUND($A4,$B4,$C4,$D4,$T4,1);
++	@thread5=&NEONROUND($A5,$B5,$C5,$D5,$T5,1);
++	@thread67=(&ROUND(0,4,8,12),&ROUND(0,5,10,15));
++
++	foreach (@thread0) {
++		eval;			eval(shift(@thread67));
++		eval(shift(@thread1));	eval(shift(@thread67));
++		eval(shift(@thread2));	eval(shift(@thread67));
++		eval(shift(@thread3));	eval(shift(@thread67));
++		eval(shift(@thread4));	eval(shift(@thread67));
++		eval(shift(@thread5));	eval(shift(@thread67));
++	}
++$code.=<<___;	# ChaCha20_512_neon, part 3: reload @K from the stack, add key block, xor and store 7 blocks, advance counters by 8, loop or hand the remainder to the smaller paths
++	cbnz	$ctr,.Loop_lower_neon
++
++	add.32	@x[0],@x[0],@d[0]		// accumulate key block
++	 ldp	@K[0],@K[1],[sp,#0]
++	add	@x[1],@x[1],@d[0],lsr#32
++	 ldp	@K[2],@K[3],[sp,#32]
++	add.32	@x[2],@x[2],@d[1]
++	 ldp	@K[4],@K[5],[sp,#64]
++	add	@x[3],@x[3],@d[1],lsr#32
++	 add	$A0,$A0,@K[0]
++	add.32	@x[4],@x[4],@d[2]
++	 add	$A1,$A1,@K[0]
++	add	@x[5],@x[5],@d[2],lsr#32
++	 add	$A2,$A2,@K[0]
++	add.32	@x[6],@x[6],@d[3]
++	 add	$A3,$A3,@K[0]
++	add	@x[7],@x[7],@d[3],lsr#32
++	 add	$A4,$A4,@K[0]
++	add.32	@x[8],@x[8],@d[4]
++	 add	$A5,$A5,@K[0]
++	add	@x[9],@x[9],@d[4],lsr#32
++	 add	$C0,$C0,@K[2]
++	add.32	@x[10],@x[10],@d[5]
++	 add	$C1,$C1,@K[2]
++	add	@x[11],@x[11],@d[5],lsr#32
++	 add	$C2,$C2,@K[2]
++	add.32	@x[12],@x[12],@d[6]
++	 add	$C3,$C3,@K[2]
++	add	@x[13],@x[13],@d[6],lsr#32
++	 add	$C4,$C4,@K[2]
++	add.32	@x[14],@x[14],@d[7]
++	 add	$C5,$C5,@K[2]
++	add	@x[15],@x[15],@d[7],lsr#32
++	 add	$D4,$D4,$ONE			// +4
++	add	@x[0],@x[0],@x[1],lsl#32	// pack
++	 add	$D5,$D5,$ONE			// +4
++	add	@x[2],@x[2],@x[3],lsl#32
++	 add	$D0,$D0,@K[3]
++	ldp	@x[1],@x[3],[$inp,#0]		// load input
++	 add	$D1,$D1,@K[4]
++	add	@x[4],@x[4],@x[5],lsl#32
++	 add	$D2,$D2,@K[5]
++	add	@x[6],@x[6],@x[7],lsl#32
++	 add	$D3,$D3,@K[6]
++	ldp	@x[5],@x[7],[$inp,#16]
++	 add	$D4,$D4,@K[3]
++	add	@x[8],@x[8],@x[9],lsl#32
++	 add	$D5,$D5,@K[4]
++	add	@x[10],@x[10],@x[11],lsl#32
++	 add	$B0,$B0,@K[1]
++	ldp	@x[9],@x[11],[$inp,#32]
++	 add	$B1,$B1,@K[1]
++	add	@x[12],@x[12],@x[13],lsl#32
++	 add	$B2,$B2,@K[1]
++	add	@x[14],@x[14],@x[15],lsl#32
++	 add	$B3,$B3,@K[1]
++	ldp	@x[13],@x[15],[$inp,#48]
++	 add	$B4,$B4,@K[1]
++	add	$inp,$inp,#64
++	 add	$B5,$B5,@K[1]
++
++# ifdef	__AARCH64EB__
++	rev	@x[0],@x[0]
++	rev	@x[2],@x[2]
++	rev	@x[4],@x[4]
++	rev	@x[6],@x[6]
++	rev	@x[8],@x[8]
++	rev	@x[10],@x[10]
++	rev	@x[12],@x[12]
++	rev	@x[14],@x[14]
++# endif
++	ld1.8	{$T0-$T3},[$inp],#64
++	eor	@x[0],@x[0],@x[1]
++	eor	@x[2],@x[2],@x[3]
++	eor	@x[4],@x[4],@x[5]
++	eor	@x[6],@x[6],@x[7]
++	eor	@x[8],@x[8],@x[9]
++	 eor	$A0,$A0,$T0
++	eor	@x[10],@x[10],@x[11]
++	 eor	$B0,$B0,$T1
++	eor	@x[12],@x[12],@x[13]
++	 eor	$C0,$C0,$T2
++	eor	@x[14],@x[14],@x[15]
++	 eor	$D0,$D0,$T3
++	 ld1.8	{$T0-$T3},[$inp],#64
++
++	stp	@x[0],@x[2],[$out,#0]		// store output
++	 add	@d[6],@d[6],#7			// increment counter
++	stp	@x[4],@x[6],[$out,#16]
++	stp	@x[8],@x[10],[$out,#32]
++	stp	@x[12],@x[14],[$out,#48]
++	add	$out,$out,#64
++	st1.8	{$A0-$D0},[$out],#64
++
++	ld1.8	{$A0-$D0},[$inp],#64
++	eor	$A1,$A1,$T0
++	eor	$B1,$B1,$T1
++	eor	$C1,$C1,$T2
++	eor	$D1,$D1,$T3
++	st1.8	{$A1-$D1},[$out],#64
++
++	ld1.8	{$A1-$D1},[$inp],#64
++	eor	$A2,$A2,$A0
++	 ldp	@K[0],@K[1],[sp,#0]
++	eor	$B2,$B2,$B0
++	 ldp	@K[2],@K[3],[sp,#32]
++	eor	$C2,$C2,$C0
++	eor	$D2,$D2,$D0
++	st1.8	{$A2-$D2},[$out],#64
++
++	ld1.8	{$A2-$D2},[$inp],#64
++	eor	$A3,$A3,$A1
++	eor	$B3,$B3,$B1
++	eor	$C3,$C3,$C1
++	eor	$D3,$D3,$D1
++	st1.8	{$A3-$D3},[$out],#64
++
++	ld1.8	{$A3-$D3},[$inp],#64
++	eor	$A4,$A4,$A2
++	eor	$B4,$B4,$B2
++	eor	$C4,$C4,$C2
++	eor	$D4,$D4,$D2
++	st1.8	{$A4-$D4},[$out],#64
++
++	shl	$A0,$ONE,#1			// 4 -> 8
++	eor	$A5,$A5,$A3
++	eor	$B5,$B5,$B3
++	eor	$C5,$C5,$C3
++	eor	$D5,$D5,$D3
++	st1.8	{$A5-$D5},[$out],#64
++
++	add	@K[3],@K[3],$A0			// += 8
++	add	@K[4],@K[4],$A0
++	add	@K[5],@K[5],$A0
++	add	@K[6],@K[6],$A0
++
++	b.hs	.Loop_outer_512_neon
++
++	adds	$len,$len,#512
++	ushr	$A0,$ONE,#2			// 4 -> 1
++
++	ldp	d8,d9,[sp,#128+0]		// meet ABI requirements
++	ldp	d10,d11,[sp,#128+16]
++	ldp	d12,d13,[sp,#128+32]
++	ldp	d14,d15,[sp,#128+48]
++
++	stp	@K[0],$ONE,[sp,#0]		// wipe off-load area
++	stp	@K[0],$ONE,[sp,#32]
++	stp	@K[0],$ONE,[sp,#64]
++
++	b.eq	.Ldone_512_neon
++
++	cmp	$len,#192
++	sub	@K[3],@K[3],$A0			// -= 1
++	sub	@K[4],@K[4],$A0
++	sub	@K[5],@K[5],$A0
++	add	sp,sp,#128
++	b.hs	.Loop_outer_neon
++
++	eor	@K[1],@K[1],@K[1]
++	eor	@K[2],@K[2],@K[2]
++	eor	@K[3],@K[3],@K[3]
++	eor	@K[4],@K[4],@K[4]
++	eor	@K[5],@K[5],@K[5]
++	eor	@K[6],@K[6],@K[6]
++	b	.Loop_outer
++
++.Ldone_512_neon:
++	ldp	x19,x20,[x29,#16]
++	add	sp,sp,#128+64
++	ldp	x21,x22,[x29,#32]
++	ldp	x23,x24,[x29,#48]
++	ldp	x25,x26,[x29,#64]
++	ldp	x27,x28,[x29,#80]
++	ldp	x29,x30,[sp],#96
++	ret
++.size	ChaCha20_512_neon,.-ChaCha20_512_neon
++#endif
++#endif
++___
++}	# end of the 512-byte path scope
++}}}	# end of the NEON section
++
++# Copy this script's leading comment block into the output: the
++# shebang line is dropped, a leading '#' becomes '//', and copying
++# stops at the first line that is neither a comment nor empty.
++open SELF,$0;
++while(<SELF>) {
++	next if (/^#!/);
++	last if (!s/^#/\/\// and !/^$/);
++	print;
++}
++close SELF;
++
++# Translate the perlasm shorthand accumulated in $code into plain
++# assembly, one line at a time.  Only the first matching rule of the
++# or-chain is applied to a line.
++foreach (split("\n",$code)) {
++	s/\`([^\`]*)\`/eval $1/geo;	# expand `...` expressions at generation time
++
++	(s/\b([a-z]+)\.32\b/$1/ and (s/x([0-9]+)/w$1/g or 1))	or	# op.32: drop the suffix, use w registers
++	(m/\b(eor|ext|mov)\b/ and (s/\.4s/\.16b/g or 1))	or	# eor/ext/mov: .4s operands become .16b
++	(s/\b((?:ld|st)1)\.8\b/$1/ and (s/\.4s/\.16b/g or 1))	or	# ld1.8/st1.8: drop the suffix, .16b operands
++	(m/\b(ld|st)[rp]\b/ and (s/v([0-9]+)\.4s/q$1/g or 1))	or	# ldr/ldp/str/stp: vN.4s becomes qN
++	(s/\brev32\.16\b/rev32/ and (s/\.4s/\.8h/g or 1));		# rev32.16: rev32 on .8h operands
++
++	print $_,"\n";
++}
++# Closing flushes the output.  A failure here (write error, or a failing
++# filter when STDOUT is a pipe) must abort instead of leaving a truncated
++# file behind with a successful exit status.
++close STDOUT or die "error closing STDOUT: $!";	# flush
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/chacha20/chacha20-arm.pl	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,1227 @@
++#!/usr/bin/env perl
++# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
++#
++# This code is taken from the OpenSSL project but the author, Andy Polyakov,
++# has relicensed it under the licenses specified in the SPDX header above.
++# The original headers, including the original license headers, are
++# included below for completeness.
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# December 2014
++#
++# ChaCha20 for ARMv4.
++#
++# September 2018
++#
++# Improve scalar performance per Eric Biggers' suggestion to eliminate
++# separate rotates. This requires b[0..3] and d[0..3] to be maintained
++# pre-rotated, hence the odd twists prior to the inner loop and when
++# accumulating key material. Since the number of instructions is reduced
++# as a result, even NEON performance is improved somewhat, most notably
++# by ~9% on low-end Cortex-A5/A7. Full unroll was shown to provide even
++# better scalar performance on Cortex-A5/A7, naturally at the cost of a
++# manyfold size increase. We let it be. Oversized code works in benchmarks,
++# but is not necessarily optimal in real life, when it's likely to be
++# out-of-cache upon entry and to evict a significant part of the cache upon completion.
++#
++# Performance in cycles per byte out of large buffer.
++#
++#			IALU/gcc-4.4    1xNEON      3xNEON+1xIALU
++#
++# Cortex-A5		14.2(*)/+160%   21.8        12.9(**)
++# Cortex-A8		10.2(*)/+190%   13.9        6.10
++# Cortex-A9		10.8(*)/+150%   14.3        6.50
++# Cortex-A15		11.0/+40%       16.0        4.90
++# Snapdragon S4		13.9(***)/+90%  13.6        4.90
++#
++# (*)	most "favourable" result for aligned data on little-endian
++#	processor, result for misaligned data is 10-15% lower;
++# (**)	pure 4xNEON [with "vertical" layout] was shown to provide ~8%
++#	better performance on Cortex-A5/A7, but not on others;
++# (***)	it's 17% slower than original, trade-off is considered
++#	acceptable, because of improvement on others, specifically
++#	+36% on Cortex-A5/A7 and +20% on Cortex-A9;
++
++# Command line: [flavour] [output].  An argument that looks like a file
++# name (word characters, a dot, an extension) is taken as the output
++# path; if the first argument already looks like one, there is no
++# flavour.
++$flavour = shift;
++if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
++else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
++
++if ($flavour && $flavour ne "void") {
++    # A real flavour: pipe everything through arm-xlate.pl, looked up
++    # next to this script or in ../../perlasm relative to it.
++    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
++    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
++    die "can't locate arm-xlate.pl";
++
++    # Fail loudly if the filter cannot be started.
++    open STDOUT,"| \"$^X\" $xlate $flavour $output"
++	or die "can't call $xlate: $!";
++} else {
++    # No flavour or "void": write the generated text directly.  This
++    # open is left unchecked on purpose: $output can be empty here.
++    # NOTE(review): presumably the caller then relies on its own stdout
++    # redirection; confirm against the build rules before adding a die.
++    open STDOUT,">$output";
++}
++
++sub AUTOLOAD()		# thunk [simplified] x86-style perlasm
++{
++  # Any call to an undefined sub, e.g. &add(...), lands here and is
++  # appended to $code as one assembly line.  The sub name, with its
++  # package prefix removed and its first '_' turned into '.', is the
++  # mnemonic; the arguments are the comma-separated operands.
++  (my $mnemonic = $AUTOLOAD) =~ s/.*:://;
++  $mnemonic =~ s/_/\./;
++  my $last = pop;
++  $last = "#$last" if ($last*1 eq $last);	# a plain number becomes a '#' immediate
++  $code .= "\t$mnemonic\t".join(',',@_,$last)."\n";
++}
++
++my @x=map("r$_",(0..7,"x","x","x","x",12,"x",14,"x"));	# state words 0-7, 12 and 14 map to r0-r7, r12, r14; "rx" marks words that have no fixed register
++my @t=map("r$_",(8..11));				# r8-r11: working registers
++# ROUND(a0,b0,c0,d0): return, as strings to be eval'ed, the four quarter-rounds of one round; the other three index sets are derived from the given one.
++sub ROUND {
++my ($a0,$b0,$c0,$d0)=@_;
++my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));	# next index: +1 modulo 4 within the same group of four
++my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));
++my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));
++my $odd = $d0&1;			# 0 when called with d0=12, 1 when called with d0=15
++my ($xc,$xc_) = (@t[0..1]);		# registers holding the current pair of 'c' words
++my ($xd,$xd_) = $odd ? (@t[2],@x[$d1]) : (@x[$d0],@t[2]);	# one 'd' of the pair is in @x, the other in @t[2]
++my @ret;
++
++	# Consider order in which variables are addressed by their
++	# index:
++	#
++	#       a   b   c   d
++	#
++	#       0   4   8  12 < even round
++	#       1   5   9  13
++	#       2   6  10  14
++	#       3   7  11  15
++	#       0   5  10  15 < odd round
++	#       1   6  11  12
++	#       2   7   8  13
++	#       3   4   9  14
++	#
++	# 'a', 'b' are permanently allocated in registers, @x[0..7],
++	# while 'c's and pair of 'd's are maintained in memory. If
++	# you observe 'c' column, you'll notice that pair of 'c's is
++	# invariant between rounds. This means that we have to reload
++	# them once per round, in the middle. This is why you'll see
++	# bunch of 'c' stores and loads in the middle, but none in
++	# the beginning or end. If you observe 'd' column, you'll
++	# notice that 15 and 13 are reused in next pair of rounds.
++	# This is why these two are chosen for offloading to memory,
++	# to make loads count more.
++							push @ret,(	# first pair of quarter-rounds; rotations are folded into the shifted operands
++	"&add	(@x[$a0],@x[$a0],@x[$b0],'ror#13')",
++	 "&add	(@x[$a1],@x[$a1],@x[$b1],'ror#13')",
++	"&eor	($xd,@x[$a0],$xd,'ror#24')",
++	 "&eor	($xd_,@x[$a1],$xd_,'ror#24')",
++
++	"&add	($xc,$xc,$xd,'ror#16')",
++	 "&add	($xc_,$xc_,$xd_,'ror#16')",
++	"&eor	(@x[$b0],$xc, @x[$b0],'ror#13')",
++	 "&eor	(@x[$b1],$xc_,@x[$b1],'ror#13')",
++
++	"&add	(@x[$a0],@x[$a0],@x[$b0],'ror#20')",
++	 "&add	(@x[$a1],@x[$a1],@x[$b1],'ror#20')",
++	"&eor	($xd,@x[$a0],$xd,'ror#16')",
++	 "&eor	($xd_,@x[$a1],$xd_,'ror#16')"		);
++							push @ret,(
++	"&str	($xd,'[sp,#4*(16+$d0)]')"		) if ($odd);	# spill the 'd' held in @t[2] ...
++							push @ret,(
++	"&add	($xc,$xc,$xd,'ror#24')"			);
++							push @ret,(
++	"&ldr	($xd,'[sp,#4*(16+$d2)]')"		) if ($odd);	# ... and fetch the one needed by the second pair
++							push @ret,(
++	 "&str	($xd_,'[sp,#4*(16+$d1)]')"		) if (!$odd);
++							push @ret,(
++	 "&add	($xc_,$xc_,$xd_,'ror#24')"		);
++							push @ret,(
++	 "&ldr	($xd_,'[sp,#4*(16+$d3)]')"		) if (!$odd);
++							push @ret,(	# store the finished pair of 'c' words
++	"&str	($xc,'[sp,#4*(16+$c0)]')",
++	"&eor	(@x[$b0],@x[$b0],$xc,'ror#12')",
++	 "&str	($xc_,'[sp,#4*(16+$c1)]')",
++	 "&eor	(@x[$b1],@x[$b1],$xc_,'ror#12')"	);
++
++	$xd=@x[$d2]					if (!$odd);	# the register-resident 'd' of the second pair
++	$xd_=@x[$d3]					if ($odd);
++							push @ret,(	# second pair of quarter-rounds, starting with the 'c' reload
++	"&ldr	($xc,'[sp,#4*(16+$c2)]')",
++	"&add	(@x[$a2],@x[$a2],@x[$b2],'ror#13')",
++	 "&ldr	($xc_,'[sp,#4*(16+$c3)]')",
++	 "&add	(@x[$a3],@x[$a3],@x[$b3],'ror#13')",
++	"&eor	($xd,@x[$a2],$xd,'ror#24')",
++	 "&eor	($xd_,@x[$a3],$xd_,'ror#24')",
++
++	"&add	($xc,$xc,$xd,'ror#16')",
++	 "&add	($xc_,$xc_,$xd_,'ror#16')",
++	"&eor	(@x[$b2],$xc, @x[$b2],'ror#13')",
++	 "&eor	(@x[$b3],$xc_,@x[$b3],'ror#13')",
++
++	"&add	(@x[$a2],@x[$a2],@x[$b2],'ror#20')",
++	 "&add	(@x[$a3],@x[$a3],@x[$b3],'ror#20')",
++	"&eor	($xd,@x[$a2],$xd,'ror#16')",
++	 "&eor	($xd_,@x[$a3],$xd_,'ror#16')",
++
++	"&add	($xc,$xc,$xd,'ror#24')",
++	 "&add	($xc_,$xc_,$xd_,'ror#24')",
++	"&eor	(@x[$b2],@x[$b2],$xc,'ror#12')",
++	 "&eor	(@x[$b3],@x[$b3],$xc_,'ror#12')"	);
++
++	@ret;	# list of instruction strings, in emission order
++}
++
++$code.=<<___;
++#ifndef __KERNEL__
++# include "arm_arch.h"
++#else
++# define __ARM_ARCH__ __LINUX_ARM_ARCH__
++# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
++# define ChaCha20_ctr32 chacha20_arm_cryptogams
++# define ChaCha20_neon  chacha20_neon
++#endif
++
++.text
++#if defined(__thumb2__) || defined(__clang__)
++.syntax	unified
++# define ldrhsb	ldrbhs
++#endif
++#if defined(__thumb2__)
++.thumb
++#else
++.code	32
++#endif
++
++.align	5
++.Lsigma:
++.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	@ endian-neutral
++.Lone:
++.long	1,0,0,0
++.Lrot8:
++.long	0x02010003,0x06050407
++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
++.LOPENSSL_armcap:
++.word   OPENSSL_armcap_P-.LChaCha20_ctr32
++#else
++.word	-1
++#endif
++
++.globl	ChaCha20_ctr32
++.type	ChaCha20_ctr32,%function
++.align	5
++ChaCha20_ctr32:
++.LChaCha20_ctr32:
++	ldr	r12,[sp,#0]		@ pull pointer to counter and nonce
++	stmdb	sp!,{r0-r2,r4-r11,lr}
++#if __ARM_ARCH__<7 && !defined(__thumb2__)
++	sub	r14,pc,#16		@ ChaCha20_ctr32
++#else
++	adr	r14,.LChaCha20_ctr32
++#endif
++	cmp	r2,#0			@ len==0?
++#ifdef	__thumb2__
++	itt	eq
++#endif
++	addeq	sp,sp,#4*3
++	beq	.Lno_data
++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
++	cmp	r2,#192			@ test len
++	bls	.Lshort
++	ldr	r4,[r14,#-24]
++	ldr	r4,[r14,r4]
++# ifdef	__APPLE__
++	ldr	r4,[r4]
++# endif
++	tst	r4,#ARMV7_NEON
++	bne	.LChaCha20_neon
++.Lshort:
++#endif
++	ldmia	r12,{r4-r7}		@ load counter and nonce
++	sub	sp,sp,#4*(16)		@ off-load area
++	sub	r14,r14,#64		@ .Lsigma
++	stmdb	sp!,{r4-r7}		@ copy counter and nonce
++	ldmia	r3,{r4-r11}		@ load key
++	ldmia	r14,{r0-r3}		@ load sigma
++	stmdb	sp!,{r4-r11}		@ copy key
++	stmdb	sp!,{r0-r3}		@ copy sigma
++	str	r10,[sp,#4*(16+10)]	@ off-load "@x[10]"
++	str	r11,[sp,#4*(16+11)]	@ off-load "@x[11]"
++	b	.Loop_outer_enter
++
++.align	4
++.Loop_outer:
++	ldmia	sp,{r0-r9}		@ load key material
++	str	@t[3],[sp,#4*(32+2)]	@ save len
++	str	r12,  [sp,#4*(32+1)]	@ save inp
++	str	r14,  [sp,#4*(32+0)]	@ save out
++.Loop_outer_enter:
++	ldr	@t[3], [sp,#4*(15)]
++	 mov	@x[4],@x[4],ror#19	@ twist b[0..3]
++	ldr	@x[12],[sp,#4*(12)]	@ modulo-scheduled load
++	 mov	@x[5],@x[5],ror#19
++	ldr	@t[2], [sp,#4*(13)]
++	 mov	@x[6],@x[6],ror#19
++	ldr	@x[14],[sp,#4*(14)]
++	 mov	@x[7],@x[7],ror#19
++	mov	@t[3],@t[3],ror#8	@ twist d[0..3]
++	mov	@x[12],@x[12],ror#8
++	mov	@t[2],@t[2],ror#8
++	mov	@x[14],@x[14],ror#8
++	str	@t[3], [sp,#4*(16+15)]
++	mov	@t[3],#10
++	b	.Loop
++
++.align	4
++.Loop:
++	subs	@t[3],@t[3],#1
++___
++	foreach (&ROUND(0, 4, 8,12)) { eval; }
++	foreach (&ROUND(0, 5,10,15)) { eval; }
++$code.=<<___;
++	bne	.Loop
++
++	ldr	@t[3],[sp,#4*(32+2)]	@ load len
++
++	str	@t[0], [sp,#4*(16+8)]	@ modulo-scheduled store
++	str	@t[1], [sp,#4*(16+9)]
++	str	@x[12],[sp,#4*(16+12)]
++	str	@t[2], [sp,#4*(16+13)]
++	str	@x[14],[sp,#4*(16+14)]
++
++	@ at this point we have first half of 512-bit result in
++	@ @x[0-7] and second half at sp+4*(16+8)
++
++	cmp	@t[3],#64		@ done yet?
++#ifdef	__thumb2__
++	itete	lo
++#endif
++	addlo	r12,sp,#4*(0)		@ shortcut or ...
++	ldrhs	r12,[sp,#4*(32+1)]	@ ... load inp
++	addlo	r14,sp,#4*(0)		@ shortcut or ...
++	ldrhs	r14,[sp,#4*(32+0)]	@ ... load out
++
++	ldr	@t[0],[sp,#4*(0)]	@ load key material
++	ldr	@t[1],[sp,#4*(1)]
++
++#if __ARM_ARCH__>=6 || !defined(__ARMEB__)
++# if __ARM_ARCH__<7
++	orr	@t[2],r12,r14
++	tst	@t[2],#3		@ are input and output aligned?
++	ldr	@t[2],[sp,#4*(2)]
++	bne	.Lunaligned
++	cmp	@t[3],#64		@ restore flags
++# else
++	ldr	@t[2],[sp,#4*(2)]
++# endif
++	ldr	@t[3],[sp,#4*(3)]
++
++	add	@x[0],@x[0],@t[0]	@ accumulate key material
++	add	@x[1],@x[1],@t[1]
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhs	@t[0],[r12],#16		@ load input
++	ldrhs	@t[1],[r12,#-12]
++
++	add	@x[2],@x[2],@t[2]
++	add	@x[3],@x[3],@t[3]
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhs	@t[2],[r12,#-8]
++	ldrhs	@t[3],[r12,#-4]
++# if __ARM_ARCH__>=6 && defined(__ARMEB__)
++	rev	@x[0],@x[0]
++	rev	@x[1],@x[1]
++	rev	@x[2],@x[2]
++	rev	@x[3],@x[3]
++# endif
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	eorhs	@x[0],@x[0],@t[0]	@ xor with input
++	eorhs	@x[1],@x[1],@t[1]
++	 add	@t[0],sp,#4*(4)
++	str	@x[0],[r14],#16		@ store output
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	eorhs	@x[2],@x[2],@t[2]
++	eorhs	@x[3],@x[3],@t[3]
++	 ldmia	@t[0],{@t[0]-@t[3]}	@ load key material
++	str	@x[1],[r14,#-12]
++	str	@x[2],[r14,#-8]
++	str	@x[3],[r14,#-4]
++
++	add	@x[4],@t[0],@x[4],ror#13 @ accumulate key material
++	add	@x[5],@t[1],@x[5],ror#13
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhs	@t[0],[r12],#16		@ load input
++	ldrhs	@t[1],[r12,#-12]
++	add	@x[6],@t[2],@x[6],ror#13
++	add	@x[7],@t[3],@x[7],ror#13
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhs	@t[2],[r12,#-8]
++	ldrhs	@t[3],[r12,#-4]
++# if __ARM_ARCH__>=6 && defined(__ARMEB__)
++	rev	@x[4],@x[4]
++	rev	@x[5],@x[5]
++	rev	@x[6],@x[6]
++	rev	@x[7],@x[7]
++# endif
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	eorhs	@x[4],@x[4],@t[0]
++	eorhs	@x[5],@x[5],@t[1]
++	 add	@t[0],sp,#4*(8)
++	str	@x[4],[r14],#16		@ store output
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	eorhs	@x[6],@x[6],@t[2]
++	eorhs	@x[7],@x[7],@t[3]
++	str	@x[5],[r14,#-12]
++	 ldmia	@t[0],{@t[0]-@t[3]}	@ load key material
++	str	@x[6],[r14,#-8]
++	 add	@x[0],sp,#4*(16+8)
++	str	@x[7],[r14,#-4]
++
++	ldmia	@x[0],{@x[0]-@x[7]}	@ load second half
++
++	add	@x[0],@x[0],@t[0]	@ accumulate key material
++	add	@x[1],@x[1],@t[1]
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhs	@t[0],[r12],#16		@ load input
++	ldrhs	@t[1],[r12,#-12]
++# ifdef	__thumb2__
++	itt	hi
++# endif
++	 strhi	@t[2],[sp,#4*(16+10)]	@ copy "@x[10]" while at it
++	 strhi	@t[3],[sp,#4*(16+11)]	@ copy "@x[11]" while at it
++	add	@x[2],@x[2],@t[2]
++	add	@x[3],@x[3],@t[3]
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhs	@t[2],[r12,#-8]
++	ldrhs	@t[3],[r12,#-4]
++# if __ARM_ARCH__>=6 && defined(__ARMEB__)
++	rev	@x[0],@x[0]
++	rev	@x[1],@x[1]
++	rev	@x[2],@x[2]
++	rev	@x[3],@x[3]
++# endif
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	eorhs	@x[0],@x[0],@t[0]
++	eorhs	@x[1],@x[1],@t[1]
++	 add	@t[0],sp,#4*(12)
++	str	@x[0],[r14],#16		@ store output
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	eorhs	@x[2],@x[2],@t[2]
++	eorhs	@x[3],@x[3],@t[3]
++	str	@x[1],[r14,#-12]
++	 ldmia	@t[0],{@t[0]-@t[3]}	@ load key material
++	str	@x[2],[r14,#-8]
++	str	@x[3],[r14,#-4]
++
++	add	@x[4],@t[0],@x[4],ror#24 @ accumulate key material
++	add	@x[5],@t[1],@x[5],ror#24
++# ifdef	__thumb2__
++	itt	hi
++# endif
++	 addhi	@t[0],@t[0],#1		@ next counter value
++	 strhi	@t[0],[sp,#4*(12)]	@ save next counter value
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhs	@t[0],[r12],#16		@ load input
++	ldrhs	@t[1],[r12,#-12]
++	add	@x[6],@t[2],@x[6],ror#24
++	add	@x[7],@t[3],@x[7],ror#24
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhs	@t[2],[r12,#-8]
++	ldrhs	@t[3],[r12,#-4]
++# if __ARM_ARCH__>=6 && defined(__ARMEB__)
++	rev	@x[4],@x[4]
++	rev	@x[5],@x[5]
++	rev	@x[6],@x[6]
++	rev	@x[7],@x[7]
++# endif
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	eorhs	@x[4],@x[4],@t[0]
++	eorhs	@x[5],@x[5],@t[1]
++# ifdef	__thumb2__
++	 it	ne
++# endif
++	 ldrne	@t[0],[sp,#4*(32+2)]	@ re-load len
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	eorhs	@x[6],@x[6],@t[2]
++	eorhs	@x[7],@x[7],@t[3]
++	str	@x[4],[r14],#16		@ store output
++	str	@x[5],[r14,#-12]
++# ifdef	__thumb2__
++	it	hs
++# endif
++	 subhs	@t[3],@t[0],#64		@ len-=64
++	str	@x[6],[r14,#-8]
++	str	@x[7],[r14,#-4]
++	bhi	.Loop_outer
++
++	beq	.Ldone
++# if __ARM_ARCH__<7
++	b	.Ltail
++
++.align	4
++.Lunaligned:				@ unaligned endian-neutral path
++	cmp	@t[3],#64		@ restore flags
++# endif
++#endif
++#if __ARM_ARCH__<7
++	ldr	@t[3],[sp,#4*(3)]
++___
++for ($i=0;$i<16;$i+=4) {
++my $j=$i&0x7;
++my $twist="";
++if ($i==4)     { $twist = ",ror#13"; }
++elsif ($i==12) { $twist = ",ror#24"; }
++
++$code.=<<___	if ($i==4);
++	add	@x[0],sp,#4*(16+8)
++___
++$code.=<<___	if ($i==8);
++	ldmia	@x[0],{@x[0]-@x[7]}		@ load second half
++# ifdef	__thumb2__
++	itt	hi
++# endif
++	strhi	@t[2],[sp,#4*(16+10)]		@ copy "@x[10]"
++	strhi	@t[3],[sp,#4*(16+11)]		@ copy "@x[11]"
++___
++$code.=<<___;
++	add	@x[$j+0],@t[0],@x[$j+0]$twist	@ accumulate key material
++___
++$code.=<<___	if ($i==12);
++# ifdef	__thumb2__
++	itt	hi
++# endif
++	addhi	@t[0],@t[0],#1			@ next counter value
++	strhi	@t[0],[sp,#4*(12)]		@ save next counter value
++___
++$code.=<<___;
++	add	@x[$j+1],@t[1],@x[$j+1]$twist
++	add	@x[$j+2],@t[2],@x[$j+2]$twist
++# ifdef	__thumb2__
++	itete	lo
++# endif
++	eorlo	@t[0],@t[0],@t[0]		@ zero or ...
++	ldrhsb	@t[0],[r12],#16			@ ... load input
++	eorlo	@t[1],@t[1],@t[1]
++	ldrhsb	@t[1],[r12,#-12]
++
++	add	@x[$j+3],@t[3],@x[$j+3]$twist
++# ifdef	__thumb2__
++	itete	lo
++# endif
++	eorlo	@t[2],@t[2],@t[2]
++	ldrhsb	@t[2],[r12,#-8]
++	eorlo	@t[3],@t[3],@t[3]
++	ldrhsb	@t[3],[r12,#-4]
++
++	eor	@x[$j+0],@t[0],@x[$j+0]		@ xor with input (or zero)
++	eor	@x[$j+1],@t[1],@x[$j+1]
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhsb	@t[0],[r12,#-15]		@ load more input
++	ldrhsb	@t[1],[r12,#-11]
++	eor	@x[$j+2],@t[2],@x[$j+2]
++	 strb	@x[$j+0],[r14],#16		@ store output
++	eor	@x[$j+3],@t[3],@x[$j+3]
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhsb	@t[2],[r12,#-7]
++	ldrhsb	@t[3],[r12,#-3]
++	 strb	@x[$j+1],[r14,#-12]
++	eor	@x[$j+0],@t[0],@x[$j+0],lsr#8
++	 strb	@x[$j+2],[r14,#-8]
++	eor	@x[$j+1],@t[1],@x[$j+1],lsr#8
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhsb	@t[0],[r12,#-14]		@ load more input
++	ldrhsb	@t[1],[r12,#-10]
++	 strb	@x[$j+3],[r14,#-4]
++	eor	@x[$j+2],@t[2],@x[$j+2],lsr#8
++	 strb	@x[$j+0],[r14,#-15]
++	eor	@x[$j+3],@t[3],@x[$j+3],lsr#8
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhsb	@t[2],[r12,#-6]
++	ldrhsb	@t[3],[r12,#-2]
++	 strb	@x[$j+1],[r14,#-11]
++	eor	@x[$j+0],@t[0],@x[$j+0],lsr#8
++	 strb	@x[$j+2],[r14,#-7]
++	eor	@x[$j+1],@t[1],@x[$j+1],lsr#8
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhsb	@t[0],[r12,#-13]		@ load more input
++	ldrhsb	@t[1],[r12,#-9]
++	 strb	@x[$j+3],[r14,#-3]
++	eor	@x[$j+2],@t[2],@x[$j+2],lsr#8
++	 strb	@x[$j+0],[r14,#-14]
++	eor	@x[$j+3],@t[3],@x[$j+3],lsr#8
++# ifdef	__thumb2__
++	itt	hs
++# endif
++	ldrhsb	@t[2],[r12,#-5]
++	ldrhsb	@t[3],[r12,#-1]
++	 strb	@x[$j+1],[r14,#-10]
++	 strb	@x[$j+2],[r14,#-6]
++	eor	@x[$j+0],@t[0],@x[$j+0],lsr#8
++	 strb	@x[$j+3],[r14,#-2]
++	eor	@x[$j+1],@t[1],@x[$j+1],lsr#8
++	 strb	@x[$j+0],[r14,#-13]
++	eor	@x[$j+2],@t[2],@x[$j+2],lsr#8
++	 strb	@x[$j+1],[r14,#-9]
++	eor	@x[$j+3],@t[3],@x[$j+3],lsr#8
++	 strb	@x[$j+2],[r14,#-5]
++	 strb	@x[$j+3],[r14,#-1]
++___
++$code.=<<___	if ($i<12);
++	add	@t[0],sp,#4*(4+$i)
++	ldmia	@t[0],{@t[0]-@t[3]}		@ load key material
++___
++}
++$code.=<<___;
++# ifdef	__thumb2__
++	it	ne
++# endif
++	ldrne	@t[0],[sp,#4*(32+2)]		@ re-load len
++# ifdef	__thumb2__
++	it	hs
++# endif
++	subhs	@t[3],@t[0],#64			@ len-=64
++	bhi	.Loop_outer
++
++	beq	.Ldone
++#endif
++
++.Ltail:
++	ldr	r12,[sp,#4*(32+1)]	@ load inp
++	add	@t[1],sp,#4*(0)
++	ldr	r14,[sp,#4*(32+0)]	@ load out
++
++.Loop_tail:
++	ldrb	@t[2],[@t[1]],#1	@ read buffer on stack
++	ldrb	@t[3],[r12],#1		@ read input
++	subs	@t[0],@t[0],#1
++	eor	@t[3],@t[3],@t[2]
++	strb	@t[3],[r14],#1		@ store output
++	bne	.Loop_tail
++
++.Ldone:
++	add	sp,sp,#4*(32+3)
++.Lno_data:
++#if __ARM_ARCH__>=5
++	ldmia	sp!,{r4-r11,pc}
++#else
++	ldmia	sp!,{r4-r12,lr}
++	tst	lr,#1
++	moveq	pc,lr			@ be binary compatible with V4, yet
++	.long	0xe12fff1e		@ interoperable with Thumb ISA:-)
++#endif
++.size	ChaCha20_ctr32,.-ChaCha20_ctr32
++___
++
++{{{
++my ($a0,$b0,$c0,$d0,$a1,$b1,$c1,$d1,$a2,$b2,$c2,$d2,$t0,$t1,$t2,$t3) =
++    map("q$_",(0..15));
++
++# This can replace vshr-by-24+vsli-by-8. It gives ~3% improvement on
++# Cortex-A5/A7, but hurts Cortex-A9 by 5% and Snapdragon S4 by 14%!
++sub vperm()	# append a vtbl.8 byte permutation of $src into $dst, one 64-bit half per instruction
++{ my ($to,$from,$table) = @_;
++    # both halves are looked up through the low half of the table register
++    $code .= "	vtbl.8	$to#$_,{$from#$_},$table#lo\n" foreach ("lo","hi");
++}
++
++sub NEONROUND {	# returns one round as a list of perlasm call strings; nothing is emitted until the caller evals them
++my $odd = pop;	# trailing argument: selects the rotation amounts used by the final vext_8 calls
++my ($a,$b,$c,$d,$t)=@_;	# four row registers and one scratch register
++
++	(
++	"&vadd_i32	($a,$a,$b)",	# a += b
++	"&veor		($d,$d,$a)",	# d ^= a
++	"&vrev32_16	($d,$d)",	# vrot ($d,16)
++
++	"&vadd_i32	($c,$c,$d)",	# c += d
++	"&veor		($t,$b,$c)",	# t = b ^ c
++	"&vshr_u32	($b,$t,20)",	# b = t >> 20 ...
++	"&vsli_32	($b,$t,12)",	# ... then t << 12 inserted: rotate left by 12
++
++	"&vadd_i32	($a,$a,$b)",	# a += b
++	"&veor		($t,$d,$a)",	# t = d ^ a
++	"&vshr_u32	($d,$t,24)",	# d = t >> 24 ...
++	"&vsli_32	($d,$t,8)",	# ... then t << 8 inserted: rotate left by 8
++	#"&vperm	($d,$t,$t3)",	# table-lookup alternative to the two lines above, left disabled
++
++	"&vadd_i32	($c,$c,$d)",	# c += d
++	"&veor		($t,$b,$c)",	# t = b ^ c
++	"&vshr_u32	($b,$t,25)",	# b = t >> 25 ...
++	"&vsli_32	($b,$t,7)",	# ... then t << 7 inserted: rotate left by 7
++
++	"&vext_8	($a,$a,$a,$odd?4:12)",	# rotate row a by 4 or 12 bytes depending on $odd
++	"&vext_8	($d,$d,$d,8)",	# rotate row d by 8 bytes
++	"&vext_8	($c,$c,$c,$odd?12:4)"	# rotate row c by the complementary amount (12 or 4 bytes)
++	);
++}
++
++$code.=<<___;
++#if (defined(__KERNEL__) && defined(CONFIG_KERNEL_MODE_NEON)) || (!defined(__KERNEL__) && __ARM_MAX_ARCH__>=7)
++.arch	armv7-a
++.fpu	neon
++
++# ifdef __KERNEL__
++.globl	ChaCha20_neon
++@ For optimal performance it's appropriate for caller to enforce
++@ minimum input length, 193 bytes is suggested.
++# endif
++.type	ChaCha20_neon,%function
++.align	5
++ChaCha20_neon:
++	ldr		r12,[sp,#0]		@ pull pointer to counter and nonce
++	stmdb		sp!,{r0-r2,r4-r11,lr}	@ 12 words pushed
++.LChaCha20_neon:
++	adr		r14,.Lsigma
++	vstmdb		sp!,{d8-d15}		@ ABI spec says so
++	stmdb		sp!,{r0-r3}		@ read back later at sp+4*(32+0..2) as out, inp, len
++
++	vld1.32		{$b0-$c0},[r3]		@ load key
++	ldmia		r3,{r4-r11}		@ load key
++
++	sub		sp,sp,#4*(16+16)	@ 32 words of scratch below the r0-r3 copy
++	vld1.32		{$d0},[r12]		@ load counter and nonce
++	add		r12,sp,#4*8
++	ldmia		r14,{r0-r3}		@ load sigma
++	vld1.32		{$a0},[r14]!		@ load sigma
++	vld1.32		{$t0},[r14]!		@ one
++	@ vld1.32	{$t3#lo},[r14]		@ rot8
++	vst1.32		{$c0-$d0},[r12]		@ copy 1/2key|counter|nonce
++	vst1.32		{$a0-$b0},[sp]		@ copy sigma|1/2key
++
++	str		r10,[sp,#4*(16+10)]	@ off-load "@x[10]"
++	str		r11,[sp,#4*(16+11)]	@ off-load "@x[11]"
++	vshl.i32	$t1#lo,$t0#lo,#1	@ two
++	vstr		$t0#lo,[sp,#4*(16+0)]
++	vshl.i32	$t2#lo,$t0#lo,#2	@ four
++	vstr		$t1#lo,[sp,#4*(16+2)]
++	vmov		$a1,$a0
++	vstr		$t2#lo,[sp,#4*(16+4)]
++	vmov		$a2,$a0
++	@ vstr		$t3#lo,[sp,#4*(16+6)]
++	vmov		$b1,$b0
++	vmov		$b2,$b0
++	b		.Loop_neon_enter
++
++.align	4
++.Loop_neon_outer:
++	ldmia		sp,{r0-r9}		@ load key material
++	cmp		@t[3],#64*2		@ if len<=64*2
++	bls		.Lbreak_neon		@ switch to integer-only
++	@ vldr		$t3#lo,[sp,#4*(16+6)]	@ rot8
++	vmov		$a1,$a0
++	str		@t[3],[sp,#4*(32+2)]	@ save len
++	vmov		$a2,$a0
++	str		r12,  [sp,#4*(32+1)]	@ save inp
++	vmov		$b1,$b0
++	str		r14,  [sp,#4*(32+0)]	@ save out
++	vmov		$b2,$b0
++.Loop_neon_enter:
++	ldr		@t[3], [sp,#4*(15)]
++	 mov		@x[4],@x[4],ror#19	@ twist b[0..3]
++	vadd.i32	$d1,$d0,$t0		@ counter+1
++	ldr		@x[12],[sp,#4*(12)]	@ modulo-scheduled load
++	 mov		@x[5],@x[5],ror#19
++	vmov		$c1,$c0
++	ldr		@t[2], [sp,#4*(13)]
++	 mov		@x[6],@x[6],ror#19
++	vmov		$c2,$c0
++	ldr		@x[14],[sp,#4*(14)]
++	 mov		@x[7],@x[7],ror#19
++	vadd.i32	$d2,$d1,$t0		@ counter+2
++	add		@x[12],@x[12],#3	@ counter+3
++	mov		@t[3],@t[3],ror#8	@ twist d[0..3]
++	mov		@x[12],@x[12],ror#8
++	mov		@t[2],@t[2],ror#8
++	mov		@x[14],@x[14],ror#8
++	str		@t[3], [sp,#4*(16+15)]
++	mov		@t[3],#10		@ pass count for .Loop_neon
++	b		.Loop_neon
++
++.align	4
++.Loop_neon:
++	subs		@t[3],@t[3],#1		@ flags consumed by the bne that closes the loop
++___
++	my @thread0=&NEONROUND($a0,$b0,$c0,$d0,$t0,0);	# NEON block 0, round with $odd=0
++	my @thread1=&NEONROUND($a1,$b1,$c1,$d1,$t1,0);	# NEON block 1
++	my @thread2=&NEONROUND($a2,$b2,$c2,$d2,$t2,0);	# NEON block 2
++	my @thread3=&ROUND(0,4,8,12);	# fourth instruction stream, produced by ROUND
++
++	foreach (@thread0) {	# weave the streams: every NEON instruction is followed by one from @thread3
++		eval;			eval(shift(@thread3));
++		eval(shift(@thread1));	eval(shift(@thread3));
++		eval(shift(@thread2));	eval(shift(@thread3));
++	}
++
++	@thread0=&NEONROUND($a0,$b0,$c0,$d0,$t0,1);	# same three blocks again, round with $odd=1
++	@thread1=&NEONROUND($a1,$b1,$c1,$d1,$t1,1);
++	@thread2=&NEONROUND($a2,$b2,$c2,$d2,$t2,1);
++	@thread3=&ROUND(0,5,10,15);	# matching second round for the fourth stream
++
++	foreach (@thread0) {	# identical weave for the second round
++		eval;			eval(shift(@thread3));
++		eval(shift(@thread1));	eval(shift(@thread3));
++		eval(shift(@thread2));	eval(shift(@thread3));
++	}
++$code.=<<___;
++	bne		.Loop_neon
++
++	add		@t[3],sp,#32
++	vld1.32		{$t0-$t1},[sp]		@ load key material
++	vld1.32		{$t2-$t3},[@t[3]]
++
++	ldr		@t[3],[sp,#4*(32+2)]	@ load len
++
++	str		@t[0], [sp,#4*(16+8)]	@ modulo-scheduled store
++	str		@t[1], [sp,#4*(16+9)]
++	str		@x[12],[sp,#4*(16+12)]
++	str		@t[2], [sp,#4*(16+13)]
++	str		@x[14],[sp,#4*(16+14)]
++
++	@ at this point we have first half of 512-bit result in
++	@ @x[0-7] and second half at sp+4*(16+8)
++
++	ldr		r12,[sp,#4*(32+1)]	@ load inp
++	ldr		r14,[sp,#4*(32+0)]	@ load out
++
++	vadd.i32	$a0,$a0,$t0		@ accumulate key material
++	vadd.i32	$a1,$a1,$t0
++	vadd.i32	$a2,$a2,$t0
++	vldr		$t0#lo,[sp,#4*(16+0)]	@ one
++
++	vadd.i32	$b0,$b0,$t1
++	vadd.i32	$b1,$b1,$t1
++	vadd.i32	$b2,$b2,$t1
++	vldr		$t1#lo,[sp,#4*(16+2)]	@ two
++
++	vadd.i32	$c0,$c0,$t2
++	vadd.i32	$c1,$c1,$t2
++	vadd.i32	$c2,$c2,$t2
++	vadd.i32	$d1#lo,$d1#lo,$t0#lo	@ counter+1
++	vadd.i32	$d2#lo,$d2#lo,$t1#lo	@ counter+2
++
++	vadd.i32	$d0,$d0,$t3
++	vadd.i32	$d1,$d1,$t3
++	vadd.i32	$d2,$d2,$t3
++
++	cmp		@t[3],#64*4
++	blo		.Ltail_neon
++
++	vld1.8		{$t0-$t1},[r12]!	@ load input
++	 mov		@t[3],sp
++	vld1.8		{$t2-$t3},[r12]!
++	veor		$a0,$a0,$t0		@ xor with input
++	veor		$b0,$b0,$t1
++	vld1.8		{$t0-$t1},[r12]!
++	veor		$c0,$c0,$t2
++	veor		$d0,$d0,$t3
++	vld1.8		{$t2-$t3},[r12]!
++
++	veor		$a1,$a1,$t0
++	 vst1.8		{$a0-$b0},[r14]!	@ store output
++	veor		$b1,$b1,$t1
++	vld1.8		{$t0-$t1},[r12]!
++	veor		$c1,$c1,$t2
++	 vst1.8		{$c0-$d0},[r14]!
++	veor		$d1,$d1,$t3
++	vld1.8		{$t2-$t3},[r12]!
++
++	veor		$a2,$a2,$t0
++	 vld1.32	{$a0-$b0},[@t[3]]!	@ load for next iteration
++	 veor		$t0#hi,$t0#hi,$t0#hi
++	 vldr		$t0#lo,[sp,#4*(16+4)]	@ four
++	veor		$b2,$b2,$t1
++	 vld1.32	{$c0-$d0},[@t[3]]
++	veor		$c2,$c2,$t2
++	 vst1.8		{$a1-$b1},[r14]!
++	veor		$d2,$d2,$t3
++	 vst1.8		{$c1-$d1},[r14]!
++
++	vadd.i32	$d0#lo,$d0#lo,$t0#lo	@ next counter value
++	vldr		$t0#lo,[sp,#4*(16+0)]	@ one
++
++	ldmia		sp,{@t[0]-@t[3]}	@ load key material
++	add		@x[0],@x[0],@t[0]	@ accumulate key material
++	ldr		@t[0],[r12],#16		@ load input
++	 vst1.8		{$a2-$b2},[r14]!
++	add		@x[1],@x[1],@t[1]
++	ldr		@t[1],[r12,#-12]
++	 vst1.8		{$c2-$d2},[r14]!
++	add		@x[2],@x[2],@t[2]
++	ldr		@t[2],[r12,#-8]
++	add		@x[3],@x[3],@t[3]
++	ldr		@t[3],[r12,#-4]
++# ifdef	__ARMEB__
++	rev		@x[0],@x[0]
++	rev		@x[1],@x[1]
++	rev		@x[2],@x[2]
++	rev		@x[3],@x[3]
++# endif
++	eor		@x[0],@x[0],@t[0]	@ xor with input
++	 add		@t[0],sp,#4*(4)
++	eor		@x[1],@x[1],@t[1]
++	str		@x[0],[r14],#16		@ store output
++	eor		@x[2],@x[2],@t[2]
++	str		@x[1],[r14,#-12]
++	eor		@x[3],@x[3],@t[3]
++	 ldmia		@t[0],{@t[0]-@t[3]}	@ load key material
++	str		@x[2],[r14,#-8]
++	str		@x[3],[r14,#-4]
++
++	add		@x[4],@t[0],@x[4],ror#13 @ accumulate key material
++	ldr		@t[0],[r12],#16		@ load input
++	add		@x[5],@t[1],@x[5],ror#13
++	ldr		@t[1],[r12,#-12]
++	add		@x[6],@t[2],@x[6],ror#13
++	ldr		@t[2],[r12,#-8]
++	add		@x[7],@t[3],@x[7],ror#13
++	ldr		@t[3],[r12,#-4]
++# ifdef	__ARMEB__
++	rev		@x[4],@x[4]
++	rev		@x[5],@x[5]
++	rev		@x[6],@x[6]
++	rev		@x[7],@x[7]
++# endif
++	eor		@x[4],@x[4],@t[0]
++	 add		@t[0],sp,#4*(8)
++	eor		@x[5],@x[5],@t[1]
++	str		@x[4],[r14],#16		@ store output
++	eor		@x[6],@x[6],@t[2]
++	str		@x[5],[r14,#-12]
++	eor		@x[7],@x[7],@t[3]
++	 ldmia		@t[0],{@t[0]-@t[3]}	@ load key material
++	str		@x[6],[r14,#-8]
++	 add		@x[0],sp,#4*(16+8)
++	str		@x[7],[r14,#-4]
++
++	ldmia		@x[0],{@x[0]-@x[7]}	@ load second half
++
++	add		@x[0],@x[0],@t[0]	@ accumulate key material
++	ldr		@t[0],[r12],#16		@ load input
++	add		@x[1],@x[1],@t[1]
++	ldr		@t[1],[r12,#-12]
++# ifdef	__thumb2__
++	it	hi
++# endif
++	 strhi		@t[2],[sp,#4*(16+10)]	@ copy "@x[10]" while at it
++	add		@x[2],@x[2],@t[2]
++	ldr		@t[2],[r12,#-8]
++# ifdef	__thumb2__
++	it	hi
++# endif
++	 strhi		@t[3],[sp,#4*(16+11)]	@ copy "@x[11]" while at it
++	add		@x[3],@x[3],@t[3]
++	ldr		@t[3],[r12,#-4]
++# ifdef	__ARMEB__
++	rev		@x[0],@x[0]
++	rev		@x[1],@x[1]
++	rev		@x[2],@x[2]
++	rev		@x[3],@x[3]
++# endif
++	eor		@x[0],@x[0],@t[0]
++	 add		@t[0],sp,#4*(12)
++	eor		@x[1],@x[1],@t[1]
++	str		@x[0],[r14],#16		@ store output
++	eor		@x[2],@x[2],@t[2]
++	str		@x[1],[r14,#-12]
++	eor		@x[3],@x[3],@t[3]
++	 ldmia		@t[0],{@t[0]-@t[3]}	@ load key material
++	str		@x[2],[r14,#-8]
++	str		@x[3],[r14,#-4]
++
++	add		@x[4],@t[0],@x[4],ror#24 @ accumulate key material
++	 add		@t[0],@t[0],#4		@ next counter value
++	add		@x[5],@t[1],@x[5],ror#24
++	 str		@t[0],[sp,#4*(12)]	@ save next counter value
++	ldr		@t[0],[r12],#16		@ load input
++	add		@x[6],@t[2],@x[6],ror#24
++	 add		@x[4],@x[4],#3		@ counter+3
++	ldr		@t[1],[r12,#-12]
++	add		@x[7],@t[3],@x[7],ror#24
++	ldr		@t[2],[r12,#-8]
++	ldr		@t[3],[r12,#-4]
++# ifdef	__ARMEB__
++	rev		@x[4],@x[4]
++	rev		@x[5],@x[5]
++	rev		@x[6],@x[6]
++	rev		@x[7],@x[7]
++# endif
++	eor		@x[4],@x[4],@t[0]
++# ifdef	__thumb2__
++	it	hi
++# endif
++	 ldrhi		@t[0],[sp,#4*(32+2)]	@ re-load len
++	eor		@x[5],@x[5],@t[1]
++	eor		@x[6],@x[6],@t[2]
++	str		@x[4],[r14],#16		@ store output
++	eor		@x[7],@x[7],@t[3]
++	str		@x[5],[r14,#-12]
++	 sub		@t[3],@t[0],#64*4	@ len-=64*4
++	str		@x[6],[r14,#-8]
++	str		@x[7],[r14,#-4]
++	bhi		.Loop_neon_outer
++
++	b		.Ldone_neon
++
++.align	4
++.Lbreak_neon:
++	@ harmonize NEON and integer-only stack frames: load data
++	@ from NEON frame, but save to integer-only one; distance
++	@ between the two is 4*(32+4+16-32)=4*(20).
++
++	str		@t[3], [sp,#4*(20+32+2)]	@ save len
++	 add		@t[3],sp,#4*(32+4)
++	str		r12,   [sp,#4*(20+32+1)]	@ save inp
++	str		r14,   [sp,#4*(20+32+0)]	@ save out
++
++	ldr		@x[12],[sp,#4*(16+10)]
++	ldr		@x[14],[sp,#4*(16+11)]
++	 vldmia		@t[3],{d8-d15}			@ fulfill ABI requirement
++	str		@x[12],[sp,#4*(20+16+10)]	@ copy "@x[10]"
++	str		@x[14],[sp,#4*(20+16+11)]	@ copy "@x[11]"
++
++	ldr		@t[3], [sp,#4*(15)]
++	 mov		@x[4],@x[4],ror#19		@ twist b[0..3]
++	ldr		@x[12],[sp,#4*(12)]		@ modulo-scheduled load
++	 mov		@x[5],@x[5],ror#19
++	ldr		@t[2], [sp,#4*(13)]
++	 mov		@x[6],@x[6],ror#19
++	ldr		@x[14],[sp,#4*(14)]
++	 mov		@x[7],@x[7],ror#19
++	mov		@t[3],@t[3],ror#8		@ twist d[0..3]
++	mov		@x[12],@x[12],ror#8
++	mov		@t[2],@t[2],ror#8
++	mov		@x[14],@x[14],ror#8
++	str		@t[3], [sp,#4*(20+16+15)]
++	add		@t[3],sp,#4*(20)
++	vst1.32		{$a0-$b0},[@t[3]]!		@ copy key
++	add		sp,sp,#4*(20)			@ switch frame
++	vst1.32		{$c0-$d0},[@t[3]]
++	mov		@t[3],#10
++	b		.Loop				@ go integer-only
++
++.align	4
++.Ltail_neon:
++	cmp		@t[3],#64*3
++	bhs		.L192_or_more_neon
++	cmp		@t[3],#64*2
++	bhs		.L128_or_more_neon
++	cmp		@t[3],#64*1
++	bhs		.L64_or_more_neon
++
++	add		@t[0],sp,#4*(8)
++	vst1.8		{$a0-$b0},[sp]
++	add		@t[2],sp,#4*(0)
++	vst1.8		{$c0-$d0},[@t[0]]
++	b		.Loop_tail_neon
++
++.align	4
++.L64_or_more_neon:
++	vld1.8		{$t0-$t1},[r12]!
++	vld1.8		{$t2-$t3},[r12]!
++	veor		$a0,$a0,$t0
++	veor		$b0,$b0,$t1
++	veor		$c0,$c0,$t2
++	veor		$d0,$d0,$t3
++	vst1.8		{$a0-$b0},[r14]!
++	vst1.8		{$c0-$d0},[r14]!
++
++	beq		.Ldone_neon
++
++	add		@t[0],sp,#4*(8)
++	vst1.8		{$a1-$b1},[sp]
++	add		@t[2],sp,#4*(0)
++	vst1.8		{$c1-$d1},[@t[0]]
++	sub		@t[3],@t[3],#64*1	@ len-=64*1
++	b		.Loop_tail_neon
++
++.align	4
++.L128_or_more_neon:
++	vld1.8		{$t0-$t1},[r12]!
++	vld1.8		{$t2-$t3},[r12]!
++	veor		$a0,$a0,$t0
++	veor		$b0,$b0,$t1
++	vld1.8		{$t0-$t1},[r12]!
++	veor		$c0,$c0,$t2
++	veor		$d0,$d0,$t3
++	vld1.8		{$t2-$t3},[r12]!
++
++	veor		$a1,$a1,$t0
++	veor		$b1,$b1,$t1
++	 vst1.8		{$a0-$b0},[r14]!
++	veor		$c1,$c1,$t2
++	 vst1.8		{$c0-$d0},[r14]!
++	veor		$d1,$d1,$t3
++	vst1.8		{$a1-$b1},[r14]!
++	vst1.8		{$c1-$d1},[r14]!
++
++	beq		.Ldone_neon
++
++	add		@t[0],sp,#4*(8)
++	vst1.8		{$a2-$b2},[sp]
++	add		@t[2],sp,#4*(0)
++	vst1.8		{$c2-$d2},[@t[0]]
++	sub		@t[3],@t[3],#64*2	@ len-=64*2
++	b		.Loop_tail_neon
++
++.align	4
++.L192_or_more_neon:
++	vld1.8		{$t0-$t1},[r12]!
++	vld1.8		{$t2-$t3},[r12]!
++	veor		$a0,$a0,$t0
++	veor		$b0,$b0,$t1
++	vld1.8		{$t0-$t1},[r12]!
++	veor		$c0,$c0,$t2
++	veor		$d0,$d0,$t3
++	vld1.8		{$t2-$t3},[r12]!
++
++	veor		$a1,$a1,$t0
++	veor		$b1,$b1,$t1
++	vld1.8		{$t0-$t1},[r12]!
++	veor		$c1,$c1,$t2
++	 vst1.8		{$a0-$b0},[r14]!
++	veor		$d1,$d1,$t3
++	vld1.8		{$t2-$t3},[r12]!
++
++	veor		$a2,$a2,$t0
++	 vst1.8		{$c0-$d0},[r14]!
++	veor		$b2,$b2,$t1
++	 vst1.8		{$a1-$b1},[r14]!
++	veor		$c2,$c2,$t2
++	 vst1.8		{$c1-$d1},[r14]!
++	veor		$d2,$d2,$t3
++	vst1.8		{$a2-$b2},[r14]!
++	vst1.8		{$c2-$d2},[r14]!
++
++	beq		.Ldone_neon
++
++	ldmia		sp,{@t[0]-@t[3]}	@ load key material
++	add		@x[0],@x[0],@t[0]	@ accumulate key material
++	 add		@t[0],sp,#4*(4)
++	add		@x[1],@x[1],@t[1]
++	add		@x[2],@x[2],@t[2]
++	add		@x[3],@x[3],@t[3]
++	 ldmia		@t[0],{@t[0]-@t[3]}	@ load key material
++
++	add		@x[4],@t[0],@x[4],ror#13 @ accumulate key material
++	 add		@t[0],sp,#4*(8)
++	add		@x[5],@t[1],@x[5],ror#13
++	add		@x[6],@t[2],@x[6],ror#13
++	add		@x[7],@t[3],@x[7],ror#13
++	 ldmia		@t[0],{@t[0]-@t[3]}	@ load key material
++# ifdef	__ARMEB__
++	rev		@x[0],@x[0]
++	rev		@x[1],@x[1]
++	rev		@x[2],@x[2]
++	rev		@x[3],@x[3]
++	rev		@x[4],@x[4]
++	rev		@x[5],@x[5]
++	rev		@x[6],@x[6]
++	rev		@x[7],@x[7]
++# endif
++	stmia		sp,{@x[0]-@x[7]}
++	 add		@x[0],sp,#4*(16+8)
++
++	ldmia		@x[0],{@x[0]-@x[7]}	@ load second half
++
++	add		@x[0],@x[0],@t[0]	@ accumulate key material
++	 add		@t[0],sp,#4*(12)
++	add		@x[1],@x[1],@t[1]
++	add		@x[2],@x[2],@t[2]
++	add		@x[3],@x[3],@t[3]
++	 ldmia		@t[0],{@t[0]-@t[3]}	@ load key material
++
++	add		@x[4],@t[0],@x[4],ror#24 @ accumulate key material
++	 add		@t[0],sp,#4*(8)
++	add		@x[5],@t[1],@x[5],ror#24
++	 add		@x[4],@x[4],#3		@ counter+3
++	add		@x[6],@t[2],@x[6],ror#24
++	add		@x[7],@t[3],@x[7],ror#24
++	 ldr		@t[3],[sp,#4*(32+2)]	@ re-load len
++# ifdef	__ARMEB__
++	rev		@x[0],@x[0]
++	rev		@x[1],@x[1]
++	rev		@x[2],@x[2]
++	rev		@x[3],@x[3]
++	rev		@x[4],@x[4]
++	rev		@x[5],@x[5]
++	rev		@x[6],@x[6]
++	rev		@x[7],@x[7]
++# endif
++	stmia		@t[0],{@x[0]-@x[7]}
++	 add		@t[2],sp,#4*(0)
++	 sub		@t[3],@t[3],#64*3	@ len-=64*3
++
++.Loop_tail_neon:
++	ldrb		@t[0],[@t[2]],#1	@ read buffer on stack
++	ldrb		@t[1],[r12],#1		@ read input
++	subs		@t[3],@t[3],#1		@ one byte fewer to go; Z set when none remain
++	eor		@t[0],@t[0],@t[1]	@ buffered byte xor input byte
++	strb		@t[0],[r14],#1		@ store output
++	bne		.Loop_tail_neon
++
++.Ldone_neon:
++	add		sp,sp,#4*(32+4)		@ drop the 32-word scratch area and the 4-word r0-r3 copy
++	vldmia		sp,{d8-d15}		@ restore d8-d15 saved on entry (no writeback)
++	add		sp,sp,#4*(16+3)		@ step over d8-d15 (16 words) and the saved r0-r2 (3 words)
++	ldmia		sp!,{r4-r11,pc}		@ restore r4-r11 and return through the saved lr
++.size	ChaCha20_neon,.-ChaCha20_neon
++# ifndef __KERNEL__
++.comm	OPENSSL_armcap_P,4,4
++# endif
++#endif
++___
++}}}
++
++open SELF,$0;	# re-read this script to copy its leading comment header into the output
++while(<SELF>) {
++	next if (/^#!/);	# drop the interpreter line
++	last if (!s/^#/@/ and !/^$/);	# swap the leading '#' for '@'; stop at the first line that is neither comment nor empty
++	print;
++}
++close SELF;
++
++foreach (split("\n",$code)) {	# post-process the accumulated assembly one line at a time
++	s/\`([^\`]*)\`/eval $1/geo;	# replace back-quoted text with the result of evaluating it as Perl
++
++	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo;	# qN#lo -> d(2N), qN#hi -> d(2N+1)
++
++	print $_,"\n";
++}
++close STDOUT;	# finish the output stream
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/chacha20/chacha20-x86_64.pl	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,4106 @@
++#!/usr/bin/env perl
++# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
++#
++# Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
++# Copyright (C) 2017-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++# Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
++#
++# This code is taken from the OpenSSL project but the author, Andy Polyakov,
++# has relicensed it under the licenses specified in the SPDX header above.
++# The original headers, including the original license headers, are
++# included below for completeness.
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# November 2014
++#
++# ChaCha20 for x86_64.
++#
++# December 2016
++#
++# Add AVX512F code path.
++#
++# December 2017
++#
++# Add AVX512VL code path.
++#
++# Performance in cycles per byte out of large buffer.
++#
++#		IALU/gcc 4.8(i)	1x/2xSSSE3(ii)	4xSSSE3	    NxAVX(v)
++#
++# P4		9.48/+99%	-		-
++# Core2		7.83/+55%	7.90/5.76	4.35
++# Westmere	7.19/+50%	5.60/4.50	3.00
++# Sandy Bridge	8.31/+42%	5.45/4.00	2.72
++# Ivy Bridge	6.71/+46%	5.40/?		2.41
++# Haswell	5.92/+43%	5.20/3.45	2.42        1.23
++# Skylake[-X]	5.87/+39%	4.70/3.22	2.31        1.19[0.80(vi)]
++# Silvermont	12.0/+33%	7.75/6.90	7.03(iii)
++# Knights L	11.7/-		?		9.60(iii)   0.80
++# Goldmont	10.6/+17%	5.10/3.52	3.28
++# Sledgehammer	7.28/+52%	-		-
++# Bulldozer	9.66/+28%	9.85/5.35(iv)	3.06(iv)
++# Ryzen		5.96/+50%	5.19/3.00	2.40        2.09
++# VIA Nano	10.5/+46%	6.72/6.88	6.05
++#
++# (i)	compared to older gcc 3.x one can observe >2x improvement on
++#	most platforms;
++# (ii)	2xSSSE3 is code path optimized specifically for 128 bytes used
++#	by chacha20_poly1305_tls_cipher, results are EVP-free;
++# (iii)	this is not optimal result for Atom because of MSROM
++#	limitations, SSE2 can do better, but gain is considered too
++#	low to justify the [maintenance] effort;
++# (iv)	Bulldozer actually executes 4xXOP code path that delivers 2.20
++#	and 4.85 for 128-byte inputs;
++# (v)	8xAVX2, 8xAVX512VL or 16xAVX512F, whichever best applicable;
++# (vi)	even though Skylake-X can execute AVX512F code and deliver 0.57
++#	cpb in single thread, the corresponding capability is suppressed;
++
++$flavour = shift;	# first command-line argument: perlasm flavour
++$output  = shift;	# second command-line argument: output file
++if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }	# a first argument containing a dot is really the output file
++
++$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
++$kernel=0; $kernel=1 if (!$flavour && !$output);	# no arguments at all selects the kernel build
++
++if (!$kernel) {
++	$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;	# directory part of this script's own path
++	( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
++	( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
++	die "can't locate x86_64-xlate.pl";
++
++	open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";	# everything printed is piped through the translator
++	*STDOUT=*OUT;
++
++	if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
++	    =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
++		$avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25);	# each version threshold met raises $avx by one
++	}
++
++	if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
++	    `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) {
++		$avx = ($1>=2.09) + ($1>=2.10) + ($1>=2.12);
++		$avx += 1 if ($1==2.11 && $2>=8);	# 2.11.8 and later 2.11.x count one level higher
++	}
++
++	if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
++	    `ml64 2>&1` =~ /Version ([0-9]+)\./) {
++		$avx = ($1>=10) + ($1>=11);
++	}
++
++	if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
++		$avx = ($2>=3.0) + ($2>3.0);	# clang/LLVM is only consulted when no assembler above set $avx
++	}
++} else {
++	$avx = 4; # The kernel uses ifdefs for this.
++}
++
++# input parameter block
++($out,$inp,$len,$key,$counter)=("%rdi","%rsi","%rdx","%rcx","%r8");
++
++$code.=<<___ if $kernel;
++#include <linux/linkage.h>
++___
++
++sub declare_variable() {	# append the constant .L<name>, defined as ".<type> <payload>"
++	my ($label, $bytes, $directive, $values) = @_;
++	my $definition = ".L$label:\n" . ".$directive $values\n";
++	# Only the kernel build places each constant in its own mergeable
++	# .rodata.cst<size> section, aligned to the size of the constant.
++	my $placement = "";
++	if ($kernel) {
++		$placement  = ".section .rodata.cst$bytes.L$label, \"aM\", \@progbits, $bytes\n";
++		$placement .= ".align $bytes\n";
++	}
++	$code .= $placement . $definition;
++}
++
++sub declare_function() {	# open function <name>: alignment, symbol declaration and entry label
++	my ($symbol, $boundary, $argc) = @_;
++	# Kernel output relies on SYM_FUNC_START() and adds a local .L<name>
++	# label; standalone output spells out the directives itself.
++	my @lines = $kernel
++	    ? (".align $boundary",
++	       "SYM_FUNC_START($symbol)", ".L$symbol:")
++	    : (".globl	$symbol",
++	       ".type	$symbol,\@function,$argc",
++	       ".align	$boundary",
++	       "$symbol:");
++	$code .= "$_\n" foreach @lines;
++}
++
++sub end_function() {	# close the function opened by declare_function()
++	my ($symbol) = @_;
++	# kernel build: SYM_FUNC_END() macro;
++	# standalone build: explicit .size directive
++	$code .= $kernel
++	    ? "SYM_FUNC_END($symbol)\n"
++	    : ".size   $symbol,.-$symbol\n";
++}
++
++if(!$kernel) {	# standalone build: the constants below are emitted after a .text directive
++	$code .= ".text\n";
++}
++&declare_variable('zero', 16, 'long', '0,0,0,0');
++&declare_variable('one', 16, 'long', '1,0,0,0');	# added to the counter word once per 64-byte block in the generic path
++&declare_variable('inc', 16, 'long', '0,1,2,3');
++&declare_variable('four', 16, 'long', '4,4,4,4');
++&declare_variable('incy', 32, 'long', '0,2,4,6,1,3,5,7');
++&declare_variable('eight', 32, 'long', '8,8,8,8,8,8,8,8');
++&declare_variable('rot16', 16, 'byte', '0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd');	# pshufb mask: rotate each 32-bit lane by 16 bits
++&declare_variable('rot24', 16, 'byte', '0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe');	# byte-shuffle mask: each 32-bit lane rotated left by 8 (right by 24)
++&declare_variable('twoy', 32, 'long', '2,0,0,0, 2,0,0,0');
++&declare_variable('zeroz', 64, 'long', '0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0');
++&declare_variable('fourz', 64, 'long', '4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0');
++&declare_variable('incz', 64, 'long', '0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15');
++&declare_variable('sixteen', 64, 'long', '16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16');
++&declare_variable('sigma', 16, 'ascii', '"expand 32-byte k"');	# the four constant words of the ChaCha state
++
++$code.=<<___ if !$kernel;
++.asciz "ChaCha20 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
++___
++$code.=".text\n";	# the kernel build put each constant in a .rodata section, so switch back to code
++
++sub AUTOLOAD()          # thunk [simplified] 32-bit style perlasm
++{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://;	# name of the undefined sub, package prefix stripped, becomes the mnemonic
++  my $arg = pop;	# last call argument, emitted first
++    $arg = "\$$arg" if ($arg*1 eq $arg);	# a purely numeric argument gets a '$' immediate prefix
++    $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";	# operands are written in the reverse of call order
++}
++
++@x=("%eax","%ebx","%ecx","%edx",map("%r${_}d",(8..11)),	# state words 0-7 live in registers
++    "%nox","%nox","%nox","%nox",map("%r${_}d",(12..15)));	# words 8-11 get a placeholder name (kept in memory); words 12-15 live in registers
++@t=("%esi","%edi");	# temporaries holding the pair of 'c' words currently in use
++
++sub ROUND {			# critical path is 24 cycles per round
++my ($a0,$b0,$c0,$d0)=@_;	# state-word indices of the first quarter-round
++my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));	# same row, next column (mod 4)
++my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));
++my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));
++my ($xc,$xc_)=map("\"$_\"",@t);	# quoted temporaries, ready to splice into the call strings
++my @x=map("\"$_\"",@x);	# quoted copies of the register names
++
++	# Consider the order in which variables are addressed by their
++	# index:
++	#
++	#	a   b   c   d
++	#
++	#	0   4   8  12 < even round
++	#	1   5   9  13
++	#	2   6  10  14
++	#	3   7  11  15
++	#	0   5  10  15 < odd round
++	#	1   6  11  12
++	#	2   7   8  13
++	#	3   4   9  14
++	#
++	# 'a', 'b' and 'd's are permanently allocated in registers,
++	# @x[0..7,12..15], while 'c's are maintained in memory. If
++	# you observe the 'c' column, you'll notice that a pair of 'c's
++	# is invariant between rounds. This means that we have to reload
++	# them once per round, in the middle. This is why you'll see a
++	# bunch of 'c' stores and loads in the middle, but none in
++	# the beginning or end.
++
++	# Normally instructions would be interleaved to favour in-order
++	# execution. Generally out-of-order cores manage it gracefully,
++	# but not this time for some reason. As in-order execution
++	# cores are a dying breed, with old Atom the only one around,
++	# instructions are left uninterleaved. Besides, Atom is better
++	# off executing 1xSSSE3 code anyway...
++
++	(
++	"&add	(@x[$a0],@x[$b0])",	# Q1
++	"&xor	(@x[$d0],@x[$a0])",
++	"&rol	(@x[$d0],16)",
++	 "&add	(@x[$a1],@x[$b1])",	# Q2
++	 "&xor	(@x[$d1],@x[$a1])",
++	 "&rol	(@x[$d1],16)",
++
++	"&add	($xc,@x[$d0])",
++	"&xor	(@x[$b0],$xc)",
++	"&rol	(@x[$b0],12)",
++	 "&add	($xc_,@x[$d1])",
++	 "&xor	(@x[$b1],$xc_)",
++	 "&rol	(@x[$b1],12)",
++
++	"&add	(@x[$a0],@x[$b0])",
++	"&xor	(@x[$d0],@x[$a0])",
++	"&rol	(@x[$d0],8)",
++	 "&add	(@x[$a1],@x[$b1])",
++	 "&xor	(@x[$d1],@x[$a1])",
++	 "&rol	(@x[$d1],8)",
++
++	"&add	($xc,@x[$d0])",
++	"&xor	(@x[$b0],$xc)",
++	"&rol	(@x[$b0],7)",
++	 "&add	($xc_,@x[$d1])",
++	 "&xor	(@x[$b1],$xc_)",
++	 "&rol	(@x[$b1],7)",
++
++	"&mov	(\"4*$c0(%rsp)\",$xc)",	# reload pair of 'c's
++	 "&mov	(\"4*$c1(%rsp)\",$xc_)",
++	"&mov	($xc,\"4*$c2(%rsp)\")",
++	 "&mov	($xc_,\"4*$c3(%rsp)\")",
++
++	"&add	(@x[$a2],@x[$b2])",	# Q3
++	"&xor	(@x[$d2],@x[$a2])",
++	"&rol	(@x[$d2],16)",
++	 "&add	(@x[$a3],@x[$b3])",	# Q4
++	 "&xor	(@x[$d3],@x[$a3])",
++	 "&rol	(@x[$d3],16)",
++
++	"&add	($xc,@x[$d2])",
++	"&xor	(@x[$b2],$xc)",
++	"&rol	(@x[$b2],12)",
++	 "&add	($xc_,@x[$d3])",
++	 "&xor	(@x[$b3],$xc_)",
++	 "&rol	(@x[$b3],12)",
++
++	"&add	(@x[$a2],@x[$b2])",
++	"&xor	(@x[$d2],@x[$a2])",
++	"&rol	(@x[$d2],8)",
++	 "&add	(@x[$a3],@x[$b3])",
++	 "&xor	(@x[$d3],@x[$a3])",
++	 "&rol	(@x[$d3],8)",
++
++	"&add	($xc,@x[$d2])",
++	"&xor	(@x[$b2],$xc)",
++	"&rol	(@x[$b2],7)",
++	 "&add	($xc_,@x[$d3])",
++	 "&xor	(@x[$b3],$xc_)",
++	 "&rol	(@x[$b3],7)"
++	);
++}
++
++########################################################################
++# Generic code path that handles all lengths on pre-SSSE3 processors.
++if(!$kernel) {	# the generic path is generated only for the standalone build
++&declare_function("chacha20_ctr32", 64, 5);
++$code.=<<___;
++.cfi_startproc
++	cmp	\$0,$len
++	je	.Lno_data
++	mov	OPENSSL_ia32cap_P+4(%rip),%r9
++___
++$code.=<<___	if ($avx>2);
++	bt	\$48,%r9		# check for AVX512F
++	jc	.Lchacha20_avx512
++	test	%r9,%r9		# check for AVX512VL
++	js	.Lchacha20_avx512vl
++___
++$code.=<<___;
++	test	\$`1<<(41-32)`,%r9d
++	jnz	.Lchacha20_ssse3
++___
++$code.=<<___;
++	push	%rbx
++.cfi_push	%rbx
++	push	%rbp
++.cfi_push	%rbp
++	push	%r12
++.cfi_push	%r12
++	push	%r13
++.cfi_push	%r13
++	push	%r14
++.cfi_push	%r14
++	push	%r15
++.cfi_push	%r15
++	sub	\$64+24,%rsp		# 64-byte state image plus slots for len, inp and out
++.cfi_adjust_cfa_offset	64+24
++.Lctr32_body:
++
++	#movdqa	.Lsigma(%rip),%xmm0
++	movdqu	($key),%xmm1
++	movdqu	16($key),%xmm2
++	movdqu	($counter),%xmm3
++	movdqa	.Lone(%rip),%xmm4	# per-block counter increment
++
++	#movdqa	%xmm0,4*0(%rsp)		# key[0]
++	movdqa	%xmm1,4*4(%rsp)		# key[1]
++	movdqa	%xmm2,4*8(%rsp)		# key[2]
++	movdqa	%xmm3,4*12(%rsp)	# key[3]
++	mov	$len,%rbp		# reassign $len
++	jmp	.Loop_outer
++
++.align	32
++.Loop_outer:
++	mov	\$0x61707865,@x[0]      # 'expa'
++	mov	\$0x3320646e,@x[1]      # 'nd 3'
++	mov	\$0x79622d32,@x[2]      # '2-by'
++	mov	\$0x6b206574,@x[3]      # 'te k'
++	mov	4*4(%rsp),@x[4]
++	mov	4*5(%rsp),@x[5]
++	mov	4*6(%rsp),@x[6]
++	mov	4*7(%rsp),@x[7]
++	movd	%xmm3,@x[12]
++	mov	4*13(%rsp),@x[13]
++	mov	4*14(%rsp),@x[14]
++	mov	4*15(%rsp),@x[15]
++
++	mov	%rbp,64+0(%rsp)		# save len
++	mov	\$10,%ebp		# .Loop runs ten times, two rounds per pass
++	mov	$inp,64+8(%rsp)		# save inp
++	movq	%xmm2,%rsi		# "@x[8]"
++	mov	$out,64+16(%rsp)	# save out
++	mov	%rsi,%rdi
++	shr	\$32,%rdi		# "@x[9]"
++	jmp	.Loop
++
++.align	32
++.Loop:
++___
++	foreach (&ROUND (0, 4, 8,12)) { eval; }	# even round
++	foreach (&ROUND	(0, 5,10,15)) { eval; }	# odd round
++	&dec	("%ebp");
++	&jnz	(".Loop");
++
++$code.=<<___;
++	mov	@t[1],4*9(%rsp)		# modulo-scheduled
++	mov	@t[0],4*8(%rsp)
++	mov	64(%rsp),%rbp		# load len
++	movdqa	%xmm2,%xmm1
++	mov	64+8(%rsp),$inp		# load inp
++	paddd	%xmm4,%xmm3		# increment counter
++	mov	64+16(%rsp),$out	# load out
++
++	add	\$0x61707865,@x[0]      # 'expa'
++	add	\$0x3320646e,@x[1]      # 'nd 3'
++	add	\$0x79622d32,@x[2]      # '2-by'
++	add	\$0x6b206574,@x[3]      # 'te k'
++	add	4*4(%rsp),@x[4]
++	add	4*5(%rsp),@x[5]
++	add	4*6(%rsp),@x[6]
++	add	4*7(%rsp),@x[7]
++	add	4*12(%rsp),@x[12]
++	add	4*13(%rsp),@x[13]
++	add	4*14(%rsp),@x[14]
++	add	4*15(%rsp),@x[15]
++	paddd	4*8(%rsp),%xmm1		# words 8-11: key words plus the round output left on the stack
++
++	cmp	\$64,%rbp
++	jb	.Ltail
++
++	xor	4*0($inp),@x[0]		# xor with input
++	xor	4*1($inp),@x[1]
++	xor	4*2($inp),@x[2]
++	xor	4*3($inp),@x[3]
++	xor	4*4($inp),@x[4]
++	xor	4*5($inp),@x[5]
++	xor	4*6($inp),@x[6]
++	xor	4*7($inp),@x[7]
++	movdqu	4*8($inp),%xmm0
++	xor	4*12($inp),@x[12]
++	xor	4*13($inp),@x[13]
++	xor	4*14($inp),@x[14]
++	xor	4*15($inp),@x[15]
++	lea	4*16($inp),$inp		# inp+=64
++	pxor	%xmm1,%xmm0
++
++	movdqa	%xmm2,4*8(%rsp)		# put the key words back for the next block
++	movd	%xmm3,4*12(%rsp)	# store the incremented counter word
++
++	mov	@x[0],4*0($out)		# write output
++	mov	@x[1],4*1($out)
++	mov	@x[2],4*2($out)
++	mov	@x[3],4*3($out)
++	mov	@x[4],4*4($out)
++	mov	@x[5],4*5($out)
++	mov	@x[6],4*6($out)
++	mov	@x[7],4*7($out)
++	movdqu	%xmm0,4*8($out)
++	mov	@x[12],4*12($out)
++	mov	@x[13],4*13($out)
++	mov	@x[14],4*14($out)
++	mov	@x[15],4*15($out)
++	lea	4*16($out),$out		# out+=64
++
++	sub	\$64,%rbp
++	jnz	.Loop_outer
++
++	jmp	.Ldone
++
++.align	16
++.Ltail:
++	mov	@x[0],4*0(%rsp)
++	mov	@x[1],4*1(%rsp)
++	xor	%rbx,%rbx		# byte index used by .Loop_tail
++	mov	@x[2],4*2(%rsp)
++	mov	@x[3],4*3(%rsp)
++	mov	@x[4],4*4(%rsp)
++	mov	@x[5],4*5(%rsp)
++	mov	@x[6],4*6(%rsp)
++	mov	@x[7],4*7(%rsp)
++	movdqa	%xmm1,4*8(%rsp)
++	mov	@x[12],4*12(%rsp)
++	mov	@x[13],4*13(%rsp)
++	mov	@x[14],4*14(%rsp)
++	mov	@x[15],4*15(%rsp)
++
++.Loop_tail:
++	movzb	($inp,%rbx),%eax
++	movzb	(%rsp,%rbx),%edx
++	lea	1(%rbx),%rbx
++	xor	%edx,%eax
++	mov	%al,-1($out,%rbx)
++	dec	%rbp			# one byte fewer remaining
++	jnz	.Loop_tail
++
++.Ldone:
++	add	\$64+24,%rsp
++.cfi_adjust_cfa_offset	-64-24
++	pop			%r15
++.cfi_restore	%r15
++	pop			%r14
++.cfi_restore	%r14
++	pop			%r13
++.cfi_restore	%r13
++	pop			%r12
++.cfi_restore	%r12
++	pop			%rbp
++.cfi_restore	%rbp
++	pop			%rbx
++.cfi_restore	%rbx
++.Lno_data:
++	ret
++.cfi_endproc
++___
++&end_function("chacha20_ctr32");
++}
++
++########################################################################
++# SSSE3 code path that handles shorter lengths
++{
++my ($a,$b,$c,$d,$t,$t1,$rot16,$rot24)=map("%xmm$_",(0..7));
++
++sub SSSE3ROUND {	# one round on four 32-bit lanes per register; critical path is 20 "SIMD ticks" per round
++	&paddd	($a,$b);		# a += b (NOTE(review): assumes the emitter takes the destination first; it is outside this chunk)
++	&pxor	($d,$a);		# d ^= a
++	&pshufb	($d,$rot16);		# byte-shuffle d with the mask loaded from .Lrot16 (presumably rotate-left by 16)
++
++	&paddd	($c,$d);		# c += d
++	&pxor	($b,$c);		# b ^= c
++	&movdqa	($t,$b);		# keep a copy: the rotate is built from two shifts
++	&psrld	($b,20);
++	&pslld	($t,12);
++	&por	($b,$t);		# b = (b >> 20) | (b << 12): rotate left by 12
++
++	&paddd	($a,$b);		# a += b
++	&pxor	($d,$a);		# d ^= a
++	&pshufb	($d,$rot24);		# byte-shuffle d with the mask loaded from .Lrot24 (presumably rotate-left by 8)
++
++	&paddd	($c,$d);		# c += d
++	&pxor	($b,$c);		# b ^= c
++	&movdqa	($t,$b);		# copy again for the second shift pair
++	&psrld	($b,25);
++	&pslld	($t,7);
++	&por	($b,$t);		# b = (b >> 25) | (b << 7): rotate left by 7
++}
++
++my $xframe = $win64 ? 32+8 : 8;	# frame bytes on top of the 64-byte state copy; win64 adds 32 for the xmm6/xmm7 save slots
++
++if($kernel) {	# kernel builds wrap the SSSE3 functions in a preprocessor guard
++	$code .= "#ifdef CONFIG_AS_SSSE3\n";	# matched by the "#endif" appended after the chacha20_4x code
++}
++
++if($kernel) {	# hchacha20_ssse3 is only generated for kernel builds
++&declare_function("hchacha20_ssse3", 32, 5);	# NOTE(review): 32 and 5 look like alignment and argument count - confirm against declare_function
++$code.=<<___;	# load the four state rows (.Lsigma, 32 bytes at ($len), 16 bytes at ($inp)), both shuffle masks, and a loop count of 10
++	movdqa	.Lsigma(%rip),$a
++	movdqu	($len),$b
++	movdqu	16($len),$c
++	movdqu	($inp),$d
++	# This code is only used when targeting kernel.
++	# If targeting win64, xmm{6,7} preserving needs to be added.
++	movdqa	.Lrot16(%rip),$rot16
++	movdqa	.Lrot24(%rip),$rot24
++	mov	\$10,$counter		# reuse $counter
++	jmp	1f
++.align	32
++1:
++___
++	&SSSE3ROUND();			# first round of the pair: lanes are the state columns
++	&pshufd	($a,$a,0b10010011);	# a: lane i takes old lane (i+3)&3
++	&pshufd	($d,$d,0b01001110);	# d: lane i takes old lane (i+2)&3
++	&pshufd	($c,$c,0b00111001);	# c: lane i takes old lane (i+1)&3; b stays put, so lanes now line up along diagonals
++	&nop	();			# NOTE(review): presumably padding for alignment/scheduling - intent is not stated in the source
++
++	&SSSE3ROUND();			# second round of the pair, on the diagonals
++	&pshufd	($a,$a,0b00111001);	# inverse lane rotations: restore the column layout
++	&pshufd	($d,$d,0b01001110);
++	&pshufd	($c,$c,0b10010011);
++
++	&dec	($counter);		# 10 iterations of two rounds each
++	&jnz	("1b");
++
++$code.=<<___;	# store rows a and d (32 bytes) to ($out); the initial state is not added back here
++	movdqu $a, ($out)
++	movdqu $d, 16($out)
++	ret
++___
++&end_function("hchacha20_ssse3");
++}
++
++&declare_function("chacha20_ssse3", 32, 5);
++$code.=<<___;
++.cfi_startproc
++	lea	8(%rsp),%r10		# frame pointer
++.cfi_def_cfa_register	%r10
++___
++$code.=<<___	if ($avx && !$kernel);	# non-kernel builds only: test the XOP bit in the capability word, which is in %r9 (chacha20_4x reads it from there); %r10 was just overwritten with the frame pointer
++	test	\$`1<<(43-32)`,%r9d
++	jnz	.Lchacha20_4xop		# XOP is fastest even if we use 1/4
++___
++$code.=<<___;
++	cmp	\$128,$len		# we might throw away some data,
++	je	.Lchacha20_128
++	ja	.Lchacha20_4x		# but overall it won't be slower
++
++.Ldo_ssse3_after_all:
++	sub	\$64+$xframe,%rsp
++	and \$-16,%rsp
++___
++$code.=<<___	if ($win64);
++	movaps	%xmm6,-0x30(%r10)
++	movaps	%xmm7,-0x20(%r10)
++.Lssse3_body:
++___
++$code.=<<___;
++	movdqa	.Lsigma(%rip),$a
++	movdqu	($key),$b
++	movdqu	16($key),$c
++	movdqu	($counter),$d
++	movdqa	.Lrot16(%rip),$rot16
++	movdqa	.Lrot24(%rip),$rot24
++
++	movdqa	$a,0x00(%rsp)
++	movdqa	$b,0x10(%rsp)
++	movdqa	$c,0x20(%rsp)
++	movdqa	$d,0x30(%rsp)
++	mov	\$10,$counter		# reuse $counter
++	jmp	.Loop_ssse3
++
++.align	32
++.Loop_outer_ssse3:
++	movdqa	.Lone(%rip),$d
++	movdqa	0x00(%rsp),$a
++	movdqa	0x10(%rsp),$b
++	movdqa	0x20(%rsp),$c
++	paddd	0x30(%rsp),$d
++	mov	\$10,$counter
++	movdqa	$d,0x30(%rsp)
++	jmp	.Loop_ssse3
++
++.align	32
++.Loop_ssse3:
++___
++	&SSSE3ROUND();			# first round of the pair: lanes are the state columns
++	&pshufd	($a,$a,0b10010011);	# a: lane i takes old lane (i+3)&3
++	&pshufd	($d,$d,0b01001110);	# d: lane i takes old lane (i+2)&3
++	&pshufd	($c,$c,0b00111001);	# c: lane i takes old lane (i+1)&3; b stays put, so lanes now line up along diagonals
++	&nop	();			# NOTE(review): presumably padding for alignment/scheduling - intent is not stated in the source
++
++	&SSSE3ROUND();			# second round of the pair, on the diagonals
++	&pshufd	($a,$a,0b00111001);	# inverse lane rotations: restore the column layout
++	&pshufd	($d,$d,0b01001110);
++	&pshufd	($c,$c,0b10010011);
++
++	&dec	($counter);		# $counter was set to 10 before the loop: 10 iterations of two rounds
++	&jnz	(".Loop_ssse3");
++
++$code.=<<___;
++	paddd	0x00(%rsp),$a
++	paddd	0x10(%rsp),$b
++	paddd	0x20(%rsp),$c
++	paddd	0x30(%rsp),$d
++
++	cmp	\$64,$len
++	jb	.Ltail_ssse3
++
++	movdqu	0x00($inp),$t
++	movdqu	0x10($inp),$t1
++	pxor	$t,$a			# xor with input
++	movdqu	0x20($inp),$t
++	pxor	$t1,$b
++	movdqu	0x30($inp),$t1
++	lea	0x40($inp),$inp		# inp+=64
++	pxor	$t,$c
++	pxor	$t1,$d
++
++	movdqu	$a,0x00($out)		# write output
++	movdqu	$b,0x10($out)
++	movdqu	$c,0x20($out)
++	movdqu	$d,0x30($out)
++	lea	0x40($out),$out		# out+=64
++
++	sub	\$64,$len
++	jnz	.Loop_outer_ssse3
++
++	jmp	.Ldone_ssse3
++
++.align	16
++.Ltail_ssse3:
++	movdqa	$a,0x00(%rsp)
++	movdqa	$b,0x10(%rsp)
++	movdqa	$c,0x20(%rsp)
++	movdqa	$d,0x30(%rsp)
++	xor	$counter,$counter
++
++.Loop_tail_ssse3:
++	movzb	($inp,$counter),%eax
++	movzb	(%rsp,$counter),%ecx
++	lea	1($counter),$counter
++	xor	%ecx,%eax
++	mov	%al,-1($out,$counter)
++	dec	$len
++	jnz	.Loop_tail_ssse3
++
++.Ldone_ssse3:
++___
++$code.=<<___	if ($win64);
++	movaps	-0x30(%r10),%xmm6
++	movaps	-0x20(%r10),%xmm7
++___
++$code.=<<___;
++	lea	-8(%r10),%rsp
++.cfi_def_cfa_register	%rsp
++.Lssse3_epilogue:
++	ret
++.cfi_endproc
++___
++}
++&end_function("chacha20_ssse3");
++
++########################################################################
++# SSSE3 code path that handles 128-byte inputs
++{
++my ($a,$b,$c,$d,$t,$t1,$rot16,$rot24)=map("%xmm$_",(8,9,2..7));
++my ($a1,$b1,$c1,$d1)=map("%xmm$_",(10,11,0,1));
++
++sub SSSE3ROUND_2x {	# same round as SSSE3ROUND, for two independent states (a..d and a1..d1) with their instructions interleaved
++	&paddd	($a,$b);		# a += b
++	&pxor	($d,$a);		# d ^= a
++	 &paddd	($a1,$b1);		# the extra leading space marks second-state instructions
++	 &pxor	($d1,$a1);
++	&pshufb	($d,$rot16);		# byte-shuffle with the .Lrot16 mask (presumably rotate-left by 16)
++	 &pshufb($d1,$rot16);
++
++	&paddd	($c,$d);		# c += d
++	 &paddd	($c1,$d1);
++	&pxor	($b,$c);		# b ^= c
++	 &pxor	($b1,$c1);
++	&movdqa	($t,$b);		# t is the scratch copy for the first state
++	&psrld	($b,20);
++	 &movdqa($t1,$b1);		# t1 is the scratch copy for the second state
++	&pslld	($t,12);
++	 &psrld	($b1,20);
++	&por	($b,$t);		# b = (b >> 20) | (b << 12): rotate left by 12
++	 &pslld	($t1,12);
++	 &por	($b1,$t1);		# b1 likewise
++
++	&paddd	($a,$b);		# a += b
++	&pxor	($d,$a);		# d ^= a
++	 &paddd	($a1,$b1);
++	 &pxor	($d1,$a1);
++	&pshufb	($d,$rot24);		# byte-shuffle with the .Lrot24 mask (presumably rotate-left by 8)
++	 &pshufb($d1,$rot24);
++
++	&paddd	($c,$d);		# c += d
++	 &paddd	($c1,$d1);
++	&pxor	($b,$c);		# b ^= c
++	 &pxor	($b1,$c1);
++	&movdqa	($t,$b);
++	&psrld	($b,25);
++	 &movdqa($t1,$b1);
++	&pslld	($t,7);
++	 &psrld	($b1,25);
++	&por	($b,$t);		# b = (b >> 25) | (b << 7): rotate left by 7
++	 &pslld	($t1,7);
++	 &por	($b1,$t1);		# b1 likewise
++}
++
++my $xframe = $win64 ? 0x68 : 8;
++
++$code.=<<___;
++.type	chacha20_128,\@function,5
++.align	32
++chacha20_128:
++.cfi_startproc
++.Lchacha20_128:
++	lea	8(%rsp),%r10		# frame pointer
++.cfi_def_cfa_register	%r10
++	sub	\$64+$xframe,%rsp
++	and \$-16,%rsp
++___
++$code.=<<___	if ($win64);
++	movaps	%xmm6,-0x70(%r10)
++	movaps	%xmm7,-0x60(%r10)
++	movaps	%xmm8,-0x50(%r10)
++	movaps	%xmm9,-0x40(%r10)
++	movaps	%xmm10,-0x30(%r10)
++	movaps	%xmm11,-0x20(%r10)
++.L128_body:
++___
++$code.=<<___;
++	movdqa	.Lsigma(%rip),$a
++	movdqu	($key),$b
++	movdqu	16($key),$c
++	movdqu	($counter),$d
++	movdqa	.Lone(%rip),$d1
++	movdqa	.Lrot16(%rip),$rot16
++	movdqa	.Lrot24(%rip),$rot24
++
++	movdqa	$a,$a1
++	movdqa	$a,0x00(%rsp)
++	movdqa	$b,$b1
++	movdqa	$b,0x10(%rsp)
++	movdqa	$c,$c1
++	movdqa	$c,0x20(%rsp)
++	paddd	$d,$d1
++	movdqa	$d,0x30(%rsp)
++	mov	\$10,$counter		# reuse $counter
++	jmp	.Loop_128
++
++.align	32
++.Loop_128:
++___
++	&SSSE3ROUND_2x();		# first round of the pair on both states: lanes are the state columns
++	&pshufd	($a,$a,0b10010011);	# a: lane i takes old lane (i+3)&3
++	&pshufd	($d,$d,0b01001110);	# d: lane i takes old lane (i+2)&3
++	&pshufd	($c,$c,0b00111001);	# c: lane i takes old lane (i+1)&3; b stays put, so lanes now line up along diagonals
++	&pshufd	($a1,$a1,0b10010011);	# same three rotations for the second state
++	&pshufd	($d1,$d1,0b01001110);
++	&pshufd	($c1,$c1,0b00111001);
++
++	&SSSE3ROUND_2x();		# second round of the pair, on the diagonals
++	&pshufd	($a,$a,0b00111001);	# inverse lane rotations: restore the column layout
++	&pshufd	($d,$d,0b01001110);
++	&pshufd	($c,$c,0b10010011);
++	&pshufd	($a1,$a1,0b00111001);	# and for the second state
++	&pshufd	($d1,$d1,0b01001110);
++	&pshufd	($c1,$c1,0b10010011);
++
++	&dec	($counter);		# $counter was set to 10 before the loop: 10 iterations of two rounds
++	&jnz	(".Loop_128");
++
++$code.=<<___;
++	paddd	0x00(%rsp),$a
++	paddd	0x10(%rsp),$b
++	paddd	0x20(%rsp),$c
++	paddd	0x30(%rsp),$d
++	paddd	.Lone(%rip),$d1
++	paddd	0x00(%rsp),$a1
++	paddd	0x10(%rsp),$b1
++	paddd	0x20(%rsp),$c1
++	paddd	0x30(%rsp),$d1
++
++	movdqu	0x00($inp),$t
++	movdqu	0x10($inp),$t1
++	pxor	$t,$a			# xor with input
++	movdqu	0x20($inp),$t
++	pxor	$t1,$b
++	movdqu	0x30($inp),$t1
++	pxor	$t,$c
++	movdqu	0x40($inp),$t
++	pxor	$t1,$d
++	movdqu	0x50($inp),$t1
++	pxor	$t,$a1
++	movdqu	0x60($inp),$t
++	pxor	$t1,$b1
++	movdqu	0x70($inp),$t1
++	pxor	$t,$c1
++	pxor	$t1,$d1
++
++	movdqu	$a,0x00($out)		# write output
++	movdqu	$b,0x10($out)
++	movdqu	$c,0x20($out)
++	movdqu	$d,0x30($out)
++	movdqu	$a1,0x40($out)
++	movdqu	$b1,0x50($out)
++	movdqu	$c1,0x60($out)
++	movdqu	$d1,0x70($out)
++___
++$code.=<<___	if ($win64);
++	movaps	-0x70(%r10),%xmm6
++	movaps	-0x60(%r10),%xmm7
++	movaps	-0x50(%r10),%xmm8
++	movaps	-0x40(%r10),%xmm9
++	movaps	-0x30(%r10),%xmm10
++	movaps	-0x20(%r10),%xmm11
++___
++$code.=<<___;
++	lea	-8(%r10),%rsp
++.cfi_def_cfa_register	%rsp
++.L128_epilogue:
++	ret
++.cfi_endproc
++.size	chacha20_128,.-chacha20_128
++___
++}
++
++########################################################################
++# SSSE3 code path that handles longer messages.
++{
++# assign variables to favor Atom front-end
++my ($xd0,$xd1,$xd2,$xd3, $xt0,$xt1,$xt2,$xt3,
++    $xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3)=map("%xmm$_",(0..15));
++my  @xx=($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,
++	"%nox","%nox","%nox","%nox", $xd0,$xd1,$xd2,$xd3);
++
++sub SSSE3_lane_ROUND {	# returns a list of Perl statements (as strings) for one round; the caller evals them one by one
++my ($a0,$b0,$c0,$d0)=@_;	# state-word indices of the first quarter-round (Q1)
++my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));	# next index, wrapping inside each group of four (Q2)
++my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));	# Q3
++my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));	# Q4
++my ($xc,$xc_,$t0,$t1)=map("\"$_\"",$xt0,$xt1,$xt2,$xt3);	# quoted names: the two 'c' words held in registers, plus two temporaries
++my @x=map("\"$_\"",@xx);	# quoted register name per state index; 8..11 are "%nox" placeholders because 'c' words live on the stack
++
++	# Consider order in which variables are addressed by their
++	# index:
++	#
++	#	a   b   c   d
++	#
++	#	0   4   8  12 < even round
++	#	1   5   9  13
++	#	2   6  10  14
++	#	3   7  11  15
++	#	0   5  10  15 < odd round
++	#	1   6  11  12
++	#	2   7   8  13
++	#	3   4   9  14
++	#
++	# 'a', 'b' and 'd's are permanently allocated in registers,
++	# @x[0..7,12..15], while 'c's are maintained in memory. If
++	# you observe 'c' column, you'll notice that pair of 'c's is
++	# invariant between rounds. This means that we have to reload
++	# them once per round, in the middle. This is why you'll see
++	# bunch of 'c' stores and loads in the middle, but none in
++	# the beginning or end.
++
++	(
++	"&paddd		(@x[$a0],@x[$b0])",	# Q1
++	 "&paddd	(@x[$a1],@x[$b1])",	# Q2
++	"&pxor		(@x[$d0],@x[$a0])",
++	 "&pxor		(@x[$d1],@x[$a1])",
++	"&pshufb	(@x[$d0],$t1)",	# $t1 is expected to hold the (%r9) mask on entry; it is reloaded at the end of each half
++	 "&pshufb	(@x[$d1],$t1)",
++
++	"&paddd		($xc,@x[$d0])",
++	 "&paddd	($xc_,@x[$d1])",
++	"&pxor		(@x[$b0],$xc)",
++	 "&pxor		(@x[$b1],$xc_)",
++	"&movdqa	($t0,@x[$b0])",
++	"&pslld		(@x[$b0],12)",
++	"&psrld		($t0,20)",
++	 "&movdqa	($t1,@x[$b1])",	# overwrites the mask held in $t1
++	 "&pslld	(@x[$b1],12)",
++	"&por		(@x[$b0],$t0)",	# b0 = (b0 << 12) | (b0 >> 20)
++	 "&psrld	($t1,20)",
++	"&movdqa	($t0,'(%r11)')",	# .Lrot24(%rip)
++	 "&por		(@x[$b1],$t1)",	# b1 = (b1 << 12) | (b1 >> 20)
++
++	"&paddd		(@x[$a0],@x[$b0])",
++	 "&paddd	(@x[$a1],@x[$b1])",
++	"&pxor		(@x[$d0],@x[$a0])",
++	 "&pxor		(@x[$d1],@x[$a1])",
++	"&pshufb	(@x[$d0],$t0)",	# $t0 now holds the (%r11) mask
++	 "&pshufb	(@x[$d1],$t0)",
++
++	"&paddd		($xc,@x[$d0])",
++	 "&paddd	($xc_,@x[$d1])",
++	"&pxor		(@x[$b0],$xc)",
++	 "&pxor		(@x[$b1],$xc_)",
++	"&movdqa	($t1,@x[$b0])",
++	"&pslld		(@x[$b0],7)",
++	"&psrld		($t1,25)",
++	 "&movdqa	($t0,@x[$b1])",
++	 "&pslld	(@x[$b1],7)",
++	"&por		(@x[$b0],$t1)",	# b0 = (b0 << 7) | (b0 >> 25)
++	 "&psrld	($t0,25)",
++	"&movdqa	($t1,'(%r9)')",	# .Lrot16(%rip)
++	 "&por		(@x[$b1],$t0)",	# b1 = (b1 << 7) | (b1 >> 25)
++
++	"&movdqa	(\"`16*($c0-8)`(%rsp)\",$xc)",	# reload pair of 'c's
++	 "&movdqa	(\"`16*($c1-8)`(%rsp)\",$xc_)",	# first two are stores: word N has its slot at 16*(N-8)(%rsp)
++	"&movdqa	($xc,\"`16*($c2-8)`(%rsp)\")",	# last two are loads of the 'c' words used by Q3/Q4
++	 "&movdqa	($xc_,\"`16*($c3-8)`(%rsp)\")",
++
++	"&paddd		(@x[$a2],@x[$b2])",	# Q3
++	 "&paddd	(@x[$a3],@x[$b3])",	# Q4
++	"&pxor		(@x[$d2],@x[$a2])",
++	 "&pxor		(@x[$d3],@x[$a3])",
++	"&pshufb	(@x[$d2],$t1)",
++	 "&pshufb	(@x[$d3],$t1)",
++
++	"&paddd		($xc,@x[$d2])",
++	 "&paddd	($xc_,@x[$d3])",
++	"&pxor		(@x[$b2],$xc)",
++	 "&pxor		(@x[$b3],$xc_)",
++	"&movdqa	($t0,@x[$b2])",
++	"&pslld		(@x[$b2],12)",
++	"&psrld		($t0,20)",
++	 "&movdqa	($t1,@x[$b3])",
++	 "&pslld	(@x[$b3],12)",
++	"&por		(@x[$b2],$t0)",	# b2 = (b2 << 12) | (b2 >> 20)
++	 "&psrld	($t1,20)",
++	"&movdqa	($t0,'(%r11)')",	# .Lrot24(%rip)
++	 "&por		(@x[$b3],$t1)",	# b3 = (b3 << 12) | (b3 >> 20)
++
++	"&paddd		(@x[$a2],@x[$b2])",
++	 "&paddd	(@x[$a3],@x[$b3])",
++	"&pxor		(@x[$d2],@x[$a2])",
++	 "&pxor		(@x[$d3],@x[$a3])",
++	"&pshufb	(@x[$d2],$t0)",
++	 "&pshufb	(@x[$d3],$t0)",
++
++	"&paddd		($xc,@x[$d2])",
++	 "&paddd	($xc_,@x[$d3])",
++	"&pxor		(@x[$b2],$xc)",
++	 "&pxor		(@x[$b3],$xc_)",
++	"&movdqa	($t1,@x[$b2])",
++	"&pslld		(@x[$b2],7)",
++	"&psrld		($t1,25)",
++	 "&movdqa	($t0,@x[$b3])",
++	 "&pslld	(@x[$b3],7)",
++	"&por		(@x[$b2],$t1)",	# b2 = (b2 << 7) | (b2 >> 25)
++	 "&psrld	($t0,25)",
++	"&movdqa	($t1,'(%r9)')",	# .Lrot16(%rip)
++	 "&por		(@x[$b3],$t0)"	# b3 = (b3 << 7) | (b3 >> 25); $t1 again holds the (%r9) mask for the next call
++	);
++}
++
++my $xframe = $win64 ? 0xa8 : 8;
++
++$code.=<<___;
++.type	chacha20_4x,\@function,5
++.align	32
++chacha20_4x:
++.cfi_startproc
++.Lchacha20_4x:
++	lea		8(%rsp),%r10		# frame pointer
++.cfi_def_cfa_register	%r10
++___
++$code.=<<___ if (!$kernel);
++	mov		%r9,%r11
++___
++$code.=<<___	if ($avx>1 && !$kernel);
++	shr		\$32,%r9		# OPENSSL_ia32cap_P+8
++	test		\$`1<<5`,%r9		# test AVX2
++	jnz		.Lchacha20_8x
++___
++$code.=<<___;
++	cmp		\$192,$len
++	ja		.Lproceed4x
++___
++$code.=<<___ if (!$kernel);
++	and		\$`1<<26|1<<22`,%r11	# isolate XSAVE+MOVBE
++	cmp		\$`1<<22`,%r11		# check for MOVBE without XSAVE
++	je		.Ldo_ssse3_after_all	# to detect Atom
++___
++$code.=<<___;
++.Lproceed4x:
++	sub		\$0x140+$xframe,%rsp
++	and		\$-16,%rsp
++___
++	################ stack layout
++	# +0x00		SIMD equivalent of @x[8-11]
++	# ...
++	# +0x40		constant copy of key[0-2] smashed by lanes
++	# ...
++	# +0x100	SIMD counters (with nonce smashed by lanes)
++	# ...
++	# +0x140
++$code.=<<___	if ($win64);
++	movaps		%xmm6,-0xb0(%r10)
++	movaps		%xmm7,-0xa0(%r10)
++	movaps		%xmm8,-0x90(%r10)
++	movaps		%xmm9,-0x80(%r10)
++	movaps		%xmm10,-0x70(%r10)
++	movaps		%xmm11,-0x60(%r10)
++	movaps		%xmm12,-0x50(%r10)
++	movaps		%xmm13,-0x40(%r10)
++	movaps		%xmm14,-0x30(%r10)
++	movaps		%xmm15,-0x20(%r10)
++.L4x_body:
++___
++$code.=<<___;
++	movdqa		.Lsigma(%rip),$xa3	# key[0]
++	movdqu		($key),$xb3		# key[1]
++	movdqu		16($key),$xt3		# key[2]
++	movdqu		($counter),$xd3		# key[3]
++	lea		0x100(%rsp),%rcx	# size optimization
++	lea		.Lrot16(%rip),%r9
++	lea		.Lrot24(%rip),%r11
++
++	pshufd		\$0x00,$xa3,$xa0	# smash key by lanes...
++	pshufd		\$0x55,$xa3,$xa1
++	movdqa		$xa0,0x40(%rsp)		# ... and offload
++	pshufd		\$0xaa,$xa3,$xa2
++	movdqa		$xa1,0x50(%rsp)
++	pshufd		\$0xff,$xa3,$xa3
++	movdqa		$xa2,0x60(%rsp)
++	movdqa		$xa3,0x70(%rsp)
++
++	pshufd		\$0x00,$xb3,$xb0
++	pshufd		\$0x55,$xb3,$xb1
++	movdqa		$xb0,0x80-0x100(%rcx)
++	pshufd		\$0xaa,$xb3,$xb2
++	movdqa		$xb1,0x90-0x100(%rcx)
++	pshufd		\$0xff,$xb3,$xb3
++	movdqa		$xb2,0xa0-0x100(%rcx)
++	movdqa		$xb3,0xb0-0x100(%rcx)
++
++	pshufd		\$0x00,$xt3,$xt0	# "$xc0"
++	pshufd		\$0x55,$xt3,$xt1	# "$xc1"
++	movdqa		$xt0,0xc0-0x100(%rcx)
++	pshufd		\$0xaa,$xt3,$xt2	# "$xc2"
++	movdqa		$xt1,0xd0-0x100(%rcx)
++	pshufd		\$0xff,$xt3,$xt3	# "$xc3"
++	movdqa		$xt2,0xe0-0x100(%rcx)
++	movdqa		$xt3,0xf0-0x100(%rcx)
++
++	pshufd		\$0x00,$xd3,$xd0
++	pshufd		\$0x55,$xd3,$xd1
++	paddd		.Linc(%rip),$xd0	# don't save counters yet
++	pshufd		\$0xaa,$xd3,$xd2
++	movdqa		$xd1,0x110-0x100(%rcx)
++	pshufd		\$0xff,$xd3,$xd3
++	movdqa		$xd2,0x120-0x100(%rcx)
++	movdqa		$xd3,0x130-0x100(%rcx)
++
++	jmp		.Loop_enter4x
++
++.align	32
++.Loop_outer4x:
++	movdqa		0x40(%rsp),$xa0		# re-load smashed key
++	movdqa		0x50(%rsp),$xa1
++	movdqa		0x60(%rsp),$xa2
++	movdqa		0x70(%rsp),$xa3
++	movdqa		0x80-0x100(%rcx),$xb0
++	movdqa		0x90-0x100(%rcx),$xb1
++	movdqa		0xa0-0x100(%rcx),$xb2
++	movdqa		0xb0-0x100(%rcx),$xb3
++	movdqa		0xc0-0x100(%rcx),$xt0	# "$xc0"
++	movdqa		0xd0-0x100(%rcx),$xt1	# "$xc1"
++	movdqa		0xe0-0x100(%rcx),$xt2	# "$xc2"
++	movdqa		0xf0-0x100(%rcx),$xt3	# "$xc3"
++	movdqa		0x100-0x100(%rcx),$xd0
++	movdqa		0x110-0x100(%rcx),$xd1
++	movdqa		0x120-0x100(%rcx),$xd2
++	movdqa		0x130-0x100(%rcx),$xd3
++	paddd		.Lfour(%rip),$xd0	# next SIMD counters
++
++.Loop_enter4x:
++	movdqa		$xt2,0x20(%rsp)		# SIMD equivalent of "@x[10]"
++	movdqa		$xt3,0x30(%rsp)		# SIMD equivalent of "@x[11]"
++	movdqa		(%r9),$xt3		# .Lrot16(%rip)
++	mov		\$10,%eax
++	movdqa		$xd0,0x100-0x100(%rcx)	# save SIMD counters
++	jmp		.Loop4x
++
++.align	32
++.Loop4x:
++___
++	foreach (&SSSE3_lane_ROUND(0, 4, 8,12)) { eval; }	# even round: eval each generated statement, starting from words 0,4,8,12
++	foreach (&SSSE3_lane_ROUND(0, 5,10,15)) { eval; }	# odd round: same generator, starting from words 0,5,10,15
++$code.=<<___;
++	dec		%eax
++	jnz		.Loop4x
++
++	paddd		0x40(%rsp),$xa0		# accumulate key material
++	paddd		0x50(%rsp),$xa1
++	paddd		0x60(%rsp),$xa2
++	paddd		0x70(%rsp),$xa3
++
++	movdqa		$xa0,$xt2		# "de-interlace" data
++	punpckldq	$xa1,$xa0
++	movdqa		$xa2,$xt3
++	punpckldq	$xa3,$xa2
++	punpckhdq	$xa1,$xt2
++	punpckhdq	$xa3,$xt3
++	movdqa		$xa0,$xa1
++	punpcklqdq	$xa2,$xa0		# "a0"
++	movdqa		$xt2,$xa3
++	punpcklqdq	$xt3,$xt2		# "a2"
++	punpckhqdq	$xa2,$xa1		# "a1"
++	punpckhqdq	$xt3,$xa3		# "a3"
++___
++	($xa2,$xt2)=($xt2,$xa2);	# the "a2" result was built in the register named $xt2: swap the Perl names so later templates call it $xa2
++$code.=<<___;
++	paddd		0x80-0x100(%rcx),$xb0
++	paddd		0x90-0x100(%rcx),$xb1
++	paddd		0xa0-0x100(%rcx),$xb2
++	paddd		0xb0-0x100(%rcx),$xb3
++
++	movdqa		$xa0,0x00(%rsp)		# offload $xaN
++	movdqa		$xa1,0x10(%rsp)
++	movdqa		0x20(%rsp),$xa0		# "xc2"
++	movdqa		0x30(%rsp),$xa1		# "xc3"
++
++	movdqa		$xb0,$xt2
++	punpckldq	$xb1,$xb0
++	movdqa		$xb2,$xt3
++	punpckldq	$xb3,$xb2
++	punpckhdq	$xb1,$xt2
++	punpckhdq	$xb3,$xt3
++	movdqa		$xb0,$xb1
++	punpcklqdq	$xb2,$xb0		# "b0"
++	movdqa		$xt2,$xb3
++	punpcklqdq	$xt3,$xt2		# "b2"
++	punpckhqdq	$xb2,$xb1		# "b1"
++	punpckhqdq	$xt3,$xb3		# "b3"
++___
++	($xb2,$xt2)=($xt2,$xb2);	# the "b2" result was built in the register named $xt2: swap the Perl names
++	my ($xc0,$xc1,$xc2,$xc3)=($xt0,$xt1,$xa0,$xa1);	# 'c' words: two are in $xt0/$xt1, two were just loaded from 0x20/0x30(%rsp) into $xa0/$xa1
++$code.=<<___;
++	paddd		0xc0-0x100(%rcx),$xc0
++	paddd		0xd0-0x100(%rcx),$xc1
++	paddd		0xe0-0x100(%rcx),$xc2
++	paddd		0xf0-0x100(%rcx),$xc3
++
++	movdqa		$xa2,0x20(%rsp)		# keep offloading $xaN
++	movdqa		$xa3,0x30(%rsp)
++
++	movdqa		$xc0,$xt2
++	punpckldq	$xc1,$xc0
++	movdqa		$xc2,$xt3
++	punpckldq	$xc3,$xc2
++	punpckhdq	$xc1,$xt2
++	punpckhdq	$xc3,$xt3
++	movdqa		$xc0,$xc1
++	punpcklqdq	$xc2,$xc0		# "c0"
++	movdqa		$xt2,$xc3
++	punpcklqdq	$xt3,$xt2		# "c2"
++	punpckhqdq	$xc2,$xc1		# "c1"
++	punpckhqdq	$xt3,$xc3		# "c3"
++___
++	($xc2,$xt2)=($xt2,$xc2);	# the "c2" result was built in the register named $xt2: swap the Perl names
++	($xt0,$xt1)=($xa2,$xa3);		# use $xaN as temporary
++$code.=<<___;
++	paddd		0x100-0x100(%rcx),$xd0
++	paddd		0x110-0x100(%rcx),$xd1
++	paddd		0x120-0x100(%rcx),$xd2
++	paddd		0x130-0x100(%rcx),$xd3
++
++	movdqa		$xd0,$xt2
++	punpckldq	$xd1,$xd0
++	movdqa		$xd2,$xt3
++	punpckldq	$xd3,$xd2
++	punpckhdq	$xd1,$xt2
++	punpckhdq	$xd3,$xt3
++	movdqa		$xd0,$xd1
++	punpcklqdq	$xd2,$xd0		# "d0"
++	movdqa		$xt2,$xd3
++	punpcklqdq	$xt3,$xt2		# "d2"
++	punpckhqdq	$xd2,$xd1		# "d1"
++	punpckhqdq	$xt3,$xd3		# "d3"
++___
++	($xd2,$xt2)=($xt2,$xd2);	# the "d2" result was built in the register named $xt2: swap the Perl names
++$code.=<<___;
++	cmp		\$64*4,$len
++	jb		.Ltail4x
++
++	movdqu		0x00($inp),$xt0		# xor with input
++	movdqu		0x10($inp),$xt1
++	movdqu		0x20($inp),$xt2
++	movdqu		0x30($inp),$xt3
++	pxor		0x00(%rsp),$xt0		# $xaN is offloaded, remember?
++	pxor		$xb0,$xt1
++	pxor		$xc0,$xt2
++	pxor		$xd0,$xt3
++
++	 movdqu		$xt0,0x00($out)
++	movdqu		0x40($inp),$xt0
++	 movdqu		$xt1,0x10($out)
++	movdqu		0x50($inp),$xt1
++	 movdqu		$xt2,0x20($out)
++	movdqu		0x60($inp),$xt2
++	 movdqu		$xt3,0x30($out)
++	movdqu		0x70($inp),$xt3
++	lea		0x80($inp),$inp		# size optimization
++	pxor		0x10(%rsp),$xt0
++	pxor		$xb1,$xt1
++	pxor		$xc1,$xt2
++	pxor		$xd1,$xt3
++
++	 movdqu		$xt0,0x40($out)
++	movdqu		0x00($inp),$xt0
++	 movdqu		$xt1,0x50($out)
++	movdqu		0x10($inp),$xt1
++	 movdqu		$xt2,0x60($out)
++	movdqu		0x20($inp),$xt2
++	 movdqu		$xt3,0x70($out)
++	 lea		0x80($out),$out		# size optimization
++	movdqu		0x30($inp),$xt3
++	pxor		0x20(%rsp),$xt0
++	pxor		$xb2,$xt1
++	pxor		$xc2,$xt2
++	pxor		$xd2,$xt3
++
++	 movdqu		$xt0,0x00($out)
++	movdqu		0x40($inp),$xt0
++	 movdqu		$xt1,0x10($out)
++	movdqu		0x50($inp),$xt1
++	 movdqu		$xt2,0x20($out)
++	movdqu		0x60($inp),$xt2
++	 movdqu		$xt3,0x30($out)
++	movdqu		0x70($inp),$xt3
++	lea		0x80($inp),$inp		# inp+=64*4
++	pxor		0x30(%rsp),$xt0
++	pxor		$xb3,$xt1
++	pxor		$xc3,$xt2
++	pxor		$xd3,$xt3
++	movdqu		$xt0,0x40($out)
++	movdqu		$xt1,0x50($out)
++	movdqu		$xt2,0x60($out)
++	movdqu		$xt3,0x70($out)
++	lea		0x80($out),$out		# out+=64*4
++
++	sub		\$64*4,$len
++	jnz		.Loop_outer4x
++
++	jmp		.Ldone4x
++
++.Ltail4x:
++	cmp		\$192,$len
++	jae		.L192_or_more4x
++	cmp		\$128,$len
++	jae		.L128_or_more4x
++	cmp		\$64,$len
++	jae		.L64_or_more4x
++
++	#movdqa		0x00(%rsp),$xt0		# $xaN is offloaded, remember?
++	xor		%r9,%r9
++	#movdqa		$xt0,0x00(%rsp)
++	movdqa		$xb0,0x10(%rsp)
++	movdqa		$xc0,0x20(%rsp)
++	movdqa		$xd0,0x30(%rsp)
++	jmp		.Loop_tail4x
++
++.align	32
++.L64_or_more4x:
++	movdqu		0x00($inp),$xt0		# xor with input
++	movdqu		0x10($inp),$xt1
++	movdqu		0x20($inp),$xt2
++	movdqu		0x30($inp),$xt3
++	pxor		0x00(%rsp),$xt0		# $xaxN is offloaded, remember?
++	pxor		$xb0,$xt1
++	pxor		$xc0,$xt2
++	pxor		$xd0,$xt3
++	movdqu		$xt0,0x00($out)
++	movdqu		$xt1,0x10($out)
++	movdqu		$xt2,0x20($out)
++	movdqu		$xt3,0x30($out)
++	je		.Ldone4x
++
++	movdqa		0x10(%rsp),$xt0		# $xaN is offloaded, remember?
++	lea		0x40($inp),$inp		# inp+=64*1
++	xor		%r9,%r9
++	movdqa		$xt0,0x00(%rsp)
++	movdqa		$xb1,0x10(%rsp)
++	lea		0x40($out),$out		# out+=64*1
++	movdqa		$xc1,0x20(%rsp)
++	sub		\$64,$len		# len-=64*1
++	movdqa		$xd1,0x30(%rsp)
++	jmp		.Loop_tail4x
++
++.align	32
++.L128_or_more4x:
++	movdqu		0x00($inp),$xt0		# xor with input
++	movdqu		0x10($inp),$xt1
++	movdqu		0x20($inp),$xt2
++	movdqu		0x30($inp),$xt3
++	pxor		0x00(%rsp),$xt0		# $xaN is offloaded, remember?
++	pxor		$xb0,$xt1
++	pxor		$xc0,$xt2
++	pxor		$xd0,$xt3
++
++	 movdqu		$xt0,0x00($out)
++	movdqu		0x40($inp),$xt0
++	 movdqu		$xt1,0x10($out)
++	movdqu		0x50($inp),$xt1
++	 movdqu		$xt2,0x20($out)
++	movdqu		0x60($inp),$xt2
++	 movdqu		$xt3,0x30($out)
++	movdqu		0x70($inp),$xt3
++	pxor		0x10(%rsp),$xt0
++	pxor		$xb1,$xt1
++	pxor		$xc1,$xt2
++	pxor		$xd1,$xt3
++	movdqu		$xt0,0x40($out)
++	movdqu		$xt1,0x50($out)
++	movdqu		$xt2,0x60($out)
++	movdqu		$xt3,0x70($out)
++	je		.Ldone4x
++
++	movdqa		0x20(%rsp),$xt0		# $xaN is offloaded, remember?
++	lea		0x80($inp),$inp		# inp+=64*2
++	xor		%r9,%r9
++	movdqa		$xt0,0x00(%rsp)
++	movdqa		$xb2,0x10(%rsp)
++	lea		0x80($out),$out		# out+=64*2
++	movdqa		$xc2,0x20(%rsp)
++	sub		\$128,$len		# len-=64*2
++	movdqa		$xd2,0x30(%rsp)
++	jmp		.Loop_tail4x
++
++.align	32
++.L192_or_more4x:
++	movdqu		0x00($inp),$xt0		# xor with input
++	movdqu		0x10($inp),$xt1
++	movdqu		0x20($inp),$xt2
++	movdqu		0x30($inp),$xt3
++	pxor		0x00(%rsp),$xt0		# $xaN is offloaded, remember?
++	pxor		$xb0,$xt1
++	pxor		$xc0,$xt2
++	pxor		$xd0,$xt3
++
++	 movdqu		$xt0,0x00($out)
++	movdqu		0x40($inp),$xt0
++	 movdqu		$xt1,0x10($out)
++	movdqu		0x50($inp),$xt1
++	 movdqu		$xt2,0x20($out)
++	movdqu		0x60($inp),$xt2
++	 movdqu		$xt3,0x30($out)
++	movdqu		0x70($inp),$xt3
++	lea		0x80($inp),$inp		# size optimization
++	pxor		0x10(%rsp),$xt0
++	pxor		$xb1,$xt1
++	pxor		$xc1,$xt2
++	pxor		$xd1,$xt3
++
++	 movdqu		$xt0,0x40($out)
++	movdqu		0x00($inp),$xt0
++	 movdqu		$xt1,0x50($out)
++	movdqu		0x10($inp),$xt1
++	 movdqu		$xt2,0x60($out)
++	movdqu		0x20($inp),$xt2
++	 movdqu		$xt3,0x70($out)
++	 lea		0x80($out),$out		# size optimization
++	movdqu		0x30($inp),$xt3
++	pxor		0x20(%rsp),$xt0
++	pxor		$xb2,$xt1
++	pxor		$xc2,$xt2
++	pxor		$xd2,$xt3
++	movdqu		$xt0,0x00($out)
++	movdqu		$xt1,0x10($out)
++	movdqu		$xt2,0x20($out)
++	movdqu		$xt3,0x30($out)
++	je		.Ldone4x
++
++	movdqa		0x30(%rsp),$xt0		# $xaN is offloaded, remember?
++	lea		0x40($inp),$inp		# inp+=64*3
++	xor		%r9,%r9
++	movdqa		$xt0,0x00(%rsp)
++	movdqa		$xb3,0x10(%rsp)
++	lea		0x40($out),$out		# out+=64*3
++	movdqa		$xc3,0x20(%rsp)
++	sub		\$192,$len		# len-=64*3
++	movdqa		$xd3,0x30(%rsp)
++
++.Loop_tail4x:
++	movzb		($inp,%r9),%eax
++	movzb		(%rsp,%r9),%ecx
++	lea		1(%r9),%r9
++	xor		%ecx,%eax
++	mov		%al,-1($out,%r9)
++	dec		$len
++	jnz		.Loop_tail4x
++
++.Ldone4x:
++___
++$code.=<<___	if ($win64);
++	movaps		-0xb0(%r10),%xmm6
++	movaps		-0xa0(%r10),%xmm7
++	movaps		-0x90(%r10),%xmm8
++	movaps		-0x80(%r10),%xmm9
++	movaps		-0x70(%r10),%xmm10
++	movaps		-0x60(%r10),%xmm11
++	movaps		-0x50(%r10),%xmm12
++	movaps		-0x40(%r10),%xmm13
++	movaps		-0x30(%r10),%xmm14
++	movaps		-0x20(%r10),%xmm15
++___
++$code.=<<___;
++	lea		-8(%r10),%rsp
++.cfi_def_cfa_register	%rsp
++.L4x_epilogue:
++	ret
++.cfi_endproc
++.size	chacha20_4x,.-chacha20_4x
++___
++}
++if($kernel) {	# kernel builds only
++	$code .= "#endif\n";	# closes the "#ifdef CONFIG_AS_SSSE3" emitted before the SSSE3 functions
++}
++
++########################################################################
++# XOP code path that handles all lengths.
++if ($avx && !$kernel) {
++# There is some "anomaly" observed depending on instructions' size or
++# alignment. If you look closely at below code you'll notice that
++# sometimes argument order varies. The order affects instruction
++# encoding by making it larger, and such fiddling gives 5% performance
++# improvement. This is on FX-4100...
++
++my ($xb0,$xb1,$xb2,$xb3, $xd0,$xd1,$xd2,$xd3,
++    $xa0,$xa1,$xa2,$xa3, $xt0,$xt1,$xt2,$xt3)=map("%xmm$_",(0..15));
++my  @xx=($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,
++	 $xt0,$xt1,$xt2,$xt3, $xd0,$xd1,$xd2,$xd3);
++
++sub XOP_lane_ROUND {	# returns a list of Perl statements (as strings) for one round; all 16 state words are in registers here
++my ($a0,$b0,$c0,$d0)=@_;	# state-word indices of the first quarter-round (Q1)
++my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));	# next index, wrapping inside each group of four (Q2)
++my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));	# Q3
++my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));	# Q4
++my @x=map("\"$_\"",@xx);	# quoted register name per state index
++
++	(
++	"&vpaddd	(@x[$a0],@x[$a0],@x[$b0])",	# Q1
++	 "&vpaddd	(@x[$a1],@x[$a1],@x[$b1])",	# Q2
++	  "&vpaddd	(@x[$a2],@x[$a2],@x[$b2])",	# Q3
++	   "&vpaddd	(@x[$a3],@x[$a3],@x[$b3])",	# Q4
++	"&vpxor		(@x[$d0],@x[$a0],@x[$d0])",	# d ^= a
++	 "&vpxor	(@x[$d1],@x[$a1],@x[$d1])",
++	  "&vpxor	(@x[$d2],@x[$a2],@x[$d2])",
++	   "&vpxor	(@x[$d3],@x[$a3],@x[$d3])",
++	"&vprotd	(@x[$d0],@x[$d0],16)",	# d = d rotated by 16 in one instruction, no temporary needed
++	 "&vprotd	(@x[$d1],@x[$d1],16)",
++	  "&vprotd	(@x[$d2],@x[$d2],16)",
++	   "&vprotd	(@x[$d3],@x[$d3],16)",
++
++	"&vpaddd	(@x[$c0],@x[$c0],@x[$d0])",	# c += d
++	 "&vpaddd	(@x[$c1],@x[$c1],@x[$d1])",
++	  "&vpaddd	(@x[$c2],@x[$c2],@x[$d2])",
++	   "&vpaddd	(@x[$c3],@x[$c3],@x[$d3])",
++	"&vpxor		(@x[$b0],@x[$c0],@x[$b0])",	# b ^= c
++	 "&vpxor	(@x[$b1],@x[$c1],@x[$b1])",
++	  "&vpxor	(@x[$b2],@x[$b2],@x[$c2])",	# flip
++	   "&vpxor	(@x[$b3],@x[$b3],@x[$c3])",	# flip
++	"&vprotd	(@x[$b0],@x[$b0],12)",	# b = b rotated by 12
++	 "&vprotd	(@x[$b1],@x[$b1],12)",
++	  "&vprotd	(@x[$b2],@x[$b2],12)",
++	   "&vprotd	(@x[$b3],@x[$b3],12)",
++
++	"&vpaddd	(@x[$a0],@x[$b0],@x[$a0])",	# flip
++	 "&vpaddd	(@x[$a1],@x[$b1],@x[$a1])",	# flip
++	  "&vpaddd	(@x[$a2],@x[$a2],@x[$b2])",
++	   "&vpaddd	(@x[$a3],@x[$a3],@x[$b3])",
++	"&vpxor		(@x[$d0],@x[$a0],@x[$d0])",	# d ^= a
++	 "&vpxor	(@x[$d1],@x[$a1],@x[$d1])",
++	  "&vpxor	(@x[$d2],@x[$a2],@x[$d2])",
++	   "&vpxor	(@x[$d3],@x[$a3],@x[$d3])",
++	"&vprotd	(@x[$d0],@x[$d0],8)",	# d = d rotated by 8
++	 "&vprotd	(@x[$d1],@x[$d1],8)",
++	  "&vprotd	(@x[$d2],@x[$d2],8)",
++	   "&vprotd	(@x[$d3],@x[$d3],8)",
++
++	"&vpaddd	(@x[$c0],@x[$c0],@x[$d0])",	# c += d
++	 "&vpaddd	(@x[$c1],@x[$c1],@x[$d1])",
++	  "&vpaddd	(@x[$c2],@x[$c2],@x[$d2])",
++	   "&vpaddd	(@x[$c3],@x[$c3],@x[$d3])",
++	"&vpxor		(@x[$b0],@x[$c0],@x[$b0])",	# b ^= c
++	 "&vpxor	(@x[$b1],@x[$c1],@x[$b1])",
++	  "&vpxor	(@x[$b2],@x[$b2],@x[$c2])",	# flip
++	   "&vpxor	(@x[$b3],@x[$b3],@x[$c3])",	# flip
++	"&vprotd	(@x[$b0],@x[$b0],7)",	# b = b rotated by 7
++	 "&vprotd	(@x[$b1],@x[$b1],7)",
++	  "&vprotd	(@x[$b2],@x[$b2],7)",
++	   "&vprotd	(@x[$b3],@x[$b3],7)"
++	);
++}
++
++my $xframe = $win64 ? 0xa8 : 8;
++
++&declare_function("chacha20_xop", 32, 5);
++$code.=<<___;
++.cfi_startproc
++.Lchacha20_4xop:
++	lea		8(%rsp),%r10		# frame pointer
++.cfi_def_cfa_register	%r10
++	sub		\$0x140+$xframe,%rsp
++	and 	\$-16,%rsp
++___
++	################ stack layout
++	# +0x00		SIMD equivalent of @x[8-12]
++	# ...
++	# +0x40		constant copy of key[0-2] smashed by lanes
++	# ...
++	# +0x100	SIMD counters (with nonce smashed by lanes)
++	# ...
++	# +0x140
++$code.=<<___	if ($win64);
++	movaps		%xmm6,-0xb0(%r10)
++	movaps		%xmm7,-0xa0(%r10)
++	movaps		%xmm8,-0x90(%r10)
++	movaps		%xmm9,-0x80(%r10)
++	movaps		%xmm10,-0x70(%r10)
++	movaps		%xmm11,-0x60(%r10)
++	movaps		%xmm12,-0x50(%r10)
++	movaps		%xmm13,-0x40(%r10)
++	movaps		%xmm14,-0x30(%r10)
++	movaps		%xmm15,-0x20(%r10)
++.L4xop_body:
++___
++$code.=<<___;
++	vzeroupper
++
++	vmovdqa		.Lsigma(%rip),$xa3	# key[0]
++	vmovdqu		($key),$xb3		# key[1]
++	vmovdqu		16($key),$xt3		# key[2]
++	vmovdqu		($counter),$xd3		# key[3]
++	lea		0x100(%rsp),%rcx	# size optimization
++
++	vpshufd		\$0x00,$xa3,$xa0	# smash key by lanes...
++	vpshufd		\$0x55,$xa3,$xa1
++	vmovdqa		$xa0,0x40(%rsp)		# ... and offload
++	vpshufd		\$0xaa,$xa3,$xa2
++	vmovdqa		$xa1,0x50(%rsp)
++	vpshufd		\$0xff,$xa3,$xa3
++	vmovdqa		$xa2,0x60(%rsp)
++	vmovdqa		$xa3,0x70(%rsp)
++
++	vpshufd		\$0x00,$xb3,$xb0
++	vpshufd		\$0x55,$xb3,$xb1
++	vmovdqa		$xb0,0x80-0x100(%rcx)
++	vpshufd		\$0xaa,$xb3,$xb2
++	vmovdqa		$xb1,0x90-0x100(%rcx)
++	vpshufd		\$0xff,$xb3,$xb3
++	vmovdqa		$xb2,0xa0-0x100(%rcx)
++	vmovdqa		$xb3,0xb0-0x100(%rcx)
++
++	vpshufd		\$0x00,$xt3,$xt0	# "$xc0"
++	vpshufd		\$0x55,$xt3,$xt1	# "$xc1"
++	vmovdqa		$xt0,0xc0-0x100(%rcx)
++	vpshufd		\$0xaa,$xt3,$xt2	# "$xc2"
++	vmovdqa		$xt1,0xd0-0x100(%rcx)
++	vpshufd		\$0xff,$xt3,$xt3	# "$xc3"
++	vmovdqa		$xt2,0xe0-0x100(%rcx)
++	vmovdqa		$xt3,0xf0-0x100(%rcx)
++
++	vpshufd		\$0x00,$xd3,$xd0
++	vpshufd		\$0x55,$xd3,$xd1
++	vpaddd		.Linc(%rip),$xd0,$xd0	# don't save counters yet
++	vpshufd		\$0xaa,$xd3,$xd2
++	vmovdqa		$xd1,0x110-0x100(%rcx)
++	vpshufd		\$0xff,$xd3,$xd3
++	vmovdqa		$xd2,0x120-0x100(%rcx)
++	vmovdqa		$xd3,0x130-0x100(%rcx)
++
++	jmp		.Loop_enter4xop
++
++.align	32
++.Loop_outer4xop:
++	vmovdqa		0x40(%rsp),$xa0		# re-load smashed key
++	vmovdqa		0x50(%rsp),$xa1
++	vmovdqa		0x60(%rsp),$xa2
++	vmovdqa		0x70(%rsp),$xa3
++	vmovdqa		0x80-0x100(%rcx),$xb0
++	vmovdqa		0x90-0x100(%rcx),$xb1
++	vmovdqa		0xa0-0x100(%rcx),$xb2
++	vmovdqa		0xb0-0x100(%rcx),$xb3
++	vmovdqa		0xc0-0x100(%rcx),$xt0	# "$xc0"
++	vmovdqa		0xd0-0x100(%rcx),$xt1	# "$xc1"
++	vmovdqa		0xe0-0x100(%rcx),$xt2	# "$xc2"
++	vmovdqa		0xf0-0x100(%rcx),$xt3	# "$xc3"
++	vmovdqa		0x100-0x100(%rcx),$xd0
++	vmovdqa		0x110-0x100(%rcx),$xd1
++	vmovdqa		0x120-0x100(%rcx),$xd2
++	vmovdqa		0x130-0x100(%rcx),$xd3
++	vpaddd		.Lfour(%rip),$xd0,$xd0	# next SIMD counters
++
++.Loop_enter4xop:
++	mov		\$10,%eax
++	vmovdqa		$xd0,0x100-0x100(%rcx)	# save SIMD counters
++	jmp		.Loop4xop
++
++.align	32
++.Loop4xop:
++___
++	foreach (&XOP_lane_ROUND(0, 4, 8,12)) { eval; }
++	foreach (&XOP_lane_ROUND(0, 5,10,15)) { eval; }
++$code.=<<___;
++	dec		%eax
++	jnz		.Loop4xop
++
++	vpaddd		0x40(%rsp),$xa0,$xa0	# accumulate key material
++	vpaddd		0x50(%rsp),$xa1,$xa1
++	vpaddd		0x60(%rsp),$xa2,$xa2
++	vpaddd		0x70(%rsp),$xa3,$xa3
++
++	vmovdqa		$xt2,0x20(%rsp)		# offload $xc2,3
++	vmovdqa		$xt3,0x30(%rsp)
++
++	vpunpckldq	$xa1,$xa0,$xt2		# "de-interlace" data
++	vpunpckldq	$xa3,$xa2,$xt3
++	vpunpckhdq	$xa1,$xa0,$xa0
++	vpunpckhdq	$xa3,$xa2,$xa2
++	vpunpcklqdq	$xt3,$xt2,$xa1		# "a0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "a1"
++	vpunpcklqdq	$xa2,$xa0,$xa3		# "a2"
++	vpunpckhqdq	$xa2,$xa0,$xa0		# "a3"
++___
++        ($xa0,$xa1,$xa2,$xa3,$xt2)=($xa1,$xt2,$xa3,$xa0,$xa2);
++$code.=<<___;
++	vpaddd		0x80-0x100(%rcx),$xb0,$xb0
++	vpaddd		0x90-0x100(%rcx),$xb1,$xb1
++	vpaddd		0xa0-0x100(%rcx),$xb2,$xb2
++	vpaddd		0xb0-0x100(%rcx),$xb3,$xb3
++
++	vmovdqa		$xa0,0x00(%rsp)		# offload $xa0,1
++	vmovdqa		$xa1,0x10(%rsp)
++	vmovdqa		0x20(%rsp),$xa0		# "xc2"
++	vmovdqa		0x30(%rsp),$xa1		# "xc3"
++
++	vpunpckldq	$xb1,$xb0,$xt2
++	vpunpckldq	$xb3,$xb2,$xt3
++	vpunpckhdq	$xb1,$xb0,$xb0
++	vpunpckhdq	$xb3,$xb2,$xb2
++	vpunpcklqdq	$xt3,$xt2,$xb1		# "b0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "b1"
++	vpunpcklqdq	$xb2,$xb0,$xb3		# "b2"
++	vpunpckhqdq	$xb2,$xb0,$xb0		# "b3"
++___
++	($xb0,$xb1,$xb2,$xb3,$xt2)=($xb1,$xt2,$xb3,$xb0,$xb2);
++	my ($xc0,$xc1,$xc2,$xc3)=($xt0,$xt1,$xa0,$xa1);
++$code.=<<___;
++	vpaddd		0xc0-0x100(%rcx),$xc0,$xc0
++	vpaddd		0xd0-0x100(%rcx),$xc1,$xc1
++	vpaddd		0xe0-0x100(%rcx),$xc2,$xc2
++	vpaddd		0xf0-0x100(%rcx),$xc3,$xc3
++
++	vpunpckldq	$xc1,$xc0,$xt2
++	vpunpckldq	$xc3,$xc2,$xt3
++	vpunpckhdq	$xc1,$xc0,$xc0
++	vpunpckhdq	$xc3,$xc2,$xc2
++	vpunpcklqdq	$xt3,$xt2,$xc1		# "c0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "c1"
++	vpunpcklqdq	$xc2,$xc0,$xc3		# "c2"
++	vpunpckhqdq	$xc2,$xc0,$xc0		# "c3"
++___
++	($xc0,$xc1,$xc2,$xc3,$xt2)=($xc1,$xt2,$xc3,$xc0,$xc2);
++$code.=<<___;
++	vpaddd		0x100-0x100(%rcx),$xd0,$xd0
++	vpaddd		0x110-0x100(%rcx),$xd1,$xd1
++	vpaddd		0x120-0x100(%rcx),$xd2,$xd2
++	vpaddd		0x130-0x100(%rcx),$xd3,$xd3
++
++	vpunpckldq	$xd1,$xd0,$xt2
++	vpunpckldq	$xd3,$xd2,$xt3
++	vpunpckhdq	$xd1,$xd0,$xd0
++	vpunpckhdq	$xd3,$xd2,$xd2
++	vpunpcklqdq	$xt3,$xt2,$xd1		# "d0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "d1"
++	vpunpcklqdq	$xd2,$xd0,$xd3		# "d2"
++	vpunpckhqdq	$xd2,$xd0,$xd0		# "d3"
++___
++	($xd0,$xd1,$xd2,$xd3,$xt2)=($xd1,$xt2,$xd3,$xd0,$xd2);
++	($xa0,$xa1)=($xt2,$xt3);
++$code.=<<___;
++	vmovdqa		0x00(%rsp),$xa0		# restore $xa0,1
++	vmovdqa		0x10(%rsp),$xa1
++
++	cmp		\$64*4,$len
++	jb		.Ltail4xop
++
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x10($inp),$xb0,$xb0
++	vpxor		0x20($inp),$xc0,$xc0
++	vpxor		0x30($inp),$xd0,$xd0
++	vpxor		0x40($inp),$xa1,$xa1
++	vpxor		0x50($inp),$xb1,$xb1
++	vpxor		0x60($inp),$xc1,$xc1
++	vpxor		0x70($inp),$xd1,$xd1
++	lea		0x80($inp),$inp		# size optimization
++	vpxor		0x00($inp),$xa2,$xa2
++	vpxor		0x10($inp),$xb2,$xb2
++	vpxor		0x20($inp),$xc2,$xc2
++	vpxor		0x30($inp),$xd2,$xd2
++	vpxor		0x40($inp),$xa3,$xa3
++	vpxor		0x50($inp),$xb3,$xb3
++	vpxor		0x60($inp),$xc3,$xc3
++	vpxor		0x70($inp),$xd3,$xd3
++	lea		0x80($inp),$inp		# inp+=64*4
++
++	vmovdqu		$xa0,0x00($out)
++	vmovdqu		$xb0,0x10($out)
++	vmovdqu		$xc0,0x20($out)
++	vmovdqu		$xd0,0x30($out)
++	vmovdqu		$xa1,0x40($out)
++	vmovdqu		$xb1,0x50($out)
++	vmovdqu		$xc1,0x60($out)
++	vmovdqu		$xd1,0x70($out)
++	lea		0x80($out),$out		# size optimization
++	vmovdqu		$xa2,0x00($out)
++	vmovdqu		$xb2,0x10($out)
++	vmovdqu		$xc2,0x20($out)
++	vmovdqu		$xd2,0x30($out)
++	vmovdqu		$xa3,0x40($out)
++	vmovdqu		$xb3,0x50($out)
++	vmovdqu		$xc3,0x60($out)
++	vmovdqu		$xd3,0x70($out)
++	lea		0x80($out),$out		# out+=64*4
++
++	sub		\$64*4,$len
++	jnz		.Loop_outer4xop
++
++	jmp		.Ldone4xop
++
++.align	32
++.Ltail4xop:
++	cmp		\$192,$len
++	jae		.L192_or_more4xop
++	cmp		\$128,$len
++	jae		.L128_or_more4xop
++	cmp		\$64,$len
++	jae		.L64_or_more4xop
++
++	xor		%r9,%r9
++	vmovdqa		$xa0,0x00(%rsp)
++	vmovdqa		$xb0,0x10(%rsp)
++	vmovdqa		$xc0,0x20(%rsp)
++	vmovdqa		$xd0,0x30(%rsp)
++	jmp		.Loop_tail4xop
++
++.align	32
++.L64_or_more4xop:
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x10($inp),$xb0,$xb0
++	vpxor		0x20($inp),$xc0,$xc0
++	vpxor		0x30($inp),$xd0,$xd0
++	vmovdqu		$xa0,0x00($out)
++	vmovdqu		$xb0,0x10($out)
++	vmovdqu		$xc0,0x20($out)
++	vmovdqu		$xd0,0x30($out)
++	je		.Ldone4xop
++
++	lea		0x40($inp),$inp		# inp+=64*1
++	vmovdqa		$xa1,0x00(%rsp)
++	xor		%r9,%r9
++	vmovdqa		$xb1,0x10(%rsp)
++	lea		0x40($out),$out		# out+=64*1
++	vmovdqa		$xc1,0x20(%rsp)
++	sub		\$64,$len		# len-=64*1
++	vmovdqa		$xd1,0x30(%rsp)
++	jmp		.Loop_tail4xop
++
++.align	32
++.L128_or_more4xop:
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x10($inp),$xb0,$xb0
++	vpxor		0x20($inp),$xc0,$xc0
++	vpxor		0x30($inp),$xd0,$xd0
++	vpxor		0x40($inp),$xa1,$xa1
++	vpxor		0x50($inp),$xb1,$xb1
++	vpxor		0x60($inp),$xc1,$xc1
++	vpxor		0x70($inp),$xd1,$xd1
++
++	vmovdqu		$xa0,0x00($out)
++	vmovdqu		$xb0,0x10($out)
++	vmovdqu		$xc0,0x20($out)
++	vmovdqu		$xd0,0x30($out)
++	vmovdqu		$xa1,0x40($out)
++	vmovdqu		$xb1,0x50($out)
++	vmovdqu		$xc1,0x60($out)
++	vmovdqu		$xd1,0x70($out)
++	je		.Ldone4xop
++
++	lea		0x80($inp),$inp		# inp+=64*2
++	vmovdqa		$xa2,0x00(%rsp)
++	xor		%r9,%r9
++	vmovdqa		$xb2,0x10(%rsp)
++	lea		0x80($out),$out		# out+=64*2
++	vmovdqa		$xc2,0x20(%rsp)
++	sub		\$128,$len		# len-=64*2
++	vmovdqa		$xd2,0x30(%rsp)
++	jmp		.Loop_tail4xop
++
++.align	32
++.L192_or_more4xop:
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x10($inp),$xb0,$xb0
++	vpxor		0x20($inp),$xc0,$xc0
++	vpxor		0x30($inp),$xd0,$xd0
++	vpxor		0x40($inp),$xa1,$xa1
++	vpxor		0x50($inp),$xb1,$xb1
++	vpxor		0x60($inp),$xc1,$xc1
++	vpxor		0x70($inp),$xd1,$xd1
++	lea		0x80($inp),$inp		# size optimization
++	vpxor		0x00($inp),$xa2,$xa2
++	vpxor		0x10($inp),$xb2,$xb2
++	vpxor		0x20($inp),$xc2,$xc2
++	vpxor		0x30($inp),$xd2,$xd2
++
++	vmovdqu		$xa0,0x00($out)
++	vmovdqu		$xb0,0x10($out)
++	vmovdqu		$xc0,0x20($out)
++	vmovdqu		$xd0,0x30($out)
++	vmovdqu		$xa1,0x40($out)
++	vmovdqu		$xb1,0x50($out)
++	vmovdqu		$xc1,0x60($out)
++	vmovdqu		$xd1,0x70($out)
++	lea		0x80($out),$out		# size optimization
++	vmovdqu		$xa2,0x00($out)
++	vmovdqu		$xb2,0x10($out)
++	vmovdqu		$xc2,0x20($out)
++	vmovdqu		$xd2,0x30($out)
++	je		.Ldone4xop
++
++	lea		0x40($inp),$inp		# inp+=64*3
++	vmovdqa		$xa3,0x00(%rsp)
++	xor		%r9,%r9
++	vmovdqa		$xb3,0x10(%rsp)
++	lea		0x40($out),$out		# out+=64*3
++	vmovdqa		$xc3,0x20(%rsp)
++	sub		\$192,$len		# len-=64*3
++	vmovdqa		$xd3,0x30(%rsp)
++
++.Loop_tail4xop:
++	movzb		($inp,%r9),%eax
++	movzb		(%rsp,%r9),%ecx
++	lea		1(%r9),%r9
++	xor		%ecx,%eax
++	mov		%al,-1($out,%r9)
++	dec		$len
++	jnz		.Loop_tail4xop
++
++.Ldone4xop:
++	vzeroupper
++___
++$code.=<<___	if ($win64);
++	movaps		-0xb0(%r10),%xmm6
++	movaps		-0xa0(%r10),%xmm7
++	movaps		-0x90(%r10),%xmm8
++	movaps		-0x80(%r10),%xmm9
++	movaps		-0x70(%r10),%xmm10
++	movaps		-0x60(%r10),%xmm11
++	movaps		-0x50(%r10),%xmm12
++	movaps		-0x40(%r10),%xmm13
++	movaps		-0x30(%r10),%xmm14
++	movaps		-0x20(%r10),%xmm15
++___
++$code.=<<___;
++	lea		-8(%r10),%rsp
++.cfi_def_cfa_register	%rsp
++.L4xop_epilogue:
++	ret
++.cfi_endproc
++___
++&end_function("chacha20_xop");
++}
++
++########################################################################
++# AVX2 code path
++if ($avx>1) {
++
++if($kernel) {
++	$code .= "#ifdef CONFIG_AS_AVX2\n";
++}
++
++my ($xb0,$xb1,$xb2,$xb3, $xd0,$xd1,$xd2,$xd3,	# b rows: ymm0-3, d rows: ymm4-7
++    $xa0,$xa1,$xa2,$xa3, $xt0,$xt1,$xt2,$xt3)=map("%ymm$_",(0..15));	# a rows: ymm8-11, scratch: ymm12-15
++my @xx=($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,	# register name per state word 0..15 (a,b,c,d)
++	"%nox","%nox","%nox","%nox", $xd0,$xd1,$xd2,$xd3);	# 'c' words 8..11 get a placeholder name, not a register
++
++sub AVX2_lane_ROUND {	# builds one round (4 quarter-rounds) as a list of perlasm statement strings
++my ($a0,$b0,$c0,$d0)=@_;	# state-word indices used by quarter-round Q1
++my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));	# Q2: same row, column+1 (mod 4)
++my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));	# Q3
++my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));	# Q4
++my ($xc,$xc_,$t0,$t1)=map("\"$_\"",$xt0,$xt1,$xt2,$xt3);	# two live 'c' registers and two temporaries
++my @x=map("\"$_\"",@xx);	# names wrapped in quotes so they are string literals once eval'ed
++
++	# Consider order in which variables are addressed by their
++	# index:
++	#
++	#	a   b   c   d
++	#
++	#	0   4   8  12 < even round
++	#	1   5   9  13
++	#	2   6  10  14
++	#	3   7  11  15
++	#	0   5  10  15 < odd round
++	#	1   6  11  12
++	#	2   7   8  13
++	#	3   4   9  14
++	#
++	# 'a', 'b' and 'd's are permanently allocated in registers,
++	# @x[0..7,12..15], while 'c's are maintained in memory. If
++	# you observe 'c' column, you'll notice that a pair of 'c's is
++	# invariant between rounds. This means that we have to reload
++	# them once per round, in the middle. This is why you'll see
++	# a bunch of 'c' stores and loads in the middle, but none at
++	# the beginning or end.
++
++	(
++	"&vpaddd	(@x[$a0],@x[$a0],@x[$b0])",	# Q1: a += b
++	"&vpxor		(@x[$d0],@x[$a0],@x[$d0])",	# d ^= a
++	"&vpshufb	(@x[$d0],@x[$d0],$t1)",	# byte-shuffle d; $t1 holds the mask loaded from (%r9)
++	 "&vpaddd	(@x[$a1],@x[$a1],@x[$b1])",	# Q2
++	 "&vpxor	(@x[$d1],@x[$a1],@x[$d1])",
++	 "&vpshufb	(@x[$d1],@x[$d1],$t1)",
++
++	"&vpaddd	($xc,$xc,@x[$d0])",	# c += d
++	"&vpxor		(@x[$b0],$xc,@x[$b0])",	# b ^= c
++	"&vpslld	($t0,@x[$b0],12)",	# b <<<= 12, built from two shifts and an or
++	"&vpsrld	(@x[$b0],@x[$b0],20)",
++	"&vpor		(@x[$b0],$t0,@x[$b0])",
++	"&vbroadcasti128($t0,'(%r11)')",		# .Lrot24(%rip)
++	 "&vpaddd	($xc_,$xc_,@x[$d1])",
++	 "&vpxor	(@x[$b1],$xc_,@x[$b1])",
++	 "&vpslld	($t1,@x[$b1],12)",
++	 "&vpsrld	(@x[$b1],@x[$b1],20)",
++	 "&vpor		(@x[$b1],$t1,@x[$b1])",
++
++	"&vpaddd	(@x[$a0],@x[$a0],@x[$b0])",	# a += b
++	"&vpxor		(@x[$d0],@x[$a0],@x[$d0])",	# d ^= a
++	"&vpshufb	(@x[$d0],@x[$d0],$t0)",	# byte-shuffle d with the .Lrot24 mask
++	 "&vpaddd	(@x[$a1],@x[$a1],@x[$b1])",
++	 "&vpxor	(@x[$d1],@x[$a1],@x[$d1])",
++	 "&vpshufb	(@x[$d1],@x[$d1],$t0)",
++
++	"&vpaddd	($xc,$xc,@x[$d0])",	# c += d
++	"&vpxor		(@x[$b0],$xc,@x[$b0])",	# b ^= c
++	"&vpslld	($t1,@x[$b0],7)",	# b <<<= 7
++	"&vpsrld	(@x[$b0],@x[$b0],25)",
++	"&vpor		(@x[$b0],$t1,@x[$b0])",
++	"&vbroadcasti128($t1,'(%r9)')",		# .Lrot16(%rip)
++	 "&vpaddd	($xc_,$xc_,@x[$d1])",
++	 "&vpxor	(@x[$b1],$xc_,@x[$b1])",
++	 "&vpslld	($t0,@x[$b1],7)",
++	 "&vpsrld	(@x[$b1],@x[$b1],25)",
++	 "&vpor		(@x[$b1],$t0,@x[$b1])",
++
++	"&vmovdqa	(\"`32*($c0-8)`(%rsp)\",$xc)",	# store the pair of 'c's used by Q1/Q2 ...
++	 "&vmovdqa	(\"`32*($c1-8)`(%rsp)\",$xc_)",
++	"&vmovdqa	($xc,\"`32*($c2-8)`(%rsp)\")",	# ... and load the pair needed by Q3/Q4
++	 "&vmovdqa	($xc_,\"`32*($c3-8)`(%rsp)\")",
++
++	"&vpaddd	(@x[$a2],@x[$a2],@x[$b2])",	# Q3
++	"&vpxor		(@x[$d2],@x[$a2],@x[$d2])",
++	"&vpshufb	(@x[$d2],@x[$d2],$t1)",
++	 "&vpaddd	(@x[$a3],@x[$a3],@x[$b3])",	# Q4
++	 "&vpxor	(@x[$d3],@x[$a3],@x[$d3])",
++	 "&vpshufb	(@x[$d3],@x[$d3],$t1)",
++
++	"&vpaddd	($xc,$xc,@x[$d2])",
++	"&vpxor		(@x[$b2],$xc,@x[$b2])",
++	"&vpslld	($t0,@x[$b2],12)",
++	"&vpsrld	(@x[$b2],@x[$b2],20)",
++	"&vpor		(@x[$b2],$t0,@x[$b2])",
++	"&vbroadcasti128($t0,'(%r11)')",		# .Lrot24(%rip)
++	 "&vpaddd	($xc_,$xc_,@x[$d3])",
++	 "&vpxor	(@x[$b3],$xc_,@x[$b3])",
++	 "&vpslld	($t1,@x[$b3],12)",
++	 "&vpsrld	(@x[$b3],@x[$b3],20)",
++	 "&vpor		(@x[$b3],$t1,@x[$b3])",
++
++	"&vpaddd	(@x[$a2],@x[$a2],@x[$b2])",
++	"&vpxor		(@x[$d2],@x[$a2],@x[$d2])",
++	"&vpshufb	(@x[$d2],@x[$d2],$t0)",
++	 "&vpaddd	(@x[$a3],@x[$a3],@x[$b3])",
++	 "&vpxor	(@x[$d3],@x[$a3],@x[$d3])",
++	 "&vpshufb	(@x[$d3],@x[$d3],$t0)",
++
++	"&vpaddd	($xc,$xc,@x[$d2])",
++	"&vpxor		(@x[$b2],$xc,@x[$b2])",
++	"&vpslld	($t1,@x[$b2],7)",
++	"&vpsrld	(@x[$b2],@x[$b2],25)",
++	"&vpor		(@x[$b2],$t1,@x[$b2])",
++	"&vbroadcasti128($t1,'(%r9)')",		# .Lrot16(%rip)
++	 "&vpaddd	($xc_,$xc_,@x[$d3])",
++	 "&vpxor	(@x[$b3],$xc_,@x[$b3])",
++	 "&vpslld	($t0,@x[$b3],7)",
++	 "&vpsrld	(@x[$b3],@x[$b3],25)",
++	 "&vpor		(@x[$b3],$t0,@x[$b3])"
++	);
++}
++
++my $xframe = $win64 ? 0xa8 : 8;	# Win64 frame is 0xa0 bytes larger: save slots for xmm6-xmm15
++
++&declare_function("chacha20_avx2", 32, 5);
++$code.=<<___;
++.cfi_startproc
++.Lchacha20_8x:
++	lea		8(%rsp),%r10		# frame register
++.cfi_def_cfa_register	%r10
++	sub		\$0x280+$xframe,%rsp	# 0x280 bytes of scratch, see stack layout below
++	and		\$-32,%rsp		# 32-byte alignment for the aligned ymm stores
++___
++$code.=<<___	if ($win64);	# Win64 ABI treats xmm6-xmm15 as callee-saved
++	movaps		%xmm6,-0xb0(%r10)
++	movaps		%xmm7,-0xa0(%r10)
++	movaps		%xmm8,-0x90(%r10)
++	movaps		%xmm9,-0x80(%r10)
++	movaps		%xmm10,-0x70(%r10)
++	movaps		%xmm11,-0x60(%r10)
++	movaps		%xmm12,-0x50(%r10)
++	movaps		%xmm13,-0x40(%r10)
++	movaps		%xmm14,-0x30(%r10)
++	movaps		%xmm15,-0x20(%r10)
++.L8x_body:
++___
++$code.=<<___;
++	vzeroupper
++
++	################ stack layout
++	# +0x00		SIMD equivalent of @x[8-11]
++	# ...
++	# +0x80		constant copy of key[0-2] smashed by lanes
++	# ...
++	# +0x200	SIMD counters (with nonce smashed by lanes)
++	# ...
++	# +0x280
++
++	vbroadcasti128	.Lsigma(%rip),$xa3	# key[0]
++	vbroadcasti128	($key),$xb3		# key[1]
++	vbroadcasti128	16($key),$xt3		# key[2]
++	vbroadcasti128	($counter),$xd3		# key[3]
++	lea		0x100(%rsp),%rcx	# size optimization
++	lea		0x200(%rsp),%rax	# size optimization
++	lea		.Lrot16(%rip),%r9	# shuffle-mask pointers read by the round code
++	lea		.Lrot24(%rip),%r11
++
++	vpshufd		\$0x00,$xa3,$xa0	# smash key by lanes...
++	vpshufd		\$0x55,$xa3,$xa1
++	vmovdqa		$xa0,0x80-0x100(%rcx)	# ... and offload
++	vpshufd		\$0xaa,$xa3,$xa2
++	vmovdqa		$xa1,0xa0-0x100(%rcx)
++	vpshufd		\$0xff,$xa3,$xa3
++	vmovdqa		$xa2,0xc0-0x100(%rcx)
++	vmovdqa		$xa3,0xe0-0x100(%rcx)
++
++	vpshufd		\$0x00,$xb3,$xb0
++	vpshufd		\$0x55,$xb3,$xb1
++	vmovdqa		$xb0,0x100-0x100(%rcx)
++	vpshufd		\$0xaa,$xb3,$xb2
++	vmovdqa		$xb1,0x120-0x100(%rcx)
++	vpshufd		\$0xff,$xb3,$xb3
++	vmovdqa		$xb2,0x140-0x100(%rcx)
++	vmovdqa		$xb3,0x160-0x100(%rcx)
++
++	vpshufd		\$0x00,$xt3,$xt0	# "xc0"
++	vpshufd		\$0x55,$xt3,$xt1	# "xc1"
++	vmovdqa		$xt0,0x180-0x200(%rax)
++	vpshufd		\$0xaa,$xt3,$xt2	# "xc2"
++	vmovdqa		$xt1,0x1a0-0x200(%rax)
++	vpshufd		\$0xff,$xt3,$xt3	# "xc3"
++	vmovdqa		$xt2,0x1c0-0x200(%rax)
++	vmovdqa		$xt3,0x1e0-0x200(%rax)
++
++	vpshufd		\$0x00,$xd3,$xd0
++	vpshufd		\$0x55,$xd3,$xd1
++	vpaddd		.Lincy(%rip),$xd0,$xd0	# don't save counters yet
++	vpshufd		\$0xaa,$xd3,$xd2
++	vmovdqa		$xd1,0x220-0x200(%rax)
++	vpshufd		\$0xff,$xd3,$xd3
++	vmovdqa		$xd2,0x240-0x200(%rax)
++	vmovdqa		$xd3,0x260-0x200(%rax)
++
++	jmp		.Loop_enter8x
++
++.align	32
++.Loop_outer8x:
++	vmovdqa		0x80-0x100(%rcx),$xa0	# re-load smashed key
++	vmovdqa		0xa0-0x100(%rcx),$xa1
++	vmovdqa		0xc0-0x100(%rcx),$xa2
++	vmovdqa		0xe0-0x100(%rcx),$xa3
++	vmovdqa		0x100-0x100(%rcx),$xb0
++	vmovdqa		0x120-0x100(%rcx),$xb1
++	vmovdqa		0x140-0x100(%rcx),$xb2
++	vmovdqa		0x160-0x100(%rcx),$xb3
++	vmovdqa		0x180-0x200(%rax),$xt0	# "xc0"
++	vmovdqa		0x1a0-0x200(%rax),$xt1	# "xc1"
++	vmovdqa		0x1c0-0x200(%rax),$xt2	# "xc2"
++	vmovdqa		0x1e0-0x200(%rax),$xt3	# "xc3"
++	vmovdqa		0x200-0x200(%rax),$xd0
++	vmovdqa		0x220-0x200(%rax),$xd1
++	vmovdqa		0x240-0x200(%rax),$xd2
++	vmovdqa		0x260-0x200(%rax),$xd3
++	vpaddd		.Leight(%rip),$xd0,$xd0	# next SIMD counters
++
++.Loop_enter8x:
++	vmovdqa		$xt2,0x40(%rsp)		# SIMD equivalent of "@x[10]"
++	vmovdqa		$xt3,0x60(%rsp)		# SIMD equivalent of "@x[11]"
++	vbroadcasti128	(%r9),$xt3		# preload the .Lrot16 mask expected by the first shuffle
++	vmovdqa		$xd0,0x200-0x200(%rax)	# save SIMD counters
++	mov		\$10,%eax		# loop count: 10 passes of two rounds; overwrites the rax pointer
++	jmp		.Loop8x
++
++.align	32
++.Loop8x:
++___
++	foreach (&AVX2_lane_ROUND(0, 4, 8,12)) { eval; }	# even round (indices 0,4,8,12 ...)
++	foreach (&AVX2_lane_ROUND(0, 5,10,15)) { eval; }	# odd round (indices 0,5,10,15 ...)
++$code.=<<___;
++	dec		%eax
++	jnz		.Loop8x
++
++	lea		0x200(%rsp),%rax	# size optimization (pointer was overwritten by the loop counter)
++	vpaddd		0x80-0x100(%rcx),$xa0,$xa0	# accumulate key
++	vpaddd		0xa0-0x100(%rcx),$xa1,$xa1
++	vpaddd		0xc0-0x100(%rcx),$xa2,$xa2
++	vpaddd		0xe0-0x100(%rcx),$xa3,$xa3
++
++	vpunpckldq	$xa1,$xa0,$xt2		# "de-interlace" data
++	vpunpckldq	$xa3,$xa2,$xt3
++	vpunpckhdq	$xa1,$xa0,$xa0
++	vpunpckhdq	$xa3,$xa2,$xa2
++	vpunpcklqdq	$xt3,$xt2,$xa1		# "a0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "a1"
++	vpunpcklqdq	$xa2,$xa0,$xa3		# "a2"
++	vpunpckhqdq	$xa2,$xa0,$xa0		# "a3"
++___
++	($xa0,$xa1,$xa2,$xa3,$xt2)=($xa1,$xt2,$xa3,$xa0,$xa2);	# rename: variables now follow where "a0".."a3" landed
++$code.=<<___;
++	vpaddd		0x100-0x100(%rcx),$xb0,$xb0
++	vpaddd		0x120-0x100(%rcx),$xb1,$xb1
++	vpaddd		0x140-0x100(%rcx),$xb2,$xb2
++	vpaddd		0x160-0x100(%rcx),$xb3,$xb3
++
++	vpunpckldq	$xb1,$xb0,$xt2
++	vpunpckldq	$xb3,$xb2,$xt3
++	vpunpckhdq	$xb1,$xb0,$xb0
++	vpunpckhdq	$xb3,$xb2,$xb2
++	vpunpcklqdq	$xt3,$xt2,$xb1		# "b0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "b1"
++	vpunpcklqdq	$xb2,$xb0,$xb3		# "b2"
++	vpunpckhqdq	$xb2,$xb0,$xb0		# "b3"
++___
++	($xb0,$xb1,$xb2,$xb3,$xt2)=($xb1,$xt2,$xb3,$xb0,$xb2);	# same renaming for the 'b' rows
++$code.=<<___;
++	vperm2i128	\$0x20,$xb0,$xa0,$xt3	# "de-interlace" further
++	vperm2i128	\$0x31,$xb0,$xa0,$xb0
++	vperm2i128	\$0x20,$xb1,$xa1,$xa0
++	vperm2i128	\$0x31,$xb1,$xa1,$xb1
++	vperm2i128	\$0x20,$xb2,$xa2,$xa1
++	vperm2i128	\$0x31,$xb2,$xa2,$xb2
++	vperm2i128	\$0x20,$xb3,$xa3,$xa2
++	vperm2i128	\$0x31,$xb3,$xa3,$xb3
++___
++	($xa0,$xa1,$xa2,$xa3,$xt3)=($xt3,$xa0,$xa1,$xa2,$xa3);	# results were written one register "down"; rename to match
++	my ($xc0,$xc1,$xc2,$xc3)=($xt0,$xt1,$xa0,$xa1);	# c0,c1 are still in registers; c2,c3 get loaded into the a0,a1 registers
++$code.=<<___;
++	vmovdqa		$xa0,0x00(%rsp)		# offload $xaN
++	vmovdqa		$xa1,0x20(%rsp)
++	vmovdqa		0x40(%rsp),$xc2		# $xa0
++	vmovdqa		0x60(%rsp),$xc3		# $xa1
++
++	vpaddd		0x180-0x200(%rax),$xc0,$xc0
++	vpaddd		0x1a0-0x200(%rax),$xc1,$xc1
++	vpaddd		0x1c0-0x200(%rax),$xc2,$xc2
++	vpaddd		0x1e0-0x200(%rax),$xc3,$xc3
++
++	vpunpckldq	$xc1,$xc0,$xt2
++	vpunpckldq	$xc3,$xc2,$xt3
++	vpunpckhdq	$xc1,$xc0,$xc0
++	vpunpckhdq	$xc3,$xc2,$xc2
++	vpunpcklqdq	$xt3,$xt2,$xc1		# "c0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "c1"
++	vpunpcklqdq	$xc2,$xc0,$xc3		# "c2"
++	vpunpckhqdq	$xc2,$xc0,$xc0		# "c3"
++___
++	($xc0,$xc1,$xc2,$xc3,$xt2)=($xc1,$xt2,$xc3,$xc0,$xc2);	# rename 'c' rows after the unpack
++$code.=<<___;
++	vpaddd		0x200-0x200(%rax),$xd0,$xd0
++	vpaddd		0x220-0x200(%rax),$xd1,$xd1
++	vpaddd		0x240-0x200(%rax),$xd2,$xd2
++	vpaddd		0x260-0x200(%rax),$xd3,$xd3
++
++	vpunpckldq	$xd1,$xd0,$xt2
++	vpunpckldq	$xd3,$xd2,$xt3
++	vpunpckhdq	$xd1,$xd0,$xd0
++	vpunpckhdq	$xd3,$xd2,$xd2
++	vpunpcklqdq	$xt3,$xt2,$xd1		# "d0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "d1"
++	vpunpcklqdq	$xd2,$xd0,$xd3		# "d2"
++	vpunpckhqdq	$xd2,$xd0,$xd0		# "d3"
++___
++	($xd0,$xd1,$xd2,$xd3,$xt2)=($xd1,$xt2,$xd3,$xd0,$xd2);	# rename 'd' rows after the unpack
++$code.=<<___;
++	vperm2i128	\$0x20,$xd0,$xc0,$xt3	# "de-interlace" further
++	vperm2i128	\$0x31,$xd0,$xc0,$xd0
++	vperm2i128	\$0x20,$xd1,$xc1,$xc0
++	vperm2i128	\$0x31,$xd1,$xc1,$xd1
++	vperm2i128	\$0x20,$xd2,$xc2,$xc1
++	vperm2i128	\$0x31,$xd2,$xc2,$xd2
++	vperm2i128	\$0x20,$xd3,$xc3,$xc2
++	vperm2i128	\$0x31,$xd3,$xc3,$xd3
++___
++	($xc0,$xc1,$xc2,$xc3,$xt3)=($xt3,$xc0,$xc1,$xc2,$xc3);	# rename after the lane permutes
++	($xb0,$xb1,$xb2,$xb3,$xc0,$xc1,$xc2,$xc3)=	# swap the 'b' and 'c' names so that the a,b,c,d
++	($xc0,$xc1,$xc2,$xc3,$xb0,$xb1,$xb2,$xb3);	# sequence used for the xor/store code follows output order
++	($xa0,$xa1)=($xt2,$xt3);	# offloaded a0,a1 are reloaded into the two free scratch registers
++$code.=<<___;
++	vmovdqa		0x00(%rsp),$xa0		# $xaN was offloaded, remember?
++	vmovdqa		0x20(%rsp),$xa1
++
++	cmp		\$64*8,$len		# fewer than eight 64-byte blocks left?
++	jb		.Ltail8x
++
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x20($inp),$xb0,$xb0
++	vpxor		0x40($inp),$xc0,$xc0
++	vpxor		0x60($inp),$xd0,$xd0
++	lea		0x80($inp),$inp		# size optimization
++	vmovdqu		$xa0,0x00($out)
++	vmovdqu		$xb0,0x20($out)
++	vmovdqu		$xc0,0x40($out)
++	vmovdqu		$xd0,0x60($out)
++	lea		0x80($out),$out		# size optimization
++
++	vpxor		0x00($inp),$xa1,$xa1
++	vpxor		0x20($inp),$xb1,$xb1
++	vpxor		0x40($inp),$xc1,$xc1
++	vpxor		0x60($inp),$xd1,$xd1
++	lea		0x80($inp),$inp		# size optimization
++	vmovdqu		$xa1,0x00($out)
++	vmovdqu		$xb1,0x20($out)
++	vmovdqu		$xc1,0x40($out)
++	vmovdqu		$xd1,0x60($out)
++	lea		0x80($out),$out		# size optimization
++
++	vpxor		0x00($inp),$xa2,$xa2
++	vpxor		0x20($inp),$xb2,$xb2
++	vpxor		0x40($inp),$xc2,$xc2
++	vpxor		0x60($inp),$xd2,$xd2
++	lea		0x80($inp),$inp		# size optimization
++	vmovdqu		$xa2,0x00($out)
++	vmovdqu		$xb2,0x20($out)
++	vmovdqu		$xc2,0x40($out)
++	vmovdqu		$xd2,0x60($out)
++	lea		0x80($out),$out		# size optimization
++
++	vpxor		0x00($inp),$xa3,$xa3
++	vpxor		0x20($inp),$xb3,$xb3
++	vpxor		0x40($inp),$xc3,$xc3
++	vpxor		0x60($inp),$xd3,$xd3
++	lea		0x80($inp),$inp		# size optimization
++	vmovdqu		$xa3,0x00($out)
++	vmovdqu		$xb3,0x20($out)
++	vmovdqu		$xc3,0x40($out)
++	vmovdqu		$xd3,0x60($out)
++	lea		0x80($out),$out		# size optimization
++
++	sub		\$64*8,$len
++	jnz		.Loop_outer8x		# more input: generate the next eight blocks
++
++	jmp		.Ldone8x
++
++.Ltail8x:
++	cmp		\$448,$len
++	jae		.L448_or_more8x
++	cmp		\$384,$len
++	jae		.L384_or_more8x
++	cmp		\$320,$len
++	jae		.L320_or_more8x
++	cmp		\$256,$len
++	jae		.L256_or_more8x
++	cmp		\$192,$len
++	jae		.L192_or_more8x
++	cmp		\$128,$len
++	jae		.L128_or_more8x
++	cmp		\$64,$len
++	jae		.L64_or_more8x
++
++	xor		%r9,%r9			# byte index for the tail loop (mask pointer no longer needed)
++	vmovdqa		$xa0,0x00(%rsp)		# stage 64 bytes of key stream on the stack
++	vmovdqa		$xb0,0x20(%rsp)
++	jmp		.Loop_tail8x
++
++.align	32
++.L64_or_more8x:
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x20($inp),$xb0,$xb0
++	vmovdqu		$xa0,0x00($out)
++	vmovdqu		$xb0,0x20($out)
++	je		.Ldone8x		# flags still come from the cmp in .Ltail8x: exact multiple, done
++
++	lea		0x40($inp),$inp		# inp+=64*1
++	xor		%r9,%r9
++	vmovdqa		$xc0,0x00(%rsp)
++	lea		0x40($out),$out		# out+=64*1
++	sub		\$64,$len		# len-=64*1
++	vmovdqa		$xd0,0x20(%rsp)
++	jmp		.Loop_tail8x
++
++.align	32
++.L128_or_more8x:
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x20($inp),$xb0,$xb0
++	vpxor		0x40($inp),$xc0,$xc0
++	vpxor		0x60($inp),$xd0,$xd0
++	vmovdqu		$xa0,0x00($out)
++	vmovdqu		$xb0,0x20($out)
++	vmovdqu		$xc0,0x40($out)
++	vmovdqu		$xd0,0x60($out)
++	je		.Ldone8x
++
++	lea		0x80($inp),$inp		# inp+=64*2
++	xor		%r9,%r9
++	vmovdqa		$xa1,0x00(%rsp)
++	lea		0x80($out),$out		# out+=64*2
++	sub		\$128,$len		# len-=64*2
++	vmovdqa		$xb1,0x20(%rsp)
++	jmp		.Loop_tail8x
++
++.align	32
++.L192_or_more8x:
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x20($inp),$xb0,$xb0
++	vpxor		0x40($inp),$xc0,$xc0
++	vpxor		0x60($inp),$xd0,$xd0
++	vpxor		0x80($inp),$xa1,$xa1
++	vpxor		0xa0($inp),$xb1,$xb1
++	vmovdqu		$xa0,0x00($out)
++	vmovdqu		$xb0,0x20($out)
++	vmovdqu		$xc0,0x40($out)
++	vmovdqu		$xd0,0x60($out)
++	vmovdqu		$xa1,0x80($out)
++	vmovdqu		$xb1,0xa0($out)
++	je		.Ldone8x
++
++	lea		0xc0($inp),$inp		# inp+=64*3
++	xor		%r9,%r9
++	vmovdqa		$xc1,0x00(%rsp)
++	lea		0xc0($out),$out		# out+=64*3
++	sub		\$192,$len		# len-=64*3
++	vmovdqa		$xd1,0x20(%rsp)
++	jmp		.Loop_tail8x
++
++.align	32
++.L256_or_more8x:
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x20($inp),$xb0,$xb0
++	vpxor		0x40($inp),$xc0,$xc0
++	vpxor		0x60($inp),$xd0,$xd0
++	vpxor		0x80($inp),$xa1,$xa1
++	vpxor		0xa0($inp),$xb1,$xb1
++	vpxor		0xc0($inp),$xc1,$xc1
++	vpxor		0xe0($inp),$xd1,$xd1
++	vmovdqu		$xa0,0x00($out)
++	vmovdqu		$xb0,0x20($out)
++	vmovdqu		$xc0,0x40($out)
++	vmovdqu		$xd0,0x60($out)
++	vmovdqu		$xa1,0x80($out)
++	vmovdqu		$xb1,0xa0($out)
++	vmovdqu		$xc1,0xc0($out)
++	vmovdqu		$xd1,0xe0($out)
++	je		.Ldone8x
++
++	lea		0x100($inp),$inp	# inp+=64*4
++	xor		%r9,%r9
++	vmovdqa		$xa2,0x00(%rsp)
++	lea		0x100($out),$out	# out+=64*4
++	sub		\$256,$len		# len-=64*4
++	vmovdqa		$xb2,0x20(%rsp)
++	jmp		.Loop_tail8x
++
++.align	32
++.L320_or_more8x:
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x20($inp),$xb0,$xb0
++	vpxor		0x40($inp),$xc0,$xc0
++	vpxor		0x60($inp),$xd0,$xd0
++	vpxor		0x80($inp),$xa1,$xa1
++	vpxor		0xa0($inp),$xb1,$xb1
++	vpxor		0xc0($inp),$xc1,$xc1
++	vpxor		0xe0($inp),$xd1,$xd1
++	vpxor		0x100($inp),$xa2,$xa2
++	vpxor		0x120($inp),$xb2,$xb2
++	vmovdqu		$xa0,0x00($out)
++	vmovdqu		$xb0,0x20($out)
++	vmovdqu		$xc0,0x40($out)
++	vmovdqu		$xd0,0x60($out)
++	vmovdqu		$xa1,0x80($out)
++	vmovdqu		$xb1,0xa0($out)
++	vmovdqu		$xc1,0xc0($out)
++	vmovdqu		$xd1,0xe0($out)
++	vmovdqu		$xa2,0x100($out)
++	vmovdqu		$xb2,0x120($out)
++	je		.Ldone8x
++
++	lea		0x140($inp),$inp	# inp+=64*5
++	xor		%r9,%r9
++	vmovdqa		$xc2,0x00(%rsp)
++	lea		0x140($out),$out	# out+=64*5
++	sub		\$320,$len		# len-=64*5
++	vmovdqa		$xd2,0x20(%rsp)
++	jmp		.Loop_tail8x
++
++.align	32
++.L384_or_more8x:
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x20($inp),$xb0,$xb0
++	vpxor		0x40($inp),$xc0,$xc0
++	vpxor		0x60($inp),$xd0,$xd0
++	vpxor		0x80($inp),$xa1,$xa1
++	vpxor		0xa0($inp),$xb1,$xb1
++	vpxor		0xc0($inp),$xc1,$xc1
++	vpxor		0xe0($inp),$xd1,$xd1
++	vpxor		0x100($inp),$xa2,$xa2
++	vpxor		0x120($inp),$xb2,$xb2
++	vpxor		0x140($inp),$xc2,$xc2
++	vpxor		0x160($inp),$xd2,$xd2
++	vmovdqu		$xa0,0x00($out)
++	vmovdqu		$xb0,0x20($out)
++	vmovdqu		$xc0,0x40($out)
++	vmovdqu		$xd0,0x60($out)
++	vmovdqu		$xa1,0x80($out)
++	vmovdqu		$xb1,0xa0($out)
++	vmovdqu		$xc1,0xc0($out)
++	vmovdqu		$xd1,0xe0($out)
++	vmovdqu		$xa2,0x100($out)
++	vmovdqu		$xb2,0x120($out)
++	vmovdqu		$xc2,0x140($out)
++	vmovdqu		$xd2,0x160($out)
++	je		.Ldone8x
++
++	lea		0x180($inp),$inp	# inp+=64*6
++	xor		%r9,%r9
++	vmovdqa		$xa3,0x00(%rsp)
++	lea		0x180($out),$out	# out+=64*6
++	sub		\$384,$len		# len-=64*6
++	vmovdqa		$xb3,0x20(%rsp)
++	jmp		.Loop_tail8x
++
++.align	32
++.L448_or_more8x:
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x20($inp),$xb0,$xb0
++	vpxor		0x40($inp),$xc0,$xc0
++	vpxor		0x60($inp),$xd0,$xd0
++	vpxor		0x80($inp),$xa1,$xa1
++	vpxor		0xa0($inp),$xb1,$xb1
++	vpxor		0xc0($inp),$xc1,$xc1
++	vpxor		0xe0($inp),$xd1,$xd1
++	vpxor		0x100($inp),$xa2,$xa2
++	vpxor		0x120($inp),$xb2,$xb2
++	vpxor		0x140($inp),$xc2,$xc2
++	vpxor		0x160($inp),$xd2,$xd2
++	vpxor		0x180($inp),$xa3,$xa3
++	vpxor		0x1a0($inp),$xb3,$xb3
++	vmovdqu		$xa0,0x00($out)
++	vmovdqu		$xb0,0x20($out)
++	vmovdqu		$xc0,0x40($out)
++	vmovdqu		$xd0,0x60($out)
++	vmovdqu		$xa1,0x80($out)
++	vmovdqu		$xb1,0xa0($out)
++	vmovdqu		$xc1,0xc0($out)
++	vmovdqu		$xd1,0xe0($out)
++	vmovdqu		$xa2,0x100($out)
++	vmovdqu		$xb2,0x120($out)
++	vmovdqu		$xc2,0x140($out)
++	vmovdqu		$xd2,0x160($out)
++	vmovdqu		$xa3,0x180($out)
++	vmovdqu		$xb3,0x1a0($out)
++	je		.Ldone8x
++
++	lea		0x1c0($inp),$inp	# inp+=64*7
++	xor		%r9,%r9
++	vmovdqa		$xc3,0x00(%rsp)
++	lea		0x1c0($out),$out	# out+=64*7
++	sub		\$448,$len		# len-=64*7
++	vmovdqa		$xd3,0x20(%rsp)
++
++.Loop_tail8x:
++	movzb		($inp,%r9),%eax		# next input byte
++	movzb		(%rsp,%r9),%ecx		# matching key stream byte staged on the stack
++	lea		1(%r9),%r9
++	xor		%ecx,%eax
++	mov		%al,-1($out,%r9)	# index was already advanced, hence the -1
++	dec		$len
++	jnz		.Loop_tail8x
++
++.Ldone8x:
++	vzeroall
++___
++$code.=<<___	if ($win64);	# restore the callee-saved xmm registers saved in the prologue
++	movaps		-0xb0(%r10),%xmm6
++	movaps		-0xa0(%r10),%xmm7
++	movaps		-0x90(%r10),%xmm8
++	movaps		-0x80(%r10),%xmm9
++	movaps		-0x70(%r10),%xmm10
++	movaps		-0x60(%r10),%xmm11
++	movaps		-0x50(%r10),%xmm12
++	movaps		-0x40(%r10),%xmm13
++	movaps		-0x30(%r10),%xmm14
++	movaps		-0x20(%r10),%xmm15
++___
++$code.=<<___;
++	lea		-8(%r10),%rsp		# r10 was entry rsp+8, so this restores the entry rsp
++.cfi_def_cfa_register	%rsp
++.L8x_epilogue:
++	ret
++.cfi_endproc
++___
++&end_function("chacha20_avx2");
++if($kernel) {
++	$code .= "#endif\n";
++}
++}
++
++########################################################################
++# AVX512 code paths
++if ($avx>2) {
++# This one handles shorter inputs...
++if($kernel) {
++	$code .= "#ifdef CONFIG_AS_AVX512\n";
++}
++
++my ($a,$b,$c,$d, $a_,$b_,$c_,$d_,$fourz) = map("%zmm$_",(0..3,16..20));	# working rows: zmm0-3; saved copies + counter step: zmm16-20
++my ($t0,$t1,$t2,$t3) = map("%xmm$_",(4..7));	# 128-bit temporaries used on the output path
++
++sub vpxord()		# size optimization; invoked as &vpxord(dst,src1,src2)
++{ my $opcode = "vpxor";	# adhere to vpxor when possible
++
++    foreach (@_) {	# any %zmm, or register 16-31 of any width, is EVEX-only
++	if (/%([xyz])mm([0-9]+)/ && ($1 eq "z" || $2>=16)) {
++	    $opcode = "vpxord";
++	    last;
++	}
++    }
++
++    $code .= "\t$opcode\t".join(',',reverse @_)."\n";	# operands are emitted in reverse (AT&T) order
++}
++
++sub AVX512ROUND {	# critical path is 14 "SIMD ticks" per round
++	&vpaddd	($a,$a,$b);	# a += b
++	&vpxord	($d,$d,$a);	# d ^= a
++	&vprold	($d,$d,16);	# d <<<= 16
++
++	&vpaddd	($c,$c,$d);	# c += d
++	&vpxord	($b,$b,$c);	# b ^= c
++	&vprold	($b,$b,12);	# b <<<= 12
++
++	&vpaddd	($a,$a,$b);	# a += b
++	&vpxord	($d,$d,$a);	# d ^= a
++	&vprold	($d,$d,8);	# d <<<= 8
++
++	&vpaddd	($c,$c,$d);	# c += d
++	&vpxord	($b,$b,$c);	# b ^= c
++	&vprold	($b,$b,7);	# b <<<= 7
++}
++
++my $xframe = $win64 ? 32+8 : 8;	# Win64: 32 extra bytes hold the xmm6/xmm7 save slots
++
++&declare_function("chacha20_avx512", 32, 5);
++$code.=<<___;
++.cfi_startproc
++.Lchacha20_avx512:
++	lea	8(%rsp),%r10		# frame pointer
++.cfi_def_cfa_register	%r10
++	cmp	\$512,$len
++	ja	.Lchacha20_16x		# more than 512 bytes: branch to the 16x entry
++
++	sub	\$64+$xframe,%rsp
++	and \$-64,%rsp
++___
++$code.=<<___	if ($win64);	# of the Win64 callee-saved xmm set only xmm6/xmm7 are used here (as $t2/$t3)
++	movaps	%xmm6,-0x30(%r10)
++	movaps	%xmm7,-0x20(%r10)
++.Lavx512_body:
++___
++$code.=<<___;
++	vbroadcasti32x4	.Lsigma(%rip),$a
++	vbroadcasti32x4	($key),$b
++	vbroadcasti32x4	16($key),$c
++	vbroadcasti32x4	($counter),$d
++
++	vmovdqa32	$a,$a_
++	vmovdqa32	$b,$b_
++	vmovdqa32	$c,$c_
++	vpaddd		.Lzeroz(%rip),$d,$d
++	vmovdqa32	.Lfourz(%rip),$fourz
++	mov		\$10,$counter	# reuse $counter
++	vmovdqa32	$d,$d_
++	jmp		.Loop_avx512
++
++.align	16
++.Loop_outer_avx512:
++	vmovdqa32	$a_,$a
++	vmovdqa32	$b_,$b
++	vmovdqa32	$c_,$c
++	vpaddd		$fourz,$d_,$d
++	mov		\$10,$counter
++	vmovdqa32	$d,$d_
++	jmp		.Loop_avx512
++
++.align	32
++.Loop_avx512:
++___
++	&AVX512ROUND();
++	&vpshufd	($c,$c,0b01001110);	# rotate dwords of b,c,d within each 128-bit lane
++	&vpshufd	($b,$b,0b00111001);	# so the next round works on the other grouping
++	&vpshufd	($d,$d,0b10010011);
++
++	&AVX512ROUND();
++	&vpshufd	($c,$c,0b01001110);	# inverse permutation: back to the original layout
++	&vpshufd	($b,$b,0b10010011);
++	&vpshufd	($d,$d,0b00111001);
++
++	&dec		($counter);	# 10 passes of two rounds; leaves $counter at zero
++	&jnz		(".Loop_avx512");
++
++$code.=<<___;
++	vpaddd		$a_,$a,$a
++	vpaddd		$b_,$b,$b
++	vpaddd		$c_,$c,$c
++	vpaddd		$d_,$d,$d
++
++	sub		\$64,$len
++	jb		.Ltail64_avx512
++
++	vpxor		0x00($inp),%x#$a,$t0	# xor with input
++	vpxor		0x10($inp),%x#$b,$t1
++	vpxor		0x20($inp),%x#$c,$t2
++	vpxor		0x30($inp),%x#$d,$t3
++	lea		0x40($inp),$inp		# inp+=64
++
++	vmovdqu		$t0,0x00($out)		# write output
++	vmovdqu		$t1,0x10($out)
++	vmovdqu		$t2,0x20($out)
++	vmovdqu		$t3,0x30($out)
++	lea		0x40($out),$out		# out+=64
++
++	jz		.Ldone_avx512		# flags still come from the sub above
++
++	vextracti32x4	\$1,$a,$t0
++	vextracti32x4	\$1,$b,$t1
++	vextracti32x4	\$1,$c,$t2
++	vextracti32x4	\$1,$d,$t3
++
++	sub		\$64,$len
++	jb		.Ltail_avx512
++
++	vpxor		0x00($inp),$t0,$t0	# xor with input
++	vpxor		0x10($inp),$t1,$t1
++	vpxor		0x20($inp),$t2,$t2
++	vpxor		0x30($inp),$t3,$t3
++	lea		0x40($inp),$inp		# inp+=64
++
++	vmovdqu		$t0,0x00($out)		# write output
++	vmovdqu		$t1,0x10($out)
++	vmovdqu		$t2,0x20($out)
++	vmovdqu		$t3,0x30($out)
++	lea		0x40($out),$out		# out+=64
++
++	jz		.Ldone_avx512
++
++	vextracti32x4	\$2,$a,$t0
++	vextracti32x4	\$2,$b,$t1
++	vextracti32x4	\$2,$c,$t2
++	vextracti32x4	\$2,$d,$t3
++
++	sub		\$64,$len
++	jb		.Ltail_avx512
++
++	vpxor		0x00($inp),$t0,$t0	# xor with input
++	vpxor		0x10($inp),$t1,$t1
++	vpxor		0x20($inp),$t2,$t2
++	vpxor		0x30($inp),$t3,$t3
++	lea		0x40($inp),$inp		# inp+=64
++
++	vmovdqu		$t0,0x00($out)		# write output
++	vmovdqu		$t1,0x10($out)
++	vmovdqu		$t2,0x20($out)
++	vmovdqu		$t3,0x30($out)
++	lea		0x40($out),$out		# out+=64
++
++	jz		.Ldone_avx512
++
++	vextracti32x4	\$3,$a,$t0
++	vextracti32x4	\$3,$b,$t1
++	vextracti32x4	\$3,$c,$t2
++	vextracti32x4	\$3,$d,$t3
++
++	sub		\$64,$len
++	jb		.Ltail_avx512
++
++	vpxor		0x00($inp),$t0,$t0	# xor with input
++	vpxor		0x10($inp),$t1,$t1
++	vpxor		0x20($inp),$t2,$t2
++	vpxor		0x30($inp),$t3,$t3
++	lea		0x40($inp),$inp		# inp+=64
++
++	vmovdqu		$t0,0x00($out)		# write output
++	vmovdqu		$t1,0x10($out)
++	vmovdqu		$t2,0x20($out)
++	vmovdqu		$t3,0x30($out)
++	lea		0x40($out),$out		# out+=64
++
++	jnz		.Loop_outer_avx512	# all four lanes consumed and input remains
++
++	jmp		.Ldone_avx512
++
++.align	16
++.Ltail64_avx512:
++	vmovdqa		%x#$a,0x00(%rsp)
++	vmovdqa		%x#$b,0x10(%rsp)
++	vmovdqa		%x#$c,0x20(%rsp)
++	vmovdqa		%x#$d,0x30(%rsp)
++	add		\$64,$len		# undo the sub: len is the remaining byte count again
++	jmp		.Loop_tail_avx512
++
++.align	16
++.Ltail_avx512:
++	vmovdqa		$t0,0x00(%rsp)
++	vmovdqa		$t1,0x10(%rsp)
++	vmovdqa		$t2,0x20(%rsp)
++	vmovdqa		$t3,0x30(%rsp)
++	add		\$64,$len		# undo the sub: len is the remaining byte count again
++
++.Loop_tail_avx512:
++	movzb		($inp,$counter),%eax
++	movzb		(%rsp,$counter),%ecx
++	lea		1($counter),$counter
++	xor		%ecx,%eax
++	mov		%al,-1($out,$counter)
++	dec		$len
++	jnz		.Loop_tail_avx512
++
++	vmovdqu32	$a_,0x00(%rsp)		# overwrite the 64 bytes of key stream staged on the stack
++
++.Ldone_avx512:
++	vzeroall
++___
++$code.=<<___	if ($win64);	# restore the two xmm registers saved in the prologue
++	movaps	-0x30(%r10),%xmm6
++	movaps	-0x20(%r10),%xmm7
++___
++$code.=<<___;
++	lea	-8(%r10),%rsp
++.cfi_def_cfa_register	%rsp
++.Lavx512_epilogue:
++	ret
++.cfi_endproc
++___
++&end_function("chacha20_avx512");
++
++map(s/%z/%y/, $a,$b,$c,$d, $a_,$b_,$c_,$d_,$fourz);	# same variables, now naming the 256-bit %ymm registers
++
++&declare_function("chacha20_avx512vl", 32, 5);
++$code.=<<___;
++.cfi_startproc
++.Lchacha20_avx512vl:
++	lea	8(%rsp),%r10		# frame pointer
++.cfi_def_cfa_register	%r10
++	cmp	\$128,$len
++	ja	.Lchacha20_8xvl		# more than 128 bytes: branch to the 8xvl entry
++
++	sub	\$64+$xframe,%rsp
++	and \$-32,%rsp
++___
++$code.=<<___	if ($win64);	# of the Win64 callee-saved xmm set only xmm6/xmm7 are used here (as $t2/$t3)
++	movaps	%xmm6,-0x30(%r10)
++	movaps	%xmm7,-0x20(%r10)
++.Lavx512vl_body:
++___
++$code.=<<___;
++	vbroadcasti128	.Lsigma(%rip),$a
++	vbroadcasti128	($key),$b
++	vbroadcasti128	16($key),$c
++	vbroadcasti128	($counter),$d
++
++	vmovdqa32	$a,$a_
++	vmovdqa32	$b,$b_
++	vmovdqa32	$c,$c_
++	vpaddd		.Lzeroz(%rip),$d,$d
++	vmovdqa32	.Ltwoy(%rip),$fourz
++	mov		\$10,$counter	# reuse $counter
++	vmovdqa32	$d,$d_
++	jmp		.Loop_avx512vl
++
++.align	16
++.Loop_outer_avx512vl:
++	vmovdqa32	$c_,$c			# a and b were already reloaded just before the branch here
++	vpaddd		$fourz,$d_,$d
++	mov		\$10,$counter
++	vmovdqa32	$d,$d_
++	jmp		.Loop_avx512vl
++
++.align	32
++.Loop_avx512vl:
++___
++	&AVX512ROUND();
++	&vpshufd	($c,$c,0b01001110);	# rotate dwords of b,c,d within each 128-bit lane
++	&vpshufd	($b,$b,0b00111001);	# so the next round works on the other grouping
++	&vpshufd	($d,$d,0b10010011);
++
++	&AVX512ROUND();
++	&vpshufd	($c,$c,0b01001110);	# inverse permutation: back to the original layout
++	&vpshufd	($b,$b,0b10010011);
++	&vpshufd	($d,$d,0b00111001);
++
++	&dec		($counter);	# 10 passes of two rounds; leaves $counter at zero
++	&jnz		(".Loop_avx512vl");
++
++$code.=<<___;
++	vpaddd		$a_,$a,$a
++	vpaddd		$b_,$b,$b
++	vpaddd		$c_,$c,$c
++	vpaddd		$d_,$d,$d
++
++	sub		\$64,$len
++	jb		.Ltail64_avx512vl
++
++	vpxor		0x00($inp),%x#$a,$t0	# xor with input
++	vpxor		0x10($inp),%x#$b,$t1
++	vpxor		0x20($inp),%x#$c,$t2
++	vpxor		0x30($inp),%x#$d,$t3
++	lea		0x40($inp),$inp		# inp+=64
++
++	vmovdqu		$t0,0x00($out)		# write output
++	vmovdqu		$t1,0x10($out)
++	vmovdqu		$t2,0x20($out)
++	vmovdqu		$t3,0x30($out)
++	lea		0x40($out),$out		# out+=64
++
++	jz		.Ldone_avx512vl		# flags still come from the sub above
++
++	vextracti128	\$1,$a,$t0
++	vextracti128	\$1,$b,$t1
++	vextracti128	\$1,$c,$t2
++	vextracti128	\$1,$d,$t3
++
++	sub		\$64,$len
++	jb		.Ltail_avx512vl
++
++	vpxor		0x00($inp),$t0,$t0	# xor with input
++	vpxor		0x10($inp),$t1,$t1
++	vpxor		0x20($inp),$t2,$t2
++	vpxor		0x30($inp),$t3,$t3
++	lea		0x40($inp),$inp		# inp+=64
++
++	vmovdqu		$t0,0x00($out)		# write output
++	vmovdqu		$t1,0x10($out)
++	vmovdqu		$t2,0x20($out)
++	vmovdqu		$t3,0x30($out)
++	lea		0x40($out),$out		# out+=64
++
++	vmovdqa32	$a_,$a			# reload a and b here; the outer loop head reloads only c and d
++	vmovdqa32	$b_,$b
++	jnz		.Loop_outer_avx512vl
++
++	jmp		.Ldone_avx512vl
++
++.align	16
++.Ltail64_avx512vl:
++	vmovdqa		%x#$a,0x00(%rsp)
++	vmovdqa		%x#$b,0x10(%rsp)
++	vmovdqa		%x#$c,0x20(%rsp)
++	vmovdqa		%x#$d,0x30(%rsp)
++	add		\$64,$len		# undo the sub: len is the remaining byte count again
++	jmp		.Loop_tail_avx512vl
++
++.align	16
++.Ltail_avx512vl:
++	vmovdqa		$t0,0x00(%rsp)
++	vmovdqa		$t1,0x10(%rsp)
++	vmovdqa		$t2,0x20(%rsp)
++	vmovdqa		$t3,0x30(%rsp)
++	add		\$64,$len		# undo the sub: len is the remaining byte count again
++
++.Loop_tail_avx512vl:
++	movzb		($inp,$counter),%eax
++	movzb		(%rsp,$counter),%ecx
++	lea		1($counter),$counter
++	xor		%ecx,%eax
++	mov		%al,-1($out,$counter)
++	dec		$len
++	jnz		.Loop_tail_avx512vl
++
++	vmovdqu32	$a_,0x00(%rsp)		# overwrite the 64 bytes of key stream staged on the stack
++	vmovdqu32	$a_,0x20(%rsp)
++
++.Ldone_avx512vl:
++	vzeroall
++___
++$code.=<<___	if ($win64);	# restore the two xmm registers saved in the prologue
++	movaps	-0x30(%r10),%xmm6
++	movaps	-0x20(%r10),%xmm7
++___
++$code.=<<___;
++	lea	-8(%r10),%rsp
++.cfi_def_cfa_register	%rsp
++.Lavx512vl_epilogue:
++	ret
++.cfi_endproc
++___
++&end_function("chacha20_avx512vl");
++
++# This one handles longer inputs...
++
++my ($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,
++    $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3)=map("%zmm$_",(0..15));	# 16 working registers, %zmm0..%zmm15 in this order
++my  @xx=($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,
++	 $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3);	# the same 16 names as an array, indexed 0..15 by AVX512_lane_ROUND
++my @key=map("%zmm$_",(16..31));		# %zmm16..%zmm31: copies of the initial state, added back after the rounds
++my ($xt0,$xt1,$xt2,$xt3)=@key[0..3];	# temporaries share names with the first four @key registers
++
++sub AVX512_lane_ROUND {	# build four interleaved quarter-rounds; returns Perl statements as strings
++my ($a0,$b0,$c0,$d0)=@_;	# indices into @xx of the first quarter-round
++my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));	# same group of four (index & ~3), next member ((index+1) & 3)
++my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));
++my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));
++my @x=map("\"$_\"",@xx);	# register names wrapped in double quotes for the generated statements
++
++	(	# one string per instruction; callers run each entry through eval
++	"&vpaddd	(@x[$a0],@x[$a0],@x[$b0])",	# Q1
++	 "&vpaddd	(@x[$a1],@x[$a1],@x[$b1])",	# Q2
++	  "&vpaddd	(@x[$a2],@x[$a2],@x[$b2])",	# Q3
++	   "&vpaddd	(@x[$a3],@x[$a3],@x[$b3])",	# Q4
++	"&vpxord	(@x[$d0],@x[$d0],@x[$a0])",	# d ^= a
++	 "&vpxord	(@x[$d1],@x[$d1],@x[$a1])",
++	  "&vpxord	(@x[$d2],@x[$d2],@x[$a2])",
++	   "&vpxord	(@x[$d3],@x[$d3],@x[$a3])",
++	"&vprold	(@x[$d0],@x[$d0],16)",	# d = rotl(d,16)
++	 "&vprold	(@x[$d1],@x[$d1],16)",
++	  "&vprold	(@x[$d2],@x[$d2],16)",
++	   "&vprold	(@x[$d3],@x[$d3],16)",
++
++	"&vpaddd	(@x[$c0],@x[$c0],@x[$d0])",	# c += d
++	 "&vpaddd	(@x[$c1],@x[$c1],@x[$d1])",
++	  "&vpaddd	(@x[$c2],@x[$c2],@x[$d2])",
++	   "&vpaddd	(@x[$c3],@x[$c3],@x[$d3])",
++	"&vpxord	(@x[$b0],@x[$b0],@x[$c0])",	# b ^= c
++	 "&vpxord	(@x[$b1],@x[$b1],@x[$c1])",
++	  "&vpxord	(@x[$b2],@x[$b2],@x[$c2])",
++	   "&vpxord	(@x[$b3],@x[$b3],@x[$c3])",
++	"&vprold	(@x[$b0],@x[$b0],12)",	# b = rotl(b,12)
++	 "&vprold	(@x[$b1],@x[$b1],12)",
++	  "&vprold	(@x[$b2],@x[$b2],12)",
++	   "&vprold	(@x[$b3],@x[$b3],12)",
++
++	"&vpaddd	(@x[$a0],@x[$a0],@x[$b0])",	# a += b
++	 "&vpaddd	(@x[$a1],@x[$a1],@x[$b1])",
++	  "&vpaddd	(@x[$a2],@x[$a2],@x[$b2])",
++	   "&vpaddd	(@x[$a3],@x[$a3],@x[$b3])",
++	"&vpxord	(@x[$d0],@x[$d0],@x[$a0])",	# d ^= a
++	 "&vpxord	(@x[$d1],@x[$d1],@x[$a1])",
++	  "&vpxord	(@x[$d2],@x[$d2],@x[$a2])",
++	   "&vpxord	(@x[$d3],@x[$d3],@x[$a3])",
++	"&vprold	(@x[$d0],@x[$d0],8)",	# d = rotl(d,8)
++	 "&vprold	(@x[$d1],@x[$d1],8)",
++	  "&vprold	(@x[$d2],@x[$d2],8)",
++	   "&vprold	(@x[$d3],@x[$d3],8)",
++
++	"&vpaddd	(@x[$c0],@x[$c0],@x[$d0])",	# c += d
++	 "&vpaddd	(@x[$c1],@x[$c1],@x[$d1])",
++	  "&vpaddd	(@x[$c2],@x[$c2],@x[$d2])",
++	   "&vpaddd	(@x[$c3],@x[$c3],@x[$d3])",
++	"&vpxord	(@x[$b0],@x[$b0],@x[$c0])",	# b ^= c
++	 "&vpxord	(@x[$b1],@x[$b1],@x[$c1])",
++	  "&vpxord	(@x[$b2],@x[$b2],@x[$c2])",
++	   "&vpxord	(@x[$b3],@x[$b3],@x[$c3])",
++	"&vprold	(@x[$b0],@x[$b0],7)",	# b = rotl(b,7)
++	 "&vprold	(@x[$b1],@x[$b1],7)",
++	  "&vprold	(@x[$b2],@x[$b2],7)",
++	   "&vprold	(@x[$b3],@x[$b3],7)"
++	);
++}
++
++my $xframe = $win64 ? 0xa8 : 8;
++
++$code.=<<___;
++.type	chacha20_16x,\@function,5
++.align	32
++chacha20_16x:
++.cfi_startproc
++.Lchacha20_16x:
++	lea		8(%rsp),%r10		# frame register
++.cfi_def_cfa_register	%r10
++	sub		\$64+$xframe,%rsp
++	and		\$-64,%rsp
++___
++$code.=<<___	if ($win64);
++	movaps		%xmm6,-0xb0(%r10)
++	movaps		%xmm7,-0xa0(%r10)
++	movaps		%xmm8,-0x90(%r10)
++	movaps		%xmm9,-0x80(%r10)
++	movaps		%xmm10,-0x70(%r10)
++	movaps		%xmm11,-0x60(%r10)
++	movaps		%xmm12,-0x50(%r10)
++	movaps		%xmm13,-0x40(%r10)
++	movaps		%xmm14,-0x30(%r10)
++	movaps		%xmm15,-0x20(%r10)
++.L16x_body:
++___
++$code.=<<___;
++	vzeroupper
++
++	lea		.Lsigma(%rip),%r9
++	vbroadcasti32x4	(%r9),$xa3		# key[0]
++	vbroadcasti32x4	($key),$xb3		# key[1]
++	vbroadcasti32x4	16($key),$xc3		# key[2]
++	vbroadcasti32x4	($counter),$xd3		# key[3]
++
++	vpshufd		\$0x00,$xa3,$xa0	# smash key by lanes...
++	vpshufd		\$0x55,$xa3,$xa1
++	vpshufd		\$0xaa,$xa3,$xa2
++	vpshufd		\$0xff,$xa3,$xa3
++	vmovdqa64	$xa0,@key[0]
++	vmovdqa64	$xa1,@key[1]
++	vmovdqa64	$xa2,@key[2]
++	vmovdqa64	$xa3,@key[3]
++
++	vpshufd		\$0x00,$xb3,$xb0
++	vpshufd		\$0x55,$xb3,$xb1
++	vpshufd		\$0xaa,$xb3,$xb2
++	vpshufd		\$0xff,$xb3,$xb3
++	vmovdqa64	$xb0,@key[4]
++	vmovdqa64	$xb1,@key[5]
++	vmovdqa64	$xb2,@key[6]
++	vmovdqa64	$xb3,@key[7]
++
++	vpshufd		\$0x00,$xc3,$xc0
++	vpshufd		\$0x55,$xc3,$xc1
++	vpshufd		\$0xaa,$xc3,$xc2
++	vpshufd		\$0xff,$xc3,$xc3
++	vmovdqa64	$xc0,@key[8]
++	vmovdqa64	$xc1,@key[9]
++	vmovdqa64	$xc2,@key[10]
++	vmovdqa64	$xc3,@key[11]
++
++	vpshufd		\$0x00,$xd3,$xd0
++	vpshufd		\$0x55,$xd3,$xd1
++	vpshufd		\$0xaa,$xd3,$xd2
++	vpshufd		\$0xff,$xd3,$xd3
++	vpaddd		.Lincz(%rip),$xd0,$xd0	# don't save counters yet
++	vmovdqa64	$xd0,@key[12]
++	vmovdqa64	$xd1,@key[13]
++	vmovdqa64	$xd2,@key[14]
++	vmovdqa64	$xd3,@key[15]
++
++	mov		\$10,%eax
++	jmp		.Loop16x
++
++.align	32
++.Loop_outer16x:
++	vpbroadcastd	0(%r9),$xa0		# reload key
++	vpbroadcastd	4(%r9),$xa1
++	vpbroadcastd	8(%r9),$xa2
++	vpbroadcastd	12(%r9),$xa3
++	vpaddd		.Lsixteen(%rip),@key[12],@key[12]	# next SIMD counters
++	vmovdqa64	@key[4],$xb0
++	vmovdqa64	@key[5],$xb1
++	vmovdqa64	@key[6],$xb2
++	vmovdqa64	@key[7],$xb3
++	vmovdqa64	@key[8],$xc0
++	vmovdqa64	@key[9],$xc1
++	vmovdqa64	@key[10],$xc2
++	vmovdqa64	@key[11],$xc3
++	vmovdqa64	@key[12],$xd0
++	vmovdqa64	@key[13],$xd1
++	vmovdqa64	@key[14],$xd2
++	vmovdqa64	@key[15],$xd3
++
++	vmovdqa64	$xa0,@key[0]
++	vmovdqa64	$xa1,@key[1]
++	vmovdqa64	$xa2,@key[2]
++	vmovdqa64	$xa3,@key[3]
++
++	mov		\$10,%eax
++	jmp		.Loop16x
++
++.align	32
++.Loop16x:
++___
++	foreach (&AVX512_lane_ROUND(0, 4, 8,12)) { eval; }	# quarter-rounds on (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15)
++	foreach (&AVX512_lane_ROUND(0, 5,10,15)) { eval; }	# quarter-rounds on (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14)
++$code.=<<___;
++	dec		%eax
++	jnz		.Loop16x
++
++	vpaddd		@key[0],$xa0,$xa0	# accumulate key
++	vpaddd		@key[1],$xa1,$xa1
++	vpaddd		@key[2],$xa2,$xa2
++	vpaddd		@key[3],$xa3,$xa3
++
++	vpunpckldq	$xa1,$xa0,$xt2		# "de-interlace" data
++	vpunpckldq	$xa3,$xa2,$xt3
++	vpunpckhdq	$xa1,$xa0,$xa0
++	vpunpckhdq	$xa3,$xa2,$xa2
++	vpunpcklqdq	$xt3,$xt2,$xa1		# "a0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "a1"
++	vpunpcklqdq	$xa2,$xa0,$xa3		# "a2"
++	vpunpckhqdq	$xa2,$xa0,$xa0		# "a3"
++___
++	($xa0,$xa1,$xa2,$xa3,$xt2)=($xa1,$xt2,$xa3,$xa0,$xa2);
++$code.=<<___;
++	vpaddd		@key[4],$xb0,$xb0
++	vpaddd		@key[5],$xb1,$xb1
++	vpaddd		@key[6],$xb2,$xb2
++	vpaddd		@key[7],$xb3,$xb3
++
++	vpunpckldq	$xb1,$xb0,$xt2
++	vpunpckldq	$xb3,$xb2,$xt3
++	vpunpckhdq	$xb1,$xb0,$xb0
++	vpunpckhdq	$xb3,$xb2,$xb2
++	vpunpcklqdq	$xt3,$xt2,$xb1		# "b0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "b1"
++	vpunpcklqdq	$xb2,$xb0,$xb3		# "b2"
++	vpunpckhqdq	$xb2,$xb0,$xb0		# "b3"
++___
++	($xb0,$xb1,$xb2,$xb3,$xt2)=($xb1,$xt2,$xb3,$xb0,$xb2);
++$code.=<<___;
++	vshufi32x4	\$0x44,$xb0,$xa0,$xt3	# "de-interlace" further
++	vshufi32x4	\$0xee,$xb0,$xa0,$xb0
++	vshufi32x4	\$0x44,$xb1,$xa1,$xa0
++	vshufi32x4	\$0xee,$xb1,$xa1,$xb1
++	vshufi32x4	\$0x44,$xb2,$xa2,$xa1
++	vshufi32x4	\$0xee,$xb2,$xa2,$xb2
++	vshufi32x4	\$0x44,$xb3,$xa3,$xa2
++	vshufi32x4	\$0xee,$xb3,$xa3,$xb3
++___
++	($xa0,$xa1,$xa2,$xa3,$xt3)=($xt3,$xa0,$xa1,$xa2,$xa3);
++$code.=<<___;
++	vpaddd		@key[8],$xc0,$xc0
++	vpaddd		@key[9],$xc1,$xc1
++	vpaddd		@key[10],$xc2,$xc2
++	vpaddd		@key[11],$xc3,$xc3
++
++	vpunpckldq	$xc1,$xc0,$xt2
++	vpunpckldq	$xc3,$xc2,$xt3
++	vpunpckhdq	$xc1,$xc0,$xc0
++	vpunpckhdq	$xc3,$xc2,$xc2
++	vpunpcklqdq	$xt3,$xt2,$xc1		# "c0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "c1"
++	vpunpcklqdq	$xc2,$xc0,$xc3		# "c2"
++	vpunpckhqdq	$xc2,$xc0,$xc0		# "c3"
++___
++	($xc0,$xc1,$xc2,$xc3,$xt2)=($xc1,$xt2,$xc3,$xc0,$xc2);
++$code.=<<___;
++	vpaddd		@key[12],$xd0,$xd0
++	vpaddd		@key[13],$xd1,$xd1
++	vpaddd		@key[14],$xd2,$xd2
++	vpaddd		@key[15],$xd3,$xd3
++
++	vpunpckldq	$xd1,$xd0,$xt2
++	vpunpckldq	$xd3,$xd2,$xt3
++	vpunpckhdq	$xd1,$xd0,$xd0
++	vpunpckhdq	$xd3,$xd2,$xd2
++	vpunpcklqdq	$xt3,$xt2,$xd1		# "d0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "d1"
++	vpunpcklqdq	$xd2,$xd0,$xd3		# "d2"
++	vpunpckhqdq	$xd2,$xd0,$xd0		# "d3"
++___
++	($xd0,$xd1,$xd2,$xd3,$xt2)=($xd1,$xt2,$xd3,$xd0,$xd2);
++$code.=<<___;
++	vshufi32x4	\$0x44,$xd0,$xc0,$xt3	# "de-interlace" further
++	vshufi32x4	\$0xee,$xd0,$xc0,$xd0
++	vshufi32x4	\$0x44,$xd1,$xc1,$xc0
++	vshufi32x4	\$0xee,$xd1,$xc1,$xd1
++	vshufi32x4	\$0x44,$xd2,$xc2,$xc1
++	vshufi32x4	\$0xee,$xd2,$xc2,$xd2
++	vshufi32x4	\$0x44,$xd3,$xc3,$xc2
++	vshufi32x4	\$0xee,$xd3,$xc3,$xd3
++___
++	($xc0,$xc1,$xc2,$xc3,$xt3)=($xt3,$xc0,$xc1,$xc2,$xc3);
++$code.=<<___;
++	vshufi32x4	\$0x88,$xc0,$xa0,$xt0	# "de-interlace" further
++	vshufi32x4	\$0xdd,$xc0,$xa0,$xa0
++	 vshufi32x4	\$0x88,$xd0,$xb0,$xc0
++	 vshufi32x4	\$0xdd,$xd0,$xb0,$xd0
++	vshufi32x4	\$0x88,$xc1,$xa1,$xt1
++	vshufi32x4	\$0xdd,$xc1,$xa1,$xa1
++	 vshufi32x4	\$0x88,$xd1,$xb1,$xc1
++	 vshufi32x4	\$0xdd,$xd1,$xb1,$xd1
++	vshufi32x4	\$0x88,$xc2,$xa2,$xt2
++	vshufi32x4	\$0xdd,$xc2,$xa2,$xa2
++	 vshufi32x4	\$0x88,$xd2,$xb2,$xc2
++	 vshufi32x4	\$0xdd,$xd2,$xb2,$xd2
++	vshufi32x4	\$0x88,$xc3,$xa3,$xt3
++	vshufi32x4	\$0xdd,$xc3,$xa3,$xa3
++	 vshufi32x4	\$0x88,$xd3,$xb3,$xc3
++	 vshufi32x4	\$0xdd,$xd3,$xb3,$xd3
++___
++	($xa0,$xa1,$xa2,$xa3,$xb0,$xb1,$xb2,$xb3)=
++	($xt0,$xt1,$xt2,$xt3,$xa0,$xa1,$xa2,$xa3);
++
++	($xa0,$xb0,$xc0,$xd0, $xa1,$xb1,$xc1,$xd1,
++	 $xa2,$xb2,$xc2,$xd2, $xa3,$xb3,$xc3,$xd3) =
++	($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,
++	 $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3);
++$code.=<<___;
++	cmp		\$64*16,$len
++	jb		.Ltail16x
++
++	vpxord		0x00($inp),$xa0,$xa0	# xor with input
++	vpxord		0x40($inp),$xb0,$xb0
++	vpxord		0x80($inp),$xc0,$xc0
++	vpxord		0xc0($inp),$xd0,$xd0
++	vmovdqu32	$xa0,0x00($out)
++	vmovdqu32	$xb0,0x40($out)
++	vmovdqu32	$xc0,0x80($out)
++	vmovdqu32	$xd0,0xc0($out)
++
++	vpxord		0x100($inp),$xa1,$xa1
++	vpxord		0x140($inp),$xb1,$xb1
++	vpxord		0x180($inp),$xc1,$xc1
++	vpxord		0x1c0($inp),$xd1,$xd1
++	vmovdqu32	$xa1,0x100($out)
++	vmovdqu32	$xb1,0x140($out)
++	vmovdqu32	$xc1,0x180($out)
++	vmovdqu32	$xd1,0x1c0($out)
++
++	vpxord		0x200($inp),$xa2,$xa2
++	vpxord		0x240($inp),$xb2,$xb2
++	vpxord		0x280($inp),$xc2,$xc2
++	vpxord		0x2c0($inp),$xd2,$xd2
++	vmovdqu32	$xa2,0x200($out)
++	vmovdqu32	$xb2,0x240($out)
++	vmovdqu32	$xc2,0x280($out)
++	vmovdqu32	$xd2,0x2c0($out)
++
++	vpxord		0x300($inp),$xa3,$xa3
++	vpxord		0x340($inp),$xb3,$xb3
++	vpxord		0x380($inp),$xc3,$xc3
++	vpxord		0x3c0($inp),$xd3,$xd3
++	lea		0x400($inp),$inp
++	vmovdqu32	$xa3,0x300($out)
++	vmovdqu32	$xb3,0x340($out)
++	vmovdqu32	$xc3,0x380($out)
++	vmovdqu32	$xd3,0x3c0($out)
++	lea		0x400($out),$out
++
++	sub		\$64*16,$len
++	jnz		.Loop_outer16x
++
++	jmp		.Ldone16x
++
++.align	32
++.Ltail16x:
++	xor		%r9,%r9
++	sub		$inp,$out
++	cmp		\$64*1,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xa0,$xa0	# xor with input
++	vmovdqu32	$xa0,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xb0,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*2,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xb0,$xb0
++	vmovdqu32	$xb0,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xc0,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*3,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xc0,$xc0
++	vmovdqu32	$xc0,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xd0,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*4,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xd0,$xd0
++	vmovdqu32	$xd0,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xa1,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*5,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xa1,$xa1
++	vmovdqu32	$xa1,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xb1,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*6,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xb1,$xb1
++	vmovdqu32	$xb1,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xc1,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*7,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xc1,$xc1
++	vmovdqu32	$xc1,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xd1,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*8,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xd1,$xd1
++	vmovdqu32	$xd1,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xa2,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*9,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xa2,$xa2
++	vmovdqu32	$xa2,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xb2,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*10,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xb2,$xb2
++	vmovdqu32	$xb2,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xc2,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*11,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xc2,$xc2
++	vmovdqu32	$xc2,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xd2,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*12,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xd2,$xd2
++	vmovdqu32	$xd2,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xa3,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*13,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xa3,$xa3
++	vmovdqu32	$xa3,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xb3,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*14,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xb3,$xb3
++	vmovdqu32	$xb3,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xc3,$xa0
++	lea		64($inp),$inp
++
++	cmp		\$64*15,$len
++	jb		.Less_than_64_16x
++	vpxord		($inp),$xc3,$xc3
++	vmovdqu32	$xc3,($out,$inp)
++	je		.Ldone16x
++	vmovdqa32	$xd3,$xa0
++	lea		64($inp),$inp
++
++.Less_than_64_16x:
++	vmovdqa32	$xa0,0x00(%rsp)
++	lea		($out,$inp),$out
++	and		\$63,$len
++
++.Loop_tail16x:
++	movzb		($inp,%r9),%eax
++	movzb		(%rsp,%r9),%ecx
++	lea		1(%r9),%r9
++	xor		%ecx,%eax
++	mov		%al,-1($out,%r9)
++	dec		$len
++	jnz		.Loop_tail16x
++
++	vpxord		$xa0,$xa0,$xa0
++	vmovdqa32	$xa0,0(%rsp)
++
++.Ldone16x:
++	vzeroall
++___
++$code.=<<___	if ($win64);
++	movaps		-0xb0(%r10),%xmm6
++	movaps		-0xa0(%r10),%xmm7
++	movaps		-0x90(%r10),%xmm8
++	movaps		-0x80(%r10),%xmm9
++	movaps		-0x70(%r10),%xmm10
++	movaps		-0x60(%r10),%xmm11
++	movaps		-0x50(%r10),%xmm12
++	movaps		-0x40(%r10),%xmm13
++	movaps		-0x30(%r10),%xmm14
++	movaps		-0x20(%r10),%xmm15
++___
++$code.=<<___;
++	lea		-8(%r10),%rsp
++.cfi_def_cfa_register	%rsp
++.L16x_epilogue:
++	ret
++.cfi_endproc
++.size	chacha20_16x,.-chacha20_16x
++___
++
++# switch to %ymm domain
++($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,
++ $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3)=map("%ymm$_",(0..15));
++@xx=($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,
++     $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3);
++@key=map("%ymm$_",(16..31));
++($xt0,$xt1,$xt2,$xt3)=@key[0..3];
++
++$code.=<<___;
++.type	chacha20_8xvl,\@function,5
++.align	32
++chacha20_8xvl:
++.cfi_startproc
++.Lchacha20_8xvl:
++	lea		8(%rsp),%r10		# frame register
++.cfi_def_cfa_register	%r10
++	sub		\$64+$xframe,%rsp
++	and		\$-64,%rsp
++___
++$code.=<<___	if ($win64);
++	movaps		%xmm6,-0xb0(%r10)
++	movaps		%xmm7,-0xa0(%r10)
++	movaps		%xmm8,-0x90(%r10)
++	movaps		%xmm9,-0x80(%r10)
++	movaps		%xmm10,-0x70(%r10)
++	movaps		%xmm11,-0x60(%r10)
++	movaps		%xmm12,-0x50(%r10)
++	movaps		%xmm13,-0x40(%r10)
++	movaps		%xmm14,-0x30(%r10)
++	movaps		%xmm15,-0x20(%r10)
++.L8xvl_body:
++___
++$code.=<<___;
++	vzeroupper
++
++	lea		.Lsigma(%rip),%r9
++	vbroadcasti128	(%r9),$xa3		# key[0]
++	vbroadcasti128	($key),$xb3		# key[1]
++	vbroadcasti128	16($key),$xc3		# key[2]
++	vbroadcasti128	($counter),$xd3		# key[3]
++
++	vpshufd		\$0x00,$xa3,$xa0	# smash key by lanes...
++	vpshufd		\$0x55,$xa3,$xa1
++	vpshufd		\$0xaa,$xa3,$xa2
++	vpshufd		\$0xff,$xa3,$xa3
++	vmovdqa64	$xa0,@key[0]
++	vmovdqa64	$xa1,@key[1]
++	vmovdqa64	$xa2,@key[2]
++	vmovdqa64	$xa3,@key[3]
++
++	vpshufd		\$0x00,$xb3,$xb0
++	vpshufd		\$0x55,$xb3,$xb1
++	vpshufd		\$0xaa,$xb3,$xb2
++	vpshufd		\$0xff,$xb3,$xb3
++	vmovdqa64	$xb0,@key[4]
++	vmovdqa64	$xb1,@key[5]
++	vmovdqa64	$xb2,@key[6]
++	vmovdqa64	$xb3,@key[7]
++
++	vpshufd		\$0x00,$xc3,$xc0
++	vpshufd		\$0x55,$xc3,$xc1
++	vpshufd		\$0xaa,$xc3,$xc2
++	vpshufd		\$0xff,$xc3,$xc3
++	vmovdqa64	$xc0,@key[8]
++	vmovdqa64	$xc1,@key[9]
++	vmovdqa64	$xc2,@key[10]
++	vmovdqa64	$xc3,@key[11]
++
++	vpshufd		\$0x00,$xd3,$xd0
++	vpshufd		\$0x55,$xd3,$xd1
++	vpshufd		\$0xaa,$xd3,$xd2
++	vpshufd		\$0xff,$xd3,$xd3
++	vpaddd		.Lincy(%rip),$xd0,$xd0	# don't save counters yet
++	vmovdqa64	$xd0,@key[12]
++	vmovdqa64	$xd1,@key[13]
++	vmovdqa64	$xd2,@key[14]
++	vmovdqa64	$xd3,@key[15]
++
++	mov		\$10,%eax
++	jmp		.Loop8xvl
++
++.align	32
++.Loop_outer8xvl:
++	#vpbroadcastd	0(%r9),$xa0		# reload key
++	#vpbroadcastd	4(%r9),$xa1
++	vpbroadcastd	8(%r9),$xa2
++	vpbroadcastd	12(%r9),$xa3
++	vpaddd		.Leight(%rip),@key[12],@key[12]	# next SIMD counters
++	vmovdqa64	@key[4],$xb0
++	vmovdqa64	@key[5],$xb1
++	vmovdqa64	@key[6],$xb2
++	vmovdqa64	@key[7],$xb3
++	vmovdqa64	@key[8],$xc0
++	vmovdqa64	@key[9],$xc1
++	vmovdqa64	@key[10],$xc2
++	vmovdqa64	@key[11],$xc3
++	vmovdqa64	@key[12],$xd0
++	vmovdqa64	@key[13],$xd1
++	vmovdqa64	@key[14],$xd2
++	vmovdqa64	@key[15],$xd3
++
++	vmovdqa64	$xa0,@key[0]
++	vmovdqa64	$xa1,@key[1]
++	vmovdqa64	$xa2,@key[2]
++	vmovdqa64	$xa3,@key[3]
++
++	mov		\$10,%eax
++	jmp		.Loop8xvl
++
++.align	32
++.Loop8xvl:
++___
++	foreach (&AVX512_lane_ROUND(0, 4, 8,12)) { eval; }	# quarter-rounds on (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15)
++	foreach (&AVX512_lane_ROUND(0, 5,10,15)) { eval; }	# quarter-rounds on (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14)
++$code.=<<___;
++	dec		%eax
++	jnz		.Loop8xvl
++
++	vpaddd		@key[0],$xa0,$xa0	# accumulate key
++	vpaddd		@key[1],$xa1,$xa1
++	vpaddd		@key[2],$xa2,$xa2
++	vpaddd		@key[3],$xa3,$xa3
++
++	vpunpckldq	$xa1,$xa0,$xt2		# "de-interlace" data
++	vpunpckldq	$xa3,$xa2,$xt3
++	vpunpckhdq	$xa1,$xa0,$xa0
++	vpunpckhdq	$xa3,$xa2,$xa2
++	vpunpcklqdq	$xt3,$xt2,$xa1		# "a0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "a1"
++	vpunpcklqdq	$xa2,$xa0,$xa3		# "a2"
++	vpunpckhqdq	$xa2,$xa0,$xa0		# "a3"
++___
++	($xa0,$xa1,$xa2,$xa3,$xt2)=($xa1,$xt2,$xa3,$xa0,$xa2);
++$code.=<<___;
++	vpaddd		@key[4],$xb0,$xb0
++	vpaddd		@key[5],$xb1,$xb1
++	vpaddd		@key[6],$xb2,$xb2
++	vpaddd		@key[7],$xb3,$xb3
++
++	vpunpckldq	$xb1,$xb0,$xt2
++	vpunpckldq	$xb3,$xb2,$xt3
++	vpunpckhdq	$xb1,$xb0,$xb0
++	vpunpckhdq	$xb3,$xb2,$xb2
++	vpunpcklqdq	$xt3,$xt2,$xb1		# "b0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "b1"
++	vpunpcklqdq	$xb2,$xb0,$xb3		# "b2"
++	vpunpckhqdq	$xb2,$xb0,$xb0		# "b3"
++___
++	($xb0,$xb1,$xb2,$xb3,$xt2)=($xb1,$xt2,$xb3,$xb0,$xb2);
++$code.=<<___;
++	vshufi32x4	\$0,$xb0,$xa0,$xt3	# "de-interlace" further
++	vshufi32x4	\$3,$xb0,$xa0,$xb0
++	vshufi32x4	\$0,$xb1,$xa1,$xa0
++	vshufi32x4	\$3,$xb1,$xa1,$xb1
++	vshufi32x4	\$0,$xb2,$xa2,$xa1
++	vshufi32x4	\$3,$xb2,$xa2,$xb2
++	vshufi32x4	\$0,$xb3,$xa3,$xa2
++	vshufi32x4	\$3,$xb3,$xa3,$xb3
++___
++	($xa0,$xa1,$xa2,$xa3,$xt3)=($xt3,$xa0,$xa1,$xa2,$xa3);
++$code.=<<___;
++	vpaddd		@key[8],$xc0,$xc0
++	vpaddd		@key[9],$xc1,$xc1
++	vpaddd		@key[10],$xc2,$xc2
++	vpaddd		@key[11],$xc3,$xc3
++
++	vpunpckldq	$xc1,$xc0,$xt2
++	vpunpckldq	$xc3,$xc2,$xt3
++	vpunpckhdq	$xc1,$xc0,$xc0
++	vpunpckhdq	$xc3,$xc2,$xc2
++	vpunpcklqdq	$xt3,$xt2,$xc1		# "c0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "c1"
++	vpunpcklqdq	$xc2,$xc0,$xc3		# "c2"
++	vpunpckhqdq	$xc2,$xc0,$xc0		# "c3"
++___
++	($xc0,$xc1,$xc2,$xc3,$xt2)=($xc1,$xt2,$xc3,$xc0,$xc2);
++$code.=<<___;
++	vpaddd		@key[12],$xd0,$xd0
++	vpaddd		@key[13],$xd1,$xd1
++	vpaddd		@key[14],$xd2,$xd2
++	vpaddd		@key[15],$xd3,$xd3
++
++	vpunpckldq	$xd1,$xd0,$xt2
++	vpunpckldq	$xd3,$xd2,$xt3
++	vpunpckhdq	$xd1,$xd0,$xd0
++	vpunpckhdq	$xd3,$xd2,$xd2
++	vpunpcklqdq	$xt3,$xt2,$xd1		# "d0"
++	vpunpckhqdq	$xt3,$xt2,$xt2		# "d1"
++	vpunpcklqdq	$xd2,$xd0,$xd3		# "d2"
++	vpunpckhqdq	$xd2,$xd0,$xd0		# "d3"
++___
++	($xd0,$xd1,$xd2,$xd3,$xt2)=($xd1,$xt2,$xd3,$xd0,$xd2);
++$code.=<<___;
++	vperm2i128	\$0x20,$xd0,$xc0,$xt3	# "de-interlace" further
++	vperm2i128	\$0x31,$xd0,$xc0,$xd0
++	vperm2i128	\$0x20,$xd1,$xc1,$xc0
++	vperm2i128	\$0x31,$xd1,$xc1,$xd1
++	vperm2i128	\$0x20,$xd2,$xc2,$xc1
++	vperm2i128	\$0x31,$xd2,$xc2,$xd2
++	vperm2i128	\$0x20,$xd3,$xc3,$xc2
++	vperm2i128	\$0x31,$xd3,$xc3,$xd3
++___
++	($xc0,$xc1,$xc2,$xc3,$xt3)=($xt3,$xc0,$xc1,$xc2,$xc3);
++	($xb0,$xb1,$xb2,$xb3,$xc0,$xc1,$xc2,$xc3)=
++	($xc0,$xc1,$xc2,$xc3,$xb0,$xb1,$xb2,$xb3);
++$code.=<<___;
++	cmp		\$64*8,$len
++	jb		.Ltail8xvl
++
++	mov		\$0x80,%eax		# size optimization
++	vpxord		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x20($inp),$xb0,$xb0
++	vpxor		0x40($inp),$xc0,$xc0
++	vpxor		0x60($inp),$xd0,$xd0
++	lea		($inp,%rax),$inp	# size optimization
++	vmovdqu32	$xa0,0x00($out)
++	vmovdqu		$xb0,0x20($out)
++	vmovdqu		$xc0,0x40($out)
++	vmovdqu		$xd0,0x60($out)
++	lea		($out,%rax),$out	# size optimization
++
++	vpxor		0x00($inp),$xa1,$xa1
++	vpxor		0x20($inp),$xb1,$xb1
++	vpxor		0x40($inp),$xc1,$xc1
++	vpxor		0x60($inp),$xd1,$xd1
++	lea		($inp,%rax),$inp	# size optimization
++	vmovdqu		$xa1,0x00($out)
++	vmovdqu		$xb1,0x20($out)
++	vmovdqu		$xc1,0x40($out)
++	vmovdqu		$xd1,0x60($out)
++	lea		($out,%rax),$out	# size optimization
++
++	vpxord		0x00($inp),$xa2,$xa2
++	vpxor		0x20($inp),$xb2,$xb2
++	vpxor		0x40($inp),$xc2,$xc2
++	vpxor		0x60($inp),$xd2,$xd2
++	lea		($inp,%rax),$inp	# size optimization
++	vmovdqu32	$xa2,0x00($out)
++	vmovdqu		$xb2,0x20($out)
++	vmovdqu		$xc2,0x40($out)
++	vmovdqu		$xd2,0x60($out)
++	lea		($out,%rax),$out	# size optimization
++
++	vpxor		0x00($inp),$xa3,$xa3
++	vpxor		0x20($inp),$xb3,$xb3
++	vpxor		0x40($inp),$xc3,$xc3
++	vpxor		0x60($inp),$xd3,$xd3
++	lea		($inp,%rax),$inp	# size optimization
++	vmovdqu		$xa3,0x00($out)
++	vmovdqu		$xb3,0x20($out)
++	vmovdqu		$xc3,0x40($out)
++	vmovdqu		$xd3,0x60($out)
++	lea		($out,%rax),$out	# size optimization
++
++	vpbroadcastd	0(%r9),%ymm0		# reload key
++	vpbroadcastd	4(%r9),%ymm1
++
++	sub		\$64*8,$len
++	jnz		.Loop_outer8xvl
++
++	jmp		.Ldone8xvl
++
++.align	32
++.Ltail8xvl:
++	vmovdqa64	$xa0,%ymm8		# size optimization
++___
++$xa0 = "%ymm8";
++$code.=<<___;
++	xor		%r9,%r9
++	sub		$inp,$out
++	cmp		\$64*1,$len
++	jb		.Less_than_64_8xvl
++	vpxor		0x00($inp),$xa0,$xa0	# xor with input
++	vpxor		0x20($inp),$xb0,$xb0
++	vmovdqu		$xa0,0x00($out,$inp)
++	vmovdqu		$xb0,0x20($out,$inp)
++	je		.Ldone8xvl
++	vmovdqa		$xc0,$xa0
++	vmovdqa		$xd0,$xb0
++	lea		64($inp),$inp
++
++	cmp		\$64*2,$len
++	jb		.Less_than_64_8xvl
++	vpxor		0x00($inp),$xc0,$xc0
++	vpxor		0x20($inp),$xd0,$xd0
++	vmovdqu		$xc0,0x00($out,$inp)
++	vmovdqu		$xd0,0x20($out,$inp)
++	je		.Ldone8xvl
++	vmovdqa		$xa1,$xa0
++	vmovdqa		$xb1,$xb0
++	lea		64($inp),$inp
++
++	cmp		\$64*3,$len
++	jb		.Less_than_64_8xvl
++	vpxor		0x00($inp),$xa1,$xa1
++	vpxor		0x20($inp),$xb1,$xb1
++	vmovdqu		$xa1,0x00($out,$inp)
++	vmovdqu		$xb1,0x20($out,$inp)
++	je		.Ldone8xvl
++	vmovdqa		$xc1,$xa0
++	vmovdqa		$xd1,$xb0
++	lea		64($inp),$inp
++
++	cmp		\$64*4,$len
++	jb		.Less_than_64_8xvl
++	vpxor		0x00($inp),$xc1,$xc1
++	vpxor		0x20($inp),$xd1,$xd1
++	vmovdqu		$xc1,0x00($out,$inp)
++	vmovdqu		$xd1,0x20($out,$inp)
++	je		.Ldone8xvl
++	vmovdqa32	$xa2,$xa0
++	vmovdqa		$xb2,$xb0
++	lea		64($inp),$inp
++
++	cmp		\$64*5,$len
++	jb		.Less_than_64_8xvl
++	vpxord		0x00($inp),$xa2,$xa2
++	vpxor		0x20($inp),$xb2,$xb2
++	vmovdqu32	$xa2,0x00($out,$inp)
++	vmovdqu		$xb2,0x20($out,$inp)
++	je		.Ldone8xvl
++	vmovdqa		$xc2,$xa0
++	vmovdqa		$xd2,$xb0
++	lea		64($inp),$inp
++
++	cmp		\$64*6,$len
++	jb		.Less_than_64_8xvl
++	vpxor		0x00($inp),$xc2,$xc2
++	vpxor		0x20($inp),$xd2,$xd2
++	vmovdqu		$xc2,0x00($out,$inp)
++	vmovdqu		$xd2,0x20($out,$inp)
++	je		.Ldone8xvl
++	vmovdqa		$xa3,$xa0
++	vmovdqa		$xb3,$xb0
++	lea		64($inp),$inp
++
++	cmp		\$64*7,$len
++	jb		.Less_than_64_8xvl
++	vpxor		0x00($inp),$xa3,$xa3
++	vpxor		0x20($inp),$xb3,$xb3
++	vmovdqu		$xa3,0x00($out,$inp)
++	vmovdqu		$xb3,0x20($out,$inp)
++	je		.Ldone8xvl
++	vmovdqa		$xc3,$xa0
++	vmovdqa		$xd3,$xb0
++	lea		64($inp),$inp
++
++.Less_than_64_8xvl:
++	vmovdqa		$xa0,0x00(%rsp)
++	vmovdqa		$xb0,0x20(%rsp)
++	lea		($out,$inp),$out
++	and		\$63,$len
++
++.Loop_tail8xvl:
++	movzb		($inp,%r9),%eax
++	movzb		(%rsp,%r9),%ecx
++	lea		1(%r9),%r9
++	xor		%ecx,%eax
++	mov		%al,-1($out,%r9)
++	dec		$len
++	jnz		.Loop_tail8xvl
++
++	vpxor		$xa0,$xa0,$xa0
++	vmovdqa		$xa0,0x00(%rsp)
++	vmovdqa		$xa0,0x20(%rsp)
++
++.Ldone8xvl:
++	vzeroall
++___
++$code.=<<___	if ($win64);
++	movaps		-0xb0(%r10),%xmm6
++	movaps		-0xa0(%r10),%xmm7
++	movaps		-0x90(%r10),%xmm8
++	movaps		-0x80(%r10),%xmm9
++	movaps		-0x70(%r10),%xmm10
++	movaps		-0x60(%r10),%xmm11
++	movaps		-0x50(%r10),%xmm12
++	movaps		-0x40(%r10),%xmm13
++	movaps		-0x30(%r10),%xmm14
++	movaps		-0x20(%r10),%xmm15
++___
++$code.=<<___;
++	lea		-8(%r10),%rsp
++.cfi_def_cfa_register	%rsp
++.L8xvl_epilogue:
++	ret
++.cfi_endproc
++.size	chacha20_8xvl,.-chacha20_8xvl
++___
++if($kernel) {
++	$code .= "#endif\n";
++}
++}
++
++# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
++#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
++if ($win64) {
++$rec="%rcx";
++$frame="%rdx";
++$context="%r8";
++$disp="%r9";
++
++$code.=<<___;
++.extern	__imp_RtlVirtualUnwind
++.type	se_handler,\@abi-omnipotent
++.align	16
++se_handler:
++	push	%rsi
++	push	%rdi
++	push	%rbx
++	push	%rbp
++	push	%r12
++	push	%r13
++	push	%r14
++	push	%r15
++	pushfq
++	sub	\$64,%rsp		# room for the stack arguments of the call below
++
++	mov	120($context),%rax	# pull context->Rax
++	mov	248($context),%rbx	# pull context->Rip
++
++	mov	8($disp),%rsi		# disp->ImageBase
++	mov	56($disp),%r11		# disp->HandlerData
++
++	lea	.Lctr32_body(%rip),%r10
++	cmp	%r10,%rbx		# context->Rip<.Lctr32_body
++	jb	.Lcommon_seh_tail
++
++	mov	152($context),%rax	# pull context->Rsp
++
++	lea	.Lno_data(%rip),%r10	# epilogue label
++	cmp	%r10,%rbx		# context->Rip>=.Lno_data
++	jae	.Lcommon_seh_tail
++
++	lea	64+24+48(%rax),%rax	# point just above the six saved registers loaded below
++
++	mov	-8(%rax),%rbx
++	mov	-16(%rax),%rbp
++	mov	-24(%rax),%r12
++	mov	-32(%rax),%r13
++	mov	-40(%rax),%r14
++	mov	-48(%rax),%r15
++	mov	%rbx,144($context)	# restore context->Rbx
++	mov	%rbp,160($context)	# restore context->Rbp
++	mov	%r12,216($context)	# restore context->R12
++	mov	%r13,224($context)	# restore context->R13
++	mov	%r14,232($context)	# restore context->R14
++	mov	%r15,240($context)	# restore context->R15
++
++.Lcommon_seh_tail:
++	mov	8(%rax),%rdi
++	mov	16(%rax),%rsi
++	mov	%rax,152($context)	# restore context->Rsp
++	mov	%rsi,168($context)	# restore context->Rsi
++	mov	%rdi,176($context)	# restore context->Rdi
++
++	mov	40($disp),%rdi		# disp->ContextRecord
++	mov	$context,%rsi		# context
++	mov	\$154,%ecx		# sizeof(CONTEXT) in qwords
++	.long	0xa548f3fc		# cld; rep movsq
++
++	mov	$disp,%rsi
++	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
++	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
++	mov	0(%rsi),%r8		# arg3, disp->ControlPc
++	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
++	mov	40(%rsi),%r10		# disp->ContextRecord
++	lea	56(%rsi),%r11		# &disp->HandlerData
++	lea	24(%rsi),%r12		# &disp->EstablisherFrame
++	mov	%r10,32(%rsp)		# arg5
++	mov	%r11,40(%rsp)		# arg6
++	mov	%r12,48(%rsp)		# arg7
++	mov	%rcx,56(%rsp)		# arg8, (NULL)
++	call	*__imp_RtlVirtualUnwind(%rip)
++
++	mov	\$1,%eax		# ExceptionContinueSearch
++	add	\$64,%rsp
++	popfq
++	pop	%r15
++	pop	%r14
++	pop	%r13
++	pop	%r12
++	pop	%rbp
++	pop	%rbx
++	pop	%rdi
++	pop	%rsi
++	ret
++.size	se_handler,.-se_handler
++
++.type	simd_handler,\@abi-omnipotent
++.align	16
++simd_handler:				# HandlerData: body label RVA, epilogue label RVA, byte size of the saved xmm area
++	push	%rsi
++	push	%rdi
++	push	%rbx
++	push	%rbp
++	push	%r12
++	push	%r13
++	push	%r14
++	push	%r15
++	pushfq
++	sub	\$64,%rsp		# room for the stack arguments used at .Lcommon_seh_tail
++
++	mov	120($context),%rax	# pull context->Rax
++	mov	248($context),%rbx	# pull context->Rip
++
++	mov	8($disp),%rsi		# disp->ImageBase
++	mov	56($disp),%r11		# disp->HandlerData
++
++	mov	0(%r11),%r10d		# HandlerData[0]
++	lea	(%rsi,%r10),%r10	# prologue label
++	cmp	%r10,%rbx		# context->Rip<prologue label
++	jb	.Lcommon_seh_tail
++
++	mov	200($context),%rax	# pull context->R10, the frame register set by lea 8(%rsp),%r10
++	lea	-8(%rax),%rax		# frame register is entry %rsp+8, so step back to entry %rsp
++	mov	4(%r11),%r10d		# HandlerData[1]
++	mov	8(%r11),%ecx		# HandlerData[2]
++	lea	(%rsi,%r10),%r10	# epilogue label
++	cmp	%r10,%rbx		# context->Rip>=epilogue label
++	jae	.Lcommon_seh_tail
++
++	neg	%rcx			# minus the size of the saved xmm area
++	lea	-8(%rax,%rcx),%rsi	# lowest saved xmm slot, same address as -0x10-size(%r10)
++	lea	512($context),%rdi	# &context.Xmm6
++	neg	%ecx			# size of the saved xmm area again
++	shr	\$3,%ecx		# bytes to qwords
++	.long	0xa548f3fc		# cld; rep movsq
++
++	jmp	.Lcommon_seh_tail
++.size	simd_handler,.-simd_handler
++
++.section	.pdata
++.align	4
++	.rva	.LSEH_begin_chacha20_ctr32
++	.rva	.LSEH_end_chacha20_ctr32
++	.rva	.LSEH_info_chacha20_ctr32
++
++	.rva	.LSEH_begin_chacha20_ssse3
++	.rva	.LSEH_end_chacha20_ssse3
++	.rva	.LSEH_info_chacha20_ssse3
++
++	.rva	.LSEH_begin_chacha20_128
++	.rva	.LSEH_end_chacha20_128
++	.rva	.LSEH_info_chacha20_128
++
++	.rva	.LSEH_begin_chacha20_4x
++	.rva	.LSEH_end_chacha20_4x
++	.rva	.LSEH_info_chacha20_4x
++___
++$code.=<<___ if ($avx);
++	.rva	.LSEH_begin_chacha20_xop
++	.rva	.LSEH_end_chacha20_xop
++	.rva	.LSEH_info_chacha20_xop
++___
++$code.=<<___ if ($avx>1);
++	.rva	.LSEH_begin_chacha20_avx2
++	.rva	.LSEH_end_chacha20_avx2
++	.rva	.LSEH_info_chacha20_avx2
++___
++$code.=<<___ if ($avx>2);
++	.rva	.LSEH_begin_chacha20_avx512
++	.rva	.LSEH_end_chacha20_avx512
++	.rva	.LSEH_info_chacha20_avx512
++
++	.rva	.LSEH_begin_chacha20_avx512vl
++	.rva	.LSEH_end_chacha20_avx512vl
++	.rva	.LSEH_info_chacha20_avx512vl
++
++	.rva	.LSEH_begin_chacha20_16x
++	.rva	.LSEH_end_chacha20_16x
++	.rva	.LSEH_info_chacha20_16x
++
++	.rva	.LSEH_begin_chacha20_8xvl
++	.rva	.LSEH_end_chacha20_8xvl
++	.rva	.LSEH_info_chacha20_8xvl
++___
++$code.=<<___;
++.section	.xdata
++.align	8
++.LSEH_info_chacha20_ctr32:
++	.byte	9,0,0,0
++	.rva	se_handler
++
++.LSEH_info_chacha20_ssse3:
++	.byte	9,0,0,0
++	.rva	simd_handler
++	.rva	.Lssse3_body,.Lssse3_epilogue
++	.long	0x20,0
++
++.LSEH_info_chacha20_128:
++	.byte	9,0,0,0
++	.rva	simd_handler
++	.rva	.L128_body,.L128_epilogue
++	.long	0x60,0
++
++.LSEH_info_chacha20_4x:
++	.byte	9,0,0,0
++	.rva	simd_handler
++	.rva	.L4x_body,.L4x_epilogue
++	.long	0xa0,0
++___
++$code.=<<___ if ($avx);
++.LSEH_info_chacha20_xop:
++	.byte	9,0,0,0
++	.rva	simd_handler
++	.rva	.L4xop_body,.L4xop_epilogue		# HandlerData[]
++	.long	0xa0,0
++___
++$code.=<<___ if ($avx>1);
++.LSEH_info_chacha20_avx2:
++	.byte	9,0,0,0
++	.rva	simd_handler
++	.rva	.L8x_body,.L8x_epilogue			# HandlerData[]
++	.long	0xa0,0
++___
++$code.=<<___ if ($avx>2);
++.LSEH_info_chacha20_avx512:
++	.byte	9,0,0,0
++	.rva	simd_handler
++	.rva	.Lavx512_body,.Lavx512_epilogue		# HandlerData[]
++	.long	0x20,0
++
++.LSEH_info_chacha20_avx512vl:
++	.byte	9,0,0,0
++	.rva	simd_handler
++	.rva	.Lavx512vl_body,.Lavx512vl_epilogue	# HandlerData[]
++	.long	0x20,0
++
++.LSEH_info_chacha20_16x:
++	.byte	9,0,0,0
++	.rva	simd_handler
++	.rva	.L16x_body,.L16x_epilogue		# HandlerData[]
++	.long	0xa0,0
++
++.LSEH_info_chacha20_8xvl:
++	.byte	9,0,0,0
++	.rva	simd_handler
++	.rva	.L8xvl_body,.L8xvl_epilogue		# HandlerData[]
++	.long	0xa0,0
++___
++}
++
++open SELF,$0;	# re-read this script so its leading comment block can be copied to the output
++while(defined(my $hdr=<SELF>)) {
++	next if ($hdr=~/^#!/);	# the shebang line is not copied
++	last unless ($hdr=~s/^#/\/\// or $hdr=~/^$/);	# stop at the first line that is neither a comment nor blank
++	print $hdr;
++}
++close SELF;
++
++for (split("\n",$code)) {
++	# evaluate Perl snippets embedded between backticks
++	s/\`([^\`]*)\`/eval $1/ge;
++	# "down-shift": turn %x#%ymmN or %x#%zmmN into %xmmN
++	s/%x#%[yz]/%x/g;
++	if ($kernel) {
++		# kernel output: drop the argument count from .type, omit CFI directives
++		s/(^\.type.*),[0-9]+$/$1/;
++		next if (m/^\.cfi/);
++	}
++	print $_,"\n";
++}
++
++close STDOUT or die "error closing STDOUT: $!";	# a failed flush or pipe must stop the build, not leave truncated output
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/poly1305/poly1305-arm64.pl	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,974 @@
++#!/usr/bin/env perl
++# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
++#
++# This code is taken from the OpenSSL project but the author, Andy Polyakov,
++# has relicensed it under the licenses specified in the SPDX header above.
++# The original headers, including the original license headers, are
++# included below for completeness.
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# This module implements Poly1305 hash for ARMv8.
++#
++# June 2015
++#
++# Numbers are cycles per processed byte with poly1305_blocks alone.
++#
++#		IALU/gcc-4.9	NEON
++#
++# Apple A7	1.86/+5%	0.72
++# Cortex-A53	2.69/+58%	1.47
++# Cortex-A57	2.70/+7%	1.14
++# Denver	1.64/+50%	1.18(*)
++# X-Gene	2.13/+68%	2.27
++# Mongoose	1.77/+75%	1.12
++# Kryo		2.70/+55%	1.13
++#
++# (*)	the estimate based on resource availability is below 1.0,
++#	i.e. the measured result is worse than expected; presumably the
++#	binary translator is not almighty;
++
++$flavour=shift;			# first argument: flavour, or the output file name
++if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }	# ends in "name.ext": treat it as the output file, no flavour
++else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }	# otherwise take the first later argument ending in "name.ext"
++# Choose where STDOUT goes: through arm-xlate.pl for a real flavour, else straight to $output.
++if ($flavour && $flavour ne "void") {
++    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;	# directory part of this script's path
++    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
++    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
++    die "can't locate arm-xlate.pl";
++
++    open STDOUT,"| \"$^X\" $xlate $flavour $output";	# $^X is the running perl interpreter
++} else {
++    open STDOUT,">$output";
++}
++# Symbolic register names interpolated into the assembly templates below.
++my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3));	# x0..x3
++my ($mac,$nonce)=($inp,$len);			# same registers as $inp/$len (x1, x2)
++
++my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));	# x4..x14
++
++$code.=<<___;
++#ifndef	__KERNEL__
++# include "arm_arch.h"
++.extern	OPENSSL_armcap_P
++#else
++# define poly1305_init   poly1305_init_arm
++# define poly1305_blocks poly1305_blocks_arm
++# define poly1305_emit   poly1305_emit_arm
++#endif
++
++.text
++
++// forward "declarations" are required for Apple
++.globl	poly1305_blocks
++.globl	poly1305_emit
++.globl	poly1305_init
++.type	poly1305_init,%function
++.align	5
++poly1305_init:
++	cmp	$inp,xzr
++	stp	xzr,xzr,[$ctx]		// zero hash value
++	stp	xzr,xzr,[$ctx,#16]	// [along with is_base2_26]
++
++	csel	x0,xzr,x0,eq
++	b.eq	.Lno_key
++
++#ifndef	__KERNEL__
++# ifdef	__ILP32__
++	ldrsw	$t1,.LOPENSSL_armcap_P
++# else
++	ldr	$t1,.LOPENSSL_armcap_P
++# endif
++	adr	$t0,.LOPENSSL_armcap_P
++	ldr	w17,[$t0,$t1]
++#endif
++
++	ldp	$r0,$r1,[$inp]		// load key
++	mov	$s1,#0xfffffffc0fffffff
++	movk	$s1,#0x0fff,lsl#48
++#ifdef	__AARCH64EB__
++	rev	$r0,$r0			// flip bytes
++	rev	$r1,$r1
++#endif
++	and	$r0,$r0,$s1		// &=0ffffffc0fffffff
++	and	$s1,$s1,#-4
++	and	$r1,$r1,$s1		// &=0ffffffc0ffffffc
++	stp	$r0,$r1,[$ctx,#32]	// save key value
++
++#ifndef	__KERNEL__
++	tst	w17,#ARMV7_NEON
++
++	adr	$d0,poly1305_blocks
++	adr	$r0,poly1305_blocks_neon
++	adr	$d1,poly1305_emit
++	adr	$r1,poly1305_emit_neon
++
++	csel	$d0,$d0,$r0,eq
++	csel	$d1,$d1,$r1,eq
++
++# ifdef	__ILP32__
++	stp	w12,w13,[$len]
++# else
++	stp	$d0,$d1,[$len]
++# endif
++
++	mov	x0,#1
++#else
++	mov	x0,#0
++#endif
++.Lno_key:
++	ret
++.size	poly1305_init,.-poly1305_init
++
++.type	poly1305_blocks,%function
++.align	5
++poly1305_blocks:
++	ands	$len,$len,#-16
++	b.eq	.Lno_data
++
++	ldp	$h0,$h1,[$ctx]		// load hash value
++	ldp	$r0,$r1,[$ctx,#32]	// load key value
++	ldr	$h2,[$ctx,#16]
++	add	$s1,$r1,$r1,lsr#2	// s1 = r1 + (r1 >> 2)
++	b	.Loop
++
++.align	5
++.Loop:
++	ldp	$t0,$t1,[$inp],#16	// load input
++	sub	$len,$len,#16
++#ifdef	__AARCH64EB__
++	rev	$t0,$t0
++	rev	$t1,$t1
++#endif
++	adds	$h0,$h0,$t0		// accumulate input
++	adcs	$h1,$h1,$t1
++
++	mul	$d0,$h0,$r0		// h0*r0
++	adc	$h2,$h2,$padbit
++	umulh	$d1,$h0,$r0
++
++	mul	$t0,$h1,$s1		// h1*5*r1
++	umulh	$t1,$h1,$s1
++
++	adds	$d0,$d0,$t0
++	mul	$t0,$h0,$r1		// h0*r1
++	adc	$d1,$d1,$t1
++	umulh	$d2,$h0,$r1
++
++	adds	$d1,$d1,$t0
++	mul	$t0,$h1,$r0		// h1*r0
++	adc	$d2,$d2,xzr
++	umulh	$t1,$h1,$r0
++
++	adds	$d1,$d1,$t0
++	mul	$t0,$h2,$s1		// h2*5*r1
++	adc	$d2,$d2,$t1
++	mul	$t1,$h2,$r0		// h2*r0
++
++	adds	$d1,$d1,$t0
++	adc	$d2,$d2,$t1
++
++	and	$t0,$d2,#-4		// final reduction
++	and	$h2,$d2,#3
++	add	$t0,$t0,$d2,lsr#2
++	adds	$h0,$d0,$t0
++	adcs	$h1,$d1,xzr
++	adc	$h2,$h2,xzr
++
++	cbnz	$len,.Loop
++
++	stp	$h0,$h1,[$ctx]		// store hash value
++	str	$h2,[$ctx,#16]
++
++.Lno_data:
++	ret
++.size	poly1305_blocks,.-poly1305_blocks
++
++.type	poly1305_emit,%function
++.align	5
++poly1305_emit:
++	ldp	$h0,$h1,[$ctx]		// load hash base 2^64
++	ldr	$h2,[$ctx,#16]
++	ldp	$t0,$t1,[$nonce]	// load nonce
++
++	adds	$d0,$h0,#5		// compare to modulus
++	adcs	$d1,$h1,xzr
++	adc	$d2,$h2,xzr
++
++	tst	$d2,#-4			// see if it's carried/borrowed
++
++	csel	$h0,$h0,$d0,eq
++	csel	$h1,$h1,$d1,eq
++
++#ifdef	__AARCH64EB__
++	ror	$t0,$t0,#32		// flip nonce words
++	ror	$t1,$t1,#32
++#endif
++	adds	$h0,$h0,$t0		// accumulate nonce
++	adc	$h1,$h1,$t1
++#ifdef	__AARCH64EB__
++	rev	$h0,$h0			// flip output bytes
++	rev	$h1,$h1
++#endif
++	stp	$h0,$h1,[$mac]		// write result
++
++	ret
++.size	poly1305_emit,.-poly1305_emit
++___
++my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));	# v0..v8: filled by the three ld1 loads in poly1305_blocks_neon
++my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));	# v9..v13: limbs built from inp[0:1]
++my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18));	# v14..v18: limbs built from inp[2:3]
++my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23));	# v19..v23: umull/umlal accumulators
++my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28));	# v24..v28: hash limbs
++my ($T0,$T1,$MASK) = map("v$_",(29..31));	# v29..v31: no arrangement here; it is appended at each use (e.g. $T0.2d)
++# General-purpose registers used only by the NEON code path.
++my ($in2,$zeros)=("x16","x17");
++my $is_base2_26 = $zeros;		# borrow
++
++$code.=<<___;
++.type	__poly1305_mult,%function
++.align	5
++__poly1305_mult:
++	mul	$d0,$h0,$r0		// h0*r0
++	umulh	$d1,$h0,$r0
++
++	mul	$t0,$h1,$s1		// h1*5*r1
++	umulh	$t1,$h1,$s1
++
++	adds	$d0,$d0,$t0
++	mul	$t0,$h0,$r1		// h0*r1
++	adc	$d1,$d1,$t1
++	umulh	$d2,$h0,$r1
++
++	adds	$d1,$d1,$t0
++	mul	$t0,$h1,$r0		// h1*r0
++	adc	$d2,$d2,xzr
++	umulh	$t1,$h1,$r0
++
++	adds	$d1,$d1,$t0
++	mul	$t0,$h2,$s1		// h2*5*r1
++	adc	$d2,$d2,$t1
++	mul	$t1,$h2,$r0		// h2*r0
++
++	adds	$d1,$d1,$t0
++	adc	$d2,$d2,$t1
++
++	and	$t0,$d2,#-4		// final reduction
++	and	$h2,$d2,#3
++	add	$t0,$t0,$d2,lsr#2
++	adds	$h0,$d0,$t0
++	adcs	$h1,$d1,xzr
++	adc	$h2,$h2,xzr
++
++	ret
++.size	__poly1305_mult,.-__poly1305_mult
++
++.type	__poly1305_splat,%function
++.align	5
++__poly1305_splat:
++	and	x12,$h0,#0x03ffffff	// base 2^64 -> base 2^26
++	ubfx	x13,$h0,#26,#26
++	extr	x14,$h1,$h0,#52
++	and	x14,x14,#0x03ffffff
++	ubfx	x15,$h1,#14,#26
++	extr	x16,$h2,$h1,#40
++
++	str	w12,[$ctx,#16*0]	// r0
++	add	w12,w13,w13,lsl#2	// r1*5
++	str	w13,[$ctx,#16*1]	// r1
++	add	w13,w14,w14,lsl#2	// r2*5
++	str	w12,[$ctx,#16*2]	// s1
++	str	w14,[$ctx,#16*3]	// r2
++	add	w14,w15,w15,lsl#2	// r3*5
++	str	w13,[$ctx,#16*4]	// s2
++	str	w15,[$ctx,#16*5]	// r3
++	add	w15,w16,w16,lsl#2	// r4*5
++	str	w14,[$ctx,#16*6]	// s3
++	str	w16,[$ctx,#16*7]	// r4
++	str	w15,[$ctx,#16*8]	// s4
++
++	ret
++.size	__poly1305_splat,.-__poly1305_splat
++
++#if !defined(__KERNEL__) || defined(CONFIG_KERNEL_MODE_NEON)
++#ifdef	__KERNEL__
++.globl	poly1305_blocks_neon
++.globl	poly1305_emit_neon
++#endif
++
++.type	poly1305_blocks_neon,%function
++.align	5
++poly1305_blocks_neon:
++	ldr	$is_base2_26,[$ctx,#24]
++	cmp	$len,#128
++	b.hs	.Lblocks_neon
++	cbz	$is_base2_26,poly1305_blocks
++
++.Lblocks_neon:
++	stp	x29,x30,[sp,#-80]!
++	add	x29,sp,#0
++
++	ands	$len,$len,#-16
++	b.eq	.Lno_data_neon
++
++	cbz	$is_base2_26,.Lbase2_64_neon
++
++	ldp	w10,w11,[$ctx]		// load hash value base 2^26
++	ldp	w12,w13,[$ctx,#8]
++	ldr	w14,[$ctx,#16]
++
++	tst	$len,#31
++	b.eq	.Leven_neon
++
++	ldp	$r0,$r1,[$ctx,#32]	// load key value
++
++	add	$h0,x10,x11,lsl#26	// base 2^26 -> base 2^64
++	lsr	$h1,x12,#12
++	adds	$h0,$h0,x12,lsl#52
++	add	$h1,$h1,x13,lsl#14
++	adc	$h1,$h1,xzr
++	lsr	$h2,x14,#24
++	adds	$h1,$h1,x14,lsl#40
++	adc	$d2,$h2,xzr		// can be partially reduced...
++
++	ldp	$d0,$d1,[$inp],#16	// load input
++	sub	$len,$len,#16
++	add	$s1,$r1,$r1,lsr#2	// s1 = r1 + (r1 >> 2)
++
++	and	$t0,$d2,#-4		// ... so reduce
++	and	$h2,$d2,#3
++	add	$t0,$t0,$d2,lsr#2
++	adds	$h0,$h0,$t0
++	adcs	$h1,$h1,xzr
++	adc	$h2,$h2,xzr
++
++#ifdef	__AARCH64EB__
++	rev	$d0,$d0
++	rev	$d1,$d1
++#endif
++	adds	$h0,$h0,$d0		// accumulate input
++	adcs	$h1,$h1,$d1
++	adc	$h2,$h2,$padbit
++
++	bl	__poly1305_mult
++	ldr	x30,[sp,#8]
++
++	cbz	$padbit,.Lstore_base2_64_neon
++
++	and	x10,$h0,#0x03ffffff	// base 2^64 -> base 2^26
++	ubfx	x11,$h0,#26,#26
++	extr	x12,$h1,$h0,#52
++	and	x12,x12,#0x03ffffff
++	ubfx	x13,$h1,#14,#26
++	extr	x14,$h2,$h1,#40
++
++	cbnz	$len,.Leven_neon
++
++	stp	w10,w11,[$ctx]		// store hash value base 2^26
++	stp	w12,w13,[$ctx,#8]
++	str	w14,[$ctx,#16]
++	b	.Lno_data_neon
++
++.align	4
++.Lstore_base2_64_neon:
++	stp	$h0,$h1,[$ctx]		// store hash value base 2^64
++	stp	$h2,xzr,[$ctx,#16]	// note that is_base2_26 is zeroed
++	b	.Lno_data_neon
++
++.align	4
++.Lbase2_64_neon:
++	ldp	$r0,$r1,[$ctx,#32]	// load key value
++
++	ldp	$h0,$h1,[$ctx]		// load hash value base 2^64
++	ldr	$h2,[$ctx,#16]
++
++	tst	$len,#31
++	b.eq	.Linit_neon
++
++	ldp	$d0,$d1,[$inp],#16	// load input
++	sub	$len,$len,#16
++	add	$s1,$r1,$r1,lsr#2	// s1 = r1 + (r1 >> 2)
++#ifdef	__AARCH64EB__
++	rev	$d0,$d0
++	rev	$d1,$d1
++#endif
++	adds	$h0,$h0,$d0		// accumulate input
++	adcs	$h1,$h1,$d1
++	adc	$h2,$h2,$padbit
++
++	bl	__poly1305_mult
++
++.Linit_neon:
++	and	x10,$h0,#0x03ffffff	// base 2^64 -> base 2^26
++	ubfx	x11,$h0,#26,#26
++	extr	x12,$h1,$h0,#52
++	and	x12,x12,#0x03ffffff
++	ubfx	x13,$h1,#14,#26
++	extr	x14,$h2,$h1,#40
++
++	stp	d8,d9,[sp,#16]		// meet ABI requirements
++	stp	d10,d11,[sp,#32]
++	stp	d12,d13,[sp,#48]
++	stp	d14,d15,[sp,#64]
++
++	fmov	${H0},x10
++	fmov	${H1},x11
++	fmov	${H2},x12
++	fmov	${H3},x13
++	fmov	${H4},x14
++
++	////////////////////////////////// initialize r^n table
++	mov	$h0,$r0			// r^1
++	add	$s1,$r1,$r1,lsr#2	// s1 = r1 + (r1 >> 2)
++	mov	$h1,$r1
++	mov	$h2,xzr
++	add	$ctx,$ctx,#48+12
++	bl	__poly1305_splat
++
++	bl	__poly1305_mult		// r^2
++	sub	$ctx,$ctx,#4
++	bl	__poly1305_splat
++
++	bl	__poly1305_mult		// r^3
++	sub	$ctx,$ctx,#4
++	bl	__poly1305_splat
++
++	bl	__poly1305_mult		// r^4
++	sub	$ctx,$ctx,#4
++	bl	__poly1305_splat
++	ldr	x30,[sp,#8]
++
++	add	$in2,$inp,#32
++	adr	$zeros,.Lzeros
++	subs	$len,$len,#64
++	csel	$in2,$zeros,$in2,lo
++
++	mov	x4,#1
++	str	x4,[$ctx,#-24]		// set is_base2_26
++	sub	$ctx,$ctx,#48		// restore original $ctx
++	b	.Ldo_neon
++
++.align	4
++.Leven_neon:
++	add	$in2,$inp,#32
++	adr	$zeros,.Lzeros
++	subs	$len,$len,#64
++	csel	$in2,$zeros,$in2,lo
++
++	stp	d8,d9,[sp,#16]		// meet ABI requirements
++	stp	d10,d11,[sp,#32]
++	stp	d12,d13,[sp,#48]
++	stp	d14,d15,[sp,#64]
++
++	fmov	${H0},x10
++	fmov	${H1},x11
++	fmov	${H2},x12
++	fmov	${H3},x13
++	fmov	${H4},x14
++
++.Ldo_neon:
++	ldp	x8,x12,[$in2],#16	// inp[2:3] (or zero)
++	ldp	x9,x13,[$in2],#48
++
++	lsl	$padbit,$padbit,#24
++	add	x15,$ctx,#48
++
++#ifdef	__AARCH64EB__
++	rev	x8,x8
++	rev	x12,x12
++	rev	x9,x9
++	rev	x13,x13
++#endif
++	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
++	and	x5,x9,#0x03ffffff
++	ubfx	x6,x8,#26,#26
++	ubfx	x7,x9,#26,#26
++	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
++	extr	x8,x12,x8,#52
++	extr	x9,x13,x9,#52
++	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
++	fmov	$IN23_0,x4
++	and	x8,x8,#0x03ffffff
++	and	x9,x9,#0x03ffffff
++	ubfx	x10,x12,#14,#26
++	ubfx	x11,x13,#14,#26
++	add	x12,$padbit,x12,lsr#40
++	add	x13,$padbit,x13,lsr#40
++	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
++	fmov	$IN23_1,x6
++	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
++	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
++	fmov	$IN23_2,x8
++	fmov	$IN23_3,x10
++	fmov	$IN23_4,x12
++
++	ldp	x8,x12,[$inp],#16	// inp[0:1]
++	ldp	x9,x13,[$inp],#48
++
++	ld1	{$R0,$R1,$S1,$R2},[x15],#64
++	ld1	{$S2,$R3,$S3,$R4},[x15],#64
++	ld1	{$S4},[x15]
++
++#ifdef	__AARCH64EB__
++	rev	x8,x8
++	rev	x12,x12
++	rev	x9,x9
++	rev	x13,x13
++#endif
++	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
++	and	x5,x9,#0x03ffffff
++	ubfx	x6,x8,#26,#26
++	ubfx	x7,x9,#26,#26
++	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
++	extr	x8,x12,x8,#52
++	extr	x9,x13,x9,#52
++	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
++	fmov	$IN01_0,x4
++	and	x8,x8,#0x03ffffff
++	and	x9,x9,#0x03ffffff
++	ubfx	x10,x12,#14,#26
++	ubfx	x11,x13,#14,#26
++	add	x12,$padbit,x12,lsr#40
++	add	x13,$padbit,x13,lsr#40
++	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
++	fmov	$IN01_1,x6
++	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
++	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
++	movi	$MASK.2d,#-1
++	fmov	$IN01_2,x8
++	fmov	$IN01_3,x10
++	fmov	$IN01_4,x12
++	ushr	$MASK.2d,$MASK.2d,#38
++
++	b.ls	.Lskip_loop
++
++.align	4
++.Loop_neon:
++	////////////////////////////////////////////////////////////////
++	// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
++	// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
++	//   \___________________/
++	// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
++	// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
++	//   \___________________/ \____________________/
++	//
++	// Note that we start with inp[2:3]*r^2. This is because it
++	// doesn't depend on reduction in previous iteration.
++	////////////////////////////////////////////////////////////////
++	// d4 = h0*r4 + h1*r3   + h2*r2   + h3*r1   + h4*r0
++	// d3 = h0*r3 + h1*r2   + h2*r1   + h3*r0   + h4*5*r4
++	// d2 = h0*r2 + h1*r1   + h2*r0   + h3*5*r4 + h4*5*r3
++	// d1 = h0*r1 + h1*r0   + h2*5*r4 + h3*5*r3 + h4*5*r2
++	// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
++
++	subs	$len,$len,#64
++	umull	$ACC4,$IN23_0,${R4}[2]
++	csel	$in2,$zeros,$in2,lo
++	umull	$ACC3,$IN23_0,${R3}[2]
++	umull	$ACC2,$IN23_0,${R2}[2]
++	 ldp	x8,x12,[$in2],#16	// inp[2:3] (or zero)
++	umull	$ACC1,$IN23_0,${R1}[2]
++	 ldp	x9,x13,[$in2],#48
++	umull	$ACC0,$IN23_0,${R0}[2]
++#ifdef	__AARCH64EB__
++	 rev	x8,x8
++	 rev	x12,x12
++	 rev	x9,x9
++	 rev	x13,x13
++#endif
++
++	umlal	$ACC4,$IN23_1,${R3}[2]
++	 and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
++	umlal	$ACC3,$IN23_1,${R2}[2]
++	 and	x5,x9,#0x03ffffff
++	umlal	$ACC2,$IN23_1,${R1}[2]
++	 ubfx	x6,x8,#26,#26
++	umlal	$ACC1,$IN23_1,${R0}[2]
++	 ubfx	x7,x9,#26,#26
++	umlal	$ACC0,$IN23_1,${S4}[2]
++	 add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
++
++	umlal	$ACC4,$IN23_2,${R2}[2]
++	 extr	x8,x12,x8,#52
++	umlal	$ACC3,$IN23_2,${R1}[2]
++	 extr	x9,x13,x9,#52
++	umlal	$ACC2,$IN23_2,${R0}[2]
++	 add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
++	umlal	$ACC1,$IN23_2,${S4}[2]
++	 fmov	$IN23_0,x4
++	umlal	$ACC0,$IN23_2,${S3}[2]
++	 and	x8,x8,#0x03ffffff
++
++	umlal	$ACC4,$IN23_3,${R1}[2]
++	 and	x9,x9,#0x03ffffff
++	umlal	$ACC3,$IN23_3,${R0}[2]
++	 ubfx	x10,x12,#14,#26
++	umlal	$ACC2,$IN23_3,${S4}[2]
++	 ubfx	x11,x13,#14,#26
++	umlal	$ACC1,$IN23_3,${S3}[2]
++	 add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
++	umlal	$ACC0,$IN23_3,${S2}[2]
++	 fmov	$IN23_1,x6
++
++	add	$IN01_2,$IN01_2,$H2
++	 add	x12,$padbit,x12,lsr#40
++	umlal	$ACC4,$IN23_4,${R0}[2]
++	 add	x13,$padbit,x13,lsr#40
++	umlal	$ACC3,$IN23_4,${S4}[2]
++	 add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
++	umlal	$ACC2,$IN23_4,${S3}[2]
++	 add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
++	umlal	$ACC1,$IN23_4,${S2}[2]
++	 fmov	$IN23_2,x8
++	umlal	$ACC0,$IN23_4,${S1}[2]
++	 fmov	$IN23_3,x10
++
++	////////////////////////////////////////////////////////////////
++	// (hash+inp[0:1])*r^4 and accumulate
++
++	add	$IN01_0,$IN01_0,$H0
++	 fmov	$IN23_4,x12
++	umlal	$ACC3,$IN01_2,${R1}[0]
++	 ldp	x8,x12,[$inp],#16	// inp[0:1]
++	umlal	$ACC0,$IN01_2,${S3}[0]
++	 ldp	x9,x13,[$inp],#48
++	umlal	$ACC4,$IN01_2,${R2}[0]
++	umlal	$ACC1,$IN01_2,${S4}[0]
++	umlal	$ACC2,$IN01_2,${R0}[0]
++#ifdef	__AARCH64EB__
++	 rev	x8,x8
++	 rev	x12,x12
++	 rev	x9,x9
++	 rev	x13,x13
++#endif
++
++	add	$IN01_1,$IN01_1,$H1
++	umlal	$ACC3,$IN01_0,${R3}[0]
++	umlal	$ACC4,$IN01_0,${R4}[0]
++	 and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
++	umlal	$ACC2,$IN01_0,${R2}[0]
++	 and	x5,x9,#0x03ffffff
++	umlal	$ACC0,$IN01_0,${R0}[0]
++	 ubfx	x6,x8,#26,#26
++	umlal	$ACC1,$IN01_0,${R1}[0]
++	 ubfx	x7,x9,#26,#26
++
++	add	$IN01_3,$IN01_3,$H3
++	 add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
++	umlal	$ACC3,$IN01_1,${R2}[0]
++	 extr	x8,x12,x8,#52
++	umlal	$ACC4,$IN01_1,${R3}[0]
++	 extr	x9,x13,x9,#52
++	umlal	$ACC0,$IN01_1,${S4}[0]
++	 add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
++	umlal	$ACC2,$IN01_1,${R1}[0]
++	 fmov	$IN01_0,x4
++	umlal	$ACC1,$IN01_1,${R0}[0]
++	 and	x8,x8,#0x03ffffff
++
++	add	$IN01_4,$IN01_4,$H4
++	 and	x9,x9,#0x03ffffff
++	umlal	$ACC3,$IN01_3,${R0}[0]
++	 ubfx	x10,x12,#14,#26
++	umlal	$ACC0,$IN01_3,${S2}[0]
++	 ubfx	x11,x13,#14,#26
++	umlal	$ACC4,$IN01_3,${R1}[0]
++	 add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
++	umlal	$ACC1,$IN01_3,${S3}[0]
++	 fmov	$IN01_1,x6
++	umlal	$ACC2,$IN01_3,${S4}[0]
++	 add	x12,$padbit,x12,lsr#40
++
++	umlal	$ACC3,$IN01_4,${S4}[0]
++	 add	x13,$padbit,x13,lsr#40
++	umlal	$ACC0,$IN01_4,${S1}[0]
++	 add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
++	umlal	$ACC4,$IN01_4,${R0}[0]
++	 add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
++	umlal	$ACC1,$IN01_4,${S2}[0]
++	 fmov	$IN01_2,x8
++	umlal	$ACC2,$IN01_4,${S3}[0]
++	 fmov	$IN01_3,x10
++	 fmov	$IN01_4,x12
++
++	/////////////////////////////////////////////////////////////////
++	// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
++	// and P. Schwabe
++	//
++	// [see discussion in poly1305-armv4 module]
++
++	ushr	$T0.2d,$ACC3,#26
++	xtn	$H3,$ACC3
++	 ushr	$T1.2d,$ACC0,#26
++	 and	$ACC0,$ACC0,$MASK.2d
++	add	$ACC4,$ACC4,$T0.2d	// h3 -> h4
++	bic	$H3,#0xfc,lsl#24	// &=0x03ffffff
++	 add	$ACC1,$ACC1,$T1.2d	// h0 -> h1
++
++	ushr	$T0.2d,$ACC4,#26
++	xtn	$H4,$ACC4
++	 ushr	$T1.2d,$ACC1,#26
++	 xtn	$H1,$ACC1
++	bic	$H4,#0xfc,lsl#24
++	 add	$ACC2,$ACC2,$T1.2d	// h1 -> h2
++
++	add	$ACC0,$ACC0,$T0.2d
++	shl	$T0.2d,$T0.2d,#2
++	 shrn	$T1.2s,$ACC2,#26
++	 xtn	$H2,$ACC2
++	add	$ACC0,$ACC0,$T0.2d	// h4 -> h0
++	 bic	$H1,#0xfc,lsl#24
++	 add	$H3,$H3,$T1.2s		// h2 -> h3
++	 bic	$H2,#0xfc,lsl#24
++
++	shrn	$T0.2s,$ACC0,#26
++	xtn	$H0,$ACC0
++	 ushr	$T1.2s,$H3,#26
++	 bic	$H3,#0xfc,lsl#24
++	 bic	$H0,#0xfc,lsl#24
++	add	$H1,$H1,$T0.2s		// h0 -> h1
++	 add	$H4,$H4,$T1.2s		// h3 -> h4
++
++	b.hi	.Loop_neon
++
++.Lskip_loop:
++	dup	$IN23_2,${IN23_2}[0]
++	add	$IN01_2,$IN01_2,$H2
++
++	////////////////////////////////////////////////////////////////
++	// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
++
++	adds	$len,$len,#32
++	b.ne	.Long_tail
++
++	dup	$IN23_2,${IN01_2}[0]
++	add	$IN23_0,$IN01_0,$H0
++	add	$IN23_3,$IN01_3,$H3
++	add	$IN23_1,$IN01_1,$H1
++	add	$IN23_4,$IN01_4,$H4
++
++.Long_tail:
++	dup	$IN23_0,${IN23_0}[0]
++	umull2	$ACC0,$IN23_2,${S3}
++	umull2	$ACC3,$IN23_2,${R1}
++	umull2	$ACC4,$IN23_2,${R2}
++	umull2	$ACC2,$IN23_2,${R0}
++	umull2	$ACC1,$IN23_2,${S4}
++
++	dup	$IN23_1,${IN23_1}[0]
++	umlal2	$ACC0,$IN23_0,${R0}
++	umlal2	$ACC2,$IN23_0,${R2}
++	umlal2	$ACC3,$IN23_0,${R3}
++	umlal2	$ACC4,$IN23_0,${R4}
++	umlal2	$ACC1,$IN23_0,${R1}
++
++	dup	$IN23_3,${IN23_3}[0]
++	umlal2	$ACC0,$IN23_1,${S4}
++	umlal2	$ACC3,$IN23_1,${R2}
++	umlal2	$ACC2,$IN23_1,${R1}
++	umlal2	$ACC4,$IN23_1,${R3}
++	umlal2	$ACC1,$IN23_1,${R0}
++
++	dup	$IN23_4,${IN23_4}[0]
++	umlal2	$ACC3,$IN23_3,${R0}
++	umlal2	$ACC4,$IN23_3,${R1}
++	umlal2	$ACC0,$IN23_3,${S2}
++	umlal2	$ACC1,$IN23_3,${S3}
++	umlal2	$ACC2,$IN23_3,${S4}
++
++	umlal2	$ACC3,$IN23_4,${S4}
++	umlal2	$ACC0,$IN23_4,${S1}
++	umlal2	$ACC4,$IN23_4,${R0}
++	umlal2	$ACC1,$IN23_4,${S2}
++	umlal2	$ACC2,$IN23_4,${S3}
++
++	b.eq	.Lshort_tail
++
++	////////////////////////////////////////////////////////////////
++	// (hash+inp[0:1])*r^4:r^3 and accumulate
++
++	add	$IN01_0,$IN01_0,$H0
++	umlal	$ACC3,$IN01_2,${R1}
++	umlal	$ACC0,$IN01_2,${S3}
++	umlal	$ACC4,$IN01_2,${R2}
++	umlal	$ACC1,$IN01_2,${S4}
++	umlal	$ACC2,$IN01_2,${R0}
++
++	add	$IN01_1,$IN01_1,$H1
++	umlal	$ACC3,$IN01_0,${R3}
++	umlal	$ACC0,$IN01_0,${R0}
++	umlal	$ACC4,$IN01_0,${R4}
++	umlal	$ACC1,$IN01_0,${R1}
++	umlal	$ACC2,$IN01_0,${R2}
++
++	add	$IN01_3,$IN01_3,$H3
++	umlal	$ACC3,$IN01_1,${R2}
++	umlal	$ACC0,$IN01_1,${S4}
++	umlal	$ACC4,$IN01_1,${R3}
++	umlal	$ACC1,$IN01_1,${R0}
++	umlal	$ACC2,$IN01_1,${R1}
++
++	add	$IN01_4,$IN01_4,$H4
++	umlal	$ACC3,$IN01_3,${R0}
++	umlal	$ACC0,$IN01_3,${S2}
++	umlal	$ACC4,$IN01_3,${R1}
++	umlal	$ACC1,$IN01_3,${S3}
++	umlal	$ACC2,$IN01_3,${S4}
++
++	umlal	$ACC3,$IN01_4,${S4}
++	umlal	$ACC0,$IN01_4,${S1}
++	umlal	$ACC4,$IN01_4,${R0}
++	umlal	$ACC1,$IN01_4,${S2}
++	umlal	$ACC2,$IN01_4,${S3}
++
++.Lshort_tail:
++	////////////////////////////////////////////////////////////////
++	// horizontal add
++
++	addp	$ACC3,$ACC3,$ACC3
++	 ldp	d8,d9,[sp,#16]		// meet ABI requirements
++	addp	$ACC0,$ACC0,$ACC0
++	 ldp	d10,d11,[sp,#32]
++	addp	$ACC4,$ACC4,$ACC4
++	 ldp	d12,d13,[sp,#48]
++	addp	$ACC1,$ACC1,$ACC1
++	 ldp	d14,d15,[sp,#64]
++	addp	$ACC2,$ACC2,$ACC2
++
++	////////////////////////////////////////////////////////////////
++	// lazy reduction, but without narrowing
++
++	ushr	$T0.2d,$ACC3,#26
++	and	$ACC3,$ACC3,$MASK.2d
++	 ushr	$T1.2d,$ACC0,#26
++	 and	$ACC0,$ACC0,$MASK.2d
++
++	add	$ACC4,$ACC4,$T0.2d	// h3 -> h4
++	 add	$ACC1,$ACC1,$T1.2d	// h0 -> h1
++
++	ushr	$T0.2d,$ACC4,#26
++	and	$ACC4,$ACC4,$MASK.2d
++	 ushr	$T1.2d,$ACC1,#26
++	 and	$ACC1,$ACC1,$MASK.2d
++	 add	$ACC2,$ACC2,$T1.2d	// h1 -> h2
++
++	add	$ACC0,$ACC0,$T0.2d
++	shl	$T0.2d,$T0.2d,#2
++	 ushr	$T1.2d,$ACC2,#26
++	 and	$ACC2,$ACC2,$MASK.2d
++	add	$ACC0,$ACC0,$T0.2d	// h4 -> h0
++	 add	$ACC3,$ACC3,$T1.2d	// h2 -> h3
++
++	ushr	$T0.2d,$ACC0,#26
++	and	$ACC0,$ACC0,$MASK.2d
++	 ushr	$T1.2d,$ACC3,#26
++	 and	$ACC3,$ACC3,$MASK.2d
++	add	$ACC1,$ACC1,$T0.2d	// h0 -> h1
++	 add	$ACC4,$ACC4,$T1.2d	// h3 -> h4
++
++	////////////////////////////////////////////////////////////////
++	// write the result, can be partially reduced
++
++	st4	{$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16
++	st1	{$ACC4}[0],[$ctx]
++
++.Lno_data_neon:
++	ldr	x29,[sp],#80
++	ret
++.size	poly1305_blocks_neon,.-poly1305_blocks_neon
++
++.type	poly1305_emit_neon,%function
++.align	5
++poly1305_emit_neon:
++	ldr	$is_base2_26,[$ctx,#24]
++	cbz	$is_base2_26,poly1305_emit
++
++	ldp	w10,w11,[$ctx]		// load hash value base 2^26
++	ldp	w12,w13,[$ctx,#8]
++	ldr	w14,[$ctx,#16]
++
++	add	$h0,x10,x11,lsl#26	// base 2^26 -> base 2^64
++	lsr	$h1,x12,#12
++	adds	$h0,$h0,x12,lsl#52
++	add	$h1,$h1,x13,lsl#14
++	adc	$h1,$h1,xzr
++	lsr	$h2,x14,#24
++	adds	$h1,$h1,x14,lsl#40
++	adc	$h2,$h2,xzr		// can be partially reduced...
++
++	ldp	$t0,$t1,[$nonce]	// load nonce
++
++	and	$d0,$h2,#-4		// ... so reduce
++	add	$d0,$d0,$h2,lsr#2
++	and	$h2,$h2,#3
++	adds	$h0,$h0,$d0
++	adcs	$h1,$h1,xzr
++	adc	$h2,$h2,xzr
++
++	adds	$d0,$h0,#5		// compare to modulus
++	adcs	$d1,$h1,xzr
++	adc	$d2,$h2,xzr
++
++	tst	$d2,#-4			// see if it's carried/borrowed
++
++	csel	$h0,$h0,$d0,eq
++	csel	$h1,$h1,$d1,eq
++
++#ifdef	__AARCH64EB__
++	ror	$t0,$t0,#32		// flip nonce words
++	ror	$t1,$t1,#32
++#endif
++	adds	$h0,$h0,$t0		// accumulate nonce
++	adc	$h1,$h1,$t1
++#ifdef	__AARCH64EB__
++	rev	$h0,$h0			// flip output bytes
++	rev	$h1,$h1
++#endif
++	stp	$h0,$h1,[$mac]		// write result
++
++	ret
++.size	poly1305_emit_neon,.-poly1305_emit_neon
++#endif
++
++.align	5
++.Lzeros:
++.long	0,0,0,0,0,0,0,0
++#ifndef __KERNEL__
++.LOPENSSL_armcap_P:
++#ifdef	__ILP32__
++.long	OPENSSL_armcap_P-.
++#else
++.quad	OPENSSL_armcap_P-.
++#endif
++#endif
++.align	2
++___
++
++open SELF,$0;			# re-read this script to copy its leading comment block
++while(<SELF>) {
++	next if (/^#!/);	# drop the shebang line
++	last if (!s/^#/\/\// and !/^$/);	# turn a leading '#' into '//'; stop at the first line that is neither comment nor blank
++	print;
++}
++close SELF;
++# Fix up vector arrangement suffixes per instruction; at most the first matching rule in the chain applies.
++foreach (split("\n",$code)) {
++	s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/			or	# shrn: first operand .2d/.4d -> .2s
++	s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/	or	# fmov vN.<arr>,xM -> fmov dN,xM
++	(m/\bdup\b/ and (s/\.[24]s/.2d/g or 1))			or	# dup: .2s/.4s -> .2d
++	(m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1))	or	# eor/and: any matched arrangement -> .16b
++	(m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1))		or	# umull/umlal: .4s -> .2s
++	(m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1))		or	# umull2/umlal2: .2s -> .4s
++	(m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1));	# lane stores: .2d/.4d -> .s
++
++	s/\.[124]([sd])\[/.$1\[/;	# drop the lane count in front of a lane index, e.g. ".4s[" -> ".s["
++
++	print $_,"\n";
++}
++close STDOUT or die "error closing STDOUT: $!";	# a failed close means lost or truncated output
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/poly1305/poly1305-arm.pl	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,1276 @@
++#!/usr/bin/env perl
++# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
++#
++# This code is taken from the OpenSSL project but the author, Andy Polyakov,
++# has relicensed it under the licenses specified in the SPDX header above.
++# The original headers, including the original license headers, are
++# included below for completeness.
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++#			IALU(*)/gcc-4.4		NEON
++#
++# ARM11xx(ARMv6)	7.78/+100%		-
++# Cortex-A5		6.35/+130%		3.00
++# Cortex-A8		6.25/+115%		2.36
++# Cortex-A9		5.10/+95%		2.55
++# Cortex-A15		3.85/+85%		1.25(**)
++# Snapdragon S4		5.70/+100%		1.48(**)
++#
++# (*)	this is for -march=armv6, i.e. with a bunch of ldrb loading data;
++# (**)	these are trade-off results; they can be improved by ~8% but at
++#	the cost of 15/12% regression on Cortex-A5/A7; it's even possible
++#	to improve the Cortex-A9 result, but then A5/A7 lose more than 20%;
++
++$flavour = shift;			# first argument: flavour, or the output file name
++if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }	# ends in "name.ext": treat it as the output file, no flavour
++else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }	# otherwise take the first later argument ending in "name.ext"
++
++if ($flavour && $flavour ne "void") {	# real flavour: pipe the output through arm-xlate.pl
++    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;	# directory part of this script's path
++    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
++    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
++    die "can't locate arm-xlate.pl";
++
++    open STDOUT,"| \"$^X\" $xlate $flavour $output";	# $^X is the running perl interpreter
++} else {				# no flavour, or "void": write to $output directly
++    open STDOUT,">$output";
++}
++
++($ctx,$inp,$len,$padbit)=map("r$_",(0..3));	# r0..r3
++
++$code.=<<___;
++#ifndef	__KERNEL__
++# include "arm_arch.h"
++#else
++# define __ARM_ARCH__ __LINUX_ARM_ARCH__
++# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
++# define poly1305_init   poly1305_init_arm
++# define poly1305_blocks poly1305_blocks_arm
++# define poly1305_emit   poly1305_emit_arm
++#endif
++
++.text
++#if defined(__thumb2__)
++.syntax	unified
++.thumb
++#else
++.code	32
++#endif
++
++.globl	poly1305_emit
++.globl	poly1305_blocks
++.globl	poly1305_init
++.type	poly1305_init,%function
++.align	5
++poly1305_init:
++.Lpoly1305_init:
++	stmdb	sp!,{r4-r11}
++
++	eor	r3,r3,r3
++	cmp	$inp,#0
++	str	r3,[$ctx,#0]		@ zero hash value
++	str	r3,[$ctx,#4]
++	str	r3,[$ctx,#8]
++	str	r3,[$ctx,#12]
++	str	r3,[$ctx,#16]
++	str	r3,[$ctx,#36]		@ is_base2_26
++	add	$ctx,$ctx,#20
++
++#ifdef	__thumb2__
++	it	eq
++#endif
++	moveq	r0,#0
++	beq	.Lno_key
++
++#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
++	adr	r11,.Lpoly1305_init
++	ldr	r12,.LOPENSSL_armcap
++#endif
++	ldrb	r4,[$inp,#0]
++	mov	r10,#0x0fffffff
++	ldrb	r5,[$inp,#1]
++	and	r3,r10,#-4		@ 0x0ffffffc
++	ldrb	r6,[$inp,#2]
++	ldrb	r7,[$inp,#3]
++	orr	r4,r4,r5,lsl#8
++	ldrb	r5,[$inp,#4]
++	orr	r4,r4,r6,lsl#16
++	ldrb	r6,[$inp,#5]
++	orr	r4,r4,r7,lsl#24
++	ldrb	r7,[$inp,#6]
++	and	r4,r4,r10
++
++#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
++	ldr	r12,[r11,r12]		@ OPENSSL_armcap_P
++# ifdef	__APPLE__
++	ldr	r12,[r12]
++# endif
++#endif
++	ldrb	r8,[$inp,#7]
++	orr	r5,r5,r6,lsl#8
++	ldrb	r6,[$inp,#8]
++	orr	r5,r5,r7,lsl#16
++	ldrb	r7,[$inp,#9]
++	orr	r5,r5,r8,lsl#24
++	ldrb	r8,[$inp,#10]
++	and	r5,r5,r3
++
++#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
++	tst	r12,#ARMV7_NEON		@ check for NEON
++# ifdef	__APPLE__
++	adr	r9,poly1305_blocks_neon
++	adr	r11,poly1305_blocks
++#  ifdef __thumb2__
++	it	ne
++#  endif
++	movne	r11,r9
++	adr	r12,poly1305_emit
++	adr	r10,poly1305_emit_neon
++#  ifdef __thumb2__
++	it	ne
++#  endif
++	movne	r12,r10
++# else
++#  ifdef __thumb2__
++	itete	eq
++#  endif
++	addeq	r12,r11,#(poly1305_emit-.Lpoly1305_init)
++	addne	r12,r11,#(poly1305_emit_neon-.Lpoly1305_init)
++	addeq	r11,r11,#(poly1305_blocks-.Lpoly1305_init)
++	addne	r11,r11,#(poly1305_blocks_neon-.Lpoly1305_init)
++# endif
++# ifdef	__thumb2__
++	orr	r12,r12,#1	@ thumb-ify address
++	orr	r11,r11,#1
++# endif
++#endif
++	ldrb	r9,[$inp,#11]
++	orr	r6,r6,r7,lsl#8
++	ldrb	r7,[$inp,#12]
++	orr	r6,r6,r8,lsl#16
++	ldrb	r8,[$inp,#13]
++	orr	r6,r6,r9,lsl#24
++	ldrb	r9,[$inp,#14]
++	and	r6,r6,r3
++
++	ldrb	r10,[$inp,#15]
++	orr	r7,r7,r8,lsl#8
++	str	r4,[$ctx,#0]
++	orr	r7,r7,r9,lsl#16
++	str	r5,[$ctx,#4]
++	orr	r7,r7,r10,lsl#24
++	str	r6,[$ctx,#8]
++	and	r7,r7,r3
++	str	r7,[$ctx,#12]
++#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
++	stmia	r2,{r11,r12}		@ fill functions table
++	mov	r0,#1
++#else
++	mov	r0,#0
++#endif
++.Lno_key:
++	ldmia	sp!,{r4-r11}
++#if	__ARM_ARCH__>=5
++	ret				@ bx	lr
++#else
++	tst	lr,#1
++	moveq	pc,lr			@ be binary compatible with V4, yet
++	bx	lr			@ interoperable with Thumb ISA:-)
++#endif
++.size	poly1305_init,.-poly1305_init
++___
++{
++my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12));	# $h0..$h4 = r4..r8, $r0..$r3 = r9..r12
++my ($s1,$s2,$s3)=($r1,$r2,$r3);	# $s1..$s3 share registers with $r1..$r3, hence the "reload" loads from the stack in the loop
++
++$code.=<<___;
++.type	poly1305_blocks,%function
++.align	5
++poly1305_blocks:
++.Lpoly1305_blocks:
++	stmdb	sp!,{r3-r11,lr}
++
++	ands	$len,$len,#-16
++	beq	.Lno_data
++
++	cmp	$padbit,#0
++	add	$len,$len,$inp		@ end pointer
++	sub	sp,sp,#32
++
++	ldmia	$ctx,{$h0-$r3}		@ load context
++
++	str	$ctx,[sp,#12]		@ offload stuff
++	mov	lr,$inp
++	str	$len,[sp,#16]
++	str	$r1,[sp,#20]
++	str	$r2,[sp,#24]
++	str	$r3,[sp,#28]
++	b	.Loop
++
++.Loop:
++#if __ARM_ARCH__<7
++	ldrb	r0,[lr],#16		@ load input
++# ifdef	__thumb2__
++	it	hi
++# endif
++	addhi	$h4,$h4,#1		@ 1<<128
++	ldrb	r1,[lr,#-15]
++	ldrb	r2,[lr,#-14]
++	ldrb	r3,[lr,#-13]
++	orr	r1,r0,r1,lsl#8
++	ldrb	r0,[lr,#-12]
++	orr	r2,r1,r2,lsl#16
++	ldrb	r1,[lr,#-11]
++	orr	r3,r2,r3,lsl#24
++	ldrb	r2,[lr,#-10]
++	adds	$h0,$h0,r3		@ accumulate input
++
++	ldrb	r3,[lr,#-9]
++	orr	r1,r0,r1,lsl#8
++	ldrb	r0,[lr,#-8]
++	orr	r2,r1,r2,lsl#16
++	ldrb	r1,[lr,#-7]
++	orr	r3,r2,r3,lsl#24
++	ldrb	r2,[lr,#-6]
++	adcs	$h1,$h1,r3
++
++	ldrb	r3,[lr,#-5]
++	orr	r1,r0,r1,lsl#8
++	ldrb	r0,[lr,#-4]
++	orr	r2,r1,r2,lsl#16
++	ldrb	r1,[lr,#-3]
++	orr	r3,r2,r3,lsl#24
++	ldrb	r2,[lr,#-2]
++	adcs	$h2,$h2,r3
++
++	ldrb	r3,[lr,#-1]
++	orr	r1,r0,r1,lsl#8
++	str	lr,[sp,#8]		@ offload input pointer
++	orr	r2,r1,r2,lsl#16
++	add	$s1,$r1,$r1,lsr#2
++	orr	r3,r2,r3,lsl#24
++#else
++	ldr	r0,[lr],#16		@ load input
++# ifdef	__thumb2__
++	it	hi
++# endif
++	addhi	$h4,$h4,#1		@ padbit
++	ldr	r1,[lr,#-12]
++	ldr	r2,[lr,#-8]
++	ldr	r3,[lr,#-4]
++# ifdef	__ARMEB__
++	rev	r0,r0
++	rev	r1,r1
++	rev	r2,r2
++	rev	r3,r3
++# endif
++	adds	$h0,$h0,r0		@ accumulate input
++	str	lr,[sp,#8]		@ offload input pointer
++	adcs	$h1,$h1,r1
++	add	$s1,$r1,$r1,lsr#2
++	adcs	$h2,$h2,r2
++#endif
++	add	$s2,$r2,$r2,lsr#2
++	adcs	$h3,$h3,r3
++	add	$s3,$r3,$r3,lsr#2
++
++	umull	r2,r3,$h1,$r0
++	 adc	$h4,$h4,#0
++	umull	r0,r1,$h0,$r0
++	umlal	r2,r3,$h4,$s1
++	umlal	r0,r1,$h3,$s1
++	ldr	$r1,[sp,#20]		@ reload $r1
++	umlal	r2,r3,$h2,$s3
++	umlal	r0,r1,$h1,$s3
++	umlal	r2,r3,$h3,$s2
++	umlal	r0,r1,$h2,$s2
++	umlal	r2,r3,$h0,$r1
++	str	r0,[sp,#0]		@ future $h0
++	 mul	r0,$s2,$h4
++	ldr	$r2,[sp,#24]		@ reload $r2
++	adds	r2,r2,r1		@ d1+=d0>>32
++	 eor	r1,r1,r1
++	adc	lr,r3,#0		@ future $h2
++	str	r2,[sp,#4]		@ future $h1
++
++	mul	r2,$s3,$h4
++	eor	r3,r3,r3
++	umlal	r0,r1,$h3,$s3
++	ldr	$r3,[sp,#28]		@ reload $r3
++	umlal	r2,r3,$h3,$r0
++	umlal	r0,r1,$h2,$r0
++	umlal	r2,r3,$h2,$r1
++	umlal	r0,r1,$h1,$r1
++	umlal	r2,r3,$h1,$r2
++	umlal	r0,r1,$h0,$r2
++	umlal	r2,r3,$h0,$r3
++	ldr	$h0,[sp,#0]
++	mul	$h4,$r0,$h4
++	ldr	$h1,[sp,#4]
++
++	adds	$h2,lr,r0		@ d2+=d1>>32
++	ldr	lr,[sp,#8]		@ reload input pointer
++	adc	r1,r1,#0
++	adds	$h3,r2,r1		@ d3+=d2>>32
++	ldr	r0,[sp,#16]		@ reload end pointer
++	adc	r3,r3,#0
++	add	$h4,$h4,r3		@ h4+=d3>>32
++
++	and	r1,$h4,#-4
++	and	$h4,$h4,#3
++	add	r1,r1,r1,lsr#2		@ *=5
++	adds	$h0,$h0,r1
++	adcs	$h1,$h1,#0
++	adcs	$h2,$h2,#0
++	adcs	$h3,$h3,#0
++	adc	$h4,$h4,#0
++
++	cmp	r0,lr			@ done yet?
++	bhi	.Loop
++
++	ldr	$ctx,[sp,#12]
++	add	sp,sp,#32
++	stmia	$ctx,{$h0-$h4}		@ store the result
++
++.Lno_data:
++#if	__ARM_ARCH__>=5
++	ldmia	sp!,{r3-r11,pc}
++#else
++	ldmia	sp!,{r3-r11,lr}
++	tst	lr,#1
++	moveq	pc,lr			@ be binary compatible with V4, yet
++	bx	lr			@ interoperable with Thumb ISA:-)
++#endif
++.size	poly1305_blocks,.-poly1305_blocks
++___
++}
++{
++my ($ctx,$mac,$nonce)=map("r$_",(0..2));
++my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11));
++my $g4=$h4;
++
++$code.=<<___;
++.type	poly1305_emit,%function
++.align	5
++poly1305_emit:				@ mac[0..15] = (h mod 2^130-5) + nonce; ctx, mac, nonce arrive in r0-r2
++	stmdb	sp!,{r4-r11}		@ preserve the registers that hold h and g below
++.Lpoly1305_emit_enter:			@ poly1305_emit_neon branches here after pushing r4-r11 itself
++
++	ldmia	$ctx,{$h0-$h4}		@ load the five hash words
++	adds	$g0,$h0,#5		@ compare to modulus: g = h + 5
++	adcs	$g1,$h1,#0
++	adcs	$g2,$h2,#0
++	adcs	$g3,$h3,#0
++	adc	$g4,$h4,#0		@ g4 and h4 share one register; h4 is not read again
++	tst	$g4,#4			@ did it carry/borrow? bit 2 set means h + 5 reached 2^130
++
++#ifdef	__thumb2__
++	it	ne
++#endif
++	movne	$h0,$g0			@ carried: keep g, which equals h - (2^130-5) modulo 2^128
++	ldr	$g0,[$nonce,#0]		@ nonce loads are interleaved with the selects
++#ifdef	__thumb2__
++	it	ne
++#endif
++	movne	$h1,$g1
++	ldr	$g1,[$nonce,#4]
++#ifdef	__thumb2__
++	it	ne
++#endif
++	movne	$h2,$g2
++	ldr	$g2,[$nonce,#8]
++#ifdef	__thumb2__
++	it	ne
++#endif
++	movne	$h3,$g3
++	ldr	$g3,[$nonce,#12]
++
++	adds	$h0,$h0,$g0		@ accumulate nonce; the carry out of the top word is dropped
++	adcs	$h1,$h1,$g1
++	adcs	$h2,$h2,$g2
++	adc	$h3,$h3,$g3
++
++#if __ARM_ARCH__>=7
++# ifdef __ARMEB__
++	rev	$h0,$h0			@ big-endian build: swap so the word stores emit little-endian bytes
++	rev	$h1,$h1
++	rev	$h2,$h2
++	rev	$h3,$h3
++# endif
++	str	$h0,[$mac,#0]		@ store the 16-byte result with word stores
++	str	$h1,[$mac,#4]
++	str	$h2,[$mac,#8]
++	str	$h3,[$mac,#12]
++#else
++	strb	$h0,[$mac,#0]		@ older cores: store each word a byte at a time, lowest byte first
++	mov	$h0,$h0,lsr#8
++	strb	$h1,[$mac,#4]
++	mov	$h1,$h1,lsr#8
++	strb	$h2,[$mac,#8]
++	mov	$h2,$h2,lsr#8
++	strb	$h3,[$mac,#12]
++	mov	$h3,$h3,lsr#8
++
++	strb	$h0,[$mac,#1]
++	mov	$h0,$h0,lsr#8
++	strb	$h1,[$mac,#5]
++	mov	$h1,$h1,lsr#8
++	strb	$h2,[$mac,#9]
++	mov	$h2,$h2,lsr#8
++	strb	$h3,[$mac,#13]
++	mov	$h3,$h3,lsr#8
++
++	strb	$h0,[$mac,#2]
++	mov	$h0,$h0,lsr#8
++	strb	$h1,[$mac,#6]
++	mov	$h1,$h1,lsr#8
++	strb	$h2,[$mac,#10]
++	mov	$h2,$h2,lsr#8
++	strb	$h3,[$mac,#14]
++	mov	$h3,$h3,lsr#8
++
++	strb	$h0,[$mac,#3]
++	strb	$h1,[$mac,#7]
++	strb	$h2,[$mac,#11]
++	strb	$h3,[$mac,#15]
++#endif
++	ldmia	sp!,{r4-r11}		@ undo the prologue push
++#if	__ARM_ARCH__>=5
++	ret				@ bx	lr
++#else
++	tst	lr,#1
++	moveq	pc,lr			@ be binary compatible with V4, yet
++	bx	lr			@ interoperable with Thumb ISA:-)
++#endif
++.size	poly1305_emit,.-poly1305_emit
++___
++{
++my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9));
++my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14));
++my ($T0,$T1,$MASK) = map("q$_",(15,4,0));
++
++my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7));
++
++$code.=<<___;
++#if (defined(__KERNEL__) && defined(CONFIG_KERNEL_MODE_NEON)) || (!defined(__KERNEL__) && __ARM_MAX_ARCH__>=7)
++.fpu	neon
++
++.type	poly1305_init_neon,%function
++.align	5
++poly1305_init_neon:			@ build the r^1..r^4 tables (base 2^26, plus 5*r copies) at ctx+48
++.Lpoly1305_init_neon:			@ local alias, target of the bl in poly1305_blocks_neon
++	ldr	r4,[$ctx,#20]		@ load key base 2^32
++	ldr	r5,[$ctx,#24]
++	ldr	r6,[$ctx,#28]
++	ldr	r7,[$ctx,#32]
++
++	and	r2,r4,#0x03ffffff	@ base 2^32 -> base 2^26
++	mov	r3,r4,lsr#26
++	mov	r4,r5,lsr#20
++	orr	r3,r3,r5,lsl#6
++	mov	r5,r6,lsr#14
++	orr	r4,r4,r6,lsl#12
++	mov	r6,r7,lsr#8		@ top limb: only 24 bits remain, so no mask follows
++	orr	r5,r5,r7,lsl#18
++	and	r3,r3,#0x03ffffff
++	and	r4,r4,#0x03ffffff
++	and	r5,r5,#0x03ffffff
++
++	vdup.32	$R0,r2			@ r^1 in both lanes
++	add	r2,r3,r3,lsl#2		@ *5
++	vdup.32	$R1,r3
++	add	r3,r4,r4,lsl#2
++	vdup.32	$S1,r2
++	vdup.32	$R2,r4
++	add	r4,r5,r5,lsl#2
++	vdup.32	$S2,r3
++	vdup.32	$R3,r5
++	add	r5,r6,r6,lsl#2
++	vdup.32	$S3,r4
++	vdup.32	$R4,r6
++	vdup.32	$S4,r5
++
++	mov	$zeros,#2		@ counter: two passes through .Lsquare_neon
++
++.Lsquare_neon:
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
++	@ d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
++	@ d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
++	@ d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
++	@ d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
++
++	vmull.u32	$D0,$R0,${R0}[1]	@ both lanes are multiplied by the lane [1] value throughout
++	vmull.u32	$D1,$R1,${R0}[1]
++	vmull.u32	$D2,$R2,${R0}[1]
++	vmull.u32	$D3,$R3,${R0}[1]
++	vmull.u32	$D4,$R4,${R0}[1]
++
++	vmlal.u32	$D0,$R4,${S1}[1]
++	vmlal.u32	$D1,$R0,${R1}[1]
++	vmlal.u32	$D2,$R1,${R1}[1]
++	vmlal.u32	$D3,$R2,${R1}[1]
++	vmlal.u32	$D4,$R3,${R1}[1]
++
++	vmlal.u32	$D0,$R3,${S2}[1]
++	vmlal.u32	$D1,$R4,${S2}[1]
++	vmlal.u32	$D3,$R1,${R2}[1]
++	vmlal.u32	$D2,$R0,${R2}[1]
++	vmlal.u32	$D4,$R2,${R2}[1]
++
++	vmlal.u32	$D0,$R2,${S3}[1]
++	vmlal.u32	$D3,$R0,${R3}[1]
++	vmlal.u32	$D1,$R3,${S3}[1]
++	vmlal.u32	$D2,$R4,${S3}[1]
++	vmlal.u32	$D4,$R1,${R3}[1]
++
++	vmlal.u32	$D3,$R4,${S4}[1]
++	vmlal.u32	$D0,$R1,${S4}[1]
++	vmlal.u32	$D1,$R2,${S4}[1]
++	vmlal.u32	$D2,$R3,${S4}[1]
++	vmlal.u32	$D4,$R0,${R4}[1]
++
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
++	@ and P. Schwabe
++	@
++	@ H0>>+H1>>+H2>>+H3>>+H4
++	@ H3>>+H4>>*5+H0>>+H1
++	@
++	@ Trivia.
++	@
++	@ Result of multiplication of n-bit number by m-bit number is
++	@ n+m bits wide. However! Even though 2^n is a n+1-bit number,
++	@ m-bit number multiplied by 2^n is still n+m bits wide.
++	@
++	@ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
++	@ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
++	@ one is n+1 bits wide.
++	@
++	@ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
++	@ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
++	@ can be 27. However! In cases when their width exceeds 26 bits
++	@ they are limited by 2^26+2^6. This in turn means that *sum*
++	@ of the products with these values can still be viewed as sum
++	@ of 52-bit numbers as long as the amount of addends is not a
++	@ power of 2. For example,
++	@
++	@ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
++	@
++	@ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
++	@ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
++	@ 8 * (2^52) or 2^55. However, the value is then multiplied by
++	@ 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
++	@ which is less than 32 * (2^52) or 2^57. And when processing
++	@ data we are looking at triple as many addends...
++	@
++	@ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
++	@ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
++	@ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
++	@ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
++	@ instruction accepts 2x32-bit input and writes 2x64-bit result.
++	@ This means that result of reduction has to be compressed upon
++	@ loop wrap-around. This can be done in the process of reduction
++	@ to minimize amount of instructions [as well as amount of
++	@ 128-bit instructions, which benefits low-end processors], but
++	@ one has to watch for H2 (which is narrower than H0) and 5*H4
++	@ not being wider than 58 bits, so that result of right shift
++	@ by 26 bits fits in 32 bits. This is also useful on x86,
++	@ because it allows to use paddd in place for paddq, which
++	@ benefits Atom, where paddq is ridiculously slow.
++
++	vshr.u64	$T0,$D3,#26
++	vmovn.i64	$D3#lo,$D3
++	 vshr.u64	$T1,$D0,#26
++	 vmovn.i64	$D0#lo,$D0
++	vadd.i64	$D4,$D4,$T0		@ h3 -> h4
++	vbic.i32	$D3#lo,#0xfc000000	@ &=0x03ffffff
++	 vadd.i64	$D1,$D1,$T1		@ h0 -> h1
++	 vbic.i32	$D0#lo,#0xfc000000
++
++	vshrn.u64	$T0#lo,$D4,#26
++	vmovn.i64	$D4#lo,$D4
++	 vshr.u64	$T1,$D1,#26
++	 vmovn.i64	$D1#lo,$D1
++	 vadd.i64	$D2,$D2,$T1		@ h1 -> h2
++	vbic.i32	$D4#lo,#0xfc000000
++	 vbic.i32	$D1#lo,#0xfc000000
++
++	vadd.i32	$D0#lo,$D0#lo,$T0#lo
++	vshl.u32	$T0#lo,$T0#lo,#2
++	 vshrn.u64	$T1#lo,$D2,#26
++	 vmovn.i64	$D2#lo,$D2
++	vadd.i32	$D0#lo,$D0#lo,$T0#lo	@ h4 -> h0
++	 vadd.i32	$D3#lo,$D3#lo,$T1#lo	@ h2 -> h3
++	 vbic.i32	$D2#lo,#0xfc000000
++
++	vshr.u32	$T0#lo,$D0#lo,#26
++	vbic.i32	$D0#lo,#0xfc000000
++	 vshr.u32	$T1#lo,$D3#lo,#26
++	 vbic.i32	$D3#lo,#0xfc000000
++	vadd.i32	$D1#lo,$D1#lo,$T0#lo	@ h0 -> h1
++	 vadd.i32	$D4#lo,$D4#lo,$T1#lo	@ h3 -> h4
++
++	subs		$zeros,$zeros,#1
++	beq		.Lsquare_break_neon	@ second pass finished
++
++	add		$tbl0,$ctx,#(48+0*9*4)	@ table 0 receives lane 0 (r^1)
++	add		$tbl1,$ctx,#(48+1*9*4)	@ table 1 receives lane 1 (r^2)
++
++	vtrn.32		$R0,$D0#lo		@ r^2:r^1
++	vtrn.32		$R2,$D2#lo
++	vtrn.32		$R3,$D3#lo
++	vtrn.32		$R1,$D1#lo
++	vtrn.32		$R4,$D4#lo
++
++	vshl.u32	$S2,$R2,#2		@ *5
++	vshl.u32	$S3,$R3,#2
++	vshl.u32	$S1,$R1,#2
++	vshl.u32	$S4,$R4,#2
++	vadd.i32	$S2,$S2,$R2
++	vadd.i32	$S1,$S1,$R1
++	vadd.i32	$S3,$S3,$R3
++	vadd.i32	$S4,$S4,$R4
++
++	vst4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!	@ each table is 9 words: R0,R1,S1,R2,S2,R3,S3,R4,S4
++	vst4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
++	vst4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
++	vst4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
++	vst1.32		{${S4}[0]},[$tbl0,:32]
++	vst1.32		{${S4}[1]},[$tbl1,:32]
++
++	b		.Lsquare_neon		@ second pass multiplies r^2:r^1 by lane [1]
++
++.align	4
++.Lsquare_break_neon:
++	add		$tbl0,$ctx,#(48+2*4*9)	@ table 2 receives lane 0 (r^3)
++	add		$tbl1,$ctx,#(48+3*4*9)	@ table 3 receives lane 1 (r^4)
++
++	vmov		$R0,$D0#lo		@ r^4:r^3
++	vshl.u32	$S1,$D1#lo,#2		@ *5
++	vmov		$R1,$D1#lo
++	vshl.u32	$S2,$D2#lo,#2
++	vmov		$R2,$D2#lo
++	vshl.u32	$S3,$D3#lo,#2
++	vmov		$R3,$D3#lo
++	vshl.u32	$S4,$D4#lo,#2
++	vmov		$R4,$D4#lo
++	vadd.i32	$S1,$S1,$D1#lo
++	vadd.i32	$S2,$S2,$D2#lo
++	vadd.i32	$S3,$S3,$D3#lo
++	vadd.i32	$S4,$S4,$D4#lo
++
++	vst4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
++	vst4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
++	vst4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
++	vst4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
++	vst1.32		{${S4}[0]},[$tbl0]
++	vst1.32		{${S4}[1]},[$tbl1]
++
++	ret				@ bx	lr
++.size	poly1305_init_neon,.-poly1305_init_neon
++
++#ifdef __KERNEL__
++.globl	poly1305_blocks_neon
++#endif
++.type	poly1305_blocks_neon,%function
++.align	5
++poly1305_blocks_neon:			@ absorb whole 16-byte blocks with NEON; the hash is left in base 2^26
++	ldr	ip,[$ctx,#36]		@ is_base2_26
++	ands	$len,$len,#-16		@ round the length down to whole blocks
++	beq	.Lno_data_neon
++
++	cmp	$len,#64
++	bhs	.Lenter_neon
++	tst	ip,ip			@ is_base2_26?
++	beq	.Lpoly1305_blocks	@ under 64 bytes and hash still base 2^32: use the other code path
++
++.Lenter_neon:
++	stmdb	sp!,{r4-r7}		@ r4-r7 serve as in2, zeros, tbl0, tbl1
++	vstmdb	sp!,{d8-d15}		@ ABI specification says so
++
++	tst	ip,ip			@ is_base2_26?
++	bne	.Lbase2_26_neon
++
++	stmdb	sp!,{r1-r3,lr}		@ the call and the conversion below overwrite these
++	bl	.Lpoly1305_init_neon	@ first NEON use: fill the power tables at ctx+48
++
++	ldr	r4,[$ctx,#0]		@ load hash value base 2^32
++	ldr	r5,[$ctx,#4]
++	ldr	r6,[$ctx,#8]
++	ldr	r7,[$ctx,#12]
++	ldr	ip,[$ctx,#16]
++
++	and	r2,r4,#0x03ffffff	@ base 2^32 -> base 2^26
++	mov	r3,r4,lsr#26
++	 veor	$D0#lo,$D0#lo,$D0#lo
++	mov	r4,r5,lsr#20
++	orr	r3,r3,r5,lsl#6
++	 veor	$D1#lo,$D1#lo,$D1#lo
++	mov	r5,r6,lsr#14
++	orr	r4,r4,r6,lsl#12
++	 veor	$D2#lo,$D2#lo,$D2#lo
++	mov	r6,r7,lsr#8
++	orr	r5,r5,r7,lsl#18
++	 veor	$D3#lo,$D3#lo,$D3#lo
++	and	r3,r3,#0x03ffffff
++	orr	r6,r6,ip,lsl#24		@ fifth hash word supplies the upper bits of the top limb
++	 veor	$D4#lo,$D4#lo,$D4#lo
++	and	r4,r4,#0x03ffffff
++	mov	r1,#1
++	and	r5,r5,#0x03ffffff
++	str	r1,[$ctx,#36]		@ is_base2_26
++
++	vmov.32	$D0#lo[0],r2
++	vmov.32	$D1#lo[0],r3
++	vmov.32	$D2#lo[0],r4
++	vmov.32	$D3#lo[0],r5
++	vmov.32	$D4#lo[0],r6
++	adr	$zeros,.Lzeros
++
++	ldmia	sp!,{r1-r3,lr}
++	b	.Lbase2_32_neon
++
++.align	4
++.Lbase2_26_neon:
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ load hash value
++
++	veor		$D0#lo,$D0#lo,$D0#lo
++	veor		$D1#lo,$D1#lo,$D1#lo
++	veor		$D2#lo,$D2#lo,$D2#lo
++	veor		$D3#lo,$D3#lo,$D3#lo
++	veor		$D4#lo,$D4#lo,$D4#lo
++	vld4.32		{$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
++	adr		$zeros,.Lzeros
++	vld1.32		{$D4#lo[0]},[$ctx]
++	sub		$ctx,$ctx,#16		@ rewind
++
++.Lbase2_32_neon:
++	add		$in2,$inp,#32
++	mov		$padbit,$padbit,lsl#24	@ position the pad bit at bit 24 of the top limb (2^128 = 2^24 * 2^104)
++	tst		$len,#31
++	beq		.Leven			@ even block count: no single-block prefix needed
++
++	vld4.32		{$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]!	@ odd count: take one block into lane 0
++	vmov.32		$H4#lo[0],$padbit
++	sub		$len,$len,#16
++	add		$in2,$inp,#32
++
++# ifdef	__ARMEB__
++	vrev32.8	$H0,$H0
++	vrev32.8	$H3,$H3
++	vrev32.8	$H1,$H1
++	vrev32.8	$H2,$H2
++# endif
++	vsri.u32	$H4#lo,$H3#lo,#8	@ base 2^32 -> base 2^26
++	vshl.u32	$H3#lo,$H3#lo,#18
++
++	vsri.u32	$H3#lo,$H2#lo,#14
++	vshl.u32	$H2#lo,$H2#lo,#12
++	vadd.i32	$H4#hi,$H4#lo,$D4#lo	@ add hash value and move to #hi
++
++	vbic.i32	$H3#lo,#0xfc000000
++	vsri.u32	$H2#lo,$H1#lo,#20
++	vshl.u32	$H1#lo,$H1#lo,#6
++
++	vbic.i32	$H2#lo,#0xfc000000
++	vsri.u32	$H1#lo,$H0#lo,#26
++	vadd.i32	$H3#hi,$H3#lo,$D3#lo
++
++	vbic.i32	$H0#lo,#0xfc000000
++	vbic.i32	$H1#lo,#0xfc000000
++	vadd.i32	$H2#hi,$H2#lo,$D2#lo
++
++	vadd.i32	$H0#hi,$H0#lo,$D0#lo
++	vadd.i32	$H1#hi,$H1#lo,$D1#lo
++
++	mov		$tbl1,$zeros		@ lane [1] multiplier comes from .Lzeros
++	add		$tbl0,$ctx,#48		@ lane [0] multiplier is the first table (r^1)
++
++	cmp		$len,$len		@ force eq so that .Long_tail skips its r^4:r^3 part
++	b		.Long_tail
++
++.align	4
++.Leven:
++	subs		$len,$len,#64
++	it		lo
++	movlo		$in2,$zeros		@ fewer than 64 bytes left: read zeros in place of inp[2:3]
++
++	vmov.i32	$H4,#1<<24		@ padbit, yes, always
++	vld4.32		{$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp]	@ inp[0:1]
++	add		$inp,$inp,#64
++	vld4.32		{$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2]	@ inp[2:3] (or 0)
++	add		$in2,$in2,#64
++	itt		hi
++	addhi		$tbl1,$ctx,#(48+1*9*4)	@ more than 64 bytes were left: loop with the r^2 table
++	addhi		$tbl0,$ctx,#(48+3*9*4)	@ ... and the r^4 table
++
++# ifdef	__ARMEB__
++	vrev32.8	$H0,$H0
++	vrev32.8	$H3,$H3
++	vrev32.8	$H1,$H1
++	vrev32.8	$H2,$H2
++# endif
++	vsri.u32	$H4,$H3,#8		@ base 2^32 -> base 2^26
++	vshl.u32	$H3,$H3,#18
++
++	vsri.u32	$H3,$H2,#14
++	vshl.u32	$H2,$H2,#12
++
++	vbic.i32	$H3,#0xfc000000
++	vsri.u32	$H2,$H1,#20
++	vshl.u32	$H1,$H1,#6
++
++	vbic.i32	$H2,#0xfc000000
++	vsri.u32	$H1,$H0,#26
++
++	vbic.i32	$H0,#0xfc000000
++	vbic.i32	$H1,#0xfc000000
++
++	bls		.Lskip_loop		@ flags still come from the subs at .Leven
++
++	vld4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!	@ load r^2
++	vld4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!	@ load r^4
++	vld4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
++	vld4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
++	b		.Loop_neon
++
++.align	5
++.Loop_neon:
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
++	@ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
++	@   \___________________/
++	@ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
++	@ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
++	@   \___________________/ \____________________/
++	@
++	@ Note that we start with inp[2:3]*r^2. This is because it
++	@ doesn't depend on reduction in previous iteration.
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
++	@ d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
++	@ d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
++	@ d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
++	@ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
++
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ inp[2:3]*r^2
++
++	vadd.i32	$H2#lo,$H2#lo,$D2#lo	@ accumulate inp[0:1]
++	vmull.u32	$D2,$H2#hi,${R0}[1]
++	vadd.i32	$H0#lo,$H0#lo,$D0#lo
++	vmull.u32	$D0,$H0#hi,${R0}[1]
++	vadd.i32	$H3#lo,$H3#lo,$D3#lo
++	vmull.u32	$D3,$H3#hi,${R0}[1]
++	vmlal.u32	$D2,$H1#hi,${R1}[1]
++	vadd.i32	$H1#lo,$H1#lo,$D1#lo
++	vmull.u32	$D1,$H1#hi,${R0}[1]
++
++	vadd.i32	$H4#lo,$H4#lo,$D4#lo
++	vmull.u32	$D4,$H4#hi,${R0}[1]
++	subs		$len,$len,#64		@ sets the flags consumed by the bhi that closes the loop
++	vmlal.u32	$D0,$H4#hi,${S1}[1]
++	it		lo
++	movlo		$in2,$zeros
++	vmlal.u32	$D3,$H2#hi,${R1}[1]
++	vld1.32		${S4}[1],[$tbl1,:32]
++	vmlal.u32	$D1,$H0#hi,${R1}[1]
++	vmlal.u32	$D4,$H3#hi,${R1}[1]
++
++	vmlal.u32	$D0,$H3#hi,${S2}[1]
++	vmlal.u32	$D3,$H1#hi,${R2}[1]
++	vmlal.u32	$D4,$H2#hi,${R2}[1]
++	vmlal.u32	$D1,$H4#hi,${S2}[1]
++	vmlal.u32	$D2,$H0#hi,${R2}[1]
++
++	vmlal.u32	$D3,$H0#hi,${R3}[1]
++	vmlal.u32	$D0,$H2#hi,${S3}[1]
++	vmlal.u32	$D4,$H1#hi,${R3}[1]
++	vmlal.u32	$D1,$H3#hi,${S3}[1]
++	vmlal.u32	$D2,$H4#hi,${S3}[1]
++
++	vmlal.u32	$D3,$H4#hi,${S4}[1]
++	vmlal.u32	$D0,$H1#hi,${S4}[1]
++	vmlal.u32	$D4,$H0#hi,${R4}[1]
++	vmlal.u32	$D1,$H2#hi,${S4}[1]
++	vmlal.u32	$D2,$H3#hi,${S4}[1]
++
++	vld4.32		{$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2]	@ inp[2:3] (or 0)
++	add		$in2,$in2,#64
++
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ (hash+inp[0:1])*r^4 and accumulate
++
++	vmlal.u32	$D3,$H3#lo,${R0}[0]
++	vmlal.u32	$D0,$H0#lo,${R0}[0]
++	vmlal.u32	$D4,$H4#lo,${R0}[0]
++	vmlal.u32	$D1,$H1#lo,${R0}[0]
++	vmlal.u32	$D2,$H2#lo,${R0}[0]
++	vld1.32		${S4}[0],[$tbl0,:32]
++
++	vmlal.u32	$D3,$H2#lo,${R1}[0]
++	vmlal.u32	$D0,$H4#lo,${S1}[0]
++	vmlal.u32	$D4,$H3#lo,${R1}[0]
++	vmlal.u32	$D1,$H0#lo,${R1}[0]
++	vmlal.u32	$D2,$H1#lo,${R1}[0]
++
++	vmlal.u32	$D3,$H1#lo,${R2}[0]
++	vmlal.u32	$D0,$H3#lo,${S2}[0]
++	vmlal.u32	$D4,$H2#lo,${R2}[0]
++	vmlal.u32	$D1,$H4#lo,${S2}[0]
++	vmlal.u32	$D2,$H0#lo,${R2}[0]
++
++	vmlal.u32	$D3,$H0#lo,${R3}[0]
++	vmlal.u32	$D0,$H2#lo,${S3}[0]
++	vmlal.u32	$D4,$H1#lo,${R3}[0]
++	vmlal.u32	$D1,$H3#lo,${S3}[0]
++	vmlal.u32	$D3,$H4#lo,${S4}[0]
++
++	vmlal.u32	$D2,$H4#lo,${S3}[0]
++	vmlal.u32	$D0,$H1#lo,${S4}[0]
++	vmlal.u32	$D4,$H0#lo,${R4}[0]
++	vmov.i32	$H4,#1<<24		@ padbit, yes, always
++	vmlal.u32	$D1,$H2#lo,${S4}[0]
++	vmlal.u32	$D2,$H3#lo,${S4}[0]
++
++	vld4.32		{$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp]	@ inp[0:1]
++	add		$inp,$inp,#64
++# ifdef	__ARMEB__
++	vrev32.8	$H0,$H0
++	vrev32.8	$H1,$H1
++	vrev32.8	$H2,$H2
++	vrev32.8	$H3,$H3
++# endif
++
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ lazy reduction interleaved with base 2^32 -> base 2^26 of
++	@ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4.
++
++	vshr.u64	$T0,$D3,#26
++	vmovn.i64	$D3#lo,$D3
++	 vshr.u64	$T1,$D0,#26
++	 vmovn.i64	$D0#lo,$D0
++	vadd.i64	$D4,$D4,$T0		@ h3 -> h4
++	vbic.i32	$D3#lo,#0xfc000000
++	  vsri.u32	$H4,$H3,#8		@ base 2^32 -> base 2^26
++	 vadd.i64	$D1,$D1,$T1		@ h0 -> h1
++	  vshl.u32	$H3,$H3,#18
++	 vbic.i32	$D0#lo,#0xfc000000
++
++	vshrn.u64	$T0#lo,$D4,#26
++	vmovn.i64	$D4#lo,$D4
++	 vshr.u64	$T1,$D1,#26
++	 vmovn.i64	$D1#lo,$D1
++	 vadd.i64	$D2,$D2,$T1		@ h1 -> h2
++	  vsri.u32	$H3,$H2,#14
++	vbic.i32	$D4#lo,#0xfc000000
++	  vshl.u32	$H2,$H2,#12
++	 vbic.i32	$D1#lo,#0xfc000000
++
++	vadd.i32	$D0#lo,$D0#lo,$T0#lo
++	vshl.u32	$T0#lo,$T0#lo,#2
++	  vbic.i32	$H3,#0xfc000000
++	 vshrn.u64	$T1#lo,$D2,#26
++	 vmovn.i64	$D2#lo,$D2
++	vaddl.u32	$D0,$D0#lo,$T0#lo	@ h4 -> h0 [widen for a sec]
++	  vsri.u32	$H2,$H1,#20
++	 vadd.i32	$D3#lo,$D3#lo,$T1#lo	@ h2 -> h3
++	  vshl.u32	$H1,$H1,#6
++	 vbic.i32	$D2#lo,#0xfc000000
++	  vbic.i32	$H2,#0xfc000000
++
++	vshrn.u64	$T0#lo,$D0,#26		@ re-narrow
++	vmovn.i64	$D0#lo,$D0
++	  vsri.u32	$H1,$H0,#26
++	  vbic.i32	$H0,#0xfc000000
++	 vshr.u32	$T1#lo,$D3#lo,#26
++	 vbic.i32	$D3#lo,#0xfc000000
++	vbic.i32	$D0#lo,#0xfc000000
++	vadd.i32	$D1#lo,$D1#lo,$T0#lo	@ h0 -> h1
++	 vadd.i32	$D4#lo,$D4#lo,$T1#lo	@ h3 -> h4
++	  vbic.i32	$H1,#0xfc000000
++
++	bhi		.Loop_neon
++
++.Lskip_loop:
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
++
++	add		$tbl1,$ctx,#(48+0*9*4)
++	add		$tbl0,$ctx,#(48+1*9*4)
++	adds		$len,$len,#32		@ zero result: only two blocks are pending
++	it		ne
++	movne		$len,#0
++	bne		.Long_tail		@ four blocks pending: ne also enables the r^4:r^3 part below
++
++	vadd.i32	$H2#hi,$H2#lo,$D2#lo	@ add hash value and move to #hi
++	vadd.i32	$H0#hi,$H0#lo,$D0#lo
++	vadd.i32	$H3#hi,$H3#lo,$D3#lo
++	vadd.i32	$H1#hi,$H1#lo,$D1#lo
++	vadd.i32	$H4#hi,$H4#lo,$D4#lo
++
++.Long_tail:
++	vld4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!	@ load r^1
++	vld4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!	@ load r^2
++
++	vadd.i32	$H2#lo,$H2#lo,$D2#lo	@ can be redundant
++	vmull.u32	$D2,$H2#hi,$R0
++	vadd.i32	$H0#lo,$H0#lo,$D0#lo
++	vmull.u32	$D0,$H0#hi,$R0
++	vadd.i32	$H3#lo,$H3#lo,$D3#lo
++	vmull.u32	$D3,$H3#hi,$R0
++	vadd.i32	$H1#lo,$H1#lo,$D1#lo
++	vmull.u32	$D1,$H1#hi,$R0
++	vadd.i32	$H4#lo,$H4#lo,$D4#lo
++	vmull.u32	$D4,$H4#hi,$R0
++
++	vmlal.u32	$D0,$H4#hi,$S1
++	vld4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
++	vmlal.u32	$D3,$H2#hi,$R1
++	vld4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
++	vmlal.u32	$D1,$H0#hi,$R1
++	vmlal.u32	$D4,$H3#hi,$R1
++	vmlal.u32	$D2,$H1#hi,$R1
++
++	vmlal.u32	$D3,$H1#hi,$R2
++	vld1.32		${S4}[1],[$tbl1,:32]
++	vmlal.u32	$D0,$H3#hi,$S2
++	vld1.32		${S4}[0],[$tbl0,:32]
++	vmlal.u32	$D4,$H2#hi,$R2
++	vmlal.u32	$D1,$H4#hi,$S2
++	vmlal.u32	$D2,$H0#hi,$R2
++
++	vmlal.u32	$D3,$H0#hi,$R3
++	 it		ne
++	 addne		$tbl1,$ctx,#(48+2*9*4)
++	vmlal.u32	$D0,$H2#hi,$S3
++	 it		ne
++	 addne		$tbl0,$ctx,#(48+3*9*4)
++	vmlal.u32	$D4,$H1#hi,$R3
++	vmlal.u32	$D1,$H3#hi,$S3
++	vmlal.u32	$D2,$H4#hi,$S3
++
++	vmlal.u32	$D3,$H4#hi,$S4
++	 vorn		$MASK,$MASK,$MASK	@ all-ones, can be redundant
++	vmlal.u32	$D0,$H1#hi,$S4
++	 vshr.u64	$MASK,$MASK,#38		@ 64-38 = 26 low bits set in each lane
++	vmlal.u32	$D4,$H0#hi,$R4
++	vmlal.u32	$D1,$H2#hi,$S4
++	vmlal.u32	$D2,$H3#hi,$S4
++
++	beq		.Lshort_tail		@ eq was set by whichever path entered .Long_tail
++
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ (hash+inp[0:1])*r^4:r^3 and accumulate
++
++	vld4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!	@ load r^3
++	vld4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!	@ load r^4
++
++	vmlal.u32	$D2,$H2#lo,$R0
++	vmlal.u32	$D0,$H0#lo,$R0
++	vmlal.u32	$D3,$H3#lo,$R0
++	vmlal.u32	$D1,$H1#lo,$R0
++	vmlal.u32	$D4,$H4#lo,$R0
++
++	vmlal.u32	$D0,$H4#lo,$S1
++	vld4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
++	vmlal.u32	$D3,$H2#lo,$R1
++	vld4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
++	vmlal.u32	$D1,$H0#lo,$R1
++	vmlal.u32	$D4,$H3#lo,$R1
++	vmlal.u32	$D2,$H1#lo,$R1
++
++	vmlal.u32	$D3,$H1#lo,$R2
++	vld1.32		${S4}[1],[$tbl1,:32]
++	vmlal.u32	$D0,$H3#lo,$S2
++	vld1.32		${S4}[0],[$tbl0,:32]
++	vmlal.u32	$D4,$H2#lo,$R2
++	vmlal.u32	$D1,$H4#lo,$S2
++	vmlal.u32	$D2,$H0#lo,$R2
++
++	vmlal.u32	$D3,$H0#lo,$R3
++	vmlal.u32	$D0,$H2#lo,$S3
++	vmlal.u32	$D4,$H1#lo,$R3
++	vmlal.u32	$D1,$H3#lo,$S3
++	vmlal.u32	$D2,$H4#lo,$S3
++
++	vmlal.u32	$D3,$H4#lo,$S4
++	 vorn		$MASK,$MASK,$MASK	@ all-ones
++	vmlal.u32	$D0,$H1#lo,$S4
++	 vshr.u64	$MASK,$MASK,#38
++	vmlal.u32	$D4,$H0#lo,$R4
++	vmlal.u32	$D1,$H2#lo,$S4
++	vmlal.u32	$D2,$H3#lo,$S4
++
++.Lshort_tail:
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ horizontal addition
++
++	vadd.i64	$D3#lo,$D3#lo,$D3#hi
++	vadd.i64	$D0#lo,$D0#lo,$D0#hi
++	vadd.i64	$D4#lo,$D4#lo,$D4#hi
++	vadd.i64	$D1#lo,$D1#lo,$D1#hi
++	vadd.i64	$D2#lo,$D2#lo,$D2#hi
++
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ lazy reduction, but without narrowing
++
++	vshr.u64	$T0,$D3,#26
++	vand.i64	$D3,$D3,$MASK
++	 vshr.u64	$T1,$D0,#26
++	 vand.i64	$D0,$D0,$MASK
++	vadd.i64	$D4,$D4,$T0		@ h3 -> h4
++	 vadd.i64	$D1,$D1,$T1		@ h0 -> h1
++
++	vshr.u64	$T0,$D4,#26
++	vand.i64	$D4,$D4,$MASK
++	 vshr.u64	$T1,$D1,#26
++	 vand.i64	$D1,$D1,$MASK
++	 vadd.i64	$D2,$D2,$T1		@ h1 -> h2
++
++	vadd.i64	$D0,$D0,$T0
++	vshl.u64	$T0,$T0,#2
++	 vshr.u64	$T1,$D2,#26
++	 vand.i64	$D2,$D2,$MASK
++	vadd.i64	$D0,$D0,$T0		@ h4 -> h0
++	 vadd.i64	$D3,$D3,$T1		@ h2 -> h3
++
++	vshr.u64	$T0,$D0,#26
++	vand.i64	$D0,$D0,$MASK
++	 vshr.u64	$T1,$D3,#26
++	 vand.i64	$D3,$D3,$MASK
++	vadd.i64	$D1,$D1,$T0		@ h0 -> h1
++	 vadd.i64	$D4,$D4,$T1		@ h3 -> h4
++
++	cmp		$len,#0			@ nonzero only after the single-block prefix
++	bne		.Leven
++
++	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++	@ store hash value
++
++	vst4.32		{$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
++	vst1.32		{$D4#lo[0]},[$ctx]
++
++	vldmia	sp!,{d8-d15}			@ epilogue
++	ldmia	sp!,{r4-r7}
++.Lno_data_neon:
++	ret					@ bx	lr
++.size	poly1305_blocks_neon,.-poly1305_blocks_neon
++
++#ifdef __KERNEL__
++.globl	poly1305_emit_neon
++#endif
++.type	poly1305_emit_neon,%function
++.align	5
++poly1305_emit_neon:			@ as poly1305_emit, but accepts a hash stored in base 2^26
++	ldr	ip,[$ctx,#36]		@ is_base2_26
++
++	stmdb	sp!,{r4-r11}		@ same push as poly1305_emit, so both paths share one epilogue shape
++
++	tst	ip,ip
++	beq	.Lpoly1305_emit_enter	@ hash still in base 2^32: continue inside poly1305_emit
++
++	ldmia	$ctx,{$h0-$h4}		@ five 26-bit limbs
++	eor	$g0,$g0,$g0		@ zero, so the adc below yields (h4 >> 24) + carry
++
++	adds	$h0,$h0,$h1,lsl#26	@ base 2^26 -> base 2^32
++	mov	$h1,$h1,lsr#6
++	adcs	$h1,$h1,$h2,lsl#20
++	mov	$h2,$h2,lsr#12
++	adcs	$h2,$h2,$h3,lsl#14
++	mov	$h3,$h3,lsr#18
++	adcs	$h3,$h3,$h4,lsl#8
++	adc	$h4,$g0,$h4,lsr#24	@ can be partially reduced ...
++
++	and	$g0,$h4,#-4		@ ... so reduce
++	and	$h4,$h4,#3		@ top word keeps only bits 128-129 of the hash
++	add	$g0,$g0,$g0,lsr#2	@ *= 5
++	adds	$h0,$h0,$g0		@ fold the excess above 2^130 back in, since 2^130 = 5 mod 2^130-5
++	adcs	$h1,$h1,#0
++	adcs	$h2,$h2,#0
++	adcs	$h3,$h3,#0
++	adc	$h4,$h4,#0
++
++	adds	$g0,$h0,#5		@ compare to modulus
++	adcs	$g1,$h1,#0
++	adcs	$g2,$h2,#0
++	adcs	$g3,$h3,#0
++	adc	$g4,$h4,#0		@ g4 and h4 share one register; h4 is not read again
++	tst	$g4,#4			@ did it carry/borrow?
++
++	it	ne
++	movne	$h0,$g0			@ carried: keep h + 5, i.e. h - (2^130-5) modulo 2^128
++	ldr	$g0,[$nonce,#0]
++	it	ne
++	movne	$h1,$g1
++	ldr	$g1,[$nonce,#4]
++	it	ne
++	movne	$h2,$g2
++	ldr	$g2,[$nonce,#8]
++	it	ne
++	movne	$h3,$g3
++	ldr	$g3,[$nonce,#12]
++
++	adds	$h0,$h0,$g0		@ accumulate nonce
++	adcs	$h1,$h1,$g1
++	adcs	$h2,$h2,$g2
++	adc	$h3,$h3,$g3
++
++# ifdef __ARMEB__
++	rev	$h0,$h0			@ big-endian build: swap so the stores emit little-endian bytes
++	rev	$h1,$h1
++	rev	$h2,$h2
++	rev	$h3,$h3
++# endif
++	str	$h0,[$mac,#0]		@ store the result
++	str	$h1,[$mac,#4]
++	str	$h2,[$mac,#8]
++	str	$h3,[$mac,#12]
++
++	ldmia	sp!,{r4-r11}
++	ret				@ bx	lr
++.size	poly1305_emit_neon,.-poly1305_emit_neon
++
++.align	5
++.Lzeros:
++.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
++# ifndef __KERNEL__
++.LOPENSSL_armcap:
++.word	OPENSSL_armcap_P-.Lpoly1305_init
++# endif
++#endif
++___
++}	}
++$code.=<<___;
++.align	2
++#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
++.comm   OPENSSL_armcap_P,4,4
++#endif
++___
++
++open(SELF,$0);				# this script is also the source of the banner comment
++while(defined($_ = <SELF>)) {
++	next if (/^#!/);			# the shebang line is not echoed
++	last unless (s/^#/@/ or /^$/);		# stop at the first line that is neither comment nor blank
++	print $_;
++}
++close(SELF);
++
++for (split("\n",$code)) {
++	s/\`([^\`]*)\`/eval $1/geo;		# expand backtick-quoted Perl expressions
++	# Only the first rewrite that matches is applied to a given line.
++	unless (s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo) {
++		s/\bret\b/bx	lr/go						or
++		s/\bbx\s+lr\b/.word\t0xe12fff1e/go;	# make it possible to compile with -march=armv4
++	}
++	print $_,"\n";
++}
++close(STDOUT);	# enforce flush
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/poly1305/poly1305-mips64.pl	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,467 @@
++#!/usr/bin/env perl
++# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
++#
++# This code is taken from the OpenSSL project but the author, Andy Polyakov,
++# has relicensed it under the licenses specified in the SPDX header above.
++# The original headers, including the original license headers, are
++# included below for completeness.
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# Poly1305 hash for MIPS64.
++#
++# May 2016
++#
++# Numbers are cycles per processed byte with poly1305_blocks alone.
++#
++#		IALU/gcc
++# R1x000	5.64/+120%	(big-endian)
++# Octeon II	3.80/+280%	(little-endian)
++
++######################################################################
++# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
++# widely used. Then there is a new contender: NUBI. It appears that if
++# one picks the latter, it's possible to arrange code in ABI neutral
++# manner. Therefore let's stick to NUBI register layout:
++#
++($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
++($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
++($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
++($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
++#
++# The return value is placed in $a0. Following coding rules facilitate
++# interoperability:
++#
++# - never ever touch $tp, "thread pointer", former $gp [o32 can be
++#   excluded from the rule, because it's specified volatile];
++# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
++#   old code];
++# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
++#
++# For reference here is register layout for N32/64 MIPS ABIs:
++#
++# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
++# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
++# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
++# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
++# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
++#
++# <appro@openssl.org>
++#
++######################################################################
++
++$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64
++
++die "MIPS64 only" unless ($flavour =~ /64|n32/i);	# of the flavours listed above, o32 and nubi32 do not match and abort here
++
++$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;	# return-value register: a0 for nubi flavours, otherwise t0 (register 2)
++$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";	# bits 12-17 (s0-s5) for nubi, bits 16-17 (s4-s5) otherwise
++
++($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);	# names for registers a0-a3
++($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);	# input words and temporaries; tmp3 is t0, the same register as v0 for non-nubi flavours
++
++$code.=<<___;
++#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
++     defined(_MIPS_ARCH_MIPS64R6)) \\
++     && !defined(_MIPS_ARCH_MIPS64R2)
++# define _MIPS_ARCH_MIPS64R2
++#endif
++
++#if defined(_MIPS_ARCH_MIPS64R6)
++# define dmultu(rs,rt)
++# define mflo(rd,rs,rt)	dmulu	rd,rs,rt
++# define mfhi(rd,rs,rt)	dmuhu	rd,rs,rt
++#else
++# define dmultu(rs,rt)		dmultu	rs,rt
++# define mflo(rd,rs,rt)	mflo	rd
++# define mfhi(rd,rs,rt)	mfhi	rd
++#endif
++
++#ifdef	__KERNEL__
++# define poly1305_init   poly1305_init_mips
++# define poly1305_blocks poly1305_blocks_mips
++# define poly1305_emit   poly1305_emit_mips
++#endif
++
++#if defined(__MIPSEB__) && !defined(MIPSEB)
++# define MIPSEB
++#endif
++
++#ifdef MIPSEB
++# define MSB 0
++# define LSB 7
++#else
++# define MSB 7
++# define LSB 0
++#endif
++
++.text
++.set	noat
++.set	noreorder
++
++.align	5
++.globl	poly1305_init
++.ent	poly1305_init
++poly1305_init:				# zero the hash; if a key pointer is given, clamp the key and store r0, r1, s1
++	.frame	$sp,0,$ra
++	.set	reorder
++
++	sd	$zero,0($ctx)		# clear the three hash doublewords
++	sd	$zero,8($ctx)
++	sd	$zero,16($ctx)
++
++	beqz	$inp,.Lno_key		# null key pointer: only the hash is reset
++
++#if defined(_MIPS_ARCH_MIPS64R6)
++	ld	$in0,0($inp)		# R6 build: plain doubleword loads
++	ld	$in1,8($inp)
++#else
++	ldl	$in0,0+MSB($inp)	# other builds: left/right load pairs, usable at any alignment
++	ldl	$in1,8+MSB($inp)
++	ldr	$in0,0+LSB($inp)
++	ldr	$in1,8+LSB($inp)
++#endif
++#ifdef	MIPSEB
++# if defined(_MIPS_ARCH_MIPS64R2)
++	dsbh	$in0,$in0		# byte swap
++	 dsbh	$in1,$in1
++	dshd	$in0,$in0
++	 dshd	$in1,$in1
++# else
++	ori	$tmp0,$zero,0xFF
++	dsll	$tmp2,$tmp0,32
++	or	$tmp0,$tmp2		# 0x000000FF000000FF
++
++	and	$tmp1,$in0,$tmp0	# byte swap
++	 and	$tmp3,$in1,$tmp0
++	dsrl	$tmp2,$in0,24
++	 dsrl	$tmp4,$in1,24
++	dsll	$tmp1,24
++	 dsll	$tmp3,24
++	and	$tmp2,$tmp0
++	 and	$tmp4,$tmp0
++	dsll	$tmp0,8			# 0x0000FF000000FF00
++	or	$tmp1,$tmp2
++	 or	$tmp3,$tmp4
++	and	$tmp2,$in0,$tmp0
++	 and	$tmp4,$in1,$tmp0
++	dsrl	$in0,8
++	 dsrl	$in1,8
++	dsll	$tmp2,8
++	 dsll	$tmp4,8
++	and	$in0,$tmp0
++	 and	$in1,$tmp0
++	or	$tmp1,$tmp2
++	 or	$tmp3,$tmp4
++	or	$in0,$tmp1
++	 or	$in1,$tmp3
++	dsrl	$tmp1,$in0,32		# finally exchange the two 32-bit halves
++	 dsrl	$tmp3,$in1,32
++	dsll	$in0,32
++	 dsll	$in1,32
++	or	$in0,$tmp1
++	 or	$in1,$tmp3
++# endif
++#endif
++	li	$tmp0,1			# build the clamp mask in five steps
++	dsll	$tmp0,32
++	daddiu	$tmp0,-63
++	dsll	$tmp0,28
++	daddiu	$tmp0,-1		# 0ffffffc0fffffff
++
++	and	$in0,$tmp0		# clamp the low key half
++	daddiu	$tmp0,-3		# 0ffffffc0ffffffc
++	and	$in1,$tmp0		# clamp the high key half
++
++	sd	$in0,24($ctx)		# r0
++	dsrl	$tmp0,$in1,2
++	sd	$in1,32($ctx)		# r1
++	daddu	$tmp0,$in1		# s1 = r1 + (r1 >> 2)
++	sd	$tmp0,40($ctx)
++
++.Lno_key:
++	li	$v0,0			# return 0
++	jr	$ra
++.end	poly1305_init
++___
++{
++my ($h0,$h1,$h2,$r0,$r1,$s1,$d0,$d1,$d2) =
++   ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
++
++$code.=<<___;
++.align	5
++.globl	poly1305_blocks
++.ent	poly1305_blocks
++poly1305_blocks:			# thin entry: leaves at once when len holds no complete 16-byte block
++	.set	noreorder
++	dsrl	$len,4			# number of complete blocks
++	bnez	$len,poly1305_blocks_internal
++	nop				# branch delay slot, explicit because of noreorder
++	jr	$ra
++	nop				# delay slot of the jr
++.end	poly1305_blocks
++
++.align	5
++.ent	poly1305_blocks_internal
++poly1305_blocks_internal:
++	.frame	$sp,6*8,$ra
++	.mask	$SAVED_REGS_MASK,-8
++	.set	noreorder
++	dsubu	$sp,6*8
++	sd	$s5,40($sp)
++	sd	$s4,32($sp)
++___
++$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
++	sd	$s3,24($sp)
++	sd	$s2,16($sp)
++	sd	$s1,8($sp)
++	sd	$s0,0($sp)
++___
++$code.=<<___;
++	.set	reorder
++
++	ld	$h0,0($ctx)		# load hash value
++	ld	$h1,8($ctx)
++	ld	$h2,16($ctx)
++
++	ld	$r0,24($ctx)		# load key
++	ld	$r1,32($ctx)
++	ld	$s1,40($ctx)
++
++.Loop:
++#if defined(_MIPS_ARCH_MIPS64R6)
++	ld	$in0,0($inp)		# load input
++	ld	$in1,8($inp)
++#else
++	ldl	$in0,0+MSB($inp)	# load input
++	ldl	$in1,8+MSB($inp)
++	ldr	$in0,0+LSB($inp)
++	ldr	$in1,8+LSB($inp)
++#endif
++	daddiu	$len,-1
++	daddiu	$inp,16
++#ifdef	MIPSEB
++# if defined(_MIPS_ARCH_MIPS64R2)
++	dsbh	$in0,$in0		# byte swap
++	 dsbh	$in1,$in1
++	dshd	$in0,$in0
++	 dshd	$in1,$in1
++# else
++	ori	$tmp0,$zero,0xFF
++	dsll	$tmp2,$tmp0,32
++	or	$tmp0,$tmp2		# 0x000000FF000000FF
++
++	and	$tmp1,$in0,$tmp0	# byte swap
++	 and	$tmp3,$in1,$tmp0
++	dsrl	$tmp2,$in0,24
++	 dsrl	$tmp4,$in1,24
++	dsll	$tmp1,24
++	 dsll	$tmp3,24
++	and	$tmp2,$tmp0
++	 and	$tmp4,$tmp0
++	dsll	$tmp0,8			# 0x0000FF000000FF00
++	or	$tmp1,$tmp2
++	 or	$tmp3,$tmp4
++	and	$tmp2,$in0,$tmp0
++	 and	$tmp4,$in1,$tmp0
++	dsrl	$in0,8
++	 dsrl	$in1,8
++	dsll	$tmp2,8
++	 dsll	$tmp4,8
++	and	$in0,$tmp0
++	 and	$in1,$tmp0
++	or	$tmp1,$tmp2
++	 or	$tmp3,$tmp4
++	or	$in0,$tmp1
++	 or	$in1,$tmp3
++	dsrl	$tmp1,$in0,32
++	 dsrl	$tmp3,$in1,32
++	dsll	$in0,32
++	 dsll	$in1,32
++	or	$in0,$tmp1
++	 or	$in1,$tmp3
++# endif
++#endif
++	daddu	$h0,$in0		# accumulate input
++	daddu	$h1,$in1
++	sltu	$tmp0,$h0,$in0
++	sltu	$tmp1,$h1,$in1
++	daddu	$h1,$tmp0
++
++	dmultu	($r0,$h0)		# h0*r0
++	 daddu	$h2,$padbit
++	 sltu	$tmp0,$h1,$tmp0
++	mflo	($d0,$r0,$h0)
++	mfhi	($d1,$r0,$h0)
++
++	dmultu	($s1,$h1)		# h1*5*r1
++	 daddu	$tmp0,$tmp1
++	 daddu	$h2,$tmp0
++	mflo	($tmp0,$s1,$h1)
++	mfhi	($tmp1,$s1,$h1)
++
++	dmultu	($r1,$h0)		# h0*r1
++	 daddu	$d0,$tmp0
++	 daddu	$d1,$tmp1
++	mflo	($tmp2,$r1,$h0)
++	mfhi	($d2,$r1,$h0)
++	 sltu	$tmp0,$d0,$tmp0
++	 daddu	$d1,$tmp0
++
++	dmultu	($r0,$h1)		# h1*r0
++	 daddu	$d1,$tmp2
++	 sltu	$tmp2,$d1,$tmp2
++	mflo	($tmp0,$r0,$h1)
++	mfhi	($tmp1,$r0,$h1)
++	 daddu	$d2,$tmp2
++
++	dmultu	($s1,$h2)		# h2*5*r1
++	 daddu	$d1,$tmp0
++	 daddu	$d2,$tmp1
++	mflo	($tmp2,$s1,$h2)
++
++	dmultu	($r0,$h2)		# h2*r0
++	 sltu	$tmp0,$d1,$tmp0
++	 daddu	$d2,$tmp0
++	mflo	($tmp3,$r0,$h2)
++
++	daddu	$d1,$tmp2
++	daddu	$d2,$tmp3
++	sltu	$tmp2,$d1,$tmp2
++	daddu	$d2,$tmp2
++
++	li	$tmp0,-4		# final reduction
++	and	$tmp0,$d2
++	dsrl	$tmp1,$d2,2
++	andi	$h2,$d2,3
++	daddu	$tmp0,$tmp1
++	daddu	$h0,$d0,$tmp0
++	sltu	$tmp0,$h0,$tmp0
++	daddu	$h1,$d1,$tmp0
++	sltu	$tmp0,$h1,$tmp0
++	daddu	$h2,$h2,$tmp0
++
++	bnez	$len,.Loop
++
++	sd	$h0,0($ctx)		# store hash value
++	sd	$h1,8($ctx)
++	sd	$h2,16($ctx)
++
++	.set	noreorder
++	ld	$s5,40($sp)		# epilogue
++	ld	$s4,32($sp)
++___
++$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi epilogue
++	ld	$s3,24($sp)
++	ld	$s2,16($sp)
++	ld	$s1,8($sp)
++	ld	$s0,0($sp)
++___
++$code.=<<___;
++	jr	$ra
++	daddu	$sp,6*8
++.end	poly1305_blocks_internal
++___
++}
++{
++my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
++
++$code.=<<___;
++.align	5
++.globl	poly1305_emit
++.ent	poly1305_emit
++poly1305_emit:
++	.frame	$sp,0,$ra
++	.set	reorder
++
++	ld	$tmp0,0($ctx)
++	ld	$tmp1,8($ctx)
++	ld	$tmp2,16($ctx)
++
++	daddiu	$in0,$tmp0,5		# compare to modulus
++	sltiu	$tmp3,$in0,5
++	daddu	$in1,$tmp1,$tmp3
++	sltu	$tmp3,$in1,$tmp3
++	daddu	$tmp2,$tmp2,$tmp3
++
++	dsrl	$tmp2,2			# see if it carried/borrowed
++	dsubu	$tmp2,$zero,$tmp2
++	nor	$tmp3,$zero,$tmp2
++
++	and	$in0,$tmp2
++	and	$tmp0,$tmp3
++	and	$in1,$tmp2
++	and	$tmp1,$tmp3
++	or	$in0,$tmp0
++	or	$in1,$tmp1
++
++	lwu	$tmp0,0($nonce)		# load nonce
++	lwu	$tmp1,4($nonce)
++	lwu	$tmp2,8($nonce)
++	lwu	$tmp3,12($nonce)
++	dsll	$tmp1,32
++	dsll	$tmp3,32
++	or	$tmp0,$tmp1
++	or	$tmp2,$tmp3
++
++	daddu	$in0,$tmp0		# accumulate nonce
++	daddu	$in1,$tmp2
++	sltu	$tmp0,$in0,$tmp0
++	daddu	$in1,$tmp0
++
++	dsrl	$tmp0,$in0,8		# write mac value
++	dsrl	$tmp1,$in0,16
++	dsrl	$tmp2,$in0,24
++	sb	$in0,0($mac)
++	dsrl	$tmp3,$in0,32
++	sb	$tmp0,1($mac)
++	dsrl	$tmp0,$in0,40
++	sb	$tmp1,2($mac)
++	dsrl	$tmp1,$in0,48
++	sb	$tmp2,3($mac)
++	dsrl	$tmp2,$in0,56
++	sb	$tmp3,4($mac)
++	dsrl	$tmp3,$in1,8
++	sb	$tmp0,5($mac)
++	dsrl	$tmp0,$in1,16
++	sb	$tmp1,6($mac)
++	dsrl	$tmp1,$in1,24
++	sb	$tmp2,7($mac)
++
++	sb	$in1,8($mac)
++	dsrl	$tmp2,$in1,32
++	sb	$tmp3,9($mac)
++	dsrl	$tmp3,$in1,40
++	sb	$tmp0,10($mac)
++	dsrl	$tmp0,$in1,48
++	sb	$tmp1,11($mac)
++	dsrl	$tmp1,$in1,56
++	sb	$tmp2,12($mac)
++	sb	$tmp3,13($mac)
++	sb	$tmp0,14($mac)
++	sb	$tmp1,15($mac)
++
++	jr	$ra
++.end	poly1305_emit
++.rdata
++.align	2
++___
++}
++
++open SELF,$0;
++while(<SELF>) {
++	next if (/^#!/);
++	last if (!s/^#/\/\// and !/^$/);
++	print;
++}
++close SELF;
++
++$output=pop and open STDOUT,">$output";
++print $code;
++close STDOUT;
++
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/zinc/poly1305/poly1305-x86_64.pl	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,4266 @@
++#!/usr/bin/env perl
++# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
++#
++# Copyright (C) 2017-2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
++# Copyright (C) 2017-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++# Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
++#
++# This code is taken from the OpenSSL project but the author, Andy Polyakov,
++# has relicensed it under the licenses specified in the SPDX header above.
++# The original headers, including the original license headers, are
++# included below for completeness.
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# This module implements Poly1305 hash for x86_64.
++#
++# March 2015
++#
++# Initial release.
++#
++# December 2016
++#
++# Add AVX512F+VL+BW code path.
++#
++# November 2017
++#
++# Convert AVX512F+VL+BW code path to pure AVX512F, so that it can be
++# executed even on Knights Landing. Trigger for modification was
++# observation that AVX512 code paths can negatively affect overall
++# Skylake-X system performance. Since we are likely to suppress
++# AVX512F capability flag [at least on Skylake-X], conversion serves
++# as kind of "investment protection". Note that next *lake processor,
++# Cannonlake, has AVX512IFMA code path to execute...
++#
++# Numbers are cycles per processed byte with poly1305_blocks alone,
++# measured with rdtsc at fixed clock frequency.
++#
++#		IALU/gcc-4.8(*)	AVX(**)		AVX2	AVX-512
++# P4		4.46/+120%	-
++# Core 2	2.41/+90%	-
++# Westmere	1.88/+120%	-
++# Sandy Bridge	1.39/+140%	1.10
++# Haswell	1.14/+175%	1.11		0.65
++# Skylake[-X]	1.13/+120%	0.96		0.51	[0.35]
++# Silvermont	2.83/+95%	-
++# Knights L	3.60/?		1.65		1.10	0.41(***)
++# Goldmont	1.70/+180%	-
++# VIA Nano	1.82/+150%	-
++# Sledgehammer	1.38/+160%	-
++# Bulldozer	2.30/+130%	0.97
++# Ryzen		1.15/+200%	1.08		1.18
++#
++# (*)	improvement coefficients relative to clang are more modest and
++#	are ~50% on most processors, in both cases we are comparing to
++#	__int128 code;
++# (**)	SSE2 implementation was attempted, but among non-AVX processors
++#	it was faster than integer-only code only on older Intel P4 and
++#	Core processors (by 50-30%, less the newer the processor is), and
++#	slower on contemporary ones, e.g. almost 2x slower on Atom; as the
++#	former are naturally disappearing, SSE2 is deemed unnecessary;
++# (***)	strangely enough performance seems to vary from core to core,
++#	listed result is best case;
++
++$flavour = shift;
++$output  = shift;
++if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
++
++$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
++$kernel=0; $kernel=1 if (!$flavour && !$output);
++
++if (!$kernel) {
++	$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++	( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
++	( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
++	die "can't locate x86_64-xlate.pl";
++
++	open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
++	*STDOUT=*OUT;
++
++	if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
++	    =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
++		$avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25);
++	}
++
++	if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
++	    `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) {
++		$avx = ($1>=2.09) + ($1>=2.10) + ($1>=2.12);
++		$avx += 1 if ($1==2.11 && $2>=8);
++	}
++
++	if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
++	    `ml64 2>&1` =~ /Version ([0-9]+)\./) {
++		$avx = ($1>=10) + ($1>=11);
++	}
++
++	if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
++		$avx = ($2>=3.0) + ($2>3.0);
++	}
++} else {
++	$avx = 4; # The kernel uses ifdefs for this.
++}
++
++sub declare_function() {	# append the opening directives and entry label for $name to $code; callers use &declare_function(...), which bypasses the empty prototype
++	my ($name, $align, $nargs) = @_;	# $nargs is only consumed by the non-kernel .type directive below
++	if($kernel) {	# kernel flavour: alignment, then SYM_FUNC_START (presumably from the <linux/linkage.h> include emitted for kernel builds)
++		$code .= ".align $align\n";
++		$code .= "SYM_FUNC_START($name)\n";
++		$code .= ".L$name:\n";	# also emit a .L-prefixed label at the same address
++	} else {	# standalone flavour: spell out .globl/.type/.align and the plain label
++		$code .= ".globl	$name\n";
++		$code .= ".type	$name,\@function,$nargs\n";
++		$code .= ".align	$align\n";
++		$code .= "$name:\n";
++	}
++}
++
++sub end_function() {	# append the closing directive for $name to $code; counterpart of declare_function
++	my ($name) = @_;
++	if($kernel) {	# kernel flavour: close the symbol with SYM_FUNC_END
++		$code .= "SYM_FUNC_END($name)\n";
++	} else {	# standalone flavour: record the symbol size as (current position - start label)
++		$code .= ".size   $name,.-$name\n";
++	}
++}
++
++$code.=<<___ if $kernel;
++#include <linux/linkage.h>
++___
++
++if ($avx) {
++$code.=<<___ if $kernel;
++.section .rodata
++___
++$code.=<<___;
++.align	64
++.Lconst:
++.Lmask24:
++.long	0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
++.L129:
++.long	`1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0
++.Lmask26:
++.long	0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
++.Lpermd_avx2:
++.long	2,2,2,3,2,0,2,1
++.Lpermd_avx512:
++.long	0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
++
++.L2_44_inp_permd:
++.long	0,1,1,2,2,3,7,7
++.L2_44_inp_shift:
++.quad	0,12,24,64
++.L2_44_mask:
++.quad	0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
++.L2_44_shift_rgt:
++.quad	44,44,42,64
++.L2_44_shift_lft:
++.quad	8,8,10,64
++
++.align	64
++.Lx_mask44:
++.quad	0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
++.quad	0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
++.Lx_mask42:
++.quad	0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
++.quad	0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
++___
++}
++$code.=<<___ if (!$kernel);
++.asciz	"Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
++.align	16
++___
++
++my ($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx");
++my ($mac,$nonce)=($inp,$len);	# *_emit arguments
++my ($d1,$d2,$d3, $r0,$r1,$s1)=("%r8","%r9","%rdi","%r11","%r12","%r13");
++my ($h0,$h1,$h2)=("%r14","%rbx","%r10");
++
++sub poly1305_iteration {
++# input:	copy of $r1 in %rax, $h0-$h2, $r0-$r1
++# output:	$h0-$h2 *= $r0-$r1
++$code.=<<___;
++	mulq	$h0			# h0*r1
++	mov	%rax,$d2
++	 mov	$r0,%rax
++	mov	%rdx,$d3
++
++	mulq	$h0			# h0*r0
++	mov	%rax,$h0		# future $h0
++	 mov	$r0,%rax
++	mov	%rdx,$d1
++
++	mulq	$h1			# h1*r0
++	add	%rax,$d2
++	 mov	$s1,%rax
++	adc	%rdx,$d3
++
++	mulq	$h1			# h1*s1
++	 mov	$h2,$h1			# borrow $h1
++	add	%rax,$h0
++	adc	%rdx,$d1
++
++	imulq	$s1,$h1			# h2*s1
++	add	$h1,$d2
++	 mov	$d1,$h1
++	adc	\$0,$d3
++
++	imulq	$r0,$h2			# h2*r0
++	add	$d2,$h1
++	mov	\$-4,%rax		# mask value
++	adc	$h2,$d3
++
++	and	$d3,%rax		# last reduction step
++	mov	$d3,$h2
++	shr	\$2,$d3
++	and	\$3,$h2
++	add	$d3,%rax
++	add	%rax,$h0
++	adc	\$0,$h1
++	adc	\$0,$h2
++___
++}
++
++########################################################################
++# The layout of the opaque area is as follows.
++#
++#	unsigned __int64 h[3];		# current hash value base 2^64
++#	unsigned __int64 r[2];		# key value base 2^64
++
++$code.=<<___;
++.text
++___
++$code.=<<___ if (!$kernel);
++.extern	OPENSSL_ia32cap_P
++
++.globl	poly1305_init_x86_64
++.hidden	poly1305_init_x86_64
++.globl	poly1305_blocks_x86_64
++.hidden	poly1305_blocks_x86_64
++.globl	poly1305_emit_x86_64
++.hidden	poly1305_emit_x86_64
++___
++&declare_function("poly1305_init_x86_64", 32, 3);
++$code.=<<___;
++	xor	%rax,%rax
++	mov	%rax,0($ctx)		# initialize hash value
++	mov	%rax,8($ctx)
++	mov	%rax,16($ctx)
++
++	cmp	\$0,$inp
++	je	.Lno_key
++___
++$code.=<<___ if (!$kernel);
++	lea	poly1305_blocks_x86_64(%rip),%r10
++	lea	poly1305_emit_x86_64(%rip),%r11
++___
++$code.=<<___	if (!$kernel && $avx);
++	mov	OPENSSL_ia32cap_P+4(%rip),%r9
++	lea	poly1305_blocks_avx(%rip),%rax
++	lea	poly1305_emit_avx(%rip),%rcx
++	bt	\$`60-32`,%r9		# AVX?
++	cmovc	%rax,%r10
++	cmovc	%rcx,%r11
++___
++$code.=<<___	if (!$kernel && $avx>1);
++	lea	poly1305_blocks_avx2(%rip),%rax
++	bt	\$`5+32`,%r9		# AVX2?
++	cmovc	%rax,%r10
++___
++$code.=<<___	if (!$kernel && $avx>3);
++	mov	\$`(1<<31|1<<21|1<<16)`,%rax
++	shr	\$32,%r9
++	and	%rax,%r9
++	cmp	%rax,%r9
++	je	.Linit_base2_44
++___
++$code.=<<___;
++	mov	\$0x0ffffffc0fffffff,%rax
++	mov	\$0x0ffffffc0ffffffc,%rcx
++	and	0($inp),%rax
++	and	8($inp),%rcx
++	mov	%rax,24($ctx)
++	mov	%rcx,32($ctx)
++___
++$code.=<<___	if (!$kernel && $flavour !~ /elf32/);
++	mov	%r10,0(%rdx)
++	mov	%r11,8(%rdx)
++___
++$code.=<<___	if (!$kernel && $flavour =~ /elf32/);
++	mov	%r10d,0(%rdx)
++	mov	%r11d,4(%rdx)
++___
++$code.=<<___;
++	mov	\$1,%eax
++.Lno_key:
++	ret
++___
++&end_function("poly1305_init_x86_64");
++
++&declare_function("poly1305_blocks_x86_64", 32, 4);
++$code.=<<___;
++.cfi_startproc
++.Lblocks:
++	shr	\$4,$len
++	jz	.Lno_data		# too short
++
++	push	%rbx
++.cfi_push	%rbx
++	push	%r12
++.cfi_push	%r12
++	push	%r13
++.cfi_push	%r13
++	push	%r14
++.cfi_push	%r14
++	push	%r15
++.cfi_push	%r15
++	push	$ctx
++.cfi_push	$ctx
++.Lblocks_body:
++
++	mov	$len,%r15		# reassign $len
++
++	mov	24($ctx),$r0		# load r
++	mov	32($ctx),$s1
++
++	mov	0($ctx),$h0		# load hash value
++	mov	8($ctx),$h1
++	mov	16($ctx),$h2
++
++	mov	$s1,$r1
++	shr	\$2,$s1
++	mov	$r1,%rax
++	add	$r1,$s1			# s1 = r1 + (r1 >> 2)
++	jmp	.Loop
++
++.align	32
++.Loop:
++	add	0($inp),$h0		# accumulate input
++	adc	8($inp),$h1
++	lea	16($inp),$inp
++	adc	$padbit,$h2
++___
++
++	&poly1305_iteration();
++
++$code.=<<___;
++	mov	$r1,%rax
++	dec	%r15			# one fewer 16-byte block left
++	jnz	.Loop
++
++	mov	0(%rsp),$ctx
++.cfi_restore	$ctx
++
++	mov	$h0,0($ctx)		# store hash value
++	mov	$h1,8($ctx)
++	mov	$h2,16($ctx)
++
++	mov	8(%rsp),%r15
++.cfi_restore	%r15
++	mov	16(%rsp),%r14
++.cfi_restore	%r14
++	mov	24(%rsp),%r13
++.cfi_restore	%r13
++	mov	32(%rsp),%r12
++.cfi_restore	%r12
++	mov	40(%rsp),%rbx
++.cfi_restore	%rbx
++	lea	48(%rsp),%rsp
++.cfi_adjust_cfa_offset	-48
++.Lno_data:
++.Lblocks_epilogue:
++	ret
++.cfi_endproc
++___
++&end_function("poly1305_blocks_x86_64");
++
++&declare_function("poly1305_emit_x86_64", 32, 3);
++$code.=<<___;
++.Lemit:
++	mov	0($ctx),%r8	# load hash value
++	mov	8($ctx),%r9
++	mov	16($ctx),%r10
++
++	mov	%r8,%rax
++	add	\$5,%r8		# compare to modulus
++	mov	%r9,%rcx
++	adc	\$0,%r9
++	adc	\$0,%r10
++	shr	\$2,%r10	# did 130-bit value overflow?
++	cmovnz	%r8,%rax
++	cmovnz	%r9,%rcx
++
++	add	0($nonce),%rax	# accumulate nonce
++	adc	8($nonce),%rcx
++	mov	%rax,0($mac)	# write result
++	mov	%rcx,8($mac)
++
++	ret
++___
++&end_function("poly1305_emit_x86_64");
++if ($avx) {
++
++if($kernel) {
++	$code .= "#ifdef CONFIG_AS_AVX\n";
++}
++
++########################################################################
++# The layout of the opaque area is as follows.
++#
++#	unsigned __int32 h[5];		# current hash value base 2^26
++#	unsigned __int32 is_base2_26;
++#	unsigned __int64 r[2];		# key value base 2^64
++#	unsigned __int64 pad;
++#	struct { unsigned __int32 r^2, r^1, r^4, r^3; } r[9];
++#
++# where r^n are base 2^26 digits of degrees of multiplier key. There are
++# 5 digits, but last four are interleaved with multiples of 5, totalling
++# in 9 elements: r0, r1, 5*r1, r2, 5*r2, r3, 5*r3, r4, 5*r4.
++
++my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) =
++    map("%xmm$_",(0..15));
++
++$code.=<<___;
++.type	__poly1305_block,\@abi-omnipotent
++.align	32
++__poly1305_block:
++	push $ctx
++___
++	&poly1305_iteration();
++$code.=<<___;
++	pop $ctx
++	ret
++.size	__poly1305_block,.-__poly1305_block
++
++.type	__poly1305_init_avx,\@abi-omnipotent
++.align	32
++__poly1305_init_avx:
++	push %rbp
++	mov %rsp,%rbp
++	mov	$r0,$h0
++	mov	$r1,$h1
++	xor	$h2,$h2
++
++	lea	48+64($ctx),$ctx	# size optimization
++
++	mov	$r1,%rax
++	call	__poly1305_block	# r^2
++
++	mov	\$0x3ffffff,%eax	# save interleaved r^2 and r base 2^26
++	mov	\$0x3ffffff,%edx
++	mov	$h0,$d1
++	and	$h0#d,%eax
++	mov	$r0,$d2
++	and	$r0#d,%edx
++	mov	%eax,`16*0+0-64`($ctx)
++	shr	\$26,$d1
++	mov	%edx,`16*0+4-64`($ctx)
++	shr	\$26,$d2
++
++	mov	\$0x3ffffff,%eax
++	mov	\$0x3ffffff,%edx
++	and	$d1#d,%eax
++	and	$d2#d,%edx
++	mov	%eax,`16*1+0-64`($ctx)
++	lea	(%rax,%rax,4),%eax	# *5
++	mov	%edx,`16*1+4-64`($ctx)
++	lea	(%rdx,%rdx,4),%edx	# *5
++	mov	%eax,`16*2+0-64`($ctx)
++	shr	\$26,$d1
++	mov	%edx,`16*2+4-64`($ctx)
++	shr	\$26,$d2
++
++	mov	$h1,%rax
++	mov	$r1,%rdx
++	shl	\$12,%rax
++	shl	\$12,%rdx
++	or	$d1,%rax
++	or	$d2,%rdx
++	and	\$0x3ffffff,%eax
++	and	\$0x3ffffff,%edx
++	mov	%eax,`16*3+0-64`($ctx)
++	lea	(%rax,%rax,4),%eax	# *5
++	mov	%edx,`16*3+4-64`($ctx)
++	lea	(%rdx,%rdx,4),%edx	# *5
++	mov	%eax,`16*4+0-64`($ctx)
++	mov	$h1,$d1
++	mov	%edx,`16*4+4-64`($ctx)
++	mov	$r1,$d2
++
++	mov	\$0x3ffffff,%eax
++	mov	\$0x3ffffff,%edx
++	shr	\$14,$d1
++	shr	\$14,$d2
++	and	$d1#d,%eax
++	and	$d2#d,%edx
++	mov	%eax,`16*5+0-64`($ctx)
++	lea	(%rax,%rax,4),%eax	# *5
++	mov	%edx,`16*5+4-64`($ctx)
++	lea	(%rdx,%rdx,4),%edx	# *5
++	mov	%eax,`16*6+0-64`($ctx)
++	shr	\$26,$d1
++	mov	%edx,`16*6+4-64`($ctx)
++	shr	\$26,$d2
++
++	mov	$h2,%rax
++	shl	\$24,%rax
++	or	%rax,$d1
++	mov	$d1#d,`16*7+0-64`($ctx)
++	lea	($d1,$d1,4),$d1		# *5
++	mov	$d2#d,`16*7+4-64`($ctx)
++	lea	($d2,$d2,4),$d2		# *5
++	mov	$d1#d,`16*8+0-64`($ctx)
++	mov	$d2#d,`16*8+4-64`($ctx)
++
++	mov	$r1,%rax
++	call	__poly1305_block	# r^3
++
++	mov	\$0x3ffffff,%eax	# save r^3 base 2^26
++	mov	$h0,$d1
++	and	$h0#d,%eax
++	shr	\$26,$d1
++	mov	%eax,`16*0+12-64`($ctx)
++
++	mov	\$0x3ffffff,%edx
++	and	$d1#d,%edx
++	mov	%edx,`16*1+12-64`($ctx)
++	lea	(%rdx,%rdx,4),%edx	# *5
++	shr	\$26,$d1
++	mov	%edx,`16*2+12-64`($ctx)
++
++	mov	$h1,%rax
++	shl	\$12,%rax
++	or	$d1,%rax
++	and	\$0x3ffffff,%eax
++	mov	%eax,`16*3+12-64`($ctx)
++	lea	(%rax,%rax,4),%eax	# *5
++	mov	$h1,$d1
++	mov	%eax,`16*4+12-64`($ctx)
++
++	mov	\$0x3ffffff,%edx
++	shr	\$14,$d1
++	and	$d1#d,%edx
++	mov	%edx,`16*5+12-64`($ctx)
++	lea	(%rdx,%rdx,4),%edx	# *5
++	shr	\$26,$d1
++	mov	%edx,`16*6+12-64`($ctx)
++
++	mov	$h2,%rax
++	shl	\$24,%rax
++	or	%rax,$d1
++	mov	$d1#d,`16*7+12-64`($ctx)
++	lea	($d1,$d1,4),$d1		# *5
++	mov	$d1#d,`16*8+12-64`($ctx)
++
++	mov	$r1,%rax
++	call	__poly1305_block	# r^4
++
++	mov	\$0x3ffffff,%eax	# save r^4 base 2^26
++	mov	$h0,$d1
++	and	$h0#d,%eax
++	shr	\$26,$d1
++	mov	%eax,`16*0+8-64`($ctx)
++
++	mov	\$0x3ffffff,%edx
++	and	$d1#d,%edx
++	mov	%edx,`16*1+8-64`($ctx)
++	lea	(%rdx,%rdx,4),%edx	# *5
++	shr	\$26,$d1
++	mov	%edx,`16*2+8-64`($ctx)
++
++	mov	$h1,%rax
++	shl	\$12,%rax
++	or	$d1,%rax
++	and	\$0x3ffffff,%eax
++	mov	%eax,`16*3+8-64`($ctx)
++	lea	(%rax,%rax,4),%eax	# *5
++	mov	$h1,$d1
++	mov	%eax,`16*4+8-64`($ctx)
++
++	mov	\$0x3ffffff,%edx
++	shr	\$14,$d1
++	and	$d1#d,%edx
++	mov	%edx,`16*5+8-64`($ctx)
++	lea	(%rdx,%rdx,4),%edx	# *5
++	shr	\$26,$d1
++	mov	%edx,`16*6+8-64`($ctx)
++
++	mov	$h2,%rax
++	shl	\$24,%rax
++	or	%rax,$d1
++	mov	$d1#d,`16*7+8-64`($ctx)
++	lea	($d1,$d1,4),$d1		# *5
++	mov	$d1#d,`16*8+8-64`($ctx)
++
++	lea	-48-64($ctx),$ctx	# size [de-]optimization
++	pop %rbp
++	ret
++.size	__poly1305_init_avx,.-__poly1305_init_avx
++___
++
++&declare_function("poly1305_blocks_avx", 32, 4);
++$code.=<<___;
++.cfi_startproc
++	mov	20($ctx),%r8d		# is_base2_26
++	cmp	\$128,$len
++	jae	.Lblocks_avx
++	test	%r8d,%r8d
++	jz	.Lblocks
++
++.Lblocks_avx:
++	and	\$-16,$len
++	jz	.Lno_data_avx
++
++	vzeroupper
++
++	test	%r8d,%r8d
++	jz	.Lbase2_64_avx
++
++	test	\$31,$len
++	jz	.Leven_avx
++
++	push	%rbp
++.cfi_push	%rbp
++	mov 	%rsp,%rbp
++	push	%rbx
++.cfi_push	%rbx
++	push	%r12
++.cfi_push	%r12
++	push	%r13
++.cfi_push	%r13
++	push	%r14
++.cfi_push	%r14
++	push	%r15
++.cfi_push	%r15
++.Lblocks_avx_body:
++
++	mov	$len,%r15		# reassign $len
++
++	mov	0($ctx),$d1		# load hash value
++	mov	8($ctx),$d2
++	mov	16($ctx),$h2#d
++
++	mov	24($ctx),$r0		# load r
++	mov	32($ctx),$s1
++
++	################################# base 2^26 -> base 2^64
++	mov	$d1#d,$h0#d
++	and	\$`-1*(1<<31)`,$d1
++	mov	$d2,$r1			# borrow $r1
++	mov	$d2#d,$h1#d
++	and	\$`-1*(1<<31)`,$d2
++
++	shr	\$6,$d1
++	shl	\$52,$r1
++	add	$d1,$h0
++	shr	\$12,$h1
++	shr	\$18,$d2
++	add	$r1,$h0
++	adc	$d2,$h1
++
++	mov	$h2,$d1
++	shl	\$40,$d1
++	shr	\$24,$h2
++	add	$d1,$h1
++	adc	\$0,$h2			# can be partially reduced...
++
++	mov	\$-4,$d2		# ... so reduce
++	mov	$h2,$d1
++	and	$h2,$d2
++	shr	\$2,$d1
++	and	\$3,$h2
++	add	$d2,$d1			# =*5
++	add	$d1,$h0
++	adc	\$0,$h1
++	adc	\$0,$h2
++
++	mov	$s1,$r1
++	mov	$s1,%rax
++	shr	\$2,$s1
++	add	$r1,$s1			# s1 = r1 + (r1 >> 2)
++
++	add	0($inp),$h0		# accumulate input
++	adc	8($inp),$h1
++	lea	16($inp),$inp
++	adc	$padbit,$h2
++
++	call	__poly1305_block
++
++	test	$padbit,$padbit		# if $padbit is zero,
++	jz	.Lstore_base2_64_avx	# store hash in base 2^64 format
++
++	################################# base 2^64 -> base 2^26
++	mov	$h0,%rax
++	mov	$h0,%rdx
++	shr	\$52,$h0
++	mov	$h1,$r0
++	mov	$h1,$r1
++	shr	\$26,%rdx
++	and	\$0x3ffffff,%rax	# h[0]
++	shl	\$12,$r0
++	and	\$0x3ffffff,%rdx	# h[1]
++	shr	\$14,$h1
++	or	$r0,$h0
++	shl	\$24,$h2
++	and	\$0x3ffffff,$h0		# h[2]
++	shr	\$40,$r1
++	and	\$0x3ffffff,$h1		# h[3]
++	or	$r1,$h2			# h[4]
++
++	sub	\$16,%r15
++	jz	.Lstore_base2_26_avx
++
++	vmovd	%rax#d,$H0
++	vmovd	%rdx#d,$H1
++	vmovd	$h0#d,$H2
++	vmovd	$h1#d,$H3
++	vmovd	$h2#d,$H4
++	jmp	.Lproceed_avx
++
++.align	32
++.Lstore_base2_64_avx:
++	mov	$h0,0($ctx)
++	mov	$h1,8($ctx)
++	mov	$h2,16($ctx)		# note that is_base2_26 is zeroed
++	jmp	.Ldone_avx
++
++.align	16
++.Lstore_base2_26_avx:
++	mov	%rax#d,0($ctx)		# store hash value base 2^26
++	mov	%rdx#d,4($ctx)
++	mov	$h0#d,8($ctx)
++	mov	$h1#d,12($ctx)
++	mov	$h2#d,16($ctx)
++.align	16
++.Ldone_avx:
++	pop 		%r15
++.cfi_restore	%r15
++	pop 		%r14
++.cfi_restore	%r14
++	pop 		%r13
++.cfi_restore	%r13
++	pop 		%r12
++.cfi_restore	%r12
++	pop 		%rbx
++.cfi_restore	%rbx
++	pop 		%rbp
++.cfi_restore	%rbp
++.Lno_data_avx:
++.Lblocks_avx_epilogue:
++	ret
++.cfi_endproc
++
++.align	32
++.Lbase2_64_avx:
++.cfi_startproc
++	push	%rbp
++.cfi_push	%rbp
++	mov 	%rsp,%rbp
++	push	%rbx
++.cfi_push	%rbx
++	push	%r12
++.cfi_push	%r12
++	push	%r13
++.cfi_push	%r13
++	push	%r14
++.cfi_push	%r14
++	push	%r15
++.cfi_push	%r15
++.Lbase2_64_avx_body:
++
++	mov	$len,%r15		# reassign $len
++
++	mov	24($ctx),$r0		# load r
++	mov	32($ctx),$s1
++
++	mov	0($ctx),$h0		# load hash value
++	mov	8($ctx),$h1
++	mov	16($ctx),$h2#d
++
++	mov	$s1,$r1
++	mov	$s1,%rax
++	shr	\$2,$s1
++	add	$r1,$s1			# s1 = r1 + (r1 >> 2)
++
++	test	\$31,$len
++	jz	.Linit_avx
++
++	add	0($inp),$h0		# accumulate input
++	adc	8($inp),$h1
++	lea	16($inp),$inp
++	adc	$padbit,$h2
++	sub	\$16,%r15
++
++	call	__poly1305_block
++
++.Linit_avx:
++	################################# base 2^64 -> base 2^26
++	mov	$h0,%rax
++	mov	$h0,%rdx
++	shr	\$52,$h0
++	mov	$h1,$d1
++	mov	$h1,$d2
++	shr	\$26,%rdx
++	and	\$0x3ffffff,%rax	# h[0]
++	shl	\$12,$d1
++	and	\$0x3ffffff,%rdx	# h[1]
++	shr	\$14,$h1
++	or	$d1,$h0
++	shl	\$24,$h2
++	and	\$0x3ffffff,$h0		# h[2]
++	shr	\$40,$d2
++	and	\$0x3ffffff,$h1		# h[3]
++	or	$d2,$h2			# h[4]
++
++	vmovd	%rax#d,$H0
++	vmovd	%rdx#d,$H1
++	vmovd	$h0#d,$H2
++	vmovd	$h1#d,$H3
++	vmovd	$h2#d,$H4
++	movl	\$1,20($ctx)		# set is_base2_26
++
++	call	__poly1305_init_avx
++
++.Lproceed_avx:
++	mov	%r15,$len
++	pop 		%r15
++.cfi_restore	%r15
++	pop 		%r14
++.cfi_restore	%r14
++	pop 		%r13
++.cfi_restore	%r13
++	pop 		%r12
++.cfi_restore	%r12
++	pop 		%rbx
++.cfi_restore	%rbx
++	pop 		%rbp
++.cfi_restore	%rbp
++.Lbase2_64_avx_epilogue:
++	jmp	.Ldo_avx
++.cfi_endproc
++
++.align	32
++.Leven_avx:
++.cfi_startproc
++	vmovd		4*0($ctx),$H0		# load hash value
++	vmovd		4*1($ctx),$H1
++	vmovd		4*2($ctx),$H2
++	vmovd		4*3($ctx),$H3
++	vmovd		4*4($ctx),$H4
++
++.Ldo_avx:
++___
++$code.=<<___	if (!$win64);
++	lea		8(%rsp),%r10
++.cfi_def_cfa_register	%r10
++	and		\$-32,%rsp
++	sub		\$-8,%rsp
++	lea		-0x58(%rsp),%r11
++	sub		\$0x178,%rsp
++	
++___
++$code.=<<___	if ($win64);
++	lea		-0xf8(%rsp),%r11
++	sub		\$0x218,%rsp
++	vmovdqa		%xmm6,0x50(%r11)
++	vmovdqa		%xmm7,0x60(%r11)
++	vmovdqa		%xmm8,0x70(%r11)
++	vmovdqa		%xmm9,0x80(%r11)
++	vmovdqa		%xmm10,0x90(%r11)
++	vmovdqa		%xmm11,0xa0(%r11)
++	vmovdqa		%xmm12,0xb0(%r11)
++	vmovdqa		%xmm13,0xc0(%r11)
++	vmovdqa		%xmm14,0xd0(%r11)
++	vmovdqa		%xmm15,0xe0(%r11)
++.Ldo_avx_body:
++___
++$code.=<<___;
++	sub		\$64,$len
++	lea		-32($inp),%rax
++	cmovc		%rax,$inp
++
++	vmovdqu		`16*3`($ctx),$D4	# preload r0^2
++	lea		`16*3+64`($ctx),$ctx	# size optimization
++	lea		.Lconst(%rip),%rcx
++
++	################################################################
++	# load input
++	vmovdqu		16*2($inp),$T0
++	vmovdqu		16*3($inp),$T1
++	vmovdqa		64(%rcx),$MASK		# .Lmask26
++
++	vpsrldq		\$6,$T0,$T2		# splat input
++	vpsrldq		\$6,$T1,$T3
++	vpunpckhqdq	$T1,$T0,$T4		# 4
++	vpunpcklqdq	$T1,$T0,$T0		# 0:1
++	vpunpcklqdq	$T3,$T2,$T3		# 2:3
++
++	vpsrlq		\$40,$T4,$T4		# 4
++	vpsrlq		\$26,$T0,$T1
++	vpand		$MASK,$T0,$T0		# 0
++	vpsrlq		\$4,$T3,$T2
++	vpand		$MASK,$T1,$T1		# 1
++	vpsrlq		\$30,$T3,$T3
++	vpand		$MASK,$T2,$T2		# 2
++	vpand		$MASK,$T3,$T3		# 3
++	vpor		32(%rcx),$T4,$T4	# padbit, yes, always
++
++	jbe		.Lskip_loop_avx
++
++	# expand and copy pre-calculated table to stack
++	vmovdqu		`16*1-64`($ctx),$D1
++	vmovdqu		`16*2-64`($ctx),$D2
++	vpshufd		\$0xEE,$D4,$D3		# 34xx -> 3434
++	vpshufd		\$0x44,$D4,$D0		# xx12 -> 1212
++	vmovdqa		$D3,-0x90(%r11)
++	vmovdqa		$D0,0x00(%rsp)
++	vpshufd		\$0xEE,$D1,$D4
++	vmovdqu		`16*3-64`($ctx),$D0
++	vpshufd		\$0x44,$D1,$D1
++	vmovdqa		$D4,-0x80(%r11)
++	vmovdqa		$D1,0x10(%rsp)
++	vpshufd		\$0xEE,$D2,$D3
++	vmovdqu		`16*4-64`($ctx),$D1
++	vpshufd		\$0x44,$D2,$D2
++	vmovdqa		$D3,-0x70(%r11)
++	vmovdqa		$D2,0x20(%rsp)
++	vpshufd		\$0xEE,$D0,$D4
++	vmovdqu		`16*5-64`($ctx),$D2
++	vpshufd		\$0x44,$D0,$D0
++	vmovdqa		$D4,-0x60(%r11)
++	vmovdqa		$D0,0x30(%rsp)
++	vpshufd		\$0xEE,$D1,$D3
++	vmovdqu		`16*6-64`($ctx),$D0
++	vpshufd		\$0x44,$D1,$D1
++	vmovdqa		$D3,-0x50(%r11)
++	vmovdqa		$D1,0x40(%rsp)
++	vpshufd		\$0xEE,$D2,$D4
++	vmovdqu		`16*7-64`($ctx),$D1
++	vpshufd		\$0x44,$D2,$D2
++	vmovdqa		$D4,-0x40(%r11)
++	vmovdqa		$D2,0x50(%rsp)
++	vpshufd		\$0xEE,$D0,$D3
++	vmovdqu		`16*8-64`($ctx),$D2
++	vpshufd		\$0x44,$D0,$D0
++	vmovdqa		$D3,-0x30(%r11)
++	vmovdqa		$D0,0x60(%rsp)
++	vpshufd		\$0xEE,$D1,$D4
++	vpshufd		\$0x44,$D1,$D1
++	vmovdqa		$D4,-0x20(%r11)
++	vmovdqa		$D1,0x70(%rsp)
++	vpshufd		\$0xEE,$D2,$D3
++	 vmovdqa	0x00(%rsp),$D4		# preload r0^2
++	vpshufd		\$0x44,$D2,$D2
++	vmovdqa		$D3,-0x10(%r11)
++	vmovdqa		$D2,0x80(%rsp)
++
++	jmp		.Loop_avx
++
++.align	32
++.Loop_avx:
++	################################################################
++	# ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
++	# ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
++	#   \___________________/
++	# ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
++	# ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
++	#   \___________________/ \____________________/
++	#
++	# Note that we start with inp[2:3]*r^2. This is because it
++	# doesn't depend on reduction in previous iteration.
++	################################################################
++	# d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
++	# d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
++	# d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
++	# d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
++	# d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
++	#
++	# though note that $Tx and $Hx are "reversed" in this section,
++	# and $D4 is preloaded with r0^2...
++
++	vpmuludq	$T0,$D4,$D0		# d0 = h0*r0
++	vpmuludq	$T1,$D4,$D1		# d1 = h1*r0
++	  vmovdqa	$H2,0x20(%r11)				# offload hash
++	vpmuludq	$T2,$D4,$D2		# d2 = h2*r0
++	 vmovdqa	0x10(%rsp),$H2		# r1^2
++	vpmuludq	$T3,$D4,$D3		# d3 = h3*r0
++	vpmuludq	$T4,$D4,$D4		# d4 = h4*r0
++
++	  vmovdqa	$H0,0x00(%r11)				#
++	vpmuludq	0x20(%rsp),$T4,$H0	# h4*s1
++	  vmovdqa	$H1,0x10(%r11)				#
++	vpmuludq	$T3,$H2,$H1		# h3*r1
++	vpaddq		$H0,$D0,$D0		# d0 += h4*s1
++	vpaddq		$H1,$D4,$D4		# d4 += h3*r1
++	  vmovdqa	$H3,0x30(%r11)				#
++	vpmuludq	$T2,$H2,$H0		# h2*r1
++	vpmuludq	$T1,$H2,$H1		# h1*r1
++	vpaddq		$H0,$D3,$D3		# d3 += h2*r1
++	 vmovdqa	0x30(%rsp),$H3		# r2^2
++	vpaddq		$H1,$D2,$D2		# d2 += h1*r1
++	  vmovdqa	$H4,0x40(%r11)				#
++	vpmuludq	$T0,$H2,$H2		# h0*r1
++	 vpmuludq	$T2,$H3,$H0		# h2*r2
++	vpaddq		$H2,$D1,$D1		# d1 += h0*r1
++
++	 vmovdqa	0x40(%rsp),$H4		# s2^2
++	vpaddq		$H0,$D4,$D4		# d4 += h2*r2
++	vpmuludq	$T1,$H3,$H1		# h1*r2
++	vpmuludq	$T0,$H3,$H3		# h0*r2
++	vpaddq		$H1,$D3,$D3		# d3 += h1*r2
++	 vmovdqa	0x50(%rsp),$H2		# r3^2
++	vpaddq		$H3,$D2,$D2		# d2 += h0*r2
++	vpmuludq	$T4,$H4,$H0		# h4*s2
++	vpmuludq	$T3,$H4,$H4		# h3*s2
++	vpaddq		$H0,$D1,$D1		# d1 += h4*s2
++	 vmovdqa	0x60(%rsp),$H3		# s3^2
++	vpaddq		$H4,$D0,$D0		# d0 += h3*s2
++
++	 vmovdqa	0x80(%rsp),$H4		# s4^2
++	vpmuludq	$T1,$H2,$H1		# h1*r3
++	vpmuludq	$T0,$H2,$H2		# h0*r3
++	vpaddq		$H1,$D4,$D4		# d4 += h1*r3
++	vpaddq		$H2,$D3,$D3		# d3 += h0*r3
++	vpmuludq	$T4,$H3,$H0		# h4*s3
++	vpmuludq	$T3,$H3,$H1		# h3*s3
++	vpaddq		$H0,$D2,$D2		# d2 += h4*s3
++	 vmovdqu	16*0($inp),$H0				# load input
++	vpaddq		$H1,$D1,$D1		# d1 += h3*s3
++	vpmuludq	$T2,$H3,$H3		# h2*s3
++	 vpmuludq	$T2,$H4,$T2		# h2*s4
++	vpaddq		$H3,$D0,$D0		# d0 += h2*s3
++
++	 vmovdqu	16*1($inp),$H1				#
++	vpaddq		$T2,$D1,$D1		# d1 += h2*s4
++	vpmuludq	$T3,$H4,$T3		# h3*s4
++	vpmuludq	$T4,$H4,$T4		# h4*s4
++	 vpsrldq	\$6,$H0,$H2				# splat input
++	vpaddq		$T3,$D2,$D2		# d2 += h3*s4
++	vpaddq		$T4,$D3,$D3		# d3 += h4*s4
++	 vpsrldq	\$6,$H1,$H3				#
++	vpmuludq	0x70(%rsp),$T0,$T4	# h0*r4
++	vpmuludq	$T1,$H4,$T0		# h1*s4
++	 vpunpckhqdq	$H1,$H0,$H4		# 4
++	vpaddq		$T4,$D4,$D4		# d4 += h0*r4
++	 vmovdqa	-0x90(%r11),$T4		# r0^4
++	vpaddq		$T0,$D0,$D0		# d0 += h1*s4
++
++	vpunpcklqdq	$H1,$H0,$H0		# 0:1
++	vpunpcklqdq	$H3,$H2,$H3		# 2:3
++
++	#vpsrlq		\$40,$H4,$H4		# 4
++	vpsrldq		\$`40/8`,$H4,$H4	# 4
++	vpsrlq		\$26,$H0,$H1
++	vpand		$MASK,$H0,$H0		# 0
++	vpsrlq		\$4,$H3,$H2
++	vpand		$MASK,$H1,$H1		# 1
++	vpand		0(%rcx),$H4,$H4		# .Lmask24
++	vpsrlq		\$30,$H3,$H3
++	vpand		$MASK,$H2,$H2		# 2
++	vpand		$MASK,$H3,$H3		# 3
++	vpor		32(%rcx),$H4,$H4	# padbit, yes, always
++
++	vpaddq		0x00(%r11),$H0,$H0	# add hash value
++	vpaddq		0x10(%r11),$H1,$H1
++	vpaddq		0x20(%r11),$H2,$H2
++	vpaddq		0x30(%r11),$H3,$H3
++	vpaddq		0x40(%r11),$H4,$H4
++
++	lea		16*2($inp),%rax
++	lea		16*4($inp),$inp
++	sub		\$64,$len
++	cmovc		%rax,$inp
++
++	################################################################
++	# Now we accumulate (inp[0:1]+hash)*r^4
++	################################################################
++	# d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
++	# d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
++	# d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
++	# d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
++	# d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
++
++	vpmuludq	$H0,$T4,$T0		# h0*r0
++	vpmuludq	$H1,$T4,$T1		# h1*r0
++	vpaddq		$T0,$D0,$D0
++	vpaddq		$T1,$D1,$D1
++	 vmovdqa	-0x80(%r11),$T2		# r1^4
++	vpmuludq	$H2,$T4,$T0		# h2*r0
++	vpmuludq	$H3,$T4,$T1		# h3*r0
++	vpaddq		$T0,$D2,$D2
++	vpaddq		$T1,$D3,$D3
++	vpmuludq	$H4,$T4,$T4		# h4*r0
++	 vpmuludq	-0x70(%r11),$H4,$T0	# h4*s1
++	vpaddq		$T4,$D4,$D4
++
++	vpaddq		$T0,$D0,$D0		# d0 += h4*s1
++	vpmuludq	$H2,$T2,$T1		# h2*r1
++	vpmuludq	$H3,$T2,$T0		# h3*r1
++	vpaddq		$T1,$D3,$D3		# d3 += h2*r1
++	 vmovdqa	-0x60(%r11),$T3		# r2^4
++	vpaddq		$T0,$D4,$D4		# d4 += h3*r1
++	vpmuludq	$H1,$T2,$T1		# h1*r1
++	vpmuludq	$H0,$T2,$T2		# h0*r1
++	vpaddq		$T1,$D2,$D2		# d2 += h1*r1
++	vpaddq		$T2,$D1,$D1		# d1 += h0*r1
++
++	 vmovdqa	-0x50(%r11),$T4		# s2^4
++	vpmuludq	$H2,$T3,$T0		# h2*r2
++	vpmuludq	$H1,$T3,$T1		# h1*r2
++	vpaddq		$T0,$D4,$D4		# d4 += h2*r2
++	vpaddq		$T1,$D3,$D3		# d3 += h1*r2
++	 vmovdqa	-0x40(%r11),$T2		# r3^4
++	vpmuludq	$H0,$T3,$T3		# h0*r2
++	vpmuludq	$H4,$T4,$T0		# h4*s2
++	vpaddq		$T3,$D2,$D2		# d2 += h0*r2
++	vpaddq		$T0,$D1,$D1		# d1 += h4*s2
++	 vmovdqa	-0x30(%r11),$T3		# s3^4
++	vpmuludq	$H3,$T4,$T4		# h3*s2
++	 vpmuludq	$H1,$T2,$T1		# h1*r3
++	vpaddq		$T4,$D0,$D0		# d0 += h3*s2
++
++	 vmovdqa	-0x10(%r11),$T4		# s4^4
++	vpaddq		$T1,$D4,$D4		# d4 += h1*r3
++	vpmuludq	$H0,$T2,$T2		# h0*r3
++	vpmuludq	$H4,$T3,$T0		# h4*s3
++	vpaddq		$T2,$D3,$D3		# d3 += h0*r3
++	vpaddq		$T0,$D2,$D2		# d2 += h4*s3
++	 vmovdqu	16*2($inp),$T0				# load input
++	vpmuludq	$H3,$T3,$T2		# h3*s3
++	vpmuludq	$H2,$T3,$T3		# h2*s3
++	vpaddq		$T2,$D1,$D1		# d1 += h3*s3
++	 vmovdqu	16*3($inp),$T1				#
++	vpaddq		$T3,$D0,$D0		# d0 += h2*s3
++
++	vpmuludq	$H2,$T4,$H2		# h2*s4
++	vpmuludq	$H3,$T4,$H3		# h3*s4
++	 vpsrldq	\$6,$T0,$T2				# splat input
++	vpaddq		$H2,$D1,$D1		# d1 += h2*s4
++	vpmuludq	$H4,$T4,$H4		# h4*s4
++	 vpsrldq	\$6,$T1,$T3				#
++	vpaddq		$H3,$D2,$H2		# h2 = d2 + h3*s4
++	vpaddq		$H4,$D3,$H3		# h3 = d3 + h4*s4
++	vpmuludq	-0x20(%r11),$H0,$H4	# h0*r4
++	vpmuludq	$H1,$T4,$H0
++	 vpunpckhqdq	$T1,$T0,$T4		# 4
++	vpaddq		$H4,$D4,$H4		# h4 = d4 + h0*r4
++	vpaddq		$H0,$D0,$H0		# h0 = d0 + h1*s4
++
++	vpunpcklqdq	$T1,$T0,$T0		# 0:1
++	vpunpcklqdq	$T3,$T2,$T3		# 2:3
++
++	#vpsrlq		\$40,$T4,$T4		# 4
++	vpsrldq		\$`40/8`,$T4,$T4	# 4
++	vpsrlq		\$26,$T0,$T1
++	 vmovdqa	0x00(%rsp),$D4		# preload r0^2
++	vpand		$MASK,$T0,$T0		# 0
++	vpsrlq		\$4,$T3,$T2
++	vpand		$MASK,$T1,$T1		# 1
++	vpand		0(%rcx),$T4,$T4		# .Lmask24
++	vpsrlq		\$30,$T3,$T3
++	vpand		$MASK,$T2,$T2		# 2
++	vpand		$MASK,$T3,$T3		# 3
++	vpor		32(%rcx),$T4,$T4	# padbit, yes, always
++
++	################################################################
++	# lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
++	# and P. Schwabe
++
++	vpsrlq		\$26,$H3,$D3
++	vpand		$MASK,$H3,$H3
++	vpaddq		$D3,$H4,$H4		# h3 -> h4
++
++	vpsrlq		\$26,$H0,$D0
++	vpand		$MASK,$H0,$H0
++	vpaddq		$D0,$D1,$H1		# h0 -> h1
++
++	vpsrlq		\$26,$H4,$D0
++	vpand		$MASK,$H4,$H4
++
++	vpsrlq		\$26,$H1,$D1
++	vpand		$MASK,$H1,$H1
++	vpaddq		$D1,$H2,$H2		# h1 -> h2
++
++	vpaddq		$D0,$H0,$H0
++	vpsllq		\$2,$D0,$D0
++	vpaddq		$D0,$H0,$H0		# h4 -> h0
++
++	vpsrlq		\$26,$H2,$D2
++	vpand		$MASK,$H2,$H2
++	vpaddq		$D2,$H3,$H3		# h2 -> h3
++
++	vpsrlq		\$26,$H0,$D0
++	vpand		$MASK,$H0,$H0
++	vpaddq		$D0,$H1,$H1		# h0 -> h1
++
++	vpsrlq		\$26,$H3,$D3
++	vpand		$MASK,$H3,$H3
++	vpaddq		$D3,$H4,$H4		# h3 -> h4
++
++	ja		.Loop_avx
++
++.Lskip_loop_avx:
++	################################################################
++	# multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
++
++	vpshufd		\$0x10,$D4,$D4		# r0^n, xx12 -> x1x2
++	add		\$32,$len
++	jnz		.Long_tail_avx
++
++	vpaddq		$H2,$T2,$T2
++	vpaddq		$H0,$T0,$T0
++	vpaddq		$H1,$T1,$T1
++	vpaddq		$H3,$T3,$T3
++	vpaddq		$H4,$T4,$T4
++
++.Long_tail_avx:
++	vmovdqa		$H2,0x20(%r11)
++	vmovdqa		$H0,0x00(%r11)
++	vmovdqa		$H1,0x10(%r11)
++	vmovdqa		$H3,0x30(%r11)
++	vmovdqa		$H4,0x40(%r11)
++
++	# d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
++	# d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
++	# d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
++	# d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
++	# d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
++
++	vpmuludq	$T2,$D4,$D2		# d2 = h2*r0
++	vpmuludq	$T0,$D4,$D0		# d0 = h0*r0
++	 vpshufd	\$0x10,`16*1-64`($ctx),$H2		# r1^n
++	vpmuludq	$T1,$D4,$D1		# d1 = h1*r0
++	vpmuludq	$T3,$D4,$D3		# d3 = h3*r0
++	vpmuludq	$T4,$D4,$D4		# d4 = h4*r0
++
++	vpmuludq	$T3,$H2,$H0		# h3*r1
++	vpaddq		$H0,$D4,$D4		# d4 += h3*r1
++	 vpshufd	\$0x10,`16*2-64`($ctx),$H3		# s1^n
++	vpmuludq	$T2,$H2,$H1		# h2*r1
++	vpaddq		$H1,$D3,$D3		# d3 += h2*r1
++	 vpshufd	\$0x10,`16*3-64`($ctx),$H4		# r2^n
++	vpmuludq	$T1,$H2,$H0		# h1*r1
++	vpaddq		$H0,$D2,$D2		# d2 += h1*r1
++	vpmuludq	$T0,$H2,$H2		# h0*r1
++	vpaddq		$H2,$D1,$D1		# d1 += h0*r1
++	vpmuludq	$T4,$H3,$H3		# h4*s1
++	vpaddq		$H3,$D0,$D0		# d0 += h4*s1
++
++	 vpshufd	\$0x10,`16*4-64`($ctx),$H2		# s2^n
++	vpmuludq	$T2,$H4,$H1		# h2*r2
++	vpaddq		$H1,$D4,$D4		# d4 += h2*r2
++	vpmuludq	$T1,$H4,$H0		# h1*r2
++	vpaddq		$H0,$D3,$D3		# d3 += h1*r2
++	 vpshufd	\$0x10,`16*5-64`($ctx),$H3		# r3^n
++	vpmuludq	$T0,$H4,$H4		# h0*r2
++	vpaddq		$H4,$D2,$D2		# d2 += h0*r2
++	vpmuludq	$T4,$H2,$H1		# h4*s2
++	vpaddq		$H1,$D1,$D1		# d1 += h4*s2
++	 vpshufd	\$0x10,`16*6-64`($ctx),$H4		# s3^n
++	vpmuludq	$T3,$H2,$H2		# h3*s2
++	vpaddq		$H2,$D0,$D0		# d0 += h3*s2
++
++	vpmuludq	$T1,$H3,$H0		# h1*r3
++	vpaddq		$H0,$D4,$D4		# d4 += h1*r3
++	vpmuludq	$T0,$H3,$H3		# h0*r3
++	vpaddq		$H3,$D3,$D3		# d3 += h0*r3
++	 vpshufd	\$0x10,`16*7-64`($ctx),$H2		# r4^n
++	vpmuludq	$T4,$H4,$H1		# h4*s3
++	vpaddq		$H1,$D2,$D2		# d2 += h4*s3
++	 vpshufd	\$0x10,`16*8-64`($ctx),$H3		# s4^n
++	vpmuludq	$T3,$H4,$H0		# h3*s3
++	vpaddq		$H0,$D1,$D1		# d1 += h3*s3
++	vpmuludq	$T2,$H4,$H4		# h2*s3
++	vpaddq		$H4,$D0,$D0		# d0 += h2*s3
++
++	vpmuludq	$T0,$H2,$H2		# h0*r4
++	vpaddq		$H2,$D4,$D4		# h4 = d4 + h0*r4
++	vpmuludq	$T4,$H3,$H1		# h4*s4
++	vpaddq		$H1,$D3,$D3		# h3 = d3 + h4*s4
++	vpmuludq	$T3,$H3,$H0		# h3*s4
++	vpaddq		$H0,$D2,$D2		# h2 = d2 + h3*s4
++	vpmuludq	$T2,$H3,$H1		# h2*s4
++	vpaddq		$H1,$D1,$D1		# h1 = d1 + h2*s4
++	vpmuludq	$T1,$H3,$H3		# h1*s4
++	vpaddq		$H3,$D0,$D0		# h0 = d0 + h1*s4
++
++	jz		.Lshort_tail_avx
++
++	vmovdqu		16*0($inp),$H0		# load input
++	vmovdqu		16*1($inp),$H1
++
++	vpsrldq		\$6,$H0,$H2		# splat input
++	vpsrldq		\$6,$H1,$H3
++	vpunpckhqdq	$H1,$H0,$H4		# 4
++	vpunpcklqdq	$H1,$H0,$H0		# 0:1
++	vpunpcklqdq	$H3,$H2,$H3		# 2:3
++
++	vpsrlq		\$40,$H4,$H4		# 4
++	vpsrlq		\$26,$H0,$H1
++	vpand		$MASK,$H0,$H0		# 0
++	vpsrlq		\$4,$H3,$H2
++	vpand		$MASK,$H1,$H1		# 1
++	vpsrlq		\$30,$H3,$H3
++	vpand		$MASK,$H2,$H2		# 2
++	vpand		$MASK,$H3,$H3		# 3
++	vpor		32(%rcx),$H4,$H4	# padbit, yes, always
++
++	vpshufd		\$0x32,`16*0-64`($ctx),$T4	# r0^n, 34xx -> x3x4
++	vpaddq		0x00(%r11),$H0,$H0
++	vpaddq		0x10(%r11),$H1,$H1
++	vpaddq		0x20(%r11),$H2,$H2
++	vpaddq		0x30(%r11),$H3,$H3
++	vpaddq		0x40(%r11),$H4,$H4
++
++	################################################################
++	# multiply (inp[0:1]+hash) by r^4:r^3 and accumulate
++
++	vpmuludq	$H0,$T4,$T0		# h0*r0
++	vpaddq		$T0,$D0,$D0		# d0 += h0*r0
++	vpmuludq	$H1,$T4,$T1		# h1*r0
++	vpaddq		$T1,$D1,$D1		# d1 += h1*r0
++	vpmuludq	$H2,$T4,$T0		# h2*r0
++	vpaddq		$T0,$D2,$D2		# d2 += h2*r0
++	 vpshufd	\$0x32,`16*1-64`($ctx),$T2		# r1^n
++	vpmuludq	$H3,$T4,$T1		# h3*r0
++	vpaddq		$T1,$D3,$D3		# d3 += h3*r0
++	vpmuludq	$H4,$T4,$T4		# h4*r0
++	vpaddq		$T4,$D4,$D4		# d4 += h4*r0
++
++	vpmuludq	$H3,$T2,$T0		# h3*r1
++	vpaddq		$T0,$D4,$D4		# d4 += h3*r1
++	 vpshufd	\$0x32,`16*2-64`($ctx),$T3		# s1
++	vpmuludq	$H2,$T2,$T1		# h2*r1
++	vpaddq		$T1,$D3,$D3		# d3 += h2*r1
++	 vpshufd	\$0x32,`16*3-64`($ctx),$T4		# r2
++	vpmuludq	$H1,$T2,$T0		# h1*r1
++	vpaddq		$T0,$D2,$D2		# d2 += h1*r1
++	vpmuludq	$H0,$T2,$T2		# h0*r1
++	vpaddq		$T2,$D1,$D1		# d1 += h0*r1
++	vpmuludq	$H4,$T3,$T3		# h4*s1
++	vpaddq		$T3,$D0,$D0		# d0 += h4*s1
++
++	 vpshufd	\$0x32,`16*4-64`($ctx),$T2		# s2
++	vpmuludq	$H2,$T4,$T1		# h2*r2
++	vpaddq		$T1,$D4,$D4		# d4 += h2*r2
++	vpmuludq	$H1,$T4,$T0		# h1*r2
++	vpaddq		$T0,$D3,$D3		# d3 += h1*r2
++	 vpshufd	\$0x32,`16*5-64`($ctx),$T3		# r3
++	vpmuludq	$H0,$T4,$T4		# h0*r2
++	vpaddq		$T4,$D2,$D2		# d2 += h0*r2
++	vpmuludq	$H4,$T2,$T1		# h4*s2
++	vpaddq		$T1,$D1,$D1		# d1 += h4*s2
++	 vpshufd	\$0x32,`16*6-64`($ctx),$T4		# s3
++	vpmuludq	$H3,$T2,$T2		# h3*s2
++	vpaddq		$T2,$D0,$D0		# d0 += h3*s2
++
++	vpmuludq	$H1,$T3,$T0		# h1*r3
++	vpaddq		$T0,$D4,$D4		# d4 += h1*r3
++	vpmuludq	$H0,$T3,$T3		# h0*r3
++	vpaddq		$T3,$D3,$D3		# d3 += h0*r3
++	 vpshufd	\$0x32,`16*7-64`($ctx),$T2		# r4
++	vpmuludq	$H4,$T4,$T1		# h4*s3
++	vpaddq		$T1,$D2,$D2		# d2 += h4*s3
++	 vpshufd	\$0x32,`16*8-64`($ctx),$T3		# s4
++	vpmuludq	$H3,$T4,$T0		# h3*s3
++	vpaddq		$T0,$D1,$D1		# d1 += h3*s3
++	vpmuludq	$H2,$T4,$T4		# h2*s3
++	vpaddq		$T4,$D0,$D0		# d0 += h2*s3
++
++	vpmuludq	$H0,$T2,$T2		# h0*r4
++	vpaddq		$T2,$D4,$D4		# d4 += h0*r4
++	vpmuludq	$H4,$T3,$T1		# h4*s4
++	vpaddq		$T1,$D3,$D3		# d3 += h4*s4
++	vpmuludq	$H3,$T3,$T0		# h3*s4
++	vpaddq		$T0,$D2,$D2		# d2 += h3*s4
++	vpmuludq	$H2,$T3,$T1		# h2*s4
++	vpaddq		$T1,$D1,$D1		# d1 += h2*s4
++	vpmuludq	$H1,$T3,$T3		# h1*s4
++	vpaddq		$T3,$D0,$D0		# d0 += h1*s4
++
++.Lshort_tail_avx:
++	################################################################
++	# horizontal addition
++
++	vpsrldq		\$8,$D4,$T4
++	vpsrldq		\$8,$D3,$T3
++	vpsrldq		\$8,$D1,$T1
++	vpsrldq		\$8,$D0,$T0
++	vpsrldq		\$8,$D2,$T2
++	vpaddq		$T3,$D3,$D3
++	vpaddq		$T4,$D4,$D4
++	vpaddq		$T0,$D0,$D0
++	vpaddq		$T1,$D1,$D1
++	vpaddq		$T2,$D2,$D2
++
++	################################################################
++	# lazy reduction
++
++	vpsrlq		\$26,$D3,$H3
++	vpand		$MASK,$D3,$D3
++	vpaddq		$H3,$D4,$D4		# h3 -> h4
++
++	vpsrlq		\$26,$D0,$H0
++	vpand		$MASK,$D0,$D0
++	vpaddq		$H0,$D1,$D1		# h0 -> h1
++
++	vpsrlq		\$26,$D4,$H4
++	vpand		$MASK,$D4,$D4
++
++	vpsrlq		\$26,$D1,$H1
++	vpand		$MASK,$D1,$D1
++	vpaddq		$H1,$D2,$D2		# h1 -> h2
++
++	vpaddq		$H4,$D0,$D0
++	vpsllq		\$2,$H4,$H4
++	vpaddq		$H4,$D0,$D0		# h4 -> h0
++
++	vpsrlq		\$26,$D2,$H2
++	vpand		$MASK,$D2,$D2
++	vpaddq		$H2,$D3,$D3		# h2 -> h3
++
++	vpsrlq		\$26,$D0,$H0
++	vpand		$MASK,$D0,$D0
++	vpaddq		$H0,$D1,$D1		# h0 -> h1
++
++	vpsrlq		\$26,$D3,$H3
++	vpand		$MASK,$D3,$D3
++	vpaddq		$H3,$D4,$D4		# h3 -> h4
++
++	vmovd		$D0,`4*0-48-64`($ctx)	# save partially reduced
++	vmovd		$D1,`4*1-48-64`($ctx)
++	vmovd		$D2,`4*2-48-64`($ctx)
++	vmovd		$D3,`4*3-48-64`($ctx)
++	vmovd		$D4,`4*4-48-64`($ctx)
++___
++$code.=<<___	if ($win64);
++	vmovdqa		0x50(%r11),%xmm6
++	vmovdqa		0x60(%r11),%xmm7
++	vmovdqa		0x70(%r11),%xmm8
++	vmovdqa		0x80(%r11),%xmm9
++	vmovdqa		0x90(%r11),%xmm10
++	vmovdqa		0xa0(%r11),%xmm11
++	vmovdqa		0xb0(%r11),%xmm12
++	vmovdqa		0xc0(%r11),%xmm13
++	vmovdqa		0xd0(%r11),%xmm14
++	vmovdqa		0xe0(%r11),%xmm15
++	lea		0xf8(%r11),%rsp
++.Ldo_avx_epilogue:
++___
++$code.=<<___	if (!$win64);
++	lea		-8(%r10),%rsp
++.cfi_def_cfa_register	%rsp
++___
++$code.=<<___;
++	vzeroupper
++	ret
++.cfi_endproc
++___
++&end_function("poly1305_blocks_avx");
++
++&declare_function("poly1305_emit_avx", 32, 3);	# final tag computation for a hash that may be stored in base 2^26; NOTE(review): 32 and 3 look like alignment and argument count -- confirm against declare_function
++$code.=<<___;
++	cmpl	\$0,20($ctx)	# is_base2_26? (flag word at ctx+20)
++	je	.Lemit		# flag clear: branch to .Lemit (label defined outside this block)
++
++	mov	0($ctx),%eax	# load hash value base 2^26: five 32-bit limbs h[0]..h[4]
++	mov	4($ctx),%ecx	# h[1]
++	mov	8($ctx),%r8d	# h[2]
++	mov	12($ctx),%r11d	# h[3]
++	mov	16($ctx),%r10d	# h[4]
++
++	shl	\$26,%rcx	# base 2^26 -> base 2^64: rcx = h[1] << 26
++	mov	%r8,%r9		# second copy of h[2], which straddles bit 64
++	shl	\$52,%r8	# low 12 bits of h[2] move to bits 52..63
++	add	%rcx,%rax	# rax = h[0] + (h[1] << 26)
++	shr	\$12,%r9	# remaining high bits of h[2] start the second word
++	add	%rax,%r8	# h0
++	adc	\$0,%r9		# carry out of h0 goes into the second word
++
++	shl	\$14,%r11	# h[3] starts at bit 78 = 64 + 14
++	mov	%r10,%rax	# second copy of h[4], which straddles bit 128
++	shr	\$24,%r10	# bits of h[4] from bit 24 upwards belong to the third word
++	add	%r11,%r9	# second word += h[3] << 14
++	shl	\$40,%rax	# h[4] starts at bit 104 = 64 + 40
++	add	%rax,%r9	# h1
++	adc	\$0,%r10	# h2
++
++	mov	%r10,%rax	# could be partially reduced, so reduce
++	mov	%r10,%rcx	# two copies of h2 are used to form (h2 >> 2) * 5
++	and	\$3,%r10	# h2 keeps only its low two bits (bits 128..129 of h)
++	shr	\$2,%rax	# rax = h2 >> 2, the excess at or above 2^130
++	and	\$-4,%rcx	# rcx = (h2 >> 2) * 4
++	add	%rcx,%rax	# rax = (h2 >> 2) * 5, because 2^130 = 5 mod (2^130 - 5)
++	add	%rax,%r8	# fold the excess back into h0 ...
++	adc	\$0,%r9		# ... rippling the carry into h1
++	adc	\$0,%r10	# ... and into h2
++
++	mov	%r8,%rax	# rax = h0, the candidate used when h is below the modulus
++	add	\$5,%r8		# compare to modulus
++	mov	%r9,%rcx	# rcx = h1; mov does not disturb the carry from the add above
++	adc	\$0,%r9		# propagate carry of h + 5 into h1
++	adc	\$0,%r10	# ... and into h2
++	shr	\$2,%r10	# did 130-bit value overflow?
++	cmovnz	%r8,%rax	# overflow means h >= 2^130 - 5: take the low 128 bits of h + 5 instead
++	cmovnz	%r9,%rcx	# same selection for the high 64 bits
++
++	add	0($nonce),%rax	# accumulate nonce
++	adc	8($nonce),%rcx	# high half with carry; carry out of bit 127 is discarded
++	mov	%rax,0($mac)	# write result
++	mov	%rcx,8($mac)	# second 8 bytes of the 16-byte output
++
++	ret
++___
++&end_function("poly1305_emit_avx");
++
++if ($kernel) {	# only when generating the kernel flavour of the output
++	$code .= "#endif\n";	# closes a preprocessor guard opened before this chunk -- presumably the AVX one; TODO confirm
++}
++
++if ($avx>1) {
++
++if ($kernel) {
++	$code .= "#ifdef CONFIG_AS_AVX2\n";
++}
++
++my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
++    map("%ymm$_",(0..15));
++my $S4=$MASK;
++
++sub poly1305_blocks_avxN {
++	my ($avx512) = @_;
++	my $suffix = $avx512 ? "_avx512" : "";
++$code.=<<___;
++.cfi_startproc
++	mov	20($ctx),%r8d		# is_base2_26
++	cmp	\$128,$len
++	jae	.Lblocks_avx2$suffix
++	test	%r8d,%r8d
++	jz	.Lblocks
++
++.Lblocks_avx2$suffix:
++	and	\$-16,$len
++	jz	.Lno_data_avx2$suffix
++
++	vzeroupper
++
++	test	%r8d,%r8d
++	jz	.Lbase2_64_avx2$suffix
++
++	test	\$63,$len
++	jz	.Leven_avx2$suffix
++
++	push	%rbp
++.cfi_push	%rbp
++	mov 	%rsp,%rbp
++	push	%rbx
++.cfi_push	%rbx
++	push	%r12
++.cfi_push	%r12
++	push	%r13
++.cfi_push	%r13
++	push	%r14
++.cfi_push	%r14
++	push	%r15
++.cfi_push	%r15
++.Lblocks_avx2_body$suffix:
++
++	mov	$len,%r15		# reassign $len
++
++	mov	0($ctx),$d1		# load hash value
++	mov	8($ctx),$d2
++	mov	16($ctx),$h2#d
++
++	mov	24($ctx),$r0		# load r
++	mov	32($ctx),$s1
++
++	################################# base 2^26 -> base 2^64
++	mov	$d1#d,$h0#d
++	and	\$`-1*(1<<31)`,$d1
++	mov	$d2,$r1			# borrow $r1
++	mov	$d2#d,$h1#d
++	and	\$`-1*(1<<31)`,$d2
++
++	shr	\$6,$d1
++	shl	\$52,$r1
++	add	$d1,$h0
++	shr	\$12,$h1
++	shr	\$18,$d2
++	add	$r1,$h0
++	adc	$d2,$h1
++
++	mov	$h2,$d1
++	shl	\$40,$d1
++	shr	\$24,$h2
++	add	$d1,$h1
++	adc	\$0,$h2			# can be partially reduced...
++
++	mov	\$-4,$d2		# ... so reduce
++	mov	$h2,$d1
++	and	$h2,$d2
++	shr	\$2,$d1
++	and	\$3,$h2
++	add	$d2,$d1			# =*5
++	add	$d1,$h0
++	adc	\$0,$h1
++	adc	\$0,$h2
++
++	mov	$s1,$r1
++	mov	$s1,%rax
++	shr	\$2,$s1
++	add	$r1,$s1			# s1 = r1 + (r1 >> 2)
++
++.Lbase2_26_pre_avx2$suffix:
++	add	0($inp),$h0		# accumulate input
++	adc	8($inp),$h1
++	lea	16($inp),$inp
++	adc	$padbit,$h2
++	sub	\$16,%r15
++
++	call	__poly1305_block
++	mov	$r1,%rax
++
++	test	\$63,%r15
++	jnz	.Lbase2_26_pre_avx2$suffix
++
++	test	$padbit,$padbit		# if $padbit is zero,
++	jz	.Lstore_base2_64_avx2$suffix	# store hash in base 2^64 format
++
++	################################# base 2^64 -> base 2^26
++	mov	$h0,%rax
++	mov	$h0,%rdx
++	shr	\$52,$h0
++	mov	$h1,$r0
++	mov	$h1,$r1
++	shr	\$26,%rdx
++	and	\$0x3ffffff,%rax	# h[0]
++	shl	\$12,$r0
++	and	\$0x3ffffff,%rdx	# h[1]
++	shr	\$14,$h1
++	or	$r0,$h0
++	shl	\$24,$h2
++	and	\$0x3ffffff,$h0		# h[2]
++	shr	\$40,$r1
++	and	\$0x3ffffff,$h1		# h[3]
++	or	$r1,$h2			# h[4]
++
++	test	%r15,%r15
++	jz	.Lstore_base2_26_avx2$suffix
++
++	vmovd	%rax#d,%x#$H0
++	vmovd	%rdx#d,%x#$H1
++	vmovd	$h0#d,%x#$H2
++	vmovd	$h1#d,%x#$H3
++	vmovd	$h2#d,%x#$H4
++	jmp	.Lproceed_avx2$suffix
++
++.align	32
++.Lstore_base2_64_avx2$suffix:
++	mov	$h0,0($ctx)
++	mov	$h1,8($ctx)
++	mov	$h2,16($ctx)		# note that is_base2_26 is zeroed
++	jmp	.Ldone_avx2$suffix
++
++.align	16
++.Lstore_base2_26_avx2$suffix:
++	mov	%rax#d,0($ctx)		# store hash value base 2^26
++	mov	%rdx#d,4($ctx)
++	mov	$h0#d,8($ctx)
++	mov	$h1#d,12($ctx)
++	mov	$h2#d,16($ctx)
++.align	16
++.Ldone_avx2$suffix:
++	pop 		%r15
++.cfi_restore	%r15
++	pop 		%r14
++.cfi_restore	%r14
++	pop 		%r13
++.cfi_restore	%r13
++	pop 		%r12
++.cfi_restore	%r12
++	pop 		%rbx
++.cfi_restore	%rbx
++	pop 		%rbp
++.cfi_restore 	%rbp
++.Lno_data_avx2$suffix:
++.Lblocks_avx2_epilogue$suffix:
++	ret
++.cfi_endproc
++
++.align	32
++.Lbase2_64_avx2$suffix:
++.cfi_startproc
++	push	%rbp
++.cfi_push	%rbp
++	mov 	%rsp,%rbp
++	push	%rbx
++.cfi_push	%rbx
++	push	%r12
++.cfi_push	%r12
++	push	%r13
++.cfi_push	%r13
++	push	%r14
++.cfi_push	%r14
++	push	%r15
++.cfi_push	%r15
++.Lbase2_64_avx2_body$suffix:
++
++	mov	$len,%r15		# reassign $len
++
++	mov	24($ctx),$r0		# load r
++	mov	32($ctx),$s1
++
++	mov	0($ctx),$h0		# load hash value
++	mov	8($ctx),$h1
++	mov	16($ctx),$h2#d
++
++	mov	$s1,$r1
++	mov	$s1,%rax
++	shr	\$2,$s1
++	add	$r1,$s1			# s1 = r1 + (r1 >> 2)
++
++	test	\$63,$len
++	jz	.Linit_avx2$suffix
++
++.Lbase2_64_pre_avx2$suffix:
++	add	0($inp),$h0		# accumulate input
++	adc	8($inp),$h1
++	lea	16($inp),$inp
++	adc	$padbit,$h2
++	sub	\$16,%r15
++
++	call	__poly1305_block
++	mov	$r1,%rax
++
++	test	\$63,%r15
++	jnz	.Lbase2_64_pre_avx2$suffix
++
++.Linit_avx2$suffix:
++	################################# base 2^64 -> base 2^26
++	mov	$h0,%rax
++	mov	$h0,%rdx
++	shr	\$52,$h0
++	mov	$h1,$d1
++	mov	$h1,$d2
++	shr	\$26,%rdx
++	and	\$0x3ffffff,%rax	# h[0]
++	shl	\$12,$d1
++	and	\$0x3ffffff,%rdx	# h[1]
++	shr	\$14,$h1
++	or	$d1,$h0
++	shl	\$24,$h2
++	and	\$0x3ffffff,$h0		# h[2]
++	shr	\$40,$d2
++	and	\$0x3ffffff,$h1		# h[3]
++	or	$d2,$h2			# h[4]
++
++	vmovd	%rax#d,%x#$H0
++	vmovd	%rdx#d,%x#$H1
++	vmovd	$h0#d,%x#$H2
++	vmovd	$h1#d,%x#$H3
++	vmovd	$h2#d,%x#$H4
++	movl	\$1,20($ctx)		# set is_base2_26
++
++	call	__poly1305_init_avx
++
++.Lproceed_avx2$suffix:
++	mov	%r15,$len			# restore $len
++___
++$code.=<<___ if (!$kernel);
++	mov	OPENSSL_ia32cap_P+8(%rip),%r9d
++	mov	\$`(1<<31|1<<30|1<<16)`,%r11d
++___
++$code.=<<___;
++	pop 		%r15
++.cfi_restore	%r15
++	pop 		%r14
++.cfi_restore	%r14
++	pop 		%r13
++.cfi_restore	%r13
++	pop 		%r12
++.cfi_restore	%r12
++	pop 		%rbx
++.cfi_restore	%rbx
++	pop 		%rbp
++.cfi_restore 	%rbp
++.Lbase2_64_avx2_epilogue$suffix:
++	jmp	.Ldo_avx2$suffix
++.cfi_endproc
++
++.align	32
++.Leven_avx2$suffix:
++.cfi_startproc
++___
++$code.=<<___ if (!$kernel);
++	mov		OPENSSL_ia32cap_P+8(%rip),%r9d
++___
++$code.=<<___;
++	vmovd		4*0($ctx),%x#$H0	# load hash value base 2^26
++	vmovd		4*1($ctx),%x#$H1
++	vmovd		4*2($ctx),%x#$H2
++	vmovd		4*3($ctx),%x#$H3
++	vmovd		4*4($ctx),%x#$H4
++
++.Ldo_avx2$suffix:
++___
++$code.=<<___		if (!$kernel && $avx>2);
++	cmp		\$512,$len
++	jb		.Lskip_avx512
++	and		%r11d,%r9d
++	test		\$`1<<16`,%r9d		# check for AVX512F
++	jnz		.Lblocks_avx512
++.Lskip_avx512$suffix:
++___
++$code.=<<___ if ($avx > 2 && $avx512 && $kernel);
++	cmp		\$512,$len
++	jae		.Lblocks_avx512
++___
++$code.=<<___	if (!$win64);
++	lea		8(%rsp),%r10
++.cfi_def_cfa_register	%r10
++	sub		\$0x128,%rsp
++___
++$code.=<<___	if ($win64);
++	lea		8(%rsp),%r10
++	sub		\$0x1c8,%rsp
++	vmovdqa		%xmm6,-0xb0(%r10)
++	vmovdqa		%xmm7,-0xa0(%r10)
++	vmovdqa		%xmm8,-0x90(%r10)
++	vmovdqa		%xmm9,-0x80(%r10)
++	vmovdqa		%xmm10,-0x70(%r10)
++	vmovdqa		%xmm11,-0x60(%r10)
++	vmovdqa		%xmm12,-0x50(%r10)
++	vmovdqa		%xmm13,-0x40(%r10)
++	vmovdqa		%xmm14,-0x30(%r10)
++	vmovdqa		%xmm15,-0x20(%r10)
++.Ldo_avx2_body$suffix:
++___
++$code.=<<___;
++	lea		.Lconst(%rip),%rcx
++	lea		48+64($ctx),$ctx	# size optimization
++	vmovdqa		96(%rcx),$T0		# .Lpermd_avx2
++
++	# expand and copy pre-calculated table to stack
++	vmovdqu		`16*0-64`($ctx),%x#$T2
++	and		\$-512,%rsp
++	vmovdqu		`16*1-64`($ctx),%x#$T3
++	vmovdqu		`16*2-64`($ctx),%x#$T4
++	vmovdqu		`16*3-64`($ctx),%x#$D0
++	vmovdqu		`16*4-64`($ctx),%x#$D1
++	vmovdqu		`16*5-64`($ctx),%x#$D2
++	lea		0x90(%rsp),%rax		# size optimization
++	vmovdqu		`16*6-64`($ctx),%x#$D3
++	vpermd		$T2,$T0,$T2		# 00003412 -> 14243444
++	vmovdqu		`16*7-64`($ctx),%x#$D4
++	vpermd		$T3,$T0,$T3
++	vmovdqu		`16*8-64`($ctx),%x#$MASK
++	vpermd		$T4,$T0,$T4
++	vmovdqa		$T2,0x00(%rsp)
++	vpermd		$D0,$T0,$D0
++	vmovdqa		$T3,0x20-0x90(%rax)
++	vpermd		$D1,$T0,$D1
++	vmovdqa		$T4,0x40-0x90(%rax)
++	vpermd		$D2,$T0,$D2
++	vmovdqa		$D0,0x60-0x90(%rax)
++	vpermd		$D3,$T0,$D3
++	vmovdqa		$D1,0x80-0x90(%rax)
++	vpermd		$D4,$T0,$D4
++	vmovdqa		$D2,0xa0-0x90(%rax)
++	vpermd		$MASK,$T0,$MASK
++	vmovdqa		$D3,0xc0-0x90(%rax)
++	vmovdqa		$D4,0xe0-0x90(%rax)
++	vmovdqa		$MASK,0x100-0x90(%rax)
++	vmovdqa		64(%rcx),$MASK		# .Lmask26
++
++	################################################################
++	# load input
++	vmovdqu		16*0($inp),%x#$T0
++	vmovdqu		16*1($inp),%x#$T1
++	vinserti128	\$1,16*2($inp),$T0,$T0
++	vinserti128	\$1,16*3($inp),$T1,$T1
++	lea		16*4($inp),$inp
++
++	vpsrldq		\$6,$T0,$T2		# splat input
++	vpsrldq		\$6,$T1,$T3
++	vpunpckhqdq	$T1,$T0,$T4		# 4
++	vpunpcklqdq	$T3,$T2,$T2		# 2:3
++	vpunpcklqdq	$T1,$T0,$T0		# 0:1
++
++	vpsrlq		\$30,$T2,$T3
++	vpsrlq		\$4,$T2,$T2
++	vpsrlq		\$26,$T0,$T1
++	vpsrlq		\$40,$T4,$T4		# 4
++	vpand		$MASK,$T2,$T2		# 2
++	vpand		$MASK,$T0,$T0		# 0
++	vpand		$MASK,$T1,$T1		# 1
++	vpand		$MASK,$T3,$T3		# 3
++	vpor		32(%rcx),$T4,$T4	# padbit, yes, always
++
++	vpaddq		$H2,$T2,$H2		# accumulate input
++	sub		\$64,$len
++	jz		.Ltail_avx2$suffix
++	jmp		.Loop_avx2$suffix
++
++.align	32
++.Loop_avx2$suffix:
++	################################################################
++	# ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4
++	# ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3
++	# ((inp[2]*r^4+inp[6])*r^4+inp[10])*r^2
++	# ((inp[3]*r^4+inp[7])*r^4+inp[11])*r^1
++	#   \________/\__________/
++	################################################################
++	#vpaddq		$H2,$T2,$H2		# accumulate input
++	vpaddq		$H0,$T0,$H0
++	vmovdqa		`32*0`(%rsp),$T0	# r0^4
++	vpaddq		$H1,$T1,$H1
++	vmovdqa		`32*1`(%rsp),$T1	# r1^4
++	vpaddq		$H3,$T3,$H3
++	vmovdqa		`32*3`(%rsp),$T2	# r2^4
++	vpaddq		$H4,$T4,$H4
++	vmovdqa		`32*6-0x90`(%rax),$T3	# s3^4
++	vmovdqa		`32*8-0x90`(%rax),$S4	# s4^4
++
++	# d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
++	# d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
++	# d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
++	# d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
++	# d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
++	#
++	# however, as h2 is "chronologically" first one available pull
++	# corresponding operations up, so it's
++	#
++	# d4 = h2*r2   + h4*r0 + h3*r1             + h1*r3   + h0*r4
++	# d3 = h2*r1   + h3*r0           + h1*r2   + h0*r3   + h4*5*r4
++	# d2 = h2*r0           + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
++	# d1 = h2*5*r4 + h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3
++	# d0 = h2*5*r3 + h0*r0 + h4*5*r1 + h3*5*r2           + h1*5*r4
++
++	vpmuludq	$H2,$T0,$D2		# d2 = h2*r0
++	vpmuludq	$H2,$T1,$D3		# d3 = h2*r1
++	vpmuludq	$H2,$T2,$D4		# d4 = h2*r2
++	vpmuludq	$H2,$T3,$D0		# d0 = h2*s3
++	vpmuludq	$H2,$S4,$D1		# d1 = h2*s4
++
++	vpmuludq	$H0,$T1,$T4		# h0*r1
++	vpmuludq	$H1,$T1,$H2		# h1*r1, borrow $H2 as temp
++	vpaddq		$T4,$D1,$D1		# d1 += h0*r1
++	vpaddq		$H2,$D2,$D2		# d2 += h1*r1
++	vpmuludq	$H3,$T1,$T4		# h3*r1
++	vpmuludq	`32*2`(%rsp),$H4,$H2	# h4*s1
++	vpaddq		$T4,$D4,$D4		# d4 += h3*r1
++	vpaddq		$H2,$D0,$D0		# d0 += h4*s1
++	 vmovdqa	`32*4-0x90`(%rax),$T1	# s2
++
++	vpmuludq	$H0,$T0,$T4		# h0*r0
++	vpmuludq	$H1,$T0,$H2		# h1*r0
++	vpaddq		$T4,$D0,$D0		# d0 += h0*r0
++	vpaddq		$H2,$D1,$D1		# d1 += h1*r0
++	vpmuludq	$H3,$T0,$T4		# h3*r0
++	vpmuludq	$H4,$T0,$H2		# h4*r0
++	 vmovdqu	16*0($inp),%x#$T0	# load input
++	vpaddq		$T4,$D3,$D3		# d3 += h3*r0
++	vpaddq		$H2,$D4,$D4		# d4 += h4*r0
++	 vinserti128	\$1,16*2($inp),$T0,$T0
++
++	vpmuludq	$H3,$T1,$T4		# h3*s2
++	vpmuludq	$H4,$T1,$H2		# h4*s2
++	 vmovdqu	16*1($inp),%x#$T1
++	vpaddq		$T4,$D0,$D0		# d0 += h3*s2
++	vpaddq		$H2,$D1,$D1		# d1 += h4*s2
++	 vmovdqa	`32*5-0x90`(%rax),$H2	# r3
++	vpmuludq	$H1,$T2,$T4		# h1*r2
++	vpmuludq	$H0,$T2,$T2		# h0*r2
++	vpaddq		$T4,$D3,$D3		# d3 += h1*r2
++	vpaddq		$T2,$D2,$D2		# d2 += h0*r2
++	 vinserti128	\$1,16*3($inp),$T1,$T1
++	 lea		16*4($inp),$inp
++
++	vpmuludq	$H1,$H2,$T4		# h1*r3
++	vpmuludq	$H0,$H2,$H2		# h0*r3
++	 vpsrldq	\$6,$T0,$T2		# splat input
++	vpaddq		$T4,$D4,$D4		# d4 += h1*r3
++	vpaddq		$H2,$D3,$D3		# d3 += h0*r3
++	vpmuludq	$H3,$T3,$T4		# h3*s3
++	vpmuludq	$H4,$T3,$H2		# h4*s3
++	 vpsrldq	\$6,$T1,$T3
++	vpaddq		$T4,$D1,$D1		# d1 += h3*s3
++	vpaddq		$H2,$D2,$D2		# d2 += h4*s3
++	 vpunpckhqdq	$T1,$T0,$T4		# 4
++
++	vpmuludq	$H3,$S4,$H3		# h3*s4
++	vpmuludq	$H4,$S4,$H4		# h4*s4
++	 vpunpcklqdq	$T1,$T0,$T0		# 0:1
++	vpaddq		$H3,$D2,$H2		# h2 = d2 + h3*r4
++	vpaddq		$H4,$D3,$H3		# h3 = d3 + h4*r4
++	 vpunpcklqdq	$T3,$T2,$T3		# 2:3
++	vpmuludq	`32*7-0x90`(%rax),$H0,$H4	# h0*r4
++	vpmuludq	$H1,$S4,$H0		# h1*s4
++	vmovdqa		64(%rcx),$MASK		# .Lmask26
++	vpaddq		$H4,$D4,$H4		# h4 = d4 + h0*r4
++	vpaddq		$H0,$D0,$H0		# h0 = d0 + h1*s4
++
++	################################################################
++	# lazy reduction (interleaved with tail of input splat)
++
++	vpsrlq		\$26,$H3,$D3
++	vpand		$MASK,$H3,$H3
++	vpaddq		$D3,$H4,$H4		# h3 -> h4
++
++	vpsrlq		\$26,$H0,$D0
++	vpand		$MASK,$H0,$H0
++	vpaddq		$D0,$D1,$H1		# h0 -> h1
++
++	vpsrlq		\$26,$H4,$D4
++	vpand		$MASK,$H4,$H4
++
++	 vpsrlq		\$4,$T3,$T2
++
++	vpsrlq		\$26,$H1,$D1
++	vpand		$MASK,$H1,$H1
++	vpaddq		$D1,$H2,$H2		# h1 -> h2
++
++	vpaddq		$D4,$H0,$H0
++	vpsllq		\$2,$D4,$D4
++	vpaddq		$D4,$H0,$H0		# h4 -> h0
++
++	 vpand		$MASK,$T2,$T2		# 2
++	 vpsrlq		\$26,$T0,$T1
++
++	vpsrlq		\$26,$H2,$D2
++	vpand		$MASK,$H2,$H2
++	vpaddq		$D2,$H3,$H3		# h2 -> h3
++
++	 vpaddq		$T2,$H2,$H2		# modulo-scheduled
++	 vpsrlq		\$30,$T3,$T3
++
++	vpsrlq		\$26,$H0,$D0
++	vpand		$MASK,$H0,$H0
++	vpaddq		$D0,$H1,$H1		# h0 -> h1
++
++	 vpsrlq		\$40,$T4,$T4		# 4
++
++	vpsrlq		\$26,$H3,$D3
++	vpand		$MASK,$H3,$H3
++	vpaddq		$D3,$H4,$H4		# h3 -> h4
++
++	 vpand		$MASK,$T0,$T0		# 0
++	 vpand		$MASK,$T1,$T1		# 1
++	 vpand		$MASK,$T3,$T3		# 3
++	 vpor		32(%rcx),$T4,$T4	# padbit, yes, always
++
++	sub		\$64,$len
++	jnz		.Loop_avx2$suffix
++
++	.byte		0x66,0x90
++.Ltail_avx2$suffix:
++	################################################################
++	# while above multiplications were by r^4 in all lanes, in last
++	# iteration we multiply least significant lane by r^4 and most
++	# significant one by r, so copy of above except that references
++	# to the precomputed table are displaced by 4...
++
++	#vpaddq		$H2,$T2,$H2		# accumulate input
++	vpaddq		$H0,$T0,$H0
++	vmovdqu		`32*0+4`(%rsp),$T0	# r0^4
++	vpaddq		$H1,$T1,$H1
++	vmovdqu		`32*1+4`(%rsp),$T1	# r1^4
++	vpaddq		$H3,$T3,$H3
++	vmovdqu		`32*3+4`(%rsp),$T2	# r2^4
++	vpaddq		$H4,$T4,$H4
++	vmovdqu		`32*6+4-0x90`(%rax),$T3	# s3^4
++	vmovdqu		`32*8+4-0x90`(%rax),$S4	# s4^4
++
++	vpmuludq	$H2,$T0,$D2		# d2 = h2*r0
++	vpmuludq	$H2,$T1,$D3		# d3 = h2*r1
++	vpmuludq	$H2,$T2,$D4		# d4 = h2*r2
++	vpmuludq	$H2,$T3,$D0		# d0 = h2*s3
++	vpmuludq	$H2,$S4,$D1		# d1 = h2*s4
++
++	vpmuludq	$H0,$T1,$T4		# h0*r1
++	vpmuludq	$H1,$T1,$H2		# h1*r1
++	vpaddq		$T4,$D1,$D1		# d1 += h0*r1
++	vpaddq		$H2,$D2,$D2		# d2 += h1*r1
++	vpmuludq	$H3,$T1,$T4		# h3*r1
++	vpmuludq	`32*2+4`(%rsp),$H4,$H2	# h4*s1
++	vpaddq		$T4,$D4,$D4		# d4 += h3*r1
++	vpaddq		$H2,$D0,$D0		# d0 += h4*s1
++
++	vpmuludq	$H0,$T0,$T4		# h0*r0
++	vpmuludq	$H1,$T0,$H2		# h1*r0
++	vpaddq		$T4,$D0,$D0		# d0 += h0*r0
++	 vmovdqu	`32*4+4-0x90`(%rax),$T1	# s2
++	vpaddq		$H2,$D1,$D1		# d1 += h1*r0
++	vpmuludq	$H3,$T0,$T4		# h3*r0
++	vpmuludq	$H4,$T0,$H2		# h4*r0
++	vpaddq		$T4,$D3,$D3		# d3 += h3*r0
++	vpaddq		$H2,$D4,$D4		# d4 += h4*r0
++
++	vpmuludq	$H3,$T1,$T4		# h3*s2
++	vpmuludq	$H4,$T1,$H2		# h4*s2
++	vpaddq		$T4,$D0,$D0		# d0 += h3*s2
++	vpaddq		$H2,$D1,$D1		# d1 += h4*s2
++	 vmovdqu	`32*5+4-0x90`(%rax),$H2	# r3
++	vpmuludq	$H1,$T2,$T4		# h1*r2
++	vpmuludq	$H0,$T2,$T2		# h0*r2
++	vpaddq		$T4,$D3,$D3		# d3 += h1*r2
++	vpaddq		$T2,$D2,$D2		# d2 += h0*r2
++
++	vpmuludq	$H1,$H2,$T4		# h1*r3
++	vpmuludq	$H0,$H2,$H2		# h0*r3
++	vpaddq		$T4,$D4,$D4		# d4 += h1*r3
++	vpaddq		$H2,$D3,$D3		# d3 += h0*r3
++	vpmuludq	$H3,$T3,$T4		# h3*s3
++	vpmuludq	$H4,$T3,$H2		# h4*s3
++	vpaddq		$T4,$D1,$D1		# d1 += h3*s3
++	vpaddq		$H2,$D2,$D2		# d2 += h4*s3
++
++	vpmuludq	$H3,$S4,$H3		# h3*s4
++	vpmuludq	$H4,$S4,$H4		# h4*s4
++	vpaddq		$H3,$D2,$H2		# h2 = d2 + h3*r4
++	vpaddq		$H4,$D3,$H3		# h3 = d3 + h4*r4
++	vpmuludq	`32*7+4-0x90`(%rax),$H0,$H4		# h0*r4
++	vpmuludq	$H1,$S4,$H0		# h1*s4
++	vmovdqa		64(%rcx),$MASK		# .Lmask26
++	vpaddq		$H4,$D4,$H4		# h4 = d4 + h0*r4
++	vpaddq		$H0,$D0,$H0		# h0 = d0 + h1*s4
++
++	################################################################
++	# horizontal addition
++
++	vpsrldq		\$8,$D1,$T1
++	vpsrldq		\$8,$H2,$T2
++	vpsrldq		\$8,$H3,$T3
++	vpsrldq		\$8,$H4,$T4
++	vpsrldq		\$8,$H0,$T0
++	vpaddq		$T1,$D1,$D1
++	vpaddq		$T2,$H2,$H2
++	vpaddq		$T3,$H3,$H3
++	vpaddq		$T4,$H4,$H4
++	vpaddq		$T0,$H0,$H0
++
++	vpermq		\$0x2,$H3,$T3
++	vpermq		\$0x2,$H4,$T4
++	vpermq		\$0x2,$H0,$T0
++	vpermq		\$0x2,$D1,$T1
++	vpermq		\$0x2,$H2,$T2
++	vpaddq		$T3,$H3,$H3
++	vpaddq		$T4,$H4,$H4
++	vpaddq		$T0,$H0,$H0
++	vpaddq		$T1,$D1,$D1
++	vpaddq		$T2,$H2,$H2
++
++	################################################################
++	# lazy reduction
++
++	vpsrlq		\$26,$H3,$D3
++	vpand		$MASK,$H3,$H3
++	vpaddq		$D3,$H4,$H4		# h3 -> h4
++
++	vpsrlq		\$26,$H0,$D0
++	vpand		$MASK,$H0,$H0
++	vpaddq		$D0,$D1,$H1		# h0 -> h1
++
++	vpsrlq		\$26,$H4,$D4
++	vpand		$MASK,$H4,$H4
++
++	vpsrlq		\$26,$H1,$D1
++	vpand		$MASK,$H1,$H1
++	vpaddq		$D1,$H2,$H2		# h1 -> h2
++
++	vpaddq		$D4,$H0,$H0
++	vpsllq		\$2,$D4,$D4
++	vpaddq		$D4,$H0,$H0		# h4 -> h0
++
++	vpsrlq		\$26,$H2,$D2
++	vpand		$MASK,$H2,$H2
++	vpaddq		$D2,$H3,$H3		# h2 -> h3
++
++	vpsrlq		\$26,$H0,$D0
++	vpand		$MASK,$H0,$H0
++	vpaddq		$D0,$H1,$H1		# h0 -> h1
++
++	vpsrlq		\$26,$H3,$D3
++	vpand		$MASK,$H3,$H3
++	vpaddq		$D3,$H4,$H4		# h3 -> h4
++
++	vmovd		%x#$H0,`4*0-48-64`($ctx)# save partially reduced
++	vmovd		%x#$H1,`4*1-48-64`($ctx)
++	vmovd		%x#$H2,`4*2-48-64`($ctx)
++	vmovd		%x#$H3,`4*3-48-64`($ctx)
++	vmovd		%x#$H4,`4*4-48-64`($ctx)
++___
++$code.=<<___	if ($win64);
++	vmovdqa		-0xb0(%r10),%xmm6
++	vmovdqa		-0xa0(%r10),%xmm7
++	vmovdqa		-0x90(%r10),%xmm8
++	vmovdqa		-0x80(%r10),%xmm9
++	vmovdqa		-0x70(%r10),%xmm10
++	vmovdqa		-0x60(%r10),%xmm11
++	vmovdqa		-0x50(%r10),%xmm12
++	vmovdqa		-0x40(%r10),%xmm13
++	vmovdqa		-0x30(%r10),%xmm14
++	vmovdqa		-0x20(%r10),%xmm15
++	lea		-8(%r10),%rsp
++.Ldo_avx2_epilogue$suffix:
++___
++$code.=<<___	if (!$win64);
++	lea		-8(%r10),%rsp
++.cfi_def_cfa_register	%rsp
++___
++$code.=<<___;
++	vzeroupper
++	ret
++.cfi_endproc
++___
++if($avx > 2 && $avx512) {
++my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24));
++my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29));
++my $PADBIT="%zmm30";
++
++map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3));		# switch to %zmm domain
++map(s/%y/%z/,($D0,$D1,$D2,$D3,$D4));
++map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4));
++map(s/%y/%z/,($MASK));
++
++$code.=<<___;
++.cfi_startproc
++.Lblocks_avx512:
++	mov		\$15,%eax
++	kmovw		%eax,%k2
++___
++$code.=<<___	if (!$win64);
++	lea		8(%rsp),%r10
++.cfi_def_cfa_register	%r10
++	sub		\$0x128,%rsp
++___
++$code.=<<___	if ($win64);
++	lea		8(%rsp),%r10
++	sub		\$0x1c8,%rsp
++	vmovdqa		%xmm6,-0xb0(%r10)
++	vmovdqa		%xmm7,-0xa0(%r10)
++	vmovdqa		%xmm8,-0x90(%r10)
++	vmovdqa		%xmm9,-0x80(%r10)
++	vmovdqa		%xmm10,-0x70(%r10)
++	vmovdqa		%xmm11,-0x60(%r10)
++	vmovdqa		%xmm12,-0x50(%r10)
++	vmovdqa		%xmm13,-0x40(%r10)
++	vmovdqa		%xmm14,-0x30(%r10)
++	vmovdqa		%xmm15,-0x20(%r10)
++.Ldo_avx512_body:
++___
++$code.=<<___;
++	lea		.Lconst(%rip),%rcx
++	lea		48+64($ctx),$ctx	# size optimization
++	vmovdqa		96(%rcx),%y#$T2		# .Lpermd_avx2
++
++	# expand pre-calculated table
++	vmovdqu		`16*0-64`($ctx),%x#$D0	# will become expanded ${R0}
++	and		\$-512,%rsp
++	vmovdqu		`16*1-64`($ctx),%x#$D1	# will become ... ${R1}
++	mov		\$0x20,%rax
++	vmovdqu		`16*2-64`($ctx),%x#$T0	# ... ${S1}
++	vmovdqu		`16*3-64`($ctx),%x#$D2	# ... ${R2}
++	vmovdqu		`16*4-64`($ctx),%x#$T1	# ... ${S2}
++	vmovdqu		`16*5-64`($ctx),%x#$D3	# ... ${R3}
++	vmovdqu		`16*6-64`($ctx),%x#$T3	# ... ${S3}
++	vmovdqu		`16*7-64`($ctx),%x#$D4	# ... ${R4}
++	vmovdqu		`16*8-64`($ctx),%x#$T4	# ... ${S4}
++	vpermd		$D0,$T2,$R0		# 00003412 -> 14243444
++	vpbroadcastq	64(%rcx),$MASK		# .Lmask26
++	vpermd		$D1,$T2,$R1
++	vpermd		$T0,$T2,$S1
++	vpermd		$D2,$T2,$R2
++	vmovdqa64	$R0,0x00(%rsp){%k2}	# save in case $len%128 != 0
++	 vpsrlq		\$32,$R0,$T0		# 14243444 -> 01020304
++	vpermd		$T1,$T2,$S2
++	vmovdqu64	$R1,0x00(%rsp,%rax){%k2}
++	 vpsrlq		\$32,$R1,$T1
++	vpermd		$D3,$T2,$R3
++	vmovdqa64	$S1,0x40(%rsp){%k2}
++	vpermd		$T3,$T2,$S3
++	vpermd		$D4,$T2,$R4
++	vmovdqu64	$R2,0x40(%rsp,%rax){%k2}
++	vpermd		$T4,$T2,$S4
++	vmovdqa64	$S2,0x80(%rsp){%k2}
++	vmovdqu64	$R3,0x80(%rsp,%rax){%k2}
++	vmovdqa64	$S3,0xc0(%rsp){%k2}
++	vmovdqu64	$R4,0xc0(%rsp,%rax){%k2}
++	vmovdqa64	$S4,0x100(%rsp){%k2}
++
++	################################################################
++	# calculate 5th through 8th powers of the key
++	#
++	# d0 = r0'*r0 + r1'*5*r4 + r2'*5*r3 + r3'*5*r2 + r4'*5*r1
++	# d1 = r0'*r1 + r1'*r0   + r2'*5*r4 + r3'*5*r3 + r4'*5*r2
++	# d2 = r0'*r2 + r1'*r1   + r2'*r0   + r3'*5*r4 + r4'*5*r3
++	# d3 = r0'*r3 + r1'*r2   + r2'*r1   + r3'*r0   + r4'*5*r4
++	# d4 = r0'*r4 + r1'*r3   + r2'*r2   + r3'*r1   + r4'*r0
++
++	vpmuludq	$T0,$R0,$D0		# d0 = r0'*r0
++	vpmuludq	$T0,$R1,$D1		# d1 = r0'*r1
++	vpmuludq	$T0,$R2,$D2		# d2 = r0'*r2
++	vpmuludq	$T0,$R3,$D3		# d3 = r0'*r3
++	vpmuludq	$T0,$R4,$D4		# d4 = r0'*r4
++	 vpsrlq		\$32,$R2,$T2
++
++	vpmuludq	$T1,$S4,$M0
++	vpmuludq	$T1,$R0,$M1
++	vpmuludq	$T1,$R1,$M2
++	vpmuludq	$T1,$R2,$M3
++	vpmuludq	$T1,$R3,$M4
++	 vpsrlq		\$32,$R3,$T3
++	vpaddq		$M0,$D0,$D0		# d0 += r1'*5*r4
++	vpaddq		$M1,$D1,$D1		# d1 += r1'*r0
++	vpaddq		$M2,$D2,$D2		# d2 += r1'*r1
++	vpaddq		$M3,$D3,$D3		# d3 += r1'*r2
++	vpaddq		$M4,$D4,$D4		# d4 += r1'*r3
++
++	vpmuludq	$T2,$S3,$M0
++	vpmuludq	$T2,$S4,$M1
++	vpmuludq	$T2,$R1,$M3
++	vpmuludq	$T2,$R2,$M4
++	vpmuludq	$T2,$R0,$M2
++	 vpsrlq		\$32,$R4,$T4
++	vpaddq		$M0,$D0,$D0		# d0 += r2'*5*r3
++	vpaddq		$M1,$D1,$D1		# d1 += r2'*5*r4
++	vpaddq		$M3,$D3,$D3		# d3 += r2'*r1
++	vpaddq		$M4,$D4,$D4		# d4 += r2'*r2
++	vpaddq		$M2,$D2,$D2		# d2 += r2'*r0
++
++	vpmuludq	$T3,$S2,$M0
++	vpmuludq	$T3,$R0,$M3
++	vpmuludq	$T3,$R1,$M4
++	vpmuludq	$T3,$S3,$M1
++	vpmuludq	$T3,$S4,$M2
++	vpaddq		$M0,$D0,$D0		# d0 += r3'*5*r2
++	vpaddq		$M3,$D3,$D3		# d3 += r3'*r0
++	vpaddq		$M4,$D4,$D4		# d4 += r3'*r1
++	vpaddq		$M1,$D1,$D1		# d1 += r3'*5*r3
++	vpaddq		$M2,$D2,$D2		# d2 += r3'*5*r4
++
++	vpmuludq	$T4,$S4,$M3
++	vpmuludq	$T4,$R0,$M4
++	vpmuludq	$T4,$S1,$M0
++	vpmuludq	$T4,$S2,$M1
++	vpmuludq	$T4,$S3,$M2
++	vpaddq		$M3,$D3,$D3		# d3 += r4'*5*r4
++	vpaddq		$M4,$D4,$D4		# d4 += r4'*r0
++	vpaddq		$M0,$D0,$D0		# d0 += r4'*5*r1
++	vpaddq		$M1,$D1,$D1		# d1 += r4'*5*r2
++	vpaddq		$M2,$D2,$D2		# d2 += r4'*5*r3
++
++	################################################################
++	# load input
++	vmovdqu64	16*0($inp),%z#$T3
++	vmovdqu64	16*4($inp),%z#$T4
++	lea		16*8($inp),$inp
++
++	################################################################
++	# lazy reduction
++
++	vpsrlq		\$26,$D3,$M3
++	vpandq		$MASK,$D3,$D3
++	vpaddq		$M3,$D4,$D4		# d3 -> d4
++
++	vpsrlq		\$26,$D0,$M0
++	vpandq		$MASK,$D0,$D0
++	vpaddq		$M0,$D1,$D1		# d0 -> d1
++
++	vpsrlq		\$26,$D4,$M4
++	vpandq		$MASK,$D4,$D4
++
++	vpsrlq		\$26,$D1,$M1
++	vpandq		$MASK,$D1,$D1
++	vpaddq		$M1,$D2,$D2		# d1 -> d2
++
++	vpaddq		$M4,$D0,$D0
++	vpsllq		\$2,$M4,$M4
++	vpaddq		$M4,$D0,$D0		# d4 -> d0
++
++	vpsrlq		\$26,$D2,$M2
++	vpandq		$MASK,$D2,$D2
++	vpaddq		$M2,$D3,$D3		# d2 -> d3
++
++	vpsrlq		\$26,$D0,$M0
++	vpandq		$MASK,$D0,$D0
++	vpaddq		$M0,$D1,$D1		# d0 -> d1
++
++	vpsrlq		\$26,$D3,$M3
++	vpandq		$MASK,$D3,$D3
++	vpaddq		$M3,$D4,$D4		# d3 -> d4
++
++	################################################################
++	# at this point we have 14243444 in $R0-$S4 and 05060708 in
++	# $D0-$D4, ...
++
++	vpunpcklqdq	$T4,$T3,$T0	# transpose input
++	vpunpckhqdq	$T4,$T3,$T4
++
++	# ... since input 64-bit lanes are ordered as 73625140, we could
++	# "vperm" it to 76543210 (here and in each loop iteration), *or*
++	# we could just flow along, hence the goal for $R0-$S4 is
++	# 1858286838784888 ...
++
++	vmovdqa32	128(%rcx),$M0		# .Lpermd_avx512:
++	mov		\$0x7777,%eax
++	kmovw		%eax,%k1
++
++	vpermd		$R0,$M0,$R0		# 14243444 -> 1---2---3---4---
++	vpermd		$R1,$M0,$R1
++	vpermd		$R2,$M0,$R2
++	vpermd		$R3,$M0,$R3
++	vpermd		$R4,$M0,$R4
++
++	vpermd		$D0,$M0,${R0}{%k1}	# 05060708 -> 1858286838784888
++	vpermd		$D1,$M0,${R1}{%k1}
++	vpermd		$D2,$M0,${R2}{%k1}
++	vpermd		$D3,$M0,${R3}{%k1}
++	vpermd		$D4,$M0,${R4}{%k1}
++
++	vpslld		\$2,$R1,$S1		# *5
++	vpslld		\$2,$R2,$S2
++	vpslld		\$2,$R3,$S3
++	vpslld		\$2,$R4,$S4
++	vpaddd		$R1,$S1,$S1
++	vpaddd		$R2,$S2,$S2
++	vpaddd		$R3,$S3,$S3
++	vpaddd		$R4,$S4,$S4
++
++	vpbroadcastq	32(%rcx),$PADBIT	# .L129
++
++	vpsrlq		\$52,$T0,$T2		# splat input
++	vpsllq		\$12,$T4,$T3
++	vporq		$T3,$T2,$T2
++	vpsrlq		\$26,$T0,$T1
++	vpsrlq		\$14,$T4,$T3
++	vpsrlq		\$40,$T4,$T4		# 4
++	vpandq		$MASK,$T2,$T2		# 2
++	vpandq		$MASK,$T0,$T0		# 0
++	#vpandq		$MASK,$T1,$T1		# 1
++	#vpandq		$MASK,$T3,$T3		# 3
++	#vporq		$PADBIT,$T4,$T4		# padbit, yes, always
++
++	vpaddq		$H2,$T2,$H2		# accumulate input
++	sub		\$192,$len
++	jbe		.Ltail_avx512
++	jmp		.Loop_avx512
++
++.align	32
++.Loop_avx512:
++	################################################################
++	# ((inp[0]*r^8+inp[ 8])*r^8+inp[16])*r^8
++	# ((inp[1]*r^8+inp[ 9])*r^8+inp[17])*r^7
++	# ((inp[2]*r^8+inp[10])*r^8+inp[18])*r^6
++	# ((inp[3]*r^8+inp[11])*r^8+inp[19])*r^5
++	# ((inp[4]*r^8+inp[12])*r^8+inp[20])*r^4
++	# ((inp[5]*r^8+inp[13])*r^8+inp[21])*r^3
++	# ((inp[6]*r^8+inp[14])*r^8+inp[22])*r^2
++	# ((inp[7]*r^8+inp[15])*r^8+inp[23])*r^1
++	#   \________/\___________/
++	################################################################
++	#vpaddq		$H2,$T2,$H2		# accumulate input
++
++	# d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
++	# d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
++	# d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
++	# d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
++	# d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
++	#
++	# however, as h2 is "chronologically" first one available pull
++	# corresponding operations up, so it's
++	#
++	# d3 = h2*r1   + h0*r3 + h1*r2   + h3*r0 + h4*5*r4
++	# d4 = h2*r2   + h0*r4 + h1*r3   + h3*r1 + h4*r0
++	# d0 = h2*5*r3 + h0*r0 + h1*5*r4         + h3*5*r2 + h4*5*r1
++	# d1 = h2*5*r4 + h0*r1           + h1*r0 + h3*5*r3 + h4*5*r2
++	# d2 = h2*r0           + h0*r2   + h1*r1 + h3*5*r4 + h4*5*r3
++
++	vpmuludq	$H2,$R1,$D3		# d3 = h2*r1
++	 vpaddq		$H0,$T0,$H0
++	vpmuludq	$H2,$R2,$D4		# d4 = h2*r2
++	 vpandq		$MASK,$T1,$T1		# 1
++	vpmuludq	$H2,$S3,$D0		# d0 = h2*s3
++	 vpandq		$MASK,$T3,$T3		# 3
++	vpmuludq	$H2,$S4,$D1		# d1 = h2*s4
++	 vporq		$PADBIT,$T4,$T4		# padbit, yes, always
++	vpmuludq	$H2,$R0,$D2		# d2 = h2*r0
++	 vpaddq		$H1,$T1,$H1		# accumulate input
++	 vpaddq		$H3,$T3,$H3
++	 vpaddq		$H4,$T4,$H4
++
++	  vmovdqu64	16*0($inp),$T3		# load input
++	  vmovdqu64	16*4($inp),$T4
++	  lea		16*8($inp),$inp
++	vpmuludq	$H0,$R3,$M3
++	vpmuludq	$H0,$R4,$M4
++	vpmuludq	$H0,$R0,$M0
++	vpmuludq	$H0,$R1,$M1
++	vpaddq		$M3,$D3,$D3		# d3 += h0*r3
++	vpaddq		$M4,$D4,$D4		# d4 += h0*r4
++	vpaddq		$M0,$D0,$D0		# d0 += h0*r0
++	vpaddq		$M1,$D1,$D1		# d1 += h0*r1
++
++	vpmuludq	$H1,$R2,$M3
++	vpmuludq	$H1,$R3,$M4
++	vpmuludq	$H1,$S4,$M0
++	vpmuludq	$H0,$R2,$M2
++	vpaddq		$M3,$D3,$D3		# d3 += h1*r2
++	vpaddq		$M4,$D4,$D4		# d4 += h1*r3
++	vpaddq		$M0,$D0,$D0		# d0 += h1*s4
++	vpaddq		$M2,$D2,$D2		# d2 += h0*r2
++
++	  vpunpcklqdq	$T4,$T3,$T0		# transpose input
++	  vpunpckhqdq	$T4,$T3,$T4
++
++	vpmuludq	$H3,$R0,$M3
++	vpmuludq	$H3,$R1,$M4
++	vpmuludq	$H1,$R0,$M1
++	vpmuludq	$H1,$R1,$M2
++	vpaddq		$M3,$D3,$D3		# d3 += h3*r0
++	vpaddq		$M4,$D4,$D4		# d4 += h3*r1
++	vpaddq		$M1,$D1,$D1		# d1 += h1*r0
++	vpaddq		$M2,$D2,$D2		# d2 += h1*r1
++
++	vpmuludq	$H4,$S4,$M3
++	vpmuludq	$H4,$R0,$M4
++	vpmuludq	$H3,$S2,$M0
++	vpmuludq	$H3,$S3,$M1
++	vpaddq		$M3,$D3,$D3		# d3 += h4*s4
++	vpmuludq	$H3,$S4,$M2
++	vpaddq		$M4,$D4,$D4		# d4 += h4*r0
++	vpaddq		$M0,$D0,$D0		# d0 += h3*s2
++	vpaddq		$M1,$D1,$D1		# d1 += h3*s3
++	vpaddq		$M2,$D2,$D2		# d2 += h3*s4
++
++	vpmuludq	$H4,$S1,$M0
++	vpmuludq	$H4,$S2,$M1
++	vpmuludq	$H4,$S3,$M2
++	vpaddq		$M0,$D0,$H0		# h0 = d0 + h4*s1
++	vpaddq		$M1,$D1,$H1		# h1 = d1 + h4*s2
++	vpaddq		$M2,$D2,$H2		# h2 = d2 + h4*s3
++
++	################################################################
++	# lazy reduction (interleaved with input splat)
++
++	 vpsrlq		\$52,$T0,$T2		# splat input
++	 vpsllq		\$12,$T4,$T3
++
++	vpsrlq		\$26,$D3,$H3
++	vpandq		$MASK,$D3,$D3
++	vpaddq		$H3,$D4,$H4		# h3 -> h4
++
++	 vporq		$T3,$T2,$T2
++
++	vpsrlq		\$26,$H0,$D0
++	vpandq		$MASK,$H0,$H0
++	vpaddq		$D0,$H1,$H1		# h0 -> h1
++
++	 vpandq		$MASK,$T2,$T2		# 2
++
++	vpsrlq		\$26,$H4,$D4
++	vpandq		$MASK,$H4,$H4
++
++	vpsrlq		\$26,$H1,$D1
++	vpandq		$MASK,$H1,$H1
++	vpaddq		$D1,$H2,$H2		# h1 -> h2
++
++	vpaddq		$D4,$H0,$H0
++	vpsllq		\$2,$D4,$D4
++	vpaddq		$D4,$H0,$H0		# h4 -> h0
++
++	 vpaddq		$T2,$H2,$H2		# modulo-scheduled
++	 vpsrlq		\$26,$T0,$T1
++
++	vpsrlq		\$26,$H2,$D2
++	vpandq		$MASK,$H2,$H2
++	vpaddq		$D2,$D3,$H3		# h2 -> h3
++
++	 vpsrlq		\$14,$T4,$T3
++
++	vpsrlq		\$26,$H0,$D0
++	vpandq		$MASK,$H0,$H0
++	vpaddq		$D0,$H1,$H1		# h0 -> h1
++
++	 vpsrlq		\$40,$T4,$T4		# 4
++
++	vpsrlq		\$26,$H3,$D3
++	vpandq		$MASK,$H3,$H3
++	vpaddq		$D3,$H4,$H4		# h3 -> h4
++
++	 vpandq		$MASK,$T0,$T0		# 0
++	 #vpandq	$MASK,$T1,$T1		# 1
++	 #vpandq	$MASK,$T3,$T3		# 3
++	 #vporq		$PADBIT,$T4,$T4		# padbit, yes, always
++
++	sub		\$128,$len
++	ja		.Loop_avx512
++
++.Ltail_avx512:
++	################################################################
++	# while above multiplications were by r^8 in all lanes, in last
++	# iteration we multiply least significant lane by r^8 and most
++	# significant one by r, that's why table gets shifted...
++
++	vpsrlq		\$32,$R0,$R0		# 0105020603070408
++	vpsrlq		\$32,$R1,$R1
++	vpsrlq		\$32,$R2,$R2
++	vpsrlq		\$32,$S3,$S3
++	vpsrlq		\$32,$S4,$S4
++	vpsrlq		\$32,$R3,$R3
++	vpsrlq		\$32,$R4,$R4
++	vpsrlq		\$32,$S1,$S1
++	vpsrlq		\$32,$S2,$S2
++
++	################################################################
++	# load either next or last 64 byte of input
++	lea		($inp,$len),$inp
++
++	#vpaddq		$H2,$T2,$H2		# accumulate input
++	vpaddq		$H0,$T0,$H0
++
++	vpmuludq	$H2,$R1,$D3		# d3 = h2*r1
++	vpmuludq	$H2,$R2,$D4		# d4 = h2*r2
++	vpmuludq	$H2,$S3,$D0		# d0 = h2*s3
++	 vpandq		$MASK,$T1,$T1		# 1
++	vpmuludq	$H2,$S4,$D1		# d1 = h2*s4
++	 vpandq		$MASK,$T3,$T3		# 3
++	vpmuludq	$H2,$R0,$D2		# d2 = h2*r0
++	 vporq		$PADBIT,$T4,$T4		# padbit, yes, always
++	 vpaddq		$H1,$T1,$H1		# accumulate input
++	 vpaddq		$H3,$T3,$H3
++	 vpaddq		$H4,$T4,$H4
++
++	  vmovdqu	16*0($inp),%x#$T0
++	vpmuludq	$H0,$R3,$M3
++	vpmuludq	$H0,$R4,$M4
++	vpmuludq	$H0,$R0,$M0
++	vpmuludq	$H0,$R1,$M1
++	vpaddq		$M3,$D3,$D3		# d3 += h0*r3
++	vpaddq		$M4,$D4,$D4		# d4 += h0*r4
++	vpaddq		$M0,$D0,$D0		# d0 += h0*r0
++	vpaddq		$M1,$D1,$D1		# d1 += h0*r1
++
++	  vmovdqu	16*1($inp),%x#$T1
++	vpmuludq	$H1,$R2,$M3
++	vpmuludq	$H1,$R3,$M4
++	vpmuludq	$H1,$S4,$M0
++	vpmuludq	$H0,$R2,$M2
++	vpaddq		$M3,$D3,$D3		# d3 += h1*r2
++	vpaddq		$M4,$D4,$D4		# d4 += h1*r3
++	vpaddq		$M0,$D0,$D0		# d0 += h1*s4
++	vpaddq		$M2,$D2,$D2		# d2 += h0*r2
++
++	  vinserti128	\$1,16*2($inp),%y#$T0,%y#$T0
++	vpmuludq	$H3,$R0,$M3
++	vpmuludq	$H3,$R1,$M4
++	vpmuludq	$H1,$R0,$M1
++	vpmuludq	$H1,$R1,$M2
++	vpaddq		$M3,$D3,$D3		# d3 += h3*r0
++	vpaddq		$M4,$D4,$D4		# d4 += h3*r1
++	vpaddq		$M1,$D1,$D1		# d1 += h1*r0
++	vpaddq		$M2,$D2,$D2		# d2 += h1*r1
++
++	  vinserti128	\$1,16*3($inp),%y#$T1,%y#$T1
++	vpmuludq	$H4,$S4,$M3
++	vpmuludq	$H4,$R0,$M4
++	vpmuludq	$H3,$S2,$M0
++	vpmuludq	$H3,$S3,$M1
++	vpmuludq	$H3,$S4,$M2
++	vpaddq		$M3,$D3,$H3		# h3 = d3 + h4*s4
++	vpaddq		$M4,$D4,$D4		# d4 += h4*r0
++	vpaddq		$M0,$D0,$D0		# d0 += h3*s2
++	vpaddq		$M1,$D1,$D1		# d1 += h3*s3
++	vpaddq		$M2,$D2,$D2		# d2 += h3*s4
++
++	vpmuludq	$H4,$S1,$M0
++	vpmuludq	$H4,$S2,$M1
++	vpmuludq	$H4,$S3,$M2
++	vpaddq		$M0,$D0,$H0		# h0 = d0 + h4*s1
++	vpaddq		$M1,$D1,$H1		# h1 = d1 + h4*s2
++	vpaddq		$M2,$D2,$H2		# h2 = d2 + h4*s3
++
++	################################################################
++	# horizontal addition
++
++	mov		\$1,%eax
++	vpermq		\$0xb1,$H3,$D3
++	vpermq		\$0xb1,$D4,$H4
++	vpermq		\$0xb1,$H0,$D0
++	vpermq		\$0xb1,$H1,$D1
++	vpermq		\$0xb1,$H2,$D2
++	vpaddq		$D3,$H3,$H3
++	vpaddq		$D4,$H4,$H4
++	vpaddq		$D0,$H0,$H0
++	vpaddq		$D1,$H1,$H1
++	vpaddq		$D2,$H2,$H2
++
++	kmovw		%eax,%k3
++	vpermq		\$0x2,$H3,$D3
++	vpermq		\$0x2,$H4,$D4
++	vpermq		\$0x2,$H0,$D0
++	vpermq		\$0x2,$H1,$D1
++	vpermq		\$0x2,$H2,$D2
++	vpaddq		$D3,$H3,$H3
++	vpaddq		$D4,$H4,$H4
++	vpaddq		$D0,$H0,$H0
++	vpaddq		$D1,$H1,$H1
++	vpaddq		$D2,$H2,$H2
++
++	vextracti64x4	\$0x1,$H3,%y#$D3
++	vextracti64x4	\$0x1,$H4,%y#$D4
++	vextracti64x4	\$0x1,$H0,%y#$D0
++	vextracti64x4	\$0x1,$H1,%y#$D1
++	vextracti64x4	\$0x1,$H2,%y#$D2
++	vpaddq		$D3,$H3,${H3}{%k3}{z}	# keep single qword in case
++	vpaddq		$D4,$H4,${H4}{%k3}{z}	# it's passed to .Ltail_avx2
++	vpaddq		$D0,$H0,${H0}{%k3}{z}
++	vpaddq		$D1,$H1,${H1}{%k3}{z}
++	vpaddq		$D2,$H2,${H2}{%k3}{z}
++___
++map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT));
++map(s/%z/%y/,($H0,$H1,$H2,$H3,$H4, $D0,$D1,$D2,$D3,$D4, $MASK));
++$code.=<<___;
++	################################################################
++	# lazy reduction (interleaved with input splat)
++
++	vpsrlq		\$26,$H3,$D3
++	vpand		$MASK,$H3,$H3
++	 vpsrldq	\$6,$T0,$T2		# splat input
++	 vpsrldq	\$6,$T1,$T3
++	 vpunpckhqdq	$T1,$T0,$T4		# 4
++	vpaddq		$D3,$H4,$H4		# h3 -> h4
++
++	vpsrlq		\$26,$H0,$D0
++	vpand		$MASK,$H0,$H0
++	 vpunpcklqdq	$T3,$T2,$T2		# 2:3
++	 vpunpcklqdq	$T1,$T0,$T0		# 0:1
++	vpaddq		$D0,$H1,$H1		# h0 -> h1
++
++	vpsrlq		\$26,$H4,$D4
++	vpand		$MASK,$H4,$H4
++
++	vpsrlq		\$26,$H1,$D1
++	vpand		$MASK,$H1,$H1
++	 vpsrlq		\$30,$T2,$T3
++	 vpsrlq		\$4,$T2,$T2
++	vpaddq		$D1,$H2,$H2		# h1 -> h2
++
++	vpaddq		$D4,$H0,$H0
++	vpsllq		\$2,$D4,$D4
++	 vpsrlq		\$26,$T0,$T1
++	 vpsrlq		\$40,$T4,$T4		# 4
++	vpaddq		$D4,$H0,$H0		# h4 -> h0
++
++	vpsrlq		\$26,$H2,$D2
++	vpand		$MASK,$H2,$H2
++	 vpand		$MASK,$T2,$T2		# 2
++	 vpand		$MASK,$T0,$T0		# 0
++	vpaddq		$D2,$H3,$H3		# h2 -> h3
++
++	vpsrlq		\$26,$H0,$D0
++	vpand		$MASK,$H0,$H0
++	 vpaddq		$H2,$T2,$H2		# accumulate input for .Ltail_avx2
++	 vpand		$MASK,$T1,$T1		# 1
++	vpaddq		$D0,$H1,$H1		# h0 -> h1
++
++	vpsrlq		\$26,$H3,$D3
++	vpand		$MASK,$H3,$H3
++	 vpand		$MASK,$T3,$T3		# 3
++	 vpor		32(%rcx),$T4,$T4	# padbit, yes, always
++	vpaddq		$D3,$H4,$H4		# h3 -> h4
++
++	lea		0x90(%rsp),%rax		# size optimization for .Ltail_avx2
++	add		\$64,$len
++	jnz		.Ltail_avx2$suffix
++
++	vpsubq		$T2,$H2,$H2		# undo input accumulation
++	vmovd		%x#$H0,`4*0-48-64`($ctx)# save partially reduced
++	vmovd		%x#$H1,`4*1-48-64`($ctx)
++	vmovd		%x#$H2,`4*2-48-64`($ctx)
++	vmovd		%x#$H3,`4*3-48-64`($ctx)
++	vmovd		%x#$H4,`4*4-48-64`($ctx)
++	vzeroall
++___
++$code.=<<___	if ($win64);
++	movdqa		-0xb0(%r10),%xmm6
++	movdqa		-0xa0(%r10),%xmm7
++	movdqa		-0x90(%r10),%xmm8
++	movdqa		-0x80(%r10),%xmm9
++	movdqa		-0x70(%r10),%xmm10
++	movdqa		-0x60(%r10),%xmm11
++	movdqa		-0x50(%r10),%xmm12
++	movdqa		-0x40(%r10),%xmm13
++	movdqa		-0x30(%r10),%xmm14
++	movdqa		-0x20(%r10),%xmm15
++	lea		-8(%r10),%rsp
++.Ldo_avx512_epilogue:
++___
++$code.=<<___	if (!$win64);
++	lea		-8(%r10),%rsp
++.cfi_def_cfa_register	%rsp
++___
++$code.=<<___;
++	ret
++.cfi_endproc
++___
++
++}
++
++}
++
++&declare_function("poly1305_blocks_avx2", 32, 4);
++poly1305_blocks_avxN(0);	# shared body generator; the avx512 entry point passes 1 instead
++&end_function("poly1305_blocks_avx2");
++
++if($kernel) {
++	$code .= "#endif\n";	# kernel builds only; presumably closes a conditional opened earlier in the file
++}
++
++#######################################################################
++if ($avx>2) {
++# On entry we have input length divisible by 64. But since inner loop
++# processes 128 bytes per iteration, cases when length is not divisible
++# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this
++# reason stack layout is kept identical to poly1305_blocks_avx2. If not
++# for this tail, we wouldn't have to even allocate stack frame...
++
++if($kernel) {
++	$code .= "#ifdef CONFIG_AS_AVX512\n";	# kernel builds wrap the AVX512 entry point in a preprocessor guard
++}
++
++&declare_function("poly1305_blocks_avx512", 32, 4);
++poly1305_blocks_avxN(1);	# same generator as poly1305_blocks_avx2, called with 1 instead of 0
++&end_function("poly1305_blocks_avx512");
++
++if ($kernel) {
++	$code .= "#endif\n";	# closes the CONFIG_AS_AVX512 guard emitted above
++}
++
++if (!$kernel && $avx>3) {
++########################################################################
++# VPMADD52 version using 2^44 radix.
++#
++# One can argue that base 2^52 would be more natural. Well, even though
++# some operations would be more natural, one has to recognize a couple of
++# things. Base 2^52 doesn't provide advantage over base 2^44 if you look
++# at amount of multiply-n-accumulate operations. Secondly, it makes it
++# impossible to pre-compute multiples of 5 [referred to as s[]/sN in
++# reference implementations], which means that more such operations
++# would have to be performed in inner loop, which in turn makes critical
++# path longer. In other words, even though base 2^44 reduction might
++# look less elegant, overall critical path is actually shorter...
++
++########################################################################
++# Layout of the opaque area is as follows.
++#
++#	unsigned __int64 h[3];		# current hash value base 2^44
++#	unsigned __int64 s[2];		# key value*20 base 2^44
++#	unsigned __int64 r[3];		# key value base 2^44
++#	struct { unsigned __int64 r^1, r^3, r^2, r^4; } R[4];
++#					# r^n positions reflect
++#					# placement in register, not
++#					# memory, R[3] is R[1]*20
++
++$code.=<<___;
++.type	poly1305_init_base2_44,\@function,3
++.align	32
++poly1305_init_base2_44:
++	xor	%rax,%rax
++	mov	%rax,0($ctx)		# initialize hash value
++	mov	%rax,8($ctx)
++	mov	%rax,16($ctx)
++
++.Linit_base2_44:
++	lea	poly1305_blocks_vpmadd52(%rip),%r10	# routine addresses, stored through %rdx below
++	lea	poly1305_emit_base2_44(%rip),%r11
++
++	mov	\$0x0ffffffc0fffffff,%rax	# masks ANDed with the two 64-bit key words
++	mov	\$0x0ffffffc0ffffffc,%rcx
++	and	0($inp),%rax
++	mov	\$0x00000fffffffffff,%r8	# 2^44-1, limb mask
++	and	8($inp),%rcx
++	mov	\$0x00000fffffffffff,%r9
++	and	%rax,%r8
++	shrd	\$44,%rcx,%rax		# masked key shifted right by 44 bits
++	mov	%r8,40($ctx)		# r0
++	and	%r9,%rax		# keep 44 bits: masked key bits 44-87
++	shr	\$24,%rcx		# masked key bits 88 and up
++	mov	%rax,48($ctx)		# r1
++	lea	(%rax,%rax,4),%rax	# *5
++	mov	%rcx,56($ctx)		# r2
++	shl	\$2,%rax		# <<2 on top of *5 gives r1*20
++	lea	(%rcx,%rcx,4),%rcx	# *5
++	shl	\$2,%rcx		# <<2 on top of *5 gives r2*20
++	mov	%rax,24($ctx)		# s1
++	mov	%rcx,32($ctx)		# s2
++	movq	\$-1,64($ctx)		# write impossible (negative) value; the blocks routines test its sign to see whether key powers exist
++___
++$code.=<<___	if ($flavour !~ /elf32/);
++	mov	%r10,0(%rdx)		# 64-bit routine pointers
++	mov	%r11,8(%rdx)
++___
++$code.=<<___	if ($flavour =~ /elf32/);
++	mov	%r10d,0(%rdx)		# elf32 flavours: 32-bit routine pointers
++	mov	%r11d,4(%rdx)
++___
++$code.=<<___;
++	mov	\$1,%eax		# return value 1
++	ret
++.size	poly1305_init_base2_44,.-poly1305_init_base2_44
++___
++{
++my ($H0,$H1,$H2,$r2r1r0,$r1r0s2,$r0s2s1,$Dlo,$Dhi) = map("%ymm$_",(0..5,16,17));
++my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21));
++my ($reduc_mask,$reduc_rght,$reduc_left) = map("%ymm$_",(22..25));	# four names generated, three bound; the ymm25 entry is dropped
++
++$code.=<<___;
++.type	poly1305_blocks_vpmadd52,\@function,4
++.align	32
++poly1305_blocks_vpmadd52:
++	shr	\$4,$len			# byte count -> number of 16-byte blocks
++	jz	.Lno_data_vpmadd52		# too short
++
++	shl	\$40,$padbit			# bit 40 of the third base 2^44 limb is 2^128 (88+40)
++	mov	64($ctx),%r8			# peek on power of the key
++
++	# if powers of the key are not calculated yet, process up to 3
++	# blocks with this single-block subroutine, otherwise ensure that
++	# length is divisible by 2 blocks and pass the rest down to next
++	# subroutine...
++
++	mov	\$3,%rax
++	mov	\$1,%r10
++	cmp	\$4,$len			# is input long
++	cmovae	%r10,%rax
++	test	%r8,%r8				# is power value impossible?
++	cmovns	%r10,%rax
++
++	and	$len,%rax			# is input of favourable length?
++	jz	.Lblocks_vpmadd52_4x
++
++	sub		%rax,$len		# rax blocks are handled here, the rest later
++	mov		\$7,%r10d
++	mov		\$1,%r11d
++	kmovw		%r10d,%k7		# k7 = 0b111: low three qwords
++	lea		.L2_44_inp_permd(%rip),%r10
++	kmovw		%r11d,%k1		# k1 = 0b001: lowest qword only
++
++	vmovq		$padbit,%x#$PAD
++	vmovdqa64	0(%r10),$inp_permd	# .L2_44_inp_permd
++	vmovdqa64	32(%r10),$inp_shift	# .L2_44_inp_shift
++	vpermq		\$0xcf,$PAD,$PAD	# pad bit moves to qword 2, other qwords zero
++	vmovdqa64	64(%r10),$reduc_mask	# .L2_44_mask
++
++	vmovdqu64	0($ctx),${Dlo}{%k7}{z}		# load hash value
++	vmovdqu64	40($ctx),${r2r1r0}{%k7}{z}	# load keys
++	vmovdqu64	32($ctx),${r1r0s2}{%k7}{z}
++	vmovdqu64	24($ctx),${r0s2s1}{%k7}{z}
++
++	vmovdqa64	96(%r10),$reduc_rght	# .L2_44_shift_rgt
++	vmovdqa64	128(%r10),$reduc_left	# .L2_44_shift_lft
++
++	jmp		.Loop_vpmadd52
++
++.align	32
++.Loop_vpmadd52:
++	vmovdqu32	0($inp),%x#$T0		# load input as ----3210
++	lea		16($inp),$inp
++
++	vpermd		$T0,$inp_permd,$T0	# ----3210 -> --322110
++	vpsrlvq		$inp_shift,$T0,$T0
++	vpandq		$reduc_mask,$T0,$T0
++	vporq		$PAD,$T0,$T0
++
++	vpaddq		$T0,$Dlo,$Dlo		# accumulate input
++
++	vpermq		\$0,$Dlo,${H0}{%k7}{z}	# smash hash value
++	vpermq		\$0b01010101,$Dlo,${H1}{%k7}{z}
++	vpermq		\$0b10101010,$Dlo,${H2}{%k7}{z}
++
++	vpxord		$Dlo,$Dlo,$Dlo
++	vpxord		$Dhi,$Dhi,$Dhi
++
++	vpmadd52luq	$r2r1r0,$H0,$Dlo
++	vpmadd52huq	$r2r1r0,$H0,$Dhi
++
++	vpmadd52luq	$r1r0s2,$H1,$Dlo
++	vpmadd52huq	$r1r0s2,$H1,$Dhi
++
++	vpmadd52luq	$r0s2s1,$H2,$Dlo
++	vpmadd52huq	$r0s2s1,$H2,$Dhi
++
++	vpsrlvq		$reduc_rght,$Dlo,$T0	# 0 in topmost qword
++	vpsllvq		$reduc_left,$Dhi,$Dhi	# 0 in topmost qword
++	vpandq		$reduc_mask,$Dlo,$Dlo
++
++	vpaddq		$T0,$Dhi,$Dhi
++
++	vpermq		\$0b10010011,$Dhi,$Dhi	# 0 in lowest qword
++
++	vpaddq		$Dhi,$Dlo,$Dlo		# note topmost qword :-)
++
++	vpsrlvq		$reduc_rght,$Dlo,$T0	# 0 in topmost qword
++	vpandq		$reduc_mask,$Dlo,$Dlo
++
++	vpermq		\$0b10010011,$T0,$T0
++
++	vpaddq		$T0,$Dlo,$Dlo
++
++	vpermq		\$0b10010011,$Dlo,${T0}{%k1}{z}	# topmost qword brought down to qword 0, rest zeroed
++
++	vpaddq		$T0,$Dlo,$Dlo
++	vpsllq		\$2,$T0,$T0		# times 4, so times 5 is added in total
++
++	vpaddq		$T0,$Dlo,$Dlo
++
++	dec		%rax			# one 16-byte block done
++	jnz		.Loop_vpmadd52
++
++	vmovdqu64	$Dlo,0($ctx){%k7}	# store hash value
++
++	test		$len,$len
++	jnz		.Lblocks_vpmadd52_4x
++
++.Lno_data_vpmadd52:
++	ret
++.size	poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52
++___
++}
++{
++########################################################################
++# As implied by its name 4x subroutine processes 4 blocks in parallel
++# (but also handles 4*n+2 block lengths). It takes up to 4th key power
++# and is handled in 256-bit %ymm registers.
++
++my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17));
++my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23));
++my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
++
++$code.=<<___;
++.type	poly1305_blocks_vpmadd52_4x,\@function,4
++.align	32
++poly1305_blocks_vpmadd52_4x:
++	shr	\$4,$len			# byte count -> number of 16-byte blocks
++	jz	.Lno_data_vpmadd52_4x		# too short
++
++	shl	\$40,$padbit			# bit 40 of the third base 2^44 limb is 2^128 (88+40)
++	mov	64($ctx),%r8			# peek on power of the key
++
++.Lblocks_vpmadd52_4x:
++	vpbroadcastq	$padbit,$PAD		# pad bit in every qword
++
++	vmovdqa64	.Lx_mask44(%rip),$mask44
++	mov		\$5,%eax		# 0b0101: qwords 0 and 2
++	vmovdqa64	.Lx_mask42(%rip),$mask42
++	kmovw		%eax,%k1		# used in 2x path
++
++	test		%r8,%r8			# is power value impossible?
++	js		.Linit_vpmadd52		# if it is, then init R[4]
++
++	vmovq		0($ctx),%x#$H0		# load current hash value
++	vmovq		8($ctx),%x#$H1
++	vmovq		16($ctx),%x#$H2
++
++	test		\$3,$len		# is length 4*n+2?
++	jnz		.Lblocks_vpmadd52_2x_do
++
++.Lblocks_vpmadd52_4x_do:
++	vpbroadcastq	64($ctx),$R0		# load 4th power of the key
++	vpbroadcastq	96($ctx),$R1
++	vpbroadcastq	128($ctx),$R2
++	vpbroadcastq	160($ctx),$S1
++
++.Lblocks_vpmadd52_4x_key_loaded:
++	vpsllq		\$2,$R2,$S2		# S2 = R2*5*4
++	vpaddq		$R2,$S2,$S2
++	vpsllq		\$2,$S2,$S2
++
++	test		\$7,$len		# is len 8*n?
++	jz		.Lblocks_vpmadd52_8x
++
++	vmovdqu64	16*0($inp),$T2		# load data
++	vmovdqu64	16*2($inp),$T3
++	lea		16*4($inp),$inp
++
++	vpunpcklqdq	$T3,$T2,$T1		# transpose data
++	vpunpckhqdq	$T3,$T2,$T3
++
++	# at this point 64-bit lanes are ordered as 3-1-2-0
++
++	vpsrlq		\$24,$T3,$T2		# splat the data
++	vporq		$PAD,$T2,$T2
++	 vpaddq		$T2,$H2,$H2		# accumulate input
++	vpandq		$mask44,$T1,$T0
++	vpsrlq		\$44,$T1,$T1
++	vpsllq		\$20,$T3,$T3
++	vporq		$T3,$T1,$T1
++	vpandq		$mask44,$T1,$T1
++
++	sub		\$4,$len
++	jz		.Ltail_vpmadd52_4x
++	jmp		.Loop_vpmadd52_4x
++	ud2
++
++.align	32
++.Linit_vpmadd52:
++	vmovq		24($ctx),%x#$S1		# load key
++	vmovq		56($ctx),%x#$H2
++	vmovq		32($ctx),%x#$S2
++	vmovq		40($ctx),%x#$R0
++	vmovq		48($ctx),%x#$R1
++
++	vmovdqa		$R0,$H0
++	vmovdqa		$R1,$H1
++	vmovdqa		$H2,$R2
++
++	mov		\$2,%eax		# two passes through the multiplication below
++
++.Lmul_init_vpmadd52:
++	vpxorq		$D0lo,$D0lo,$D0lo
++	vpmadd52luq	$H2,$S1,$D0lo
++	vpxorq		$D0hi,$D0hi,$D0hi
++	vpmadd52huq	$H2,$S1,$D0hi
++	vpxorq		$D1lo,$D1lo,$D1lo
++	vpmadd52luq	$H2,$S2,$D1lo
++	vpxorq		$D1hi,$D1hi,$D1hi
++	vpmadd52huq	$H2,$S2,$D1hi
++	vpxorq		$D2lo,$D2lo,$D2lo
++	vpmadd52luq	$H2,$R0,$D2lo
++	vpxorq		$D2hi,$D2hi,$D2hi
++	vpmadd52huq	$H2,$R0,$D2hi
++
++	vpmadd52luq	$H0,$R0,$D0lo
++	vpmadd52huq	$H0,$R0,$D0hi
++	vpmadd52luq	$H0,$R1,$D1lo
++	vpmadd52huq	$H0,$R1,$D1hi
++	vpmadd52luq	$H0,$R2,$D2lo
++	vpmadd52huq	$H0,$R2,$D2hi
++
++	vpmadd52luq	$H1,$S2,$D0lo
++	vpmadd52huq	$H1,$S2,$D0hi
++	vpmadd52luq	$H1,$R0,$D1lo
++	vpmadd52huq	$H1,$R0,$D1hi
++	vpmadd52luq	$H1,$R1,$D2lo
++	vpmadd52huq	$H1,$R1,$D2hi
++
++	################################################################
++	# partial reduction
++	vpsrlq		\$44,$D0lo,$tmp
++	vpsllq		\$8,$D0hi,$D0hi
++	vpandq		$mask44,$D0lo,$H0
++	vpaddq		$tmp,$D0hi,$D0hi
++
++	vpaddq		$D0hi,$D1lo,$D1lo
++
++	vpsrlq		\$44,$D1lo,$tmp
++	vpsllq		\$8,$D1hi,$D1hi
++	vpandq		$mask44,$D1lo,$H1
++	vpaddq		$tmp,$D1hi,$D1hi
++
++	vpaddq		$D1hi,$D2lo,$D2lo
++
++	vpsrlq		\$42,$D2lo,$tmp
++	vpsllq		\$10,$D2hi,$D2hi
++	vpandq		$mask42,$D2lo,$H2
++	vpaddq		$tmp,$D2hi,$D2hi
++
++	vpaddq		$D2hi,$H0,$H0
++	vpsllq		\$2,$D2hi,$D2hi
++
++	vpaddq		$D2hi,$H0,$H0
++
++	vpsrlq		\$44,$H0,$tmp		# additional step
++	vpandq		$mask44,$H0,$H0
++
++	vpaddq		$tmp,$H1,$H1
++
++	dec		%eax			# pass counter, started at 2
++	jz		.Ldone_init_vpmadd52
++
++	vpunpcklqdq	$R1,$H1,$R1		# 1,2
++	vpbroadcastq	%x#$H1,%x#$H1		# 2,2
++	vpunpcklqdq	$R2,$H2,$R2
++	vpbroadcastq	%x#$H2,%x#$H2
++	vpunpcklqdq	$R0,$H0,$R0
++	vpbroadcastq	%x#$H0,%x#$H0
++
++	vpsllq		\$2,$R1,$S1		# S1 = R1*5*4
++	vpsllq		\$2,$R2,$S2		# S2 = R2*5*4
++	vpaddq		$R1,$S1,$S1
++	vpaddq		$R2,$S2,$S2
++	vpsllq		\$2,$S1,$S1
++	vpsllq		\$2,$S2,$S2
++
++	jmp		.Lmul_init_vpmadd52
++	ud2
++
++.align	32
++.Ldone_init_vpmadd52:
++	vinserti128	\$1,%x#$R1,$H1,$R1	# 1,2,3,4
++	vinserti128	\$1,%x#$R2,$H2,$R2
++	vinserti128	\$1,%x#$R0,$H0,$R0
++
++	vpermq		\$0b11011000,$R1,$R1	# 1,3,2,4
++	vpermq		\$0b11011000,$R2,$R2
++	vpermq		\$0b11011000,$R0,$R0
++
++	vpsllq		\$2,$R1,$S1		# S1 = R1*5*4
++	vpaddq		$R1,$S1,$S1
++	vpsllq		\$2,$S1,$S1
++
++	vmovq		0($ctx),%x#$H0		# load current hash value
++	vmovq		8($ctx),%x#$H1
++	vmovq		16($ctx),%x#$H2
++
++	test		\$3,$len		# is length 4*n+2?
++	jnz		.Ldone_init_vpmadd52_2x
++
++	vmovdqu64	$R0,64($ctx)		# save key powers
++	vpbroadcastq	%x#$R0,$R0		# broadcast 4th power
++	vmovdqu64	$R1,96($ctx)
++	vpbroadcastq	%x#$R1,$R1
++	vmovdqu64	$R2,128($ctx)
++	vpbroadcastq	%x#$R2,$R2
++	vmovdqu64	$S1,160($ctx)
++	vpbroadcastq	%x#$S1,$S1
++
++	jmp		.Lblocks_vpmadd52_4x_key_loaded
++	ud2
++
++.align	32
++.Ldone_init_vpmadd52_2x:
++	vmovdqu64	$R0,64($ctx)		# save key powers
++	vpsrldq		\$8,$R0,$R0		# 0-1-0-2
++	vmovdqu64	$R1,96($ctx)
++	vpsrldq		\$8,$R1,$R1
++	vmovdqu64	$R2,128($ctx)
++	vpsrldq		\$8,$R2,$R2
++	vmovdqu64	$S1,160($ctx)
++	vpsrldq		\$8,$S1,$S1
++	jmp		.Lblocks_vpmadd52_2x_key_loaded
++	ud2
++
++.align	32
++.Lblocks_vpmadd52_2x_do:
++	vmovdqu64	128+8($ctx),${R2}{%k1}{z}# load 2nd and 1st key powers
++	vmovdqu64	160+8($ctx),${S1}{%k1}{z}
++	vmovdqu64	64+8($ctx),${R0}{%k1}{z}
++	vmovdqu64	96+8($ctx),${R1}{%k1}{z}
++
++.Lblocks_vpmadd52_2x_key_loaded:
++	vmovdqu64	16*0($inp),$T2		# load data
++	vpxorq		$T3,$T3,$T3
++	lea		16*2($inp),$inp
++
++	vpunpcklqdq	$T3,$T2,$T1		# transpose data
++	vpunpckhqdq	$T3,$T2,$T3
++
++	# at this point 64-bit lanes are ordered as x-1-x-0
++
++	vpsrlq		\$24,$T3,$T2		# splat the data
++	vporq		$PAD,$T2,$T2
++	 vpaddq		$T2,$H2,$H2		# accumulate input
++	vpandq		$mask44,$T1,$T0
++	vpsrlq		\$44,$T1,$T1
++	vpsllq		\$20,$T3,$T3
++	vporq		$T3,$T1,$T1
++	vpandq		$mask44,$T1,$T1
++
++	jmp		.Ltail_vpmadd52_2x
++	ud2
++
++.align	32
++.Loop_vpmadd52_4x:
++	#vpaddq		$T2,$H2,$H2		# accumulate input
++	vpaddq		$T0,$H0,$H0
++	vpaddq		$T1,$H1,$H1
++
++	vpxorq		$D0lo,$D0lo,$D0lo
++	vpmadd52luq	$H2,$S1,$D0lo
++	vpxorq		$D0hi,$D0hi,$D0hi
++	vpmadd52huq	$H2,$S1,$D0hi
++	vpxorq		$D1lo,$D1lo,$D1lo
++	vpmadd52luq	$H2,$S2,$D1lo
++	vpxorq		$D1hi,$D1hi,$D1hi
++	vpmadd52huq	$H2,$S2,$D1hi
++	vpxorq		$D2lo,$D2lo,$D2lo
++	vpmadd52luq	$H2,$R0,$D2lo
++	vpxorq		$D2hi,$D2hi,$D2hi
++	vpmadd52huq	$H2,$R0,$D2hi
++
++	 vmovdqu64	16*0($inp),$T2		# load data
++	 vmovdqu64	16*2($inp),$T3
++	 lea		16*4($inp),$inp
++	vpmadd52luq	$H0,$R0,$D0lo
++	vpmadd52huq	$H0,$R0,$D0hi
++	vpmadd52luq	$H0,$R1,$D1lo
++	vpmadd52huq	$H0,$R1,$D1hi
++	vpmadd52luq	$H0,$R2,$D2lo
++	vpmadd52huq	$H0,$R2,$D2hi
++
++	 vpunpcklqdq	$T3,$T2,$T1		# transpose data
++	 vpunpckhqdq	$T3,$T2,$T3
++	vpmadd52luq	$H1,$S2,$D0lo
++	vpmadd52huq	$H1,$S2,$D0hi
++	vpmadd52luq	$H1,$R0,$D1lo
++	vpmadd52huq	$H1,$R0,$D1hi
++	vpmadd52luq	$H1,$R1,$D2lo
++	vpmadd52huq	$H1,$R1,$D2hi
++
++	################################################################
++	# partial reduction (interleaved with data splat)
++	vpsrlq		\$44,$D0lo,$tmp
++	vpsllq		\$8,$D0hi,$D0hi
++	vpandq		$mask44,$D0lo,$H0
++	vpaddq		$tmp,$D0hi,$D0hi
++
++	 vpsrlq		\$24,$T3,$T2
++	 vporq		$PAD,$T2,$T2
++	vpaddq		$D0hi,$D1lo,$D1lo
++
++	vpsrlq		\$44,$D1lo,$tmp
++	vpsllq		\$8,$D1hi,$D1hi
++	vpandq		$mask44,$D1lo,$H1
++	vpaddq		$tmp,$D1hi,$D1hi
++
++	 vpandq		$mask44,$T1,$T0
++	 vpsrlq		\$44,$T1,$T1
++	 vpsllq		\$20,$T3,$T3
++	vpaddq		$D1hi,$D2lo,$D2lo
++
++	vpsrlq		\$42,$D2lo,$tmp
++	vpsllq		\$10,$D2hi,$D2hi
++	vpandq		$mask42,$D2lo,$H2
++	vpaddq		$tmp,$D2hi,$D2hi
++
++	  vpaddq	$T2,$H2,$H2		# accumulate input
++	vpaddq		$D2hi,$H0,$H0
++	vpsllq		\$2,$D2hi,$D2hi
++
++	vpaddq		$D2hi,$H0,$H0
++	 vporq		$T3,$T1,$T1
++	 vpandq		$mask44,$T1,$T1
++
++	vpsrlq		\$44,$H0,$tmp		# additional step
++	vpandq		$mask44,$H0,$H0
++
++	vpaddq		$tmp,$H1,$H1
++
++	sub		\$4,$len		# len-=64
++	jnz		.Loop_vpmadd52_4x
++
++.Ltail_vpmadd52_4x:
++	vmovdqu64	128($ctx),$R2		# load all key powers
++	vmovdqu64	160($ctx),$S1
++	vmovdqu64	64($ctx),$R0
++	vmovdqu64	96($ctx),$R1
++
++.Ltail_vpmadd52_2x:
++	vpsllq		\$2,$R2,$S2		# S2 = R2*5*4
++	vpaddq		$R2,$S2,$S2
++	vpsllq		\$2,$S2,$S2
++
++	#vpaddq		$T2,$H2,$H2		# accumulate input
++	vpaddq		$T0,$H0,$H0
++	vpaddq		$T1,$H1,$H1
++
++	vpxorq		$D0lo,$D0lo,$D0lo
++	vpmadd52luq	$H2,$S1,$D0lo
++	vpxorq		$D0hi,$D0hi,$D0hi
++	vpmadd52huq	$H2,$S1,$D0hi
++	vpxorq		$D1lo,$D1lo,$D1lo
++	vpmadd52luq	$H2,$S2,$D1lo
++	vpxorq		$D1hi,$D1hi,$D1hi
++	vpmadd52huq	$H2,$S2,$D1hi
++	vpxorq		$D2lo,$D2lo,$D2lo
++	vpmadd52luq	$H2,$R0,$D2lo
++	vpxorq		$D2hi,$D2hi,$D2hi
++	vpmadd52huq	$H2,$R0,$D2hi
++
++	vpmadd52luq	$H0,$R0,$D0lo
++	vpmadd52huq	$H0,$R0,$D0hi
++	vpmadd52luq	$H0,$R1,$D1lo
++	vpmadd52huq	$H0,$R1,$D1hi
++	vpmadd52luq	$H0,$R2,$D2lo
++	vpmadd52huq	$H0,$R2,$D2hi
++
++	vpmadd52luq	$H1,$S2,$D0lo
++	vpmadd52huq	$H1,$S2,$D0hi
++	vpmadd52luq	$H1,$R0,$D1lo
++	vpmadd52huq	$H1,$R0,$D1hi
++	vpmadd52luq	$H1,$R1,$D2lo
++	vpmadd52huq	$H1,$R1,$D2hi
++
++	################################################################
++	# horizontal addition
++
++	mov		\$1,%eax
++	kmovw		%eax,%k1		# k1 = 1: only the lowest qword is kept by the masked adds below
++	vpsrldq		\$8,$D0lo,$T0
++	vpsrldq		\$8,$D0hi,$H0
++	vpsrldq		\$8,$D1lo,$T1
++	vpsrldq		\$8,$D1hi,$H1
++	vpaddq		$T0,$D0lo,$D0lo
++	vpaddq		$H0,$D0hi,$D0hi
++	vpsrldq		\$8,$D2lo,$T2
++	vpsrldq		\$8,$D2hi,$H2
++	vpaddq		$T1,$D1lo,$D1lo
++	vpaddq		$H1,$D1hi,$D1hi
++	 vpermq		\$0x2,$D0lo,$T0
++	 vpermq		\$0x2,$D0hi,$H0
++	vpaddq		$T2,$D2lo,$D2lo
++	vpaddq		$H2,$D2hi,$D2hi
++
++	vpermq		\$0x2,$D1lo,$T1
++	vpermq		\$0x2,$D1hi,$H1
++	vpaddq		$T0,$D0lo,${D0lo}{%k1}{z}
++	vpaddq		$H0,$D0hi,${D0hi}{%k1}{z}
++	vpermq		\$0x2,$D2lo,$T2
++	vpermq		\$0x2,$D2hi,$H2
++	vpaddq		$T1,$D1lo,${D1lo}{%k1}{z}
++	vpaddq		$H1,$D1hi,${D1hi}{%k1}{z}
++	vpaddq		$T2,$D2lo,${D2lo}{%k1}{z}
++	vpaddq		$H2,$D2hi,${D2hi}{%k1}{z}
++
++	################################################################
++	# partial reduction
++	vpsrlq		\$44,$D0lo,$tmp
++	vpsllq		\$8,$D0hi,$D0hi
++	vpandq		$mask44,$D0lo,$H0
++	vpaddq		$tmp,$D0hi,$D0hi
++
++	vpaddq		$D0hi,$D1lo,$D1lo
++
++	vpsrlq		\$44,$D1lo,$tmp
++	vpsllq		\$8,$D1hi,$D1hi
++	vpandq		$mask44,$D1lo,$H1
++	vpaddq		$tmp,$D1hi,$D1hi
++
++	vpaddq		$D1hi,$D2lo,$D2lo
++
++	vpsrlq		\$42,$D2lo,$tmp
++	vpsllq		\$10,$D2hi,$D2hi
++	vpandq		$mask42,$D2lo,$H2
++	vpaddq		$tmp,$D2hi,$D2hi
++
++	vpaddq		$D2hi,$H0,$H0
++	vpsllq		\$2,$D2hi,$D2hi
++
++	vpaddq		$D2hi,$H0,$H0
++
++	vpsrlq		\$44,$H0,$tmp		# additional step
++	vpandq		$mask44,$H0,$H0
++
++	vpaddq		$tmp,$H1,$H1
++						# at this point $len is
++						# either 4*n+2 or 0...
++	sub		\$2,$len		# len-=32
++	ja		.Lblocks_vpmadd52_4x_do
++
++	vmovq		%x#$H0,0($ctx)		# store hash value
++	vmovq		%x#$H1,8($ctx)
++	vmovq		%x#$H2,16($ctx)
++	vzeroall
++
++.Lno_data_vpmadd52_4x:
++	ret
++.size	poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x
++___
++}
++{
++########################################################################
++# As implied by its name 8x subroutine processes 8 blocks in parallel...
++# This is intermediate version, as it's used only in cases when input
++# length is either 8*n, 8*n+1 or 8*n+2...
++
++my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17));
++my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23));
++my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
++my ($RR0,$RR1,$RR2,$SS1,$SS2) = map("%ymm$_",(6..10));
++
++$code.=<<___;
++.type	poly1305_blocks_vpmadd52_8x,\@function,4
++.align	32
++poly1305_blocks_vpmadd52_8x:
++	shr	\$4,$len
++	jz	.Lno_data_vpmadd52_8x		# too short
++
++	shl	\$40,$padbit
++	mov	64($ctx),%r8			# peek on power of the key
++
++	vmovdqa64	.Lx_mask44(%rip),$mask44
++	vmovdqa64	.Lx_mask42(%rip),$mask42
++
++	test	%r8,%r8				# is power value impossible?
++	js	.Linit_vpmadd52			# if it is, then init R[4]
++
++	vmovq	0($ctx),%x#$H0			# load current hash value
++	vmovq	8($ctx),%x#$H1
++	vmovq	16($ctx),%x#$H2
++
++.Lblocks_vpmadd52_8x:
++	################################################################
++	# first we calculate more key powers
++
++	vmovdqu64	128($ctx),$R2		# load 1-3-2-4 powers
++	vmovdqu64	160($ctx),$S1
++	vmovdqu64	64($ctx),$R0
++	vmovdqu64	96($ctx),$R1
++
++	vpsllq		\$2,$R2,$S2		# S2 = R2*5*4
++	vpaddq		$R2,$S2,$S2
++	vpsllq		\$2,$S2,$S2
++
++	vpbroadcastq	%x#$R2,$RR2		# broadcast 4th power
++	vpbroadcastq	%x#$R0,$RR0
++	vpbroadcastq	%x#$R1,$RR1
++
++	vpxorq		$D0lo,$D0lo,$D0lo
++	vpmadd52luq	$RR2,$S1,$D0lo
++	vpxorq		$D0hi,$D0hi,$D0hi
++	vpmadd52huq	$RR2,$S1,$D0hi
++	vpxorq		$D1lo,$D1lo,$D1lo
++	vpmadd52luq	$RR2,$S2,$D1lo
++	vpxorq		$D1hi,$D1hi,$D1hi
++	vpmadd52huq	$RR2,$S2,$D1hi
++	vpxorq		$D2lo,$D2lo,$D2lo
++	vpmadd52luq	$RR2,$R0,$D2lo
++	vpxorq		$D2hi,$D2hi,$D2hi
++	vpmadd52huq	$RR2,$R0,$D2hi
++
++	vpmadd52luq	$RR0,$R0,$D0lo
++	vpmadd52huq	$RR0,$R0,$D0hi
++	vpmadd52luq	$RR0,$R1,$D1lo
++	vpmadd52huq	$RR0,$R1,$D1hi
++	vpmadd52luq	$RR0,$R2,$D2lo
++	vpmadd52huq	$RR0,$R2,$D2hi
++
++	vpmadd52luq	$RR1,$S2,$D0lo
++	vpmadd52huq	$RR1,$S2,$D0hi
++	vpmadd52luq	$RR1,$R0,$D1lo
++	vpmadd52huq	$RR1,$R0,$D1hi
++	vpmadd52luq	$RR1,$R1,$D2lo
++	vpmadd52huq	$RR1,$R1,$D2hi
++
++	################################################################
++	# partial reduction
++	vpsrlq		\$44,$D0lo,$tmp
++	vpsllq		\$8,$D0hi,$D0hi
++	vpandq		$mask44,$D0lo,$RR0
++	vpaddq		$tmp,$D0hi,$D0hi
++
++	vpaddq		$D0hi,$D1lo,$D1lo
++
++	vpsrlq		\$44,$D1lo,$tmp
++	vpsllq		\$8,$D1hi,$D1hi
++	vpandq		$mask44,$D1lo,$RR1
++	vpaddq		$tmp,$D1hi,$D1hi
++
++	vpaddq		$D1hi,$D2lo,$D2lo
++
++	vpsrlq		\$42,$D2lo,$tmp
++	vpsllq		\$10,$D2hi,$D2hi
++	vpandq		$mask42,$D2lo,$RR2
++	vpaddq		$tmp,$D2hi,$D2hi
++
++	vpaddq		$D2hi,$RR0,$RR0
++	vpsllq		\$2,$D2hi,$D2hi
++
++	vpaddq		$D2hi,$RR0,$RR0
++
++	vpsrlq		\$44,$RR0,$tmp		# additional step
++	vpandq		$mask44,$RR0,$RR0
++
++	vpaddq		$tmp,$RR1,$RR1
++
++	################################################################
++	# At this point Rx holds 1324 powers, RRx - 5768, and the goal
++	# is 15263748, which reflects how data is loaded...
++
++	vpunpcklqdq	$R2,$RR2,$T2		# 3748
++	vpunpckhqdq	$R2,$RR2,$R2		# 1526
++	vpunpcklqdq	$R0,$RR0,$T0
++	vpunpckhqdq	$R0,$RR0,$R0
++	vpunpcklqdq	$R1,$RR1,$T1
++	vpunpckhqdq	$R1,$RR1,$R1
++___
++######## switch to %zmm
++map(s/%y/%z/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2);
++map(s/%y/%z/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi);
++map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
++map(s/%y/%z/, $RR0,$RR1,$RR2,$SS1,$SS2);
++
++$code.=<<___;
++	vshufi64x2	\$0x44,$R2,$T2,$RR2	# 15263748
++	vshufi64x2	\$0x44,$R0,$T0,$RR0
++	vshufi64x2	\$0x44,$R1,$T1,$RR1
++
++	vmovdqu64	16*0($inp),$T2		# load data
++	vmovdqu64	16*4($inp),$T3
++	lea		16*8($inp),$inp
++
++	vpsllq		\$2,$RR2,$SS2		# S2 = R2*5*4
++	vpsllq		\$2,$RR1,$SS1		# S1 = R1*5*4
++	vpaddq		$RR2,$SS2,$SS2
++	vpaddq		$RR1,$SS1,$SS1
++	vpsllq		\$2,$SS2,$SS2
++	vpsllq		\$2,$SS1,$SS1
++
++	vpbroadcastq	$padbit,$PAD
++	vpbroadcastq	%x#$mask44,$mask44
++	vpbroadcastq	%x#$mask42,$mask42
++
++	vpbroadcastq	%x#$SS1,$S1		# broadcast 8th power
++	vpbroadcastq	%x#$SS2,$S2
++	vpbroadcastq	%x#$RR0,$R0
++	vpbroadcastq	%x#$RR1,$R1
++	vpbroadcastq	%x#$RR2,$R2
++
++	vpunpcklqdq	$T3,$T2,$T1		# transpose data
++	vpunpckhqdq	$T3,$T2,$T3
++
++	# at this point 64-bit lanes are ordered as 73625140
++
++	vpsrlq		\$24,$T3,$T2		# splat the data
++	vporq		$PAD,$T2,$T2
++	 vpaddq		$T2,$H2,$H2		# accumulate input
++	vpandq		$mask44,$T1,$T0
++	vpsrlq		\$44,$T1,$T1
++	vpsllq		\$20,$T3,$T3
++	vporq		$T3,$T1,$T1
++	vpandq		$mask44,$T1,$T1
++
++	sub		\$8,$len
++	jz		.Ltail_vpmadd52_8x
++	jmp		.Loop_vpmadd52_8x
++
++.align	32
++.Loop_vpmadd52_8x:
++	#vpaddq		$T2,$H2,$H2		# accumulate input
++	vpaddq		$T0,$H0,$H0
++	vpaddq		$T1,$H1,$H1
++
++	vpxorq		$D0lo,$D0lo,$D0lo
++	vpmadd52luq	$H2,$S1,$D0lo
++	vpxorq		$D0hi,$D0hi,$D0hi
++	vpmadd52huq	$H2,$S1,$D0hi
++	vpxorq		$D1lo,$D1lo,$D1lo
++	vpmadd52luq	$H2,$S2,$D1lo
++	vpxorq		$D1hi,$D1hi,$D1hi
++	vpmadd52huq	$H2,$S2,$D1hi
++	vpxorq		$D2lo,$D2lo,$D2lo
++	vpmadd52luq	$H2,$R0,$D2lo
++	vpxorq		$D2hi,$D2hi,$D2hi
++	vpmadd52huq	$H2,$R0,$D2hi
++
++	 vmovdqu64	16*0($inp),$T2		# load data
++	 vmovdqu64	16*4($inp),$T3
++	 lea		16*8($inp),$inp
++	vpmadd52luq	$H0,$R0,$D0lo
++	vpmadd52huq	$H0,$R0,$D0hi
++	vpmadd52luq	$H0,$R1,$D1lo
++	vpmadd52huq	$H0,$R1,$D1hi
++	vpmadd52luq	$H0,$R2,$D2lo
++	vpmadd52huq	$H0,$R2,$D2hi
++
++	 vpunpcklqdq	$T3,$T2,$T1		# transpose data
++	 vpunpckhqdq	$T3,$T2,$T3
++	vpmadd52luq	$H1,$S2,$D0lo
++	vpmadd52huq	$H1,$S2,$D0hi
++	vpmadd52luq	$H1,$R0,$D1lo
++	vpmadd52huq	$H1,$R0,$D1hi
++	vpmadd52luq	$H1,$R1,$D2lo
++	vpmadd52huq	$H1,$R1,$D2hi
++
++	################################################################
++	# partial reduction (interleaved with data splat)
++	vpsrlq		\$44,$D0lo,$tmp
++	vpsllq		\$8,$D0hi,$D0hi
++	vpandq		$mask44,$D0lo,$H0
++	vpaddq		$tmp,$D0hi,$D0hi
++
++	 vpsrlq		\$24,$T3,$T2
++	 vporq		$PAD,$T2,$T2
++	vpaddq		$D0hi,$D1lo,$D1lo
++
++	vpsrlq		\$44,$D1lo,$tmp
++	vpsllq		\$8,$D1hi,$D1hi
++	vpandq		$mask44,$D1lo,$H1
++	vpaddq		$tmp,$D1hi,$D1hi
++
++	 vpandq		$mask44,$T1,$T0
++	 vpsrlq		\$44,$T1,$T1
++	 vpsllq		\$20,$T3,$T3
++	vpaddq		$D1hi,$D2lo,$D2lo
++
++	vpsrlq		\$42,$D2lo,$tmp
++	vpsllq		\$10,$D2hi,$D2hi
++	vpandq		$mask42,$D2lo,$H2
++	vpaddq		$tmp,$D2hi,$D2hi
++
++	  vpaddq	$T2,$H2,$H2		# accumulate input
++	vpaddq		$D2hi,$H0,$H0
++	vpsllq		\$2,$D2hi,$D2hi
++
++	vpaddq		$D2hi,$H0,$H0
++	 vporq		$T3,$T1,$T1
++	 vpandq		$mask44,$T1,$T1
++
++	vpsrlq		\$44,$H0,$tmp		# additional step
++	vpandq		$mask44,$H0,$H0
++
++	vpaddq		$tmp,$H1,$H1
++
++	sub		\$8,$len		# len-=128
++	jnz		.Loop_vpmadd52_8x
++
++.Ltail_vpmadd52_8x:
++	#vpaddq		$T2,$H2,$H2		# accumulate input
++	vpaddq		$T0,$H0,$H0
++	vpaddq		$T1,$H1,$H1
++
++	vpxorq		$D0lo,$D0lo,$D0lo
++	vpmadd52luq	$H2,$SS1,$D0lo
++	vpxorq		$D0hi,$D0hi,$D0hi
++	vpmadd52huq	$H2,$SS1,$D0hi
++	vpxorq		$D1lo,$D1lo,$D1lo
++	vpmadd52luq	$H2,$SS2,$D1lo
++	vpxorq		$D1hi,$D1hi,$D1hi
++	vpmadd52huq	$H2,$SS2,$D1hi
++	vpxorq		$D2lo,$D2lo,$D2lo
++	vpmadd52luq	$H2,$RR0,$D2lo
++	vpxorq		$D2hi,$D2hi,$D2hi
++	vpmadd52huq	$H2,$RR0,$D2hi
++
++	vpmadd52luq	$H0,$RR0,$D0lo
++	vpmadd52huq	$H0,$RR0,$D0hi
++	vpmadd52luq	$H0,$RR1,$D1lo
++	vpmadd52huq	$H0,$RR1,$D1hi
++	vpmadd52luq	$H0,$RR2,$D2lo
++	vpmadd52huq	$H0,$RR2,$D2hi
++
++	vpmadd52luq	$H1,$SS2,$D0lo
++	vpmadd52huq	$H1,$SS2,$D0hi
++	vpmadd52luq	$H1,$RR0,$D1lo
++	vpmadd52huq	$H1,$RR0,$D1hi
++	vpmadd52luq	$H1,$RR1,$D2lo
++	vpmadd52huq	$H1,$RR1,$D2hi
++
++	################################################################
++	# horizontal addition
++
++	mov		\$1,%eax
++	kmovw		%eax,%k1
++	vpsrldq		\$8,$D0lo,$T0
++	vpsrldq		\$8,$D0hi,$H0
++	vpsrldq		\$8,$D1lo,$T1
++	vpsrldq		\$8,$D1hi,$H1
++	vpaddq		$T0,$D0lo,$D0lo
++	vpaddq		$H0,$D0hi,$D0hi
++	vpsrldq		\$8,$D2lo,$T2
++	vpsrldq		\$8,$D2hi,$H2
++	vpaddq		$T1,$D1lo,$D1lo
++	vpaddq		$H1,$D1hi,$D1hi
++	 vpermq		\$0x2,$D0lo,$T0
++	 vpermq		\$0x2,$D0hi,$H0
++	vpaddq		$T2,$D2lo,$D2lo
++	vpaddq		$H2,$D2hi,$D2hi
++
++	vpermq		\$0x2,$D1lo,$T1
++	vpermq		\$0x2,$D1hi,$H1
++	vpaddq		$T0,$D0lo,$D0lo
++	vpaddq		$H0,$D0hi,$D0hi
++	vpermq		\$0x2,$D2lo,$T2
++	vpermq		\$0x2,$D2hi,$H2
++	vpaddq		$T1,$D1lo,$D1lo
++	vpaddq		$H1,$D1hi,$D1hi
++	 vextracti64x4	\$1,$D0lo,%y#$T0
++	 vextracti64x4	\$1,$D0hi,%y#$H0
++	vpaddq		$T2,$D2lo,$D2lo
++	vpaddq		$H2,$D2hi,$D2hi
++
++	vextracti64x4	\$1,$D1lo,%y#$T1
++	vextracti64x4	\$1,$D1hi,%y#$H1
++	vextracti64x4	\$1,$D2lo,%y#$T2
++	vextracti64x4	\$1,$D2hi,%y#$H2
++___
++######## switch back to %ymm
++map(s/%z/%y/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2);
++map(s/%z/%y/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi);
++map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
++
++$code.=<<___;
++	vpaddq		$T0,$D0lo,${D0lo}{%k1}{z}
++	vpaddq		$H0,$D0hi,${D0hi}{%k1}{z}
++	vpaddq		$T1,$D1lo,${D1lo}{%k1}{z}
++	vpaddq		$H1,$D1hi,${D1hi}{%k1}{z}
++	vpaddq		$T2,$D2lo,${D2lo}{%k1}{z}
++	vpaddq		$H2,$D2hi,${D2hi}{%k1}{z}
++
++	################################################################
++	# partial reduction
++	vpsrlq		\$44,$D0lo,$tmp
++	vpsllq		\$8,$D0hi,$D0hi
++	vpandq		$mask44,$D0lo,$H0
++	vpaddq		$tmp,$D0hi,$D0hi
++
++	vpaddq		$D0hi,$D1lo,$D1lo
++
++	vpsrlq		\$44,$D1lo,$tmp
++	vpsllq		\$8,$D1hi,$D1hi
++	vpandq		$mask44,$D1lo,$H1
++	vpaddq		$tmp,$D1hi,$D1hi
++
++	vpaddq		$D1hi,$D2lo,$D2lo
++
++	vpsrlq		\$42,$D2lo,$tmp
++	vpsllq		\$10,$D2hi,$D2hi
++	vpandq		$mask42,$D2lo,$H2
++	vpaddq		$tmp,$D2hi,$D2hi
++
++	vpaddq		$D2hi,$H0,$H0
++	vpsllq		\$2,$D2hi,$D2hi
++
++	vpaddq		$D2hi,$H0,$H0
++
++	vpsrlq		\$44,$H0,$tmp		# additional step
++	vpandq		$mask44,$H0,$H0
++
++	vpaddq		$tmp,$H1,$H1
++
++	################################################################
++
++	vmovq		%x#$H0,0($ctx)
++	vmovq		%x#$H1,8($ctx)
++	vmovq		%x#$H2,16($ctx)
++	vzeroall
++
++.Lno_data_vpmadd52_8x:
++	ret
++.size	poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x
++___
++}
++$code.=<<___;
++.type	poly1305_emit_base2_44,\@function,3
++.align	32
++poly1305_emit_base2_44:
++	mov	0($ctx),%r8	# load hash value
++	mov	8($ctx),%r9
++	mov	16($ctx),%r10
++
++	mov	%r9,%rax	# repack the three limbs into r8:r9:r10
++	shr	\$20,%r9	# limb1 >> 20
++	shl	\$44,%rax	# limb1 << 44
++	mov	%r10,%rcx
++	shr	\$40,%r10	# limb2 >> 40
++	shl	\$24,%rcx	# limb2 << 24
++
++	add	%rax,%r8	# r8 = limb0 + (limb1 << 44)
++	adc	%rcx,%r9	# r9 = (limb1 >> 20) + (limb2 << 24) + carry
++	adc	\$0,%r10	# r10 = (limb2 >> 40) + carry
++
++	mov	%r8,%rax	# rax:rcx = low 128 bits of the hash as is
++	add	\$5,%r8		# compare to modulus
++	mov	%r9,%rcx
++	adc	\$0,%r9
++	adc	\$0,%r10
++	shr	\$2,%r10	# did 130-bit value overflow?
++	cmovnz	%r8,%rax	# if so, take low 128 bits of hash+5 instead
++	cmovnz	%r9,%rcx
++
++	add	0($nonce),%rax	# accumulate nonce
++	adc	8($nonce),%rcx
++	mov	%rax,0($mac)	# write result
++	mov	%rcx,8($mac)
++
++	ret
++.size	poly1305_emit_base2_44,.-poly1305_emit_base2_44
++___
++}	}	}
++}
++
++if (!$kernel)
++{	# chacha20-poly1305 helpers
++my ($out,$inp,$otp,$len)=$win64 ? ("%rcx","%rdx","%r8", "%r9") :  # Win64 order
++                                  ("%rdi","%rsi","%rdx","%rcx");  # Unix order
++$code.=<<___;
++.globl	xor128_encrypt_n_pad
++.type	xor128_encrypt_n_pad,\@abi-omnipotent
++.align	16
++xor128_encrypt_n_pad:
++	sub	$otp,$inp		# make inp relative to otp
++	sub	$otp,$out		# make out relative to otp
++	mov	$len,%r10		# put len aside
++	shr	\$4,$len		# len / 16
++	jz	.Ltail_enc		# fewer than 16 bytes in total
++	nop
++.Loop_enc_xmm:
++	movdqu	($inp,$otp),%xmm0	# load 16 input bytes
++	pxor	($otp),%xmm0		# xor with the 16 bytes at otp
++	movdqu	%xmm0,($out,$otp)	# store result to output
++	movdqa	%xmm0,($otp)		# and write the same result back over otp
++	lea	16($otp),$otp
++	dec	$len
++	jnz	.Loop_enc_xmm
++
++	and	\$15,%r10		# len % 16
++	jz	.Ldone_enc
++
++.Ltail_enc:
++	mov	\$16,$len
++	sub	%r10,$len		# pad count = 16 - remaining bytes
++	xor	%eax,%eax
++.Loop_enc_byte:
++	mov	($inp,$otp),%al		# input byte
++	xor	($otp),%al		# xor with otp byte
++	mov	%al,($out,$otp)		# store result to output
++	mov	%al,($otp)		# and back over otp
++	lea	1($otp),$otp
++	dec	%r10			# NOTE(review): wraps if r10 was 0 on entry (len == 0) - confirm callers never pass 0
++	jnz	.Loop_enc_byte
++
++	xor	%eax,%eax
++.Loop_enc_pad:
++	mov	%al,($otp)		# zero-fill otp up to the 16-byte boundary
++	lea	1($otp),$otp
++	dec	$len
++	jnz	.Loop_enc_pad
++
++.Ldone_enc:
++	mov	$otp,%rax		# return otp advanced past processed and padded bytes
++	ret
++.size	xor128_encrypt_n_pad,.-xor128_encrypt_n_pad
++
++.globl	xor128_decrypt_n_pad
++.type	xor128_decrypt_n_pad,\@abi-omnipotent
++.align	16
++xor128_decrypt_n_pad:
++	sub	$otp,$inp		# make inp relative to otp
++	sub	$otp,$out		# make out relative to otp
++	mov	$len,%r10		# put len aside
++	shr	\$4,$len		# len / 16
++	jz	.Ltail_dec		# fewer than 16 bytes in total
++	nop
++.Loop_dec_xmm:
++	movdqu	($inp,$otp),%xmm0	# load 16 input bytes
++	movdqa	($otp),%xmm1		# load the 16 bytes at otp
++	pxor	%xmm0,%xmm1		# xmm1 = input xor otp
++	movdqu	%xmm1,($out,$otp)	# store result to output
++	movdqa	%xmm0,($otp)		# overwrite otp with the input bytes
++	lea	16($otp),$otp
++	dec	$len
++	jnz	.Loop_dec_xmm
++
++	pxor	%xmm1,%xmm1		# clear the last output block from xmm1
++	and	\$15,%r10		# len % 16
++	jz	.Ldone_dec
++
++.Ltail_dec:
++	mov	\$16,$len
++	sub	%r10,$len		# pad count = 16 - remaining bytes
++	xor	%eax,%eax
++	xor	%r11,%r11
++.Loop_dec_byte:
++	mov	($inp,$otp),%r11b	# input byte
++	mov	($otp),%al		# otp byte
++	xor	%r11b,%al		# al = input xor otp
++	mov	%al,($out,$otp)		# store result to output
++	mov	%r11b,($otp)		# overwrite otp byte with the input byte
++	lea	1($otp),$otp
++	dec	%r10			# NOTE(review): wraps if r10 was 0 on entry (len == 0) - confirm callers never pass 0
++	jnz	.Loop_dec_byte
++
++	xor	%eax,%eax
++.Loop_dec_pad:
++	mov	%al,($otp)		# zero-fill otp up to the 16-byte boundary
++	lea	1($otp),$otp
++	dec	$len
++	jnz	.Loop_dec_pad
++
++.Ldone_dec:
++	mov	$otp,%rax		# return otp advanced past processed and padded bytes
++	ret
++.size	xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
++___
++}
++
++# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
++#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
++if ($win64) {
++$rec="%rcx";
++$frame="%rdx";
++$context="%r8";
++$disp="%r9";
++
++$code.=<<___;
++.extern	__imp_RtlVirtualUnwind
++.type	se_handler,\@abi-omnipotent
++.align	16
++se_handler:
++	push	%rsi
++	push	%rdi
++	push	%rbx
++	push	%rbp
++	push	%r12
++	push	%r13
++	push	%r14
++	push	%r15
++	pushfq
++	sub	\$64,%rsp		# stack area later used for RtlVirtualUnwind arguments
++
++	mov	120($context),%rax	# pull context->Rax
++	mov	248($context),%rbx	# pull context->Rip
++
++	mov	8($disp),%rsi		# disp->ImageBase
++	mov	56($disp),%r11		# disp->HandlerData
++
++	mov	0(%r11),%r10d		# HandlerData[0]
++	lea	(%rsi,%r10),%r10	# prologue label
++	cmp	%r10,%rbx		# context->Rip<.Lprologue
++	jb	.Lcommon_seh_tail	# yes: continue with rax = context->Rax
++
++	mov	152($context),%rax	# pull context->Rsp
++
++	mov	4(%r11),%r10d		# HandlerData[1]
++	lea	(%rsi,%r10),%r10	# epilogue label
++	cmp	%r10,%rbx		# context->Rip>=.Lepilogue
++	jae	.Lcommon_seh_tail	# yes: continue with rax = context->Rsp
++
++	lea	48(%rax),%rax		# step over six 8-byte saved registers
++
++	mov	-8(%rax),%rbx
++	mov	-16(%rax),%rbp
++	mov	-24(%rax),%r12
++	mov	-32(%rax),%r13
++	mov	-40(%rax),%r14
++	mov	-48(%rax),%r15
++	mov	%rbx,144($context)	# restore context->Rbx
++	mov	%rbp,160($context)	# restore context->Rbp
++	mov	%r12,216($context)	# restore context->R12
++	mov	%r13,224($context)	# restore context->R13
++	mov	%r14,232($context)	# restore context->R14
++	mov	%r15,240($context)	# restore context->R15
++
++	jmp	.Lcommon_seh_tail	# shared tail lives inside avx_handler
++.size	se_handler,.-se_handler
++
++.type	avx_handler,\@abi-omnipotent
++.align	16
++avx_handler:
++	push	%rsi
++	push	%rdi
++	push	%rbx
++	push	%rbp
++	push	%r12
++	push	%r13
++	push	%r14
++	push	%r15
++	pushfq
++	sub	\$64,%rsp		# stack area later used for RtlVirtualUnwind arguments
++
++	mov	120($context),%rax	# pull context->Rax
++	mov	248($context),%rbx	# pull context->Rip
++
++	mov	8($disp),%rsi		# disp->ImageBase
++	mov	56($disp),%r11		# disp->HandlerData
++
++	mov	0(%r11),%r10d		# HandlerData[0]
++	lea	(%rsi,%r10),%r10	# prologue label
++	cmp	%r10,%rbx		# context->Rip<prologue label
++	jb	.Lcommon_seh_tail	# yes: continue with rax = context->Rax
++
++	mov	152($context),%rax	# pull context->Rsp
++
++	mov	4(%r11),%r10d		# HandlerData[1]
++	lea	(%rsi,%r10),%r10	# epilogue label
++	cmp	%r10,%rbx		# context->Rip>=epilogue label
++	jae	.Lcommon_seh_tail	# yes: continue with rax = context->Rsp
++
++	mov	208($context),%rax	# pull context->R11
++
++	lea	0x50(%rax),%rsi		# copy source: R11+0x50
++	lea	0xf8(%rax),%rax		# rax = R11+0xf8, stored to context->Rsp below
++	lea	512($context),%rdi	# &context.Xmm6
++	mov	\$20,%ecx		# 20 qwords = ten 16-byte xmm slots
++	.long	0xa548f3fc		# cld; rep movsq
++
++.Lcommon_seh_tail:			# also entered from se_handler
++	mov	8(%rax),%rdi		# value for context->Rdi
++	mov	16(%rax),%rsi		# value for context->Rsi
++	mov	%rax,152($context)	# restore context->Rsp
++	mov	%rsi,168($context)	# restore context->Rsi
++	mov	%rdi,176($context)	# restore context->Rdi
++
++	mov	40($disp),%rdi		# disp->ContextRecord
++	mov	$context,%rsi		# context
++	mov	\$154,%ecx		# sizeof(CONTEXT) in qwords
++	.long	0xa548f3fc		# cld; rep movsq
++
++	mov	$disp,%rsi
++	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
++	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
++	mov	0(%rsi),%r8		# arg3, disp->ControlPc
++	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
++	mov	40(%rsi),%r10		# disp->ContextRecord
++	lea	56(%rsi),%r11		# &disp->HandlerData
++	lea	24(%rsi),%r12		# &disp->EstablisherFrame
++	mov	%r10,32(%rsp)		# arg5
++	mov	%r11,40(%rsp)		# arg6
++	mov	%r12,48(%rsp)		# arg7
++	mov	%rcx,56(%rsp)		# arg8, (NULL)
++	call	*__imp_RtlVirtualUnwind(%rip)
++
++	mov	\$1,%eax		# ExceptionContinueSearch
++	add	\$64,%rsp		# undo the stack adjustment made at handler entry
++	popfq
++	pop	%r15
++	pop	%r14
++	pop	%r13
++	pop	%r12
++	pop	%rbp
++	pop	%rbx
++	pop	%rdi
++	pop	%rsi
++	ret
++.size	avx_handler,.-avx_handler
++
++.section	.pdata
++.align	4
++	.rva	.LSEH_begin_poly1305_init_x86_64
++	.rva	.LSEH_end_poly1305_init_x86_64
++	.rva	.LSEH_info_poly1305_init_x86_64
++
++	.rva	.LSEH_begin_poly1305_blocks_x86_64
++	.rva	.LSEH_end_poly1305_blocks_x86_64
++	.rva	.LSEH_info_poly1305_blocks_x86_64
++
++	.rva	.LSEH_begin_poly1305_emit_x86_64
++	.rva	.LSEH_end_poly1305_emit_x86_64
++	.rva	.LSEH_info_poly1305_emit_x86_64
++___
++$code.=<<___ if ($avx);
++	.rva	.LSEH_begin_poly1305_blocks_avx
++	.rva	.Lbase2_64_avx
++	.rva	.LSEH_info_poly1305_blocks_avx_1
++
++	.rva	.Lbase2_64_avx
++	.rva	.Leven_avx
++	.rva	.LSEH_info_poly1305_blocks_avx_2
++
++	.rva	.Leven_avx
++	.rva	.LSEH_end_poly1305_blocks_avx
++	.rva	.LSEH_info_poly1305_blocks_avx_3
++
++	.rva	.LSEH_begin_poly1305_emit_avx
++	.rva	.LSEH_end_poly1305_emit_avx
++	.rva	.LSEH_info_poly1305_emit_avx
++___
++$code.=<<___ if ($avx>1);
++	.rva	.LSEH_begin_poly1305_blocks_avx2
++	.rva	.Lbase2_64_avx2
++	.rva	.LSEH_info_poly1305_blocks_avx2_1
++
++	.rva	.Lbase2_64_avx2
++	.rva	.Leven_avx2
++	.rva	.LSEH_info_poly1305_blocks_avx2_2
++
++	.rva	.Leven_avx2
++	.rva	.LSEH_end_poly1305_blocks_avx2
++	.rva	.LSEH_info_poly1305_blocks_avx2_3
++___
++$code.=<<___ if ($avx>2);
++	.rva	.LSEH_begin_poly1305_blocks_avx512
++	.rva	.LSEH_end_poly1305_blocks_avx512
++	.rva	.LSEH_info_poly1305_blocks_avx512
++___
++$code.=<<___;
++.section	.xdata
++.align	8
++.LSEH_info_poly1305_init_x86_64:
++	.byte	9,0,0,0
++	.rva	se_handler
++	.rva	.LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64
++
++.LSEH_info_poly1305_blocks_x86_64:
++	.byte	9,0,0,0
++	.rva	se_handler
++	.rva	.Lblocks_body,.Lblocks_epilogue
++
++.LSEH_info_poly1305_emit_x86_64:
++	.byte	9,0,0,0
++	.rva	se_handler
++	.rva	.LSEH_begin_poly1305_emit_x86_64,.LSEH_begin_poly1305_emit_x86_64
++___
++$code.=<<___ if ($avx);
++.LSEH_info_poly1305_blocks_avx_1:
++	.byte	9,0,0,0
++	.rva	se_handler
++	.rva	.Lblocks_avx_body,.Lblocks_avx_epilogue		# HandlerData[]
++
++.LSEH_info_poly1305_blocks_avx_2:
++	.byte	9,0,0,0
++	.rva	se_handler
++	.rva	.Lbase2_64_avx_body,.Lbase2_64_avx_epilogue	# HandlerData[]
++
++.LSEH_info_poly1305_blocks_avx_3:
++	.byte	9,0,0,0
++	.rva	avx_handler
++	.rva	.Ldo_avx_body,.Ldo_avx_epilogue			# HandlerData[]
++
++.LSEH_info_poly1305_emit_avx:
++	.byte	9,0,0,0
++	.rva	se_handler
++	.rva	.LSEH_begin_poly1305_emit_avx,.LSEH_begin_poly1305_emit_avx
++___
++$code.=<<___ if ($avx>1);
++.LSEH_info_poly1305_blocks_avx2_1:
++	.byte	9,0,0,0
++	.rva	se_handler
++	.rva	.Lblocks_avx2_body,.Lblocks_avx2_epilogue	# HandlerData[]
++
++.LSEH_info_poly1305_blocks_avx2_2:
++	.byte	9,0,0,0
++	.rva	se_handler
++	.rva	.Lbase2_64_avx2_body,.Lbase2_64_avx2_epilogue	# HandlerData[]
++
++.LSEH_info_poly1305_blocks_avx2_3:
++	.byte	9,0,0,0
++	.rva	avx_handler
++	.rva	.Ldo_avx2_body,.Ldo_avx2_epilogue		# HandlerData[]
++___
++$code.=<<___ if ($avx>2);
++.LSEH_info_poly1305_blocks_avx512:
++	.byte	9,0,0,0
++	.rva	avx_handler
++	.rva	.Ldo_avx512_body,.Ldo_avx512_epilogue		# HandlerData[]
++___
++}
++
++open SELF,'<',$0;			# re-read this script to copy its leading comment block
++while(<SELF>) {
++	next if (/^#!/);			# skip the interpreter line
++	last if (!s/^#/\/\// and !/^$/);	# turn leading # into //; stop at first line that is neither comment nor blank
++	print;
++}
++close SELF;
++
++foreach (split('\n',$code)) {
++	s/\`([^\`]*)\`/eval($1)/ge;		# evaluate backtick-quoted Perl expressions
++	s/%r([a-z]+)#d/%e$1/g;			# %rax#d -> %eax
++	s/%r([0-9]+)#d/%r$1d/g;			# %r8#d -> %r8d
++	s/%x#%[yz]/%x/g or s/%y#%z/%y/g or s/%z#%[yz]/%z/g;	# %x#%ymm0 -> %xmm0 and similar width overrides
++
++	if ($kernel) {
++		s/(^\.type.*),[0-9]+$/$1/;	# drop the trailing argument count from .type
++		s/(^\.type.*),\@abi-omnipotent$/$1,\@function/;
++		next if /^\.cfi.*/;		# omit .cfi directives
++	}
++
++	print $_,"\n";
++}
++close STDOUT or die "error closing STDOUT: $!";	# output is redirected to the .S file; do not lose write errors
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/compat/Makefile.include	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,98 @@
++# SPDX-License-Identifier: GPL-2.0
++#
++# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++
++kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
++
++ccflags-y += -include $(kbuild-dir)/compat/compat.h
++asflags-y += -include $(kbuild-dir)/compat/compat-asm.h
++
++ifeq ($(wildcard $(srctree)/include/linux/ptr_ring.h),)
++ccflags-y += -I$(kbuild-dir)/compat/ptr_ring/include
++endif
++
++ifeq ($(wildcard $(srctree)/include/linux/siphash.h),)
++ccflags-y += -I$(kbuild-dir)/compat/siphash/include
++wireguard-y += compat/siphash/siphash.o
++endif
++
++ifeq ($(wildcard $(srctree)/include/net/dst_cache.h),)
++ccflags-y += -I$(kbuild-dir)/compat/dst_cache/include
++wireguard-y += compat/dst_cache/dst_cache.o
++endif
++
++ifeq ($(wildcard $(srctree)/arch/x86/include/asm/intel-family.h)$(CONFIG_X86),y)
++ccflags-y += -I$(kbuild-dir)/compat/intel-family-x86/include
++endif
++
++ifeq ($(wildcard $(srctree)/arch/x86/include/asm/fpu/api.h)$(CONFIG_X86),y)
++ccflags-y += -I$(kbuild-dir)/compat/fpu-x86/include
++endif
++
++ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/simd.h)$(shell grep -s -F "generic-y += simd.h" "$(srctree)/arch/$(SRCARCH)/Makefile" "$(srctree)/arch/$(SRCARCH)/include/asm/Kbuild"),)
++ccflags-y += -I$(kbuild-dir)/compat/simd-asm/include
++endif
++
++ifeq ($(wildcard $(srctree)/include/linux/simd.h),)
++ccflags-y += -I$(kbuild-dir)/compat/simd/include
++endif
++
++ifeq ($(wildcard $(srctree)/include/net/udp_tunnel.h),)
++ccflags-y += -I$(kbuild-dir)/compat/udp_tunnel/include
++wireguard-y += compat/udp_tunnel/udp_tunnel.o
++endif
++
++ifeq ($(shell grep -s -F "int crypto_memneq" "$(srctree)/include/crypto/algapi.h"),)
++ccflags-y += -include $(kbuild-dir)/compat/memneq/include.h
++wireguard-y += compat/memneq/memneq.o
++endif
++
++ifeq ($(shell grep -s -F "addr_gen_mode" "$(srctree)/include/linux/ipv6.h"),)
++ccflags-y += -DCOMPAT_CANNOT_USE_DEV_CNF
++endif
++
++ifdef CONFIG_HZ
++ifeq ($(wildcard $(srctree)/include/generated/timeconst.h),)
++ccflags-y += $(shell echo 'define gcd(a,b){auto t;while(b){t=b;b=a%b;a=t;};return a;};hz=$(CONFIG_HZ);cd=gcd(hz,1000000);print "-DHZ_TO_USEC_NUM=",1000000/cd," -DHZ_TO_USEC_DEN=",hz/cd;halt;' | bc -q)
++endif
++endif
++
++ifeq ($(wildcard $(srctree)/arch/arm/include/asm/neon.h)$(CONFIG_ARM),y)
++ccflags-y += -I$(kbuild-dir)/compat/neon-arm/include
++endif
++ifeq ($(wildcard $(srctree)/arch/arm64/include/asm/neon.h)$(CONFIG_ARM64),y)
++ccflags-y += -I$(kbuild-dir)/compat/neon-arm/include
++endif
++
++ifeq ($(CONFIG_X86_64),y)
++	ifeq ($(ssse3_instr),)
++		ssse3_instr := $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
++		ccflags-y += $(ssse3_instr)
++		asflags-y += $(ssse3_instr)
++	endif
++	ifeq ($(avx_instr),)
++		avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
++		ccflags-y += $(avx_instr)
++		asflags-y += $(avx_instr)
++	endif
++	ifeq ($(avx2_instr),)
++		avx2_instr := $(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
++		ccflags-y += $(avx2_instr)
++		asflags-y += $(avx2_instr)
++	endif
++	ifeq ($(avx512_instr),)
++		avx512_instr := $(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
++		ccflags-y += $(avx512_instr)
++		asflags-y += $(avx512_instr)
++	endif
++	ifeq ($(bmi2_instr),)
++		bmi2_instr :=$(call as-instr,mulx %rax$(comma)%rax$(comma)%rax,-DCONFIG_AS_BMI2=1)
++		ccflags-y += $(bmi2_instr)
++		asflags-y += $(bmi2_instr)
++	endif
++	ifeq ($(adx_instr),)
++		adx_instr :=$(call as-instr,adcx %rax$(comma)%rax,-DCONFIG_AS_ADX=1)
++		ccflags-y += $(adx_instr)
++		asflags-y += $(adx_instr)
++	endif
++endif
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/crypto/Makefile.include	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,57 @@
++ifeq ($(CONFIG_X86_64)$(if $(CONFIG_UML),y,n),yn)
++CONFIG_ZINC_ARCH_X86_64 := y
++endif
++ifeq ($(CONFIG_ARM)$(if $(CONFIG_CPU_32v3),y,n),yn)
++CONFIG_ZINC_ARCH_ARM := y
++endif
++ifeq ($(CONFIG_ARM64),y)
++CONFIG_ZINC_ARCH_ARM64 := y
++endif
++ifeq ($(CONFIG_MIPS)$(CONFIG_CPU_MIPS32_R2),yy)
++CONFIG_ZINC_ARCH_MIPS := y
++endif
++ifeq ($(CONFIG_MIPS)$(CONFIG_64BIT),yy)
++CONFIG_ZINC_ARCH_MIPS64 := y
++endif
++
++zinc-y += chacha20/chacha20.o
++zinc-$(CONFIG_ZINC_ARCH_X86_64) += chacha20/chacha20-x86_64.o
++zinc-$(CONFIG_ZINC_ARCH_ARM) += chacha20/chacha20-arm.o chacha20/chacha20-unrolled-arm.o
++zinc-$(CONFIG_ZINC_ARCH_ARM64) += chacha20/chacha20-arm64.o
++zinc-$(CONFIG_ZINC_ARCH_MIPS) += chacha20/chacha20-mips.o
++AFLAGS_chacha20-mips.o += -O2 # This is required to fill the branch delay slots
++
++zinc-y += poly1305/poly1305.o
++zinc-$(CONFIG_ZINC_ARCH_X86_64) += poly1305/poly1305-x86_64.o
++zinc-$(CONFIG_ZINC_ARCH_ARM) += poly1305/poly1305-arm.o
++zinc-$(CONFIG_ZINC_ARCH_ARM64) += poly1305/poly1305-arm64.o
++zinc-$(CONFIG_ZINC_ARCH_MIPS) += poly1305/poly1305-mips.o
++AFLAGS_poly1305-mips.o += -O2 # This is required to fill the branch delay slots
++zinc-$(CONFIG_ZINC_ARCH_MIPS64) += poly1305/poly1305-mips64.o
++
++zinc-y += chacha20poly1305.o
++
++zinc-y += blake2s/blake2s.o
++zinc-$(CONFIG_ZINC_ARCH_X86_64) += blake2s/blake2s-x86_64.o
++
++zinc-y += curve25519/curve25519.o
++zinc-$(CONFIG_ZINC_ARCH_ARM) += curve25519/curve25519-arm.o
++
++quiet_cmd_perlasm = PERLASM $@
++      cmd_perlasm = $(PERL) $< > $@
++$(obj)/%.S: $(src)/%.pl FORCE
++	$(call if_changed,perlasm)
++kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
++targets := $(patsubst $(kbuild-dir)/%.pl,%.S,$(wildcard $(patsubst %.o,$(kbuild-dir)/crypto/zinc/%.pl,$(zinc-y) $(zinc-m) $(zinc-))))
++
++# Old kernels don't set this, which causes trouble.
++.SECONDARY:
++
++wireguard-y += $(addprefix crypto/zinc/,$(zinc-y))
++ccflags-y += -I$(kbuild-dir)/crypto/include
++ccflags-$(CONFIG_ZINC_ARCH_X86_64) += -DCONFIG_ZINC_ARCH_X86_64
++ccflags-$(CONFIG_ZINC_ARCH_ARM) += -DCONFIG_ZINC_ARCH_ARM
++ccflags-$(CONFIG_ZINC_ARCH_ARM64) += -DCONFIG_ZINC_ARCH_ARM64
++ccflags-$(CONFIG_ZINC_ARCH_MIPS) += -DCONFIG_ZINC_ARCH_MIPS
++ccflags-$(CONFIG_ZINC_ARCH_MIPS64) += -DCONFIG_ZINC_ARCH_MIPS64
++ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DCONFIG_ZINC_SELFTEST
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/Makefile	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,16 @@
++# SPDX-License-Identifier: GPL-2.0
++#
++# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
++
++ccflags-y := -O3 -fvisibility=hidden
++ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG -g
++ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
++ccflags-y += -Wframe-larger-than=2048
++ccflags-$(if $(WIREGUARD_VERSION),y,) += -D'WIREGUARD_VERSION="$(WIREGUARD_VERSION)"'
++
++wireguard-y := main.o noise.o device.o peer.o timers.o queueing.o send.o receive.o socket.o peerlookup.o allowedips.o ratelimiter.o cookie.o netlink.o
++
++include $(src)/crypto/Makefile.include
++include $(src)/compat/Makefile.include
++
++obj-$(if $(KBUILD_EXTMOD),m,$(CONFIG_WIREGUARD)) := wireguard.o
+--- /dev/null	2020-03-19 20:46:08.763583611 +0200
++++ b/net/wireguard/Kconfig	2020-03-19 07:15:25.000000000 +0200
+@@ -0,0 +1,33 @@
++config WIREGUARD
++	tristate "IP: WireGuard secure network tunnel"
++	depends on NET && INET
++	depends on IPV6 || !IPV6
++	select NET_UDP_TUNNEL
++	select DST_CACHE
++	select CRYPTO
++	select CRYPTO_ALGAPI
++	select VFP
++	select VFPv3 if CPU_V7
++	select NEON if CPU_V7
++	select KERNEL_MODE_NEON if CPU_V7
++	default m
++	help
++	  WireGuard is a secure, fast, and easy to use replacement for IPsec
++	  that uses modern cryptography and clever networking tricks. It's
++	  designed to be fairly general purpose and abstract enough to fit most
++	  use cases, while at the same time remaining extremely simple to
++	  configure. See www.wireguard.com for more info.
++
++	  It's safe to say Y or M here, as the driver is very lightweight and
++	  is only in use when an administrator chooses to add an interface.
++
++config WIREGUARD_DEBUG
++	bool "Debugging checks and verbose messages"
++	depends on WIREGUARD
++	help
++	  This will write log messages for handshake and other events
++	  that occur for a WireGuard interface. It will also perform some
++	  extra validation checks and unit tests at various points. This is
++	  only useful for debugging.
++
++	  Say N here unless you know what you're doing.
+--- a/net/Kconfig
++++ b/net/Kconfig
+@@ -85,2 +85,3 @@ config INET
+ if INET
++source "net/wireguard/Kconfig"
+ source "net/ipv4/Kconfig"
+--- a/net/Makefile
++++ b/net/Makefile
+@@ -16,2 +16,3 @@
+ obj-$(CONFIG_NETFILTER)		+= netfilter/
++obj-$(CONFIG_WIREGUARD)		+= wireguard/
+ obj-$(CONFIG_INET)		+= ipv4/
Not Available

benbullard79 [@T] cox.netNo Comment.1695d 22hrs
benbullard79 [@T] cox.netIf is is not OK >you< have to say something.1695d 21hrs
benbullard79 [@T] cox.netPackage won't install, conflicts with fuse-exfat.1695d 21hrs
benbullard79 [@T] cox.netNo Comment.1695d 20hrs