diff --git a/net/wireguard/Kbuild b/net/wireguard/Kbuild new file mode 100644 index 000000000000..f1b4ad76fdf6 --- /dev/null +++ b/net/wireguard/Kbuild @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + +ccflags-y := -O3 -fvisibility=hidden +ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG -g +ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' +ccflags-y += -Wframe-larger-than=2048 +ccflags-$(if $(WIREGUARD_VERSION),y,) += -D'WIREGUARD_VERSION="$(WIREGUARD_VERSION)"' + +wireguard-y := main.o noise.o device.o peer.o timers.o queueing.o send.o receive.o socket.o peerlookup.o allowedips.o ratelimiter.o cookie.o netlink.o + +include $(src)/crypto/Kbuild.include +include $(src)/compat/Kbuild.include + +obj-$(if $(KBUILD_EXTMOD),m,$(CONFIG_WIREGUARD)) := wireguard.o diff --git a/net/wireguard/Makefile b/net/wireguard/Makefile new file mode 100644 index 000000000000..c20bfd39ae2d --- /dev/null +++ b/net/wireguard/Makefile @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + +KERNELRELEASE ?= $(shell uname -r) +KERNELDIR ?= /lib/modules/$(KERNELRELEASE)/build +PREFIX ?= /usr +DESTDIR ?= +SRCDIR ?= $(PREFIX)/src +DKMSDIR ?= $(SRCDIR)/wireguard +DEPMOD ?= depmod + +PWD := $(shell pwd) + +all: module +debug: module-debug + +ifneq ($(V),1) +MAKEFLAGS += --no-print-directory +endif + +WIREGUARD_VERSION = $(patsubst v%,%,$(shell GIT_CEILING_DIRECTORIES="$(PWD)/../.." git describe --dirty 2>/dev/null)) + +module: + @$(MAKE) -C $(KERNELDIR) M=$(PWD) WIREGUARD_VERSION="$(WIREGUARD_VERSION)" modules + +module-debug: + @$(MAKE) -C $(KERNELDIR) M=$(PWD) V=1 CONFIG_WIREGUARD_DEBUG=y WIREGUARD_VERSION="$(WIREGUARD_VERSION)" modules + +clean: + @$(MAKE) -C $(KERNELDIR) M=$(PWD) clean + +module-install: + @$(MAKE) -C $(KERNELDIR) M=$(PWD) WIREGUARD_VERSION="$(WIREGUARD_VERSION)" modules_install + $(DEPMOD) -a $(KERNELRELEASE) + +install: module-install + +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) +DKMS_SOURCES := version.h Makefile Kbuild Kconfig dkms.conf $(filter-out version.h wireguard.mod.c tests/%,$(call rwildcard,,*.c *.h *.S *.pl *.include)) +dkms-install: $(DKMS_SOURCES) + @$(foreach f,$(DKMS_SOURCES),install -v -m0644 -D $(f) $(DESTDIR)$(DKMSDIR)/$(f);) + +style: + $(KERNELDIR)/scripts/checkpatch.pl -f --max-line-length=4000 --codespell --color=always $(filter-out wireguard.mod.c,$(wildcard *.c)) $(wildcard *.h) $(wildcard selftest/*.c) + +check: clean + scan-build --html-title=wireguard-linux-compat -maxloop 100 --view --keep-going $(MAKE) module CONFIG_WIREGUARD_DEBUG=y C=2 CF="-D__CHECK_ENDIAN__" + +coccicheck: clean + @$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_WIREGUARD_DEBUG=y coccicheck MODE=report + +cloc: + @cloc --skip-uniqueness --by-file --extract-with="$$(readlink -f ../kernel-tree-scripts/filter-compat-defines.sh) >FILE< > \$$(basename >FILE<)" $(filter-out wireguard.mod.c,$(wildcard *.c)) $(wildcard *.h) + +-include tests/debug.mk + +.PHONY: all module module-debug module-install install dkms-install clean cloc check style diff --git a/net/wireguard/compat/compat.h b/net/wireguard/compat/compat.h new file mode 100644 index 000000000000..0d109be3b8ff --- /dev/null +++ b/net/wireguard/compat/compat.h @@ -0,0 +1,1070 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_COMPAT_H +#define _WG_COMPAT_H + +#include +#include +#include +#include + +#ifdef RHEL_MAJOR +#if RHEL_MAJOR == 7 +#define ISRHEL7 +#elif RHEL_MAJOR == 8 +#define ISRHEL8 +#ifdef RHEL_MINOR +#if RHEL_MINOR == 2 +#define ISRHEL82 +#endif +#endif +#endif +#endif +#ifdef UTS_UBUNTU_RELEASE_ABI +#if LINUX_VERSION_CODE == KERNEL_VERSION(3, 13, 11) +#define ISUBUNTU1404 +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) +#define ISUBUNTU1604 +#endif +#endif +#ifdef CONFIG_SUSE_KERNEL +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +#define ISOPENSUSE42 +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) +#define ISOPENSUSE15 +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) +#error "WireGuard requires Linux >= 3.10" +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +#error "WireGuard has been merged into Linux >= 5.6 and therefore this compatibility module is no longer required." +#endif + +#if defined(ISRHEL7) +#include +#define headers_end headers_start +#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) +#define headers_start data +#define headers_end data +#endif + +#include +#include +#ifndef __ro_after_init +#define __ro_after_init __read_mostly +#endif + +#include +#ifndef READ_ONCE +#define READ_ONCE ACCESS_ONCE +#endif +#ifndef WRITE_ONCE +#ifdef ACCESS_ONCE_RW +#define WRITE_ONCE(p, v) (ACCESS_ONCE_RW(p) = (v)) +#else +#define WRITE_ONCE(p, v) (ACCESS_ONCE(p) = (v)) +#endif +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) +#include "udp_tunnel/udp_tunnel_partial_compat.h" +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(DEBUG) && defined(net_dbg_ratelimited) +#undef net_dbg_ratelimited +#define net_dbg_ratelimited(fmt, ...) do { if (0) no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +#include +#ifndef RCU_LOCKDEP_WARN +#define RCU_LOCKDEP_WARN(cond, message) rcu_lockdep_assert(!(cond), message) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(ISRHEL7) +#define ipv6_dst_lookup(a, b, c, d) ipv6_dst_lookup(b, c, d) +#endif + +#if (LINUX_VERSION_CODE == KERNEL_VERSION(4, 4, 0) || \ + (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 5) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)) || \ + (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 17) && LINUX_VERSION_CODE > KERNEL_VERSION(3, 19, 0)) || \ + (LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 27) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || \ + (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 8) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)) || \ + (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 40) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || \ + (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 54))) && !defined(ISUBUNTU1404) && !defined(ISRHEL7) +#include +#include +#define IP6_ECN_set_ce(a, b) IP6_ECN_set_ce(b) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && IS_ENABLED(CONFIG_IPV6) && !defined(ISRHEL7) +#include +struct ipv6_stub_type { + void *udpv6_encap_enable; + int (*ipv6_dst_lookup)(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); +}; +static const struct ipv6_stub_type ipv6_stub_impl = { + .udpv6_encap_enable = (void *)1, + .ipv6_dst_lookup = ip6_dst_lookup +}; +static const struct ipv6_stub_type *ipv6_stub = &ipv6_stub_impl; +#endif + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) && IS_ENABLED(CONFIG_IPV6) && !defined(ISOPENSUSE42) && !defined(ISRHEL7) +#include +static inline bool ipv6_mod_enabled(void) +{ + return ipv6_stub != NULL && ipv6_stub->udpv6_encap_enable != NULL; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) && !defined(ISRHEL7) +#include +static inline void skb_reset_tc(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_CLS_ACT + skb->tc_verd = 0; +#endif +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) +#include +#include +static inline u32 __compat_get_random_u32(void) +{ + static siphash_key_t key; + static u32 counter = 0; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + static bool has_seeded = false; + if (unlikely(!has_seeded)) { + get_random_bytes(&key, sizeof(key)); + has_seeded = true; + } +#else + get_random_once(&key, sizeof(key)); +#endif + return siphash_2u32(counter++, get_random_int(), &key); +} +#define get_random_u32 __compat_get_random_u32 +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) && !defined(ISRHEL7) +static inline void netif_keep_dst(struct net_device *dev) +{ + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; +} +#define COMPAT_CANNOT_USE_CSUM_LEVEL +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && !defined(ISRHEL7) +#include +#ifndef netdev_alloc_pcpu_stats +#define pcpu_sw_netstats pcpu_tstats +#endif +#ifndef netdev_alloc_pcpu_stats +#define netdev_alloc_pcpu_stats alloc_percpu +#endif +#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && !defined(ISRHEL7) +#include +#ifndef netdev_alloc_pcpu_stats +#define netdev_alloc_pcpu_stats(type) \ +({ \ + typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \ + if (pcpu_stats) { \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ + typeof(type) *stat; \ + stat = per_cpu_ptr(pcpu_stats, __cpu); \ + u64_stats_init(&stat->syncp); \ + } \ + } \ + pcpu_stats; \ +}) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && !defined(ISRHEL7) +#include "checksum/checksum_partial_compat.h" +static inline void *__compat_pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) +{ + if (tail != skb) { + skb->data_len += len; + skb->len += len; + } + return skb_put(tail, len); +} +#define pskb_put __compat_pskb_put +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) && !defined(ISRHEL7) +#include +static inline void skb_scrub_packet(struct sk_buff *skb, bool xnet) +{ +#ifdef CONFIG_CAVIUM_OCTEON_IPFWD_OFFLOAD + memset(&skb->cvm_info, 0, sizeof(skb->cvm_info)); + skb->cvm_reserved = 0; +#endif + skb->tstamp.tv64 = 0; + skb->pkt_type = PACKET_HOST; + skb->skb_iif = 0; + skb_dst_drop(skb); + secpath_reset(skb); + nf_reset(skb); + nf_reset_trace(skb); + if (!xnet) + return; + skb_orphan(skb); + skb->mark = 0; +} +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) || defined(ISUBUNTU1404)) && !defined(ISRHEL7) +#include +static inline u32 __compat_prandom_u32_max(u32 ep_ro) +{ + return (u32)(((u64)prandom_u32() * ep_ro) >> 32); +} +#define prandom_u32_max __compat_prandom_u32_max +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 75) && !defined(ISRHEL7) +#ifndef U8_MAX +#define U8_MAX ((u8)~0U) +#endif +#ifndef S8_MAX +#define S8_MAX ((s8)(U8_MAX >> 1)) +#endif +#ifndef S8_MIN +#define S8_MIN ((s8)(-S8_MAX - 1)) +#endif +#ifndef U16_MAX +#define U16_MAX ((u16)~0U) +#endif +#ifndef S16_MAX +#define S16_MAX ((s16)(U16_MAX >> 1)) +#endif +#ifndef S16_MIN +#define S16_MIN ((s16)(-S16_MAX - 1)) +#endif +#ifndef U32_MAX +#define U32_MAX ((u32)~0U) +#endif +#ifndef S32_MAX +#define S32_MAX ((s32)(U32_MAX >> 1)) +#endif +#ifndef S32_MIN +#define S32_MIN ((s32)(-S32_MAX - 1)) +#endif +#ifndef U64_MAX +#define U64_MAX ((u64)~0ULL) +#endif +#ifndef S64_MAX +#define S64_MAX ((s64)(U64_MAX >> 1)) +#endif +#ifndef S64_MIN +#define S64_MIN ((s64)(-S64_MAX - 1)) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 60) && !defined(ISRHEL7) +/* Making this static may very well invalidate its usefulness, + * but so it goes with compat code. */ +static inline void memzero_explicit(void *s, size_t count) +{ + memset(s, 0, count); + barrier(); +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && !defined(ISRHEL7) +static const struct in6_addr __compat_in6addr_any = IN6ADDR_ANY_INIT; +#define in6addr_any __compat_in6addr_any +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) && !defined(ISOPENSUSE15) +#include +#include +#include +struct rng_initializer { + struct completion done; + struct random_ready_callback cb; +}; +static inline void rng_initialized_callback(struct random_ready_callback *cb) +{ + complete(&container_of(cb, struct rng_initializer, cb)->done); +} +static inline int wait_for_random_bytes(void) +{ + static bool rng_is_initialized = false; + int ret; + if (unlikely(!rng_is_initialized)) { + struct rng_initializer rng = { + .done = COMPLETION_INITIALIZER(rng.done), + .cb = { .owner = THIS_MODULE, .func = rng_initialized_callback } + }; + ret = add_random_ready_callback(&rng.cb); + if (!ret) { + ret = wait_for_completion_interruptible(&rng.done); + if (ret) { + del_random_ready_callback(&rng.cb); + return ret; + } + } else if (ret != -EALREADY) + return ret; + rng_is_initialized = true; + } + return 0; +} +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) +/* This is a disaster. Without this API, we really have no way of + * knowing if it's initialized. We just return that it has and hope + * for the best... */ +static inline int wait_for_random_bytes(void) +{ + return 0; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) && !defined(ISRHEL8) +#include +#include +struct rng_is_initialized_callback { + struct random_ready_callback cb; + atomic_t *rng_state; +}; +static inline void rng_is_initialized_callback(struct random_ready_callback *cb) +{ + struct rng_is_initialized_callback *rdy = container_of(cb, struct rng_is_initialized_callback, cb); + atomic_set(rdy->rng_state, 2); + kfree(rdy); +} +static inline bool rng_is_initialized(void) +{ + static atomic_t rng_state = ATOMIC_INIT(0); + + if (atomic_read(&rng_state) == 2) + return true; + + if (atomic_cmpxchg(&rng_state, 0, 1) == 0) { + int ret; + struct rng_is_initialized_callback *rdy = kmalloc(sizeof(*rdy), GFP_ATOMIC); + if (!rdy) { + atomic_set(&rng_state, 0); + return false; + } + rdy->cb.owner = THIS_MODULE; + rdy->cb.func = rng_is_initialized_callback; + rdy->rng_state = &rng_state; + ret = add_random_ready_callback(&rdy->cb); + if (ret) + kfree(rdy); + if (ret == -EALREADY) { + atomic_set(&rng_state, 2); + return true; + } else if (ret) + atomic_set(&rng_state, 0); + return false; + } + return false; +} +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) +/* This is a disaster. Without this API, we really have no way of + * knowing if it's initialized. We just return that it has and hope + * for the best... */ +static inline bool rng_is_initialized(void) +{ + return true; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && !defined(ISOPENSUSE15) +static inline int get_random_bytes_wait(void *buf, int nbytes) +{ + int ret = wait_for_random_bytes(); + if (unlikely(ret)) + return ret; + get_random_bytes(buf, nbytes); + return 0; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) && !defined(ISRHEL7) +#define system_power_efficient_wq system_unbound_wq +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 0) +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +#include +#ifndef ktime_get_real_ts64 +#define timespec64 timespec +#define ktime_get_real_ts64 ktime_get_real_ts +#endif +#else +#include +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) +static inline u64 __compat_jiffies64_to_nsecs(u64 j) +{ +#if !(NSEC_PER_SEC % HZ) + return (NSEC_PER_SEC / HZ) * j; +#else + return div_u64(j * HZ_TO_USEC_NUM, HZ_TO_USEC_DEN) * 1000; +#endif +} +#define jiffies64_to_nsecs __compat_jiffies64_to_nsecs +#endif +static inline u64 ktime_get_coarse_boottime_ns(void) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + return ktime_to_ns(ktime_get_boottime()); +#elif (LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 12) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 53) + return ktime_to_ns(ktime_mono_to_any(ns_to_ktime(jiffies64_to_nsecs(get_jiffies_64())), TK_OFFS_BOOT)); +#else + return ktime_to_ns(ktime_get_coarse_boottime()); +#endif +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +#include +static inline __be32 __compat_confirm_addr_indev(struct in_device *in_dev, __be32 dst, __be32 local, int scope) +{ + int same = 0; + __be32 addr = 0; + for_ifa(in_dev) { + if (!addr && (local == ifa->ifa_local || !local) && ifa->ifa_scope <= scope) { + addr = ifa->ifa_local; + if (same) + break; + } + if (!same) { + same = (!local || inet_ifa_match(local, ifa)) && (!dst || inet_ifa_match(dst, ifa)); + if (same && addr) { + if (local || !dst) + break; + if (inet_ifa_match(addr, ifa)) + break; + if (ifa->ifa_scope <= scope) { + addr = ifa->ifa_local; + break; + } + same = 0; + } + } + } endfor_ifa(in_dev); + return same ? addr : 0; +} +static inline __be32 __compat_inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope) +{ + __be32 addr = 0; + struct net_device *dev; + if (in_dev) + return __compat_confirm_addr_indev(in_dev, dst, local, scope); + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + in_dev = __in_dev_get_rcu(dev); + if (in_dev) { + addr = __compat_confirm_addr_indev(in_dev, dst, local, scope); + if (addr) + break; + } + } + rcu_read_unlock(); + return addr; +} +#define inet_confirm_addr __compat_inet_confirm_addr +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) +#include +#include +#include +static inline void *__compat_kvmalloc(size_t size, gfp_t flags) +{ + gfp_t kmalloc_flags = flags; + void *ret; + if (size > PAGE_SIZE) { + kmalloc_flags |= __GFP_NOWARN; + if (!(kmalloc_flags & __GFP_REPEAT) || (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + kmalloc_flags |= __GFP_NORETRY; + } + ret = kmalloc(size, kmalloc_flags); + if (ret || size <= PAGE_SIZE) + return ret; + return __vmalloc(size, flags, PAGE_KERNEL); +} +static inline void *__compat_kvzalloc(size_t size, gfp_t flags) +{ + return __compat_kvmalloc(size, flags | __GFP_ZERO); +} +#define kvmalloc __compat_kvmalloc +#define kvzalloc __compat_kvzalloc +#endif + +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 41)) && !defined(ISUBUNTU1404) +#include +#include +static inline void __compat_kvfree(const void *addr) +{ + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} +#define kvfree __compat_kvfree +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 9) +#include +#define priv_destructor destructor +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && !defined(ISOPENSUSE15) +#define wg_newlink(a,b,c,d,e) wg_newlink(a,b,c,d) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) +#include +#include +#define nlmsg_parse(a, b, c, d, e, f) nlmsg_parse(a, b, c, d, e) +#define nla_parse_nested(a, b, c, d, e) nla_parse_nested(a, b, c, d) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) && !defined(ISRHEL7) +static inline struct nlattr **genl_family_attrbuf(const struct genl_family *family) +{ + return family->attrbuf; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +#define PTR_ERR_OR_ZERO(p) PTR_RET(p) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0) +#include +#define nla_put_u64_64bit(a, b, c, d) nla_put_u64(a, b, c) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#include +#ifndef GENL_UNS_ADMIN_PERM +#define GENL_UNS_ADMIN_PERM GENL_ADMIN_PERM +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) && !defined(ISRHEL7) +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) +#define genl_register_family(a) genl_register_family_with_ops(a, genl_ops, ARRAY_SIZE(genl_ops)) +#define COMPAT_CANNOT_USE_CONST_GENL_OPS +#else +#define genl_register_family(a) genl_register_family_with_ops(a, genl_ops) +#endif +#define COMPAT_CANNOT_USE_GENL_NOPS +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 2) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 16) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 65) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 101) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 84) +#define __COMPAT_NETLINK_DUMP_BLOCK { \ + int ret; \ + skb->end -= nlmsg_total_size(sizeof(int)); \ + ret = wg_get_device_dump_real(skb, cb); \ + skb->end += nlmsg_total_size(sizeof(int)); \ + return ret; \ +} +#define __COMPAT_NETLINK_DUMP_OVERRIDE +#else +#define __COMPAT_NETLINK_DUMP_BLOCK return wg_get_device_dump_real(skb, cb); +#endif +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 8) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 25) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 87) +#define wg_get_device_dump(a, b) wg_get_device_dump_real(a, b); \ +static int wg_get_device_dump(a, b) { \ + struct wg_device *wg = (struct wg_device *)cb->args[0]; \ + if (!wg) { \ + int ret = wg_get_device_start(cb); \ + if (ret) \ + return ret; \ + } \ + __COMPAT_NETLINK_DUMP_BLOCK \ +} \ +static int wg_get_device_dump_real(a, b) +#define COMPAT_CANNOT_USE_NETLINK_START +#elif defined(__COMPAT_NETLINK_DUMP_OVERRIDE) +#define wg_get_device_dump(a, b) wg_get_device_dump_real(a, b); \ +static int wg_get_device_dump(a, b) { \ + __COMPAT_NETLINK_DUMP_BLOCK \ +} \ +static int wg_get_device_dump_real(a, b) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +#define COMPAT_CANNOT_USE_IN6_DEV_GET +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +#define COMPAT_CANNOT_USE_IFF_NO_QUEUE +#endif + +#if defined(CONFIG_X86_64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) +#include +#include +static inline int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) +{ + return boot_cpu_has(X86_FEATURE_XSAVE) && xgetbv(XCR_XFEATURE_ENABLED_MASK) & xfeatures_needed; +} +#endif +#ifndef XFEATURE_MASK_YMM +#define XFEATURE_MASK_YMM XSTATE_YMM +#endif +#ifndef XFEATURE_MASK_SSE +#define XFEATURE_MASK_SSE XSTATE_SSE +#endif +#ifndef XSTATE_AVX512 +#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) +#endif +#ifndef XFEATURE_MASK_AVX512 +#define XFEATURE_MASK_AVX512 XSTATE_AVX512 +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && defined(CONFIG_X86_64) +/* This is incredibly dumb and reckless, but as it turns out, there's + * not really hardware Linux runs properly on that supports F but not BW + * and VL, so in practice this isn't so bad. Plus, this is compat layer, + * so the bar remains fairly low. + */ +#include +#ifndef X86_FEATURE_AVX512BW +#define X86_FEATURE_AVX512BW X86_FEATURE_AVX512F +#endif +#ifndef X86_FEATURE_AVX512VL +#define X86_FEATURE_AVX512VL X86_FEATURE_AVX512F +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) +struct __compat_dummy_container { char dev; }; +#define netdev_notifier_info net_device *)data); __attribute((unused)) char __compat_dummy_variable = ((struct __compat_dummy_container +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) +#define timer_setup(a, b, c) setup_timer(a, ((void (*)(unsigned long))b), ((unsigned long)a)) +#define from_timer(var, callback_timer, timer_fieldname) container_of(callback_timer, typeof(*var), timer_fieldname) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 3) +#define COMPAT_CANNOT_USE_AVX512 +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) +#include +#define genl_dump_check_consistent(a, b) genl_dump_check_consistent(a, b, &genl_family) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && !defined(ISRHEL7) && !defined(ISOPENSUSE15) +static inline void *skb_put_data(struct sk_buff *skb, const void *data, unsigned int len) +{ + void *tmp = skb_put(skb, len); + memcpy(tmp, data, len); + return tmp; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) && !defined(ISRHEL7) +#define napi_complete_done(n, work_done) napi_complete(n) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) +#include +/* NAPI_STATE_SCHED gets set by netif_napi_add anyway, so this is safe. + * Also, kernels without NAPI_STATE_NO_BUSY_POLL don't have a call to + * napi_hash_add inside of netif_napi_add. + */ +#define NAPI_STATE_NO_BUSY_POLL NAPI_STATE_SCHED +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +#include +#ifndef atomic_read_acquire +#define atomic_read_acquire(v) ({ int __compat_p1 = atomic_read(v); smp_rmb(); __compat_p1; }) +#endif +#ifndef atomic_set_release +#define atomic_set_release(v, i) ({ smp_wmb(); atomic_set(v, i); }) +#endif +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +#include +#ifndef atomic_read_acquire +#define atomic_read_acquire(v) smp_load_acquire(&(v)->counter) +#endif +#ifndef atomic_set_release +#define atomic_set_release(v, i) smp_store_release(&(v)->counter, (i)) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0) +static inline void le32_to_cpu_array(u32 *buf, unsigned int words) +{ + while (words--) { + __le32_to_cpus(buf); + buf++; + } +} +static inline void cpu_to_le32_array(u32 *buf, unsigned int words) +{ + while (words--) { + __cpu_to_le32s(buf); + buf++; + } +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) +#include +static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2, + unsigned int size) +{ + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && + __builtin_constant_p(size) && + (size % sizeof(unsigned long)) == 0) { + unsigned long *d = (unsigned long *)dst; + unsigned long *s1 = (unsigned long *)src1; + unsigned long *s2 = (unsigned long *)src2; + + while (size > 0) { + *d++ = *s1++ ^ *s2++; + size -= sizeof(unsigned long); + } + } else { + if (unlikely(dst != src1)) + memmove(dst, src1, size); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + crypto_xor(dst, src2, size); +#else + __crypto_xor(dst, src2, size); +#endif + } +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +#define read_cpuid_part() read_cpuid_part_number() +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) && !defined(ISRHEL7) +#define hlist_add_behind(a, b) hlist_add_after(b, a) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0) +#define totalram_pages() totalram_pages +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0) +struct __kernel_timespec { + int64_t tv_sec, tv_nsec; +}; +#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) +#include +#ifdef __kernel_timespec +#undef __kernel_timespec +struct __kernel_timespec { + int64_t tv_sec, tv_nsec; +}; +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) +#include +#ifndef ALIGN_DOWN +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) && !defined(ISRHEL82) +#include +#define skb_probe_transport_header(a) skb_probe_transport_header(a, 0) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) && !defined(ISRHEL7) +#define ignore_df local_df +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) && !defined(ISRHEL82) +/* Note that all intentional uses of the non-_bh variety need to explicitly + * undef these, conditionalized on COMPAT_CANNOT_DEPRECIATE_BH_RCU. + */ +#include +static __always_inline void old_synchronize_rcu(void) +{ + synchronize_rcu(); +} +static __always_inline void old_call_rcu(void *a, void *b) +{ + call_rcu(a, b); +} +static __always_inline void old_rcu_barrier(void) +{ + rcu_barrier(); +} +#ifdef synchronize_rcu +#undef synchronize_rcu +#endif +#ifdef call_rcu +#undef call_rcu +#endif +#ifdef rcu_barrier +#undef rcu_barrier +#endif +#define synchronize_rcu synchronize_rcu_bh +#define call_rcu call_rcu_bh +#define rcu_barrier rcu_barrier_bh +#define COMPAT_CANNOT_DEPRECIATE_BH_RCU +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 10) && !defined(ISRHEL8) +static inline void skb_mark_not_on_list(struct sk_buff *skb) +{ + skb->next = NULL; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 20, 0) && !defined(ISRHEL8) +#define NLA_EXACT_LEN NLA_UNSPEC +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) && !defined(ISRHEL82) +#define NLA_MIN_LEN NLA_UNSPEC +#define COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) && defined(__aarch64__) +#define cpu_have_named_feature(name) (elf_hwcap & (HWCAP_ ## name)) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) +#include +#ifndef offsetofend +#define offsetofend(TYPE, MEMBER) (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0) +#define genl_dumpit_info(cb) ({ \ + struct { struct nlattr **attrs; } *a = (void *)((u8 *)cb->args + offsetofend(struct dump_ctx, next_allowedip)); \ + BUILD_BUG_ON(sizeof(cb->args) < offsetofend(struct dump_ctx, next_allowedip) + sizeof(*a)); \ + a->attrs = genl_family_attrbuf(&genl_family); \ + if (nlmsg_parse(cb->nlh, GENL_HDRLEN + genl_family.hdrsize, a->attrs, genl_family.maxattr, device_policy, NULL) < 0) \ + memset(a->attrs, 0, (genl_family.maxattr + 1) * sizeof(struct nlattr *)); \ + a; \ +}) +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 5) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 18) +#define ipv6_dst_lookup_flow(a, b, c, d) ipv6_dst_lookup(a, b, &dst, c) + (void *)0 ?: dst +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) +#include +#ifndef skb_list_walk_safe +#define skb_list_walk_safe(first, skb, next) \ + for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \ + (skb) = (next), (next) = (skb) ? (skb)->next : NULL) +#endif +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) +#define blake2s_init zinc_blake2s_init +#define blake2s_init_key zinc_blake2s_init_key +#define blake2s_update zinc_blake2s_update +#define blake2s_final zinc_blake2s_final +#define blake2s_hmac zinc_blake2s_hmac +#define chacha20 zinc_chacha20 +#define hchacha20 zinc_hchacha20 +#define chacha20poly1305_encrypt zinc_chacha20poly1305_encrypt +#define chacha20poly1305_encrypt_sg_inplace zinc_chacha20poly1305_encrypt_sg_inplace +#define chacha20poly1305_decrypt zinc_chacha20poly1305_decrypt +#define chacha20poly1305_decrypt_sg_inplace zinc_chacha20poly1305_decrypt_sg_inplace +#define xchacha20poly1305_encrypt zinc_xchacha20poly1305_encrypt +#define xchacha20poly1305_decrypt zinc_xchacha20poly1305_decrypt +#define curve25519 zinc_curve25519 +#define curve25519_generate_secret zinc_curve25519_generate_secret +#define curve25519_generate_public zinc_curve25519_generate_public +#define poly1305_init zinc_poly1305_init +#define poly1305_update zinc_poly1305_update +#define poly1305_final zinc_poly1305_final +#define blake2s_compress_ssse3 zinc_blake2s_compress_ssse3 +#define blake2s_compress_avx512 zinc_blake2s_compress_avx512 +#define poly1305_init_arm zinc_poly1305_init_arm +#define poly1305_blocks_arm zinc_poly1305_blocks_arm +#define poly1305_emit_arm zinc_poly1305_emit_arm +#define poly1305_blocks_neon zinc_poly1305_blocks_neon +#define poly1305_emit_neon zinc_poly1305_emit_neon +#define poly1305_init_mips zinc_poly1305_init_mips +#define poly1305_blocks_mips zinc_poly1305_blocks_mips +#define poly1305_emit_mips zinc_poly1305_emit_mips +#define poly1305_init_x86_64 zinc_poly1305_init_x86_64 +#define poly1305_blocks_x86_64 zinc_poly1305_blocks_x86_64 +#define poly1305_emit_x86_64 zinc_poly1305_emit_x86_64 +#define poly1305_emit_avx zinc_poly1305_emit_avx +#define poly1305_blocks_avx zinc_poly1305_blocks_avx +#define poly1305_blocks_avx2 zinc_poly1305_blocks_avx2 +#define poly1305_blocks_avx512 zinc_poly1305_blocks_avx512 +#define curve25519_neon zinc_curve25519_neon +#define hchacha20_ssse3 zinc_hchacha20_ssse3 +#define chacha20_ssse3 zinc_chacha20_ssse3 +#define chacha20_avx2 zinc_chacha20_avx2 +#define chacha20_avx512 zinc_chacha20_avx512 +#define chacha20_avx512vl zinc_chacha20_avx512vl +#define chacha20_mips zinc_chacha20_mips +#define chacha20_arm zinc_chacha20_arm +#define hchacha20_arm zinc_hchacha20_arm +#define chacha20_neon zinc_chacha20_neon +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) +#include +static inline int skb_ensure_writable(struct sk_buff *skb, int write_len) +{ + if (!pskb_may_pull(skb, write_len)) + return -ENOMEM; + + if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) + return 0; + + return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) +#if IS_ENABLED(CONFIG_NF_NAT) +#include +#include +#include +#include +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) +#include +#endif +static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) +{ + struct sk_buff *cloned_skb = NULL; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + __be32 orig_ip; + + ct = nf_ct_get(skb_in, &ctinfo); + if (!ct || !(ct->status & IPS_SRC_NAT)) { + icmp_send(skb_in, type, code, info); + return; + } + + if (skb_shared(skb_in)) + skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC); + + if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head || + (skb_network_header(skb_in) + sizeof(struct iphdr)) > + skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in, + skb_network_offset(skb_in) + sizeof(struct iphdr)))) + goto out; + + orig_ip = ip_hdr(skb_in)->saddr; + ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip; + icmp_send(skb_in, type, code, info); + ip_hdr(skb_in)->saddr = orig_ip; +out: + consume_skb(cloned_skb); +} +static inline void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) +{ + struct sk_buff *cloned_skb = NULL; + enum ip_conntrack_info ctinfo; + struct in6_addr orig_ip; + struct nf_conn *ct; + + ct = nf_ct_get(skb_in, &ctinfo); + if (!ct || !(ct->status & IPS_SRC_NAT)) { + icmpv6_send(skb_in, type, code, info); + return; + } + + if (skb_shared(skb_in)) + skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC); + + if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head || + (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) > + skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in, + skb_network_offset(skb_in) + sizeof(struct ipv6hdr)))) + goto out; + + orig_ip = ipv6_hdr(skb_in)->saddr; + ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6; + icmpv6_send(skb_in, type, code, info); + ipv6_hdr(skb_in)->saddr = orig_ip; +out: + consume_skb(cloned_skb); +} +#else +#define icmp_ndo_send icmp_send +#define icmpv6_ndo_send icmpv6_send +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) +#define COMPAT_CANNOT_USE_MAX_MTU +#endif + +#if defined(ISUBUNTU1604) +#include +#ifndef _WG_LINUX_SIPHASH_H +#define hsiphash_2u32 siphash_2u32 +#define hsiphash_3u32 siphash_3u32 +#define hsiphash_key_t siphash_key_t +#endif +#endif + +#ifdef CONFIG_VE +#include +#ifdef NETIF_F_VIRTUAL +#undef NETIF_F_LLTX +#define NETIF_F_LLTX (__NETIF_F(LLTX) | __NETIF_F(VIRTUAL)) +#endif +#endif + +/* https://github.com/ClangBuiltLinux/linux/issues/7 */ +#if defined( __clang__) && (!defined(CONFIG_CLANG_VERSION) || CONFIG_CLANG_VERSION < 80000) +#include +#undef BUILD_BUG_ON +#define BUILD_BUG_ON(x) +#endif + +/* PaX compatibility */ +#ifdef CONSTIFY_PLUGIN +#include +#undef __read_mostly +#define __read_mostly +#endif +#if (defined(RAP_PLUGIN) || defined(CONFIG_CFI_CLANG)) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) +#include +#define wg_expired_retransmit_handshake(a) wg_expired_retransmit_handshake(unsigned long timer) +#define wg_expired_send_keepalive(a) wg_expired_send_keepalive(unsigned long timer) +#define wg_expired_new_handshake(a) wg_expired_new_handshake(unsigned long timer) +#define wg_expired_zero_key_material(a) wg_expired_zero_key_material(unsigned long timer) +#define wg_expired_send_persistent_keepalive(a) wg_expired_send_persistent_keepalive(unsigned long timer) +#undef timer_setup +#define timer_setup(a, b, c) setup_timer(a, ((void (*)(unsigned long))b), ((unsigned long)a)) +#undef from_timer +#define from_timer(var, callback_timer, timer_fieldname) container_of((struct timer_list *)callback_timer, typeof(*var), timer_fieldname) +#endif + +#endif /* _WG_COMPAT_H */ diff --git a/net/wireguard/crypto/zinc/curve25519/curve25519-hacl64.c b/net/wireguard/crypto/zinc/curve25519/curve25519-hacl64.c new file mode 100644 index 000000000000..d6dcd0ce1892 --- /dev/null +++ b/net/wireguard/crypto/zinc/curve25519/curve25519-hacl64.c @@ -0,0 +1,779 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2016-2017 INRIA and Microsoft Corporation. + * Copyright (C) 2018-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This is a machine-generated formally verified implementation of Curve25519 + * ECDH from: . Though originally machine + * generated, it has been tweaked to be suitable for use in the kernel. It is + * optimized for 64-bit machines that can efficiently work with 128-bit + * integer types. + */ + +typedef __uint128_t u128; + +static __always_inline u64 u64_eq_mask(u64 a, u64 b) +{ + u64 x = a ^ b; + u64 minus_x = ~x + (u64)1U; + u64 x_or_minus_x = x | minus_x; + u64 xnx = x_or_minus_x >> (u32)63U; + u64 c = xnx - (u64)1U; + return c; +} + +static __always_inline u64 u64_gte_mask(u64 a, u64 b) +{ + u64 x = a; + u64 y = b; + u64 x_xor_y = x ^ y; + u64 x_sub_y = x - y; + u64 x_sub_y_xor_y = x_sub_y ^ y; + u64 q = x_xor_y | x_sub_y_xor_y; + u64 x_xor_q = x ^ q; + u64 x_xor_q_ = x_xor_q >> (u32)63U; + u64 c = x_xor_q_ - (u64)1U; + return c; +} + +static __always_inline void modulo_carry_top(u64 *b) +{ + u64 b4 = b[4]; + u64 b0 = b[0]; + u64 b4_ = b4 & 0x7ffffffffffffLLU; + u64 b0_ = b0 + 19 * (b4 >> 51); + b[4] = b4_; + b[0] = b0_; +} + +static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input) +{ + { + u128 xi = input[0]; + output[0] = ((u64)(xi)); + } + { + u128 xi = input[1]; + output[1] = ((u64)(xi)); + } + { + u128 xi = input[2]; + output[2] = ((u64)(xi)); + } + { + u128 xi = input[3]; + output[3] = ((u64)(xi)); + } + { + u128 xi = input[4]; + output[4] = ((u64)(xi)); + } +} + +static __always_inline void +fproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s) +{ + output[0] += (u128)input[0] * s; + output[1] += (u128)input[1] * s; + output[2] += (u128)input[2] * s; + output[3] += (u128)input[3] * s; + output[4] += (u128)input[4] * s; +} + +static __always_inline void fproduct_carry_wide_(u128 *tmp) +{ + { + u32 ctr = 0; + u128 tctr = tmp[ctr]; + u128 tctrp1 = tmp[ctr + 1]; + u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; + u128 c = ((tctr) >> (51)); + tmp[ctr] = ((u128)(r0)); + tmp[ctr + 1] = ((tctrp1) + (c)); + } + { + u32 ctr = 1; + u128 tctr = tmp[ctr]; + u128 tctrp1 = tmp[ctr + 1]; + u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; + u128 c = ((tctr) >> (51)); + tmp[ctr] = ((u128)(r0)); + tmp[ctr + 1] = ((tctrp1) + (c)); + } + + { + u32 ctr = 2; + u128 tctr = tmp[ctr]; + u128 tctrp1 = tmp[ctr + 1]; + u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; + u128 c = ((tctr) >> (51)); + tmp[ctr] = ((u128)(r0)); + tmp[ctr + 1] = ((tctrp1) + (c)); + } + { + u32 ctr = 3; + u128 tctr = tmp[ctr]; + u128 tctrp1 = tmp[ctr + 1]; + u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; + u128 c = ((tctr) >> (51)); + tmp[ctr] = ((u128)(r0)); + tmp[ctr + 1] = ((tctrp1) + (c)); + } +} + +static __always_inline void fmul_shift_reduce(u64 *output) +{ + u64 tmp = output[4]; + u64 b0; + { + u32 ctr = 5 - 0 - 1; + u64 z = output[ctr - 1]; + output[ctr] = z; + } + { + u32 ctr = 5 - 1 - 1; + u64 z = output[ctr - 1]; + output[ctr] = z; + } + { + u32 ctr = 5 - 2 - 1; + u64 z = output[ctr - 1]; + output[ctr] = z; + } + { + u32 ctr = 5 - 3 - 1; + u64 z = output[ctr - 1]; + output[ctr] = z; + } + output[0] = tmp; + b0 = output[0]; + output[0] = 19 * b0; +} + +static __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input, + u64 *input21) +{ + u32 i; + u64 input2i; + { + u64 input2i = input21[0]; + fproduct_sum_scalar_multiplication_(output, input, input2i); + fmul_shift_reduce(input); + } + { + u64 input2i = input21[1]; + fproduct_sum_scalar_multiplication_(output, input, input2i); + fmul_shift_reduce(input); + } + { + u64 input2i = input21[2]; + fproduct_sum_scalar_multiplication_(output, input, input2i); + fmul_shift_reduce(input); + } + { + u64 input2i = input21[3]; + fproduct_sum_scalar_multiplication_(output, input, input2i); + fmul_shift_reduce(input); + } + i = 4; + input2i = input21[i]; + fproduct_sum_scalar_multiplication_(output, input, input2i); +} + +static __always_inline void fmul_fmul(u64 *output, u64 *input, u64 *input21) +{ + u64 tmp[5] = { input[0], input[1], input[2], input[3], input[4] }; + { + u128 b4; + u128 b0; + u128 b4_; + u128 b0_; + u64 i0; + u64 i1; + u64 i0_; + u64 i1_; + u128 t[5] = { 0 }; + fmul_mul_shift_reduce_(t, tmp, input21); + fproduct_carry_wide_(t); + b4 = t[4]; + b0 = t[0]; + b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); + b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); + t[4] = b4_; + t[0] = b0_; + fproduct_copy_from_wide_(output, t); + i0 = output[0]; + i1 = output[1]; + i0_ = i0 & 0x7ffffffffffffLLU; + i1_ = i1 + (i0 >> 51); + output[0] = i0_; + output[1] = i1_; + } +} + +static __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output) +{ + u64 r0 = output[0]; + u64 r1 = output[1]; + u64 r2 = output[2]; + u64 r3 = output[3]; + u64 r4 = output[4]; + u64 d0 = r0 * 2; + u64 d1 = r1 * 2; + u64 d2 = r2 * 2 * 19; + u64 d419 = r4 * 19; + u64 d4 = d419 * 2; + u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) + + (((u128)(d2) * (r3)))); + u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) + + (((u128)(r3 * 19) * (r3)))); + u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) + + (((u128)(d4) * (r3)))); + u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) + + (((u128)(r4) * (d419)))); + u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) + + (((u128)(r2) * (r2)))); + tmp[0] = s0; + tmp[1] = s1; + tmp[2] = s2; + tmp[3] = s3; + tmp[4] = s4; +} + +static __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output) +{ + u128 b4; + u128 b0; + u128 b4_; + u128 b0_; + u64 i0; + u64 i1; + u64 i0_; + u64 i1_; + fsquare_fsquare__(tmp, output); + fproduct_carry_wide_(tmp); + b4 = tmp[4]; + b0 = tmp[0]; + b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); + b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); + tmp[4] = b4_; + tmp[0] = b0_; + fproduct_copy_from_wide_(output, tmp); + i0 = output[0]; + i1 = output[1]; + i0_ = i0 & 0x7ffffffffffffLLU; + i1_ = i1 + (i0 >> 51); + output[0] = i0_; + output[1] = i1_; +} + +static __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp, + u32 count1) +{ + u32 i; + fsquare_fsquare_(tmp, output); + for (i = 1; i < count1; ++i) + fsquare_fsquare_(tmp, output); +} + +static __always_inline void fsquare_fsquare_times(u64 *output, u64 *input, + u32 count1) +{ + u128 t[5]; + memcpy(output, input, 5 * sizeof(*input)); + fsquare_fsquare_times_(output, t, count1); +} + +static __always_inline void fsquare_fsquare_times_inplace(u64 *output, + u32 count1) +{ + u128 t[5]; + fsquare_fsquare_times_(output, t, count1); +} + +static __always_inline void crecip_crecip(u64 *out, u64 *z) +{ + u64 buf[20] = { 0 }; + u64 *a0 = buf; + u64 *t00 = buf + 5; + u64 *b0 = buf + 10; + u64 *t01; + u64 *b1; + u64 *c0; + u64 *a; + u64 *t0; + u64 *b; + u64 *c; + fsquare_fsquare_times(a0, z, 1); + fsquare_fsquare_times(t00, a0, 2); + fmul_fmul(b0, t00, z); + fmul_fmul(a0, b0, a0); + fsquare_fsquare_times(t00, a0, 1); + fmul_fmul(b0, t00, b0); + fsquare_fsquare_times(t00, b0, 5); + t01 = buf + 5; + b1 = buf + 10; + c0 = buf + 15; + fmul_fmul(b1, t01, b1); + fsquare_fsquare_times(t01, b1, 10); + fmul_fmul(c0, t01, b1); + fsquare_fsquare_times(t01, c0, 20); + fmul_fmul(t01, t01, c0); + fsquare_fsquare_times_inplace(t01, 10); + fmul_fmul(b1, t01, b1); + fsquare_fsquare_times(t01, b1, 50); + a = buf; + t0 = buf + 5; + b = buf + 10; + c = buf + 15; + fmul_fmul(c, t0, b); + fsquare_fsquare_times(t0, c, 100); + fmul_fmul(t0, t0, c); + fsquare_fsquare_times_inplace(t0, 50); + fmul_fmul(t0, t0, b); + fsquare_fsquare_times_inplace(t0, 5); + fmul_fmul(out, t0, a); +} + +static __always_inline void fsum(u64 *a, u64 *b) +{ + a[0] += b[0]; + a[1] += b[1]; + a[2] += b[2]; + a[3] += b[3]; + a[4] += b[4]; +} + +static __always_inline void fdifference(u64 *a, u64 *b) +{ + u64 tmp[5] = { 0 }; + u64 b0; + u64 b1; + u64 b2; + u64 b3; + u64 b4; + memcpy(tmp, b, 5 * sizeof(*b)); + b0 = tmp[0]; + b1 = tmp[1]; + b2 = tmp[2]; + b3 = tmp[3]; + b4 = tmp[4]; + tmp[0] = b0 + 0x3fffffffffff68LLU; + tmp[1] = b1 + 0x3ffffffffffff8LLU; + tmp[2] = b2 + 0x3ffffffffffff8LLU; + tmp[3] = b3 + 0x3ffffffffffff8LLU; + tmp[4] = b4 + 0x3ffffffffffff8LLU; + { + u64 xi = a[0]; + u64 yi = tmp[0]; + a[0] = yi - xi; + } + { + u64 xi = a[1]; + u64 yi = tmp[1]; + a[1] = yi - xi; + } + { + u64 xi = a[2]; + u64 yi = tmp[2]; + a[2] = yi - xi; + } + { + u64 xi = a[3]; + u64 yi = tmp[3]; + a[3] = yi - xi; + } + { + u64 xi = a[4]; + u64 yi = tmp[4]; + a[4] = yi - xi; + } +} + +static __always_inline void fscalar(u64 *output, u64 *b, u64 s) +{ + u128 tmp[5]; + u128 b4; + u128 b0; + u128 b4_; + u128 b0_; + { + u64 xi = b[0]; + tmp[0] = ((u128)(xi) * (s)); + } + { + u64 xi = b[1]; + tmp[1] = ((u128)(xi) * (s)); + } + { + u64 xi = b[2]; + tmp[2] = ((u128)(xi) * (s)); + } + { + u64 xi = b[3]; + tmp[3] = ((u128)(xi) * (s)); + } + { + u64 xi = b[4]; + tmp[4] = ((u128)(xi) * (s)); + } + fproduct_carry_wide_(tmp); + b4 = tmp[4]; + b0 = tmp[0]; + b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); + b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); + tmp[4] = b4_; + tmp[0] = b0_; + fproduct_copy_from_wide_(output, tmp); +} + +static __always_inline void crecip(u64 *output, u64 *input) +{ + crecip_crecip(output, input); +} + +static __always_inline void point_swap_conditional_step(u64 *a, u64 *b, + u64 swap1, u32 ctr) +{ + u32 i = ctr - 1; + u64 ai = a[i]; + u64 bi = b[i]; + u64 x = swap1 & (ai ^ bi); + u64 ai1 = ai ^ x; + u64 bi1 = bi ^ x; + a[i] = ai1; + b[i] = bi1; +} + +static __always_inline void point_swap_conditional5(u64 *a, u64 *b, u64 swap1) +{ + point_swap_conditional_step(a, b, swap1, 5); + point_swap_conditional_step(a, b, swap1, 4); + point_swap_conditional_step(a, b, swap1, 3); + point_swap_conditional_step(a, b, swap1, 2); + point_swap_conditional_step(a, b, swap1, 1); +} + +static __always_inline void point_swap_conditional(u64 *a, u64 *b, u64 iswap) +{ + u64 swap1 = 0 - iswap; + point_swap_conditional5(a, b, swap1); + point_swap_conditional5(a + 5, b + 5, swap1); +} + +static __always_inline void point_copy(u64 *output, u64 *input) +{ + memcpy(output, input, 5 * sizeof(*input)); + memcpy(output + 5, input + 5, 5 * sizeof(*input)); +} + +static __always_inline void addanddouble_fmonty(u64 *pp, u64 *ppq, u64 *p, + u64 *pq, u64 *qmqp) +{ + u64 *qx = qmqp; + u64 *x2 = pp; + u64 *z2 = pp + 5; + u64 *x3 = ppq; + u64 *z3 = ppq + 5; + u64 *x = p; + u64 *z = p + 5; + u64 *xprime = pq; + u64 *zprime = pq + 5; + u64 buf[40] = { 0 }; + u64 *origx = buf; + u64 *origxprime0 = buf + 5; + u64 *xxprime0; + u64 *zzprime0; + u64 *origxprime; + xxprime0 = buf + 25; + zzprime0 = buf + 30; + memcpy(origx, x, 5 * sizeof(*x)); + fsum(x, z); + fdifference(z, origx); + memcpy(origxprime0, xprime, 5 * sizeof(*xprime)); + fsum(xprime, zprime); + fdifference(zprime, origxprime0); + fmul_fmul(xxprime0, xprime, z); + fmul_fmul(zzprime0, x, zprime); + origxprime = buf + 5; + { + u64 *xx0; + u64 *zz0; + u64 *xxprime; + u64 *zzprime; + u64 *zzzprime; + xx0 = buf + 15; + zz0 = buf + 20; + xxprime = buf + 25; + zzprime = buf + 30; + zzzprime = buf + 35; + memcpy(origxprime, xxprime, 5 * sizeof(*xxprime)); + fsum(xxprime, zzprime); + fdifference(zzprime, origxprime); + fsquare_fsquare_times(x3, xxprime, 1); + fsquare_fsquare_times(zzzprime, zzprime, 1); + fmul_fmul(z3, zzzprime, qx); + fsquare_fsquare_times(xx0, x, 1); + fsquare_fsquare_times(zz0, z, 1); + { + u64 *zzz; + u64 *xx; + u64 *zz; + u64 scalar; + zzz = buf + 10; + xx = buf + 15; + zz = buf + 20; + fmul_fmul(x2, xx, zz); + fdifference(zz, xx); + scalar = 121665; + fscalar(zzz, zz, scalar); + fsum(zzz, xx); + fmul_fmul(z2, zzz, zz); + } + } +} + +static __always_inline void +ladder_smallloop_cmult_small_loop_step(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2, + u64 *q, u8 byt) +{ + u64 bit0 = (u64)(byt >> 7); + u64 bit; + point_swap_conditional(nq, nqpq, bit0); + addanddouble_fmonty(nq2, nqpq2, nq, nqpq, q); + bit = (u64)(byt >> 7); + point_swap_conditional(nq2, nqpq2, bit); +} + +static __always_inline void +ladder_smallloop_cmult_small_loop_double_step(u64 *nq, u64 *nqpq, u64 *nq2, + u64 *nqpq2, u64 *q, u8 byt) +{ + u8 byt1; + ladder_smallloop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt); + byt1 = byt << 1; + ladder_smallloop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1); +} + +static __always_inline void +ladder_smallloop_cmult_small_loop(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2, + u64 *q, u8 byt, u32 i) +{ + while (i--) { + ladder_smallloop_cmult_small_loop_double_step(nq, nqpq, nq2, + nqpq2, q, byt); + byt <<= 2; + } +} + +static __always_inline void ladder_bigloop_cmult_big_loop(u8 *n1, u64 *nq, + u64 *nqpq, u64 *nq2, + u64 *nqpq2, u64 *q, + u32 i) +{ + while (i--) { + u8 byte = n1[i]; + ladder_smallloop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, + byte, 4); + } +} + +static void ladder_cmult(u64 *result, u8 *n1, u64 *q) +{ + u64 point_buf[40] = { 0 }; + u64 *nq = point_buf; + u64 *nqpq = point_buf + 10; + u64 *nq2 = point_buf + 20; + u64 *nqpq2 = point_buf + 30; + point_copy(nqpq, q); + nq[0] = 1; + ladder_bigloop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32); + point_copy(result, nq); +} + +static __always_inline void format_fexpand(u64 *output, const u8 *input) +{ + const u8 *x00 = input + 6; + const u8 *x01 = input + 12; + const u8 *x02 = input + 19; + const u8 *x0 = input + 24; + u64 i0, i1, i2, i3, i4, output0, output1, output2, output3, output4; + i0 = get_unaligned_le64(input); + i1 = get_unaligned_le64(x00); + i2 = get_unaligned_le64(x01); + i3 = get_unaligned_le64(x02); + i4 = get_unaligned_le64(x0); + output0 = i0 & 0x7ffffffffffffLLU; + output1 = i1 >> 3 & 0x7ffffffffffffLLU; + output2 = i2 >> 6 & 0x7ffffffffffffLLU; + output3 = i3 >> 1 & 0x7ffffffffffffLLU; + output4 = i4 >> 12 & 0x7ffffffffffffLLU; + output[0] = output0; + output[1] = output1; + output[2] = output2; + output[3] = output3; + output[4] = output4; +} + +static __always_inline void format_fcontract_first_carry_pass(u64 *input) +{ + u64 t0 = input[0]; + u64 t1 = input[1]; + u64 t2 = input[2]; + u64 t3 = input[3]; + u64 t4 = input[4]; + u64 t1_ = t1 + (t0 >> 51); + u64 t0_ = t0 & 0x7ffffffffffffLLU; + u64 t2_ = t2 + (t1_ >> 51); + u64 t1__ = t1_ & 0x7ffffffffffffLLU; + u64 t3_ = t3 + (t2_ >> 51); + u64 t2__ = t2_ & 0x7ffffffffffffLLU; + u64 t4_ = t4 + (t3_ >> 51); + u64 t3__ = t3_ & 0x7ffffffffffffLLU; + input[0] = t0_; + input[1] = t1__; + input[2] = t2__; + input[3] = t3__; + input[4] = t4_; +} + +static __always_inline void format_fcontract_first_carry_full(u64 *input) +{ + format_fcontract_first_carry_pass(input); + modulo_carry_top(input); +} + +static __always_inline void format_fcontract_second_carry_pass(u64 *input) +{ + u64 t0 = input[0]; + u64 t1 = input[1]; + u64 t2 = input[2]; + u64 t3 = input[3]; + u64 t4 = input[4]; + u64 t1_ = t1 + (t0 >> 51); + u64 t0_ = t0 & 0x7ffffffffffffLLU; + u64 t2_ = t2 + (t1_ >> 51); + u64 t1__ = t1_ & 0x7ffffffffffffLLU; + u64 t3_ = t3 + (t2_ >> 51); + u64 t2__ = t2_ & 0x7ffffffffffffLLU; + u64 t4_ = t4 + (t3_ >> 51); + u64 t3__ = t3_ & 0x7ffffffffffffLLU; + input[0] = t0_; + input[1] = t1__; + input[2] = t2__; + input[3] = t3__; + input[4] = t4_; +} + +static __always_inline void format_fcontract_second_carry_full(u64 *input) +{ + u64 i0; + u64 i1; + u64 i0_; + u64 i1_; + format_fcontract_second_carry_pass(input); + modulo_carry_top(input); + i0 = input[0]; + i1 = input[1]; + i0_ = i0 & 0x7ffffffffffffLLU; + i1_ = i1 + (i0 >> 51); + input[0] = i0_; + input[1] = i1_; +} + +static __always_inline void format_fcontract_trim(u64 *input) +{ + u64 a0 = input[0]; + u64 a1 = input[1]; + u64 a2 = input[2]; + u64 a3 = input[3]; + u64 a4 = input[4]; + u64 mask0 = u64_gte_mask(a0, 0x7ffffffffffedLLU); + u64 mask1 = u64_eq_mask(a1, 0x7ffffffffffffLLU); + u64 mask2 = u64_eq_mask(a2, 0x7ffffffffffffLLU); + u64 mask3 = u64_eq_mask(a3, 0x7ffffffffffffLLU); + u64 mask4 = u64_eq_mask(a4, 0x7ffffffffffffLLU); + u64 mask = (((mask0 & mask1) & mask2) & mask3) & mask4; + u64 a0_ = a0 - (0x7ffffffffffedLLU & mask); + u64 a1_ = a1 - (0x7ffffffffffffLLU & mask); + u64 a2_ = a2 - (0x7ffffffffffffLLU & mask); + u64 a3_ = a3 - (0x7ffffffffffffLLU & mask); + u64 a4_ = a4 - (0x7ffffffffffffLLU & mask); + input[0] = a0_; + input[1] = a1_; + input[2] = a2_; + input[3] = a3_; + input[4] = a4_; +} + +static __always_inline void format_fcontract_store(u8 *output, u64 *input) +{ + u64 t0 = input[0]; + u64 t1 = input[1]; + u64 t2 = input[2]; + u64 t3 = input[3]; + u64 t4 = input[4]; + u64 o0 = t1 << 51 | t0; + u64 o1 = t2 << 38 | t1 >> 13; + u64 o2 = t3 << 25 | t2 >> 26; + u64 o3 = t4 << 12 | t3 >> 39; + u8 *b0 = output; + u8 *b1 = output + 8; + u8 *b2 = output + 16; + u8 *b3 = output + 24; + put_unaligned_le64(o0, b0); + put_unaligned_le64(o1, b1); + put_unaligned_le64(o2, b2); + put_unaligned_le64(o3, b3); +} + +static __always_inline void format_fcontract(u8 *output, u64 *input) +{ + format_fcontract_first_carry_full(input); + format_fcontract_second_carry_full(input); + format_fcontract_trim(input); + format_fcontract_store(output, input); +} + +static __always_inline void format_scalar_of_point(u8 *scalar, u64 *point) +{ + u64 *x = point; + u64 *z = point + 5; + u64 buf[10] __aligned(32) = { 0 }; + u64 *zmone = buf; + u64 *sc = buf + 5; + crecip(zmone, z); + fmul_fmul(sc, x, zmone); + format_fcontract(scalar, sc); +} + +static void curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + u64 buf0[10] __aligned(32) = { 0 }; + u64 *x0 = buf0; + u64 *z = buf0 + 5; + u64 *q; + format_fexpand(x0, basepoint); + z[0] = 1; + q = buf0; + { + u8 e[32] __aligned(32) = { 0 }; + u8 *scalar; + memcpy(e, secret, 32); + curve25519_clamp_secret(e); + scalar = e; + { + u64 buf[15] = { 0 }; + u64 *nq = buf; + u64 *x = nq; + x[0] = 1; + ladder_cmult(nq, scalar, q); + format_scalar_of_point(mypublic, nq); + memzero_explicit(buf, sizeof(buf)); + } + memzero_explicit(e, sizeof(e)); + } + memzero_explicit(buf0, sizeof(buf0)); +} diff --git a/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64-glue.c b/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64-glue.c new file mode 100644 index 000000000000..e08cc2ba74f3 --- /dev/null +++ b/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64-glue.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include + +#include "curve25519-x86_64.c" + +static bool curve25519_use_bmi2_adx __ro_after_init; +static bool *const curve25519_nobs[] __initconst = { + &curve25519_use_bmi2_adx }; + +static void __init curve25519_fpu_init(void) +{ + curve25519_use_bmi2_adx = IS_ENABLED(CONFIG_AS_BMI2) && + IS_ENABLED(CONFIG_AS_ADX) && + boot_cpu_has(X86_FEATURE_BMI2) && + boot_cpu_has(X86_FEATURE_ADX); +} + +static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + if (IS_ENABLED(CONFIG_AS_ADX) && IS_ENABLED(CONFIG_AS_BMI2) && + curve25519_use_bmi2_adx) { + curve25519_ever64(mypublic, secret, basepoint); + return true; + } + return false; +} + +static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + if (IS_ENABLED(CONFIG_AS_ADX) && IS_ENABLED(CONFIG_AS_BMI2) && + curve25519_use_bmi2_adx) { + curve25519_ever64_base(pub, secret); + return true; + } + return false; +} diff --git a/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64.c b/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64.c new file mode 100644 index 000000000000..74570cbe3124 --- /dev/null +++ b/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64.c @@ -0,0 +1,1369 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2020 Jason A. Donenfeld . All Rights Reserved. + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + */ + +static __always_inline u64 eq_mask(u64 a, u64 b) +{ + u64 x = a ^ b; + u64 minus_x = ~x + (u64)1U; + u64 x_or_minus_x = x | minus_x; + u64 xnx = x_or_minus_x >> (u32)63U; + return xnx - (u64)1U; +} + +static __always_inline u64 gte_mask(u64 a, u64 b) +{ + u64 x = a; + u64 y = b; + u64 x_xor_y = x ^ y; + u64 x_sub_y = x - y; + u64 x_sub_y_xor_y = x_sub_y ^ y; + u64 q = x_xor_y | x_sub_y_xor_y; + u64 x_xor_q = x ^ q; + u64 x_xor_q_ = x_xor_q >> (u32)63U; + return x_xor_q_ - (u64)1U; +} + +/* Computes the addition of four-element f1 with value in f2 + * and returns the carry (if any) */ +static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) +{ + u64 carry_r; + + asm volatile( + /* Clear registers to propagate the carry bit */ + " xor %%r8, %%r8;" + " xor %%r9, %%r9;" + " xor %%r10, %%r10;" + " xor %%r11, %%r11;" + " xor %1, %1;" + + /* Begin addition chain */ + " addq 0(%3), %0;" + " movq %0, 0(%2);" + " adcxq 8(%3), %%r8;" + " movq %%r8, 8(%2);" + " adcxq 16(%3), %%r9;" + " movq %%r9, 16(%2);" + " adcxq 24(%3), %%r10;" + " movq %%r10, 24(%2);" + + /* Return the carry bit in a register */ + " adcx %%r11, %1;" + : "+&r" (f2), "=&r" (carry_r) + : "r" (out), "r" (f1) + : "%r8", "%r9", "%r10", "%r11", "memory", "cc" + ); + + return carry_r; +} + +/* Computes the field addition of two field elements */ +static inline void fadd(u64 *out, const u64 *f1, const u64 *f2) +{ + asm volatile( + /* Compute the raw addition of f1 + f2 */ + " movq 0(%0), %%r8;" + " addq 0(%2), %%r8;" + " movq 8(%0), %%r9;" + " adcxq 8(%2), %%r9;" + " movq 16(%0), %%r10;" + " adcxq 16(%2), %%r10;" + " movq 24(%0), %%r11;" + " adcxq 24(%2), %%r11;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %0;" + " cmovc %0, %%rax;" + + /* Step 2: Add carry*38 to the original sum */ + " xor %%rcx, %%rcx;" + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %0, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r" (f2) + : "r" (out), "r" (f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc" + ); +} + +/* Computes the field substraction of two field elements */ +static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) +{ + asm volatile( + /* Compute the raw substraction of f1-f2 */ + " movq 0(%1), %%r8;" + " subq 0(%2), %%r8;" + " movq 8(%1), %%r9;" + " sbbq 8(%2), %%r9;" + " movq 16(%1), %%r10;" + " sbbq 16(%2), %%r10;" + " movq 24(%1), %%r11;" + " sbbq 24(%2), %%r11;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %%rcx;" + " cmovc %%rcx, %%rax;" + + /* Step 2: Substract carry*38 from the original difference */ + " sub %%rax, %%r8;" + " sbb $0, %%r9;" + " sbb $0, %%r10;" + " sbb $0, %%r11;" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rcx, %%rax;" + " sub %%rax, %%r8;" + + /* Store the result */ + " movq %%r8, 0(%0);" + " movq %%r9, 8(%0);" + " movq %%r10, 16(%0);" + " movq %%r11, 24(%0);" + : + : "r" (out), "r" (f1), "r" (f2) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc" + ); +} + +/* Computes a field multiplication: out <- f1 * f2 + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) +{ + asm volatile( + /* Compute the raw multiplication: tmp <- src1 * src2 */ + + /* Compute src1[0] * src2 */ + " movq 0(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" + /* Compute src1[1] * src2 */ + " movq 8(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[2] * src2 */ + " movq 16(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[3] * src2 */ + " movq 24(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" + /* Line up pointers */ + " mov %0, %1;" + " mov %2, %0;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%1), %%r8, %%r13;" + " xor %3, %3;" + " adoxq 0(%1), %%r8;" + " mulxq 40(%1), %%r9, %%r12;" + " adcx %%r13, %%r9;" + " adoxq 8(%1), %%r9;" + " mulxq 48(%1), %%r10, %%r13;" + " adcx %%r12, %%r10;" + " adoxq 16(%1), %%r10;" + " mulxq 56(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%1), %%r11;" + " adcx %3, %%rax;" + " adox %3, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %3, %%r9;" + " movq %%r9, 8(%0);" + " adcx %3, %%r10;" + " movq %%r10, 16(%0);" + " adcx %3, %%r11;" + " movq %%r11, 24(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%0);" + : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) + : + : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" + ); +} + +/* Computes two field multiplications: + * out[0] <- f1[0] * f2[0] + * out[1] <- f1[1] * f2[1] + * Uses the 16-element buffer tmp for intermediate results. */ +static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) +{ + asm volatile( + /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */ + + /* Compute src1[0] * src2 */ + " movq 0(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" + /* Compute src1[1] * src2 */ + " movq 8(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[2] * src2 */ + " movq 16(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[3] * src2 */ + " movq 24(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" + + /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */ + + /* Compute src1[0] * src2 */ + " movq 32(%1), %%rdx;" + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);" + " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" + /* Compute src1[1] * src2 */ + " movq 40(%1), %%rdx;" + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);" + " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[2] * src2 */ + " movq 48(%1), %%rdx;" + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);" + " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[3] * src2 */ + " movq 56(%1), %%rdx;" + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);" + " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;" + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);" + /* Line up pointers */ + " mov %0, %1;" + " mov %2, %0;" + + /* Wrap the results back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%1), %%r8, %%r13;" + " xor %3, %3;" + " adoxq 0(%1), %%r8;" + " mulxq 40(%1), %%r9, %%r12;" + " adcx %%r13, %%r9;" + " adoxq 8(%1), %%r9;" + " mulxq 48(%1), %%r10, %%r13;" + " adcx %%r12, %%r10;" + " adoxq 16(%1), %%r10;" + " mulxq 56(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%1), %%r11;" + " adcx %3, %%rax;" + " adox %3, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %3, %%r9;" + " movq %%r9, 8(%0);" + " adcx %3, %%r10;" + " movq %%r10, 16(%0);" + " adcx %3, %%r11;" + " movq %%r11, 24(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%0);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%1), %%r8, %%r13;" + " xor %3, %3;" + " adoxq 64(%1), %%r8;" + " mulxq 104(%1), %%r9, %%r12;" + " adcx %%r13, %%r9;" + " adoxq 72(%1), %%r9;" + " mulxq 112(%1), %%r10, %%r13;" + " adcx %%r12, %%r10;" + " adoxq 80(%1), %%r10;" + " mulxq 120(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%1), %%r11;" + " adcx %3, %%rax;" + " adox %3, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %3, %%r9;" + " movq %%r9, 40(%0);" + " adcx %3, %%r10;" + " movq %%r10, 48(%0);" + " adcx %3, %%r11;" + " movq %%r11, 56(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%0);" + : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) + : + : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" + ); +} + +/* Computes the field multiplication of four-element f1 with value in f2 */ +static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) +{ + register u64 f2_r asm("rdx") = f2; + + asm volatile( + /* Compute the raw multiplication of f1*f2 */ + " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ + " mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */ + " add %%rcx, %%r9;" + " mov $0, %%rcx;" + " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ + " adcx %%r12, %%r10;" + " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ + " adcx %%r13, %%r11;" + " adcx %%rcx, %%rax;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $38, %%rdx;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r" (f2_r) + : "r" (out), "r" (f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory", "cc" + ); +} + +/* Computes p1 <- bit ? p2 : p1 in constant time */ +static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) +{ + asm volatile( + /* Invert the polarity of bit to match cmov expectations */ + " add $18446744073709551615, %0;" + + /* cswap p1[0], p2[0] */ + " movq 0(%1), %%r8;" + " movq 0(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 0(%1);" + " movq %%r9, 0(%2);" + + /* cswap p1[1], p2[1] */ + " movq 8(%1), %%r8;" + " movq 8(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 8(%1);" + " movq %%r9, 8(%2);" + + /* cswap p1[2], p2[2] */ + " movq 16(%1), %%r8;" + " movq 16(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 16(%1);" + " movq %%r9, 16(%2);" + + /* cswap p1[3], p2[3] */ + " movq 24(%1), %%r8;" + " movq 24(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 24(%1);" + " movq %%r9, 24(%2);" + + /* cswap p1[4], p2[4] */ + " movq 32(%1), %%r8;" + " movq 32(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 32(%1);" + " movq %%r9, 32(%2);" + + /* cswap p1[5], p2[5] */ + " movq 40(%1), %%r8;" + " movq 40(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 40(%1);" + " movq %%r9, 40(%2);" + + /* cswap p1[6], p2[6] */ + " movq 48(%1), %%r8;" + " movq 48(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 48(%1);" + " movq %%r9, 48(%2);" + + /* cswap p1[7], p2[7] */ + " movq 56(%1), %%r8;" + " movq 56(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 56(%1);" + " movq %%r9, 56(%2);" + : "+&r" (bit) + : "r" (p1), "r" (p2) + : "%r8", "%r9", "%r10", "memory", "cc" + ); +} + +/* Computes the square of a field element: out <- f * f + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) +{ + asm volatile( + /* Compute the raw multiplication: tmp <- f * f */ + + /* Step 1: Compute all partial products */ + " movq 0(%1), %%rdx;" /* f[0] */ + " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ + " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%1), %%rdx;" /* f[3] */ + " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ + " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15, %%r15;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%r12;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%r12, %%r12;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%0);" + " add %%rcx, %%r8;" " movq %%r8, 8(%0);" + " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" + " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" + " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" + " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" + " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" + " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" + + /* Line up pointers */ + " mov %0, %1;" + " mov %2, %0;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%1), %%r8, %%r13;" + " xor %%rcx, %%rcx;" + " adoxq 0(%1), %%r8;" + " mulxq 40(%1), %%r9, %%r12;" + " adcx %%r13, %%r9;" + " adoxq 8(%1), %%r9;" + " mulxq 48(%1), %%r10, %%r13;" + " adcx %%r12, %%r10;" + " adoxq 16(%1), %%r10;" + " mulxq 56(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%1), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%0);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%0);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%0);" + : "+&r" (tmp), "+&r" (f), "+&r" (out) + : + : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" + ); +} + +/* Computes two field squarings: + * out[0] <- f[0] * f[0] + * out[1] <- f[1] * f[1] + * Uses the 16-element buffer tmp for intermediate results */ +static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) +{ + asm volatile( + /* Step 1: Compute all partial products */ + " movq 0(%1), %%rdx;" /* f[0] */ + " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ + " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%1), %%rdx;" /* f[3] */ + " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ + " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15, %%r15;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%r12;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%r12, %%r12;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%0);" + " add %%rcx, %%r8;" " movq %%r8, 8(%0);" + " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" + " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" + " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" + " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" + " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" + " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" + + /* Step 1: Compute all partial products */ + " movq 32(%1), %%rdx;" /* f[0] */ + " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ + " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 56(%1), %%rdx;" /* f[3] */ + " mulxq 40(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ + " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ + " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15, %%r15;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%r12;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%r12, %%r12;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 64(%0);" + " add %%rcx, %%r8;" " movq %%r8, 72(%0);" + " movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" " movq %%r9, 80(%0);" + " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);" + " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" " movq %%r11, 96(%0);" + " adcx %%rcx, %%r12;" " movq %%r12, 104(%0);" + " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" " movq %%r13, 112(%0);" + " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);" + + /* Line up pointers */ + " mov %0, %1;" + " mov %2, %0;" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%1), %%r8, %%r13;" + " xor %%rcx, %%rcx;" + " adoxq 0(%1), %%r8;" + " mulxq 40(%1), %%r9, %%r12;" + " adcx %%r13, %%r9;" + " adoxq 8(%1), %%r9;" + " mulxq 48(%1), %%r10, %%r13;" + " adcx %%r12, %%r10;" + " adoxq 16(%1), %%r10;" + " mulxq 56(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%1), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%0);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%0);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%0);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%1), %%r8, %%r13;" + " xor %%rcx, %%rcx;" + " adoxq 64(%1), %%r8;" + " mulxq 104(%1), %%r9, %%r12;" + " adcx %%r13, %%r9;" + " adoxq 72(%1), %%r9;" + " mulxq 112(%1), %%r10, %%r13;" + " adcx %%r12, %%r10;" + " adoxq 80(%1), %%r10;" + " mulxq 120(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%1), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 40(%0);" + " adcx %%rcx, %%r10;" + " movq %%r10, 48(%0);" + " adcx %%rcx, %%r11;" + " movq %%r11, 56(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%0);" + : "+&r" (tmp), "+&r" (f), "+&r" (out) + : + : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" + ); +} + +static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) +{ + u64 *nq = p01_tmp1; + u64 *nq_p1 = p01_tmp1 + (u32)8U; + u64 *tmp1 = p01_tmp1 + (u32)16U; + u64 *x1 = q; + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *z3 = nq_p1 + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + u64 *x3; + u64 *z31; + u64 *d0; + u64 *c0; + u64 *a1; + u64 *b1; + u64 *d; + u64 *c; + u64 *ab1; + u64 *dc1; + fadd(a, x2, z2); + fsub(b, x2, z2); + x3 = nq_p1; + z31 = nq_p1 + (u32)4U; + d0 = dc; + c0 = dc + (u32)4U; + fadd(c0, x3, z31); + fsub(d0, x3, z31); + fmul2(dc, dc, ab, tmp2); + fadd(x3, d0, c0); + fsub(z31, d0, c0); + a1 = tmp1; + b1 = tmp1 + (u32)4U; + d = tmp1 + (u32)8U; + c = tmp1 + (u32)12U; + ab1 = tmp1; + dc1 = tmp1 + (u32)8U; + fsqr2(dc1, ab1, tmp2); + fsqr2(nq_p1, nq_p1, tmp2); + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b1, c, (u64)121665U); + fadd(b1, b1, d); + fmul2(nq, dc1, ab1, tmp2); + fmul(z3, z3, x1, tmp2); +} + +static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) +{ + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *d = tmp1 + (u32)8U; + u64 *c = tmp1 + (u32)12U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + fadd(a, x2, z2); + fsub(b, x2, z2); + fsqr2(dc, ab, tmp2); + a[0U] = c[0U]; + a[1U] = c[1U]; + a[2U] = c[2U]; + a[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b, c, (u64)121665U); + fadd(b, b, d); + fmul2(nq, dc, ab, tmp2); +} + +static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) +{ + u64 tmp2[16U] = { 0U }; + u64 p01_tmp1_swap[33U] = { 0U }; + u64 *p0 = p01_tmp1_swap; + u64 *p01 = p01_tmp1_swap; + u64 *p03 = p01; + u64 *p11 = p01 + (u32)8U; + u64 *x0; + u64 *z0; + u64 *p01_tmp1; + u64 *p01_tmp11; + u64 *nq10; + u64 *nq_p11; + u64 *swap1; + u64 sw0; + u64 *nq1; + u64 *tmp1; + memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); + x0 = p03; + z0 = p03 + (u32)4U; + x0[0U] = (u64)1U; + x0[1U] = (u64)0U; + x0[2U] = (u64)0U; + x0[3U] = (u64)0U; + z0[0U] = (u64)0U; + z0[1U] = (u64)0U; + z0[2U] = (u64)0U; + z0[3U] = (u64)0U; + p01_tmp1 = p01_tmp1_swap; + p01_tmp11 = p01_tmp1_swap; + nq10 = p01_tmp1_swap; + nq_p11 = p01_tmp1_swap + (u32)8U; + swap1 = p01_tmp1_swap + (u32)32U; + cswap2((u64)1U, nq10, nq_p11); + point_add_and_double(init1, p01_tmp11, tmp2); + swap1[0U] = (u64)1U; + { + u32 i; + for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { + u64 *p01_tmp12 = p01_tmp1_swap; + u64 *swap2 = p01_tmp1_swap + (u32)32U; + u64 *nq2 = p01_tmp12; + u64 *nq_p12 = p01_tmp12 + (u32)8U; + u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); + u64 sw = swap2[0U] ^ bit; + cswap2(sw, nq2, nq_p12); + point_add_and_double(init1, p01_tmp12, tmp2); + swap2[0U] = bit; + } + } + sw0 = swap1[0U]; + cswap2(sw0, nq10, nq_p11); + nq1 = p01_tmp1; + tmp1 = p01_tmp1 + (u32)16U; + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + memcpy(out, p0, (u32)8U * sizeof(p0[0U])); + + memzero_explicit(tmp2, sizeof(tmp2)); + memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap)); +} + +static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) +{ + u32 i; + fsqr(o, inp, tmp); + for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) + fsqr(o, o, tmp); +} + +static void finv(u64 *o, const u64 *i, u64 *tmp) +{ + u64 t1[16U] = { 0U }; + u64 *a0 = t1; + u64 *b = t1 + (u32)4U; + u64 *c = t1 + (u32)8U; + u64 *t00 = t1 + (u32)12U; + u64 *tmp1 = tmp; + u64 *a; + u64 *t0; + fsquare_times(a0, i, tmp1, (u32)1U); + fsquare_times(t00, a0, tmp1, (u32)2U); + fmul(b, t00, i, tmp); + fmul(a0, b, a0, tmp); + fsquare_times(t00, a0, tmp1, (u32)1U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)5U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)10U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)20U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)10U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)50U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)100U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)50U); + fmul(t00, t00, b, tmp); + fsquare_times(t00, t00, tmp1, (u32)5U); + a = t1; + t0 = t1 + (u32)12U; + fmul(o, t0, a, tmp); +} + +static void store_felem(u64 *b, u64 *f) +{ + u64 f30 = f[3U]; + u64 top_bit0 = f30 >> (u32)63U; + u64 carry0; + u64 f31; + u64 top_bit; + u64 carry; + u64 f0; + u64 f1; + u64 f2; + u64 f3; + u64 m0; + u64 m1; + u64 m2; + u64 m3; + u64 mask; + u64 f0_; + u64 f1_; + u64 f2_; + u64 f3_; + u64 o0; + u64 o1; + u64 o2; + u64 o3; + f[3U] = f30 & (u64)0x7fffffffffffffffU; + carry0 = add_scalar(f, f, (u64)19U * top_bit0); + f31 = f[3U]; + top_bit = f31 >> (u32)63U; + f[3U] = f31 & (u64)0x7fffffffffffffffU; + carry = add_scalar(f, f, (u64)19U * top_bit); + f0 = f[0U]; + f1 = f[1U]; + f2 = f[2U]; + f3 = f[3U]; + m0 = gte_mask(f0, (u64)0xffffffffffffffedU); + m1 = eq_mask(f1, (u64)0xffffffffffffffffU); + m2 = eq_mask(f2, (u64)0xffffffffffffffffU); + m3 = eq_mask(f3, (u64)0x7fffffffffffffffU); + mask = ((m0 & m1) & m2) & m3; + f0_ = f0 - (mask & (u64)0xffffffffffffffedU); + f1_ = f1 - (mask & (u64)0xffffffffffffffffU); + f2_ = f2 - (mask & (u64)0xffffffffffffffffU); + f3_ = f3 - (mask & (u64)0x7fffffffffffffffU); + o0 = f0_; + o1 = f1_; + o2 = f2_; + o3 = f3_; + b[0U] = o0; + b[1U] = o1; + b[2U] = o2; + b[3U] = o3; +} + +static void encode_point(u8 *o, const u64 *i) +{ + const u64 *x = i; + const u64 *z = i + (u32)4U; + u64 tmp[4U] = { 0U }; + u64 tmp_w[16U] = { 0U }; + finv(tmp, z, tmp_w); + fmul(tmp, tmp, x, tmp_w); + store_felem((u64 *)o, tmp); +} + +static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) +{ + u64 init1[8U] = { 0U }; + u64 tmp[4U] = { 0U }; + u64 tmp3; + u64 *x; + u64 *z; + { + u32 i; + for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { + u64 *os = tmp; + const u8 *bj = pub + i * (u32)8U; + u64 u = *(u64 *)bj; + u64 r = u; + u64 x0 = r; + os[i] = x0; + } + } + tmp3 = tmp[3U]; + tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; + x = init1; + z = init1 + (u32)4U; + z[0U] = (u64)1U; + z[1U] = (u64)0U; + z[2U] = (u64)0U; + z[3U] = (u64)0U; + x[0U] = tmp[0U]; + x[1U] = tmp[1U]; + x[2U] = tmp[2U]; + x[3U] = tmp[3U]; + montgomery_ladder(init1, priv, init1); + encode_point(out, init1); +} + +/* The below constants were generated using this sage script: + * + * #!/usr/bin/env sage + * import sys + * from sage.all import * + * def limbs(n): + * n = int(n) + * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64) + * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l + * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0]) + * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0] + * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s)) + * print("static const u64 table_ladder[] = {") + * p = ec.lift_x(9) + * for i in range(252): + * l = (p[0] + p[2]) / (p[0] - p[2]) + * print(("\t%s" + ("," if i != 251 else "")) % limbs(l)) + * p = p * 2 + * print("};") + * + */ + +static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; + +static const u64 table_ladder[] = { + 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL, + 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL, + 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL, + 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL, + 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL, + 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL, + 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL, + 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL, + 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL, + 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL, + 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL, + 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL, + 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL, + 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL, + 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL, + 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL, + 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL, + 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL, + 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL, + 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL, + 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL, + 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL, + 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL, + 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL, + 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL, + 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL, + 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL, + 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL, + 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL, + 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL, + 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL, + 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL, + 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL, + 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL, + 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL, + 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL, + 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL, + 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL, + 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL, + 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL, + 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL, + 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL, + 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL, + 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL, + 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL, + 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL, + 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL, + 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL, + 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL, + 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL, + 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL, + 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL, + 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL, + 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL, + 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL, + 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL, + 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL, + 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL, + 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL, + 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL, + 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL, + 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL, + 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL, + 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL, + 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL, + 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL, + 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL, + 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL, + 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL, + 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL, + 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL, + 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL, + 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL, + 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL, + 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL, + 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL, + 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL, + 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL, + 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL, + 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL, + 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL, + 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL, + 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL, + 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL, + 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL, + 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL, + 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL, + 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL, + 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL, + 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL, + 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL, + 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL, + 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL, + 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL, + 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL, + 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL, + 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL, + 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL, + 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL, + 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL, + 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL, + 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL, + 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL, + 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL, + 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL, + 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL, + 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL, + 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL, + 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL, + 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL, + 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL, + 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL, + 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL, + 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL, + 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL, + 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL, + 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL, + 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL, + 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL, + 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL, + 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL, + 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL, + 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL, + 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL, + 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL, + 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL, + 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL, + 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL, + 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL, + 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL, + 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL, + 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL, + 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL, + 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL, + 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL, + 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL, + 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL, + 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL, + 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL, + 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL, + 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL, + 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL, + 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL, + 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL, + 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL, + 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL, + 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL, + 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL, + 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL, + 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL, + 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL, + 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL, + 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL, + 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL, + 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL, + 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL, + 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL, + 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL, + 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL, + 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL, + 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL, + 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL, + 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL, + 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL, + 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL, + 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL, + 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL, + 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL, + 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL, + 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL, + 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL, + 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL, + 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL, + 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL, + 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL, + 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL, + 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL, + 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL, + 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL, + 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL, + 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL, + 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL, + 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL, + 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL, + 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL, + 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL, + 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL, + 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL, + 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL, + 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL, + 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL, + 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL, + 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL, + 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL, + 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL, + 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL, + 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL, + 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL, + 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL, + 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL, + 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL, + 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL, + 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL, + 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL, + 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL, + 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL, + 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL, + 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL, + 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL, + 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL, + 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL, + 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL, + 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL, + 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL, + 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL, + 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL, + 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL, + 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL, + 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL, + 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL, + 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL, + 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL, + 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL, + 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL, + 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL, + 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL, + 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL, + 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL, + 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL, + 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL, + 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL, + 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL, + 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL, + 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL, + 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL, + 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL, + 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL, + 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL, + 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL, + 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL, + 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL, + 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL, + 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL, + 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL, + 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL, + 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL, + 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL, + 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL, + 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL, + 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL, + 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL, + 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL +}; + +static void curve25519_ever64_base(u8 *out, const u8 *priv) +{ + u64 swap = 1; + int i, j, k; + u64 tmp[16 + 32 + 4]; + u64 *x1 = &tmp[0]; + u64 *z1 = &tmp[4]; + u64 *x2 = &tmp[8]; + u64 *z2 = &tmp[12]; + u64 *xz1 = &tmp[0]; + u64 *xz2 = &tmp[8]; + u64 *a = &tmp[0 + 16]; + u64 *b = &tmp[4 + 16]; + u64 *c = &tmp[8 + 16]; + u64 *ab = &tmp[0 + 16]; + u64 *abcd = &tmp[0 + 16]; + u64 *ef = &tmp[16 + 16]; + u64 *efgh = &tmp[16 + 16]; + u64 *key = &tmp[0 + 16 + 32]; + + memcpy(key, priv, 32); + ((u8 *)key)[0] &= 248; + ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; + + x1[0] = 1, x1[1] = x1[2] = x1[3] = 0; + z1[0] = 1, z1[1] = z1[2] = z1[3] = 0; + z2[0] = 1, z2[1] = z2[2] = z2[3] = 0; + memcpy(x2, p_minus_s, sizeof(p_minus_s)); + + j = 3; + for (i = 0; i < 4; ++i) { + while (j < (const int[]){ 64, 64, 64, 63 }[i]) { + u64 bit = (key[i] >> j) & 1; + k = (64 * i + j - 3); + swap = swap ^ bit; + cswap2(swap, xz1, xz2); + swap = bit; + fsub(b, x1, z1); + fadd(a, x1, z1); + fmul(c, &table_ladder[4 * k], b, ef); + fsub(b, a, c); + fadd(a, a, c); + fsqr2(ab, ab, efgh); + fmul2(xz1, xz2, ab, efgh); + ++j; + } + j = 0; + } + + point_double(xz1, abcd, efgh); + point_double(xz1, abcd, efgh); + point_double(xz1, abcd, efgh); + encode_point(out, xz1); + + memzero_explicit(tmp, sizeof(tmp)); +} diff --git a/net/wireguard/device.h b/net/wireguard/device.h new file mode 100644 index 000000000000..b15a8be9d816 --- /dev/null +++ b/net/wireguard/device.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_DEVICE_H +#define _WG_DEVICE_H + +#include "noise.h" +#include "allowedips.h" +#include "peerlookup.h" +#include "cookie.h" + +#include +#include +#include +#include +#include +#include + +struct wg_device; + +struct multicore_worker { + void *ptr; + struct work_struct work; +}; + +struct crypt_queue { + struct ptr_ring ring; + union { + struct { + struct multicore_worker __percpu *worker; + int last_cpu; + }; + struct work_struct work; + }; +}; + +struct wg_device { + struct net_device *dev; + struct crypt_queue encrypt_queue, decrypt_queue; + struct sock __rcu *sock4, *sock6; + struct net *creating_net; + struct noise_static_identity static_identity; + struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; + struct workqueue_struct *packet_crypt_wq; + struct sk_buff_head incoming_handshakes; + int incoming_handshake_cpu; + struct multicore_worker __percpu *incoming_handshakes_worker; + struct cookie_checker cookie_checker; + struct pubkey_hashtable *peer_hashtable; + struct index_hashtable *index_hashtable; + struct allowedips peer_allowedips; + struct mutex device_update_lock, socket_update_lock; + struct list_head device_list, peer_list; + unsigned int num_peers, device_update_gen; + u32 fwmark; + u16 incoming_port; + bool have_creating_net_ref; +}; + +int wg_device_init(void); +void wg_device_uninit(void); + +#endif /* _WG_DEVICE_H */ diff --git a/net/wireguard/dkms.conf b/net/wireguard/dkms.conf new file mode 100644 index 000000000000..b094a8766f07 --- /dev/null +++ b/net/wireguard/dkms.conf @@ -0,0 +1,9 @@ +PACKAGE_NAME="wireguard" +PACKAGE_VERSION="0.0.20200215" +AUTOINSTALL=yes + +BUILT_MODULE_NAME="wireguard" +DEST_MODULE_LOCATION="/kernel/net" + +# requires kernel 3.10 - 5.5, inclusive: +BUILD_EXCLUSIVE_KERNEL="^((5\.[0-5]($|[.-]))|(4\.)|(3\.1[0-9]))" diff --git a/net/wireguard/socket.c b/net/wireguard/socket.c new file mode 100644 index 000000000000..b0d6541582d3 --- /dev/null +++ b/net/wireguard/socket.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "device.h" +#include "peer.h" +#include "socket.h" +#include "queueing.h" +#include "messages.h" + +#include +#include +#include +#include +#include +#include +#include + +static int send4(struct wg_device *wg, struct sk_buff *skb, + struct endpoint *endpoint, u8 ds, struct dst_cache *cache) +{ + struct flowi4 fl = { + .saddr = endpoint->src4.s_addr, + .daddr = endpoint->addr4.sin_addr.s_addr, + .fl4_dport = endpoint->addr4.sin_port, + .flowi4_mark = wg->fwmark, + .flowi4_proto = IPPROTO_UDP + }; + struct rtable *rt = NULL; + struct sock *sock; + int ret = 0; + + skb_mark_not_on_list(skb); + skb->dev = wg->dev; + skb->mark = wg->fwmark; + + rcu_read_lock_bh(); + sock = rcu_dereference_bh(wg->sock4); + + if (unlikely(!sock)) { + ret = -ENONET; + goto err; + } + + fl.fl4_sport = inet_sk(sock)->inet_sport; + + if (cache) + rt = dst_cache_get_ip4(cache, &fl.saddr); + + if (!rt) { + security_sk_classify_flow(sock, flowi4_to_flowi(&fl)); + if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, + fl.saddr, RT_SCOPE_HOST))) { + endpoint->src4.s_addr = 0; + *(__force __be32 *)&endpoint->src_if4 = 0; + fl.saddr = 0; + if (cache) + dst_cache_reset(cache); + } + rt = ip_route_output_flow(sock_net(sock), &fl, sock); + if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && + PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && + rt->dst.dev->ifindex != endpoint->src_if4)))) { + endpoint->src4.s_addr = 0; + *(__force __be32 *)&endpoint->src_if4 = 0; + fl.saddr = 0; + if (cache) + dst_cache_reset(cache); + if (!IS_ERR(rt)) + ip_rt_put(rt); + rt = ip_route_output_flow(sock_net(sock), &fl, sock); + } + if (unlikely(IS_ERR(rt))) { + ret = PTR_ERR(rt); + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", + wg->dev->name, &endpoint->addr, ret); + goto err; + } else if (unlikely(rt->dst.dev == skb->dev)) { + ip_rt_put(rt); + ret = -ELOOP; + net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", + wg->dev->name, &endpoint->addr); + goto err; + } + if (cache) + dst_cache_set_ip4(cache, &rt->dst, fl.saddr); + } + + skb->ignore_df = 1; + udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds, + ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, + fl.fl4_dport, false, false); + goto out; + +err: + kfree_skb(skb); +out: + rcu_read_unlock_bh(); + return ret; +} + +static int send6(struct wg_device *wg, struct sk_buff *skb, + struct endpoint *endpoint, u8 ds, struct dst_cache *cache) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct flowi6 fl = { + .saddr = endpoint->src6, + .daddr = endpoint->addr6.sin6_addr, + .fl6_dport = endpoint->addr6.sin6_port, + .flowi6_mark = wg->fwmark, + .flowi6_oif = endpoint->addr6.sin6_scope_id, + .flowi6_proto = IPPROTO_UDP + /* TODO: addr->sin6_flowinfo */ + }; + struct dst_entry *dst = NULL; + struct sock *sock; + int ret = 0; + + skb_mark_not_on_list(skb); + skb->dev = wg->dev; + skb->mark = wg->fwmark; + + rcu_read_lock_bh(); + sock = rcu_dereference_bh(wg->sock6); + + if (unlikely(!sock)) { + ret = -ENONET; + goto err; + } + + fl.fl6_sport = inet_sk(sock)->inet_sport; + + if (cache) + dst = dst_cache_get_ip6(cache, &fl.saddr); + + if (!dst) { + security_sk_classify_flow(sock, flowi6_to_flowi(&fl)); + if (unlikely(!ipv6_addr_any(&fl.saddr) && + !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) { + endpoint->src6 = fl.saddr = in6addr_any; + if (cache) + dst_cache_reset(cache); + } + dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, + NULL); + if (unlikely(IS_ERR(dst))) { + ret = PTR_ERR(dst); + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", + wg->dev->name, &endpoint->addr, ret); + goto err; + } else if (unlikely(dst->dev == skb->dev)) { + dst_release(dst); + ret = -ELOOP; + net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", + wg->dev->name, &endpoint->addr); + goto err; + } + if (cache) + dst_cache_set_ip6(cache, dst, &fl.saddr); + } + + skb->ignore_df = 1; + udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds, + ip6_dst_hoplimit(dst), 0, fl.fl6_sport, + fl.fl6_dport, false); + goto out; + +err: + kfree_skb(skb); +out: + rcu_read_unlock_bh(); + return ret; +#else + return -EAFNOSUPPORT; +#endif +} + +int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds) +{ + size_t skb_len = skb->len; + int ret = -EAFNOSUPPORT; + + read_lock_bh(&peer->endpoint_lock); + if (peer->endpoint.addr.sa_family == AF_INET) + ret = send4(peer->device, skb, &peer->endpoint, ds, + &peer->endpoint_cache); + else if (peer->endpoint.addr.sa_family == AF_INET6) + ret = send6(peer->device, skb, &peer->endpoint, ds, + &peer->endpoint_cache); + else + dev_kfree_skb(skb); + if (likely(!ret)) + peer->tx_bytes += skb_len; + read_unlock_bh(&peer->endpoint_lock); + + return ret; +} + +int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer, + size_t len, u8 ds) +{ + struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); + + if (unlikely(!skb)) + return -ENOMEM; + + skb_reserve(skb, SKB_HEADER_LEN); + skb_set_inner_network_header(skb, 0); + skb_put_data(skb, buffer, len); + return wg_socket_send_skb_to_peer(peer, skb, ds); +} + +int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, + struct sk_buff *in_skb, void *buffer, + size_t len) +{ + int ret = 0; + struct sk_buff *skb; + struct endpoint endpoint; + + if (unlikely(!in_skb)) + return -EINVAL; + ret = wg_socket_endpoint_from_skb(&endpoint, in_skb); + if (unlikely(ret < 0)) + return ret; + + skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); + if (unlikely(!skb)) + return -ENOMEM; + skb_reserve(skb, SKB_HEADER_LEN); + skb_set_inner_network_header(skb, 0); + skb_put_data(skb, buffer, len); + + if (endpoint.addr.sa_family == AF_INET) + ret = send4(wg, skb, &endpoint, 0, NULL); + else if (endpoint.addr.sa_family == AF_INET6) + ret = send6(wg, skb, &endpoint, 0, NULL); + /* No other possibilities if the endpoint is valid, which it is, + * as we checked above. + */ + + return ret; +} + +int wg_socket_endpoint_from_skb(struct endpoint *endpoint, + const struct sk_buff *skb) +{ + memset(endpoint, 0, sizeof(*endpoint)); + if (skb->protocol == htons(ETH_P_IP)) { + endpoint->addr4.sin_family = AF_INET; + endpoint->addr4.sin_port = udp_hdr(skb)->source; + endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; + endpoint->src4.s_addr = ip_hdr(skb)->daddr; + endpoint->src_if4 = skb->skb_iif; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + endpoint->addr6.sin6_family = AF_INET6; + endpoint->addr6.sin6_port = udp_hdr(skb)->source; + endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; + endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id( + &ipv6_hdr(skb)->saddr, skb->skb_iif); + endpoint->src6 = ipv6_hdr(skb)->daddr; + } else { + return -EINVAL; + } + return 0; +} + +static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b) +{ + return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET && + a->addr4.sin_port == b->addr4.sin_port && + a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr && + a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) || + (a->addr.sa_family == AF_INET6 && + b->addr.sa_family == AF_INET6 && + a->addr6.sin6_port == b->addr6.sin6_port && + ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) && + a->addr6.sin6_scope_id == b->addr6.sin6_scope_id && + ipv6_addr_equal(&a->src6, &b->src6)) || + unlikely(!a->addr.sa_family && !b->addr.sa_family); +} + +void wg_socket_set_peer_endpoint(struct wg_peer *peer, + const struct endpoint *endpoint) +{ + /* First we check unlocked, in order to optimize, since it's pretty rare + * that an endpoint will change. If we happen to be mid-write, and two + * CPUs wind up writing the same thing or something slightly different, + * it doesn't really matter much either. + */ + if (endpoint_eq(endpoint, &peer->endpoint)) + return; + write_lock_bh(&peer->endpoint_lock); + if (endpoint->addr.sa_family == AF_INET) { + peer->endpoint.addr4 = endpoint->addr4; + peer->endpoint.src4 = endpoint->src4; + peer->endpoint.src_if4 = endpoint->src_if4; + } else if (endpoint->addr.sa_family == AF_INET6) { + peer->endpoint.addr6 = endpoint->addr6; + peer->endpoint.src6 = endpoint->src6; + } else { + goto out; + } + dst_cache_reset(&peer->endpoint_cache); +out: + write_unlock_bh(&peer->endpoint_lock); +} + +void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, + const struct sk_buff *skb) +{ + struct endpoint endpoint; + + if (!wg_socket_endpoint_from_skb(&endpoint, skb)) + wg_socket_set_peer_endpoint(peer, &endpoint); +} + +void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) +{ + write_lock_bh(&peer->endpoint_lock); + memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); + dst_cache_reset(&peer->endpoint_cache); + write_unlock_bh(&peer->endpoint_lock); +} + +static int wg_receive(struct sock *sk, struct sk_buff *skb) +{ + struct wg_device *wg; + + if (unlikely(!sk)) + goto err; + wg = sk->sk_user_data; + if (unlikely(!wg)) + goto err; + skb_mark_not_on_list(skb); + wg_packet_receive(wg, skb); + return 0; + +err: + kfree_skb(skb); + return 0; +} + +static void sock_free(struct sock *sock) +{ + if (unlikely(!sock)) + return; + sk_clear_memalloc(sock); + udp_tunnel_sock_release(sock->sk_socket); +} + +static void set_sock_opts(struct socket *sock) +{ + sock->sk->sk_allocation = GFP_ATOMIC; + sock->sk->sk_sndbuf = INT_MAX; + sk_set_memalloc(sock->sk); +} + +int wg_socket_init(struct wg_device *wg, u16 port) +{ + int ret; + struct udp_tunnel_sock_cfg cfg = { + .sk_user_data = wg, + .encap_type = 1, + .encap_rcv = wg_receive + }; + struct socket *new4 = NULL, *new6 = NULL; + struct udp_port_cfg port4 = { + .family = AF_INET, + .local_ip.s_addr = htonl(INADDR_ANY), + .local_udp_port = htons(port), + .use_udp_checksums = true + }; +#if IS_ENABLED(CONFIG_IPV6) + int retries = 0; + struct udp_port_cfg port6 = { + .family = AF_INET6, + .local_ip6 = IN6ADDR_ANY_INIT, + .use_udp6_tx_checksums = true, + .use_udp6_rx_checksums = true, + .ipv6_v6only = true + }; +#endif + +#if IS_ENABLED(CONFIG_IPV6) +retry: +#endif + + ret = udp_sock_create(wg->creating_net, &port4, &new4); + if (ret < 0) { + pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); + return ret; + } + set_sock_opts(new4); + setup_udp_tunnel_sock(wg->creating_net, new4, &cfg); + +#if IS_ENABLED(CONFIG_IPV6) + if (ipv6_mod_enabled()) { + port6.local_udp_port = inet_sk(new4->sk)->inet_sport; + ret = udp_sock_create(wg->creating_net, &port6, &new6); + if (ret < 0) { + udp_tunnel_sock_release(new4); + if (ret == -EADDRINUSE && !port && retries++ < 100) + goto retry; + pr_err("%s: Could not create IPv6 socket\n", + wg->dev->name); + return ret; + } + set_sock_opts(new6); + setup_udp_tunnel_sock(wg->creating_net, new6, &cfg); + } +#endif + + wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL); + return 0; +} + +void wg_socket_reinit(struct wg_device *wg, struct sock *new4, + struct sock *new6) +{ + struct sock *old4, *old6; + + mutex_lock(&wg->socket_update_lock); + old4 = rcu_dereference_protected(wg->sock4, + lockdep_is_held(&wg->socket_update_lock)); + old6 = rcu_dereference_protected(wg->sock6, + lockdep_is_held(&wg->socket_update_lock)); + rcu_assign_pointer(wg->sock4, new4); + rcu_assign_pointer(wg->sock6, new6); + if (new4) + wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); + mutex_unlock(&wg->socket_update_lock); + synchronize_rcu(); + sock_free(old4); + sock_free(old6); +} diff --git a/net/wireguard/tests/netns.sh b/net/wireguard/tests/netns.sh new file mode 100755 index 000000000000..ba5f83a91c68 --- /dev/null +++ b/net/wireguard/tests/netns.sh @@ -0,0 +1,553 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. +# +# This script tests the below topology: +# +# ┌─────────────────────┐ ┌──────────────────────────────────┐ ┌─────────────────────┐ +# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │ +# │ │ │ │ │ │ +# │┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐│ +# ││ wg0 │───────────┼───┼────────────│ lo │────────────┼───┼───────────│ wg0 ││ +# │├────────┴──────────┐│ │ ┌───────┴────────┴────────┐ │ │┌──────────┴────────┤│ +# ││192.168.241.1/24 ││ │ │(ns1) (ns2) │ │ ││192.168.241.2/24 ││ +# ││fd00::1/24 ││ │ │127.0.0.1:1 127.0.0.1:2│ │ ││fd00::2/24 ││ +# │└───────────────────┘│ │ │[::]:1 [::]:2 │ │ │└───────────────────┘│ +# └─────────────────────┘ │ └─────────────────────────┘ │ └─────────────────────┘ +# └──────────────────────────────────┘ +# +# After the topology is prepared we run a series of TCP/UDP iperf3 tests between the +# wireguard peers in $ns1 and $ns2. Note that $ns0 is the endpoint for the wg0 +# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further +# details on how this is accomplished. +set -e + +exec 3>&1 +export LANG=C +export WG_HIDE_KEYS=never +netns0="wg-test-$$-0" +netns1="wg-test-$$-1" +netns2="wg-test-$$-2" +pretty() { echo -e "\x1b[32m\x1b[1m[+] ${1:+NS$1: }${2}\x1b[0m" >&3; } +pp() { pretty "" "$*"; "$@"; } +maybe_exec() { if [[ $BASHPID -eq $$ ]]; then "$@"; else exec "$@"; fi; } +n0() { pretty 0 "$*"; maybe_exec ip netns exec $netns0 "$@"; } +n1() { pretty 1 "$*"; maybe_exec ip netns exec $netns1 "$@"; } +n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; } +ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } +ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } +ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } +sleep() { read -t "$1" -N 1 || true; } +waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } +waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; } +waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } + +cleanup() { + set +e + exec 2>/dev/null + printf "$orig_message_cost" > /proc/sys/net/core/message_cost + ip0 link del dev wg0 + ip1 link del dev wg0 + ip2 link del dev wg0 + local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)" + [[ -n $to_kill ]] && kill $to_kill + pp ip netns del $netns1 + pp ip netns del $netns2 + pp ip netns del $netns0 + exit +} + +orig_message_cost="$(< /proc/sys/net/core/message_cost)" +trap cleanup EXIT +printf 0 > /proc/sys/net/core/message_cost + +ip netns del $netns0 2>/dev/null || true +ip netns del $netns1 2>/dev/null || true +ip netns del $netns2 2>/dev/null || true +pp ip netns add $netns0 +pp ip netns add $netns1 +pp ip netns add $netns2 +ip0 link set up dev lo + +ip0 link add dev wg0 type wireguard +ip0 link set wg0 netns $netns1 +ip0 link add dev wg0 type wireguard +ip0 link set wg0 netns $netns2 +key1="$(pp wg genkey)" +key2="$(pp wg genkey)" +key3="$(pp wg genkey)" +pub1="$(pp wg pubkey <<<"$key1")" +pub2="$(pp wg pubkey <<<"$key2")" +pub3="$(pp wg pubkey <<<"$key3")" +psk="$(pp wg genpsk)" +[[ -n $key1 && -n $key2 && -n $psk ]] + +configure_peers() { + ip1 addr add 192.168.241.1/24 dev wg0 + ip1 addr add fd00::1/24 dev wg0 + + ip2 addr add 192.168.241.2/24 dev wg0 + ip2 addr add fd00::2/24 dev wg0 + + n1 wg set wg0 \ + private-key <(echo "$key1") \ + listen-port 1 \ + peer "$pub2" \ + preshared-key <(echo "$psk") \ + allowed-ips 192.168.241.2/32,fd00::2/128 + n2 wg set wg0 \ + private-key <(echo "$key2") \ + listen-port 2 \ + peer "$pub1" \ + preshared-key <(echo "$psk") \ + allowed-ips 192.168.241.1/32,fd00::1/128 + + ip1 link set up dev wg0 + ip2 link set up dev wg0 +} +configure_peers + +tests() { + # Ping over IPv4 + n2 ping -c 10 -f -W 1 192.168.241.1 + n1 ping -c 10 -f -W 1 192.168.241.2 + + # Ping over IPv6 + n2 ping6 -c 10 -f -W 1 fd00::1 + n1 ping6 -c 10 -f -W 1 fd00::2 + + # TCP over IPv4 + n2 iperf3 -s -1 -B 192.168.241.2 & + waitiperf $netns2 $! + n1 iperf3 -Z -t 3 -c 192.168.241.2 + + # TCP over IPv6 + n1 iperf3 -s -1 -B fd00::1 & + waitiperf $netns1 $! + n2 iperf3 -Z -t 3 -c fd00::1 + + # UDP over IPv4 + n1 iperf3 -s -1 -B 192.168.241.1 & + waitiperf $netns1 $! + n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1 + + # UDP over IPv6 + n2 iperf3 -s -1 -B fd00::2 & + waitiperf $netns2 $! + n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 +} + +[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" +big_mtu=$(( 34816 - 1500 + $orig_mtu )) + +# Test using IPv4 as outer transport +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 +# Before calling tests, we first make sure that the stats counters and timestamper are working +n2 ping -c 10 -f -W 1 192.168.241.1 +{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip2 -stats link show dev wg0) +(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) +{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip1 -stats link show dev wg0) +(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) +read _ rx_bytes tx_bytes < <(n2 wg show wg0 transfer) +(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) +read _ rx_bytes tx_bytes < <(n1 wg show wg0 transfer) +(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) +read _ timestamp < <(n1 wg show wg0 latest-handshakes) +(( timestamp != 0 )) + +tests +ip1 link set wg0 mtu $big_mtu +ip2 link set wg0 mtu $big_mtu +tests + +ip1 link set wg0 mtu $orig_mtu +ip2 link set wg0 mtu $orig_mtu + +# Test using IPv6 as outer transport +n1 wg set wg0 peer "$pub2" endpoint [::1]:2 +n2 wg set wg0 peer "$pub1" endpoint [::1]:1 +tests +ip1 link set wg0 mtu $big_mtu +ip2 link set wg0 mtu $big_mtu +tests + +# Test that route MTUs work with the padding +ip1 link set wg0 mtu 1300 +ip2 link set wg0 mtu 1300 +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 +n0 iptables -A INPUT -m length --length 1360 -j DROP +n1 ip route add 192.168.241.2/32 dev wg0 mtu 1299 +n2 ip route add 192.168.241.1/32 dev wg0 mtu 1299 +n2 ping -c 1 -W 1 -s 1269 192.168.241.1 +n2 ip route delete 192.168.241.1/32 dev wg0 mtu 1299 +n1 ip route delete 192.168.241.2/32 dev wg0 mtu 1299 +n0 iptables -F INPUT + +ip1 link set wg0 mtu $orig_mtu +ip2 link set wg0 mtu $orig_mtu + +# Test using IPv4 that roaming works +ip0 -4 addr del 127.0.0.1/8 dev lo +ip0 -4 addr add 127.212.121.99/8 dev lo +n1 wg set wg0 listen-port 9999 +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n1 ping6 -W 1 -c 1 fd00::2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 127.212.121.99:9999" ]] + +# Test using IPv6 that roaming works +n1 wg set wg0 listen-port 9998 +n1 wg set wg0 peer "$pub2" endpoint [::1]:2 +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [::1]:9998" ]] + +# Test that crypto-RP filter works +n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24 +exec 4< <(n1 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns1 $ncat_pid +n2 ncat -u 192.168.241.1 1111 <<<"X" +read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]] +kill $ncat_pid +more_specific_key="$(pp wg genkey | pp wg pubkey)" +n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32 +n2 wg set wg0 listen-port 9997 +exec 4< <(n1 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns1 $ncat_pid +n2 ncat -u 192.168.241.1 1111 <<<"X" +! read -r -N 1 -t 1 out <&4 || false +kill $ncat_pid +n1 wg set wg0 peer "$more_specific_key" remove +[[ $(n1 wg show wg0 endpoints) == "$pub2 [::1]:9997" ]] + +# Test that we can change private keys keys and immediately handshake +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips 192.168.241.2/32 endpoint 127.0.0.1:2 +n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 +n1 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 private-key <(echo "$key3") +n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove +n1 ping -W 1 -c 1 192.168.241.2 + +ip1 link del wg0 +ip2 link del wg0 + +# Test using NAT. We now change the topology to this: +# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐ +# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │ +# │ │ │ │ │ │ +# │ ┌─────┐ ┌─────┐ │ │ ┌──────┐ ┌──────┐ │ │ ┌─────┐ ┌─────┐ │ +# │ │ wg0 │─────────────│vethc│───────────┼────┼────│vethrc│ │vethrs│──────────────┼─────┼──│veths│────────────│ wg0 │ │ +# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├──────┴─────────┐ ├──────┴────────────┐ │ │ ├─────┴──────────┐ ├─────┴──────────┐ │ +# │ │192.168.241.1/24│ │192.168.1.100/24││ │ │192.168.1.1/24 │ │10.0.0.1/24 │ │ │ │10.0.0.100/24 │ │192.168.241.2/24│ │ +# │ │fd00::1/24 │ │ ││ │ │ │ │SNAT:192.168.1.0/24│ │ │ │ │ │fd00::2/24 │ │ +# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └───────────────────┘ │ │ └────────────────┘ └────────────────┘ │ +# └────────────────────────────────────────┘ └────────────────────────────────────────────────┘ └────────────────────────────────────────┘ + +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers + +ip0 link add vethrc type veth peer name vethc +ip0 link add vethrs type veth peer name veths +ip0 link set vethc netns $netns1 +ip0 link set veths netns $netns2 +ip0 link set vethrc up +ip0 link set vethrs up +ip0 addr add 192.168.1.1/24 dev vethrc +ip0 addr add 10.0.0.1/24 dev vethrs +ip1 addr add 192.168.1.100/24 dev vethc +ip1 link set vethc up +ip1 route add default via 192.168.1.1 +ip2 addr add 10.0.0.100/24 dev veths +ip2 link set veths up +waitiface $netns0 vethrc +waitiface $netns0 vethrs +waitiface $netns1 vethc +waitiface $netns2 veths + +n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' +n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout' +n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream' +n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 10.0.0.0/24 -j SNAT --to 10.0.0.1 + +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.100:2 persistent-keepalive 1 +n1 ping -W 1 -c 1 192.168.241.2 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +# Demonstrate n2 can still send packets to n1, since persistent-keepalive will prevent connection tracking entry from expiring (to see entries: `n0 conntrack -L`). +pp sleep 3 +n2 ping -W 1 -c 1 192.168.241.1 +n1 wg set wg0 peer "$pub2" persistent-keepalive 0 + +# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs. +ip1 -6 addr add fc00::9/96 dev vethc +ip1 -6 route add default via fc00::1 +ip2 -4 addr add 192.168.99.7/32 dev wg0 +ip2 -6 addr add abab::1111/128 dev wg0 +n1 wg set wg0 fwmark 51820 peer "$pub2" allowed-ips 192.168.99.7,abab::1111 +ip1 -6 route add default dev wg0 table 51820 +ip1 -6 rule add not fwmark 51820 table 51820 +ip1 -6 rule add table main suppress_prefixlength 0 +ip1 -4 route add default dev wg0 table 51820 +ip1 -4 rule add not fwmark 51820 table 51820 +ip1 -4 rule add table main suppress_prefixlength 0 +# suppress_prefixlength only got added in 3.12, and we want to support 3.10+. +if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then + # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. + n1 ping -W 1 -c 100 -f 192.168.99.7 + n1 ping -W 1 -c 100 -f abab::1111 +fi + +# Have ns2 NAT into wg0 packets from ns0, but return an icmp error along the right route. +n2 iptables -t nat -A POSTROUTING -s 10.0.0.0/24 -d 192.168.241.0/24 -j SNAT --to 192.168.241.2 +n0 iptables -t filter -A INPUT \! -s 10.0.0.0/24 -i vethrs -j DROP # Manual rpfilter just to be explicit. +n2 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' +ip0 -4 route add 192.168.241.1 via 10.0.0.100 +n2 wg set wg0 peer "$pub1" remove +[[ $(! n0 ping -W 1 -c 1 192.168.241.1 || false) == *"From 10.0.0.100 icmp_seq=1 Destination Host Unreachable"* ]] + +n0 iptables -t nat -F +n0 iptables -t filter -F +n2 iptables -t nat -F +ip0 link del vethrc +ip0 link del vethrs +ip1 link del wg0 +ip2 link del wg0 + +# Test that saddr routing is sticky but not too sticky, changing to this topology: +# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────┐ +# │ $ns1 namespace │ │ $ns2 namespace │ +# │ │ │ │ +# │ ┌─────┐ ┌─────┐ │ │ ┌─────┐ ┌─────┐ │ +# │ │ wg0 │─────────────│veth1│───────────┼────┼──│veth2│────────────│ wg0 │ │ +# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├─────┴──────────┐ ├─────┴──────────┐ │ +# │ │192.168.241.1/24│ │10.0.0.1/24 ││ │ │10.0.0.2/24 │ │192.168.241.2/24│ │ +# │ │fd00::1/24 │ │fd00:aa::1/96 ││ │ │fd00:aa::2/96 │ │fd00::2/24 │ │ +# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └────────────────┘ │ +# └────────────────────────────────────────┘ └────────────────────────────────────────┘ + +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers +ip1 link add veth1 type veth peer name veth2 +ip1 link set veth2 netns $netns2 +n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' +n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' +n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth1/accept_dad' +n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth2/accept_dad' +n1 bash -c 'printf 1 > /proc/sys/net/ipv4/conf/veth1/promote_secondaries' + +# First we check that we aren't overly sticky and can fall over to new IPs when old ones are removed +ip1 addr add 10.0.0.1/24 dev veth1 +ip1 addr add fd00:aa::1/96 dev veth1 +ip2 addr add 10.0.0.2/24 dev veth2 +ip2 addr add fd00:aa::2/96 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 +n1 ping -W 1 -c 1 192.168.241.2 +ip1 addr add 10.0.0.10/24 dev veth1 +ip1 addr del 10.0.0.1/24 dev veth1 +n1 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 +n1 ping -W 1 -c 1 192.168.241.2 +ip1 addr add fd00:aa::10/96 dev veth1 +ip1 addr del fd00:aa::1/96 dev veth1 +n1 ping -W 1 -c 1 192.168.241.2 + +# Now we show that we can successfully do reply to sender routing +ip1 link set veth1 down +ip2 link set veth2 down +ip1 addr flush dev veth1 +ip2 addr flush dev veth2 +ip1 addr add 10.0.0.1/24 dev veth1 +ip1 addr add 10.0.0.2/24 dev veth1 +ip1 addr add fd00:aa::1/96 dev veth1 +ip1 addr add fd00:aa::2/96 dev veth1 +ip2 addr add 10.0.0.3/24 dev veth2 +ip2 addr add fd00:aa::3/96 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +n2 wg set wg0 peer "$pub1" endpoint 10.0.0.1:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::1]:1" ]] +n2 wg set wg0 peer "$pub1" endpoint 10.0.0.2:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.2:1" ]] +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]] + +# What happens if the inbound destination address belongs to a different interface as the default route? +ip1 link add dummy0 type dummy +ip1 addr add 10.50.0.1/24 dev dummy0 +ip1 link set dummy0 up +ip2 route add 10.50.0.0/24 dev veth2 +n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]] + +ip1 link del dummy0 +ip1 addr flush dev veth1 +ip2 addr flush dev veth2 +ip1 route flush dev veth1 +ip2 route flush dev veth2 + +# Now we see what happens if another interface route takes precedence over an ongoing one +ip1 link add veth3 type veth peer name veth4 +ip1 link set veth4 netns $netns2 +ip1 addr add 10.0.0.1/24 dev veth1 +ip2 addr add 10.0.0.2/24 dev veth2 +ip1 addr add 10.0.0.3/24 dev veth3 +ip1 link set veth1 up +ip2 link set veth2 up +ip1 link set veth3 up +ip2 link set veth4 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +waitiface $netns1 veth3 +waitiface $netns2 veth4 +ip1 route flush dev veth1 +ip1 route flush dev veth3 +ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2 +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1 +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter' +n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth4/rp_filter' +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' +n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]] + +ip1 link del veth1 +ip1 link del veth3 +ip1 link del wg0 +ip2 link del wg0 + +# We test that Netlink/IPC is working properly by doing things that usually cause split responses +ip0 link add dev wg0 type wireguard +config=( "[Interface]" "PrivateKey=$(wg genkey)" "[Peer]" "PublicKey=$(wg genkey)" ) +for a in {1..255}; do + for b in {0..255}; do + config+=( "AllowedIPs=$a.$b.0.0/16,$a::$b/128" ) + done +done +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +i=0 +for ip in $(n0 wg show wg0 allowed-ips); do + ((++i)) +done +((i == 255*256*2+1)) +ip0 link del wg0 +ip0 link add dev wg0 type wireguard +config=( "[Interface]" "PrivateKey=$(wg genkey)" ) +for a in {1..40}; do + config+=( "[Peer]" "PublicKey=$(wg genkey)" ) + for b in {1..52}; do + config+=( "AllowedIPs=$a.$b.0.0/16" ) + done +done +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +i=0 +while read -r line; do + j=0 + for ip in $line; do + ((++j)) + done + ((j == 53)) + ((++i)) +done < <(n0 wg show wg0 allowed-ips) +((i == 40)) +ip0 link del wg0 +ip0 link add wg0 type wireguard +config=( ) +for i in {1..29}; do + config+=( "[Peer]" "PublicKey=$(wg genkey)" ) +done +config+=( "[Peer]" "PublicKey=$(wg genkey)" "AllowedIPs=255.2.3.4/32,abcd::255/128" ) +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +n0 wg showconf wg0 > /dev/null +ip0 link del wg0 + +allowedips=( ) +for i in {1..197}; do + allowedips+=( abcd::$i ) +done +saved_ifs="$IFS" +IFS=, +allowedips="${allowedips[*]}" +IFS="$saved_ifs" +ip0 link add wg0 type wireguard +n0 wg set wg0 peer "$pub1" +n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips" +{ + read -r pub allowedips + [[ $pub == "$pub1" && $allowedips == "(none)" ]] + read -r pub allowedips + [[ $pub == "$pub2" ]] + i=0 + for _ in $allowedips; do + ((++i)) + done + ((i == 197)) +} < <(n0 wg show wg0 allowed-ips) +ip0 link del wg0 + +! n0 wg show doesnotexist || false + +ip0 link add wg0 type wireguard +n0 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") +[[ $(n0 wg show wg0 private-key) == "$key1" ]] +[[ $(n0 wg show wg0 preshared-keys) == "$pub2 $psk" ]] +n0 wg set wg0 private-key /dev/null peer "$pub2" preshared-key /dev/null +[[ $(n0 wg show wg0 private-key) == "(none)" ]] +[[ $(n0 wg show wg0 preshared-keys) == "$pub2 (none)" ]] +n0 wg set wg0 peer "$pub2" +n0 wg set wg0 private-key <(echo "$key2") +[[ $(n0 wg show wg0 public-key) == "$pub2" ]] +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 peer "$pub2" +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 private-key <(echo "$key1") +n0 wg set wg0 peer "$pub2" +[[ $(n0 wg show wg0 peers) == "$pub2" ]] +n0 wg set wg0 private-key <(echo "/${key1:1}") +[[ $(n0 wg show wg0 private-key) == "+${key1:1}" ]] +n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.0.0/12,192.168.0.0/16 +n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0 +n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75 +n0 wg set wg0 peer "$pub2" allowed-ips ::/0 +n0 wg set wg0 peer "$pub2" remove +low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= ) +n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer } +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer } +[[ -z $(n0 wg show wg0 peers) ]] +ip0 link del wg0 + +declare -A objects +while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do + [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue + objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}" +done < /dev/kmsg +alldeleted=1 +for object in "${!objects[@]}"; do + if [[ ${objects["$object"]} != *createddestroyed ]]; then + echo "Error: $object: merely ${objects["$object"]}" >&3 + alldeleted=0 + fi +done +[[ $alldeleted -eq 1 ]] +pretty "" "Objects that were created were also destroyed." diff --git a/net/wireguard/tests/qemu/Makefile b/net/wireguard/tests/qemu/Makefile new file mode 100644 index 000000000000..76dc450460e3 --- /dev/null +++ b/net/wireguard/tests/qemu/Makefile @@ -0,0 +1,410 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + +PWD := $(shell pwd) + +CHOST := $(shell gcc -dumpmachine) +HOST_ARCH := $(firstword $(subst -, ,$(CHOST))) +ifneq (,$(ARCH)) +CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(filter-out android,$(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))) +endif +ifeq (,$(CBUILD)) +CBUILD := $(CHOST) +endif +ARCH := $(firstword $(subst -, ,$(CBUILD))) + +# Set these from the environment to override +KERNEL_VERSION ?= 5.5 +KERNEL_VERSION := $(KERNEL_VERSION)$(if $(DEBUG_KERNEL),$(if $(findstring -debug,$(KERNEL_VERSION)),,-debug),) +BUILD_PATH ?= $(PWD)/../../../qemu-build/$(ARCH) +DISTFILES_PATH ?= $(PWD)/distfiles +NR_CPUS ?= 4 + +MIRROR := https://download.wireguard.com/qemu-test/distfiles/ + +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) +WIREGUARD_SOURCES := ../../Kbuild ../../Kconfig $(filter-out ../../tests/%,$(call rwildcard,../../,*.c *.h *.S *.pl *.include)) + +default: qemu + +# variable name, tarball project name, version, tarball extension, default URI base +define tar_download = +$(1)_VERSION := $(3) +$(1)_NAME := $(2)-$$($(1)_VERSION) +$(1)_TAR := $(DISTFILES_PATH)/$$($(1)_NAME:-debug=)$(4) +$(1)_PATH := $(BUILD_PATH)/$$($(1)_NAME) +$(call file_download,$$($(1)_NAME:-debug=)$(4),$(5)) +endef + +define file_download = +$(DISTFILES_PATH)/$(1): + mkdir -p $(DISTFILES_PATH) + flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@ $(MIRROR)$(1) || wget -t inf --retry-on-http-error=404 -O $$@ $(2)$(1) || rm -f $$@' +endef + +ifeq ($(findstring -rc,$(KERNEL_VERSION)),) +KERNEL_URL_DIRECTORY := https://cdn.kernel.org/pub/linux/kernel/v$(firstword $(subst ., ,$(KERNEL_VERSION:-debug=))).x/ +else +KERNEL_URL_DIRECTORY := https://git.kernel.org/torvalds/t/ +endif + +$(eval $(call tar_download,KERNEL,linux,$(KERNEL_VERSION),.tar.gz,$(KERNEL_URL_DIRECTORY))) +$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/)) +$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/)) +$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/)) +$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/)) +$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/)) +$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/)) +$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/)) +$(eval $(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#)) +$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/)) + +export CFLAGS ?= -O3 -pipe +export LDFLAGS ?= +export CPPFLAGS := -I$(BUILD_PATH)/include + +ifeq ($(HOST_ARCH),$(ARCH)) +CROSS_COMPILE_FLAG := --host=$(CHOST) +NOPIE_GCC := gcc -fno-PIE +CFLAGS += -march=native +OMIT_AVX512 := $(shell gcc -march=native -Q --help=target | sed -n 's/.*\-m\(avx512[^ ]*\).*\[enabled\].*/-mno-\1/p') +CFLAGS += $(OMIT_AVX512) +STRIP := strip +else +$(info Cross compilation: building for $(CBUILD) using $(CHOST)) +CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) +export CROSS_COMPILE=$(CBUILD)- +NOPIE_GCC := $(CBUILD)-gcc -fno-PIE +STRIP := $(CBUILD)-strip +endif +ifeq ($(ARCH),aarch64) +QEMU_ARCH := aarch64 +KERNEL_ARCH := arm64 +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/arm64/boot/Image +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a53 -machine virt +CFLAGS += -march=armv8-a -mtune=cortex-a53 +endif +else ifeq ($(ARCH),aarch64_be) +QEMU_ARCH := aarch64 +KERNEL_ARCH := arm64 +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/arm64/boot/Image +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a53 -machine virt +CFLAGS += -march=armv8-a -mtune=cortex-a53 +endif +else ifeq ($(ARCH),arm) +QEMU_ARCH := arm +KERNEL_ARCH := arm +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/arm/boot/zImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a15 -machine virt +CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux +endif +else ifeq ($(ARCH),armeb) +QEMU_ARCH := arm +KERNEL_ARCH := arm +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/arm/boot/zImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a15 -machine virt +CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian. +LDFLAGS += -Wl,--be8 +endif +else ifeq ($(ARCH),x86_64) +QEMU_ARCH := x86_64 +KERNEL_ARCH := x86_64 +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/x86/boot/bzImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine q35,accel=kvm +else +QEMU_MACHINE := -cpu Skylake-Server -machine q35 +CFLAGS += -march=skylake-avx512 +endif +else ifeq ($(ARCH),i686) +QEMU_ARCH := i386 +KERNEL_ARCH := x86 +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/x86/boot/bzImage +ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) +QEMU_MACHINE := -cpu host -machine q35,accel=kvm +else +QEMU_MACHINE := -cpu coreduo -machine q35 +CFLAGS += -march=prescott +endif +else ifeq ($(ARCH),mips64) +QEMU_ARCH := mips64 +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EB +else +QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 +CFLAGS += -march=mips64r2 -EB +endif +else ifeq ($(ARCH),mips64el) +QEMU_ARCH := mips64el +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EL +else +QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 +CFLAGS += -march=mips64r2 -EL +endif +else ifeq ($(ARCH),mips) +QEMU_ARCH := mips +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EB +else +QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 +CFLAGS += -march=mips32r2 -EB +endif +else ifeq ($(ARCH),mipsel) +QEMU_ARCH := mipsel +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EL +else +QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 +CFLAGS += -march=mips32r2 -EL +endif +else ifeq ($(ARCH),powerpc64le) +QEMU_ARCH := ppc64 +KERNEL_ARCH := powerpc +KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine pseries +else +QEMU_MACHINE := -machine pseries +endif +CFLAGS += -mcpu=powerpc64le -mlong-double-64 +else ifeq ($(ARCH),powerpc) +QEMU_ARCH := ppc +KERNEL_ARCH := powerpc +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/powerpc/boot/uImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500 +else +QEMU_MACHINE := -machine ppce500 +endif +CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt +else ifeq ($(ARCH),m68k) +QEMU_ARCH := m68k +KERNEL_ARCH := m68k +KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux +KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config) +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE) +else +QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE) +endif +else +$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k) +endif + +REAL_CC := $(CBUILD)-gcc +MUSL_CC := $(BUILD_PATH)/musl-gcc +export CC := $(MUSL_CC) +USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed + +build: $(KERNEL_BZIMAGE) +qemu: $(KERNEL_BZIMAGE) + rm -f $(BUILD_PATH)/result + timeout --foreground 20m qemu-system-$(QEMU_ARCH) \ + -nodefaults \ + -nographic \ + -smp $(NR_CPUS) \ + $(QEMU_MACHINE) \ + -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_PATH)/.config && echo 1G || echo 256M) \ + -serial stdio \ + -serial file:$(BUILD_PATH)/result \ + -no-reboot \ + -monitor none \ + -kernel $< + grep -Fq success $(BUILD_PATH)/result + +$(BUILD_PATH)/init-cpio-spec.txt: + mkdir -p $(BUILD_PATH) + echo "file /init $(BUILD_PATH)/init 755 0 0" > $@ + echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@ + echo "dir /dev 755 0 0" >> $@ + echo "nod /dev/console 644 0 0 c 5 1" >> $@ + echo "dir /bin 755 0 0" >> $@ + echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@ + echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/wg 755 0 0" >> $@ + echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@ + echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@ + echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@ + echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@ + echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@ + echo "file /bin/xtables-legacy-multi $(IPTABLES_PATH)/iptables/xtables-legacy-multi 755 0 0" >> $@ + echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@ + echo "slink /bin/ping6 ping 777 0 0" >> $@ + echo "dir /lib 755 0 0" >> $@ + echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@ + echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@ + +ifeq ($(findstring -git,$(KERNEL_VERSION)),) +$(KERNEL_PATH)/.installed: $(KERNEL_TAR) + mkdir -p $(KERNEL_PATH) + flock -s $<.lock tar --strip-components=1 -C $(KERNEL_PATH) -xf $< + sed -i "/^if INET\$$/a source \"net/wireguard/Kconfig\"" $(KERNEL_PATH)/net/Kconfig + sed -i "/^obj-\$$(CONFIG_NETFILTER).*+=/a obj-\$$(CONFIG_WIREGUARD) += wireguard/" $(KERNEL_PATH)/net/Makefile + ln -sfT $(shell readlink -f ../..) $(KERNEL_PATH)/net/wireguard + touch $@ +else +$(KERNEL_PATH)/.installed: + mkdir -p $(dir $(KERNEL_PATH)) + flock -s $(dir $(KERNEL_TAR))/$(KERNEL_VERSION).lock git clone $(GIT_URI_$(patsubst %-git,%,$(patsubst %-debug,%,$(KERNEL_VERSION)))) $(KERNEL_PATH) + touch $@ +always-pull: $(KERNEL_PATH)/.installed + flock -s $(dir $(KERNEL_TAR))/$(KERNEL_VERSION).lock git -C $(KERNEL_PATH) fetch + flock -s $(dir $(KERNEL_TAR))/$(KERNEL_VERSION).lock git -C $(KERNEL_PATH) reset --hard FETCH_HEAD +.PHONY: always-pull +$(KERNEL_BZIMAGE): always-pull +endif + +$(KERNEL_PATH)/.config: kernel.config arch/$(ARCH).config | $(KERNEL_PATH)/.installed + cp kernel.config $(KERNEL_PATH)/minimal.config + printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_PATH)/minimal.config + cat arch/$(ARCH).config >> $(KERNEL_PATH)/minimal.config + $(MAKE) -C $(KERNEL_PATH) ARCH=$(KERNEL_ARCH) allnoconfig + cd $(KERNEL_PATH) && ARCH=$(KERNEL_ARCH) scripts/kconfig/merge_config.sh -n .config minimal.config + $(if $(findstring -debug,$(KERNEL_VERSION)),cd $(KERNEL_PATH) && sed -i 's/^EXTRAVERSION =.*/EXTRAVERSION = -debug/' Makefile && ARCH=$(KERNEL_ARCH) scripts/kconfig/merge_config.sh -n .config $(PWD)/debug.config,) + +$(KERNEL_BZIMAGE): $(KERNEL_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) + LOCALVERSION="" $(MAKE) -C $(KERNEL_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" + +$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_PATH)/.config + LOCALVERSION="" $(MAKE) -C $(KERNEL_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install + touch $@ + +$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD) + $(MAKE) -C $(MUSL_PATH) + $(STRIP) -s $@ + +$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so + $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers + touch $@ + +$(MUSL_CC): $(MUSL_PATH)/lib/libc.so + sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs + printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc + chmod +x $(BUILD_PATH)/musl-gcc + +$(IPERF_PATH)/.installed: $(IPERF_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + sed -i '1s/^/#include /' $(IPERF_PATH)/src/cjson.h $(IPERF_PATH)/src/timer.h + sed -i -r 's/-p?g//g' $(IPERF_PATH)/src/Makefile* + touch $@ + +$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) + cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no + $(MAKE) -C $(IPERF_PATH) + $(STRIP) -s $@ + +$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS) + cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared + $(MAKE) -C $(LIBMNL_PATH) + sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc + +$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg + $(STRIP) -s $@ + +$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) + mkdir -p $(BUILD_PATH) + $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $< + $(STRIP) -s $@ + +$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS) + $(MAKE) -C $(IPUTILS_PATH) USE_CAP=no USE_IDN=no USE_NETTLE=no USE_CRYPTO=no ping + $(STRIP) -s $@ + +$(BASH_PATH)/.installed: $(BASH_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) + cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble + $(MAKE) -C $(BASH_PATH) + $(STRIP) -s $@ + +$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk + printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile + touch $@ + +$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip + $(STRIP) -s $(IPROUTE2_PATH)/ip/ip + +$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss + $(STRIP) -s $(IPROUTE2_PATH)/misc/ss + +$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure + touch $@ + +$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include + $(MAKE) -C $(IPTABLES_PATH) + $(STRIP) -s $@ + +$(NMAP_PATH)/.installed: $(NMAP_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS) + cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux --without-libssh + $(MAKE) -C $(NMAP_PATH)/libpcap + $(MAKE) -C $(NMAP_PATH)/ncat + $(STRIP) -s $@ + +clean: + rm -rf $(BUILD_PATH) + +distclean: clean + rm -rf $(DISTFILES_PATH) + +.PHONY: qemu build clean distclean +.DELETE_ON_ERROR: diff --git a/net/wireguard/tests/qemu/arch/m68k.config b/net/wireguard/tests/qemu/arch/m68k.config new file mode 100644 index 000000000000..62a15bdb877e --- /dev/null +++ b/net/wireguard/tests/qemu/arch/m68k.config @@ -0,0 +1,9 @@ +CONFIG_MMU=y +CONFIG_M68KCLASSIC=y +CONFIG_M68040=y +CONFIG_MAC=y +CONFIG_SERIAL_PMACZILOG=y +CONFIG_SERIAL_PMACZILOG_TTYS=y +CONFIG_SERIAL_PMACZILOG_CONSOLE=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/init.c b/net/wireguard/tests/qemu/init.c new file mode 100644 index 000000000000..90bc9813cadc --- /dev/null +++ b/net/wireguard/tests/qemu/init.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +__attribute__((noreturn)) static void poweroff(void) +{ + fflush(stdout); + fflush(stderr); + reboot(RB_AUTOBOOT); + sleep(30); + fprintf(stderr, "\x1b[37m\x1b[41m\x1b[1mFailed to power off!!!\x1b[0m\n"); + exit(1); +} + +static void panic(const char *what) +{ + fprintf(stderr, "\n\n\x1b[37m\x1b[41m\x1b[1mSOMETHING WENT HORRIBLY WRONG\x1b[0m\n\n \x1b[31m\x1b[1m%s: %s\x1b[0m\n\n\x1b[37m\x1b[44m\x1b[1mPower off...\x1b[0m\n\n", what, strerror(errno)); + poweroff(); +} + +#define pretty_message(msg) puts("\x1b[32m\x1b[1m" msg "\x1b[0m") + +static void print_banner(void) +{ + struct utsname utsname; + int len; + + if (uname(&utsname) < 0) + panic("uname"); + + len = strlen(" WireGuard Test Suite on ") + strlen(utsname.sysname) + strlen(utsname.release) + strlen(utsname.machine); + printf("\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\x1b[45m\x1b[33m\x1b[1m WireGuard Test Suite on %s %s %s \x1b[0m\n\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\n", len, "", utsname.sysname, utsname.release, utsname.machine, len, ""); +} + +static void seed_rng(void) +{ + int fd; + struct { + int entropy_count; + int buffer_size; + unsigned char buffer[256]; + } entropy = { + .entropy_count = sizeof(entropy.buffer) * 8, + .buffer_size = sizeof(entropy.buffer), + .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!" + }; + + if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9))) + panic("mknod(/dev/urandom)"); + fd = open("/dev/urandom", O_WRONLY); + if (fd < 0) + panic("open(urandom)"); + for (int i = 0; i < 256; ++i) { + if (ioctl(fd, RNDADDENTROPY, &entropy) < 0) + panic("ioctl(urandom)"); + } + close(fd); +} + +static void mount_filesystems(void) +{ + pretty_message("[+] Mounting filesystems..."); + mkdir("/dev", 0755); + mkdir("/proc", 0755); + mkdir("/sys", 0755); + mkdir("/tmp", 0755); + mkdir("/run", 0755); + mkdir("/var", 0755); + if (mount("none", "/dev", "devtmpfs", 0, NULL)) + panic("devtmpfs mount"); + if (mount("none", "/proc", "proc", 0, NULL)) + panic("procfs mount"); + if (mount("none", "/sys", "sysfs", 0, NULL)) + panic("sysfs mount"); + if (mount("none", "/tmp", "tmpfs", 0, NULL)) + panic("tmpfs mount"); + if (mount("none", "/run", "tmpfs", 0, NULL)) + panic("tmpfs mount"); + if (mount("none", "/sys/kernel/debug", "debugfs", 0, NULL)) + ; /* Not a problem if it fails.*/ + if (symlink("/run", "/var/run")) + panic("run symlink"); + if (symlink("/proc/self/fd", "/dev/fd")) + panic("fd symlink"); +} + +static void enable_logging(void) +{ + int fd; + pretty_message("[+] Enabling logging..."); + fd = open("/proc/sys/kernel/printk", O_WRONLY); + if (fd >= 0) { + if (write(fd, "9\n", 2) != 2) + panic("write(printk)"); + close(fd); + } + fd = open("/proc/sys/debug/exception-trace", O_WRONLY); + if (fd >= 0) { + if (write(fd, "1\n", 2) != 2) + panic("write(exception-trace)"); + close(fd); + } + fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY); + if (fd >= 0) { + if (write(fd, "1\n", 2) != 2) + panic("write(panic_on_warn)"); + close(fd); + } +} + +static void kmod_selftests(void) +{ + FILE *file; + char line[2048], *start, *pass; + bool success = true; + pretty_message("[+] Module self-tests:"); + file = fopen("/proc/kmsg", "r"); + if (!file) + panic("fopen(kmsg)"); + if (fcntl(fileno(file), F_SETFL, O_NONBLOCK) < 0) + panic("fcntl(kmsg, nonblock)"); + while (fgets(line, sizeof(line), file)) { + start = strstr(line, "wireguard: "); + if (!start) + continue; + start += 11; + *strchrnul(start, '\n') = '\0'; + if (strstr(start, "www.wireguard.com")) + break; + pass = strstr(start, ": pass"); + if (!pass || pass[6] != '\0') { + success = false; + printf(" \x1b[31m* %s\x1b[0m\n", start); + } else + printf(" \x1b[32m* %s\x1b[0m\n", start); + } + fclose(file); + if (!success) { + puts("\x1b[31m\x1b[1m[-] Tests failed! \u2639\x1b[0m"); + poweroff(); + } +} + +static void launch_tests(void) +{ + char cmdline[4096], *success_dev; + int status, fd; + pid_t pid; + + pretty_message("[+] Launching tests..."); + pid = fork(); + if (pid == -1) + panic("fork"); + else if (pid == 0) { + execl("/init.sh", "init", NULL); + panic("exec"); + } + if (waitpid(pid, &status, 0) < 0) + panic("waitpid"); + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + pretty_message("[+] Tests successful! :-)"); + fd = open("/proc/cmdline", O_RDONLY); + if (fd < 0) + panic("open(/proc/cmdline)"); + if (read(fd, cmdline, sizeof(cmdline) - 1) <= 0) + panic("read(/proc/cmdline)"); + cmdline[sizeof(cmdline) - 1] = '\0'; + for (success_dev = strtok(cmdline, " \n"); success_dev; success_dev = strtok(NULL, " \n")) { + if (strncmp(success_dev, "wg.success=", 11)) + continue; + memcpy(success_dev + 11 - 5, "/dev/", 5); + success_dev += 11 - 5; + break; + } + if (!success_dev || !strlen(success_dev)) + panic("Unable to find success device"); + + fd = open(success_dev, O_WRONLY); + if (fd < 0) + panic("open(success_dev)"); + if (write(fd, "success\n", 8) != 8) + panic("write(success_dev)"); + close(fd); + } else { + const char *why = "unknown cause"; + int what = -1; + + if (WIFEXITED(status)) { + why = "exit code"; + what = WEXITSTATUS(status); + } else if (WIFSIGNALED(status)) { + why = "signal"; + what = WTERMSIG(status); + } + printf("\x1b[31m\x1b[1m[-] Tests failed with %s %d! \u2639\x1b[0m\n", why, what); + } +} + +static void ensure_console(void) +{ + for (unsigned int i = 0; i < 1000; ++i) { + int fd = open("/dev/console", O_RDWR); + if (fd < 0) { + usleep(50000); + continue; + } + dup2(fd, 0); + dup2(fd, 1); + dup2(fd, 2); + close(fd); + if (write(1, "\0\0\0\0\n", 5) == 5) + return; + } + panic("Unable to open console device"); +} + +static void clear_leaks(void) +{ + int fd; + + fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); + if (fd < 0) + return; + pretty_message("[+] Starting memory leak detection..."); + write(fd, "clear\n", 5); + close(fd); +} + +static void check_leaks(void) +{ + int fd; + + fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); + if (fd < 0) + return; + pretty_message("[+] Scanning for memory leaks..."); + sleep(2); /* Wait for any grace periods. */ + write(fd, "scan\n", 5); + close(fd); + + fd = open("/sys/kernel/debug/kmemleak", O_RDONLY); + if (fd < 0) + return; + if (sendfile(1, fd, NULL, 0x7ffff000) > 0) + panic("Memory leaks encountered"); + close(fd); +} + +int main(int argc, char *argv[]) +{ + seed_rng(); + ensure_console(); + print_banner(); + mount_filesystems(); + kmod_selftests(); + enable_logging(); + clear_leaks(); + launch_tests(); + check_leaks(); + poweroff(); + return 1; +} diff --git a/net/wireguard/tests/qemu/kernel.config b/net/wireguard/tests/qemu/kernel.config new file mode 100644 index 000000000000..071a43f5dfff --- /dev/null +++ b/net/wireguard/tests/qemu/kernel.config @@ -0,0 +1,87 @@ +CONFIG_NET=y +CONFIG_NETDEVICES=y +CONFIG_NET_CORE=y +CONFIG_NET_IPIP=y +CONFIG_DUMMY=y +CONFIG_VETH=y +CONFIG_MULTIUSER=y +CONFIG_NAMESPACES=y +CONFIG_NET_NS=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IPV6=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_NAT=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_NAT=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_NAT_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_TTY=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_SCRIPT=y +CONFIG_VDSO=y +CONFIG_VIRTUALIZATION=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_KVM_GUEST=y +CONFIG_PARAVIRT_SPINLOCKS=y +CONFIG_PRINTK=y +CONFIG_KALLSYMS=y +CONFIG_BUG=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y +CONFIG_JUMP_LABEL=y +CONFIG_EMBEDDED=n +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_SHMEM=y +CONFIG_SLUB=y +CONFIG_SPARSEMEM_VMEMMAP=y +CONFIG_SMP=y +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +CONFIG_NUMA=y +CONFIG_PREEMPT=y +CONFIG_NO_HZ=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_HZ_PERIODIC=n +CONFIG_HIGH_RES_TIMERS=y +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_ARCH_RANDOM=y +CONFIG_FILE_LOCKING=y +CONFIG_POSIX_TIMERS=y +CONFIG_DEVTMPFS=y +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 +CONFIG_PRINTK_TIME=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_LEGACY_VSYSCALL_NONE=y +CONFIG_KERNEL_GZIP=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_BUG_ON_DATA_CORRUPTION=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_WQ_WATCHDOG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y +CONFIG_PANIC_TIMEOUT=-1 +CONFIG_STACKTRACE=y +CONFIG_EARLY_PRINTK=y +CONFIG_GDB_SCRIPTS=y +CONFIG_WIREGUARD=y +CONFIG_WIREGUARD_DEBUG=y diff --git a/net/wireguard/version.h b/net/wireguard/version.h new file mode 100644 index 000000000000..1f89f50912ea --- /dev/null +++ b/net/wireguard/version.h @@ -0,0 +1,3 @@ +#ifndef WIREGUARD_VERSION +#define WIREGUARD_VERSION "0.0.20200215" +#endif