system/xen: Updated for version 4.8.0.

Signed-off-by: Mario Preksavec <mario@slackware.hr>
Mario Preksavec 2017-01-08 17:51:53 +01:00 committed by David Spencer
parent 9b2ee6f34d
commit fc59ea21ba
25 changed files with 209 additions and 1374 deletions

View file

@@ -9,7 +9,12 @@ This script has a few optional dependencies:
mbootpack - creates LILO compatible kernel images
libssh2 - mostly used by libvirt, enable with USE_LIBSSH2=yes
ocaml-findlib - autodetected, builds oxenstored binary
spice - enable with USE_SPICE=yes
Linking with the stock libraries:
bluez - enable with USE_BLUEZ=yes
gtk - enable with USE_GTK=yes
Reading material:
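
For reference, these USE_* switches are plain environment variables read by the build script; a hedged invocation sketch (assuming the standard xen.SlackBuild entry point) would be:

    # build with the optional QEMU features enabled
    USE_SPICE=yes USE_GTK=yes USE_LIBSSH2=yes ./xen.SlackBuild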

View file

@@ -28,6 +28,6 @@ find etc/rc.d -type f -name 'rc.xen*.new' \
find etc/default -type f -name 'xen*.new' \
| while read new ; do config $new ; done
find etc/xen etc/qemu -type f -name '*.new' \
find etc/xen -type f -name '*.new' \
| while read new ; do config $new ; done
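
The config function called in this loop is the usual Slackware doinst.sh helper; a minimal sketch of the conventional implementation (assumed here, it is not part of the diff) is:

    config() {
      NEW="$1"
      OLD="$(dirname $NEW)/$(basename $NEW .new)"
      # If there's no config file by that name, mv it over:
      if [ ! -r $OLD ]; then
        mv $NEW $OLD
      elif [ "$(cat $OLD | md5sum)" = "$(cat $NEW | md5sum)" ]; then
        # toss the redundant copy
        rm $NEW
      fi
      # Otherwise, we leave the .new copy for the admin to consider...
    }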

View file

@@ -46,7 +46,7 @@ Xen EFI binary.
To make things a bit easier, a copy of Xen EFI binary can be found here:
http://slackware.hr/~mario/xen/xen.efi.gz
http://slackware.hr/~mario/xen/xen-4.8.0.efi.gz
If an automatic boot to Xen kernel is desired, the binary should be renamed and
copied to the following location: /boot/efi/EFI/BOOT/bootx64.efi
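
A hedged sketch of those steps, using the download location and target path given above (and assuming the EFI system partition is mounted at /boot/efi):

    # fetch the prebuilt binary and install it as the default EFI loader
    wget http://slackware.hr/~mario/xen/xen-4.8.0.efi.gz
    gunzip xen-4.8.0.efi.gz
    mkdir -p /boot/efi/EFI/BOOT
    cp xen-4.8.0.efi /boot/efi/EFI/BOOT/bootx64.efi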

View file

@@ -6,7 +6,7 @@
# Modified by Mario Preksavec <mario@slackware.hr>
KERNEL=${KERNEL:-4.4.38}
XEN=${XEN:-4.7.1}
XEN=${XEN:-4.8.0}
BOOTLOADER=${BOOTLOADER:-lilo}
ROOTMOD=${ROOTMOD:-ext4}

View file

@@ -1,12 +0,0 @@
--- xen-4.5.0/tools/Makefile.orig 2015-01-12 17:53:24.000000000 +0100
+++ xen-4.5.0/tools/Makefile 2015-02-16 09:19:18.999816844 +0100
@@ -225,6 +225,9 @@
--disable-docs \
--disable-guest-agent \
--python=$(PYTHON) \
+ --sysconfdir=/etc \
+ --@@CONF_LIBSSH2@@able-libssh2 \
+ --@@CONF_BLUEZ@@able-bluez \
$(CONFIG_QEMUU_EXTRA_ARGS) \
--cpu=$(IOEMU_CPU_ARCH) \
$(IOEMU_CONFIGURE_CROSS); \

View file

@@ -2,7 +2,7 @@
# Slackware build script for xen
# Copyright 2010, 2011, 2013, 2014, 2015, 2016 Mario Preksavec, Zagreb, Croatia
# Copyright 2010, 2011, 2013, 2014, 2015, 2016, 2017 Mario Preksavec, Zagreb, Croatia
# All rights reserved.
#
# Redistribution and use of this script, with or without modification, is
@@ -23,13 +23,13 @@
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PRGNAM=xen
VERSION=${VERSION:-4.7.1}
BUILD=${BUILD:-3}
VERSION=${VERSION:-4.8.0}
BUILD=${BUILD:-1}
TAG=${TAG:-_SBo}
SEABIOS=${SEABIOS:-1.9.2}
OVMF=${OVMF:-52a99493cce88a9d4ec8a02d7f1bd1a1001ce60d}
IPXE=${IPXE:-9a93db3f0947484e30e753bbd61a10b17336e20e}
SEABIOS=${SEABIOS:-1.10.0}
OVMF=${OVMF:-20160905_bc54e50}
IPXE=${IPXE:-827dd1bfee67daa683935ce65316f7e0f057fe1c}
if [ -z "$ARCH" ]; then
case "$( uname -m )" in
@@ -73,24 +73,37 @@ else
LIBDIRSUFFIX=""
fi
case "${USE_LIBSSH2:-no}" in
yes) CONF_LIBSSH2="en" ;;
*) CONF_LIBSSH2="dis" ;;
esac
case "${USE_BLUEZ:-no}" in
yes) CONF_BLUEZ="en" ;;
*) CONF_BLUEZ="dis" ;;
esac
CONF_XEN="--disable-qemu-traditional --disable-rombios"
CONF_QEMUU="--sysconfdir=/etc"
case "${BUILD_STUBDOM:-no}" in
yes) CONF_STUBDOM="en" ;;
*) CONF_STUBDOM="dis" ;;
yes) CONF_XEN+=" --enable-stubdom" ;;
*) CONF_XEN+=" --disable-stubdom" ;;
esac
case "${WITH_OVMF:-yes}" in
no) CONF_OVMF="dis" ;;
*) CONF_OVMF="en" ;;
no) CONF_XEN+=" --disable-ovmf" ;;
*) CONF_XEN+=" --enable-ovmf" ;;
esac
case "${USE_LIBSSH2:-no}" in
yes) CONF_QEMUU+=" --enable-libssh2" ;;
*) CONF_QEMUU+=" --disable-libssh2" ;;
esac
case "${USE_BLUEZ:-no}" in
yes) CONF_QEMUU+=" --enable-bluez" ;;
*) CONF_QEMUU+=" --disable-bluez" ;;
esac
case "${USE_GTK:-no}" in
yes) CONF_QEMUU+=" --enable-gtk" ;;
*) CONF_QEMUU+=" --disable-gtk" ;;
esac
case "${USE_SPICE:-no}" in
yes) CONF_QEMUU+=" --enable-spice" ;;
*) CONF_QEMUU+=" --disable-spice" ;;
esac
set -e
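
Taken together, the case blocks above simply accumulate configure switches; a hedged summary of the resulting values and an example invocation (derived from the defaults shown, not from actual build output):

    # defaults (no overrides): CONF_XEN ends up as
    #   --disable-qemu-traditional --disable-rombios --disable-stubdom --enable-ovmf
    # and CONF_QEMUU as
    #   --sysconfdir=/etc --disable-libssh2 --disable-bluez --disable-gtk --disable-spice
    # flipping the optional features on at build time:
    BUILD_STUBDOM=yes USE_SPICE=yes USE_GTK=yes ./xen.SlackBuild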
@@ -111,16 +124,12 @@ find -L . \
# Apply Xen Security Advisory patches
for i in $CWD/xsa/* ; do
case $i in
*qemut*.patch) patch -d tools/qemu-xen-traditional -p1 <$i ;;
*qemu*.patch) patch -d tools/qemu-xen -p1 <$i ;;
*.patch) patch -p1 <$i ;;
*qemut*.patch) patch --verbose -d tools/qemu-xen-traditional -p1 <$i ;;
*qemu*.patch) patch --verbose -d tools/qemu-xen -p1 <$i ;;
*.patch) patch --verbose -p1 <$i ;;
esac
done
# Don't link with libssh and bluez by default
sed "s/@@CONF_LIBSSH2@@/$CONF_LIBSSH2/;s/@@CONF_BLUEZ@@/$CONF_BLUEZ/" \
$CWD/patches/qemu_configure_options.diff | patch -p1
# Remove hardlinks
patch -p1 <$CWD/patches/symlinks_instead_of_hardlinks.diff
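
For clarity, the XSA patch loop earlier in this hunk routes files under $CWD/xsa/ by name; hedged, illustrative examples (the filenames are hypothetical):

    #   xsa197-qemut.patch -> applied inside tools/qemu-xen-traditional
    #   xsa197-qemuu.patch -> applied inside tools/qemu-xen
    #   xsa204.patch       -> applied against the top-level Xen tree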
@@ -137,7 +146,9 @@ cp $CWD/ipxe-git-$IPXE.tar.gz tools/firmware/etherboot/_ipxe.tar.gz
ln -s seabios-dir-remote seabios-dir
make -C seabios-dir defconfig
# OVMF
tar -xf $CWD/ovmf-git-$OVMF.tar.gz
tar -xf $CWD/xen-ovmf-$OVMF.tar.bz2
mv xen-ovmf-$OVMF ovmf-dir-remote
ln -s ovmf-dir-remote ovmf-dir
cp ovmf-makefile ovmf-dir/Makefile
)
cp $CWD/{lwip,zlib,newlib,pciutils,grub,gmp,tpm_emulator}-*.tar.?z* \
@@ -153,10 +164,8 @@ CXXFLAGS="$SLKCFLAGS" \
--localstatedir=/var \
--mandir=/usr/man \
--docdir=/usr/doc/$PRGNAM-$VERSION \
--disable-qemu-traditional \
--disable-rombios \
--${CONF_STUBDOM}able-stubdom \
--${CONF_OVMF}able-ovmf \
$CONF_XEN \
--with-extra-qemuu-configure-args="$CONF_QEMUU" \
--build=$ARCH-slackware-linux
make install-xen \
@@ -173,7 +182,7 @@ make install-tools \
MANDIR=/usr/man \
DESTDIR=$PKG
if [ "$CONF_STUBDOM" = "en" ]; then
if [ "$BUILD_STUBDOM" = "yes" ]; then
make install-stubdom \
docdir=/usr/doc/$PRGNAM-$VERSION \
DOCDIR=/usr/doc/$PRGNAM-$VERSION \

View file

@@ -1,8 +1,8 @@
PRGNAM="xen"
VERSION="4.7.1"
VERSION="4.8.0"
HOMEPAGE="http://www.xenproject.org/"
DOWNLOAD="http://mirror.slackware.hr/sources/xen/xen-4.7.1.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/ipxe-git-9a93db3f0947484e30e753bbd61a10b17336e20e.tar.gz \
DOWNLOAD="http://mirror.slackware.hr/sources/xen/xen-4.8.0.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/ipxe-git-827dd1bfee67daa683935ce65316f7e0f057fe1c.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/lwip-1.3.0.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/zlib-1.2.3.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/newlib-1.16.0.tar.gz \
@@ -11,10 +11,10 @@ DOWNLOAD="http://mirror.slackware.hr/sources/xen/xen-4.7.1.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/polarssl-1.1.4-gpl.tgz \
http://mirror.slackware.hr/sources/xen-extfiles/gmp-4.3.2.tar.bz2 \
http://mirror.slackware.hr/sources/xen-extfiles/tpm_emulator-0.7.4.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/seabios-1.9.2.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/ovmf-git-52a99493cce88a9d4ec8a02d7f1bd1a1001ce60d.tar.gz"
MD5SUM="8e258d87a1008a3200eec6989e164fa4 \
7496268cebf47d5c9ccb0696e3b26065 \
http://mirror.slackware.hr/sources/xen-seabios/seabios-1.10.0.tar.gz \
http://mirror.slackware.hr/sources/xen-ovmf/xen-ovmf-20160905_bc54e50.tar.bz2"
MD5SUM="d738f7c741110342621cb8a4d10b0191 \
71c69b5e1db9e01d5f246226eca03c22 \
36cc57650cffda9a0269493be2a169bb \
debc62758716a169df9f62e6ab2bc634 \
bf8f1f9e3ca83d732c00a79a6ef29bc4 \
@@ -23,8 +23,8 @@ MD5SUM="8e258d87a1008a3200eec6989e164fa4 \
7b72caf22b01464ee7d6165f2fd85f44 \
dd60683d7057917e34630b4a787932e8 \
e26becb8a6a2b6695f6b3e8097593db8 \
32201f54c5fb478914d0bb2449b18454 \
bd4b1d36212692fa4874ecad2a42abed"
633ffc9df0295eeeb4182444eb0300ee \
87ba85cfec3993e4ee566dc58724d8a6"
REQUIRES="acpica yajl"
DOWNLOAD_x86_64=""
MD5SUM_x86_64=""

View file

@@ -1,152 +0,0 @@
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/hvm: Fix the handling of non-present segments
In 32bit, the data segments may be NULL to indicate that the segment is
ineligible for use. In both 32bit and 64bit, the LDT selector may be NULL to
indicate that the entire LDT is ineligible for use. However, nothing in Xen
actually checks for this condition when performing other segmentation
checks. (Note however that limit and writeability checks are correctly
performed).
Neither Intel nor AMD specify the exact behaviour of loading a NULL segment.
Experimentally, AMD zeroes all attributes but leaves the base and limit
unmodified. Intel zeroes the base, sets the limit to 0xfffffff and resets the
attributes to just .G and .D/B.
The use of the segment information in the VMCB/VMCS is equivalent to a native
pipeline interacting with the segment cache. The present bit can therefore
have a subtly different meaning, and it is now cooked to uniformly indicate
whether the segment is usable or not.
GDTR and IDTR don't have access rights like the other segments, but for
consistency, they are treated as being present so no special casing is needed
elsewhere in the segmentation logic.
AMD hardware does not consider the present bit for %cs and %tr, and will
function as if they were present. They are therefore unconditionally set to
present when reading information from the VMCB, to maintain the new meaning of
usability.
Intel hardware has a separate unusable bit in the VMCS segment attributes.
This bit is inverted and stored in the present field, so the hvm code can work
with architecturally-common state.
This is XSA-191.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/hvm/hvm.c | 8 ++++++++
xen/arch/x86/hvm/svm/svm.c | 4 ++++
xen/arch/x86/hvm/vmx/vmx.c | 20 +++++++++++---------
xen/arch/x86/x86_emulate/x86_emulate.c | 4 ++++
4 files changed, 27 insertions(+), 9 deletions(-)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 704fd64..deb1783 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2512,6 +2512,10 @@ bool_t hvm_virtual_to_linear_addr(
*/
addr = (uint32_t)(addr + reg->base);
+ /* Segment not valid for use (cooked meaning of .p)? */
+ if ( !reg->attr.fields.p )
+ goto out;
+
switch ( access_type )
{
case hvm_access_read:
@@ -2767,6 +2771,10 @@ static int hvm_load_segment_selector(
hvm_get_segment_register(
v, (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, &desctab);
+ /* Segment not valid for use (cooked meaning of .p)? */
+ if ( !desctab.attr.fields.p )
+ goto fail;
+
/* Check against descriptor table limit. */
if ( ((sel & 0xfff8) + 7) > desctab.limit )
goto fail;
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 16427f6..4cba406 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -627,6 +627,7 @@ static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
{
case x86_seg_cs:
memcpy(reg, &vmcb->cs, sizeof(*reg));
+ reg->attr.fields.p = 1;
reg->attr.fields.g = reg->limit > 0xFFFFF;
break;
case x86_seg_ds:
@@ -660,13 +661,16 @@ static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
case x86_seg_tr:
svm_sync_vmcb(v);
memcpy(reg, &vmcb->tr, sizeof(*reg));
+ reg->attr.fields.p = 1;
reg->attr.fields.type |= 0x2;
break;
case x86_seg_gdtr:
memcpy(reg, &vmcb->gdtr, sizeof(*reg));
+ reg->attr.bytes = 0x80;
break;
case x86_seg_idtr:
memcpy(reg, &vmcb->idtr, sizeof(*reg));
+ reg->attr.bytes = 0x80;
break;
case x86_seg_ldtr:
svm_sync_vmcb(v);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 9a8f694..a652c52 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1035,10 +1035,12 @@ void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
reg->sel = sel;
reg->limit = limit;
- reg->attr.bytes = (attr & 0xff) | ((attr >> 4) & 0xf00);
- /* Unusable flag is folded into Present flag. */
- if ( attr & (1u<<16) )
- reg->attr.fields.p = 0;
+ /*
+ * Fold VT-x representation into Xen's representation. The Present bit is
+ * unconditionally set to the inverse of unusable.
+ */
+ reg->attr.bytes =
+ (!(attr & (1u << 16)) << 7) | (attr & 0x7f) | ((attr >> 4) & 0xf00);
/* Adjust for virtual 8086 mode */
if ( v->arch.hvm_vmx.vmx_realmode && seg <= x86_seg_tr
@@ -1118,11 +1120,11 @@ static void vmx_set_segment_register(struct vcpu *v, enum x86_segment seg,
}
}
- attr = ((attr & 0xf00) << 4) | (attr & 0xff);
-
- /* Not-present must mean unusable. */
- if ( !reg->attr.fields.p )
- attr |= (1u << 16);
+ /*
+ * Unfold Xen representation into VT-x representation. The unusable bit
+ * is unconditionally set to the inverse of present.
+ */
+ attr = (!(attr & (1u << 7)) << 16) | ((attr & 0xf00) << 4) | (attr & 0xff);
/* VMX has strict consistency requirement for flag G. */
attr |= !!(limit >> 20) << 15;
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 7a707dc..7cb6f98 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1367,6 +1367,10 @@ protmode_load_seg(
&desctab, ctxt)) )
return rc;
+ /* Segment not valid for use (cooked meaning of .p)? */
+ if ( !desctab.attr.fields.p )
+ goto raise_exn;
+
/* Check against descriptor table limit. */
if ( ((sel & 0xfff8) + 7) > desctab.limit )
goto raise_exn;

View file

@@ -1,64 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/HVM: don't load LDTR with VM86 mode attrs during task switch
Just like TR, LDTR is purely a protected mode facility and hence needs
to be loaded accordingly. Also move its loading to where it
architecturally belongs.
This is XSA-192.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2728,17 +2728,16 @@ static void hvm_unmap_entry(void *p)
}
static int hvm_load_segment_selector(
- enum x86_segment seg, uint16_t sel)
+ enum x86_segment seg, uint16_t sel, unsigned int eflags)
{
struct segment_register desctab, cs, segr;
struct desc_struct *pdesc, desc;
u8 dpl, rpl, cpl;
bool_t writable;
int fault_type = TRAP_invalid_tss;
- struct cpu_user_regs *regs = guest_cpu_user_regs();
struct vcpu *v = current;
- if ( regs->eflags & X86_EFLAGS_VM )
+ if ( eflags & X86_EFLAGS_VM )
{
segr.sel = sel;
segr.base = (uint32_t)sel << 4;
@@ -2986,6 +2985,8 @@ void hvm_task_switch(
if ( rc != HVMCOPY_okay )
goto out;
+ if ( hvm_load_segment_selector(x86_seg_ldtr, tss.ldt, 0) )
+ goto out;
if ( hvm_set_cr3(tss.cr3, 1) )
goto out;
@@ -3008,13 +3009,12 @@ void hvm_task_switch(
}
exn_raised = 0;
- if ( hvm_load_segment_selector(x86_seg_ldtr, tss.ldt) ||
- hvm_load_segment_selector(x86_seg_es, tss.es) ||
- hvm_load_segment_selector(x86_seg_cs, tss.cs) ||
- hvm_load_segment_selector(x86_seg_ss, tss.ss) ||
- hvm_load_segment_selector(x86_seg_ds, tss.ds) ||
- hvm_load_segment_selector(x86_seg_fs, tss.fs) ||
- hvm_load_segment_selector(x86_seg_gs, tss.gs) )
+ if ( hvm_load_segment_selector(x86_seg_es, tss.es, tss.eflags) ||
+ hvm_load_segment_selector(x86_seg_cs, tss.cs, tss.eflags) ||
+ hvm_load_segment_selector(x86_seg_ss, tss.ss, tss.eflags) ||
+ hvm_load_segment_selector(x86_seg_ds, tss.ds, tss.eflags) ||
+ hvm_load_segment_selector(x86_seg_fs, tss.fs, tss.eflags) ||
+ hvm_load_segment_selector(x86_seg_gs, tss.gs, tss.eflags) )
exn_raised = 1;
rc = hvm_copy_to_guest_virt(

View file

@@ -1,68 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/PV: writes of %fs and %gs base MSRs require canonical addresses
Commit c42494acb2 ("x86: fix FS/GS base handling when using the
fsgsbase feature") replaced the use of wrmsr_safe() on these paths
without recognizing that wr{f,g}sbase() use just wrmsrl() and that the
WR{F,G}SBASE instructions also raise #GP for non-canonical input.
Similarly arch_set_info_guest() needs to prevent non-canonical
addresses from getting stored into state later to be loaded by context
switch code. For consistency also check stack pointers and LDT base.
DR0..3, otoh, already get properly checked in set_debugreg() (albeit
we discard the error there).
The SHADOW_GS_BASE check isn't strictly necessary, but I think we
better avoid trying the WRMSR if we know it's going to fail.
This is XSA-193.
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -890,7 +890,13 @@ int arch_set_info_guest(
{
if ( !compat )
{
- if ( !is_canonical_address(c.nat->user_regs.eip) ||
+ if ( !is_canonical_address(c.nat->user_regs.rip) ||
+ !is_canonical_address(c.nat->user_regs.rsp) ||
+ !is_canonical_address(c.nat->kernel_sp) ||
+ (c.nat->ldt_ents && !is_canonical_address(c.nat->ldt_base)) ||
+ !is_canonical_address(c.nat->fs_base) ||
+ !is_canonical_address(c.nat->gs_base_kernel) ||
+ !is_canonical_address(c.nat->gs_base_user) ||
!is_canonical_address(c.nat->event_callback_eip) ||
!is_canonical_address(c.nat->syscall_callback_eip) ||
!is_canonical_address(c.nat->failsafe_callback_eip) )
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2723,19 +2723,22 @@ static int emulate_privileged_op(struct
switch ( regs->_ecx )
{
case MSR_FS_BASE:
- if ( is_pv_32bit_domain(currd) )
+ if ( is_pv_32bit_domain(currd) ||
+ !is_canonical_address(msr_content) )
goto fail;
wrfsbase(msr_content);
v->arch.pv_vcpu.fs_base = msr_content;
break;
case MSR_GS_BASE:
- if ( is_pv_32bit_domain(currd) )
+ if ( is_pv_32bit_domain(currd) ||
+ !is_canonical_address(msr_content) )
goto fail;
wrgsbase(msr_content);
v->arch.pv_vcpu.gs_base_kernel = msr_content;
break;
case MSR_SHADOW_GS_BASE:
- if ( is_pv_32bit_domain(currd) )
+ if ( is_pv_32bit_domain(currd) ||
+ !is_canonical_address(msr_content) )
goto fail;
if ( wrmsr_safe(MSR_SHADOW_GS_BASE, msr_content) )
goto fail;

View file

@@ -1,144 +0,0 @@
From 71096b016f7fd54a72af73576948cb25cf42ebcb Mon Sep 17 00:00:00 2001
From: Roger Pau Monné <roger.pau@citrix.com>
Date: Wed, 2 Nov 2016 15:02:00 +0000
Subject: [PATCH] libelf: fix stack memory leak when loading 32 bit symbol
tables
The 32 bit Elf structs are smaller than the 64 bit ones, which means that
when loading them there's some padding left uninitialized at the end of each
struct (because the size indicated in e_ehsize and e_shentsize is
smaller than the size of elf_ehdr and elf_shdr).
Fix this by introducing a new helper that is used to set
[caller_]xdest_{base/size} and that takes care of performing the appropriate
memset of the region. This newly introduced helper is then used to set and
unset xdest_{base/size} in elf_load_bsdsyms. Now that the full struct
is zeroed, there's no need to specifically zero the undefined section.
This is XSA-194.
Suggested-by: Ian Jackson <ian.jackson@eu.citrix.com>
Also remove the open coded (and redundant with the earlier
elf_memset_unchecked()) use of caller_xdest_* from elf_init().
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
---
xen/common/libelf/libelf-loader.c | 14 +++-----------
xen/common/libelf/libelf-tools.c | 11 +++++++++--
xen/include/xen/libelf.h | 15 +++++++++------
3 files changed, 21 insertions(+), 19 deletions(-)
diff --git a/xen/common/libelf/libelf-loader.c b/xen/common/libelf/libelf-loader.c
index 4d3ae4d..bc1f87b 100644
--- a/xen/common/libelf/libelf-loader.c
+++ b/xen/common/libelf/libelf-loader.c
@@ -43,8 +43,6 @@ elf_errorstatus elf_init(struct elf_binary *elf, const char *image_input, size_t
elf->ehdr = ELF_MAKE_HANDLE(elf_ehdr, (elf_ptrval)image_input);
elf->class = elf_uval_3264(elf, elf->ehdr, e32.e_ident[EI_CLASS]);
elf->data = elf_uval_3264(elf, elf->ehdr, e32.e_ident[EI_DATA]);
- elf->caller_xdest_base = NULL;
- elf->caller_xdest_size = 0;
/* Sanity check phdr. */
offset = elf_uval(elf, elf->ehdr, e_phoff) +
@@ -284,9 +282,8 @@ do { \
#define SYMTAB_INDEX 1
#define STRTAB_INDEX 2
- /* Allow elf_memcpy_safe to write to symbol_header. */
- elf->caller_xdest_base = &header;
- elf->caller_xdest_size = sizeof(header);
+ /* Allow elf_memcpy_safe to write to header. */
+ elf_set_xdest(elf, &header, sizeof(header));
/*
* Calculate the position of the various elements in GUEST MEMORY SPACE.
@@ -319,11 +316,7 @@ do { \
elf_store_field_bitness(elf, header_handle, e_phentsize, 0);
elf_store_field_bitness(elf, header_handle, e_phnum, 0);
- /* Zero the undefined section. */
- section_handle = ELF_MAKE_HANDLE(elf_shdr,
- ELF_REALPTR2PTRVAL(&header.elf_header.section[SHN_UNDEF]));
shdr_size = elf_uval(elf, elf->ehdr, e_shentsize);
- elf_memset_safe(elf, ELF_HANDLE_PTRVAL(section_handle), 0, shdr_size);
/*
* The symtab section header is going to reside in section[SYMTAB_INDEX],
@@ -404,8 +397,7 @@ do { \
}
/* Remove permissions from elf_memcpy_safe. */
- elf->caller_xdest_base = NULL;
- elf->caller_xdest_size = 0;
+ elf_set_xdest(elf, NULL, 0);
#undef SYMTAB_INDEX
#undef STRTAB_INDEX
diff --git a/xen/common/libelf/libelf-tools.c b/xen/common/libelf/libelf-tools.c
index 5a4757b..e73e729 100644
--- a/xen/common/libelf/libelf-tools.c
+++ b/xen/common/libelf/libelf-tools.c
@@ -59,8 +59,7 @@ bool elf_access_ok(struct elf_binary * elf,
return 1;
if ( elf_ptrval_in_range(ptrval, size, elf->dest_base, elf->dest_size) )
return 1;
- if ( elf_ptrval_in_range(ptrval, size,
- elf->caller_xdest_base, elf->caller_xdest_size) )
+ if ( elf_ptrval_in_range(ptrval, size, elf->xdest_base, elf->xdest_size) )
return 1;
elf_mark_broken(elf, "out of range access");
return 0;
@@ -373,6 +372,14 @@ bool elf_phdr_is_loadable(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr
return ((p_type == PT_LOAD) && (p_flags & (PF_R | PF_W | PF_X)) != 0);
}
+void elf_set_xdest(struct elf_binary *elf, void *addr, uint64_t size)
+{
+ elf->xdest_base = addr;
+ elf->xdest_size = size;
+ if ( addr != NULL )
+ elf_memset_safe(elf, ELF_REALPTR2PTRVAL(addr), 0, size);
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/include/xen/libelf.h b/xen/include/xen/libelf.h
index 95b5370..cf62bc7 100644
--- a/xen/include/xen/libelf.h
+++ b/xen/include/xen/libelf.h
@@ -210,13 +210,11 @@ struct elf_binary {
uint64_t bsd_symtab_pend;
/*
- * caller's other acceptable destination
- *
- * Again, these are trusted and must be valid (or 0) so long
- * as the struct elf_binary is in use.
+ * caller's other acceptable destination.
+ * Set by elf_set_xdest. Do not set these directly.
*/
- void *caller_xdest_base;
- uint64_t caller_xdest_size;
+ void *xdest_base;
+ uint64_t xdest_size;
#ifndef __XEN__
/* misc */
@@ -494,5 +492,10 @@ static inline void ELF_ADVANCE_DEST(struct elf_binary *elf, uint64_t amount)
}
}
+/* Specify a (single) additional destination, to which the image may
+ * cause writes. As with dest_base and dest_size, the values provided
+ * are trusted and must be valid so long as the struct elf_binary
+ * is in use or until elf_set_xdest(,0,0) is called. */
+void elf_set_xdest(struct elf_binary *elf, void *addr, uint64_t size);
#endif /* __XEN_LIBELF_H__ */
--
2.1.4

View file

@@ -1,45 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86emul: fix huge bit offset handling
We must never chop off the high 32 bits.
This is XSA-195.
Reported-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2549,6 +2549,12 @@ x86_emulate(
else
{
/*
+ * Instructions such as bt can reference an arbitrary offset from
+ * their memory operand, but the instruction doing the actual
+ * emulation needs the appropriate op_bytes read from memory.
+ * Adjust both the source register and memory operand to make an
+ * equivalent instruction.
+ *
* EA += BitOffset DIV op_bytes*8
* BitOffset = BitOffset MOD op_bytes*8
* DIV truncates towards negative infinity.
@@ -2560,14 +2566,15 @@ x86_emulate(
src.val = (int32_t)src.val;
if ( (long)src.val < 0 )
{
- unsigned long byte_offset;
- byte_offset = op_bytes + (((-src.val-1) >> 3) & ~(op_bytes-1));
+ unsigned long byte_offset =
+ op_bytes + (((-src.val - 1) >> 3) & ~(op_bytes - 1L));
+
ea.mem.off -= byte_offset;
src.val = (byte_offset << 3) + src.val;
}
else
{
- ea.mem.off += (src.val >> 3) & ~(op_bytes - 1);
+ ea.mem.off += (src.val >> 3) & ~(op_bytes - 1L);
src.val &= (op_bytes << 3) - 1;
}
}

View file

@@ -1,61 +0,0 @@
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/emul: Correct the IDT entry calculation in inject_swint()
The logic, as introduced in c/s 36ebf14ebe "x86/emulate: support for emulating
software event injection" is buggy. The size of an IDT entry depends on long
mode being active, not the width of the code segment currently in use.
In particular, this means that a compatibility code segment which hits
emulation for software event injection will end up using an incorrect offset
in the IDT for DPL/Presence checking. In practice, this only occurs on old
AMD hardware lacking NRip support; all newer AMD hardware, and all Intel
hardware bypass this path in the emulator.
While here, fix a minor issue with reading the IDT entry. The return value
from ops->read() wasn't checked, but in reality the only failure case is if a
pagefault occurs. This is not a realistic problem as the kernel will almost
certainly crash with a double fault if this setup actually occurred.
This is part of XSA-196.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/x86_emulate/x86_emulate.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 7a707dc..f74aa8f 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1630,10 +1630,16 @@ static int inject_swint(enum x86_swint_type type,
{
if ( !in_realmode(ctxt, ops) )
{
- unsigned int idte_size = (ctxt->addr_size == 64) ? 16 : 8;
- unsigned int idte_offset = vector * idte_size;
+ unsigned int idte_size, idte_offset;
struct segment_register idtr;
uint32_t idte_ctl;
+ int lm = in_longmode(ctxt, ops);
+
+ if ( lm < 0 )
+ return X86EMUL_UNHANDLEABLE;
+
+ idte_size = lm ? 16 : 8;
+ idte_offset = vector * idte_size;
/* icebp sets the External Event bit despite being an instruction. */
error_code = (vector << 3) | ECODE_IDT |
@@ -1661,8 +1667,9 @@ static int inject_swint(enum x86_swint_type type,
* Should strictly speaking read all 8/16 bytes of an entry,
* but we currently only care about the dpl and present bits.
*/
- ops->read(x86_seg_none, idtr.base + idte_offset + 4,
- &idte_ctl, sizeof(idte_ctl), ctxt);
+ if ( (rc = ops->read(x86_seg_none, idtr.base + idte_offset + 4,
+ &idte_ctl, sizeof(idte_ctl), ctxt)) )
+ goto done;
/* Is this entry present? */
if ( !(idte_ctl & (1u << 15)) )

View file

@@ -1,76 +0,0 @@
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/svm: Fix injection of software interrupts
The non-NextRip logic in c/s 36ebf14eb "x86/emulate: support for emulating
software event injection" was based on an older version of the AMD software
manual. The manual was later corrected, following findings from that series.
I took the original wording of "not supported without NextRIP" to mean that
X86_EVENTTYPE_SW_INTERRUPT was not eligible for use. It turns out that this
is not the case, and the new wording is clearer on the matter.
Despite testing the original patch series on non-NRip hardware, the
swint-emulation XTF test case focuses on the debug vectors; it never ended up
executing an `int $n` instruction for a vector which wasn't also an exception.
During a vmentry, the use of X86_EVENTTYPE_HW_EXCEPTION comes with a vector
check to ensure that it is only used with exception vectors. Xen's use of
X86_EVENTTYPE_HW_EXCEPTION for `int $n` injection has always been buggy on AMD
hardware.
Fix this by always using X86_EVENTTYPE_SW_INTERRUPT.
Print and decode the eventinj information in svm_vmcb_dump(), as it has
several invalid combinations which cause vmentry failures.
This is part of XSA-196.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/hvm/svm/svm.c | 13 +++++--------
xen/arch/x86/hvm/svm/svmdebug.c | 4 ++++
2 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 4391744..76efc3e 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1231,17 +1231,14 @@ static void svm_inject_trap(const struct hvm_trap *trap)
{
case X86_EVENTTYPE_SW_INTERRUPT: /* int $n */
/*
- * Injection type 4 (software interrupt) is only supported with
- * NextRIP support. Without NextRIP, the emulator will have performed
- * DPL and presence checks for us.
+ * Software interrupts (type 4) cannot be properly injected if the
+ * processor doesn't support NextRIP. Without NextRIP, the emulator
+ * will have performed DPL and presence checks for us, and will have
+ * moved eip forward if appropriate.
*/
if ( cpu_has_svm_nrips )
- {
vmcb->nextrip = regs->eip + _trap.insn_len;
- event.fields.type = X86_EVENTTYPE_SW_INTERRUPT;
- }
- else
- event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
+ event.fields.type = X86_EVENTTYPE_SW_INTERRUPT;
break;
case X86_EVENTTYPE_PRI_SW_EXCEPTION: /* icebp */
diff --git a/xen/arch/x86/hvm/svm/svmdebug.c b/xen/arch/x86/hvm/svm/svmdebug.c
index ded5d19..f93dfed 100644
--- a/xen/arch/x86/hvm/svm/svmdebug.c
+++ b/xen/arch/x86/hvm/svm/svmdebug.c
@@ -48,6 +48,10 @@ void svm_vmcb_dump(const char *from, struct vmcb_struct *vmcb)
vmcb->tlb_control,
(unsigned long long)vmcb->_vintr.bytes,
(unsigned long long)vmcb->interrupt_shadow);
+ printk("eventinj %016"PRIx64", valid? %d, ec? %d, type %u, vector %#x\n",
+ vmcb->eventinj.bytes, vmcb->eventinj.fields.v,
+ vmcb->eventinj.fields.ev, vmcb->eventinj.fields.type,
+ vmcb->eventinj.fields.vector);
printk("exitcode = %#Lx exitintinfo = %#Lx\n",
(unsigned long long)vmcb->exitcode,
(unsigned long long)vmcb->exitintinfo.bytes);

View file

@@ -1,65 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: xen: fix ioreq handling
Avoid double fetches and bounds check size to avoid overflowing
internal variables.
This is XSA-197.
Reported-by: yanghongke <yanghongke@huawei.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Ian Jackson <ian.jackson@eu.citrix.com>
--- a/i386-dm/helper2.c
+++ b/i386-dm/helper2.c
@@ -375,6 +375,11 @@ static void cpu_ioreq_pio(CPUState *env,
{
uint32_t i;
+ if (req->size > sizeof(unsigned long)) {
+ fprintf(stderr, "PIO: bad size (%u)\n", req->size);
+ exit(-1);
+ }
+
if (req->dir == IOREQ_READ) {
if (!req->data_is_ptr) {
req->data = do_inp(env, req->addr, req->size);
@@ -404,6 +409,11 @@ static void cpu_ioreq_move(CPUState *env
{
uint32_t i;
+ if (req->size > sizeof(req->data)) {
+ fprintf(stderr, "MMIO: bad size (%u)\n", req->size);
+ exit(-1);
+ }
+
if (!req->data_is_ptr) {
if (req->dir == IOREQ_READ) {
for (i = 0; i < req->count; i++) {
@@ -516,11 +526,13 @@ static int __handle_buffered_iopage(CPUS
req.df = 1;
req.type = buf_req->type;
req.data_is_ptr = 0;
+ xen_rmb();
qw = (req.size == 8);
if (qw) {
buf_req = &buffered_io_page->buf_ioreq[(rdptr + 1) %
IOREQ_BUFFER_SLOT_NUM];
req.data |= ((uint64_t)buf_req->data) << 32;
+ xen_rmb();
}
__handle_ioreq(env, &req);
@@ -552,7 +564,11 @@ static void cpu_handle_ioreq(void *opaqu
__handle_buffered_iopage(env);
if (req) {
- __handle_ioreq(env, req);
+ ioreq_t copy = *req;
+
+ xen_rmb();
+ __handle_ioreq(env, &copy);
+ req->data = copy.data;
if (req->state != STATE_IOREQ_INPROCESS) {
fprintf(logfile, "Badness in I/O request ... not in service?!: "

View file

@@ -1,63 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: xen: fix ioreq handling
Avoid double fetches and bounds check size to avoid overflowing
internal variables.
This is XSA-197.
Reported-by: yanghongke <yanghongke@huawei.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
--- a/xen-hvm.c
+++ b/xen-hvm.c
@@ -810,6 +810,10 @@ static void cpu_ioreq_pio(ioreq_t *req)
trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
req->data, req->count, req->size);
+ if (req->size > sizeof(uint32_t)) {
+ hw_error("PIO: bad size (%u)", req->size);
+ }
+
if (req->dir == IOREQ_READ) {
if (!req->data_is_ptr) {
req->data = do_inp(req->addr, req->size);
@@ -846,6 +850,10 @@ static void cpu_ioreq_move(ioreq_t *req)
trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
req->data, req->count, req->size);
+ if (req->size > sizeof(req->data)) {
+ hw_error("MMIO: bad size (%u)", req->size);
+ }
+
if (!req->data_is_ptr) {
if (req->dir == IOREQ_READ) {
for (i = 0; i < req->count; i++) {
@@ -1010,11 +1018,13 @@ static int handle_buffered_iopage(XenIOS
req.df = 1;
req.type = buf_req->type;
req.data_is_ptr = 0;
+ xen_rmb();
qw = (req.size == 8);
if (qw) {
buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
IOREQ_BUFFER_SLOT_NUM];
req.data |= ((uint64_t)buf_req->data) << 32;
+ xen_rmb();
}
handle_ioreq(state, &req);
@@ -1045,7 +1055,11 @@ static void cpu_handle_ioreq(void *opaqu
handle_buffered_iopage(state);
if (req) {
- handle_ioreq(state, req);
+ ioreq_t copy = *req;
+
+ xen_rmb();
+ handle_ioreq(state, &copy);
+ req->data = copy.data;
if (req->state != STATE_IOREQ_INPROCESS) {
fprintf(stderr, "Badness in I/O request ... not in service?!: "

View file

@@ -1,62 +0,0 @@
From 71a389ae940bc52bf897a6e5becd73fd8ede94c5 Mon Sep 17 00:00:00 2001
From: Ian Jackson <ian.jackson@eu.citrix.com>
Date: Thu, 3 Nov 2016 16:37:40 +0000
Subject: [PATCH] pygrub: Properly quote results, when returning them to the
caller:
* When the caller wants sexpr output, use `repr()'
This is what Xend expects.
The returned S-expressions are now escaped and quoted by Python,
generally using '...'. Previously kernel and ramdisk were unquoted
and args was quoted with "..." but without proper escaping. This
change may break toolstacks which do not properly dequote the
returned S-expressions.
* When the caller wants "simple" output, crash if the delimiter is
contained in the returned value.
With --output-format=simple it does not seem like this could ever
happen, because the bootloader config parsers all take line-based
input from the various bootloader config files.
With --output-format=simple0, this can happen if the bootloader
config file contains nul bytes.
This is XSA-198.
Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
Tested-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
tools/pygrub/src/pygrub | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
index 40f9584..dd0c8f7 100755
--- a/tools/pygrub/src/pygrub
+++ b/tools/pygrub/src/pygrub
@@ -721,14 +721,17 @@ def sniff_netware(fs, cfg):
return cfg
def format_sxp(kernel, ramdisk, args):
- s = "linux (kernel %s)" % kernel
+ s = "linux (kernel %s)" % repr(kernel)
if ramdisk:
- s += "(ramdisk %s)" % ramdisk
+ s += "(ramdisk %s)" % repr(ramdisk)
if args:
- s += "(args \"%s\")" % args
+ s += "(args %s)" % repr(args)
return s
def format_simple(kernel, ramdisk, args, sep):
+ for check in (kernel, ramdisk, args):
+ if check is not None and sep in check:
+ raise RuntimeError, "simple format cannot represent delimiter-containing value"
s = ("kernel %s" % kernel) + sep
if ramdisk:
s += ("ramdisk %s" % ramdisk) + sep
--
2.1.4

View file

@@ -1,55 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86emul: CMPXCHG8B ignores operand size prefix
Otherwise besides mis-handling the instruction, the comparison failure
case would result in uninitialized stack data being handed back to the
guest in rDX:rAX (32 bits leaked for 32-bit guests, 96 bits for 64-bit
ones).
This is XSA-200.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -435,6 +435,24 @@ int main(int argc, char **argv)
goto fail;
printf("okay\n");
+ printf("%-40s", "Testing cmpxchg8b (%edi) [opsize]...");
+ instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0xc7; instr[3] = 0x0f;
+ res[0] = 0x12345678;
+ res[1] = 0x87654321;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+ regs.edi = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) ||
+ (res[0] != 0x12345678) ||
+ (res[1] != 0x87654321) ||
+ (regs.eax != 0x12345678) ||
+ (regs.edx != 0x87654321) ||
+ ((regs.eflags&0x240) != 0x200) ||
+ (regs.eip != (unsigned long)&instr[4]) )
+ goto fail;
+ printf("okay\n");
+
printf("%-40s", "Testing movsxbd (%%eax),%%ecx...");
instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
regs.eflags = 0x200;
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4775,8 +4775,12 @@ x86_emulate(
generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
if ( op_bytes == 8 )
+ {
host_and_vcpu_must_have(cx16);
- op_bytes *= 2;
+ op_bytes = 16;
+ }
+ else
+ op_bytes = 8;
/* Get actual old value. */
if ( (rc = ops->read(ea.mem.seg, ea.mem.off, old, op_bytes,

View file

@@ -1,87 +0,0 @@
From: Wei Chen <Wei.Chen@arm.com>
Subject: arm64: handle guest-generated EL1 asynchronous abort
In the current code, when the hypervisor receives an asynchronous abort
from a guest, the hypervisor panics and the host goes down.
To prevent this security issue, this patch crashes the guest
whenever the hypervisor receives an asynchronous abort from it.
This is CVE-2016-9815, part of XSA-201.
Signed-off-by: Wei Chen <Wei.Chen@arm.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: Steve Capper <steve.capper@arm.com>
Reviewed-by: Julien Grall <Julien.Grall@arm.com>
--- a/xen/arch/arm/arm64/entry.S
+++ b/xen/arch/arm/arm64/entry.S
@@ -204,9 +204,12 @@ guest_fiq_invalid:
entry hyp=0, compat=0
invalid BAD_FIQ
-guest_error_invalid:
+guest_error:
entry hyp=0, compat=0
- invalid BAD_ERROR
+ msr daifclr, #2
+ mov x0, sp
+ bl do_trap_guest_error
+ exit hyp=0, compat=0
guest_sync_compat:
entry hyp=0, compat=1
@@ -225,9 +228,12 @@ guest_fiq_invalid_compat:
entry hyp=0, compat=1
invalid BAD_FIQ
-guest_error_invalid_compat:
+guest_error_compat:
entry hyp=0, compat=1
- invalid BAD_ERROR
+ msr daifclr, #2
+ mov x0, sp
+ bl do_trap_guest_error
+ exit hyp=0, compat=1
ENTRY(return_to_new_vcpu32)
exit hyp=0, compat=1
@@ -286,12 +292,12 @@ ENTRY(hyp_traps_vector)
ventry guest_sync // Synchronous 64-bit EL0/EL1
ventry guest_irq // IRQ 64-bit EL0/EL1
ventry guest_fiq_invalid // FIQ 64-bit EL0/EL1
- ventry guest_error_invalid // Error 64-bit EL0/EL1
+ ventry guest_error // Error 64-bit EL0/EL1
ventry guest_sync_compat // Synchronous 32-bit EL0/EL1
ventry guest_irq_compat // IRQ 32-bit EL0/EL1
ventry guest_fiq_invalid_compat // FIQ 32-bit EL0/EL1
- ventry guest_error_invalid_compat // Error 32-bit EL0/EL1
+ ventry guest_error_compat // Error 32-bit EL0/EL1
/*
* struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next)
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -2723,6 +2723,21 @@ asmlinkage void do_trap_hypervisor(struct cpu_user_regs *regs)
}
}
+asmlinkage void do_trap_guest_error(struct cpu_user_regs *regs)
+{
+ enter_hypervisor_head(regs);
+
+ /*
+ * Currently, to ensure hypervisor safety, when we received a
+ * guest-generated vSerror/vAbort, we just crash the guest to protect
+ * the hypervisor. In future we can better handle this by injecting
+ * a vSerror/vAbort to the guest.
+ */
+ gdprintk(XENLOG_WARNING, "Guest(Dom-%u) will be crashed by vSError\n",
+ current->domain->domain_id);
+ domain_crash_synchronous();
+}
+
asmlinkage void do_trap_irq(struct cpu_user_regs *regs)
{
enter_hypervisor_head(regs);

View file

@@ -1,199 +0,0 @@
From: Wei Chen <Wei.Chen@arm.com>
Subject: arm64: handle async aborts delivered while at EL2
If EL1 generates an asynchronous abort and then traps into EL2
(by HVC or IRQ) before the abort has been delivered, the hypervisor
cannot catch it, because the PSTATE.A bit is masked at all times
in the hypervisor. The asynchronous abort may therefore slip into the
next running guest with the PSTATE.A bit unmasked.
To avoid this, the abort must be taken at EL2 by clearing the
PSTATE.A bit. This patch unmasks the PSTATE.A bit to open a window
for catching guest-generated asynchronous aborts on all EL1 -> EL2
switch paths. If such an abort is caught in the checking window, the
hyp_error exception is triggered and the guest that caused the abort
is crashed.
This is CVE-2016-9816, part of XSA-201.
Signed-off-by: Wei Chen <Wei.Chen@arm.com>
Reviewed-by: Julien Grall <julien.grall@arm.com>
--- a/xen/arch/arm/arm64/entry.S
+++ b/xen/arch/arm/arm64/entry.S
@@ -173,6 +173,43 @@ hyp_error_invalid:
entry hyp=1
invalid BAD_ERROR
+hyp_error:
+ /*
+ * Only two possibilities:
+ * 1) Either we come from the exit path, having just unmasked
+ * PSTATE.A: change the return code to an EL2 fault, and
+ * carry on, as we're already in a sane state to handle it.
+ * 2) Or we come from anywhere else, and that's a bug: we panic.
+ */
+ entry hyp=1
+ msr daifclr, #2
+
+ /*
+ * The ELR_EL2 may be modified by an interrupt, so we have to use the
+ * saved value in cpu_user_regs to check whether we come from 1) or
+ * not.
+ */
+ ldr x0, [sp, #UREGS_PC]
+ adr x1, abort_guest_exit_start
+ cmp x0, x1
+ adr x1, abort_guest_exit_end
+ ccmp x0, x1, #4, ne
+ mov x0, sp
+ mov x1, #BAD_ERROR
+
+ /*
+ * Not equal, the exception come from 2). It's a bug, we have to
+ * panic the hypervisor.
+ */
+ b.ne do_bad_mode
+
+ /*
+ * Otherwise, the exception come from 1). It happened because of
+ * the guest. Crash this guest.
+ */
+ bl do_trap_guest_error
+ exit hyp=1
+
/* Traps taken in Current EL with SP_ELx */
hyp_sync:
entry hyp=1
@@ -189,15 +226,29 @@ hyp_irq:
guest_sync:
entry hyp=0, compat=0
+ bl check_pending_vserror
+ /*
+ * If x0 is Non-zero, a vSError took place, the initial exception
+ * doesn't have any significance to be handled. Exit ASAP
+ */
+ cbnz x0, 1f
msr daifclr, #2
mov x0, sp
bl do_trap_hypervisor
+1:
exit hyp=0, compat=0
guest_irq:
entry hyp=0, compat=0
+ bl check_pending_vserror
+ /*
+ * If x0 is Non-zero, a vSError took place, the initial exception
+ * doesn't have any significance to be handled. Exit ASAP
+ */
+ cbnz x0, 1f
mov x0, sp
bl do_trap_irq
+1:
exit hyp=0, compat=0
guest_fiq_invalid:
@@ -213,15 +264,29 @@ guest_error:
guest_sync_compat:
entry hyp=0, compat=1
+ bl check_pending_vserror
+ /*
+ * If x0 is Non-zero, a vSError took place, the initial exception
+ * doesn't have any significance to be handled. Exit ASAP
+ */
+ cbnz x0, 1f
msr daifclr, #2
mov x0, sp
bl do_trap_hypervisor
+1:
exit hyp=0, compat=1
guest_irq_compat:
entry hyp=0, compat=1
+ bl check_pending_vserror
+ /*
+ * If x0 is Non-zero, a vSError took place, the initial exception
+ * doesn't have any significance to be handled. Exit ASAP
+ */
+ cbnz x0, 1f
mov x0, sp
bl do_trap_irq
+1:
exit hyp=0, compat=1
guest_fiq_invalid_compat:
@@ -270,6 +335,62 @@ return_from_trap:
eret
/*
+ * This function is used to check pending virtual SError in the gap of
+ * EL1 -> EL2 world switch.
+ * The x0 register will be used to indicate the results of detection.
+ * x0 -- Non-zero indicates a pending virtual SError took place.
+ * x0 -- Zero indicates no pending virtual SError took place.
+ */
+check_pending_vserror:
+ /*
+ * Save elr_el2 to check whether the pending SError exception takes
+ * place while we are doing this sync exception.
+ */
+ mrs x0, elr_el2
+
+ /* Synchronize against in-flight ld/st */
+ dsb sy
+
+ /*
+ * Unmask PSTATE asynchronous abort bit. If there is a pending
+ * SError, the EL2 error exception will happen after PSTATE.A
+ * is cleared.
+ */
+ msr daifclr, #4
+
+ /*
+ * This is our single instruction exception window. A pending
+ * SError is guaranteed to occur at the earliest when we unmask
+ * it, and at the latest just after the ISB.
+ *
+ * If a pending SError occurs, the program will jump to EL2 error
+ * exception handler, and the elr_el2 will be set to
+ * abort_guest_exit_start or abort_guest_exit_end.
+ */
+abort_guest_exit_start:
+
+ isb
+
+abort_guest_exit_end:
+ /* Mask PSTATE asynchronous abort bit, close the checking window. */
+ msr daifset, #4
+
+ /*
+ * Compare elr_el2 and the saved value to check whether we are
+ * returning from a valid exception caused by pending SError.
+ */
+ mrs x1, elr_el2
+ cmp x0, x1
+
+ /*
+ * Not equal, the pending SError exception took place, set
+ * x0 to non-zero.
+ */
+ cset x0, ne
+
+ ret
+
+/*
* Exception vectors.
*/
.macro ventry label
@@ -287,7 +408,7 @@ ENTRY(hyp_traps_vector)
ventry hyp_sync // Synchronous EL2h
ventry hyp_irq // IRQ EL2h
ventry hyp_fiq_invalid // FIQ EL2h
- ventry hyp_error_invalid // Error EL2h
+ ventry hyp_error // Error EL2h
ventry guest_sync // Synchronous 64-bit EL0/EL1
ventry guest_irq // IRQ 64-bit EL0/EL1

View file

@@ -1,47 +0,0 @@
From: Wei Chen <Wei.Chen@arm.com>
Subject: arm: crash the guest when it traps on external abort
If we spot a data or prefetch abort bearing the ESR_EL2.EA bit set, we
know that this is an external abort, and that should crash the guest.
This is CVE-2016-9817, part of XSA-201.
Signed-off-by: Wei Chen <Wei.Chen@arm.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: Steve Capper <steve.capper@arm.com>
Reviewed-by: Julien Grall <Julien.Grall@arm.com>
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -2383,6 +2383,15 @@ static void do_trap_instr_abort_guest(struct cpu_user_regs *regs,
int rc;
register_t gva = READ_SYSREG(FAR_EL2);
+ /*
+ * If this bit has been set, it means that this instruction abort is caused
+ * by a guest external abort. Currently we crash the guest to protect the
+ * hypervisor. In future one can better handle this by injecting a virtual
+ * abort to the guest.
+ */
+ if ( hsr.iabt.eat )
+ domain_crash_synchronous();
+
switch ( hsr.iabt.ifsc & 0x3f )
{
case FSC_FLT_PERM ... FSC_FLT_PERM + 3:
@@ -2437,6 +2446,15 @@ static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
return;
}
+ /*
+ * If this bit has been set, it means that this data abort is caused
+ * by a guest external abort. Currently we crash the guest to protect the
+ * hypervisor. In future one can better handle this by injecting a virtual
+ * abort to the guest.
+ */
+ if ( dabt.eat )
+ domain_crash_synchronous();
+
info.dabt = dabt;
#ifdef CONFIG_ARM_32
info.gva = READ_CP32(HDFAR);

View file

@@ -1,130 +0,0 @@
From: Wei Chen <Wei.Chen@arm.com>
Subject: arm32: handle async aborts delivered while at HYP
If a guest generates an asynchronous abort and then traps into HYP
(by HVC or IRQ) before the abort has been delivered, the hypervisor
cannot catch it, because the PSTATE.A bit is masked at all times
in the hypervisor. The asynchronous abort may therefore slip into the
next running guest with the PSTATE.A bit unmasked.
To avoid this, the abort must be taken at HYP by clearing the
PSTATE.A bit. This patch unmasks the PSTATE.A bit to open a window
for catching guest-generated asynchronous aborts on all
Guest -> HYP switch paths. If such an abort is caught in the checking
window, the HYP data abort exception is triggered and the guest that
caused the abort is crashed.
This is CVE-2016-9818, part of XSA-201.
Signed-off-by: Wei Chen <Wei.Chen@arm.com>
Reviewed-by: Julien Grall <julien.grall@arm.com>
--- a/xen/arch/arm/arm32/entry.S
+++ b/xen/arch/arm/arm32/entry.S
@@ -42,6 +42,61 @@ save_guest_regs:
SAVE_BANKED(fiq)
SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq)
SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq);
+ /*
+ * Start to check pending virtual abort in the gap of Guest -> HYP
+ * world switch.
+ *
+ * Save ELR_hyp to check whether the pending virtual abort exception
+ * takes place while we are doing this trap exception.
+ */
+ mrs r1, ELR_hyp
+
+ /*
+ * Force loads and stores to complete before unmasking asynchronous
+ * aborts and forcing the delivery of the exception.
+ */
+ dsb sy
+
+ /*
+ * Unmask asynchronous abort bit. If there is a pending asynchronous
+ * abort, the data_abort exception will happen after A bit is cleared.
+ */
+ cpsie a
+
+ /*
+ * This is our single instruction exception window. A pending
+ * asynchronous abort is guaranteed to occur at the earliest when we
+ * unmask it, and at the latest just after the ISB.
+ *
+ * If a pending abort occurs, the program will jump to data_abort
+ * exception handler, and the ELR_hyp will be set to
+ * abort_guest_exit_start or abort_guest_exit_end.
+ */
+ .global abort_guest_exit_start
+abort_guest_exit_start:
+
+ isb
+
+ .global abort_guest_exit_end
+abort_guest_exit_end:
+ /* Mask CPSR asynchronous abort bit, close the checking window. */
+ cpsid a
+
+ /*
+ * Compare ELR_hyp and the saved value to check whether we are
+ * returning from a valid exception caused by pending virtual
+ * abort.
+ */
+ mrs r2, ELR_hyp
+ cmp r1, r2
+
+ /*
+ * Not equal, the pending virtual abort exception took place, the
+ * initial exception does not have any significance to be handled.
+ * Exit ASAP.
+ */
+ bne return_from_trap
+
mov pc, lr
#define DEFINE_TRAP_ENTRY(trap) \
--- a/xen/arch/arm/arm32/traps.c
+++ b/xen/arch/arm/arm32/traps.c
@@ -63,7 +63,10 @@ asmlinkage void do_trap_prefetch_abort(struct cpu_user_regs *regs)
asmlinkage void do_trap_data_abort(struct cpu_user_regs *regs)
{
- do_unexpected_trap("Data Abort", regs);
+ if ( VABORT_GEN_BY_GUEST(regs) )
+ do_trap_guest_error(regs);
+ else
+ do_unexpected_trap("Data Abort", regs);
}
/*
--- a/xen/include/asm-arm/arm32/processor.h
+++ b/xen/include/asm-arm/arm32/processor.h
@@ -55,6 +55,17 @@ struct cpu_user_regs
uint32_t pad1; /* Doubleword-align the user half of the frame */
};
+
+/* Functions for pending virtual abort checking window. */
+void abort_guest_exit_start(void);
+void abort_guest_exit_end(void);
+
+#define VABORT_GEN_BY_GUEST(r) \
+( \
+ ( (unsigned long)abort_guest_exit_start == (r)->pc ) || \
+ ( (unsigned long)abort_guest_exit_end == (r)->pc ) \
+)
+
#endif
/* Layout as used in assembly, with src/dest registers mixed in */
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -690,6 +690,8 @@ void vcpu_regs_user_to_hyp(struct vcpu *vcpu,
int call_smc(register_t function_id, register_t arg0, register_t arg1,
register_t arg2);
+void do_trap_guest_error(struct cpu_user_regs *regs);
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_ARM_PROCESSOR_H */
/*

View file

@@ -0,0 +1,75 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86: force EFLAGS.IF on when exiting to PV guests
Guest kernels modifying instructions in the process of being emulated
for another of their vCPU-s may cause EFLAGS.IF to be cleared upon
next exiting to guest context, by converting the instruction being
emulated to CLI (at the right point in time). Prevent any such bad
effects by always forcing EFLAGS.IF on. To cover other hypothetical
similar issues, also force EFLAGS.{IOPL,NT,VM} to zero.
This is XSA-202.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -109,6 +109,8 @@ compat_process_trap:
/* %rbx: struct vcpu, interrupts disabled */
ENTRY(compat_restore_all_guest)
ASSERT_INTERRUPTS_DISABLED
+ mov $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),%r11d
+ and UREGS_eflags(%rsp),%r11d
.Lcr4_orig:
.skip .Lcr4_alt_end - .Lcr4_alt, 0x90
.Lcr4_orig_end:
@@ -144,6 +146,8 @@ ENTRY(compat_restore_all_guest)
(.Lcr4_orig_end - .Lcr4_orig), \
(.Lcr4_alt_end - .Lcr4_alt)
.popsection
+ or $X86_EFLAGS_IF,%r11
+ mov %r11d,UREGS_eflags(%rsp)
RESTORE_ALL adj=8 compat=1
.Lft0: iretq
_ASM_PRE_EXTABLE(.Lft0, handle_exception)
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -40,28 +40,29 @@ restore_all_guest:
testw $TRAP_syscall,4(%rsp)
jz iret_exit_to_guest
+ movq 24(%rsp),%r11 # RFLAGS
+ andq $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),%r11
+ orq $X86_EFLAGS_IF,%r11
+
/* Don't use SYSRET path if the return address is not canonical. */
movq 8(%rsp),%rcx
sarq $47,%rcx
incl %ecx
cmpl $1,%ecx
- ja .Lforce_iret
+ movq 8(%rsp),%rcx # RIP
+ ja iret_exit_to_guest
cmpw $FLAT_USER_CS32,16(%rsp)# CS
- movq 8(%rsp),%rcx # RIP
- movq 24(%rsp),%r11 # RFLAGS
movq 32(%rsp),%rsp # RSP
je 1f
sysretq
1: sysretl
-.Lforce_iret:
- /* Mimic SYSRET behavior. */
- movq 8(%rsp),%rcx # RIP
- movq 24(%rsp),%r11 # RFLAGS
ALIGN
/* No special register assumptions. */
iret_exit_to_guest:
+ andl $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),24(%rsp)
+ orl $X86_EFLAGS_IF,24(%rsp)
addq $8,%rsp
.Lft0: iretq
_ASM_PRE_EXTABLE(.Lft0, handle_exception)

View file

@@ -0,0 +1,19 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/HVM: add missing NULL check before using VMFUNC hook
This is XSA-203.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1694,6 +1694,8 @@ static int hvmemul_vmfunc(
{
int rc;
+ if ( !hvm_funcs.altp2m_vcpu_emulate_vmfunc )
+ return X86EMUL_UNHANDLEABLE;
rc = hvm_funcs.altp2m_vcpu_emulate_vmfunc(ctxt->regs);
if ( rc != X86EMUL_OKAY )
hvmemul_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE,

View file

@@ -0,0 +1,57 @@
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Sun, 18 Dec 2016 15:42:59 +0000
Subject: [PATCH] x86/emul: Correct the handling of eflags with SYSCALL
A singlestep #DB is determined by the resulting eflags value from the
execution of SYSCALL, not the original eflags value.
By using the original eflags value, we negate the guest kernel's attempt to
protect itself from a privilege escalation by masking TF.
Have the SYSCALL emulation recalculate tf after the instruction is complete.
This is XSA-204
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/x86_emulate/x86_emulate.c | 19 ++++++++++++++++++-
1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index d82e85d..ff952a9 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4561,6 +4561,23 @@ x86_emulate(
(rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) )
goto done;
+ /*
+ * SYSCALL (unlike most instructions) evaluates its singlestep action
+ * based on the resulting EFLG_TF, not the starting EFLG_TF.
+ *
+ * As the #DB is raised after the CPL change and before the OS can
+ * switch stack, it is a large risk for privilege escalation.
+ *
+ * 64bit kernels should mask EFLG_TF in MSR_FMASK to avoid any
+ * vulnerability. Running the #DB handler on an IST stack is also a
+ * mitigation.
+ *
+ * 32bit kernels have no ability to mask EFLG_TF at all. Their only
+ * mitigation is to use a task gate for handling #DB (or to not use
+ * enable EFER.SCE to start with).
+ */
+ tf = _regs.eflags & EFLG_TF;
+
break;
}
@@ -5412,7 +5429,7 @@ x86_emulate(
*ctxt->regs = _regs;
- /* Inject #DB if single-step tracing was enabled at instruction start. */
+ /* Should a singlestep #DB be raised? */
if ( tf && (rc == X86EMUL_OKAY) && ops->inject_hw_exception )
rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;