GIT f97af70b3aa8a92ddeabb7d42477e7d13dd0a192 git://kvm.qumranet.com/home/avi/kvm.git

commit f97af70b3aa8a92ddeabb7d42477e7d13dd0a192
Author: Avi Kivity
Date:   Tue Mar 20 18:44:51 2007 +0200

    KVM: Remove set_cr0_no_modeswitch() arch op

    set_cr0_no_modeswitch() was a hack to avoid corrupting segment
    registers.  As we now cache the protected mode values on entry to
    real mode, this isn't an issue anymore, and it interferes with
    reboot (which usually _is_ a modeswitch).

    Signed-off-by: Avi Kivity

commit e314dde30e3851e8effc017c6fffced11d90183a
Author: Avi Kivity
Date:   Tue Mar 20 18:40:40 2007 +0200

    KVM: Workaround vmx inability to virtualize the reset state

    The reset state has cs.selector == 0xf000 and cs.base == 0xffff0000,
    which aren't compatible with vm86 mode, which is used for real mode
    virtualization.  When we create a vcpu, we set cs.base to 0xf0000,
    but if we get there by way of a reset, the values are inconsistent
    and vmx refuses to enter guest mode.  Work around this by detecting
    the state and munging it appropriately.

    Signed-off-by: Avi Kivity

commit 88aea7ddfae755633b0a80ccfa56244b3c79c7b0
Author: Avi Kivity
Date:   Tue Mar 20 14:34:28 2007 +0200

    KVM: MMU: Remove global pte tracking

    The initial, noncaching, version of the kvm mmu flushed all
    nonglobal shadow page table translations (much like a native tlb
    flush).  The new implementation flushes translations only when they
    change, rendering global pte tracking superfluous.

    This removes the unused tracking mechanism and storage space.

    Signed-off-by: Avi Kivity

commit 66e5d5c81b5b89e39aa86e3bf9864d228f468b0d
Author: Avi Kivity
Date:   Tue Mar 20 14:29:06 2007 +0200

    KVM: MMU: Remove unnecessary check for pdptr access

    We already special-case pdptr access, so there is no need to check
    it again.

    Signed-off-by: Avi Kivity

commit c01571ed56754dfea458cc37d553c360082411a1
Author: Avi Kivity
Date:   Tue Mar 20 12:46:50 2007 +0200

    KVM: Avoid guest virtual addresses in string pio userspace interface

    The current string pio interface communicates using guest virtual
    addresses, relying on userspace to translate addresses and to check
    permissions.  This interface cannot fully support guest smp, as the
    check needs to take into account two pages at once in case an
    unaligned string transfer straddles a page boundary.

    Change the interface not to communicate guest addresses at all;
    instead use a buffer page (mmaped by userspace) and do transfers
    there.  The kernel manages the virtual to physical translation and
    can perform the checks atomically by taking the appropriate locks.

    Signed-off-by: Avi Kivity

commit 74c24de6e7848a45d6109d987d4fd2ccd83e432e
Author: Avi Kivity
Date:   Wed Mar 7 13:11:17 2007 +0200

    KVM: Future-proof argument-less ioctls

    Some ioctls ignore their arguments.  By requiring them to be zero
    now, we allow a nonzero value to have some special meaning in the
    future.

    Signed-off-by: Avi Kivity

commit 29e686a1dc9631b7898d087a0ab1c4716672e209
Author: Avi Kivity
Date:   Wed Mar 7 13:05:38 2007 +0200

    KVM: Allow kernel to select size of mmap() buffer

    This allows us to store offsets in the kernel/user kvm_run area, and
    be sure that userspace has them mapped.  As offsets can be outside
    the kvm_run struct, userspace has no way of knowing how much to
    mmap.

    Signed-off-by: Avi Kivity

commit cce3a1062817218c67163732339e2ea25e9f023b
Author: Avi Kivity
Date:   Mon Mar 5 19:46:05 2007 +0200

    KVM: Add guest mode signal mask

    Allow a special signal mask to be used while executing in guest
    mode.  This allows signals to be used to interrupt a vcpu without
    requiring signal delivery to a userspace handler, which is quite
    expensive.  Userspace still receives -EINTR and can get the signal
    via sigwait().

    Signed-off-by: Avi Kivity
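As an illustration, userspace usage of the guest mode signal mask would look
roughly like the sketch below (a hypothetical helper, not tested; it assumes
the struct kvm_signal_mask layout and KVM_SET_SIGNAL_MASK ioctl from the
patch at the end of this mail, and an 8-byte kernel-side sigset_t as on x86):

#include <pthread.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Keep SIGUSR1 blocked during normal operation, but let it punch the
 * vcpu out of KVM_RUN; the thread then collects it with sigwait()
 * instead of running a signal handler. */
static int setup_guest_sigmask(int vcpu_fd)
{
        sigset_t blocked, in_guest;
        struct kvm_signal_mask *kmask;
        int r;

        sigemptyset(&blocked);
        sigaddset(&blocked, SIGUSR1);
        pthread_sigmask(SIG_BLOCK, &blocked, NULL);

        sigemptyset(&in_guest);         /* unblock everything in guest mode */
        kmask = malloc(sizeof(*kmask) + 8);
        kmask->len = 8;                 /* assumed kernel sigset_t size */
        memcpy(kmask->sigset, &in_guest, 8);
        r = ioctl(vcpu_fd, KVM_SET_SIGNAL_MASK, kmask);
        free(kmask);
        return r;
}

When KVM_RUN returns -EINTR, the original mask has been restored, so the
signal is still pending and blocked, and sigwait() picks it up synchronously.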
commit cd3aaa2392baec9674792d71d304ec41e540b517
Author: Avi Kivity
Date:   Mon Mar 5 17:45:40 2007 +0200

    KVM: Initialize the apic_base msr on svm too

    Older userspace didn't care, but newer userspace (with the cpuid
    changes) does.

    Signed-off-by: Avi Kivity

commit c303c0efc5b2ff8c0f77c9079fa66f62801da93d
Author: Avi Kivity
Date:   Sun Mar 4 14:24:03 2007 +0200

    KVM: Add a special exit reason when exiting due to an interrupt

    This is redundant, as we also return -EINTR from the ioctl, but it
    allows us to examine the exit_reason field on resume without seeing
    old data.

    Signed-off-by: Avi Kivity

commit 62919332e00e3226dd1f728ff83107d06a6d9a81
Author: Avi Kivity
Date:   Sun Mar 4 14:17:08 2007 +0200

    KVM: Fold kvm_run::exit_type into kvm_run::exit_reason

    Currently, userspace is told about the nature of the last exit from
    the guest using two fields, exit_type and exit_reason, where
    exit_type has just two enumerations (and no need for more).  So fold
    exit_type into exit_reason, reducing the complexity of determining
    what really happened.

    Signed-off-by: Avi Kivity

commit 9e16898f4f5d6cdc35030bb272631611b71548fe
Author: Avi Kivity
Date:   Sun Mar 4 13:59:30 2007 +0200

    KVM: Allow userspace to process hypercalls which have no kernel handler

    This is useful for paravirtualized graphics devices, for example.

    Signed-off-by: Avi Kivity

commit 440fd9098bceb2ca0856d962ff62db9af4d1094a
Author: Avi Kivity
Date:   Thu Mar 1 17:56:20 2007 +0200

    KVM: Add method to check for backwards-compatible API extensions

    Signed-off-by: Avi Kivity

commit 0b37dedb178bcb3b0a28f65e6ae835bf58184301
Author: Avi Kivity
Date:   Thu Mar 1 17:20:13 2007 +0200

    KVM: Renumber ioctls

    The recent changes have left the ioctl numbers in complete disarray.

    Signed-off-by: Avi Kivity

commit 95cab16b18e1c1a786a9fc5ea6fcd68b29ae3481
Author: Avi Kivity
Date:   Thu Mar 1 16:47:06 2007 +0200

    KVM: Remove minor wart from KVM_CREATE_VCPU ioctl

    That ioctl does not transfer any data, so it should be an _IO rather
    than an _IOW.

    Signed-off-by: Avi Kivity

commit ba5cb15b027b76ba7b4d247914eb6d20065c0767
Author: Avi Kivity
Date:   Thu Mar 1 16:20:40 2007 +0200

    KVM: Remove the 'emulated' field from the userspace interface

    We no longer emulate single instructions in userspace.  Instead, we
    service mmio or pio requests.

    Signed-off-by: Avi Kivity

commit 706e8fe655be36aa686f1fbb398d3a4470d4939b
Author: Avi Kivity
Date:   Wed Feb 28 20:46:53 2007 +0200

    KVM: Handle cpuid in the kernel instead of punting to userspace

    KVM used to handle cpuid by letting userspace decide what values to
    return to the guest.  We now handle cpuid completely in the kernel.
    We still let userspace decide which values the guest will see by
    having userspace set up the value table beforehand (this is
    necessary to allow management software to set the cpu features to
    the least common denominator, so that live migration can work).

    The motivation for the change is that kvm kernel code can be
    impacted by cpuid features, for example the x86 emulator.

    Signed-off-by: Avi Kivity

commit aad2f6e0faf4b03e087bbe6751acdacd72e911b6
Author: Avi Kivity
Date:   Thu Feb 22 19:48:43 2007 +0200

    KVM: Initialize PIO I/O count

    This allows userspace to ignore the io.rep field.  Not a big deal,
    but friendly.

    Signed-off-by: Avi Kivity
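Pulling the interface changes above together, a userspace vcpu run loop would
look roughly like the sketch below (a sketch only: handle_pio() and
handle_hypercall() are hypothetical helpers, mmio handling and error checking
are omitted; the kvm_run fields match the patch at the end of this mail):

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

void handle_pio(struct kvm_run *run, uint8_t *data);    /* hypothetical */
__u64 handle_hypercall(__u64 args[6]);                  /* hypothetical */

static void run_vcpu(int kvm_fd, int vcpu_fd)
{
        /* The kernel chooses the mapping size; offset 0 holds struct
         * kvm_run, and string PIO data lives in the same mapping at
         * run->io.data_offset (the second page). */
        long size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
        struct kvm_run *run = mmap(NULL, size, PROT_READ | PROT_WRITE,
                                   MAP_SHARED, vcpu_fd, 0);

        for (;;) {
                if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
                        if (errno == EINTR)     /* see KVM_EXIT_INTR above */
                                continue;
                        return;
                }
                switch (run->exit_reason) {
                case KVM_EXIT_IO:
                        /* data sits in the shared buffer, not in registers */
                        handle_pio(run, (uint8_t *)run + run->io.data_offset);
                        run->io_completed = 1;
                        break;
                case KVM_EXIT_HYPERCALL:
                        /* hypercall with no kernel handler: userspace
                         * supplies the return value for the guest */
                        run->hypercall.ret = handle_hypercall(run->hypercall.args);
                        break;
                default:
                        return;
                }
        }
}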
commit e668cf946ee8654c7f5afe3feeed686a3566c22a
Author: Avi Kivity
Date:   Thu Feb 22 19:39:30 2007 +0200

    KVM: Do not communicate to userspace through cpu registers during PIO

    Currently when passing a PIO emulation request to userspace, we rely
    on userspace updating %rax (on 'in' instructions) and %rsi/%rdi/%rcx
    (on string instructions).  This (a) requires two extra ioctls for
    getting and setting the registers and (b) is unfriendly to non-x86
    archs, when they get kvm ports.

    So fix by doing the register fixups in the kernel and passing to
    userspace only an abstract description of the PIO to be done.

    Signed-off-by: Avi Kivity

commit 3de857cd1335bd2e02b60d3a50b7da93ccbabf1d
Author: Avi Kivity
Date:   Thu Feb 22 12:58:31 2007 +0200

    KVM: Use a shared page for kernel/user communication when running a vcpu

    Instead of passing a 'struct kvm_run' back and forth between the
    kernel and userspace, allocate a page and allow the user to mmap()
    it.  This reduces needless copying and makes the interface
    expandable by providing lots of free space.

    Signed-off-by: Avi Kivity

commit 128e159e11e999496ec44a549fcac91de3802389
Author: Avi Kivity
Date:   Mon Mar 19 13:18:10 2007 +0200

    KVM: Prevent system selectors leaking into guest on real->protected mode transition on vmx

    Intel virtualization extensions do not support virtualizing real
    mode.  So kvm uses virtualized vm86 mode to run real mode code.
    Unfortunately, this virtualized vm86 mode does not support the
    so-called "big real" mode, where the segment selector and base do
    not agree with each other according to the real mode rules (base ==
    selector << 4).

    To work around this, kvm checks whether a selector/base pair
    violates the virtualized vm86 rules, and if so, forces it into
    conformance.  On a transition back to protected mode, if we see that
    the guest did not touch a forced segment, we restore it back to the
    original protected mode value.

    This pile of hacks breaks down if the gdt has changed in real mode,
    as it can cause a segment selector to point to a system descriptor
    instead of a normal data segment.  In fact, this happens with the
    Windows bootloader and the qemu acpi bios, where a protected mode
    memcpy routine issues an innocent 'pop %es' and traps on an attempt
    to load a system descriptor.

    "Fix" by checking if the to-be-restored selector points at a system
    segment, and if so, coercing it into a normal data segment.  The
    long term solution, of course, is to abandon vm86 mode and use
    emulation for big real mode.

    Signed-off-by: Avi Kivity

commit ade11a015f83d270d1201c440199146f852fe5e4
Author: Uri Lublin
Date:   Wed Mar 14 19:21:06 2007 +0200

    added KVM_GET_MEM_MAP ioctl to get the memory bitmap for a memory slot

    To be used when there may be "holes" in the memory.  Specifically,
    to avoid breaking VM migration when a ballooning mechanism exists.

    Signed-off-by: Uri Lublin

commit b0092d187cfa19dfcada3b85d728af5ae27989dc
Author: Avi Kivity
Date:   Wed Mar 14 15:54:54 2007 +0200

    KVM: Remove extraneous guest entry on mmio read

    When emulating an mmio read, we actually emulate twice: once to
    determine the physical address of the mmio, and, after we've exited
    to userspace to get the mmio value, we emulate again to place the
    value in the result register and update any flags.

    But we don't really need to enter the guest again for that; it would
    only take an immediate vmexit.  So, if we detect that we're doing an
    mmio read, emulate a single instruction before entering the guest
    again.

    Signed-off-by: Avi Kivity
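The KVM_GET_MEM_MAP ioctl above reuses struct kvm_dirty_log; a sketch of a
caller (npages is the slot size the caller already knows from its own
KVM_SET_MEMORY_REGION call, assumed here to be a multiple of 8):

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns a bitmap with bit i set iff page i of the slot is backed by
 * memory, or NULL on error.  Caller frees. */
static unsigned char *get_mem_map(int vm_fd, int slot, unsigned long npages)
{
        struct kvm_dirty_log log = { .slot = slot };
        unsigned char *bitmap = calloc(1, npages / 8);  /* one bit per page */

        if (!bitmap)
                return NULL;
        log.dirty_bitmap = bitmap;
        if (ioctl(vm_fd, KVM_GET_MEM_MAP, &log) < 0) {
                free(bitmap);
                return NULL;
        }
        return bitmap;
}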
commit 470db88b8b3491199e8d55b771d66e74b2fd53cd
Author: Ingo Molnar
Date:   Sun Mar 11 13:52:33 2007 +0100

    KVM: always reload segment selectors

    A failed VM entry on VMX might still change %fs or %gs, so make sure
    that KVM always reloads the segment selectors.  This is crucial on
    both x86 and x86_64: x86 has __KERNEL_PDA in %fs, on which things
    like 'current' depend, and x86_64 has 0 there and needs MSR_GS_BASE
    to work.

    Signed-off-by: Ingo Molnar

commit f7edc6a39584a3f95687a5320675fadb23bccbe5
Author: Ingo Molnar
Date:   Sat Mar 10 11:22:51 2007 +0100

    KVM: trivial whitespace fixes

    Trivial whitespace fixes.

    Signed-off-by: Ingo Molnar

commit f3a33bfeaa5cade1a9ac1facb5cb904a483b1e5c
Author: Avi Kivity
Date:   Fri Mar 9 13:04:31 2007 +0200

    KVM: MMU: Fix host memory corruption on i386 with >= 4GB ram

    PAGE_MASK is an unsigned long, so using it to mask physical
    addresses on i386 (which are 64-bit wide) leads to truncation.  This
    can result in page->private of unrelated memory pages being
    modified, with disastrous results.

    Fix by not using PAGE_MASK for physical addresses; instead calculate
    the correct value directly from PAGE_SIZE.  Also fix a similar
    BUG_ON().

    Signed-off-by: Avi Kivity

commit 6ee9853b015f8807f497ffad39b142ddc1403aa9
Author: Avi Kivity
Date:   Thu Mar 8 17:13:32 2007 +0200

    KVM: MMU: Fix guest writes to nonpae pde

    KVM shadow page tables are always in pae mode, regardless of the
    guest setting.  This means that a guest pde (mapping 4MB of memory)
    is mapped to two shadow pdes (mapping 2MB each).

    When the guest writes to a pte or pde, we intercept the write and
    emulate it.  We also remove any shadowed mappings corresponding to
    the write.  Since the mmu did not account for the doubling in the
    number of pdes, it removed the wrong entry, resulting in a mismatch
    between shadow page tables and guest page tables, followed shortly
    by guest memory corruption.

    This patch fixes the problem by detecting the special case of
    writing to a non-pae pde and adjusting the address and number of
    shadow pdes zapped accordingly.

    Signed-off-by: Avi Kivity

commit 374c1509c7d04a4e351b1812c2f0b9dac3ea0c0a
Author: Avi Kivity
Date:   Thu Mar 8 11:48:09 2007 +0200

    KVM: Fix bogus sign extension in mmu mapping audit

    When auditing a 32-bit guest on a 64-bit host, sign extension of the
    page directory pointer table index caused bogus addresses to be
    shown on audit errors.

    Fix by declaring the index unsigned.

    Signed-off-by: Avi Kivity

commit fac539542cbf923a39238b10557c88f99fd45b59
Author: Avi Kivity
Date:   Wed Mar 7 09:29:48 2007 +0200

    KVM: Export <linux/kvm.h>

    This allows users to actually build programs that use kvm without
    the entire source tree.

    Signed-off-by: Avi Kivity

commit c14a46343cc9f04f15ebc67573031fe8bbe1555a
Author: Avi Kivity
Date:   Tue Mar 6 12:05:53 2007 +0200

    KVM: Fix guest sysenter on vmx

    The vmx code currently treats the guest's sysenter support msrs as
    32-bit values, which breaks 32-bit compat mode userspace on 64-bit
    guests.  Fix by using the native word width of the machine.

    Signed-off-by: Avi Kivity

commit ea135e7671189ffb7e67843bf98740dac0c6ccfa
Author: Avi Kivity
Date:   Sun Mar 4 13:27:36 2007 +0200

    KVM: Use own minor number

    Use the minor number (232) allocated to kvm by LANANA.

    Signed-off-by: Avi Kivity

commit 21af17507f37658414191b1cf1337efbaf7dd530
Author: Dor Laor
Date:   Mon Feb 19 18:25:43 2007 +0200

    KVM: Use the generic skip_emulated_instruction() in hypercall code

    Instead of twiddling the rip register directly, use the
    skip_emulated_instruction() function to do that for us.

    Signed-off-by: Dor Laor
    Signed-off-by: Avi Kivity
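The PAGE_MASK truncation fixed above is easy to see in isolation
(illustrative fragment, not kvm code):

typedef unsigned long long hpa_t;       /* 64-bit physical address */

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))    /* unsigned long: 0xfffff000 on i386 */

hpa_t mask_buggy(hpa_t paddr)
{
        /* PAGE_MASK zero-extends to 0x00000000fffff000, so an address
         * above 4GB, e.g. 0x123456789, comes out as 0x23456000. */
        return paddr & PAGE_MASK;
}

hpa_t mask_fixed(hpa_t paddr)
{
        /* widen the mask to the physical address type first */
        return paddr & ~((hpa_t)PAGE_SIZE - 1);
}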
commit 57d78025d84fb607aa335d015a79b257517aa209
Author: Dor Laor
Date:   Mon Feb 19 16:44:49 2007 +0200

    KVM: Fix guest register corruption on paravirt hypercall

    The hypercall code mixes up the ->cache_regs() and ->decache_regs()
    callbacks, resulting in guest register corruption.

    Signed-off-by: Dor Laor
    Signed-off-by: Avi Kivity

commit 28e9803c9134683a884efe05abdb3f814c1ca7e7
Author: Avi Kivity
Date:   Thu Mar 1 19:21:03 2007 +0200

    KVM: Unset kvm_arch_ops if arch module loading failed

    Otherwise, the core module thinks the arch module is loaded, and
    won't let you reload it after you've fixed the bug.

    Signed-off-by: Avi Kivity

commit 426bc2fd1462706ec92d0e9efdb0cf3643f4eb67
Author: Avi Kivity
Date:   Thu Mar 1 11:28:13 2007 +0200

    KVM: Move kvmfs magic number to <linux/magic.h>

    From: Andrew Morton

    Use the standard magic.h for kvmfs.

    Cc: Avi Kivity
    Signed-off-by: Andrew Morton
    Signed-off-by: Avi Kivity

commit c1a8557e1da6e7d8bf8f77cb1b47c077f5c2a67d
Author: Avi Kivity
Date:   Mon Feb 26 16:29:43 2007 +0200

    KVM: Fix bogus failure in kvm.ko module initialization

    A bogus 'return r' can cause an otherwise successful module load to
    fail.  This both denies users the use of kvm, and it also denies
    them the use of their machine, as it leaves a filesystem registered
    with its callbacks pointing into now-freed module memory.

    Fix by returning a zero like a good module.

    Thanks to Richard Lucassen (?) for reporting the problem and for
    providing access to a machine which exhibited it.

    Signed-off-by: Avi Kivity

commit 7703ff91ee2ed171f2175d030e7f063c4efab2f5
Author: Uri Lublin
Date:   Thu Feb 22 17:37:32 2007 +0200

    KVM: Remove write access permissions when dirty-page-logging is enabled

    Enabling dirty page logging is done using the KVM_SET_MEMORY_REGION
    ioctl.  If the memory region already exists, we need to remove write
    accesses, so writes will be caught, and dirty pages will be logged.

    Signed-off-by: Uri Lublin
    Signed-off-by: Avi Kivity

commit b77fd1f62576463434fc434cbdcd808847e169a1
Author: Uri Lublin
Date:   Thu Feb 22 17:15:33 2007 +0200

    kvm: move do_remove_write_access() up

    To be called from kvm_vm_ioctl_set_memory_region().

    Signed-off-by: Uri Lublin
    Signed-off-by: Avi Kivity

commit 62e287e7210d6ff142b3b05233fa1f5df686b794
Author: Uri Lublin
Date:   Thu Feb 22 16:43:09 2007 +0200

    KVM: Fix dirty page log bitmap size/access calculation

    Since dirty_bitmap is an unsigned long array, the alignment and size
    need to take that into account.

    Signed-off-by: Uri Lublin
    Signed-off-by: Avi Kivity

commit 871574eb14e959c19d94fdee7c3e2b88ae06770f
Author: Uri Lublin
Date:   Wed Feb 21 18:25:21 2007 +0200

    KVM: Add missing calls to mark_page_dirty()

    A few places where we modify guest memory fail to call
    mark_page_dirty(), causing live migration to fail.  This adds the
    missing calls.

    Signed-off-by: Uri Lublin
    Signed-off-by: Avi Kivity

commit 42017e8bf8eb7b6f65b95bca1368ee274fc5ef50
Author: Uri Lublin
Date:   Thu Feb 22 17:37:32 2007 +0200

    kvm: dirty page logging: remove write access permissions when dirty-page-logging is enabled

    Enabling dirty page logging is done using the KVM_SET_MEMORY_REGION
    ioctl.  If the memory region already exists, there is a need to
    remove write accesses, so writes will be caught, and dirty pages
    will be logged.
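The sizing issue in the dirty log fix above boils down to allocating and
clearing a whole number of unsigned longs rather than a raw npages/8 bytes; a
sketch of the idea (ALIGN mirrors the kernel macro):

#define BITS_PER_LONG   (8 * sizeof(unsigned long))
#define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

/* e.g. a 100-page slot on a 64-bit host needs 16 bytes (two longs),
 * not the 12 bytes that truncating 100/8 yields. */
static unsigned long dirty_bitmap_bytes(unsigned long npages)
{
        return ALIGN(npages, BITS_PER_LONG) / 8;
}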
commit a9fd29cfcb643b97cd76c7d836be4d0ed80f69e0
Author: Uri Lublin
Date:   Thu Feb 22 17:15:33 2007 +0200

    kvm: move do_remove_write_access() up

    To be called from kvm_vm_ioctl_set_memory_region().

commit fba4ba9c513ad2cd328f5f16980aa7b90d40cec0
Author: Uri Lublin
Date:   Thu Feb 22 16:43:09 2007 +0200

    kvm: dirty pages log: fix bitmap size/access calculation

    Since dirty_bitmap is an unsigned long array (pointer).

commit ae160d732685ab33d5a3a495663aa2b54c4d4734
Author: Uri Lublin
Date:   Thu Feb 22 15:47:42 2007 +0200

    .gitignore: ignore emacs backup files (*~)

commit 8267c1cd9a8a038e91c94e0cabc571a3614dc3e5
Author: Avi Kivity
Date:   Wed Feb 21 19:47:40 2007 +0200

    KVM: Bump API version

    Signed-off-by: Avi Kivity

commit c65237e78c19b8173338a49933c611dece13c1c6
Author: Avi Kivity
Date:   Wed Feb 21 18:04:26 2007 +0200

    KVM: Per-vcpu inodes

    Allocate a distinct inode for every vcpu in a VM.  This has the
    following benefits:

    - the filp cachelines are no longer bounced when f_count is
      incremented on every ioctl()
    - the API and internal code are distinctly clearer; for example, on
      the KVM_GET_REGS ioctl, there is no need to copy the vcpu number
      from userspace and then copy the registers back; the vcpu identity
      is derived from the fd used to make the call

    Right now the performance benefits are completely theoretical since
    (a) we don't support more than one vcpu per VM and (b)
    virtualization hardware inefficiencies completely overwhelm any
    cacheline bouncing effects.  But both of these will change, and we
    need to prepare the API today.

    Signed-off-by: Avi Kivity

commit 11c1297fadc533d1f66252088b4f4775018bafbb
Author: Avi Kivity
Date:   Tue Feb 20 18:41:05 2007 +0200

    KVM: Move kvm_vm_ioctl_create_vcpu() around

    In preparation for some hacking.

    Signed-off-by: Avi Kivity

commit f3ad84386727171d8308338a2c5dee1deac2e50d
Author: Avi Kivity
Date:   Tue Feb 20 18:27:58 2007 +0200

    KVM: Rename some kvm_dev_ioctl_*() functions to kvm_vm_ioctl_*()

    This reflects the changed scope, from device-wide to single vm
    (previously every device open created a virtual machine).

    Signed-off-by: Avi Kivity

commit 733e3f74f1c51bbc2e7a99df8b51767504b58de2
Author: Avi Kivity
Date:   Wed Feb 21 19:28:04 2007 +0200

    KVM: Create an inode per virtual machine

    This avoids having filp->f_op and the corresponding inode->i_fop
    different, which is a little unorthodox.

    The ioctl list is split into two: global kvm ioctls and per-vm
    ioctls.  A new ioctl, KVM_CREATE_VM, is used to create VMs and
    return the VM fd.

    Signed-off-by: Avi Kivity

commit 52a96114380f8ab615626e4cec57b7015895bd0f
Author: Avi Kivity
Date:   Tue Feb 20 14:07:37 2007 +0200

    KVM: Add internal filesystem for generating inodes

    The kvmfs inodes will represent virtual machines and vcpus, as
    necessary, reducing cacheline bouncing due to inodes and filps being
    shared.

    Signed-off-by: Avi Kivity

commit b00bc8b10197715f5b842f1f9a60e67a3484b10f
Author: Uri Lublin
Date:   Wed Feb 21 18:25:21 2007 +0200

    kvm, dirty pages log: adding some calls to mark_page_dirty()

commit 58a214eba321d92f833221c26777e2119e34a19d
Author: Avi Kivity
Date:   Mon Feb 19 14:37:48 2007 +0200

    KVM: More 0 -> NULL conversions

    Signed-off-by: Avi Kivity

commit f73199bb57b4c8feb7d8f60c6f1a25107de18dab
Author: Joerg Roedel
Date:   Mon Feb 19 14:37:47 2007 +0200

    KVM: SVM: intercept SMI to handle it at host level

    This patch changes the SVM code to intercept SMIs and handle them
    outside the guest.

    Signed-off-by: Joerg Roedel
    Signed-off-by: Avi Kivity
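The per-VM and per-vcpu inode commits above establish a three-level fd
hierarchy; a sketch (error handling omitted):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int make_vcpu(int *vm_fd_out)
{
        int kvm_fd = open("/dev/kvm", O_RDWR);          /* global ioctls */
        int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);    /* per-vm ioctls */
        int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); /* vcpu slot 0 */

        *vm_fd_out = vm_fd;
        return vcpu_fd;         /* KVM_RUN, KVM_GET_REGS etc. go here */
}

The vcpu identity is carried by the fd itself, so per-vcpu ioctls no longer
pass a vcpu number in their argument structs.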
commit fa2742c78f10fad8682e3af17df3e9fc2eece9e4
Author: Avi Kivity
Date:   Mon Feb 19 14:37:47 2007 +0200

    KVM: svm: init cr0 with the wp bit set

    Signed-off-by: Avi Kivity

commit 8da588a919dc0bef76e384d16fd13ea2189aa82d
Author: Avi Kivity
Date:   Mon Feb 19 14:37:47 2007 +0200

    KVM: Wire up hypercall handlers to a central arch-independent location

    Signed-off-by: Avi Kivity

commit 68f16784f188d280c75b39e2367ebc1adbc66d9d
Author: Avi Kivity
Date:   Mon Feb 19 14:37:47 2007 +0200

    KVM: Add hypercall host support for svm

    Signed-off-by: Avi Kivity

commit 7c8bd4d6fc0e2bfb35cd4c0e8ff39c4f8972d951
Author: Ingo Molnar
Date:   Mon Feb 19 14:37:47 2007 +0200

    KVM: Add host hypercall support for vmx

    Signed-off-by: Avi Kivity

commit f846fa34a14ec37dc0194c6f47ea4374c140e6f1
Author: Ingo Molnar
Date:   Mon Feb 19 14:37:47 2007 +0200

    KVM: add MSR based hypercall API

    This adds a special MSR based hypercall API to KVM.  This is to be
    used by paravirtual kernels and virtual drivers.

    Signed-off-by: Ingo Molnar
    Signed-off-by: Avi Kivity

commit 8aa04bb13cf90d68c26d6bea1e4c720f1f027be0
Author: Markus Rechberger
Date:   Mon Feb 19 14:37:47 2007 +0200

    KVM: Use page_private()/set_page_private() apis

    Besides using an established api, this allows using kvm in older
    kernels.

    Signed-off-by: Markus Rechberger
    Signed-off-by: Avi Kivity

commit 4d5a7e81cc63d28e94373cdeb74dc44045edaa10
Author: Ahmed S. Darwish
Date:   Mon Feb 19 14:37:46 2007 +0200

    KVM: Use ARRAY_SIZE macro instead of manual calculation.

    Signed-off-by: Ahmed S. Darwish
    Signed-off-by: Dor Laor
    Signed-off-by: Avi Kivity

commit 0fe9875fb3f9946a6c1cef6f1b9a286edc8ee2b9
Author: Markus Rechberger
Date:   Mon Feb 19 14:37:46 2007 +0200

    KVM: vmx: hack set_cr0_no_modeswitch() to actually do modeswitch

    From: Joerg Roedel

    The whole thing is rotten, but this allows vmx to boot with the
    guest reboot fix.

    Signed-off-by: Markus Rechberger
    Signed-off-by: Joerg Roedel
    Signed-off-by: Avi Kivity

commit 7e6e2bbad7f5dbccb389ee6d79be661972b18b15
Author: Avi Kivity
Date:   Mon Feb 19 14:37:46 2007 +0200

    KVM: Cosmetics

    Signed-off-by: Avi Kivity

commit cc66daca849ca8c2900ba8cc7640de664296d36a
Author: Jeremy Katz
Date:   Mon Feb 19 14:37:46 2007 +0200

    KVM: Move virtualization deactivation from CPU_DEAD state to CPU_DOWN_PREPARE

    This gives it more chances of surviving suspend.

    Signed-off-by: Jeremy Katz
    Signed-off-by: Avi Kivity

commit 2959cd13ecc1fbe1b2339937481844ff963f1e7f
Author: Avi Kivity
Date:   Mon Feb 19 14:37:46 2007 +0200

    KVM: mmu: add missing dirty page tracking cases

    We fail to mark a page dirty in three cases:

    - setting the accessed bit in a pte
    - setting the dirty bit in a pte
    - emulating a write into a pagetable

    This fix adds the missing cases.
Signed-off-by: Avi Kivity .gitignore | 3 drivers/kvm/kvm.h | 33 +++ drivers/kvm/kvm_main.c | 476 ++++++++++++++++++++++++++++++++++++++++++-- drivers/kvm/mmu.c | 20 +- drivers/kvm/paging_tmpl.h | 3 drivers/kvm/svm.c | 58 +++-- drivers/kvm/vmx.c | 111 +++++----- include/linux/Kbuild | 1 include/linux/kvm.h | 113 +++++++--- include/linux/miscdevice.h | 1 10 files changed, 652 insertions(+), 167 deletions(-) diff --git a/.gitignore b/.gitignore index 060a71d..343c716 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,6 @@ series # cscope files cscope.* + +# emacs backup files +*~ diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 0d122bf..8917df8 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -55,6 +55,7 @@ #define KVM_MEMORY_SLOTS 4 #define KVM_NUM_MMU_PAGES 256 #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 +#define KVM_MAX_CPUID_ENTRIES 40 #define FX_IMAGE_SIZE 512 #define FX_IMAGE_ALIGN 16 @@ -73,6 +74,8 @@ #define SELECTOR_RPL_MASK 0x03 #define IOPL_SHIFT 12 +#define KVM_PIO_PAGE_OFFSET 1 + /* * Address types: * @@ -133,7 +136,6 @@ struct kvm_mmu_page { unsigned long slot_bitmap; /* One bit set per slot which has memory * in this shadow page. */ - int global; /* Set if all ptes in this page are global */ int multimapped; /* More than one parent_pte? */ int root_count; /* Currently serving as active root */ union { @@ -219,6 +221,18 @@ enum { VCPU_SREG_LDTR, }; +struct kvm_pio_request { + unsigned long count; + int cur_count; + struct page *guest_pages[2]; + unsigned guest_page_offset; + int in; + int size; + int string; + int down; + int rep; +}; + struct kvm_vcpu { struct kvm *kvm; union { @@ -228,6 +242,7 @@ struct kvm_vcpu { struct mutex mutex; int cpu; int launched; + struct kvm_run *run; int interrupt_window_open; unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) @@ -273,6 +288,12 @@ #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE int mmio_size; unsigned char mmio_data[8]; gpa_t mmio_phys_addr; + gva_t mmio_fault_cr2; + struct kvm_pio_request pio; + void *pio_data; + + int sigset_active; + sigset_t sigset; struct { int active; @@ -284,6 +305,9 @@ #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE u32 ar; } tr, es, ds, fs, gs; } rmode; + + int cpuid_nent; + struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; }; struct kvm_memory_slot { @@ -360,8 +384,6 @@ struct kvm_arch_ops { void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); - void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu, - unsigned long cr0); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); @@ -412,6 +434,7 @@ #define HPA_MSB ((sizeof(hpa_t) * 8) - 1 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); +struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); void kvm_emulator_want_group7_invlpg(void); @@ -444,6 +467,10 @@ void realmode_set_cr(struct kvm_vcpu *vc struct x86_emulate_ctxt; +int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned long count, int string, int down, + gva_t address, int rep, unsigned port); +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int 
emulate_clts(struct kvm_vcpu *vcpu); int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index dc7a8c7..77c4176 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -346,6 +346,17 @@ static void kvm_free_physmem(struct kvm kvm_free_physmem_slot(&kvm->memslots[i], NULL); } +static void free_pio_guest_pages(struct kvm_vcpu *vcpu) +{ + int i; + + for (i = 0; i < 2; ++i) + if (vcpu->pio.guest_pages[i]) { + __free_page(vcpu->pio.guest_pages[i]); + vcpu->pio.guest_pages[i] = NULL; + } +} + static void kvm_free_vcpu(struct kvm_vcpu *vcpu) { if (!vcpu->vmcs) @@ -355,6 +366,11 @@ static void kvm_free_vcpu(struct kvm_vcp kvm_mmu_destroy(vcpu); vcpu_put(vcpu); kvm_arch_ops->vcpu_free(vcpu); + free_page((unsigned long)vcpu->run); + vcpu->run = NULL; + free_page((unsigned long)vcpu->pio_data); + vcpu->pio_data = NULL; + free_pio_guest_pages(vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -830,6 +846,61 @@ out: return r; } +/* + * Get the memory map for a memory slot. + * A bit is on iff the page exists. + * To be used when there may be "holes" in the memory. + */ +static int kvm_vm_ioctl_get_memory_map(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + struct kvm_memory_slot *memslot; + unsigned char *mem_bitmap = NULL; + int r, n; + unsigned long i; + + /* mark busy */ + spin_lock(&kvm->lock); + ++kvm->busy; + spin_unlock(&kvm->lock); + + r = -EINVAL; + if (log->slot >= KVM_MEMORY_SLOTS) + goto out; + + memslot = &kvm->memslots[log->slot]; + r = -ENOENT; + if (!memslot->phys_mem) + goto out; + + /* allocate a temporary bitmap */ + n = memslot->npages / 8; /* one bit per page */ + r = -ENOMEM; + mem_bitmap = vmalloc(n); + if (!mem_bitmap) + goto out; + memset(mem_bitmap, 0, n); + + /* fill the bitmap */ + for (i = 0; i < memslot->npages; ++i) + if (memslot->phys_mem[i]) + __set_bit(i, mem_bitmap); + + /* copy bitmap to user */ + r = -EFAULT; + if (copy_to_user(log->dirty_bitmap, mem_bitmap, n)) + goto out; + + r = 0; +out: + if (mem_bitmap) + vfree(mem_bitmap); + spin_lock(&kvm->lock); + --kvm->busy; + spin_unlock(&kvm->lock); + return r; +} + struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { int i; @@ -1116,6 +1187,7 @@ int emulate_instruction(struct kvm_vcpu int r; int cs_db, cs_l; + vcpu->mmio_fault_cr2 = cr2; kvm_arch_ops->cache_regs(vcpu); kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); @@ -1177,7 +1249,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, { unsigned long nr, a0, a1, a2, a3, a4, a5, ret; - kvm_arch_ops->decache_regs(vcpu); + kvm_arch_ops->cache_regs(vcpu); ret = -KVM_EINVAL; #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { @@ -1201,10 +1273,19 @@ #endif } switch (nr) { default: - ; + run->hypercall.args[0] = a0; + run->hypercall.args[1] = a1; + run->hypercall.args[2] = a2; + run->hypercall.args[3] = a3; + run->hypercall.args[4] = a4; + run->hypercall.args[5] = a5; + run->hypercall.ret = ret; + run->hypercall.longmode = is_long_mode(vcpu); + kvm_arch_ops->decache_regs(vcpu); + return 0; } vcpu->regs[VCPU_REGS_RAX] = ret; - kvm_arch_ops->cache_regs(vcpu); + kvm_arch_ops->decache_regs(vcpu); return 1; } EXPORT_SYMBOL_GPL(kvm_hypercall); @@ -1502,29 +1583,236 @@ void save_msrs(struct vmx_msr_entry *e, } EXPORT_SYMBOL_GPL(save_msrs); +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) +{ + int i; + u32 function; + struct kvm_cpuid_entry *e, *best; + + kvm_arch_ops->cache_regs(vcpu); + function = vcpu->regs[VCPU_REGS_RAX]; + vcpu->regs[VCPU_REGS_RAX] = 0; + vcpu->regs[VCPU_REGS_RBX] = 0; 
+ vcpu->regs[VCPU_REGS_RCX] = 0; + vcpu->regs[VCPU_REGS_RDX] = 0; + best = NULL; + for (i = 0; i < vcpu->cpuid_nent; ++i) { + e = &vcpu->cpuid_entries[i]; + if (e->function == function) { + best = e; + break; + } + /* + * Both basic or both extended? + */ + if (((e->function ^ function) & 0x80000000) == 0) + if (!best || e->function > best->function) + best = e; + } + if (best) { + vcpu->regs[VCPU_REGS_RAX] = best->eax; + vcpu->regs[VCPU_REGS_RBX] = best->ebx; + vcpu->regs[VCPU_REGS_RCX] = best->ecx; + vcpu->regs[VCPU_REGS_RDX] = best->edx; + } + kvm_arch_ops->decache_regs(vcpu); + kvm_arch_ops->skip_emulated_instruction(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); + +static int pio_copy_data(struct kvm_vcpu *vcpu) +{ + void *p = vcpu->pio_data; + void *q; + unsigned bytes; + int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; + + kvm_arch_ops->vcpu_put(vcpu); + q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, + PAGE_KERNEL); + if (!q) { + kvm_arch_ops->vcpu_load(vcpu); + return -ENOMEM; + } + q += vcpu->pio.guest_page_offset; + bytes = vcpu->pio.size * vcpu->pio.cur_count; + if (vcpu->pio.in) + memcpy(q, p, bytes); + else + memcpy(p, q, bytes); + q -= vcpu->pio.guest_page_offset; + vunmap(q); + kvm_arch_ops->vcpu_load(vcpu); + return 0; +} + +static int complete_pio(struct kvm_vcpu *vcpu) +{ + struct kvm_pio_request *io = &vcpu->pio; + long delta; + int r; + + kvm_arch_ops->cache_regs(vcpu); + + io->count -= io->cur_count; + if (!io->string) { + if (io->in) + memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data, + io->size); + } else { + if (io->in) { + r = pio_copy_data(vcpu); + if (r) { + kvm_arch_ops->cache_regs(vcpu); + return r; + } + } + + delta = 1; + if (io->rep) { + delta *= io->cur_count; + /* + * The size of the register should really depend on + * current address size. + */ + vcpu->regs[VCPU_REGS_RCX] -= delta; + } + if (io->down) + delta = -delta; + delta *= io->size; + if (io->in) + vcpu->regs[VCPU_REGS_RDI] += delta; + else + vcpu->regs[VCPU_REGS_RSI] += delta; + } + + vcpu->run->io_completed = 0; + + kvm_arch_ops->decache_regs(vcpu); + + if (!io->count) + kvm_arch_ops->skip_emulated_instruction(vcpu); + return 0; +} + +int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned long count, int string, int down, + gva_t address, int rep, unsigned port) +{ + unsigned now, in_page; + int i; + int nr_pages = 1; + struct page *page; + + vcpu->run->exit_reason = KVM_EXIT_IO; + vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; + vcpu->run->io.size = size; + vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; + vcpu->run->io.count = count; + vcpu->run->io.port = port; + vcpu->pio.count = count; + vcpu->pio.cur_count = count; + vcpu->pio.size = size; + vcpu->pio.in = in; + vcpu->pio.string = string; + vcpu->pio.down = down; + vcpu->pio.guest_page_offset = offset_in_page(address); + vcpu->pio.rep = rep; + + if (!string) { + kvm_arch_ops->cache_regs(vcpu); + memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); + kvm_arch_ops->decache_regs(vcpu); + return 0; + } + + now = min(count, PAGE_SIZE / size); + + if (!down) + in_page = PAGE_SIZE - offset_in_page(address); + else + in_page = offset_in_page(address) + size; + now = min(count, (unsigned long)in_page / size); + if (!now) { + /* + * String I/O straddles page boundary. Pin two guest pages + * so that we satisfy atomicity constraints. Do just one + * transaction to avoid complexity. + */ + nr_pages = 2; + now = 1; + } + if (down) { + /* + * String I/O in reverse. 
Yuck. Kill the guest, fix later. + */ + printk(KERN_ERR "kvm: guest string pio down\n"); + inject_gp(vcpu); + return 1; + } + vcpu->run->io.count = now; + vcpu->pio.cur_count = now; + + for (i = 0; i < nr_pages; ++i) { + spin_lock(&vcpu->kvm->lock); + page = gva_to_page(vcpu, address + i * PAGE_SIZE); + if (page) + get_page(page); + vcpu->pio.guest_pages[i] = page; + spin_unlock(&vcpu->kvm->lock); + if (!page) { + inject_gp(vcpu); + free_pio_guest_pages(vcpu); + return 1; + } + } + + if (!vcpu->pio.in) + return pio_copy_data(vcpu); + return 0; +} +EXPORT_SYMBOL_GPL(kvm_setup_pio); + static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; + sigset_t sigsaved; vcpu_load(vcpu); + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + /* re-sync apic's tpr */ vcpu->cr8 = kvm_run->cr8; - if (kvm_run->emulated) { - kvm_arch_ops->skip_emulated_instruction(vcpu); - kvm_run->emulated = 0; - } - - if (kvm_run->mmio_completed) { - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); - vcpu->mmio_read_completed = 1; + if (kvm_run->io_completed) { + if (vcpu->pio.count) { + r = complete_pio(vcpu); + if (r) + goto out; + } else { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + vcpu->mmio_read_completed = 1; + emulate_instruction(vcpu, kvm_run, + vcpu->mmio_fault_cr2, 0); + } } vcpu->mmio_needed = 0; + if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { + kvm_arch_ops->cache_regs(vcpu); + vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; + kvm_arch_ops->decache_regs(vcpu); + } + r = kvm_arch_ops->run(vcpu, kvm_run); +out: + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + vcpu_put(vcpu); return r; } @@ -1697,7 +1985,7 @@ #endif kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); mmu_reset_needed |= vcpu->cr0 != sregs->cr0; - kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0); + kvm_arch_ops->set_cr0(vcpu, sregs->cr0); mmu_reset_needed |= vcpu->cr4 != sregs->cr4; kvm_arch_ops->set_cr4(vcpu, sregs->cr4); @@ -1887,6 +2175,36 @@ static int kvm_vcpu_ioctl_debug_guest(st return r; } +static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + struct kvm_vcpu *vcpu = vma->vm_file->private_data; + unsigned long pgoff; + struct page *page; + + *type = VM_FAULT_MINOR; + pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + if (pgoff == 0) + page = virt_to_page(vcpu->run); + else if (pgoff == KVM_PIO_PAGE_OFFSET) + page = virt_to_page(vcpu->pio_data); + else + return NOPAGE_SIGBUS; + get_page(page); + return page; +} + +static struct vm_operations_struct kvm_vcpu_vm_ops = { + .nopage = kvm_vcpu_nopage, +}; + +static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_ops = &kvm_vcpu_vm_ops; + return 0; +} + static int kvm_vcpu_release(struct inode *inode, struct file *filp) { struct kvm_vcpu *vcpu = filp->private_data; @@ -1899,6 +2217,7 @@ static struct file_operations kvm_vcpu_f .release = kvm_vcpu_release, .unlocked_ioctl = kvm_vcpu_ioctl, .compat_ioctl = kvm_vcpu_ioctl, + .mmap = kvm_vcpu_mmap, }; /* @@ -1947,6 +2266,7 @@ static int kvm_vm_ioctl_create_vcpu(stru { int r; struct kvm_vcpu *vcpu; + struct page *page; r = -EINVAL; if (!valid_vcpu(n)) @@ -1961,6 +2281,18 @@ static int kvm_vm_ioctl_create_vcpu(stru return -EEXIST; } + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + r = -ENOMEM; + if (!page) + goto out_unlock; + vcpu->run = page_address(page); + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + r = -ENOMEM; + if (!page) + goto out_free_run; + 
vcpu->pio_data = page_address(page); + vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, FX_IMAGE_ALIGN); vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; @@ -1990,11 +2322,46 @@ static int kvm_vm_ioctl_create_vcpu(stru out_free_vcpus: kvm_free_vcpu(vcpu); +out_free_run: + free_page((unsigned long)vcpu->run); + vcpu->run = NULL; +out_unlock: mutex_unlock(&vcpu->mutex); out: return r; } +static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, + struct kvm_cpuid *cpuid, + struct kvm_cpuid_entry __user *entries) +{ + int r; + + r = -E2BIG; + if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) + goto out; + r = -EFAULT; + if (copy_from_user(&vcpu->cpuid_entries, entries, + cpuid->nent * sizeof(struct kvm_cpuid_entry))) + goto out; + vcpu->cpuid_nent = cpuid->nent; + return 0; + +out: + return r; +} + +static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) +{ + if (sigset) { + sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); + vcpu->sigset_active = 1; + vcpu->sigset = *sigset; + } else + vcpu->sigset_active = 0; + return 0; +} + static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2003,21 +2370,12 @@ static long kvm_vcpu_ioctl(struct file * int r = -EINVAL; switch (ioctl) { - case KVM_RUN: { - struct kvm_run kvm_run; - - r = -EFAULT; - if (copy_from_user(&kvm_run, argp, sizeof kvm_run)) - goto out; - r = kvm_vcpu_ioctl_run(vcpu, &kvm_run); - if (r < 0 && r != -EINTR) - goto out; - if (copy_to_user(argp, &kvm_run, sizeof kvm_run)) { - r = -EFAULT; + case KVM_RUN: + r = -EINVAL; + if (arg) goto out; - } + r = kvm_vcpu_ioctl_run(vcpu, vcpu->run); break; - } case KVM_GET_REGS: { struct kvm_regs kvm_regs; @@ -2113,6 +2471,41 @@ static long kvm_vcpu_ioctl(struct file * case KVM_SET_MSRS: r = msr_io(vcpu, argp, do_set_msr, 0); break; + case KVM_SET_CPUID: { + struct kvm_cpuid __user *cpuid_arg = argp; + struct kvm_cpuid cpuid; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + goto out; + r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); + if (r) + goto out; + break; + } + case KVM_SET_SIGNAL_MASK: { + struct kvm_signal_mask __user *sigmask_arg = argp; + struct kvm_signal_mask kvm_sigmask; + sigset_t sigset, *p; + + p = NULL; + if (argp) { + r = -EFAULT; + if (copy_from_user(&kvm_sigmask, argp, + sizeof kvm_sigmask)) + goto out; + r = -EINVAL; + if (kvm_sigmask.len != sizeof sigset) + goto out; + r = -EFAULT; + if (copy_from_user(&sigset, sigmask_arg->sigset, + sizeof sigset)) + goto out; + p = &sigset; + } + r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); + break; + } default: ; } @@ -2155,6 +2548,17 @@ static long kvm_vm_ioctl(struct file *fi goto out; break; } + case KVM_GET_MEM_MAP: { + struct kvm_dirty_log log; + + r = -EFAULT; + if (copy_from_user(&log, argp, sizeof log)) + goto out; + r = kvm_vm_ioctl_get_memory_map(kvm, &log); + if (r) + goto out; + break; + } default: ; } @@ -2248,13 +2652,19 @@ static long kvm_dev_ioctl(struct file *f unsigned int ioctl, unsigned long arg) { void __user *argp = (void __user *)arg; - int r = -EINVAL; + long r = -EINVAL; switch (ioctl) { case KVM_GET_API_VERSION: + r = -EINVAL; + if (arg) + goto out; r = KVM_API_VERSION; break; case KVM_CREATE_VM: + r = -EINVAL; + if (arg) + goto out; r = kvm_dev_ioctl_create_vm(); break; case KVM_GET_MSR_INDEX_LIST: { @@ -2284,6 +2694,18 @@ static long kvm_dev_ioctl(struct file *f r = 0; break; } + case KVM_CHECK_EXTENSION: + /* + * No extensions defined at present. 
+ */ + r = 0; + break; + case KVM_GET_VCPU_MMAP_SIZE: + r = -EINVAL; + if (arg) + goto out; + r = 2 * PAGE_SIZE; + break; default: ; } @@ -2299,7 +2721,7 @@ static struct file_operations kvm_charde }; static struct miscdevice kvm_dev = { - MISC_DYNAMIC_MINOR, + KVM_MINOR, "kvm", &kvm_chardev_ops, }; diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index e85b4c7..b181106 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -461,7 +461,6 @@ static struct kvm_mmu_page *kvm_mmu_allo list_add(&page->link, &vcpu->kvm->active_mmu_pages); ASSERT(is_empty_shadow_page(page->page_hpa)); page->slot_bitmap = 0; - page->global = 1; page->multimapped = 0; page->parent_pte = parent_pte; --vcpu->kvm->n_free_mmu_pages; @@ -735,6 +734,15 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, return gpa_to_hpa(vcpu, gpa); } +struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) +{ + gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); + + if (gpa == UNMAPPED_GVA) + return NULL; + return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT); +} + static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) { } @@ -918,11 +926,6 @@ static void paging_new_cr3(struct kvm_vc kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); } -static void mark_pagetable_nonglobal(void *shadow_pte) -{ - page_header(__pa(shadow_pte))->global = 0; -} - static inline void set_pte_common(struct kvm_vcpu *vcpu, u64 *shadow_pte, gpa_t gaddr, @@ -940,9 +943,6 @@ static inline void set_pte_common(struct *shadow_pte |= access_bits; - if (!(*shadow_pte & PT_GLOBAL_MASK)) - mark_pagetable_nonglobal(shadow_pte); - if (is_error_hpa(paddr)) { *shadow_pte |= gaddr; *shadow_pte |= PT_SHADOW_IO_MARK; @@ -1359,7 +1359,7 @@ static void audit_mappings_page(struct k static void audit_mappings(struct kvm_vcpu *vcpu) { - int i; + unsigned i; if (vcpu->mmu.root_level == 4) audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4); diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index f3bcee9..17bd440 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -148,8 +148,7 @@ #endif break; } - if (walker->level != 3 || is_long_mode(vcpu)) - walker->inherited_ar &= walker->table[index]; + walker->inherited_ar &= walker->table[index]; table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK); kunmap_atomic(walker->table, KM_USER0); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 3d8ea7a..d3cc115 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -582,6 +582,9 @@ static int svm_create_vcpu(struct kvm_vc init_vmcb(vcpu->svm->vmcb); fx_init(vcpu); + vcpu->apic_base = 0xfee00000 | + /*for vcpu 0*/ MSR_IA32_APICBASE_BSP | + MSR_IA32_APICBASE_ENABLE; return 0; @@ -981,7 +984,7 @@ static int io_get_override(struct kvm_vc return 0; } -static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address) +static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address) { unsigned long addr_mask; unsigned long *reg; @@ -1025,38 +1028,38 @@ static unsigned long io_adress(struct kv static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug? - int _in = io_info & SVM_IOIO_TYPE_MASK; + int size, down, in, string, rep; + unsigned port; + unsigned long count; + gva_t address = 0; ++kvm_stat.io_exits; vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2; - kvm_run->exit_reason = KVM_EXIT_IO; - kvm_run->io.port = io_info >> 16; - kvm_run->io.direction = (_in) ? 
KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; - kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT); - kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0; - kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0; + in = (io_info & SVM_IOIO_TYPE_MASK) != 0; + port = io_info >> 16; + size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; + string = (io_info & SVM_IOIO_STR_MASK) != 0; + rep = (io_info & SVM_IOIO_REP_MASK) != 0; + count = 1; + down = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; - if (kvm_run->io.string) { + if (string) { unsigned addr_mask; - addr_mask = io_adress(vcpu, _in, &kvm_run->io.address); + addr_mask = io_adress(vcpu, in, &address); if (!addr_mask) { printk(KERN_DEBUG "%s: get io address failed\n", __FUNCTION__); return 1; } - if (kvm_run->io.rep) { - kvm_run->io.count - = vcpu->regs[VCPU_REGS_RCX] & addr_mask; - kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags - & X86_EFLAGS_DF) != 0; - } - } else - kvm_run->io.value = vcpu->svm->vmcb->save.rax; - return 0; + if (rep) + count = vcpu->regs[VCPU_REGS_RCX] & addr_mask; + } + return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, + address, rep, port); } static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -1078,7 +1081,8 @@ static int halt_interception(struct kvm_ static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - vcpu->svm->vmcb->save.rip += 3; + vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 3; + skip_emulated_instruction(vcpu); return kvm_hypercall(vcpu, kvm_run); } @@ -1098,8 +1102,8 @@ static int task_switch_interception(stru static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; - kvm_run->exit_reason = KVM_EXIT_CPUID; - return 0; + kvm_emulate_cpuid(vcpu); + return 1; } static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -1295,8 +1299,6 @@ static int handle_exit(struct kvm_vcpu * { u32 exit_code = vcpu->svm->vmcb->control.exit_code; - kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; - if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) && exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " @@ -1606,8 +1608,9 @@ #endif vcpu->svm->next_rip = 0; if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { - kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; - kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code; + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + kvm_run->fail_entry.hardware_entry_failure_reason + = vcpu->svm->vmcb->control.exit_code; post_kvm_run_save(vcpu, kvm_run); return 0; } @@ -1617,12 +1620,14 @@ #endif if (signal_pending(current)) { ++kvm_stat.signal_exits; post_kvm_run_save(vcpu, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } if (dm_request_for_irq_injection(vcpu, kvm_run)) { ++kvm_stat.request_irq_exits; post_kvm_run_save(vcpu, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } kvm_resched(vcpu); @@ -1711,7 +1716,6 @@ static struct kvm_arch_ops svm_arch_ops .get_cs_db_l_bits = svm_get_cs_db_l_bits, .decache_cr0_cr4_guest_bits = svm_decache_cr0_cr4_guest_bits, .set_cr0 = svm_set_cr0, - .set_cr0_no_modeswitch = svm_set_cr0, .set_cr3 = svm_set_cr3, .set_cr4 = svm_set_cr4, .set_efer = svm_set_efer, diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index bfa0ce4..027a962 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -618,7 +618,7 @@ static void fix_pmode_dataseg(int seg, s { 
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - if (vmcs_readl(sf->base) == save->base) { + if (vmcs_readl(sf->base) == save->base && (save->base & AR_S_MASK)) { vmcs_write16(sf->selector, save->selector); vmcs_writel(sf->base, save->base); vmcs_write32(sf->limit, save->limit); @@ -712,6 +712,8 @@ static void enter_rmode(struct kvm_vcpu vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); vmcs_write32(GUEST_CS_LIMIT, 0xffff); + if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000) + vmcs_writel(GUEST_CS_BASE, 0xf0000); vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es); @@ -786,22 +788,6 @@ #endif vcpu->cr0 = cr0; } -/* - * Used when restoring the VM to avoid corrupting segment registers - */ -static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0) -{ - if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK)) - enter_rmode(vcpu); - - vcpu->rmode.active = ((cr0 & CR0_PE_MASK) == 0); - update_exception_bitmap(vcpu); - vmcs_writel(CR0_READ_SHADOW, cr0); - vmcs_writel(GUEST_CR0, - (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); - vcpu->cr0 = cr0; -} - static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { vmcs_writel(GUEST_CR3, cr3); @@ -1394,7 +1380,7 @@ static int handle_triple_fault(struct kv return 0; } -static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) +static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count) { u64 inst; gva_t rip; @@ -1439,33 +1425,35 @@ static int get_io_count(struct kvm_vcpu done: countr_size *= 8; *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size)); + //printk("cx: %lx\n", vcpu->regs[VCPU_REGS_RCX]); return 1; } static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u64 exit_qualification; + int size, down, in, string, rep; + unsigned port; + unsigned long count; + gva_t address; ++kvm_stat.io_exits; exit_qualification = vmcs_read64(EXIT_QUALIFICATION); - kvm_run->exit_reason = KVM_EXIT_IO; - if (exit_qualification & 8) - kvm_run->io.direction = KVM_EXIT_IO_IN; - else - kvm_run->io.direction = KVM_EXIT_IO_OUT; - kvm_run->io.size = (exit_qualification & 7) + 1; - kvm_run->io.string = (exit_qualification & 16) != 0; - kvm_run->io.string_down - = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; - kvm_run->io.rep = (exit_qualification & 32) != 0; - kvm_run->io.port = exit_qualification >> 16; - if (kvm_run->io.string) { - if (!get_io_count(vcpu, &kvm_run->io.count)) + in = (exit_qualification & 8) != 0; + size = (exit_qualification & 7) + 1; + string = (exit_qualification & 16) != 0; + down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; + count = 1; + rep = (exit_qualification & 32) != 0; + port = exit_qualification >> 16; + address = 0; + if (string) { + if (rep && !get_io_count(vcpu, &count)) return 1; - kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS); - } else - kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */ - return 0; + address = vmcs_readl(GUEST_LINEAR_ADDRESS); + } + return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, + address, rep, port); } static void @@ -1583,8 +1571,8 @@ static int handle_dr(struct kvm_vcpu *vc static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - kvm_run->exit_reason = KVM_EXIT_CPUID; - return 0; + kvm_emulate_cpuid(vcpu); + return 1; } static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -1658,7 +1646,7 @@ static int handle_halt(struct kvm_vcpu * static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - 
vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3); + skip_emulated_instruction(vcpu); return kvm_hypercall(vcpu, kvm_run); } @@ -1888,6 +1876,27 @@ #endif [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) : "cc", "memory" ); + /* + * Reload segment selectors ASAP. (it's needed for a functional + * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64 + * relies on having 0 in %gs for the CPU PDA to work.) + */ + if (fs_gs_ldt_reload_needed) { + load_ldt(ldt_sel); + load_fs(fs_sel); + /* + * If we have to reload gs, we must take care to + * preserve our gs base. + */ + local_irq_disable(); + load_gs(gs_sel); +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); +#endif + local_irq_enable(); + + reload_tss(); + } ++kvm_stat.exits; save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); @@ -1899,28 +1908,12 @@ #endif asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); - kvm_run->exit_type = 0; if (fail) { - kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; - kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR); + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + kvm_run->fail_entry.hardware_entry_failure_reason + = vmcs_read32(VM_INSTRUCTION_ERROR); r = 0; } else { - if (fs_gs_ldt_reload_needed) { - load_ldt(ldt_sel); - load_fs(fs_sel); - /* - * If we have to reload gs, we must take care to - * preserve our gs base. - */ - local_irq_disable(); - load_gs(gs_sel); -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); -#endif - local_irq_enable(); - - reload_tss(); - } /* * Profile KVM exit RIPs: */ @@ -1928,19 +1921,20 @@ #endif profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); vcpu->launched = 1; - kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; r = kvm_handle_exit(kvm_run, vcpu); if (r > 0) { /* Give scheduler a change to reschedule. 
*/ if (signal_pending(current)) { ++kvm_stat.signal_exits; post_kvm_run_save(vcpu, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } if (dm_request_for_irq_injection(vcpu, kvm_run)) { ++kvm_stat.request_irq_exits; post_kvm_run_save(vcpu, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } @@ -2059,7 +2053,6 @@ static struct kvm_arch_ops vmx_arch_ops .get_cs_db_l_bits = vmx_get_cs_db_l_bits, .decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits, .set_cr0 = vmx_set_cr0, - .set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch, .set_cr3 = vmx_set_cr3, .set_cr4 = vmx_set_cr4, #ifdef CONFIG_X86_64 diff --git a/include/linux/Kbuild b/include/linux/Kbuild index e81e301..b35b593 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -99,6 +99,7 @@ header-y += iso_fs.h header-y += ixjuser.h header-y += jffs2.h header-y += keyctl.h +header-y += kvm.h header-y += limits.h header-y += lock_dlm_plock.h header-y += magic.h diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 275354f..e7e2fa8 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #define __LINUX_KVM_H #include #include -#define KVM_API_VERSION 4 +#define KVM_API_VERSION 9 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -34,36 +34,33 @@ struct kvm_memory_region { #define KVM_MEM_LOG_DIRTY_PAGES 1UL -#define KVM_EXIT_TYPE_FAIL_ENTRY 1 -#define KVM_EXIT_TYPE_VM_EXIT 2 - enum kvm_exit_reason { KVM_EXIT_UNKNOWN = 0, KVM_EXIT_EXCEPTION = 1, KVM_EXIT_IO = 2, - KVM_EXIT_CPUID = 3, + KVM_EXIT_HYPERCALL = 3, KVM_EXIT_DEBUG = 4, KVM_EXIT_HLT = 5, KVM_EXIT_MMIO = 6, KVM_EXIT_IRQ_WINDOW_OPEN = 7, KVM_EXIT_SHUTDOWN = 8, + KVM_EXIT_FAIL_ENTRY = 9, + KVM_EXIT_INTR = 10, }; -/* for KVM_RUN */ +/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ - __u32 emulated; /* skip current instruction */ - __u32 mmio_completed; /* mmio request completed */ + __u32 io_completed; /* mmio/pio request completed */ __u8 request_interrupt_window; - __u8 padding1[7]; + __u8 padding1[3]; /* out */ - __u32 exit_type; __u32 exit_reason; __u32 instruction_length; __u8 ready_for_interrupt_injection; __u8 if_flag; - __u16 padding2; + __u8 padding2[6]; /* in (pre_kvm_run), out (post_kvm_run) */ __u64 cr8; @@ -72,29 +69,26 @@ struct kvm_run { union { /* KVM_EXIT_UNKNOWN */ struct { - __u32 hardware_exit_reason; + __u64 hardware_exit_reason; } hw; + /* KVM_EXIT_FAIL_ENTRY */ + struct { + __u64 hardware_entry_failure_reason; + } fail_entry; /* KVM_EXIT_EXCEPTION */ struct { __u32 exception; __u32 error_code; } ex; /* KVM_EXIT_IO */ - struct { + struct kvm_io { #define KVM_EXIT_IO_IN 0 #define KVM_EXIT_IO_OUT 1 __u8 direction; __u8 size; /* bytes */ - __u8 string; - __u8 string_down; - __u8 rep; - __u8 pad; __u16 port; - __u64 count; - union { - __u64 address; - __u32 value; - }; + __u32 count; + __u64 data_offset; /* relative to kvm_run start */ } io; struct { } debug; @@ -105,6 +99,13 @@ #define KVM_EXIT_IO_OUT 1 __u32 len; __u8 is_write; } mmio; + /* KVM_EXIT_HYPERCALL */ + struct { + __u64 args[6]; + __u64 ret; + __u32 longmode; + __u32 pad; + } hypercall; }; }; @@ -200,7 +201,7 @@ struct kvm_debug_guest { __u32 singlestep; }; -/* for KVM_GET_DIRTY_LOG */ +/* for KVM_GET_DIRTY_LOG and KVM_GET_MEM_MAP */ struct kvm_dirty_log { __u32 slot; __u32 padding; @@ -210,38 +211,72 @@ struct kvm_dirty_log { }; }; +struct kvm_cpuid_entry { + __u32 function; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding; +}; + +/* for KVM_SET_CPUID */ 
+struct kvm_cpuid { + __u32 nent; + __u32 padding; + struct kvm_cpuid_entry entries[0]; +}; + +/* for KVM_SET_SIGNAL_MASK */ +struct kvm_signal_mask { + __u32 len; + __u8 sigset[0]; +}; + #define KVMIO 0xAE /* * ioctls for /dev/kvm fds: */ -#define KVM_GET_API_VERSION _IO(KVMIO, 1) -#define KVM_CREATE_VM _IO(KVMIO, 2) /* returns a VM fd */ -#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list) +#define KVM_GET_API_VERSION _IO(KVMIO, 0x00) +#define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */ +#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) +/* + * Check if a kvm extension is available. Argument is extension number, + * return is 1 (yes) or 0 (no, sorry). + */ +#define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03) +/* + * Get size for mmap(vcpu_fd) + */ +#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ /* * ioctls for VM fds */ -#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region) +#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. */ -#define KVM_CREATE_VCPU _IOW(KVMIO, 11, int) -#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log) +#define KVM_CREATE_VCPU _IO(KVMIO, 0x41) +#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) +#define KVM_GET_MEM_MAP _IOW(KVMIO, 0x43, struct kvm_dirty_log) /* * ioctls for vcpu fds */ -#define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run) -#define KVM_GET_REGS _IOR(KVMIO, 3, struct kvm_regs) -#define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs) -#define KVM_GET_SREGS _IOR(KVMIO, 5, struct kvm_sregs) -#define KVM_SET_SREGS _IOW(KVMIO, 6, struct kvm_sregs) -#define KVM_TRANSLATE _IOWR(KVMIO, 7, struct kvm_translation) -#define KVM_INTERRUPT _IOW(KVMIO, 8, struct kvm_interrupt) -#define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest) -#define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs) -#define KVM_SET_MSRS _IOW(KVMIO, 14, struct kvm_msrs) +#define KVM_RUN _IO(KVMIO, 0x80) +#define KVM_GET_REGS _IOR(KVMIO, 0x81, struct kvm_regs) +#define KVM_SET_REGS _IOW(KVMIO, 0x82, struct kvm_regs) +#define KVM_GET_SREGS _IOR(KVMIO, 0x83, struct kvm_sregs) +#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) +#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) +#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) +#define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest) +#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) +#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) +#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) +#define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask) #endif diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 326da7d..dff9ea3 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -29,6 +29,7 @@ #define MISC_DYNAMIC_MINOR 255 #define TUN_MINOR 200 #define HPET_MINOR 228 +#define KVM_MINOR 232 struct device;
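Tying the header above back to the 'Handle cpuid in the kernel' commit:
userspace is expected to load a cpuid table with KVM_SET_CPUID before the
first KVM_RUN.  A sketch with a single illustrative entry (the vendor string
values are examples only):

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_one_cpuid_entry(int vcpu_fd)
{
        struct kvm_cpuid *c;
        int r;

        c = calloc(1, sizeof(*c) + sizeof(struct kvm_cpuid_entry));
        c->nent = 1;
        c->entries[0].function = 0;     /* leaf 0: max basic leaf + vendor id */
        c->entries[0].eax = 1;          /* advertise one basic leaf */
        memcpy(&c->entries[0].ebx, "Genu", 4);  /* "GenuineIntel" pieces */
        memcpy(&c->entries[0].edx, "ineI", 4);
        memcpy(&c->entries[0].ecx, "ntel", 4);
        r = ioctl(vcpu_fd, KVM_SET_CPUID, c);
        free(c);
        return r;
}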