author      Linus Torvalds <torvalds@linux-foundation.org>  2008-04-25 12:32:10 -0700
committer   Linus Torvalds <torvalds@linux-foundation.org>  2008-04-25 12:32:10 -0700
commit      4b7227ca321ccf447cdc04538687c895db8b77f5 (patch)
tree        72712127fc56aa2579e8a1508998bcabf6bd6c60 /drivers
parent      5dae61b80564a5583ff4b56e357bdbc733fddb76 (diff)
parent      1775826ceec51187aa868406585799b7e76ffa7d (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-xen-next
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-xen-next: (52 commits)
  xen: add balloon driver
  xen: allow compilation with non-flat memory
  xen: fold xen_sysexit into xen_iret
  xen: allow set_pte_at on init_mm to be lockless
  xen: disable preemption during tlb flush
  xen pvfb: Para-virtual framebuffer, keyboard and pointer driver
  xen: Add compatibility aliases for frontend drivers
  xen: Module autoprobing support for frontend drivers
  xen blkfront: Delay wait for block devices until after the disk is added
  xen/blkfront: use bdget_disk
  xen: Make xen-blkfront write its protocol ABI to xenstore
  xen: import arch generic part of xencomm
  xen: make grant table arch portable
  xen: replace callers of alloc_vm_area()/free_vm_area() with xen_ prefixed one
  xen: make include/xen/page.h portable moving those definitions under asm dir
  xen: add resend_irq_on_evtchn() definition into events.c
  Xen: make events.c portable for ia64/xen support
  xen: move events.c to drivers/xen for IA64/Xen support
  xen: move features.c from arch/x86/xen/features.c to drivers/xen
  xen: add missing definitions in include/xen/interface/vcpu.h which ia64/xen needs
  ...
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/Kconfig                     |   2
-rw-r--r--  drivers/block/xen-blkfront.c        |  23
-rw-r--r--  drivers/input/Kconfig               |   9
-rw-r--r--  drivers/input/Makefile              |   2
-rw-r--r--  drivers/input/xen-kbdfront.c        | 340
-rw-r--r--  drivers/net/xen-netfront.c          |   2
-rw-r--r--  drivers/video/Kconfig               |  14
-rw-r--r--  drivers/video/Makefile              |   1
-rw-r--r--  drivers/video/xen-fbfront.c         | 550
-rw-r--r--  drivers/xen/Kconfig                 |  19
-rw-r--r--  drivers/xen/Makefile                |   4
-rw-r--r--  drivers/xen/balloon.c               | 712
-rw-r--r--  drivers/xen/events.c                | 674
-rw-r--r--  drivers/xen/features.c              |  29
-rw-r--r--  drivers/xen/grant-table.c           |  37
-rw-r--r--  drivers/xen/xenbus/xenbus_client.c  |   6
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.c   |  32
-rw-r--r--  drivers/xen/xencomm.c               | 232
18 files changed, 2646 insertions, 42 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 3a0e3549739f..80f0ec91e2cf 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -97,4 +97,6 @@ source "drivers/dca/Kconfig"
source "drivers/auxdisplay/Kconfig"
source "drivers/uio/Kconfig"
+
+source "drivers/xen/Kconfig"
endmenu
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 9c6f3f99208d..d771da816d95 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -47,6 +47,7 @@
#include <xen/interface/grant_table.h>
#include <xen/interface/io/blkif.h>
+#include <xen/interface/io/protocols.h>
#include <asm/xen/hypervisor.h>
@@ -74,7 +75,6 @@ static struct block_device_operations xlvbd_block_fops;
struct blkfront_info
{
struct xenbus_device *xbdev;
- dev_t dev;
struct gendisk *gd;
int vdevice;
blkif_vdev_t handle;
@@ -88,6 +88,7 @@ struct blkfront_info
struct blk_shadow shadow[BLK_RING_SIZE];
unsigned long shadow_free;
int feature_barrier;
+ int is_ready;
/**
* The number of people holding this device open. We won't allow a
@@ -614,6 +615,12 @@ again:
message = "writing event-channel";
goto abort_transaction;
}
+ err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
+ XEN_IO_PROTO_ABI_NATIVE);
+ if (err) {
+ message = "writing protocol";
+ goto abort_transaction;
+ }
err = xenbus_transaction_end(xbt, 0);
if (err) {
@@ -833,6 +840,8 @@ static void blkfront_connect(struct blkfront_info *info)
spin_unlock_irq(&blkif_io_lock);
add_disk(info->gd);
+
+ info->is_ready = 1;
}
/**
@@ -896,7 +905,7 @@ static void backend_changed(struct xenbus_device *dev,
break;
case XenbusStateClosing:
- bd = bdget(info->dev);
+ bd = bdget_disk(info->gd, 0);
if (bd == NULL)
xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
@@ -925,6 +934,13 @@ static int blkfront_remove(struct xenbus_device *dev)
return 0;
}
+static int blkfront_is_ready(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev->dev.driver_data;
+
+ return info->is_ready;
+}
+
static int blkif_open(struct inode *inode, struct file *filep)
{
struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
@@ -971,6 +987,7 @@ static struct xenbus_driver blkfront = {
.remove = blkfront_remove,
.resume = blkfront_resume,
.otherend_changed = backend_changed,
+ .is_ready = blkfront_is_ready,
};
static int __init xlblk_init(void)
@@ -998,3 +1015,5 @@ module_exit(xlblk_exit);
MODULE_DESCRIPTION("Xen virtual block device frontend");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
+MODULE_ALIAS("xen:vbd");
+MODULE_ALIAS("xenblk");
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index 9dea14db724c..5f9d860925a1 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -149,6 +149,15 @@ config INPUT_APMPOWER
To compile this driver as a module, choose M here: the
module will be called apm-power.
+config XEN_KBDDEV_FRONTEND
+ tristate "Xen virtual keyboard and mouse support"
+ depends on XEN_FBDEV_FRONTEND
+ default y
+ help
+ This driver implements the front-end of the Xen virtual
+ keyboard and mouse device driver. It communicates with a back-end
+ in another domain.
+
comment "Input Device Drivers"
source "drivers/input/keyboard/Kconfig"
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 2ae87b19caa8..98c4f9a77876 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -23,3 +23,5 @@ obj-$(CONFIG_INPUT_TOUCHSCREEN) += touchscreen/
obj-$(CONFIG_INPUT_MISC) += misc/
obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o
+
+obj-$(CONFIG_XEN_KBDDEV_FRONTEND) += xen-kbdfront.o
diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c
new file mode 100644
index 000000000000..0f47f4697cdf
--- /dev/null
+++ b/drivers/input/xen-kbdfront.c
@@ -0,0 +1,340 @@
+/*
+ * Xen para-virtual input device
+ *
+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
+ * Copyright (C) 2006-2008 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
+ *
+ * Based on linux/drivers/input/mouse/sermouse.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive for
+ * more details.
+ */
+
+/*
+ * TODO:
+ *
+ * Switch to grant tables together with xen-fbfront.c.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/input.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/page.h>
+#include <xen/interface/io/fbif.h>
+#include <xen/interface/io/kbdif.h>
+#include <xen/xenbus.h>
+
+struct xenkbd_info {
+ struct input_dev *kbd;
+ struct input_dev *ptr;
+ struct xenkbd_page *page;
+ int irq;
+ struct xenbus_device *xbdev;
+ char phys[32];
+};
+
+static int xenkbd_remove(struct xenbus_device *);
+static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *);
+static void xenkbd_disconnect_backend(struct xenkbd_info *);
+
+/*
+ * Note: if you need to send out events, see xenfb_do_update() for how
+ * to do that.
+ */
+
+static irqreturn_t input_handler(int rq, void *dev_id)
+{
+ struct xenkbd_info *info = dev_id;
+ struct xenkbd_page *page = info->page;
+ __u32 cons, prod;
+
+ prod = page->in_prod;
+ if (prod == page->in_cons)
+ return IRQ_HANDLED;
+ rmb(); /* ensure we see ring contents up to prod */
+ for (cons = page->in_cons; cons != prod; cons++) {
+ union xenkbd_in_event *event;
+ struct input_dev *dev;
+ event = &XENKBD_IN_RING_REF(page, cons);
+
+ dev = info->ptr;
+ switch (event->type) {
+ case XENKBD_TYPE_MOTION:
+ input_report_rel(dev, REL_X, event->motion.rel_x);
+ input_report_rel(dev, REL_Y, event->motion.rel_y);
+ break;
+ case XENKBD_TYPE_KEY:
+ dev = NULL;
+ if (test_bit(event->key.keycode, info->kbd->keybit))
+ dev = info->kbd;
+ if (test_bit(event->key.keycode, info->ptr->keybit))
+ dev = info->ptr;
+ if (dev)
+ input_report_key(dev, event->key.keycode,
+ event->key.pressed);
+ else
+ printk(KERN_WARNING
+ "xenkbd: unhandled keycode 0x%x\n",
+ event->key.keycode);
+ break;
+ case XENKBD_TYPE_POS:
+ input_report_abs(dev, ABS_X, event->pos.abs_x);
+ input_report_abs(dev, ABS_Y, event->pos.abs_y);
+ break;
+ }
+ if (dev)
+ input_sync(dev);
+ }
+ mb(); /* ensure we got ring contents */
+ page->in_cons = cons;
+ notify_remote_via_irq(info->irq);
+
+ return IRQ_HANDLED;
+}
+
+static int __devinit xenkbd_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int ret, i;
+ struct xenkbd_info *info;
+ struct input_dev *kbd, *ptr;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
+ return -ENOMEM;
+ }
+ dev->dev.driver_data = info;
+ info->xbdev = dev;
+ info->irq = -1;
+ snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename);
+
+ info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!info->page)
+ goto error_nomem;
+
+ /* keyboard */
+ kbd = input_allocate_device();
+ if (!kbd)
+ goto error_nomem;
+ kbd->name = "Xen Virtual Keyboard";
+ kbd->phys = info->phys;
+ kbd->id.bustype = BUS_PCI;
+ kbd->id.vendor = 0x5853;
+ kbd->id.product = 0xffff;
+ kbd->evbit[0] = BIT(EV_KEY);
+ for (i = KEY_ESC; i < KEY_UNKNOWN; i++)
+ set_bit(i, kbd->keybit);
+ for (i = KEY_OK; i < KEY_MAX; i++)
+ set_bit(i, kbd->keybit);
+
+ ret = input_register_device(kbd);
+ if (ret) {
+ input_free_device(kbd);
+ xenbus_dev_fatal(dev, ret, "input_register_device(kbd)");
+ goto error;
+ }
+ info->kbd = kbd;
+
+ /* pointing device */
+ ptr = input_allocate_device();
+ if (!ptr)
+ goto error_nomem;
+ ptr->name = "Xen Virtual Pointer";
+ ptr->phys = info->phys;
+ ptr->id.bustype = BUS_PCI;
+ ptr->id.vendor = 0x5853;
+ ptr->id.product = 0xfffe;
+ ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
+ for (i = BTN_LEFT; i <= BTN_TASK; i++)
+ set_bit(i, ptr->keybit);
+ ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+ input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0);
+ input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0);
+
+ ret = input_register_device(ptr);
+ if (ret) {
+ input_free_device(ptr);
+ xenbus_dev_fatal(dev, ret, "input_register_device(ptr)");
+ goto error;
+ }
+ info->ptr = ptr;
+
+ ret = xenkbd_connect_backend(dev, info);
+ if (ret < 0)
+ goto error;
+
+ return 0;
+
+ error_nomem:
+ ret = -ENOMEM;
+ xenbus_dev_fatal(dev, ret, "allocating device memory");
+ error:
+ xenkbd_remove(dev);
+ return ret;
+}
+
+static int xenkbd_resume(struct xenbus_device *dev)
+{
+ struct xenkbd_info *info = dev->dev.driver_data;
+
+ xenkbd_disconnect_backend(info);
+ memset(info->page, 0, PAGE_SIZE);
+ return xenkbd_connect_backend(dev, info);
+}
+
+static int xenkbd_remove(struct xenbus_device *dev)
+{
+ struct xenkbd_info *info = dev->dev.driver_data;
+
+ xenkbd_disconnect_backend(info);
+ if (info->kbd)
+ input_unregister_device(info->kbd);
+ if (info->ptr)
+ input_unregister_device(info->ptr);
+ free_page((unsigned long)info->page);
+ kfree(info);
+ return 0;
+}
+
+static int xenkbd_connect_backend(struct xenbus_device *dev,
+ struct xenkbd_info *info)
+{
+ int ret, evtchn;
+ struct xenbus_transaction xbt;
+
+ ret = xenbus_alloc_evtchn(dev, &evtchn);
+ if (ret)
+ return ret;
+ ret = bind_evtchn_to_irqhandler(evtchn, input_handler,
+ 0, dev->devicetype, info);
+ if (ret < 0) {
+ xenbus_free_evtchn(dev, evtchn);
+ xenbus_dev_fatal(dev, ret, "bind_evtchn_to_irqhandler");
+ return ret;
+ }
+ info->irq = ret;
+
+ again:
+ ret = xenbus_transaction_start(&xbt);
+ if (ret) {
+ xenbus_dev_fatal(dev, ret, "starting transaction");
+ return ret;
+ }
+ ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
+ virt_to_mfn(info->page));
+ if (ret)
+ goto error_xenbus;
+ ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
+ evtchn);
+ if (ret)
+ goto error_xenbus;
+ ret = xenbus_transaction_end(xbt, 0);
+ if (ret) {
+ if (ret == -EAGAIN)
+ goto again;
+ xenbus_dev_fatal(dev, ret, "completing transaction");
+ return ret;
+ }
+
+ xenbus_switch_state(dev, XenbusStateInitialised);
+ return 0;
+
+ error_xenbus:
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, ret, "writing xenstore");
+ return ret;
+}
+
+static void xenkbd_disconnect_backend(struct xenkbd_info *info)
+{
+ if (info->irq >= 0)
+ unbind_from_irqhandler(info->irq, info);
+ info->irq = -1;
+}
+
+static void xenkbd_backend_changed(struct xenbus_device *dev,
+ enum xenbus_state backend_state)
+{
+ struct xenkbd_info *info = dev->dev.driver_data;
+ int ret, val;
+
+ switch (backend_state) {
+ case XenbusStateInitialising:
+ case XenbusStateInitialised:
+ case XenbusStateUnknown:
+ case XenbusStateClosed:
+ break;
+
+ case XenbusStateInitWait:
+InitWait:
+ ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "feature-abs-pointer", "%d", &val);
+ if (ret < 0)
+ val = 0;
+ if (val) {
+ ret = xenbus_printf(XBT_NIL, info->xbdev->nodename,
+ "request-abs-pointer", "1");
+ if (ret)
+ printk(KERN_WARNING
+ "xenkbd: can't request abs-pointer");
+ }
+ xenbus_switch_state(dev, XenbusStateConnected);
+ break;
+
+ case XenbusStateConnected:
+ /*
+ * Work around xenbus race condition: If backend goes
+ * through InitWait to Connected fast enough, we can
+ * get Connected twice here.
+ */
+ if (dev->state != XenbusStateConnected)
+ goto InitWait; /* no InitWait seen yet, fudge it */
+ break;
+
+ case XenbusStateClosing:
+ xenbus_frontend_closed(dev);
+ break;
+ }
+}
+
+static struct xenbus_device_id xenkbd_ids[] = {
+ { "vkbd" },
+ { "" }
+};
+
+static struct xenbus_driver xenkbd = {
+ .name = "vkbd",
+ .owner = THIS_MODULE,
+ .ids = xenkbd_ids,
+ .probe = xenkbd_probe,
+ .remove = xenkbd_remove,
+ .resume = xenkbd_resume,
+ .otherend_changed = xenkbd_backend_changed,
+};
+
+static int __init xenkbd_init(void)
+{
+ if (!is_running_on_xen())
+ return -ENODEV;
+
+ /* Nothing to do if running in dom0. */
+ if (is_initial_xendomain())
+ return -ENODEV;
+
+ return xenbus_register_frontend(&xenkbd);
+}
+
+static void __exit xenkbd_cleanup(void)
+{
+ xenbus_unregister_driver(&xenkbd);
+}
+
+module_init(xenkbd_init);
+module_exit(xenkbd_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 7483d45bc5bc..e62018a36133 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1809,3 +1809,5 @@ module_exit(netif_exit);
MODULE_DESCRIPTION("Xen virtual network device frontend");
MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:vif");
+MODULE_ALIAS("xennet");
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 1bd5fb30237d..e3dc8f8d0c3e 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1930,6 +1930,20 @@ config FB_VIRTUAL
If unsure, say N.
+config XEN_FBDEV_FRONTEND
+ tristate "Xen virtual frame buffer support"
+ depends on FB && XEN
+ select FB_SYS_FILLRECT
+ select FB_SYS_COPYAREA
+ select FB_SYS_IMAGEBLIT
+ select FB_SYS_FOPS
+ select FB_DEFERRED_IO
+ default y
+ help
+ This driver implements the front-end of the Xen virtual
+ frame buffer driver. It communicates with a back-end
+ in another domain.
+
source "drivers/video/omap/Kconfig"
source "drivers/video/backlight/Kconfig"
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 11c0e5e05f21..f172b9b73314 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -114,6 +114,7 @@ obj-$(CONFIG_FB_PS3) += ps3fb.o
obj-$(CONFIG_FB_SM501) += sm501fb.o
obj-$(CONFIG_FB_XILINX) += xilinxfb.o
obj-$(CONFIG_FB_OMAP) += omap/
+obj-$(CONFIG_XEN_FBDEV_FRONTEND) += xen-fbfront.o
# Platform or fallback drivers go here
obj-$(CONFIG_FB_UVESA) += uvesafb.o
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
new file mode 100644
index 000000000000..619a6f8d65a2
--- /dev/null
+++ b/drivers/video/xen-fbfront.c
@@ -0,0 +1,550 @@
+/*
+ * Xen para-virtual frame buffer device
+ *
+ * Copyright (C) 2005-2006 Anthony Liguori <aliguori@us.ibm.com>
+ * Copyright (C) 2006-2008 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
+ *
+ * Based on linux/drivers/video/q40fb.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive for
+ * more details.
+ */
+
+/*
+ * TODO:
+ *
+ * Switch to grant tables when they become capable of dealing with the
+ * frame buffer.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fb.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/page.h>
+#include <xen/interface/io/fbif.h>
+#include <xen/interface/io/protocols.h>
+#include <xen/xenbus.h>
+
+struct xenfb_info {
+ unsigned char *fb;
+ struct fb_info *fb_info;
+ int x1, y1, x2, y2; /* dirty rectangle,
+ protected by dirty_lock */
+ spinlock_t dirty_lock;
+ int nr_pages;
+ int irq;
+ struct xenfb_page *page;
+ unsigned long *mfns;
+ int update_wanted; /* XENFB_TYPE_UPDATE wanted */
+
+ struct xenbus_device *xbdev;
+};
+
+static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8;
+
+static int xenfb_remove(struct xenbus_device *);
+static void xenfb_init_shared_page(struct xenfb_info *);
+static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *);
+static void xenfb_disconnect_backend(struct xenfb_info *);
+
+static void xenfb_do_update(struct xenfb_info *info,
+ int x, int y, int w, int h)
+{
+ union xenfb_out_event event;
+ u32 prod;
+
+ event.type = XENFB_TYPE_UPDATE;
+ event.update.x = x;
+ event.update.y = y;
+ event.update.width = w;
+ event.update.height = h;
+
+ prod = info->page->out_prod;
+ /* caller ensures !xenfb_queue_full() */
+ mb(); /* ensure ring space available */
+ XENFB_OUT_RING_REF(info->page, prod) = event;
+ wmb(); /* ensure ring contents visible */
+ info->page->out_prod = prod + 1;
+
+ notify_remote_via_irq(info->irq);
+}
+
+static int xenfb_queue_full(struct xenfb_info *info)
+{
+ u32 cons, prod;
+
+ prod = info->page->out_prod;
+ cons = info->page->out_cons;
+ return prod - cons == XENFB_OUT_RING_LEN;
+}
+
+static void xenfb_refresh(struct xenfb_info *info,
+ int x1, int y1, int w, int h)
+{
+ unsigned long flags;
+ int y2 = y1 + h - 1;
+ int x2 = x1 + w - 1;
+
+ if (!info->update_wanted)
+ return;
+
+ spin_lock_irqsave(&info->dirty_lock, flags);
+
+ /* Combine with dirty rectangle: */
+ if (info->y1 < y1)
+ y1 = info->y1;
+ if (info->y2 > y2)
+ y2 = info->y2;
+ if (info->x1 < x1)
+ x1 = info->x1;
+ if (info->x2 > x2)
+ x2 = info->x2;
+
+ if (xenfb_queue_full(info)) {
+ /* Can't send right now, stash it in the dirty rectangle */
+ info->x1 = x1;
+ info->x2 = x2;
+ info->y1 = y1;
+ info->y2 = y2;
+ spin_unlock_irqrestore(&info->dirty_lock, flags);
+ return;
+ }
+
+ /* Clear dirty rectangle: */
+ info->x1 = info->y1 = INT_MAX;
+ info->x2 = info->y2 = 0;
+
+ spin_unlock_irqrestore(&info->dirty_lock, flags);
+
+ if (x1 <= x2 && y1 <= y2)
+ xenfb_do_update(info, x1, y1, x2 - x1 + 1, y2 - y1 + 1);
+}
+
+static void xenfb_deferred_io(struct fb_info *fb_info,
+ struct list_head *pagelist)
+{
+ struct xenfb_info *info = fb_info->par;
+ struct page *page;
+ unsigned long beg, end;
+ int y1, y2, miny, maxy;
+
+ miny = INT_MAX;
+ maxy = 0;
+ list_for_each_entry(page, pagelist, lru) {
+ beg = page->index << PAGE_SHIFT;
+ end = beg + PAGE_SIZE - 1;
+ y1 = beg / fb_info->fix.line_length;
+ y2 = end / fb_info->fix.line_length;
+ if (y2 >= fb_info->var.yres)
+ y2 = fb_info->var.yres - 1;
+ if (miny > y1)
+ miny = y1;
+ if (maxy < y2)
+ maxy = y2;
+ }
+ xenfb_refresh(info, 0, miny, fb_info->var.xres, maxy - miny + 1);
+}
+
+static struct fb_deferred_io xenfb_defio = {
+ .delay = HZ / 20,
+ .deferred_io = xenfb_deferred_io,
+};
+
+static int xenfb_setcolreg(unsigned regno, unsigned red, unsigned green,
+ unsigned blue, unsigned transp,
+ struct fb_info *info)
+{
+ u32 v;
+
+ if (regno > info->cmap.len)
+ return 1;
+
+#define CNVT_TOHW(val, width) ((((val)<<(width))+0x7FFF-(val))>>16)
+ red = CNVT_TOHW(red, info->var.red.length);
+ green = CNVT_TOHW(green, info->var.green.length);
+ blue = CNVT_TOHW(blue, info->var.blue.length);
+ transp = CNVT_TOHW(transp, info->var.transp.length);
+#undef CNVT_TOHW
+
+ v = (red << info->var.red.offset) |
+ (green << info->var.green.offset) |
+ (blue << info->var.blue.offset);
+
+ switch (info->var.bits_per_pixel) {
+ case 16:
+ case 24:
+ case 32:
+ ((u32 *)info->pseudo_palette)[regno] = v;
+ break;
+ }
+
+ return 0;
+}
+
+static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
+{
+ struct xenfb_info *info = p->par;
+
+ sys_fillrect(p, rect);
+ xenfb_refresh(info, rect->dx, rect->dy, rect->width, rect->height);
+}
+
+static void xenfb_imageblit(struct fb_info *p, const struct fb_image *image)
+{
+ struct xenfb_info *info = p->par;
+
+ sys_imageblit(p, image);
+ xenfb_refresh(info, image->dx, image->dy, image->width, image->height);
+}
+
+static void xenfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
+{
+ struct xenfb_info *info = p->par;
+
+ sys_copyarea(p, area);
+ xenfb_refresh(info, area->dx, area->dy, area->width, area->height);
+}
+
+static ssize_t xenfb_write(struct fb_info *p, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct xenfb_info *info = p->par;
+ ssize_t res;
+
+ res = fb_sys_write(p, buf, count, ppos);
+ xenfb_refresh(info, 0, 0, info->page->width, info->page->height);
+ return res;
+}
+
+static struct fb_ops xenfb_fb_ops = {
+ .owner = THIS_MODULE,
+ .fb_read = fb_sys_read,
+ .fb_write = xenfb_write,
+ .fb_setcolreg = xenfb_setcolreg,
+ .fb_fillrect = xenfb_fillrect,
+ .fb_copyarea = xenfb_copyarea,
+ .fb_imageblit = xenfb_imageblit,
+};
+
+static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
+{
+ /*
+ * No in events recognized, simply ignore them all.
+ * If you need to recognize some, see xen-kbdfront's
+ * input_handler() for how to do that.
+ */
+ struct xenfb_info *info = dev_id;
+ struct xenfb_page *page = info->page;
+
+ if (page->in_cons != page->in_prod) {
+ info->page->in_cons = info->page->in_prod;
+ notify_remote_via_irq(info->irq);
+ }
+
+ /* Flush dirty rectangle: */
+ xenfb_refresh(info, INT_MAX, INT_MAX, -INT_MAX, -INT_MAX);
+
+ return IRQ_HANDLED;
+}
+
+static int __devinit xenfb_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ struct xenfb_info *info;
+ struct fb_info *fb_info;
+ int ret;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (info == NULL) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
+ return -ENOMEM;
+ }
+ dev->dev.driver_data = info;
+ info->xbdev = dev;
+ info->irq = -1;
+ info->x1 = info->y1 = INT_MAX;
+ spin_lock_init(&info->dirty_lock);
+
+ info->fb = vmalloc(xenfb_mem_len);
+ if (info->fb == NULL)
+ goto error_nomem;
+ memset(info->fb, 0, xenfb_mem_len);
+
+ info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages);
+ if (!info->mfns)
+ goto error_nomem;
+
+ /* set up shared page */
+ info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!info->page)
+ goto error_nomem;
+
+ xenfb_init_shared_page(info);
+
+ /* abusing framebuffer_alloc() to allocate pseudo_palette */
+ fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL);
+ if (fb_info == NULL)
+ goto error_nomem;
+
+ /* complete the abuse: */
+ fb_info->pseudo_palette = fb_info->par;
+ fb_info->par = info;
+
+ fb_info->screen_base = info->fb;
+
+ fb_info->fbops = &xenfb_fb_ops;
+ fb_info->var.xres_virtual = fb_info->var.xres = info->page->width;
+ fb_info->var.yres_virtual = fb_info->var.yres = info->page->height;
+ fb_info->var.bits_per_pixel = info->page->depth;
+
+ fb_info->var.red = (struct fb_bitfield){16, 8, 0};
+ fb_info->var.green = (struct fb_bitfield){8, 8, 0};
+ fb_info->var.blue = (struct fb_bitfield){0, 8, 0};
+
+ fb_info->var.activate = FB_ACTIVATE_NOW;
+ fb_info->var.height = -1;
+ fb_info->var.width = -1;
+ fb_info->var.vmode = FB_VMODE_NONINTERLACED;
+
+ fb_info->fix.visual = FB_VISUAL_TRUECOLOR;
+ fb_info->fix.line_length = info->page->line_length;
+ fb_info->fix.smem_start = 0;
+ fb_info->fix.smem_len = xenfb_mem_len;
+ strcpy(fb_info->fix.id, "xen");
+ fb_info->fix.type = FB_TYPE_PACKED_PIXELS;
+ fb_info->fix.accel = FB_ACCEL_NONE;
+
+ fb_info->flags = FBINFO_FLAG_DEFAULT;
+
+ ret = fb_alloc_cmap(&fb_info->cmap, 256, 0);
+ if (ret < 0) {
+ framebuffer_release(fb_info);
+ xenbus_dev_fatal(dev, ret, "fb_alloc_cmap");
+ goto error;
+ }
+
+ fb_info->fbdefio = &xenfb_defio;
+ fb_deferred_io_init(fb_info);
+
+ ret = register_framebuffer(fb_info);
+ if (ret) {
+ fb_deferred_io_cleanup(fb_info);
+ fb_dealloc_cmap(&fb_info->cmap);
+ framebuffer_release(fb_info);
+ xenbus_dev_fatal(dev, ret, "register_framebuffer");
+ goto error;
+ }
+ info->fb_info = fb_info;
+
+ ret = xenfb_connect_backend(dev, info);
+ if (ret < 0)
+ goto error;
+
+ return 0;
+
+ error_nomem:
+ ret = -ENOMEM;
+ xenbus_dev_fatal(dev, ret, "allocating device memory");
+ error:
+ xenfb_remove(dev);
+ return ret;
+}
+
+static int xenfb_resume(struct xenbus_device *dev)
+{
+ struct xenfb_info *info = dev->dev.driver_data;
+
+ xenfb_disconnect_backend(info);
+ xenfb_init_shared_page(info);
+ return xenfb_connect_backend(dev, info);
+}
+
+static int xenfb_remove(struct xenbus_device *dev)
+{
+ struct xenfb_info *info = dev->dev.driver_data;
+
+ xenfb_disconnect_backend(info);
+ if (info->fb_info) {
+ fb_deferred_io_cleanup(info->fb_info);
+ unregister_framebuffer(info->fb_info);
+ fb_dealloc_cmap(&info->fb_info->cmap);
+ framebuffer_release(info->fb_info);
+ }
+ free_page((unsigned long)info->page);
+ vfree(info->mfns);
+ vfree(info->fb);
+ kfree(info);
+
+ return 0;
+}
+
+static unsigned long vmalloc_to_mfn(void *address)
+{
+ return pfn_to_mfn(vmalloc_to_pfn(address));
+}
+
+static void xenfb_init_shared_page(struct xenfb_info *info)
+{
+ int i;
+
+ for (i = 0; i < info->nr_pages; i++)
+ info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE);
+
+ info->page->pd[0] = vmalloc_to_mfn(info->mfns);
+ info->page->pd[1] = 0;
+ info->page->width = XENFB_WIDTH;
+ info->page->height = XENFB_HEIGHT;
+ info->page->depth = XENFB_DEPTH;
+ info->page->line_length = (info->page->depth / 8) * info->page->width;
+ info->page->mem_length = xenfb_mem_len;
+ info->page->in_cons = info->page->in_prod = 0;
+ info->page->out_cons = info->page->out_prod = 0;
+}
+
+static int xenfb_connect_backend(struct xenbus_device *dev,
+ struct xenfb_info *info)
+{
+ int ret, evtchn;
+ struct xenbus_transaction xbt;
+
+ ret = xenbus_alloc_evtchn(dev, &evtchn);
+ if (ret)
+ return ret;
+ ret = bind_evtchn_to_irqhandler(evtchn, xenfb_event_handler,
+ 0, dev->devicetype, info);
+ if (ret < 0) {
+ xenbus_free_evtchn(dev, evtchn);
+ xenbus_dev_fatal(dev, ret, "bind_evtchn_to_irqhandler");
+ return ret;
+ }
+ info->irq = ret;
+
+ again:
+ ret = xenbus_transaction_start(&xbt);
+ if (ret) {
+ xenbus_dev_fatal(dev, ret, "starting transaction");
+ return ret;
+ }
+ ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
+ virt_to_mfn(info->page));
+ if (ret)
+ goto error_xenbus;
+ ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
+ evtchn);
+ if (ret)
+ goto error_xenbus;
+ ret = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
+ XEN_IO_PROTO_ABI_NATIVE);
+ if (ret)
+ goto error_xenbus;
+ ret = xenbus_printf(xbt, dev->nodename, "feature-update", "1");
+ if (ret)
+ goto error_xenbus;
+ ret = xenbus_transaction_end(xbt, 0);
+ if (ret) {
+ if (ret == -EAGAIN)
+ goto again;
+ xenbus_dev_fatal(dev, ret, "completing transaction");
+ return ret;
+ }
+
+ xenbus_switch_state(dev, XenbusStateInitialised);
+ return 0;
+
+ error_xenbus:
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, ret, "writing xenstore");
+ return ret;
+}
+
+static void xenfb_disconnect_backend(struct xenfb_info *info)
+{
+ if (info->irq >= 0)
+ unbind_from_irqhandler(info->irq, info);
+ info->irq = -1;
+}
+
+static void xenfb_backend_changed(struct xenbus_device *dev,
+ enum xenbus_state backend_state)
+{
+ struct xenfb_info *info = dev->dev.driver_data;
+ int val;
+
+ switch (backend_state) {
+ case XenbusStateInitialising:
+ case XenbusStateInitialised:
+ case XenbusStateUnknown:
+ case XenbusStateClosed:
+ break;
+
+ case XenbusStateInitWait:
+InitWait:
+ xenbus_switch_state(dev, XenbusStateConnected);
+ break;
+
+ case XenbusStateConnected:
+ /*
+ * Work around xenbus race condition: If backend goes
+ * through InitWait to Connected fast enough, we can
+ * get Connected twice here.
+ */
+ if (dev->state != XenbusStateConnected)
+ goto InitWait; /* no InitWait seen yet, fudge it */
+
+ if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "request-update", "%d", &val) < 0)
+ val = 0;
+ if (val)
+ info->update_wanted = 1;
+ break;
+
+ case XenbusStateClosing:
+ xenbus_frontend_closed(dev);
+ break;
+ }
+}
+
+static struct xenbus_device_id xenfb_ids[] = {
+ { "vfb" },
+ { "" }
+};
+
+static struct xenbus_driver xenfb = {
+ .name = "vfb",
+ .owner = THIS_MODULE,
+ .ids = xenfb_ids,
+ .probe = xenfb_probe,
+ .remove = xenfb_remove,
+ .resume = xenfb_resume,
+ .otherend_changed = xenfb_backend_changed,
+};
+
+static int __init xenfb_init(void)
+{
+ if (!is_running_on_xen())
+ return -ENODEV;
+
+ /* Nothing to do if running in dom0. */
+ if (is_initial_xendomain())
+ return -ENODEV;
+
+ return xenbus_register_frontend(&xenfb);
+}
+
+static void __exit xenfb_cleanup(void)
+{
+ xenbus_unregister_driver(&xenfb);
+}
+
+module_init(xenfb_init);
+module_exit(xenfb_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
new file mode 100644
index 000000000000..4b75a16de009
--- /dev/null
+++ b/drivers/xen/Kconfig
@@ -0,0 +1,19 @@
+config XEN_BALLOON
+ bool "Xen memory balloon driver"
+ depends on XEN
+ default y
+ help
+ The balloon driver allows the Xen domain to request more memory from
+ the system to expand the domain's memory allocation, or alternatively
+ return unneeded memory to the system.
+
+config XEN_SCRUB_PAGES
+ bool "Scrub pages before returning them to system"
+ depends on XEN_BALLOON
+ default y
+ help
+ Scrub pages before returning them to the system for reuse by
+ other domains. This makes sure that any confidential data
+ is not accidentally visible to other domains. It is more
+ secure, but slightly less efficient.
+ If in doubt, say yes.
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 56592f0d6cef..37af04f1ffd9 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,2 +1,4 @@
-obj-y += grant-table.o
+obj-y += grant-table.o features.o events.o
obj-y += xenbus/
+obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
+obj-$(CONFIG_XEN_BALLOON) += balloon.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
new file mode 100644
index 000000000000..ab25ba6cbbb9
--- /dev/null
+++ b/drivers/xen/balloon.c
@@ -0,0 +1,712 @@
+/******************************************************************************
+ * balloon.c
+ *
+ * Xen balloon driver - enables returning/claiming memory to/from Xen.
+ *
+ * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/mutex.h>
+#include <linux/highmem.h>
+#include <linux/list.h>
+#include <linux/sysdev.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+
+#include <xen/interface/memory.h>
+#include <xen/balloon.h>
+#include <xen/xenbus.h>
+#include <xen/features.h>
+#include <xen/page.h>
+
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+
+#define BALLOON_CLASS_NAME "memory"
+
+struct balloon_stats {
+ /* We aim for 'current allocation' == 'target allocation'. */
+ unsigned long current_pages;
+ unsigned long target_pages;
+ /* We may hit the hard limit in Xen. If we do then we remember it. */
+ unsigned long hard_limit;
+ /*
+ * Drivers may alter the memory reservation independently, but they
+ * must inform the balloon driver so we avoid hitting the hard limit.
+ */
+ unsigned long driver_pages;
+ /* Number of pages in high- and low-memory balloons. */
+ unsigned long balloon_low;
+ unsigned long balloon_high;
+};
+
+static DEFINE_MUTEX(balloon_mutex);
+
+static struct sys_device balloon_sysdev;
+
+static int register_balloon(struct sys_device *sysdev);
+
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and driver_pages, and
+ * balloon lists.
+ */
+static DEFINE_SPINLOCK(balloon_lock);
+
+static struct balloon_stats balloon_stats;
+
+/* We increase/decrease in batches which fit in a page */
+static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
+
+/* VM /proc information for memory */
+extern unsigned long totalram_pages;
+
+#ifdef CONFIG_HIGHMEM
+extern unsigned long totalhigh_pages;
+#define inc_totalhigh_pages() (totalhigh_pages++)
+#define dec_totalhigh_pages() (totalhigh_pages--)
+#else
+#define inc_totalhigh_pages() do {} while(0)
+#define dec_totalhigh_pages() do {} while(0)
+#endif
+
+/* List of ballooned pages, threaded through the mem_map array. */
+static LIST_HEAD(ballooned_pages);
+
+/* Main work function, always executed in process context. */
+static void balloon_process(struct work_struct *work);
+static DECLARE_WORK(balloon_worker, balloon_process);
+static struct timer_list balloon_timer;
+
+/* When ballooning out (allocating memory to return to Xen) we don't really
+ want the kernel to try too hard since that can trigger the oom killer. */
+#define GFP_BALLOON \
+ (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
+
+static void scrub_page(struct page *page)
+{
+#ifdef CONFIG_XEN_SCRUB_PAGES
+ if (PageHighMem(page)) {
+ void *v = kmap(page);
+ clear_page(v);
+ kunmap(v);
+ } else {
+ void *v = page_address(page);
+ clear_page(v);
+ }
+#endif
+}
+
+/* balloon_append: add the given page to the balloon. */
+static void balloon_append(struct page *page)
+{
+ /* Lowmem is re-populated first, so highmem pages go at list tail. */
+ if (PageHighMem(page)) {
+ list_add_tail(&page->lru, &ballooned_pages);
+ balloon_stats.balloon_high++;
+ dec_totalhigh_pages();
+ } else {
+ list_add(&page->lru, &ballooned_pages);
+ balloon_stats.balloon_low++;
+ }
+}
+
+/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
+static struct page *balloon_retrieve(void)
+{
+ struct page *page;
+
+ if (list_empty(&ballooned_pages))
+ return NULL;
+
+ page = list_entry(ballooned_pages.next, struct page, lru);
+ list_del(&page->lru);
+
+ if (PageHighMem(page)) {
+ balloon_stats.balloon_high--;
+ inc_totalhigh_pages();
+ }
+ else
+ balloon_stats.balloon_low--;
+
+ return page;
+}
+
+static struct page *balloon_first_page(void)
+{
+ if (list_empty(&ballooned_pages))
+ return NULL;
+ return list_entry(ballooned_pages.next, struct page, lru);
+}
+
+static struct page *balloon_next_page(struct page *page)
+{
+ struct list_head *next = page->lru.next;
+ if (next == &ballooned_pages)
+ return NULL;
+ return list_entry(next, struct page, lru);
+}
+
+static void balloon_alarm(unsigned long unused)
+{
+ schedule_work(&balloon_worker);
+}
+
+static unsigned long current_target(void)
+{
+ unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit);
+
+ target = min(target,
+ balloon_stats.current_pages +
+ balloon_stats.balloon_low +
+ balloon_stats.balloon_high);
+
+ return target;
+}
+
+static int increase_reservation(unsigned long nr_pages)
+{
+ unsigned long pfn, i, flags;
+ struct page *page;
+ long rc;
+ struct xen_memory_reservation reservation = {
+ .address_bits = 0,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+
+ if (nr_pages > ARRAY_SIZE(frame_list))
+ nr_pages = ARRAY_SIZE(frame_list);
+
+ spin_lock_irqsave(&balloon_lock, flags);
+
+ page = balloon_first_page();
+ for (i = 0; i < nr_pages; i++) {
+ BUG_ON(page == NULL);
+ frame_list[i] = page_to_pfn(page);
+ page = balloon_next_page(page);
+ }
+
+ reservation.extent_start = (unsigned long)frame_list;
+ reservation.nr_extents = nr_pages;
+ rc = HYPERVISOR_memory_op(
+ XENMEM_populate_physmap, &reservation);
+ if (rc < nr_pages) {
+ if (rc > 0) {
+ int ret;
+
+ /* We hit the Xen hard limit: reprobe. */
+ reservation.nr_extents = rc;
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation);
+ BUG_ON(ret != rc);
+ }
+ if (rc >= 0)
+ balloon_stats.hard_limit = (balloon_stats.current_pages + rc -
+ balloon_stats.driver_pages);
+ goto out;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ page = balloon_retrieve();
+ BUG_ON(page == NULL);
+
+ pfn = page_to_pfn(page);
+ BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
+ phys_to_machine_mapping_valid(pfn));
+
+ set_phys_to_machine(pfn, frame_list[i]);
+
+ /* Link back into the page tables if not highmem. */
+ if (pfn < max_low_pfn) {
+ int ret;
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ mfn_pte(frame_list[i], PAGE_KERNEL),
+ 0);
+ BUG_ON(ret);
+ }
+
+ /* Relinquish the page back to the allocator. */
+ ClearPageReserved(page);
+ init_page_count(page);
+ __free_page(page);
+ }
+
+ balloon_stats.current_pages += nr_pages;
+ totalram_pages = balloon_stats.current_pages;
+
+ out:
+ spin_unlock_irqrestore(&balloon_lock, flags);
+
+ return 0;
+}
+
+static int decrease_reservation(unsigned long nr_pages)
+{
+ unsigned long pfn, i, flags;
+ struct page *page;
+ int need_sleep = 0;
+ int ret;
+ struct xen_memory_reservation reservation = {
+ .address_bits = 0,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+
+ if (nr_pages > ARRAY_SIZE(frame_list))
+ nr_pages = ARRAY_SIZE(frame_list);
+
+ for (i = 0; i < nr_pages; i++) {
+ if ((page = alloc_page(GFP_BALLOON)) == NULL) {
+ nr_pages = i;
+ need_sleep = 1;
+ break;
+ }
+
+ pfn = page_to_pfn(page);
+ frame_list[i] = pfn_to_mfn(pfn);
+
+ scrub_page(page);
+ }
+
+ /* Ensure that ballooned highmem pages don't have kmaps. */
+ kmap_flush_unused();
+ flush_tlb_all();
+
+ spin_lock_irqsave(&balloon_lock, flags);
+
+ /* No more mappings: invalidate P2M and add to balloon. */
+ for (i = 0; i < nr_pages; i++) {
+ pfn = mfn_to_pfn(frame_list[i]);
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ balloon_append(pfn_to_page(pfn));
+ }
+
+ reservation.extent_start = (unsigned long)frame_list;
+ reservation.nr_extents = nr_pages;
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+ BUG_ON(ret != nr_pages);
+
+ balloon_stats.current_pages -= nr_pages;
+ totalram_pages = balloon_stats.current_pages;
+
+ spin_unlock_irqrestore(&balloon_lock, flags);
+
+ return need_sleep;
+}
+
+/*
+ * We avoid multiple worker processes conflicting via the balloon mutex.
+ * We may of course race updates of the target counts (which are protected
+ * by the balloon lock), or with changes to the Xen hard limit, but we will
+ * recover from these in time.
+ */
+static void balloon_process(struct work_struct *work)
+{
+ int need_sleep = 0;
+ long credit;
+
+ mutex_lock(&balloon_mutex);
+
+ do {
+ credit = current_target() - balloon_stats.current_pages;
+ if (credit > 0)
+ need_sleep = (increase_reservation(credit) != 0);
+ if (credit < 0)
+ need_sleep = (decrease_reservation(-credit) != 0);
+
+#ifndef CONFIG_PREEMPT
+ if (need_resched())
+ schedule();
+#endif
+ } while ((credit != 0) && !need_sleep);
+
+ /* Schedule more work if there is some still to be done. */
+ if (current_target() != balloon_stats.current_pages)
+ mod_timer(&balloon_timer, jiffies + HZ);
+
+ mutex_unlock(&balloon_mutex);
+}
+
+/* Resets the Xen limit, sets new target, and kicks off processing. */
+void balloon_set_new_target(unsigned long target)
+{
+ /* No need for lock. Not read-modify-write updates. */
+ balloon_stats.hard_limit = ~0UL;
+ balloon_stats.target_pages = target;
+ schedule_work(&balloon_worker);
+}
+
+static struct xenbus_watch target_watch =
+{
+ .node = "memory/target"
+};
+
+/* React to a change in the target key */
+static void watch_target(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ unsigned long long new_target;
+ int err;
+
+ err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
+ if (err != 1) {
+ /* This is ok (for domain0 at least) - so just return */
+ return;
+ }
+
+ /* The given memory/target value is in KiB, so it needs converting to
+ * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
+ */
+ balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+}
+
+static int balloon_init_watcher(struct notifier_block *notifier,
+ unsigned long event,
+ void *data)
+{
+ int err;
+
+ err = register_xenbus_watch(&target_watch);
+ if (err)
+ printk(KERN_ERR "Failed to set balloon watcher\n");
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block xenstore_notifier;
+
+static int __init balloon_init(void)
+{
+ unsigned long pfn;
+ struct page *page;
+
+ if (!is_running_on_xen())
+ return -ENODEV;
+
+ pr_info("xen_balloon: Initialising balloon driver.\n");
+
+ balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
+ totalram_pages = balloon_stats.current_pages;
+ balloon_stats.target_pages = balloon_stats.current_pages;
+ balloon_stats.balloon_low = 0;
+ balloon_stats.balloon_high = 0;
+ balloon_stats.driver_pages = 0UL;
+ balloon_stats.hard_limit = ~0UL;
+
+ init_timer(&balloon_timer);
+ balloon_timer.data = 0;
+ balloon_timer.function = balloon_alarm;
+
+ register_balloon(&balloon_sysdev);
+
+ /* Initialise the balloon with excess memory space. */
+ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+ page = pfn_to_page(pfn);
+ if (!PageReserved(page))
+ balloon_append(page);
+ }
+
+ target_watch.callback = watch_target;
+ xenstore_notifier.notifier_call = balloon_init_watcher;
+
+ register_xenstore_notifier(&xenstore_notifier);
+
+ return 0;
+}
+
+subsys_initcall(balloon_init);
+
+static void balloon_exit(void)
+{
+ /* XXX - release balloon here */
+ return;
+}
+
+module_exit(balloon_exit);
+
+static void balloon_update_driver_allowance(long delta)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&balloon_lock, flags);
+ balloon_stats.driver_pages += delta;
+ spin_unlock_irqrestore(&balloon_lock, flags);
+}
+
+static int dealloc_pte_fn(
+ pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
+{
+ unsigned long mfn = pte_mfn(*pte);
+ int ret;
+ struct xen_memory_reservation reservation = {
+ .nr_extents = 1,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+ reservation.extent_start = (unsigned long)&mfn;
+ set_pte_at(&init_mm, addr, pte, __pte_ma(0ull));
+ set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+ BUG_ON(ret != 1);
+ return 0;
+}
+
+static struct page **alloc_empty_pages_and_pagevec(int nr_pages)
+{
+ unsigned long vaddr, flags;
+ struct page *page, **pagevec;
+ int i, ret;
+
+ pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
+ if (pagevec == NULL)
+ return NULL;
+
+ for (i = 0; i < nr_pages; i++) {
+ page = pagevec[i] = alloc_page(GFP_KERNEL);
+ if (page == NULL)
+ goto err;
+
+ vaddr = (unsigned long)page_address(page);
+
+ scrub_page(page);
+
+ spin_lock_irqsave(&balloon_lock, flags);
+
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ unsigned long gmfn = page_to_pfn(page);
+ struct xen_memory_reservation reservation = {
+ .nr_extents = 1,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+ reservation.extent_start = (unsigned long)&gmfn;
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation);
+ if (ret == 1)
+ ret = 0; /* success */
+ } else {
+ ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
+ dealloc_pte_fn, NULL);
+ }
+
+ if (ret != 0) {
+ spin_unlock_irqrestore(&balloon_lock, flags);
+ __free_page(page);
+ goto err;
+ }
+
+ totalram_pages = --balloon_stats.current_pages;
+
+ spin_unlock_irqrestore(&balloon_lock, flags);
+ }
+
+ out:
+ schedule_work(&balloon_worker);
+ flush_tlb_all();
+ return pagevec;
+
+ err:
+ spin_lock_irqsave(&balloon_lock, flags);
+ while (--i >= 0)
+ balloon_append(pagevec[i]);
+ spin_unlock_irqrestore(&balloon_lock, flags);
+ kfree(pagevec);
+ pagevec = NULL;
+ goto out;
+}
+
+static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
+{
+ unsigned long flags;
+ int i;
+
+ if (pagevec == NULL)
+ return;
+
+ spin_lock_irqsave(&balloon_lock, flags);
+ for (i = 0; i < nr_pages; i++) {
+ BUG_ON(page_count(pagevec[i]) != 1);
+ balloon_append(pagevec[i]);
+ }
+ spin_unlock_irqrestore(&balloon_lock, flags);
+
+ kfree(pagevec);
+
+ schedule_work(&balloon_worker);
+}
+
+static void balloon_release_driver_page(struct page *page)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&balloon_lock, flags);
+ balloon_append(page);
+ balloon_stats.driver_pages--;
+ spin_unlock_irqrestore(&balloon_lock, flags);
+
+ schedule_work(&balloon_worker);
+}
+
+
+#define BALLOON_SHOW(name, format, args...) \
+ static ssize_t show_##name(struct sys_device *dev, \
+ char *buf) \
+ { \
+ return sprintf(buf, format, ##args); \
+ } \
+ static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
+
+BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
+BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
+BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
+BALLOON_SHOW(hard_limit_kb,
+ (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n",
+ (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
+BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
+
+static ssize_t show_target_kb(struct sys_device *dev, char *buf)
+{
+ return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
+}
+
+static ssize_t store_target_kb(struct sys_device *dev,
+ const char *buf,
+ size_t count)
+{
+ char memstring[64], *endchar;
+ unsigned long long target_bytes;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (count <= 1)
+ return -EBADMSG; /* runt */
+ if (count > sizeof(memstring))
+ return -EFBIG; /* too long */
+ strcpy(memstring, buf);
+
+ target_bytes = memparse(memstring, &endchar);
+ balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+
+ return count;
+}
+
+static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
+ show_target_kb, store_target_kb);
+
+static struct sysdev_attribute *balloon_attrs[] = {
+ &attr_target_kb,
+};
+
+static struct attribute *balloon_info_attrs[] = {
+ &attr_current_kb.attr,
+ &attr_low_kb.attr,
+ &attr_high_kb.attr,
+ &attr_hard_limit_kb.attr,
+ &attr_driver_kb.attr,
+ NULL
+};
+
+static struct attribute_group balloon_info_group = {
+ .name = "info",
+ .attrs = balloon_info_attrs,
+};
+
+static struct sysdev_class balloon_sysdev_class = {
+ .name = BALLOON_CLASS_NAME,
+};
+
+static int register_balloon(struct sys_device *sysdev)
+{
+ int i, error;
+
+ error = sysdev_class_register(&balloon_sysdev_class);
+ if (error)
+ return error;
+
+ sysdev->id = 0;
+ sysdev->cls = &balloon_sysdev_class;
+
+ error = sysdev_register(sysdev);
+ if (error) {
+ sysdev_class_unregister(&balloon_sysdev_class);
+ return error;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
+ error = sysdev_create_file(sysdev, balloon_attrs[i]);
+ if (error)
+ goto fail;
+ }
+
+ error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
+ if (error)
+ goto fail;
+
+ return 0;
+
+ fail:
+ while (--i >= 0)
+ sysdev_remove_file(sysdev, balloon_attrs[i]);
+ sysdev_unregister(sysdev);
+ sysdev_class_unregister(&balloon_sysdev_class);
+ return error;
+}
+
+static void unregister_balloon(struct sys_device *sysdev)
+{
+ int i;
+
+ sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
+ for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
+ sysdev_remove_file(sysdev, balloon_attrs[i]);
+ sysdev_unregister(sysdev);
+ sysdev_class_unregister(&balloon_sysdev_class);
+}
+
+static void balloon_sysfs_exit(void)
+{
+ unregister_balloon(&balloon_sysdev);
+}
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
new file mode 100644
index 000000000000..4f0f22b020ea
--- /dev/null
+++ b/drivers/xen/events.c
@@ -0,0 +1,674 @@
+/*
+ * Xen event channels
+ *
+ * Xen models interrupts with abstract event channels. Because each
+ * domain gets 1024 event channels, but NR_IRQS is not that large, we
+ * must dynamically map irqs<->event channels. The event channels
+ * interface with the rest of the kernel by defining a xen interrupt
+ * chip. When an event is received, it is mapped to an irq and sent
+ * through the normal interrupt processing path.
+ *
+ * There are four kinds of events which can be mapped to an event
+ * channel:
+ *
+ * 1. Inter-domain notifications. This includes all the virtual
+ * device events, since they're driven by front-ends in another domain
+ * (typically dom0).
+ * 2. VIRQs, typically used for timers. These are per-cpu events.
+ * 3. IPIs.
+ * 4. Hardware interrupts. Not supported at present.
+ *
+ * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
+ */
+
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <asm/ptrace.h>
+#include <asm/irq.h>
+#include <asm/sync_bitops.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+
+/*
+ * This lock protects updates to the following mapping and reference-count
+ * arrays. The lock does not need to be acquired to read the mapping tables.
+ */
+static DEFINE_SPINLOCK(irq_mapping_update_lock);
+
+/* IRQ <-> VIRQ mapping. */
+static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
+
+/* IRQ <-> IPI mapping */
+static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1};
+
+/* Packed IRQ information: binding type, sub-type index, and event channel. */
+struct packed_irq
+{
+ unsigned short evtchn;
+ unsigned char index;
+ unsigned char type;
+};
+
+static struct packed_irq irq_info[NR_IRQS];
+
+/* Binding types. */
+enum {
+ IRQT_UNBOUND,
+ IRQT_PIRQ,
+ IRQT_VIRQ,
+ IRQT_IPI,
+ IRQT_EVTCHN
+};
+
+/* Convenient shorthand for packed representation of an unbound IRQ. */
+#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0)
+
+static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
+ [0 ... NR_EVENT_CHANNELS-1] = -1
+};
+static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
+static u8 cpu_evtchn[NR_EVENT_CHANNELS];
+
+/* Reference counts for bindings to IRQs. */
+static int irq_bindcount[NR_IRQS];
+
+/* Xen will never allocate port zero for any purpose. */
+#define VALID_EVTCHN(chn) ((chn) != 0)
+
+/*
+ * Force a proper event-channel callback from Xen after clearing the
+ * callback mask. We do this in a very simple manner, by making a call
+ * down into Xen. The pending flag will be checked by Xen on return.
+ */
+void force_evtchn_callback(void)
+{
+ (void)HYPERVISOR_xen_version(0, NULL);
+}
+EXPORT_SYMBOL_GPL(force_evtchn_callback);
+
+static struct irq_chip xen_dynamic_chip;
+
+/* Constructor for packed IRQ information. */
+static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn)
+{
+ return (struct packed_irq) { evtchn, index, type };
+}
+
+/*
+ * Accessors for packed IRQ information.
+ */
+static inline unsigned int evtchn_from_irq(int irq)
+{
+ return irq_info[irq].evtchn;
+}
+
+static inline unsigned int index_from_irq(int irq)
+{
+ return irq_info[irq].index;
+}
+
+static inline unsigned int type_from_irq(int irq)
+{
+ return irq_info[irq].type;
+}
+
+static inline unsigned long active_evtchns(unsigned int cpu,
+ struct shared_info *sh,
+ unsigned int idx)
+{
+ return (sh->evtchn_pending[idx] &
+ cpu_evtchn_mask[cpu][idx] &
+ ~sh->evtchn_mask[idx]);
+}
+
+static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
+{
+ int irq = evtchn_to_irq[chn];
+
+ BUG_ON(irq == -1);
+#ifdef CONFIG_SMP
+ irq_desc[irq].affinity = cpumask_of_cpu(cpu);
+#endif
+
+ __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
+ __set_bit(chn, cpu_evtchn_mask[cpu]);
+
+ cpu_evtchn[chn] = cpu;
+}
+
+static void init_evtchn_cpu_bindings(void)
+{
+#ifdef CONFIG_SMP
+ int i;
+ /* By default all event channels notify CPU#0. */
+ for (i = 0; i < NR_IRQS; i++)
+ irq_desc[i].affinity = cpumask_of_cpu(0);
+#endif
+
+ memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
+ memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
+}
+
+static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
+{
+ return cpu_evtchn[evtchn];
+}
+
+static inline void clear_evtchn(int port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ sync_clear_bit(port, &s->evtchn_pending[0]);
+}
+
+static inline void set_evtchn(int port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ sync_set_bit(port, &s->evtchn_pending[0]);
+}
+
+
+/**
+ * notify_remote_via_irq - send event to remote end of event channel via irq
+ * @irq: irq of event channel to send event to
+ *
+ * Unlike notify_remote_via_evtchn(), this is safe to use across
+ * save/restore. Notifications on a broken connection are silently
+ * dropped.
+ */
+void notify_remote_via_irq(int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+
+ if (VALID_EVTCHN(evtchn))
+ notify_remote_via_evtchn(evtchn);
+}
+EXPORT_SYMBOL_GPL(notify_remote_via_irq);
+
+static void mask_evtchn(int port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ sync_set_bit(port, &s->evtchn_mask[0]);
+}
+
+static void unmask_evtchn(int port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ unsigned int cpu = get_cpu();
+
+ BUG_ON(!irqs_disabled());
+
+ /* Slow path (hypercall) if this is a non-local port. */
+ if (unlikely(cpu != cpu_from_evtchn(port))) {
+ struct evtchn_unmask unmask = { .port = port };
+ (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+ } else {
+ struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
+
+ sync_clear_bit(port, &s->evtchn_mask[0]);
+
+ /*
+ * The following is basically the equivalent of
+ * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+ * the interrupt edge' if the channel is masked.
+ */
+ if (sync_test_bit(port, &s->evtchn_pending[0]) &&
+ !sync_test_and_set_bit(port / BITS_PER_LONG,
+ &vcpu_info->evtchn_pending_sel))
+ vcpu_info->evtchn_upcall_pending = 1;
+ }
+
+ put_cpu();
+}
+
+static int find_unbound_irq(void)
+{
+ int irq;
+
+ /* Only allocate from dynirq range */
+ for (irq = 0; irq < NR_IRQS; irq++)
+ if (irq_bindcount[irq] == 0)
+ break;
+
+ if (irq == NR_IRQS)
+ panic("No available IRQ to bind to: increase NR_IRQS!\n");
+
+ return irq;
+}
+
+int bind_evtchn_to_irq(unsigned int evtchn)
+{
+ int irq;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ irq = evtchn_to_irq[evtchn];
+
+ if (irq == -1) {
+ irq = find_unbound_irq();
+
+ dynamic_irq_init(irq);
+ set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+ handle_level_irq, "event");
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
+ }
+
+ irq_bindcount[irq]++;
+
+ spin_unlock(&irq_mapping_update_lock);
+
+ return irq;
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
+
+static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+{
+ struct evtchn_bind_ipi bind_ipi;
+ int evtchn, irq;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ irq = per_cpu(ipi_to_irq, cpu)[ipi];
+ if (irq == -1) {
+ irq = find_unbound_irq();
+ if (irq < 0)
+ goto out;
+
+ dynamic_irq_init(irq);
+ set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+ handle_level_irq, "ipi");
+
+ bind_ipi.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+ &bind_ipi) != 0)
+ BUG();
+ evtchn = bind_ipi.port;
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
+
+ per_cpu(ipi_to_irq, cpu)[ipi] = irq;
+
+ bind_evtchn_to_cpu(evtchn, cpu);
+ }
+
+ irq_bindcount[irq]++;
+
+ out:
+ spin_unlock(&irq_mapping_update_lock);
+ return irq;
+}
+
+
+static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+{
+ struct evtchn_bind_virq bind_virq;
+ int evtchn, irq;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ irq = per_cpu(virq_to_irq, cpu)[virq];
+
+ if (irq == -1) {
+ bind_virq.virq = virq;
+ bind_virq.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq) != 0)
+ BUG();
+ evtchn = bind_virq.port;
+
+ irq = find_unbound_irq();
+
+ dynamic_irq_init(irq);
+ set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+ handle_level_irq, "virq");
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+
+ per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+ bind_evtchn_to_cpu(evtchn, cpu);
+ }
+
+ irq_bindcount[irq]++;
+
+ spin_unlock(&irq_mapping_update_lock);
+
+ return irq;
+}
+
+static void unbind_from_irq(unsigned int irq)
+{
+ struct evtchn_close close;
+ int evtchn = evtchn_from_irq(irq);
+
+ spin_lock(&irq_mapping_update_lock);
+
+ if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) {
+ close.port = evtchn;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+ BUG();
+
+ switch (type_from_irq(irq)) {
+ case IRQT_VIRQ:
+ per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
+ [index_from_irq(irq)] = -1;
+ break;
+ default:
+ break;
+ }
+
+ /* Closed ports are implicitly re-bound to VCPU0. */
+ bind_evtchn_to_cpu(evtchn, 0);
+
+ evtchn_to_irq[evtchn] = -1;
+ irq_info[irq] = IRQ_UNBOUND;
+
+ dynamic_irq_init(irq);
+ }
+
+ spin_unlock(&irq_mapping_update_lock);
+}
+
+int bind_evtchn_to_irqhandler(unsigned int evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id)
+{
+ unsigned int irq;
+ int retval;
+
+ irq = bind_evtchn_to_irq(evtchn);
+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
+ if (retval != 0) {
+ unbind_from_irq(irq);
+ return retval;
+ }
+
+ return irq;
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
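+
+/*
+ * Usage sketch (illustrative only; 'info', 'my_interrupt' and "mydev"
+ * are example names, not callers introduced here): a frontend that has
+ * been handed an event channel by its backend would typically do
+ *
+ *	err = bind_evtchn_to_irqhandler(info->evtchn, my_interrupt,
+ *					IRQF_SAMPLE_RANDOM, "mydev", info);
+ *	if (err < 0)
+ *		goto fail;
+ *	info->irq = err;
+ *
+ * and later tear the binding down with unbind_from_irqhandler(info->irq, info).
+ */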
+
+int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
+ irq_handler_t handler,
+ unsigned long irqflags, const char *devname, void *dev_id)
+{
+ unsigned int irq;
+ int retval;
+
+ irq = bind_virq_to_irq(virq, cpu);
+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
+ if (retval != 0) {
+ unbind_from_irq(irq);
+ return retval;
+ }
+
+ return irq;
+}
+EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
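+
+/*
+ * Example (illustrative; 'my_timer_interrupt' is a placeholder handler,
+ * the real callers live outside this file): per-vcpu virtual IRQs such
+ * as the timer VIRQ are bound with
+ *
+ *	bind_virq_to_irqhandler(VIRQ_TIMER, cpu, my_timer_interrupt,
+ *				IRQF_DISABLED | IRQF_PERCPU, "timer", NULL);
+ */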
+
+int bind_ipi_to_irqhandler(enum ipi_vector ipi,
+ unsigned int cpu,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+{
+ int irq, retval;
+
+ irq = bind_ipi_to_irq(ipi, cpu);
+ if (irq < 0)
+ return irq;
+
+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
+ if (retval != 0) {
+ unbind_from_irq(irq);
+ return retval;
+ }
+
+ return irq;
+}
+
+void unbind_from_irqhandler(unsigned int irq, void *dev_id)
+{
+ free_irq(irq, dev_id);
+ unbind_from_irq(irq);
+}
+EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
+
+void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
+{
+ int irq = per_cpu(ipi_to_irq, cpu)[vector];
+ BUG_ON(irq < 0);
+ notify_remote_via_irq(irq);
+}
+
+irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
+{
+ struct shared_info *sh = HYPERVISOR_shared_info;
+ int cpu = smp_processor_id();
+ int i;
+ unsigned long flags;
+ static DEFINE_SPINLOCK(debug_lock);
+
+ spin_lock_irqsave(&debug_lock, flags);
+
+ printk("vcpu %d\n ", cpu);
+
+ for_each_online_cpu(i) {
+ struct vcpu_info *v = per_cpu(xen_vcpu, i);
+ printk("%d: masked=%d pending=%d event_sel %08lx\n ", i,
+ (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask,
+ v->evtchn_upcall_pending,
+ v->evtchn_pending_sel);
+ }
+ printk("pending:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_pending) - 1; i >= 0; i--)
+ printk("%08lx%s", sh->evtchn_pending[i],
+ i % 8 == 0 ? "\n " : " ");
+ printk("\nmasks:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_mask) - 1; i >= 0; i--)
+ printk("%08lx%s", sh->evtchn_mask[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\nunmasked:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_mask) - 1; i >= 0; i--)
+ printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\npending list:\n");
+ for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+ if (sync_test_bit(i, sh->evtchn_pending)) {
+ printk(" %d: event %d -> irq %d\n",
+ cpu_evtchn[i], i,
+ evtchn_to_irq[i]);
+ }
+ }
+
+ spin_unlock_irqrestore(&debug_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+
+/*
+ * Search the CPU's pending event bitmasks. For each one found, map
+ * the event number to an irq, and feed it into do_IRQ() for
+ * handling.
+ *
+ * Xen uses a two-level bitmap to speed searching. The first level is
+ * a bitset of words which contain pending event bits. The second
+ * level is a bitset of pending events themselves.
+ */
+void xen_evtchn_do_upcall(struct pt_regs *regs)
+{
+ int cpu = get_cpu();
+ struct shared_info *s = HYPERVISOR_shared_info;
+ struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
+ static DEFINE_PER_CPU(unsigned, nesting_count);
+ unsigned count;
+
+ do {
+ unsigned long pending_words;
+
+ vcpu_info->evtchn_upcall_pending = 0;
+
+ if (__get_cpu_var(nesting_count)++)
+ goto out;
+
+#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
+ /* Clear master flag /before/ clearing selector flag. */
+ rmb();
+#endif
+ pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
+ while (pending_words != 0) {
+ unsigned long pending_bits;
+ int word_idx = __ffs(pending_words);
+ pending_words &= ~(1UL << word_idx);
+
+ while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
+ int bit_idx = __ffs(pending_bits);
+ int port = (word_idx * BITS_PER_LONG) + bit_idx;
+ int irq = evtchn_to_irq[port];
+
+ if (irq != -1)
+ xen_do_IRQ(irq, regs);
+ }
+ }
+
+ BUG_ON(!irqs_disabled());
+
+ count = __get_cpu_var(nesting_count);
+ __get_cpu_var(nesting_count) = 0;
+ } while (count != 1);
+
+out:
+ put_cpu();
+}
+
+/* Rebind an evtchn so that it gets delivered to a specific cpu */
+static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+{
+ struct evtchn_bind_vcpu bind_vcpu;
+ int evtchn = evtchn_from_irq(irq);
+
+ if (!VALID_EVTCHN(evtchn))
+ return;
+
+ /* Send future instances of this interrupt to other vcpu. */
+ bind_vcpu.port = evtchn;
+ bind_vcpu.vcpu = tcpu;
+
+ /*
+ * If this fails, it usually just indicates that we're dealing with a
+ * virq or IPI channel, which don't actually need to be rebound. Ignore
+ * the error, but skip the Linux-level rebind in that case.
+ */
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
+ bind_evtchn_to_cpu(evtchn, tcpu);
+}
+
+
+static void set_affinity_irq(unsigned irq, cpumask_t dest)
+{
+ unsigned tcpu = first_cpu(dest);
+ rebind_irq_to_cpu(irq, tcpu);
+}
+
+int resend_irq_on_evtchn(unsigned int irq)
+{
+ int masked, evtchn = evtchn_from_irq(irq);
+ struct shared_info *s = HYPERVISOR_shared_info;
+
+ if (!VALID_EVTCHN(evtchn))
+ return 1;
+
+ masked = sync_test_and_set_bit(evtchn, s->evtchn_mask);
+ sync_set_bit(evtchn, s->evtchn_pending);
+ if (!masked)
+ unmask_evtchn(evtchn);
+
+ return 1;
+}
+
+static void enable_dynirq(unsigned int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+
+ if (VALID_EVTCHN(evtchn))
+ unmask_evtchn(evtchn);
+}
+
+static void disable_dynirq(unsigned int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+
+ if (VALID_EVTCHN(evtchn))
+ mask_evtchn(evtchn);
+}
+
+static void ack_dynirq(unsigned int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+
+ move_native_irq(irq);
+
+ if (VALID_EVTCHN(evtchn))
+ clear_evtchn(evtchn);
+}
+
+static int retrigger_dynirq(unsigned int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+ struct shared_info *sh = HYPERVISOR_shared_info;
+ int ret = 0;
+
+ if (VALID_EVTCHN(evtchn)) {
+ int masked;
+
+ masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask);
+ sync_set_bit(evtchn, sh->evtchn_pending);
+ if (!masked)
+ unmask_evtchn(evtchn);
+ ret = 1;
+ }
+
+ return ret;
+}
+
+static struct irq_chip xen_dynamic_chip __read_mostly = {
+ .name = "xen-dyn",
+ .mask = disable_dynirq,
+ .unmask = enable_dynirq,
+ .ack = ack_dynirq,
+ .set_affinity = set_affinity_irq,
+ .retrigger = retrigger_dynirq,
+};
+
+void __init xen_init_IRQ(void)
+{
+ int i;
+
+ init_evtchn_cpu_bindings();
+
+ /* No event channels are 'live' right now. */
+ for (i = 0; i < NR_EVENT_CHANNELS; i++)
+ mask_evtchn(i);
+
+ /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
+ for (i = 0; i < NR_IRQS; i++)
+ irq_bindcount[i] = 0;
+
+ irq_ctx_init(smp_processor_id());
+}
diff --git a/drivers/xen/features.c b/drivers/xen/features.c
new file mode 100644
index 000000000000..0707714e40d6
--- /dev/null
+++ b/drivers/xen/features.c
@@ -0,0 +1,29 @@
+/******************************************************************************
+ * features.c
+ *
+ * Xen feature flags.
+ *
+ * Copyright (c) 2006, Ian Campbell, XenSource Inc.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/module.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/features.h>
+
+u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
+EXPORT_SYMBOL_GPL(xen_features);
+
+void xen_setup_features(void)
+{
+ struct xen_feature_info fi;
+ int i, j;
+
+ for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
+ fi.submap_idx = i;
+ if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
+ break;
+ for (j = 0; j < 32; j++)
+ xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
+ }
+}
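+
+/*
+ * Illustrative use (callers are elsewhere): once xen_setup_features()
+ * has run, individual flags are tested with the xen_feature() helper
+ * from <xen/features.h>, e.g.
+ *
+ *	if (xen_feature(XENFEAT_auto_translated_physmap))
+ *		return;
+ */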
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index d85dc6d41c2a..52b6b41b909d 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -439,24 +439,6 @@ static inline unsigned int max_nr_grant_frames(void)
return xen_max;
}
-static int map_pte_fn(pte_t *pte, struct page *pmd_page,
- unsigned long addr, void *data)
-{
- unsigned long **frames = (unsigned long **)data;
-
- set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
- (*frames)++;
- return 0;
-}
-
-static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
- unsigned long addr, void *data)
-{
-
- set_pte_at(&init_mm, addr, pte, __pte(0));
- return 0;
-}
-
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct gnttab_setup_table setup;
@@ -470,7 +452,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
setup.dom = DOMID_SELF;
setup.nr_frames = nr_gframes;
- setup.frame_list = frames;
+ set_xen_guest_handle(setup.frame_list, frames);
rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
if (rc == -ENOSYS) {
@@ -480,17 +462,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
BUG_ON(rc || setup.status);
- if (shared == NULL) {
- struct vm_struct *area;
- area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
- BUG_ON(area == NULL);
- shared = area->addr;
- }
- rc = apply_to_page_range(&init_mm, (unsigned long)shared,
- PAGE_SIZE * nr_gframes,
- map_pte_fn, &frames);
+ rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
+ &shared);
BUG_ON(rc);
- frames -= nr_gframes; /* adjust after map_pte_fn() */
kfree(frames);
@@ -506,10 +480,7 @@ static int gnttab_resume(void)
static int gnttab_suspend(void)
{
- apply_to_page_range(&init_mm, (unsigned long)shared,
- PAGE_SIZE * nr_grant_frames,
- unmap_pte_fn, NULL);
-
+ arch_gnttab_unmap_shared(shared, nr_grant_frames);
return 0;
}
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 9fd2f70ab46d..0f86b0ff7879 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -399,7 +399,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
*vaddr = NULL;
- area = alloc_vm_area(PAGE_SIZE);
+ area = xen_alloc_vm_area(PAGE_SIZE);
if (!area)
return -ENOMEM;
@@ -409,7 +409,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
BUG();
if (op.status != GNTST_okay) {
- free_vm_area(area);
+ xen_free_vm_area(area);
xenbus_dev_fatal(dev, op.status,
"mapping in shared page %d from domain %d",
gnt_ref, dev->otherend_id);
@@ -508,7 +508,7 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
BUG();
if (op.status == GNTST_okay)
- free_vm_area(area);
+ xen_free_vm_area(area);
else
xenbus_dev_error(dev, op.status,
"unmapping page at handle %d error %d",
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 4750de316ad3..57ceb5346b74 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -88,6 +88,16 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv)
return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
}
+static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+
+ if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype))
+ return -ENOMEM;
+
+ return 0;
+}
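+
+/*
+ * For a frontend of type "vif", for example, the uevent above carries
+ * "MODALIAS=xen:vif", letting udev/modprobe autoload the matching
+ * frontend module (illustrative; the alias strings themselves come from
+ * the drivers' MODULE_ALIAS declarations elsewhere in this series).
+ */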
+
/* device/<type>/<id> => <type>-<id> */
static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
{
@@ -166,6 +176,7 @@ static struct xen_bus_type xenbus_frontend = {
.bus = {
.name = "xen",
.match = xenbus_match,
+ .uevent = xenbus_uevent,
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
.shutdown = xenbus_dev_shutdown,
@@ -438,6 +449,12 @@ static ssize_t xendev_show_devtype(struct device *dev,
}
DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
+static ssize_t xendev_show_modalias(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
+}
+DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
int xenbus_probe_node(struct xen_bus_type *bus,
const char *type,
@@ -492,10 +509,16 @@ int xenbus_probe_node(struct xen_bus_type *bus,
err = device_create_file(&xendev->dev, &dev_attr_devtype);
if (err)
- goto fail_remove_file;
+ goto fail_remove_nodename;
+
+ err = device_create_file(&xendev->dev, &dev_attr_modalias);
+ if (err)
+ goto fail_remove_devtype;
return 0;
-fail_remove_file:
+fail_remove_devtype:
+ device_remove_file(&xendev->dev, &dev_attr_devtype);
+fail_remove_nodename:
device_remove_file(&xendev->dev, &dev_attr_nodename);
fail_unregister:
device_unregister(&xendev->dev);
@@ -846,6 +869,7 @@ static int is_disconnected_device(struct device *dev, void *data)
{
struct xenbus_device *xendev = to_xenbus_device(dev);
struct device_driver *drv = data;
+ struct xenbus_driver *xendrv;
/*
* A device with no driver will never connect. We care only about
@@ -858,7 +882,9 @@ static int is_disconnected_device(struct device *dev, void *data)
if (drv && (dev->driver != drv))
return 0;
- return (xendev->state != XenbusStateConnected);
+ xendrv = to_xenbus_driver(dev->driver);
+ return (xendev->state != XenbusStateConnected ||
+ (xendrv->is_ready && !xendrv->is_ready(xendev)));
}
static int exists_disconnected_device(struct device_driver *drv)
diff --git a/drivers/xen/xencomm.c b/drivers/xen/xencomm.c
new file mode 100644
index 000000000000..797cb4e31f07
--- /dev/null
+++ b/drivers/xen/xencomm.c
@@ -0,0 +1,232 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <xen/xencomm.h>
+#include <xen/interface/xen.h>
+#ifdef __ia64__
+#include <asm/xen/xencomm.h> /* for is_kernel_addr() */
+#endif
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
+static int xencomm_init(struct xencomm_desc *desc,
+ void *buffer, unsigned long bytes)
+{
+ unsigned long recorded = 0;
+ int i = 0;
+
+ while ((recorded < bytes) && (i < desc->nr_addrs)) {
+ unsigned long vaddr = (unsigned long)buffer + recorded;
+ unsigned long paddr;
+ int offset;
+ int chunksz;
+
+ offset = vaddr % PAGE_SIZE; /* handle partial pages */
+ chunksz = min(PAGE_SIZE - offset, bytes - recorded);
+
+ paddr = xencomm_vtop(vaddr);
+ if (paddr == ~0UL) {
+ printk(KERN_DEBUG "%s: couldn't translate vaddr %lx\n",
+ __func__, vaddr);
+ return -EINVAL;
+ }
+
+ desc->address[i++] = paddr;
+ recorded += chunksz;
+ }
+
+ if (recorded < bytes) {
+ printk(KERN_DEBUG
+ "%s: could only translate %ld of %ld bytes\n",
+ __func__, recorded, bytes);
+ return -ENOSPC;
+ }
+
+ /* mark remaining addresses invalid (just for safety) */
+ while (i < desc->nr_addrs)
+ desc->address[i++] = XENCOMM_INVALID;
+
+ desc->magic = XENCOMM_MAGIC;
+
+ return 0;
+}
+
+static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask,
+ void *buffer, unsigned long bytes)
+{
+ struct xencomm_desc *desc;
+ unsigned long buffer_ulong = (unsigned long)buffer;
+ unsigned long start = buffer_ulong & PAGE_MASK;
+ unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK;
+ unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT;
+ unsigned long size = sizeof(*desc) +
+ sizeof(desc->address[0]) * nr_addrs;
+
+ /*
+ * The slab allocator only guarantees sizeof(void *) alignment. When
+ * sizeof(*desc) > sizeof(void *), a kmalloc()ed descriptor might
+ * therefore cross a page boundary, so fall back to whole pages.
+ */
+ if (sizeof(*desc) > sizeof(void *)) {
+ unsigned long order = get_order(size);
+ desc = (struct xencomm_desc *)__get_free_pages(gfp_mask,
+ order);
+ if (desc == NULL)
+ return NULL;
+
+ desc->nr_addrs =
+ ((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) /
+ sizeof(*desc->address);
+ } else {
+ desc = kmalloc(size, gfp_mask);
+ if (desc == NULL)
+ return NULL;
+
+ desc->nr_addrs = nr_addrs;
+ }
+ return desc;
+}
+
+void xencomm_free(struct xencomm_handle *desc)
+{
+ if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) {
+ struct xencomm_desc *desc__ = (struct xencomm_desc *)desc;
+ if (sizeof(*desc__) > sizeof(void *)) {
+ unsigned long size = sizeof(*desc__) +
+ sizeof(desc__->address[0]) * desc__->nr_addrs;
+ unsigned long order = get_order(size);
+ free_pages((unsigned long)__va(desc), order);
+ } else
+ kfree(__va(desc));
+ }
+}
+
+static int xencomm_create(void *buffer, unsigned long bytes,
+ struct xencomm_desc **ret, gfp_t gfp_mask)
+{
+ struct xencomm_desc *desc;
+ int rc;
+
+ pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes);
+
+ if (bytes == 0) {
+ /* don't create a descriptor; Xen recognizes NULL. */
+ BUG_ON(buffer != NULL);
+ *ret = NULL;
+ return 0;
+ }
+
+ BUG_ON(buffer == NULL); /* 'bytes' is non-zero */
+
+ desc = xencomm_alloc(gfp_mask, buffer, bytes);
+ if (!desc) {
+ printk(KERN_DEBUG "%s failure\n", "xencomm_alloc");
+ return -ENOMEM;
+ }
+
+ rc = xencomm_init(desc, buffer, bytes);
+ if (rc) {
+ printk(KERN_DEBUG "%s failure: %d\n", "xencomm_init", rc);
+ xencomm_free((struct xencomm_handle *)__pa(desc));
+ return rc;
+ }
+
+ *ret = desc;
+ return 0;
+}
+
+/*
+ * Check whether the address is a directly mapped kernel address, i.e.
+ * physically contiguous memory outside the VMALLOC region.
+ */
+static int is_phys_contiguous(unsigned long addr)
+{
+ if (!is_kernel_addr(addr))
+ return 0;
+
+ return (addr < VMALLOC_START) || (addr >= VMALLOC_END);
+}
+
+static struct xencomm_handle *xencomm_create_inline(void *ptr)
+{
+ unsigned long paddr;
+
+ BUG_ON(!is_phys_contiguous((unsigned long)ptr));
+
+ paddr = (unsigned long)xencomm_pa(ptr);
+ BUG_ON(paddr & XENCOMM_INLINE_FLAG);
+ return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
+}
+
+/* "mini" routine, for stack-based communications: */
+static int xencomm_create_mini(void *buffer,
+ unsigned long bytes, struct xencomm_mini *xc_desc,
+ struct xencomm_desc **ret)
+{
+ int rc = 0;
+ struct xencomm_desc *desc;
+ BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0);
+
+ desc = (void *)xc_desc;
+
+ desc->nr_addrs = XENCOMM_MINI_ADDRS;
+
+ rc = xencomm_init(desc, buffer, bytes);
+ if (!rc)
+ *ret = desc;
+
+ return rc;
+}
+
+struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes)
+{
+ int rc;
+ struct xencomm_desc *desc;
+
+ if (is_phys_contiguous((unsigned long)ptr))
+ return xencomm_create_inline(ptr);
+
+ rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL);
+
+ if (rc || desc == NULL)
+ return NULL;
+
+ return xencomm_pa(desc);
+}
+
+struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes,
+ struct xencomm_mini *xc_desc)
+{
+ int rc;
+ struct xencomm_desc *desc = NULL;
+
+ if (is_phys_contiguous((unsigned long)ptr))
+ return xencomm_create_inline(ptr);
+
+ rc = xencomm_create_mini(ptr, bytes, xc_desc,
+ &desc);
+
+ if (rc)
+ return NULL;
+
+ return xencomm_pa(desc);
+}