Index: compile/.keep_me =================================================================== RCS file: compile/.keep_me diff -N compile/.keep_me Index: conf/XENCONF =================================================================== RCS file: conf/XENCONF diff -N conf/XENCONF --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ conf/XENCONF 13 Dec 2005 01:05:19 -0000 @@ -0,0 +1,137 @@ +# +# GENERIC -- Generic kernel configuration file for FreeBSD/i386 +# +# For more information on this file, please read the handbook section on +# Kernel Configuration Files: +# +# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html +# +# The handbook is also available locally in /usr/share/doc/handbook +# if you've installed the doc distribution, otherwise always see the +# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the +# latest information. +# +# An exhaustive list of options and more detailed explanations of the +# device lines is also present in the ../../conf/NOTES and NOTES files. +# If you are in doubt as to the purpose or necessity of a line, check first +# in NOTES. +# +# $FreeBSD: src/sys/i386/conf/GENERIC,v 1.394.2.3 2004/01/26 19:42:11 nectar Exp $ + +machine i386-xen +cpu I686_CPU +ident XEN + +#To statically compile in device wiring instead of /boot/device.hints +#hints "GENERIC.hints" #Default places to look for devices. 
+ +makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols + +options SCHED_4BSD #4BSD scheduler +options INET #InterNETworking +options INET6 #IPv6 communications protocols +options FFS #Berkeley Fast Filesystem +options SOFTUPDATES #Enable FFS soft updates support +options UFS_ACL #Support for access control lists +options UFS_DIRHASH #Improve performance on big directories +options MD_ROOT #MD is a potential root device +options NFSCLIENT #Network Filesystem Client +options NFSSERVER #Network Filesystem Server +# options NFS_ROOT #NFS usable as /, requires NFSCLIENT +#options MSDOSFS #MSDOS Filesystem +#options CD9660 #ISO 9660 Filesystem +options PROCFS #Process filesystem (requires PSEUDOFS) +options PSEUDOFS #Pseudo-filesystem framework +options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!] +options COMPAT_FREEBSD4 #Compatible with FreeBSD4 +options SCSI_DELAY=15000 #Delay (in ms) before probing SCSI +options KTRACE #ktrace(1) support +options SYSVSHM #SYSV-style shared memory +options SYSVMSG #SYSV-style message queues +options SYSVSEM #SYSV-style semaphores +options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions +options KBD_INSTALL_CDEV # install a CDEV entry in /dev +options CPU_DISABLE_SSE # don't turn on SSE framework with Xen +#options PFIL_HOOKS # pfil(9) framework + +# Debugging for use in -current +options KDB #Enable the kernel debugger +options INVARIANTS #Enable calls of extra sanity checking +options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS +#options WITNESS #Enable checks to detect deadlocks and cycles +#options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed + +# To make an SMP kernel, the next two are needed +#options SMP # Symmetric MultiProcessor Kernel +#device apic # I/O APIC + +# SCSI peripherals +device scbus # SCSI bus (required for SCSI) +#device ch # SCSI media changers +device da # Direct Access (disks) +#device sa # Sequential Access (tape etc) 
+#device cd # CD +device pass # Passthrough device (direct SCSI access) +#device ses # SCSI Environmental Services (and SAF-TE) + +# atkbdc0 controls both the keyboard and the PS/2 mouse +#device atkbdc # AT keyboard controller +#device atkbd # AT keyboard +#device psm # PS/2 mouse + +# device vga # VGA video card driver + +#device splash # Splash screen and screen saver support + +# syscons is the default console driver, resembling an SCO console +#device sc + +# Enable this for the pcvt (VT220 compatible) console driver +#device vt +#options XSERVER # support for X server on a vt console +#options FAT_CURSOR # start with block cursor + +#device agp # support several AGP chipsets + +# Floating point support - do not disable. +device npx + +# Serial (COM) ports +#device sio # 8250, 16[45]50 based serial ports + +# Parallel port +#device ppc +#device ppbus # Parallel port bus (required) +#device lpt # Printer +#device plip # TCP/IP over parallel +#device ppi # Parallel port interface device +#device vpo # Requires scbus and da + +# If you've got a "dumb" serial or parallel PCI card that is +# supported by the puc(4) glue driver, uncomment the following +# line to enable it (connects to the sio and/or ppc drivers): +#device puc + + +# Pseudo devices - the number indicates how many units to allocate. +device random # Entropy device +device loop # Network loopback +device ether # Ethernet support +device tun # Packet tunnel. +device pty # Pseudo-ttys (telnet etc) +device md # Memory "disks" +device gif # IPv6 and IPv4 tunneling +device faith # IPv6-to-IPv4 relaying (translation) + +# The `bpf' device enables the Berkeley Packet Filter. +# Be aware of the administrative consequences of enabling this! 
+device bpf # Berkeley packet filter + +#options BOOTP +options XEN +options MCLSHIFT=12 # this has to be enabled for Xen as we can only have one cluster per page +options MSIZE=256 +options DIAGNOSTIC +options MAXMEM=(256*1024) +#options NOXENDEBUG=1 # Turn off Debugging printfs + Index: drivers/console/console.c =================================================================== RCS file: drivers/console/console.c diff -N drivers/console/console.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ drivers/console/console.c 26 Dec 2005 08:06:16 -0000 @@ -0,0 +1,551 @@ +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "opt_ddb.h" +#ifdef DDB +#include +#endif + +static char driver_name[] = "xc"; +devclass_t xc_devclass; +static void xcstart (struct tty *); +static int xcparam (struct tty *, struct termios *); +static void xcstop (struct tty *, int); +static void xc_timeout(void *); +static void __xencons_tx_flush(void); +static boolean_t xcons_putc(int c); + +/* switch console so that shutdown can occur gracefully */ +static void xc_shutdown(void *arg, int howto); +static int xc_mute; + +void xcons_force_flush(void); + +static cn_probe_t xccnprobe; +static cn_init_t xccninit; +static cn_getc_t xccngetc; +static cn_putc_t xccnputc; +static cn_checkc_t xccncheckc; + +#define XC_POLLTIME (hz/10) + +CONS_DRIVER(xc, xccnprobe, xccninit, NULL, xccngetc, + xccncheckc, xccnputc, NULL); + +static int xen_console_up; +static boolean_t xc_start_needed; +static struct callout xc_callout; +struct mtx cn_mtx; + +#define RBUF_SIZE 1024 +#define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1)) +#define WBUF_SIZE 4096 +#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) +static char wbuf[WBUF_SIZE]; +static char rbuf[RBUF_SIZE]; +static int rc, rp; +static int cnsl_evt_reg; +static unsigned int wc, wp; /* write_cons, write_prod */ +#ifdef nomore +static 
boolean_t xc_tx_task_queued; +static struct task xencons_tx_flush_task = { {NULL},0,0,&xencons_tx_flush_task_routine,NULL }; +static void xencons_tx_flush_task_routine(void *,int ); +#endif + +#define CDEV_MAJOR 12 +#define XCUNIT(x) (minor(x)) +#define ISTTYOPEN(tp) ((tp) && ((tp)->t_state & TS_ISOPEN)) +#define CN_LOCK_INIT(x, _name) \ + mtx_init(&x, _name, _name, MTX_SPIN) +#define CN_LOCK(l, f) mtx_lock_irqsave(&(l), (f)) +#define CN_UNLOCK(l, f) mtx_unlock_irqrestore(&(l), (f)) +#define CN_LOCK_ASSERT(x) mtx_assert(&x, MA_OWNED) +#define CN_LOCK_DESTROY(x) mtx_destroy(&x) + + +static struct tty *xccons; + +struct xc_softc { + int xc_unit; + struct cdev *xc_dev; +}; + + +static d_open_t xcopen; +static d_close_t xcclose; +static d_ioctl_t xcioctl; + +static struct cdevsw xc_cdevsw = { + .d_version = D_VERSION, + .d_flags = D_TTY | D_NEEDGIANT, + .d_name = driver_name, + .d_open = xcopen, + .d_close = xcclose, + .d_read = ttyread, + .d_write = ttywrite, + .d_ioctl = xcioctl, + .d_poll = ttypoll, + .d_kqfilter = ttykqfilter, +}; + +static void +xccnprobe(struct consdev *cp) +{ + cp->cn_pri = CN_REMOTE; + cp->cn_tp = xccons; + sprintf(cp->cn_name, "%s0", driver_name); +} + + +static void +xccninit(struct consdev *cp) +{ + CN_LOCK_INIT(cn_mtx,"XCONS LOCK"); + +} +int +xccngetc(struct consdev *dev) +{ + int c; + if (xc_mute) + return 0; + do { + if ((c = xccncheckc(dev)) == -1) { + /* polling without sleeping in Xen doesn't work well. + * Sleeping gives other things like clock a chance to + * run + */ + tsleep(&cn_mtx, PWAIT | PCATCH, "console sleep", + XC_POLLTIME); + } + } while( c == -1 ); + return c; +} + +int +xccncheckc(struct consdev *dev) +{ + int ret = (xc_mute ? 
0 : -1); + int flags; + CN_LOCK(cn_mtx, flags); + if ( (rp - rc) ){ + /* we need to return only one char */ + ret = (int)rbuf[RBUF_MASK(rc)]; + rc++; + } + CN_UNLOCK(cn_mtx, flags); + return(ret); +} + +static void +xccnputc(struct consdev *dev, int c) +{ + int flags; + CN_LOCK(cn_mtx, flags); + xcons_putc(c); + CN_UNLOCK(cn_mtx, flags); +} + +static boolean_t +xcons_putc(int c) +{ + int force_flush = xc_mute || +#ifdef DDB + db_active || +#endif + panicstr; /* we're not gonna recover, so force + * flush + */ + + if ( (wp-wc) < (WBUF_SIZE-1) ){ + if ( (wbuf[WBUF_MASK(wp++)] = c) == '\n' ) { + wbuf[WBUF_MASK(wp++)] = '\r'; +#ifdef notyet + if (force_flush) + xcons_force_flush(); +#endif + } + } else if (force_flush) { +#ifdef notyet + xcons_force_flush(); +#endif + } + if (cnsl_evt_reg) + __xencons_tx_flush(); + + /* inform start path that we're pretty full */ + return ((wp - wc) >= WBUF_SIZE - 100) ? TRUE : FALSE; +} + +static void +xc_identify(driver_t *driver, device_t parent) +{ + device_t child; + child = BUS_ADD_CHILD(parent, 0, driver_name, 0); + device_set_driver(child, driver); + device_set_desc(child, "Xen Console"); +} + +static int +xc_probe(device_t dev) +{ + struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev); + + sc->xc_unit = device_get_unit(dev); + return (0); +} + +static int +xc_attach(device_t dev) +{ + struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev); + TRACE_ENTER; + + sc->xc_dev = make_dev(&xc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "xc%r", 0); + xccons = ttymalloc(NULL); + + sc->xc_dev->si_drv1 = (void *)sc; + sc->xc_dev->si_tty = xccons; + + xccons->t_oproc = xcstart; + xccons->t_param = xcparam; + xccons->t_stop = xcstop; + xccons->t_dev = sc->xc_dev; + + callout_init(&xc_callout, 0); + + xencons_ring_init(); + (void)xencons_ring_register_receiver(xencons_rx); + cnsl_evt_reg = 1; + + callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons); + + /* register handler to flush console on shutdown */ + if 
((EVENTHANDLER_REGISTER(shutdown_post_sync, xc_shutdown, + NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) + printf("xencons: shutdown event registration failed!\n"); + + TRACE_EXIT; + return (0); +} + +/* + * return 0 for all console input, force flush all output. + */ +static void +xc_shutdown(void *arg, int howto) +{ + xc_mute = 1; +#ifdef notyet + xcons_force_flush(); +#endif +} + +void +xencons_rx(char *buf, unsigned len) +{ + int i, flags; + struct tty *tp = xccons; + + TRACE_ENTER; + CN_LOCK(cn_mtx, flags); + for ( i = 0; i < len; i++ ) { + if ( xen_console_up ) + (*linesw[tp->t_line]->l_rint)(buf[i], tp); + else + rbuf[RBUF_MASK(rp++)] = buf[i]; + } + CN_UNLOCK(cn_mtx, flags); + TRACE_EXIT; +} + +static void +__xencons_tx_flush(void) +{ + int sz, work_done = 0; + TRACE_ENTER; +#ifdef notyet + while (x_char) { + if (xencons_ring_send(&x_char, 1) == 1) { + x_char = 0; + work_done = 1; + } + } +#endif + while ( wc != wp ) { + int sent; + sz = wp - wc; + if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) ) + sz = WBUF_SIZE - WBUF_MASK(wc); + sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); + if (sent == 0) + break; + wc += sent; + work_done = 1; + + } + + if ( work_done && xen_console_up ) + ttwakeup(xccons); + TRACE_EXIT; +} + +void +xencons_tx(void) +{ + unsigned long flags; + CN_LOCK(cn_mtx, flags); + __xencons_tx_flush(); + CN_UNLOCK(cn_mtx, flags); +} +#ifdef nomore +static void +xencons_tx_flush_task_routine(void * data, int arg) +{ + int flags; + CN_LOCK(cn_mtx, flags); + xc_tx_task_queued = FALSE; + __xencons_tx_flush(); + CN_UNLOCK(cn_mtx, flags); +} +#endif +int +xcopen(struct cdev *dev, int flag, int mode, struct thread *td) +{ + struct xc_softc *sc; + int unit = XCUNIT(dev); + struct tty *tp; + int s, error; + + sc = (struct xc_softc *)device_get_softc( + devclass_get_device(xc_devclass, unit)); + if (sc == NULL) + return (ENXIO); + + TRACE_ENTER; + tp = dev->si_tty; + s = spltty(); + if (!ISTTYOPEN(tp)) { + tp->t_state |= TS_CARR_ON; + ttychars(tp); + tp->t_iflag = 
TTYDEF_IFLAG; + tp->t_oflag = TTYDEF_OFLAG; + tp->t_cflag = TTYDEF_CFLAG|CLOCAL; + tp->t_lflag = TTYDEF_LFLAG; + tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; + xcparam(tp, &tp->t_termios); + ttsetwater(tp); + } else if (tp->t_state & TS_XCLUDE && suser(td)) { + splx(s); + return (EBUSY); + } + splx(s); + + xen_console_up = 1; + + error = (*linesw[tp->t_line]->l_open)(dev, tp); + TRACE_EXIT; + return error; +} + +int +xcclose(struct cdev *dev, int flag, int mode, struct thread *td) +{ + struct tty *tp = dev->si_tty; + + if (tp == NULL) + return (0); + xen_console_up = 0; + + spltty(); + (*linesw[tp->t_line]->l_close)(tp, flag); + tty_close(tp); + spl0(); + return (0); +} + + +int +xcioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) +{ + struct tty *tp = dev->si_tty; + int error; + + error = (*linesw[tp->t_line]->l_ioctl)(tp, cmd, data, flag, td); + if (error != ENOIOCTL) + return (error); + + error = ttioctl(tp, cmd, data, flag); + + if (error != ENOIOCTL) + return (error); + + return (ENOTTY); +} + +static inline int +__xencons_put_char(int ch) +{ + char _ch = (char)ch; + if ( (wp - wc) == WBUF_SIZE ) + return 0; + wbuf[WBUF_MASK(wp++)] = _ch; + return 1; +} + + +static void +xcstart(struct tty *tp) +{ + int flags; + int s; + boolean_t cons_full = FALSE; + + s = spltty(); + CN_LOCK(cn_mtx, flags); + if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { + ttwwakeup(tp); + CN_UNLOCK(cn_mtx, flags); + return; + } + + tp->t_state |= TS_BUSY; + while (tp->t_outq.c_cc != 0 && !cons_full) + cons_full = xcons_putc(getc(&tp->t_outq)); + + /* if the console is close to full leave our state as busy */ + if (!cons_full) { + tp->t_state &= ~TS_BUSY; + ttwwakeup(tp); + } else { + /* let the timeout kick us in a bit */ + xc_start_needed = TRUE; + } + CN_UNLOCK(cn_mtx, flags); + splx(s); +} + +static void +xcstop(struct tty *tp, int flag) +{ + + if (tp->t_state & TS_BUSY) { + if ((tp->t_state & TS_TTSTOP) == 0) { + tp->t_state |= TS_FLUSH; + } + } +} + +static 
void +xc_timeout(void *v) +{ + struct tty *tp; + int c; + + tp = (struct tty *)v; + + while ((c = xccncheckc(NULL)) != -1) { + if (tp->t_state & TS_ISOPEN) { + (*linesw[tp->t_line]->l_rint)(c, tp); + } + } + + if (xc_start_needed) { + xc_start_needed = FALSE; + xcstart(tp); + } + + callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, tp); +} + +/* + * Set line parameters. + */ +int +xcparam(struct tty *tp, struct termios *t) +{ + tp->t_ispeed = t->c_ispeed; + tp->t_ospeed = t->c_ospeed; + tp->t_cflag = t->c_cflag; + return (0); +} + + +static device_method_t xc_methods[] = { + DEVMETHOD(device_identify, xc_identify), + DEVMETHOD(device_probe, xc_probe), + DEVMETHOD(device_attach, xc_attach), + {0, 0} +}; + +static driver_t xc_driver = { + driver_name, + xc_methods, + sizeof(struct xc_softc), +}; +#if 0 +/*** Forcibly flush console data before dying. ***/ +void +xcons_force_flush(void) +{ + int sz; + + /* + * We use dangerous control-interface functions that require a quiescent + * system and no interrupts. Try to ensure this with a global cli(). + */ + cli(); + + /* Spin until console data is flushed through to the domain controller. */ + while ( (wc != wp) && !ctrl_if_transmitter_empty() ) + { + /* Interrupts are disabled -- we must manually reap responses. 
*/ + ctrl_if_discard_responses(); + + if ( (sz = wp - wc) == 0 ) + continue; + if ( sz > sizeof(msg.msg) ) + sz = sizeof(msg.msg); + if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) ) + sz = WBUF_SIZE - WBUF_MASK(wc); + + msg.type = CMSG_CONSOLE; + msg.subtype = CMSG_CONSOLE_DATA; + msg.length = sz; + memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz); + + if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 ) + wc += sz; + } +} +#endif +DRIVER_MODULE(xc, nexus, xc_driver, xc_devclass, 0, 0); +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ Index: drivers/console/xencons_ring.c =================================================================== RCS file: drivers/console/xencons_ring.c diff -N drivers/console/xencons_ring.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ drivers/console/xencons_ring.c 26 Dec 2005 08:12:01 -0000 @@ -0,0 +1,145 @@ +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include + + +extern char *console_page /* XXX need to map in start_info->console_mfn */; +#if 0 +#define XENCONS_RING_SIZE (PAGE_SIZE/2 - sizeof (struct ring_head)) +#define XENCONS_IDX(cnt) ((cnt) % XENCONS_RING_SIZE) +#define XENCONS_FULL(ring) (((ring)->prod - (ring)->cons) == XENCONS_RING_SIZE) +#endif +static inline struct xencons_interface * +xencons_interface(void) +{ + return (struct xencons_interface *)console_page; +} + +int +xencons_ring_send(const char *data, unsigned len) +{ + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; + int sent; + + intf = xencons_interface(); + cons = intf->out_cons; + prod = intf->out_prod; + sent = 0; + + mb(); + PANIC_IF((prod - cons) > sizeof(intf->out)); + + while ((sent < len) && ((prod - cons) < sizeof(intf->out))) + intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; + + wmb(); + intf->out_prod 
= prod; + + notify_remote_via_evtchn(xen_start_info->console_evtchn); + + return sent; + +} + + +static xencons_receiver_func *xencons_receiver; +/* Event-channel handler: pass each pending input-ring byte to xencons_rx(), publish the new in_cons, notify the other end, then flush queued output via xencons_tx(). */ +static void +handle_input(void *unused) +{ + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; + + intf = xencons_interface(); + + cons = intf->in_cons; /* input-ring indices; the out_* pair is the one xencons_ring_send() uses */ + prod = intf->in_prod; + mb(); /* snapshot the indices before touching ring data, as xencons_ring_send() does */ + + + while (cons != prod) { + xencons_rx(intf->in + MASK_XENCONS_IDX(cons, intf->in), 1); + cons++; + } + + mb(); + intf->in_cons = cons; + + notify_remote_via_evtchn(xen_start_info->console_evtchn); + + xencons_tx(); +} + +void +xencons_ring_register_receiver(xencons_receiver_func *f) +{ + xencons_receiver = f; +} + +int +xencons_ring_init(void) +{ + int err; + + if (!xen_start_info->console_evtchn) + return 0; + + err = bind_evtchn_to_irqhandler(xen_start_info->console_evtchn, + "xencons", handle_input, + INTR_TYPE_MISC | INTR_MPSAFE); + if (err) { + XENPRINTF("XEN console request irq failed %i\n", err); + return err; + } + + return 0; +} +#ifdef notyet +void +xencons_suspend(void) +{ + + if (!xen_start_info->console_evtchn) + return; + + unbind_evtchn_from_irqhandler(xen_start_info->console_evtchn, NULL); +} + +void +xencons_resume(void) +{ + + (void)xencons_ring_init(); +} +#endif +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 8 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ Index: drivers/console/xencons_ring.h =================================================================== RCS file: drivers/console/xencons_ring.h diff -N drivers/console/xencons_ring.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ drivers/console/xencons_ring.h 26 Dec 2005 07:38:31 -0000 @@ -0,0 +1,13 @@ +#ifndef _XENCONS_RING_H +#define _XENCONS_RING_H + +int xencons_ring_init(void); +int xencons_ring_send(const char *data, unsigned len); +void xencons_rx(char *buf, unsigned len); +void xencons_tx(void); + + +typedef void (xencons_receiver_func)(char *buf, unsigned len); +void
xencons_ring_register_receiver(xencons_receiver_func *f); + +#endif /* _XENCONS_RING_H */ Index: drivers/evtchn/evtchn_dev.c =================================================================== RCS file: drivers/evtchn/evtchn_dev.c diff -N drivers/evtchn/evtchn_dev.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ drivers/evtchn/evtchn_dev.c 13 Dec 2005 01:12:00 -0000 @@ -0,0 +1,411 @@ +/****************************************************************************** + * evtchn.c + * + * Xenolinux driver for receiving and demuxing event-channel signals. + * + * Copyright (c) 2004, K A Fraser + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +typedef struct evtchn_sotfc { + + struct selinfo ev_rsel; +} evtchn_softc_t; + + +#ifdef linuxcrap +/* NB. This must be shared amongst drivers if more things go in /dev/xen */ +static devfs_handle_t xen_dev_dir; +#endif + +/* Only one process may open /dev/xen/evtchn at any time. */ +static unsigned long evtchn_dev_inuse; + +/* Notification ring, accessed via /dev/xen/evtchn. */ + +#define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */ + +#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) +static uint16_t *ring; +static unsigned int ring_cons, ring_prod, ring_overflow; + +/* Which ports is user-space bound to? 
*/ +static uint32_t bound_ports[32]; + +/* Unique address for processes to sleep on */ +static void *evtchn_waddr = &ring; + +static struct mtx lock, upcall_lock; + +static d_read_t evtchn_read; +static d_write_t evtchn_write; +static d_ioctl_t evtchn_ioctl; +static d_poll_t evtchn_poll; +static d_open_t evtchn_open; +static d_close_t evtchn_close; + + +void +evtchn_device_upcall(int port) +{ + mtx_lock(&upcall_lock); + + mask_evtchn(port); + clear_evtchn(port); + + if ( ring != NULL ) { + if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) { + ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port; + if ( ring_cons == ring_prod++ ) { + wakeup(evtchn_waddr); + } + } + else { + ring_overflow = 1; + } + } + + mtx_unlock(&upcall_lock); +} + +static void +__evtchn_reset_buffer_ring(void) +{ + /* Initialise the ring to empty. Clear errors. */ + ring_cons = ring_prod = ring_overflow = 0; +} + +static int +evtchn_read(struct cdev *dev, struct uio *uio, int ioflag) +{ + int rc; + unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0; + count = uio->uio_resid; + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) + { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + for ( ; ; ) { + if ( (c = ring_cons) != (p = ring_prod) ) + break; + + if ( ring_overflow ) { + rc = EFBIG; + goto out; + } + + if (sst != 0) { + rc = EINTR; + goto out; + } + + /* PCATCH == check for signals before and after sleeping + * PWAIT == priority of waiting on resource + */ + sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10); + } + + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ + if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) { + bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t); + bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t); + } + else { + bytes1 = (p - c) * sizeof(uint16_t); + bytes2 = 0; + } + + /* Truncate chunks according to caller's maximum byte count.
*/ + if ( bytes1 > count ) { + bytes1 = count; + bytes2 = 0; + } + else if ( (bytes1 + bytes2) > count ) { + bytes2 = count - bytes1; + } + + if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) || + ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio))) + /* keeping this around as its replacement is not equivalent + * copyout(&ring[0], &buf[bytes1], bytes2) + */ + { + rc = EFAULT; + goto out; + } + + ring_cons += (bytes1 + bytes2) / sizeof(uint16_t); + + rc = 0; /* d_read returns 0 or an errno; the amount copied is carried by uio (updated by uiomove) */ + + out: + + return rc; +} + +static int +evtchn_write(struct cdev *dev, struct uio *uio, int ioflag) +{ + int rc, i, count; + + count = uio->uio_resid; + + uint16_t *kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); + + + if ( kbuf == NULL ) + return ENOMEM; + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + if ( uiomove(kbuf, count, uio) != 0 ) { + rc = EFAULT; + goto out; + } + + mtx_lock_spin(&lock); + for ( i = 0; i < (count/2); i++ ) /* each 16-bit entry is a port number; only ports set in bound_ports are unmasked */ + if ( test_bit(kbuf[i], &bound_ports[0]) ) + unmask_evtchn(kbuf[i]); + mtx_unlock_spin(&lock); + + rc = 0; /* d_write returns 0 or an errno, not a byte count; uio already reflects what was consumed */ + + out: + free(kbuf, M_DEVBUF); + return rc; +} + +static int +evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, + int mode, struct thread *td __unused) +{ + int rc = 0; + + mtx_lock_spin(&lock); + + switch ( cmd ) + { + case EVTCHN_RESET: + __evtchn_reset_buffer_ring(); + break; + case EVTCHN_BIND: + if ( !synch_test_and_set_bit((int)arg, &bound_ports[0]) ) + unmask_evtchn((int)arg); + else + rc = EINVAL; + break; + case EVTCHN_UNBIND: + if ( synch_test_and_clear_bit((int)arg, &bound_ports[0]) ) + mask_evtchn((int)arg); + else + rc = EINVAL; + break; + default: + rc = ENOSYS; + break; + } + + mtx_unlock_spin(&lock); + + return rc; +} + +static int +evtchn_poll(struct cdev *dev, int poll_events, struct thread *td) +{ + + evtchn_softc_t *sc; + unsigned int mask = POLLOUT | POLLWRNORM; + + sc = dev->si_drv1; + + if ( ring_cons !=
ring_prod ) + mask |= POLLIN | POLLRDNORM; + else if ( ring_overflow ) + mask = POLLERR; + else + selrecord(td, &sc->ev_rsel); + + + return mask; +} + + +static int +evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td) +{ + uint16_t *_ring; + + if (flag & O_NONBLOCK) + return EBUSY; + + if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) ) + return EBUSY; + + if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL ) + return ENOMEM; + + mtx_lock_spin(&lock); + ring = _ring; + __evtchn_reset_buffer_ring(); + mtx_unlock_spin(&lock); + + + return 0; +} + +static int +evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused) +{ + int i; + + mtx_lock_spin(&lock); + if (ring != NULL) { + free(ring, M_DEVBUF); + ring = NULL; + } + for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) + if ( synch_test_and_clear_bit(i, &bound_ports[0]) ) + mask_evtchn(i); + mtx_unlock_spin(&lock); + + evtchn_dev_inuse = 0; + + return 0; +} + + + +/* XXX wild assed guess as to a safe major number */ +#define EVTCHN_MAJOR 140 + +static struct cdevsw evtchn_devsw = { + d_version: D_VERSION_00, + d_open: evtchn_open, + d_close: evtchn_close, + d_read: evtchn_read, + d_write: evtchn_write, + d_ioctl: evtchn_ioctl, + d_poll: evtchn_poll, + d_name: "evtchn", + d_flags: 0, +}; + + +/* XXX - if this device is ever supposed to support use by more than one process + * this global static will have to go away + */ +static struct cdev *evtchn_dev; + + + +static int +evtchn_init(void *dummy __unused) +{ + /* XXX I believe we don't need these leaving them here for now until we + * have some semblance of it working + */ +#if 0 + devfs_handle_t symlink_handle; + int err, pos; + char link_dest[64]; +#endif + mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF); + + /* (DEVFS) create '/dev/misc/evtchn'. 
*/ + evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn"); + + mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS); + + evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK); + bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t)); + + /* XXX I don't think we need any of this rubbish */ +#if 0 + if ( err != 0 ) + { + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); + return err; + } + + /* (DEVFS) create directory '/dev/xen'. */ + xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); + + /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ + pos = devfs_generate_path(evtchn_miscdev.devfs_handle, + &link_dest[3], + sizeof(link_dest) - 3); + if ( pos >= 0 ) + strncpy(&link_dest[pos], "../", 3); + /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ + (void)devfs_mk_symlink(xen_dev_dir, + "evtchn", + DEVFS_FL_DEFAULT, + &link_dest[pos], + &symlink_handle, + NULL); + + /* (DEVFS) automatically destroy the symlink with its destination. 
*/ + devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); +#endif + printk("Event-channel device installed.\n"); + + return 0; +} + + +SYSINIT(evtchn_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_init, NULL); + + +#if 0 + +static void cleanup_module(void) +{ + destroy_dev(evtchn_dev); +; +} + +module_init(init_module); +module_exit(cleanup_module); +#endif Index: drivers/xenbus/init.txt =================================================================== RCS file: drivers/xenbus/init.txt diff -N drivers/xenbus/init.txt --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ drivers/xenbus/init.txt 16 Dec 2005 12:38:30 -0000 @@ -0,0 +1,14 @@ + + +- frontend driver initializes static xenbus_driver with _ids, _probe, _remove, +_resume, _otherend_changed + + - initialization calls xenbus_register_frontend(xenbus_driver) + + - xenbus_register_frontend sets read_otherend details to read_backend_details + then calls xenbus_register_driver_common(xenbus_driver, xenbus_frontend) + + - xenbus_register_driver_common sets underlying driver name to xenbus_driver name + underlying driver bus to xenbus_frontend's bus, driver's probe to xenbus_dev_probe + driver's remove to xenbus_dev_remove then calls driver_register + Index: drivers/xenbus/xenbus_client.c =================================================================== RCS file: drivers/xenbus/xenbus_client.c diff -N drivers/xenbus/xenbus_client.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ drivers/xenbus/xenbus_client.c 16 Dec 2005 06:24:07 -0000 @@ -0,0 +1,266 @@ +/****************************************************************************** + * Client-facing interface for the Xenbus driver. In other words, the + * interface between the Xenbus and the device-specific code, be it the + * frontend or the backend of that driver. 
+ * + * Copyright (C) 2005 XenSource Ltd + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#if 0 +#define DPRINTK(fmt, args...) \ + printk("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTK(fmt, args...) 
((void)0) +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + + +#define EXPORT_SYMBOL(x) +#define kmalloc(size, unused) malloc(size, M_DEVBUF, M_WAITOK) +#define kfree(ptr) free(ptr, M_DEVBUF) +#define BUG_ON PANIC_IF + +int +xenbus_watch_path(struct xenbus_device *dev, char *path, + struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)) +{ + int err; + + watch->node = path; + watch->callback = callback; + + err = register_xenbus_watch(watch); + + if (err) { + watch->node = NULL; + watch->callback = NULL; + xenbus_dev_fatal(dev, err, "adding watch on %s", path); + } + + return err; +} +EXPORT_SYMBOL(xenbus_watch_path); + + +int xenbus_watch_path2(struct xenbus_device *dev, const char *path, + const char *path2, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)) +{ + int err; + char *state = + kmalloc(strlen(path) + 1 + strlen(path2) + 1, GFP_KERNEL); + if (!state) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); + return -ENOMEM; + } + strcpy(state, path); + strcat(state, "/"); + strcat(state, path2); + + err = xenbus_watch_path(dev, state, watch, callback); + + if (err) { + kfree(state); + } + return err; +} +EXPORT_SYMBOL(xenbus_watch_path2); + + +int xenbus_switch_state(struct xenbus_device *dev, + struct xenbus_transaction *xbt, + XenbusState state) +{ + /* We check whether the state is currently set to the given value, and + if not, then the state is set. We don't want to unconditionally + write the given state, because we don't want to fire watches + unnecessarily. Furthermore, if the node has gone, we don't write + to it, as the device will be tearing down, and we don't want to + resurrect that directory. 
+ */ + + int current_state; + + int err = xenbus_scanf(xbt, dev->nodename, "state", "%d", + ¤t_state); + if ((err == 1 && (XenbusState)current_state == state) || + err == -ENOENT) + return 0; + + err = xenbus_printf(xbt, dev->nodename, "state", "%d", state); + if (err) { + xenbus_dev_fatal(dev, err, "writing new state"); + return err; + } + return 0; +} +EXPORT_SYMBOL(xenbus_switch_state); + + +/** + * Return the path to the error node for the given device, or NULL on failure. + * If the value returned is non-NULL, then it is the caller's to kfree. + */ +static char *error_path(struct xenbus_device *dev) +{ + char *path_buffer = kmalloc(strlen("error/") + strlen(dev->nodename) + + 1, GFP_KERNEL); + if (path_buffer == NULL) { + return NULL; + } + + strcpy(path_buffer, "error/"); + strcpy(path_buffer + strlen("error/"), dev->nodename); + + return path_buffer; +} + + +static void _dev_error(struct xenbus_device *dev, int err, const char *fmt, + va_list ap) +{ + int ret; + unsigned int len; + char *printf_buffer = NULL, *path_buffer = NULL; + +#define PRINTF_BUFFER_SIZE 4096 + printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); + if (printf_buffer == NULL) + goto fail; + + len = sprintf(printf_buffer, "%i ", -err); + ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); + + BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1); + dev->has_error = 1; + + path_buffer = error_path(dev); + + if (path_buffer == NULL) { + printk("xenbus: failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); + goto fail; + } + + if (xenbus_write(NULL, path_buffer, "error", printf_buffer) != 0) { + printk("xenbus: failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); + goto fail; + } + +fail: + if (printf_buffer) + kfree(printf_buffer); + if (path_buffer) + kfree(path_buffer); +} + + +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, + ...) 
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	_dev_error(dev, err, fmt, ap);	/* format and record the error */
+	va_end(ap);
+}
+EXPORT_SYMBOL(xenbus_dev_error);
+
+/*
+ * Record the formatted error through _dev_error(), exactly as
+ * xenbus_dev_error() does, then request a switch of the device to
+ * XenbusStateClosing (NULL is passed as the transaction argument).
+ */
+void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
+		      ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	_dev_error(dev, err, fmt, ap);
+	va_end(ap);
+
+	xenbus_switch_state(dev, NULL, XenbusStateClosing);
+}
+EXPORT_SYMBOL(xenbus_dev_fatal);
+
+/*
+ * Call gnttab_grant_foreign_access() for dev->otherend_id and the page
+ * 'ring_mfn'.  A negative result is reported with xenbus_dev_fatal().
+ * The result of the grant call is returned unchanged in either case.
+ * NOTE(review): the meaning of the final 0 argument is not visible here.
+ */
+int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+{
+	int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
+	if (err < 0)
+		xenbus_dev_fatal(dev, err, "granting access to ring page");
+	return err;
+}
+EXPORT_SYMBOL(xenbus_grant_ring);
+
+/*
+ * Issue EVTCHNOP_alloc_unbound with dom = DOMID_SELF and
+ * remote_dom = dev->otherend_id.  On success (0) the allocated port is
+ * stored in *port; on failure the error is reported as fatal and *port
+ * is left untouched.  Returns the hypercall result.
+ */
+int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
+{
+	evtchn_op_t op = {
+		.cmd = EVTCHNOP_alloc_unbound,
+		.u.alloc_unbound.dom = DOMID_SELF,
+		.u.alloc_unbound.remote_dom = dev->otherend_id };
+
+	int err = HYPERVISOR_event_channel_op(&op);
+	if (err)
+		xenbus_dev_fatal(dev, err, "allocating event channel");
+	else
+		*port = op.u.alloc_unbound.port;
+	return err;
+}
+EXPORT_SYMBOL(xenbus_alloc_evtchn);
+
+/*
+ * Read the "state" key below 'path' as a decimal integer and return it.
+ * Any failure of xenbus_gather() yields XenbusStateClosed.
+ */
+XenbusState xenbus_read_driver_state(const char *path)
+{
+	XenbusState result;
+
+	int err = xenbus_gather(NULL, path, "state", "%d", &result, NULL);
+	if (err)
+		result = XenbusStateClosed;
+
+	return result;
+}
+EXPORT_SYMBOL(xenbus_read_driver_state);
+
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
Index: drivers/xenbus/xenbus_comms.c
===================================================================
RCS file: drivers/xenbus/xenbus_comms.c
diff -N drivers/xenbus/xenbus_comms.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ drivers/xenbus/xenbus_comms.c	26 Dec 2005 09:14:14 -0000
@@ -0,0 +1,219 @@
+/******************************************************************************
+ * xenbus_comms.c
+ *
+ * Low level code to talk to Xen Store: ringbuffer and event channel.
+ * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+#include
+#include
+#include
+#include
+
+static int xenbus_irq;	/* value returned by bind_evtchn_to_irqhandler(); 0 until bound */
+
+extern void xenbus_probe(void *);
+extern int xenstored_ready;
+#if 0
+static DECLARE_WORK(probe_work, xenbus_probe, NULL);
+#endif
+int xb_wait;	/* sleep/wakeup channel shared by xb_read(), xb_write() and wake_waiting() */
+extern int xb_inited;
+extern char *xen_store;
+#define wake_up wakeup
+#define xb_waitq xb_wait
+#define pr_debug(a,b,c)	/* compiled out; takes exactly three arguments */
+/* View the xen_store pointer as the xenstore ring interface structure. */
+static inline struct xenstore_domain_interface *xenstore_domain_interface(void)
+{
+	return (struct xenstore_domain_interface *)xen_store;
+}
+/* Handler registered by xb_init_comms(): wake whoever sleeps on xb_wait. */
+static void
+wake_waiting(void * arg __attribute__((unused)))
+{
+#if 0
+	if (unlikely(xenstored_ready == 0)) {
+		xenstored_ready = 1;
+		schedule_work(&probe_work);
+	}
+#endif
+	wakeup(&xb_wait);
+}
+/* Non-zero when the distance prod - cons does not exceed the ring size. */
+static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
+{
+	return ((prod - cons) <= XENSTORE_RING_SIZE);
+}
+/*
+ * Locate the next writable span of ring 'buf'.  Returns the address at
+ * the masked producer index and sets *len to the smaller of the bytes
+ * left before the end of the ring and the free space
+ * (XENSTORE_RING_SIZE - (prod - cons)).
+ */
+static void *get_output_chunk(XENSTORE_RING_IDX cons,
+			      XENSTORE_RING_IDX prod,
+			      char *buf, uint32_t *len)
+{
+	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
+	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
+		*len = XENSTORE_RING_SIZE - (prod - cons);
+	return buf + MASK_XENSTORE_IDX(prod);
+}
+/*
+ * Locate the next readable span of ring 'buf'.  Returns the address at
+ * the masked consumer index and sets *len to the smaller of the bytes
+ * left before the end of the ring and the bytes pending (prod - cons).
+ */
+static const void *get_input_chunk(XENSTORE_RING_IDX cons,
+				   XENSTORE_RING_IDX prod,
+				   const char *buf, uint32_t *len)
+{
+	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
+	if ((prod - cons) < *len)
+		*len = prod - cons;
+	return buf + MASK_XENSTORE_IDX(cons);
+}
+/*
+ * Copy 'len' bytes from 'tdata' into the request ring, one contiguous
+ * chunk at a time, notifying the store event channel after each chunk.
+ * Returns 0 once everything is queued, or -EIO when the ring indexes
+ * fail check_indexes().
+ */
+int xb_write(const void *tdata, unsigned len)
+{
+	struct xenstore_domain_interface *intf = xenstore_domain_interface();
+	XENSTORE_RING_IDX cons, prod;
+	const char *data = (const char *)tdata;
+
+	while (len != 0) {
+		void *dst;
+		unsigned int avail;
+		if (xb_inited)	/* only sleep once xb_inited has been set */
+			wait_event_interruptible(&xb_waitq,
+						 (intf->req_prod - intf->req_cons) !=
+						 XENSTORE_RING_SIZE);
+
+		/* Read indexes, then verify.
*/
+		cons = intf->req_cons;
+		prod = intf->req_prod;
+		mb();
+		if (!check_indexes(cons, prod))
+			return -EIO;
+
+		dst = get_output_chunk(cons, prod, intf->req, &avail);
+		if (avail == 0)
+			continue;	/* no room in the ring: go round again */
+		if (avail > len)
+			avail = len;	/* never copy more than the caller has left */
+
+		memcpy(dst, data, avail);
+		data += avail;
+		len -= avail;
+
+		/* Other side must not see new header until data is there. */
+		wmb();
+		intf->req_prod += avail;
+
+		/* This implies mb() before other side sees interrupt. */
+		notify_remote_via_evtchn(xen_start_info->store_evtchn);
+	}
+
+	return 0;
+}
+/*
+ * Copy exactly 'len' bytes out of the response ring into 'tdata',
+ * sleeping on xb_waitq while the ring is empty and notifying the store
+ * event channel after each chunk consumed.  Returns 0 when 'len' bytes
+ * have been read, or -EIO when the ring indexes fail check_indexes().
+ */
+int xb_read(void *tdata, unsigned len)
+{
+	struct xenstore_domain_interface *intf = xenstore_domain_interface();
+	XENSTORE_RING_IDX cons, prod;
+	char *data = (char *)tdata;
+
+	while (len != 0) {
+		unsigned int avail;
+		const char *src;
+
+		wait_event_interruptible(&xb_waitq,
+					 intf->rsp_cons != intf->rsp_prod);
+
+		/* Read indexes, then verify. */
+		cons = intf->rsp_cons;
+		prod = intf->rsp_prod;
+		mb();
+		if (!check_indexes(cons, prod))
+			return -EIO;
+
+		src = get_input_chunk(cons, prod, intf->rsp, &avail);
+		if (avail == 0)
+			continue;	/* nothing pending yet: wait again */
+		if (avail > len)
+			avail = len;	/* leave any surplus in the ring */
+
+		/* We must read header before we read data. */
+		rmb();
+
+		memcpy(data, src, avail);
+		data += avail;
+		len -= avail;
+
+		/* Other side must not see free space until we've copied out */
+		mb();
+		intf->rsp_cons += avail;
+
+		pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
+
+		/* Implies mb(): they will see new header. */
+		notify_remote_via_evtchn(xen_start_info->store_evtchn);
+	}
+
+	return 0;
+}
+
+/* Set up interrupt handler off store event channel.
*/ +int xb_init_comms(void) +{ + int err; + + if (xenbus_irq) + unbind_from_irqhandler(xenbus_irq, &xb_waitq); + + err = bind_evtchn_to_irqhandler( + xen_start_info->store_evtchn, "xenbus", wake_waiting, INTR_TYPE_NET); + if (err <= 0) { + printk("XENBUS request irq failed %i\n", err); + return err; + } + + xenbus_irq = err; + + return 0; +} + +/* + * Local variables: + * c-file-style: "bsd" + * indent-tabs-mode: t + * c-indent-level: 4 + * c-basic-offset: 8 + * tab-width: 4 + * End: + */ Index: drivers/xenbus/xenbus_comms.h =================================================================== RCS file: drivers/xenbus/xenbus_comms.h diff -N drivers/xenbus/xenbus_comms.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ drivers/xenbus/xenbus_comms.h 26 Dec 2005 20:13:52 -0000 @@ -0,0 +1,63 @@ +/* + * Private include for xenbus communications. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XENBUS_COMMS_H +#define _XENBUS_COMMS_H + +int xs_init(void); +int xb_init_comms(void); + +/* Low level routines. */ +int xb_write(const void *data, unsigned len); +int xb_read(void *data, unsigned len); +int xs_input_avail(void); +extern int xb_waitq; + +static __inline int wait_event_interruptible(void *wchan, int condition) +{ + int ret = 0; + for (;;) { + if (condition) + break; + if ((ret = !tsleep(wchan, PWAIT | PCATCH, "wait_event", hz/10))) + break; + } + return ret; +} + + +#endif /* _XENBUS_COMMS_H */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ Index: drivers/xenbus/xenbus_dev.c =================================================================== RCS file: drivers/xenbus/xenbus_dev.c diff -N drivers/xenbus/xenbus_dev.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ drivers/xenbus/xenbus_dev.c 16 Dec 2005 05:58:14 -0000 @@ -0,0 +1,266 @@ +/* + * xenbus_dev.c + * + * Driver giving user-space access to the kernel's xenbus connection + * to xenstore. 
+ * + * Copyright (c) 2005, Christian Limpach + * Copyright (c) 2005, Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+#include
+
+#include
+#include
+#include
+
+
+#define EXPORT_SYMBOL(x)	/* expands to nothing */
+#define kmalloc(size, unused) malloc(size, M_DEVBUF, M_WAITOK)	/* second argument is dropped */
+#define kfree(ptr) free(ptr, M_DEVBUF)
+#define BUG_ON PANIC_IF
+#define semaphore sema
+#define rw_semaphore sema
+typedef struct mtx spinlock_t;
+#define spin_lock mtx_lock
+#define spin_unlock mtx_unlock
+#define DEFINE_SPINLOCK(lock) struct mtx lock
+#define DECLARE_MUTEX(lock) struct sema lock
+#define down sema_wait
+#define up sema_post
+#define down_read sema_wait
+#define up_read sema_post
+#define down_write sema_wait
+#define up_write sema_post
+#define u32 uint32_t
+#define simple_strtoul strtoul
+/* One transaction started through this device and not yet ended. */
+struct xenbus_dev_transaction {
+	LIST_ENTRY(xenbus_dev_transaction) list;	/* linkage in xenbus_dev_data.transactions */
+	struct xenbus_transaction *handle;	/* id parsed from the XS_TRANSACTION_START reply */
+};
+/* State allocated in xenbus_dev_open() and stored in si_drv1. */
+struct xenbus_dev_data {
+	/* In-progress transaction. */
+	LIST_HEAD(xdd_list_head, xenbus_dev_transaction) transactions;
+
+	/* Partial request. */
+	unsigned int len;	/* bytes of u.buffer filled so far */
+	union {
+		struct xsd_sockmsg msg;	/* header view of the request */
+		char buffer[PAGE_SIZE];	/* raw bytes as written by user space */
+	} u;
+
+	/* Response queue.
*/
+#define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1))
+	char read_buffer[PAGE_SIZE];
+	unsigned int read_cons, read_prod;
+	int read_waitq;
+};
+#if 0
+static struct proc_dir_entry *xenbus_dev_intf;
+#endif
+/*
+ * d_read handler.  Sleeps until the reply ring is non-empty, then copies
+ * queued reply bytes, one at a time, into the first iovec of 'uio' until
+ * either that iovec is full or the ring is drained.  uio_resid is reduced
+ * by the number of bytes delivered.
+ *
+ * Returns 0 on success, EINTR if the wait was interrupted, or EFAULT if
+ * the user buffer could not be written.
+ */
+static int
+xenbus_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
+{
+	int i = 0;
+	struct xenbus_dev_data *u = dev->si_drv1;
+
+	if (wait_event_interruptible(&u->read_waitq,
+				     u->read_prod != u->read_cons))
+		return EINTR;
+
+	for (i = 0; i < uio->uio_iov[0].iov_len; i++) {
+		if (u->read_cons == u->read_prod)
+			break;
+		/* A faulting user buffer must not silently consume data. */
+		if (copyout(&u->read_buffer[MASK_READ_IDX(u->read_cons)],
+			    (char *)uio->uio_iov[0].iov_base+i, 1) != 0)
+			return EFAULT;
+		u->read_cons++;
+		uio->uio_resid--;
+	}
+	return 0;
+}
+
+/*
+ * Append 'len' bytes of 'data' to the reply ring and wake any reader.
+ * Overrunning the unread part of the ring is treated as a bug.
+ */
+static void queue_reply(struct xenbus_dev_data *u,
+			char *data, unsigned int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++, u->read_prod++)
+		u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i];
+
+	BUG_ON((u->read_prod - u->read_cons) > sizeof(u->read_buffer));
+
+	wakeup(&u->read_waitq);
+}
+
+/*
+ * d_write handler.  Bytes from the first iovec are accumulated in
+ * u->u.buffer until a complete xsd_sockmsg (header plus msg.len payload
+ * bytes) is present; the message is then forwarded to xenstore and the
+ * reply queued for xenbus_dev_read().
+ *
+ * Like the other handlers in this file it returns 0 or a positive errno;
+ * the amount consumed is reported by reducing uio_resid.
+ */
+static int
+xenbus_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
+{
+	int err = 0;
+	struct xenbus_dev_data *u = dev->si_drv1;
+	struct xenbus_dev_transaction *trans;
+	void *reply;
+	int len = uio->uio_iov[0].iov_len;
+
+	if ((len + u->len) > sizeof(u->u.buffer))
+		return EINVAL;
+
+	if (copyin(u->u.buffer + u->len, uio->uio_iov[0].iov_base, len) != 0)
+		return EFAULT;
+
+	u->len += len;
+	if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) {
+		/* Message still incomplete: accept these bytes and wait. */
+		uio->uio_resid -= len;
+		return 0;
+	}
+
+	switch (u->u.msg.type) {
+	case XS_TRANSACTION_START:
+	case XS_TRANSACTION_END:
+	case XS_DIRECTORY:
+	case XS_READ:
+	case XS_GET_PERMS:
+	case XS_RELEASE:
+	case XS_GET_DOMAIN_PATH:
+	case XS_WRITE:
+	case XS_MKDIR:
+	case XS_RM:
+	case XS_SET_PERMS:
+		reply = xenbus_dev_request_and_reply(&u->u.msg);
+		if (IS_ERR(reply)) {
+			err = PTR_ERR(reply);
+		} else {
+			if (u->u.msg.type == XS_TRANSACTION_START) {
+				trans = kmalloc(sizeof(*trans), GFP_KERNEL);
+				trans->handle = (struct xenbus_transaction *)
+					simple_strtoul(reply,
NULL, 0); + LIST_INSERT_HEAD(&u->transactions, trans, list); + } else if (u->u.msg.type == XS_TRANSACTION_END) { + LIST_FOREACH(trans, &u->transactions, + list) + if ((unsigned long)trans->handle == + (unsigned long)u->u.msg.tx_id) + break; +#if 0 /* XXX does this mean the list is empty? */ + BUG_ON(&trans->list == &u->transactions); +#endif + LIST_REMOVE(trans, list); + kfree(trans); + } + queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); + queue_reply(u, (char *)reply, u->u.msg.len); + kfree(reply); + } + break; + + default: + err = EINVAL; + break; + } + + if (err == 0) { + u->len = 0; + err = len; + } + + return err; +} + +static int xenbus_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +{ + struct xenbus_dev_data *u; + + if (xen_start_info->store_evtchn == 0) + return ENOENT; +#if 0 /* XXX figure out if equiv needed */ + nonseekable_open(inode, filp); +#endif + u = kmalloc(sizeof(*u), GFP_KERNEL); + if (u == NULL) + return ENOMEM; + + memset(u, 0, sizeof(*u)); + LIST_INIT(&u->transactions); + + dev->si_drv1 = u; + + return 0; +} + +static int xenbus_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) +{ + struct xenbus_dev_data *u = dev->si_drv1; + struct xenbus_dev_transaction *trans, *tmp; + + LIST_FOREACH_SAFE(trans, &u->transactions, list, tmp) { + xenbus_transaction_end(trans->handle, 1); + LIST_REMOVE(trans, list); + kfree(trans); + } + + kfree(u); + return 0; +} + +static struct cdevsw xenbus_dev_cdevsw = { + .d_version = D_VERSION, + .d_read = xenbus_dev_read, + .d_write = xenbus_dev_write, + .d_open = xenbus_dev_open, + .d_close = xenbus_dev_close, + .d_name = "xenbus_dev", +}; + +static int +xenbus_dev_sysinit(void) +{ + make_dev(&xenbus_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400, "xenbus"); + + return 0; +} +SYSINIT(xenbus_dev_sysinit, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, xenbus_dev_sysinit, NULL); +/* SYSINIT NEEDED XXX */ + + + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * 
c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ Index: drivers/xenbus/xenbus_probe.c =================================================================== RCS file: drivers/xenbus/xenbus_probe.c diff -N drivers/xenbus/xenbus_probe.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ drivers/xenbus/xenbus_probe.c 26 Dec 2005 22:16:33 -0000 @@ -0,0 +1,1225 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005 XenSource Ltd + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#if 0 +#define DPRINTK(fmt, args...) 
\
+	printk("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
+#else
+#define DPRINTK(fmt, args...) ((void)0)
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+#define EXPORT_SYMBOL(x)	/* expands to nothing */
+#define kmalloc(size, unused) malloc(size, M_DEVBUF, M_WAITOK)	/* second argument is dropped */
+#define kfree(ptr) free(ptr, M_DEVBUF)
+#define BUG_ON PANIC_IF
+#define semaphore sema
+#define rw_semaphore sema
+typedef struct mtx spinlock_t;
+#define spin_lock mtx_lock
+#define spin_unlock mtx_unlock
+#define DEFINE_SPINLOCK(lock) struct mtx lock
+#define DECLARE_MUTEX(lock) struct sema lock
+#define down sema_wait
+#define up sema_post
+#define down_read sema_wait
+#define up_read sema_post
+#define down_write sema_wait
+#define up_write sema_post
+#define u32 uint32_t
+#define list_del(head, ent) TAILQ_REMOVE(head, ent, list)
+#define simple_strtoul strtoul
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+#define list_empty TAILQ_EMPTY
+#define wake_up wakeup
+#define KERN_WARNING	/* expands to nothing, so printk() sees only the format string */
+#define BUS_ID_SIZE 128	/* size of the bus_id buffers filled by the *_bus_id() helpers */
+
+extern struct semaphore xenwatch_mutex;
+
+EVENTHANDLER_DECLARE(xenstore_event, xenstore_event_handler_t);
+static struct eventhandler_list *xenstore_chain;
+int xb_inited = 0;
+device_t xenbus_frontend_dev;
+device_t xenbus_backend_dev;
+
+#define streq(a, b) (strcmp((a), (b)) == 0)	/* true when both strings are equal */
+
+static char *kasprintf(const char *fmt, ...);
+
+
+/* If something in array of ids matches this device, return it.
*/
+/*
+ * 'arr' is scanned up to (not including) its terminating entry, which is
+ * the one whose devicetype is the empty string.  Returns the first entry
+ * whose devicetype equals dev->devicetype, or NULL when none does.
+ */
+static const struct xenbus_device_id *
+match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
+{
+	for (; !streq(arr->devicetype, ""); arr++) {
+		if (streq(arr->devicetype, dev->devicetype))
+			return arr;
+	}
+	return NULL;
+}
+
+#if 0
+static int xenbus_match(device_t _dev)
+{
+	struct xenbus_driver *drv;
+	struct xenbus_device *dev;
+
+	dev = device_get_softc(_dev);
+	drv = dev->driver;
+
+	if (!drv->ids)
+		return 0;
+
+	return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
+}
+#endif
+
+/* Description of one xenbus "bus" (frontend or backend side). */
+struct xen_bus_type
+{
+	char *root;		/* top-level xenstore directory, e.g. "device" */
+	unsigned int levels;	/* number of path components below root */
+	/* Derive a bus id string from a xenstore node name. */
+	int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
+	int (*probe)(const char *type, const char *dir, int unit);
+#if 0
+	struct bus_type bus;
+	struct device dev;
+#endif
+	device_t bus;
+	device_t dev;
+};
+
+
+/*
+ * device/<type>/<id> => <type>-<id>
+ *
+ * Everything after the first '/' of 'nodename' is copied into 'bus_id'
+ * and the next '/' in the copy is replaced by '-'.  Returns 0 on success
+ * and EINVAL when 'nodename' has no '/', when the remainder does not fit
+ * in BUS_ID_SIZE, or when the remainder contains no further '/'.
+ */
+static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+	const char *rest = strchr(nodename, '/');
+	char *sep;
+
+	if (!rest || strlen(rest + 1) >= BUS_ID_SIZE) {
+		/* Report the name we were given, not the (possibly NULL) tail. */
+		printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
+		return EINVAL;
+	}
+
+	strlcpy(bus_id, rest + 1, BUS_ID_SIZE);
+	sep = strchr(bus_id, '/');
+	if (!sep) {
+		printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
+		return EINVAL;
+	}
+	*sep = '-';
+	return 0;
+}
+
+
+/*
+ * Read the other end's domain id (key 'id_node', parsed with "%i") and
+ * path (key 'path_node') from below xendev->nodename into
+ * xendev->otherend_id and xendev->otherend.
+ *
+ * Returns 0 on success.  A gather failure is reported as fatal and its
+ * error returned.  An empty or non-existent other-end path is reported
+ * as fatal, the path is freed and reset to NULL, and ENOENT is returned.
+ */
+static int read_otherend_details(struct xenbus_device *xendev,
+				 char *id_node, char *path_node)
+{
+	int err = xenbus_gather(NULL, xendev->nodename,
+				id_node, "%i", &xendev->otherend_id,
+				path_node, NULL, &xendev->otherend,
+				NULL);
+	if (err) {
+		xenbus_dev_fatal(xendev, err,
+				 "reading other end details from %s",
+				 xendev->nodename);
+		return err;
+	}
+	if (strlen(xendev->otherend) == 0 ||
+	    !xenbus_exists(NULL, xendev->otherend, "")) {
+		xenbus_dev_fatal(xendev, ENOENT, "missing other end from %s",
+				 xendev->nodename);
+		kfree(xendev->otherend);
+		xendev->otherend = NULL;
+		return ENOENT;
+	}
+
+	return 0;
+}
+
+
+static int read_backend_details(struct xenbus_device *xendev)
+{ + return read_otherend_details(xendev, "backend-id", "backend"); +} + + +static int read_frontend_details(struct xenbus_device *xendev) +{ + return read_otherend_details(xendev, "frontend-id", "frontend"); +} + + +static void free_otherend_details(struct xenbus_device *dev) +{ + kfree(dev->otherend); + dev->otherend = NULL; +} + + +static void free_otherend_watch(struct xenbus_device *dev) +{ + if (dev->otherend_watch.node) { + unregister_xenbus_watch(&dev->otherend_watch); + kfree(dev->otherend_watch.node); + dev->otherend_watch.node = NULL; + } +} + + +/* Bus type for frontend drivers. */ +static int xenbus_probe_frontend(const char *type, const char *name, int unit); +static struct xen_bus_type xenbus_frontend = { + .root = "device", + .levels = 2, /* device/type/ */ + .get_bus_id = frontend_bus_id, + .probe = xenbus_probe_frontend, +#if 0 + /* this initialization needs to happen dynamically */ + .bus = { + .name = "xen", + .match = xenbus_match, + }, + .dev = { + .bus_id = "xen", + }, +#endif +}; + +/* backend/// => -- */ +static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) +{ + int domid, err; + const char *devid, *type; + char *frontend; + unsigned int typelen; + + type = strchr(nodename, '/'); + if (!type) + return EINVAL; + type++; + typelen = strcspn(type, "/"); + if (!typelen || type[typelen] != '/') + return EINVAL; + + devid = strrchr(nodename, '/') + 1; + + err = xenbus_gather(NULL, nodename, "frontend-id", "%i", &domid, + "frontend", NULL, &frontend, + NULL); + if (err) + return err; + if (strlen(frontend) == 0) + err = ERANGE; + + if (!err && !xenbus_exists(NULL, frontend, "")) + err = ENOENT; + + if (err) { + kfree(frontend); + return err; + } + + if (snprintf(bus_id, BUS_ID_SIZE, + "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE) + return ENOSPC; + return 0; +} +#if 0 +static int xenbus_hotplug_backend(device_t dev, char **envp, + int num_envp, char *buffer, int buffer_size) +{ + panic("implement me"); +#if 0 + 
struct xenbus_device *xdev;	/* this whole body sits inside "#if 0" and is not compiled */
+	struct xenbus_driver *drv = NULL;
+	int i = 0;
+	int length = 0;
+	char *basepath_end;
+	char *frontend_id;
+
+	DPRINTK("");
+
+	if (dev == NULL)
+		return ENODEV;
+
+	xdev = to_xenbus_device(dev);
+	if (xdev == NULL)
+		return ENODEV;
+
+	if (dev->driver)
+		drv = to_xenbus_driver(dev->driver);
+
+	/* stuff we want to pass to /sbin/hotplug */
+	add_hotplug_env_var(envp, num_envp, &i,
+			    buffer, buffer_size, &length,
+			    "XENBUS_TYPE=%s", xdev->devicetype);
+
+	add_hotplug_env_var(envp, num_envp, &i,
+			    buffer, buffer_size, &length,
+			    "XENBUS_PATH=%s", xdev->nodename);
+
+	add_hotplug_env_var(envp, num_envp, &i,
+			    buffer, buffer_size, &length,
+			    "XENBUS_BASE_PATH=%s", xdev->nodename);
+
+	basepath_end = strrchr(envp[i - 1], '/');	/* cut the last path component ... */
+	length -= strlen(basepath_end);
+	*basepath_end = '\0';
+	basepath_end = strrchr(envp[i - 1], '/');	/* ... and then the one before it */
+	length -= strlen(basepath_end);
+	*basepath_end = '\0';
+
+	basepath_end++;	/* now points at the component that was cut second */
+	frontend_id = kmalloc(strlen(basepath_end) + 1, GFP_KERNEL);
+	strcpy(frontend_id, basepath_end);
+	add_hotplug_env_var(envp, num_envp, &i,
+			    buffer, buffer_size, &length,
+			    "XENBUS_FRONTEND_ID=%s", frontend_id);
+	kfree(frontend_id);
+
+	/* terminate, set to next free slot, shrink available space */
+	envp[i] = NULL;
+	envp = &envp[i];
+	num_envp -= i;
+	buffer = &buffer[length];
+	buffer_size -= length;
+
+	if (drv && drv->hotplug)
+		return drv->hotplug(xdev, envp, num_envp, buffer,
+				    buffer_size);
+
+#endif
+	return 0;
+}
+#endif
+/* Bus type for backend drivers; bus and dev are left unset here. */
+static int xenbus_probe_backend(const char *type, const char *domid, int unit);
+static struct xen_bus_type xenbus_backend = {
+	.root = "backend",
+	.levels = 3, 		/* backend/type/<frontend>/<id> */
+	.get_bus_id = backend_bus_id,
+	.probe = xenbus_probe_backend,
+	/* at init time */
+#if 0
+	.bus = {
+		.name  = "xen-backend",
+		.match = xenbus_match,
+		.hotplug = xenbus_hotplug_backend,
+	},
+	.dev = {
+		.bus_id = "xen-backend",
+	},
+#endif
+};
+
+
+static void otherend_changed(struct xenbus_watch *watch,
+			     const char
**vec, unsigned int len)
+{
+	panic("implement me");	/* watch callback not ported yet; the reference body below is disabled */
+#if 0
+	struct xenbus_device *dev =
+		container_of(watch, struct xenbus_device, otherend_watch);
+	struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
+	XenbusState state;
+
+	/* Protect us against watches firing on old details when the otherend
+	   details change, say immediately after a resume. */
+	if (!dev->otherend ||
+	    strncmp(dev->otherend, vec[XS_WATCH_PATH],
+		    strlen(dev->otherend))) {
+		DPRINTK("Ignoring watch at %s", vec[XS_WATCH_PATH]);
+		return;
+	}
+
+	state = xenbus_read_driver_state(dev->otherend);
+
+	DPRINTK("state is %d, %s, %s",
+		state, dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+	if (drv->otherend_changed)
+		drv->otherend_changed(dev, state);
+#endif
+}
+
+/*
+ * Intended to (re)read the other end's details and watch its "state"
+ * node.  NOTE(review): panic() is the first statement, so presumably
+ * nothing after it runs in this snapshot; the refresh of the other-end
+ * details is still disabled.
+ */
+static int talk_to_otherend(struct xenbus_device *dev)
+{
+	panic("implement me");
+#if 0
+
+	struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
+	int err;
+
+	free_otherend_watch(dev);
+	free_otherend_details(dev);
+
+	err = drv->read_otherend_details(dev);
+	if (err)
+		return err;
+#endif
+	return xenbus_watch_path2(dev, dev->otherend, "state",
+				  &dev->otherend_watch, otherend_changed);
+
+}
+
+/*
+ * Probe entry point for a xenbus device.
+ *
+ * Sets up communication with the other end, then calls the driver's
+ * probe hook with the matching id-table entry.  A talk_to_otherend()
+ * failure is returned as is.  A missing probe hook, no matching id, or
+ * a failing probe hook is recorded with xenbus_dev_error(), the device
+ * is switched to XenbusStateClosed, and ENODEV is returned.
+ */
+int xenbus_dev_probe(device_t _dev)
+{
+	struct xenbus_device *dev = to_xenbus_device(_dev);
+	struct xenbus_driver *drv = dev->driver;
+	const struct xenbus_device_id *id;
+	int err;
+
+	DPRINTK("");
+
+	err = talk_to_otherend(dev);
+	if (err) {
+		printk(KERN_WARNING
+		       "xenbus_probe: talk_to_otherend on %s failed.\n",
+		       dev->nodename);
+		return err;
+	}
+
+	if (!drv->probe) {
+		err = ENODEV;
+		goto fail;
+	}
+
+	id = match_device(drv->ids, dev);
+	if (!id) {
+		err = ENODEV;
+		goto fail;
+	}
+
+	err = drv->probe(dev, id);
+	if (err)
+		goto fail;
+
+	return 0;
+fail:
+	xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
+	xenbus_switch_state(dev, NULL, XenbusStateClosed);
+	return ENODEV;	/* the specific error was recorded above; callers only see ENODEV */
+
+}
+/* Removal entry point for a xenbus device. */
+int xenbus_dev_remove(device_t _dev)
+{
+	struct xenbus_device *dev = to_xenbus_device(_dev);
+	struct
xenbus_driver *drv = dev->driver;
+
+	DPRINTK("");
+
+	free_otherend_watch(dev);	/* stop watching the other end first */
+	free_otherend_details(dev);
+
+	if (drv->remove)
+		drv->remove(dev);	/* optional driver hook */
+
+	xenbus_switch_state(dev, NULL, XenbusStateClosed);
+	return 0;
+}
+/* Currently does nothing and returns 0; the registration code is disabled. */
+static int xenbus_register_driver_common(struct xenbus_driver *drv,
+					 struct xen_bus_type *bus)
+{
+#if 0
+	int ret;
+	/* this all happens in the driver itself
+	 * doing this here simply serves to obfuscate
+	 */
+
+	drv->driver.name = drv->name;
+	drv->driver.bus = &bus->bus;
+	drv->driver.owner = drv->owner;
+	drv->driver.probe = xenbus_dev_probe;
+	drv->driver.remove = xenbus_dev_remove;
+
+	down(&xenwatch_mutex);
+	ret = driver_register(&drv->driver);
+	up(&xenwatch_mutex);
+	return ret;
+#endif
+	return 0;
+}
+/* Register a frontend driver: its "other end" is a backend. */
+int xenbus_register_frontend(struct xenbus_driver *drv)
+{
+	drv->read_otherend_details = read_backend_details;
+
+	return xenbus_register_driver_common(drv, &xenbus_frontend);
+}
+EXPORT_SYMBOL(xenbus_register_frontend);
+/* Register a backend driver: its "other end" is a frontend. */
+int xenbus_register_backend(struct xenbus_driver *drv)
+{
+	drv->read_otherend_details = read_frontend_details;
+
+	return xenbus_register_driver_common(drv, &xenbus_backend);
+}
+EXPORT_SYMBOL(xenbus_register_backend);
+/* Currently a no-op; the unregister call is disabled. */
+void xenbus_unregister_driver(struct xenbus_driver *drv)
+{
+#if 0
+	driver_unregister(&drv->driver);
+#endif
+}
+EXPORT_SYMBOL(xenbus_unregister_driver);
+/* Search key (nodename) and result (dev) for the device lookup helpers. */
+struct xb_find_info
+{
+	struct xenbus_device *dev;
+	const char *nodename;
+};
+
+#if 0
+static int cmp_dev(device_t dev, void *data)
+{
+	struct xenbus_device *xendev = to_xenbus_device(dev);
+	struct xb_find_info *info = data;
+
+	if (streq(xendev->nodename, info->nodename)) {
+		info->dev = xendev;
+		/* XXX ref counting?
*/
+#if 0
+		get_device(dev);
+#endif
+		return 1;
+	}
+	return 0;
+}
+#endif
+/*
+ * Look up the xenbus device for 'nodename' on 'bus'.
+ * NOTE(review): not functional yet - it panics before the lookup, and
+ * the lookup itself uses the placeholder class name "bad" and unit 0.
+ */
+static struct xenbus_device *xenbus_device_find(const char *nodename,
+						device_t bus)
+{
+#if 0
+	struct xb_find_info info = { .dev = NULL, .nodename = nodename };
+
+	bus_for_each_dev(bus, NULL, &info, cmp_dev);
+	return info.dev;
+#endif
+	int unit = 0;
+	char *classname = "bad";
+	panic("need to split nodename into classname and unit");
+
+	return device_get_softc(device_find_child(bus, classname, unit));
+}
+
+#if 0
+static int cleanup_dev(device_t dev, void *data)
+{
+	struct xenbus_device *xendev = device_get_softc(dev);
+	struct xb_find_info *info = data;
+	int len = strlen(info->nodename);
+
+	DPRINTK("%s", info->nodename);
+
+	if (!strncmp(xendev->nodename, info->nodename, len)) {
+		info->dev = xendev;
+#if 0
+		get_device(dev);
+#endif
+		return 1;
+	}
+	return 0;
+}
+
+#endif
+static void xenbus_cleanup_devices(const char *path, device_t bus)
+{
+	panic("unimplemented");	/* the reference loop below is disabled */
+#if 0
+	struct xb_find_info info = { .nodename = path };
+
+	do {
+		info.dev = NULL;
+		bus_for_each_dev(bus, NULL, &info, cleanup_dev);
+		if (info.dev) {
+			device_unregister(&info.dev->dev);
+			put_device(&info.dev->dev);
+		}
+	} while (info.dev);
+#endif
+}
+
+#if 0
+static void xenbus_dev_free(struct xenbus_device *xendev)
+{
+	kfree(xendev);
+}
+
+
+void xenbus_dev_release(device_t dev)
+{
+	/*
+	 * nothing to do softc gets freed with the device
+	 */
+
+}
+#endif
+/* Simplified asprintf. */
+static char *kasprintf(const char *fmt, ...)
+{ + va_list ap; + unsigned int len; + char *p, dummy[1]; + + va_start(ap, fmt); + /* FIXME: vsnprintf has a bug, NULL should work */ + len = vsnprintf(dummy, 0, fmt, ap); + va_end(ap); + + p = kmalloc(len + 1, GFP_KERNEL); + if (!p) + return NULL; + va_start(ap, fmt); + vsprintf(p, fmt, ap); + va_end(ap); + return p; +} + +#if 0 +static ssize_t xendev_show_nodename(struct device *dev, char *buf) +{ + return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); +} +DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); + +static ssize_t xendev_show_devtype(struct device *dev, char *buf) +{ + return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); +} +DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); +#endif + +static int xenbus_probe_node(struct xen_bus_type *bus, + const char *type, + const char *nodename, + int unit) +{ +#define CHECK_FAIL \ + do { \ + if (err) \ + goto fail; \ + } \ + while (0) \ + + + + int err; + struct xenbus_device *xendev; + size_t stringlen; + char *tmpstring; + device_t child; + + XenbusState state = xenbus_read_driver_state(nodename); + + if (state != XenbusStateInitialising) { + /* Device is not new, so ignore it. This can happen if a + device is going away after switching to Closed. */ + return 0; + } + + stringlen = strlen(nodename) + 1 + strlen(type) + 1; + xendev = kmalloc(sizeof(*xendev) + stringlen, GFP_KERNEL); + if (!xendev) + return ENOMEM; + memset(xendev, 0, sizeof(*xendev)); + + /* Copy the strings into the extra space. 
*/ + + tmpstring = (char *)(xendev + 1); + strcpy(tmpstring, nodename); + xendev->nodename = tmpstring; + + tmpstring += strlen(tmpstring) + 1; + strcpy(tmpstring, type); + xendev->devicetype = tmpstring; + +/* XXX WIP */ +#if 0 + child = BUS_ADD_CHILD(xenbus_dev, 0, nodename, unit); +#else + child = NULL; +#endif + err = DEVICE_PROBE(child); + CHECK_FAIL; + + err = DEVICE_ATTACH(child); + CHECK_FAIL; + + panic("XXX implement me"); +#if 0 + xendev->dev.parent = &bus->dev; + xendev->dev.bus = &bus->bus; + xendev->dev.release = xenbus_dev_release; + + err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename); + CHECK_FAIL; + + /* Register with generic device framework. */ + err = device_register(&xendev->dev); + CHECK_FAIL; + + device_create_file(&xendev->dev, &dev_attr_nodename); + device_create_file(&xendev->dev, &dev_attr_devtype); +#endif + return 0; + +#undef CHECK_FAIL + +fail: +#if 0 + xenbus_dev_free(xendev); +#endif + return err; +} + +/* device// */ +static int xenbus_probe_frontend(const char *type, const char *name, int unit) +{ + char *nodename; + int err; + + nodename = kasprintf("%s/%s/%s", xenbus_frontend.root, type, name); + if (!nodename) + return ENOMEM; + + DPRINTK("%s", nodename); + + err = xenbus_probe_node(&xenbus_frontend, type, nodename, unit); + kfree(nodename); + return err; +} + +/* backend/// */ +static int xenbus_probe_backend_unit(const char *dir, + const char *type, + const char *name, + int unit) +{ + char *nodename; + int err; + + nodename = kasprintf("%s/%s", dir, name); + if (!nodename) + return ENOMEM; + + DPRINTK("%s\n", nodename); + + err = xenbus_probe_node(&xenbus_backend, type, nodename, unit); + kfree(nodename); + return err; +} + +/* backend// */ +static int xenbus_probe_backend(const char *type, const char *domid, int unit) +{ + char *nodename; + int err = 0; + char **dir; + unsigned int i, dir_n = 0; + + DPRINTK(""); + + nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid); + if (!nodename) + return 
ENOMEM; + + dir = xenbus_directory(NULL, nodename, "", &dir_n); + if (IS_ERR(dir)) { + kfree(nodename); + return PTR_ERR(dir); + } + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_backend_unit(nodename, type, dir[i], unit); + if (err) + break; + } + kfree(dir); + kfree(nodename); + return err; +} + +static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) +{ + int err = 0; + char **dir; + unsigned int dir_n = 0; + int i; + + dir = xenbus_directory(NULL, bus->root, type, &dir_n); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + for (i = 0; i < dir_n; i++) { + err = bus->probe(type, dir[i], i); + if (err) + break; + } + kfree(dir); + return err; +} + +static int xenbus_probe_devices(struct xen_bus_type *bus) +{ + int err = 0; + char **dir; + unsigned int i, dir_n; + + dir = xenbus_directory(NULL, bus->root, "", &dir_n); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_device_type(bus, dir[i]); + if (err) + break; + } + kfree(dir); + + return err; +} + +static unsigned int char_count(const char *str, char c) +{ + unsigned int i, ret = 0; + + for (i = 0; str[i]; i++) + if (str[i] == c) + ret++; + return ret; +} + +static int strsep_len(const char *str, char c, unsigned int len) +{ + unsigned int i; + + for (i = 0; str[i]; i++) + if (str[i] == c) { + if (len == 0) + return i; + len--; + } + return (len == 0) ? i : ERANGE; +} + +static void dev_changed(const char *node, struct xen_bus_type *bus) +{ + int exists, rootlen; + struct xenbus_device *dev; + char type[BUS_ID_SIZE]; + const char *p; + char *root; + + panic("XXX check me"); + if (char_count(node, '/') < 2) + return; + + exists = xenbus_exists(NULL, node, ""); + if (!exists) { + xenbus_cleanup_devices(node, bus->bus); + return; + } + + /* backend//... or device//... 
*/ + p = strchr(node, '/') + 1; + snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p); + type[BUS_ID_SIZE-1] = '\0'; + + rootlen = strsep_len(node, '/', bus->levels); + if (rootlen < 0) + return; + root = kasprintf("%.*s", rootlen, node); + if (!root) + return; + + dev = xenbus_device_find(root, bus->bus); + if (!dev) + xenbus_probe_node(bus, type, root, 0); +#if 0 + else + put_device(&dev->dev); +#endif + kfree(root); +} + +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + DPRINTK(""); + + dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); +} + +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + DPRINTK(""); + + dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); +} + +/* We watch for devices appearing and vanishing. */ +static struct xenbus_watch fe_watch = { + .node = "device", + .callback = frontend_changed, +}; + +static struct xenbus_watch be_watch = { + .node = "backend", + .callback = backend_changed, +}; + +#ifdef notyet + +static int suspend_dev(device_t dev, void *data) +{ + int err = 0; + struct xenbus_driver *drv; + struct xenbus_device *xdev; + + DPRINTK(""); + + xdev = device_get_softc(dev); + + drv = xdev->driver; + + if (device_get_driver(dev) == NULL) + return 0; + + if (drv->suspend) + err = drv->suspend(xdev); +#if 0 + /* bus_id ? */ + if (err) + printk(KERN_WARNING + "xenbus: suspend %s failed: %i\n", dev->bus_id, err); +#endif + return 0; +} + + + +static int resume_dev(device_t dev, void *data) +{ + int err; + struct xenbus_driver *drv; + struct xenbus_device *xdev; + + DPRINTK(""); + + if (device_get_driver(dev) == NULL) + return 0; + xdev = device_get_softc(dev); + drv = xdev->driver; + + err = talk_to_otherend(xdev); +#if 0 + if (err) { + printk(KERN_WARNING + "xenbus: resume (talk_to_otherend) %s failed: %i\n", + dev->bus_id, err); + return err; + } +#endif + if (drv->resume) + err = drv->resume(xdev); +#if 0 + /* bus_id? 
*/ + if (err) + printk(KERN_WARNING + "xenbus: resume %s failed: %i\n", dev->bus_id, err); +#endif + return err; +} + +#endif +void xenbus_suspend(void) +{ + DPRINTK(""); + panic("implement me"); +#if 0 + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev); + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, suspend_dev); +#endif + xs_suspend(); +} +EXPORT_SYMBOL(xenbus_suspend); + +void xenbus_resume(void) +{ + xb_init_comms(); + xs_resume(); + panic("implement me"); +#if 0 + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev); + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, resume_dev); +#endif +} +EXPORT_SYMBOL(xenbus_resume); + + +/* A flag to determine if xenstored is 'ready' (i.e. has started) */ +int xenstored_ready = 0; + + +int register_xenstore_notifier(xenstore_event_handler_t func, void *arg, int priority) +{ + int ret = 0; + + if (xenstored_ready > 0) + ret = func(NULL); + else + eventhandler_register(xenstore_chain, "xenstore", func, arg, priority); + + return ret; +} +EXPORT_SYMBOL(register_xenstore_notifier); +#if 0 +void unregister_xenstore_notifier(struct notifier_block *nb) +{ + notifier_chain_unregister(&xenstore_chain, nb); +} +EXPORT_SYMBOL(unregister_xenstore_notifier); +#endif + + +static void xenbus_probe(device_t dev) +{ + BUG_ON((xenstored_ready <= 0)); + + xenbus_frontend.dev = dev; + + /* Enumerate devices in xenstore. */ + xenbus_probe_devices(&xenbus_frontend); + xenbus_probe_devices(&xenbus_backend); + + /* Watch for changes. 
*/ + register_xenbus_watch(&fe_watch); + register_xenbus_watch(&be_watch); + + /* Notify others that xenstore is up */ + EVENTHANDLER_INVOKE(xenstore_event); +} + +#ifdef DOM0 +static struct proc_dir_entry *xsd_mfn_intf; +static struct proc_dir_entry *xsd_port_intf; + + +static int xsd_mfn_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + len = sprintf(page, "%ld", xen_start_info->store_mfn); + *eof = 1; + return len; +} + +static int xsd_port_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + len = sprintf(page, "%d", xen_start_info->store_evtchn); + *eof = 1; + return len; +} + +#endif +static int xenbus_identify(driver_t driver, device_t parent) +{ + int err = 0, dom0; + + DPRINTK(""); + + +#if 0 + if (xen_init() < 0) { + DPRINTK("failed"); + return ENODEV; + } + + + /* Register ourselves with the kernel bus & device subsystems */ + bus_register(&xenbus_frontend.bus); + bus_register(&xenbus_backend.bus); + device_register(&xenbus_frontend.dev); + device_register(&xenbus_backend.dev); +#endif + + /* + ** Domain0 doesn't have a store_evtchn or store_mfn yet. + */ + dom0 = (xen_start_info->store_evtchn == 0); + + +#ifdef DOM0 + if (dom0) { + + unsigned long page; + evtchn_op_t op = { 0 }; + int ret; + + + /* Allocate page. */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + return ENOMEM; + + /* We don't refcnt properly, so set reserved on page. 
+ * (this allocation is permanent) */ + SetPageReserved(virt_to_page(page)); + + xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); + + /* Next allocate a local port which xenstored can bind to */ + op.cmd = EVTCHNOP_alloc_unbound; + op.u.alloc_unbound.dom = DOMID_SELF; + op.u.alloc_unbound.remote_dom = 0; + + ret = HYPERVISOR_event_channel_op(&op); + BUG_ON(ret); + xen_start_info->store_evtchn = op.u.alloc_unbound.port; + + /* And finally publish the above info in /proc/xen */ + if((xsd_mfn_intf = create_xen_proc_entry("xsd_mfn", 0400))) + xsd_mfn_intf->read_proc = xsd_mfn_read; + if((xsd_port_intf = create_xen_proc_entry("xsd_port", 0400))) + xsd_port_intf->read_proc = xsd_port_read; + } +#endif + /* Initialize the interface to xenstore. */ + err = xs_init(); + if (err) { + printk(KERN_WARNING + "XENBUS: Error initializing xenstore comms: %i\n", err); + return err; + } + + if (!dom0) { + xenstored_ready = 1; + xenbus_frontend_dev = BUS_ADD_CHILD(parent, 0, "xenbus_frontend", 0); + if (xenbus_frontend_dev == NULL) + panic("xenbus: could not attach"); + xenbus_backend_dev = BUS_ADD_CHILD(parent, 0, "xenbus_backend", 0); + if (xenbus_backend_dev == NULL) + panic("xenbus: could not attach"); + } + + return 0; +} +#if 0 +SYSINIT(xenbus_probe_sysinit, SI_SUB_PSEUDO, SI_ORDER_FIRST, xenbus_probe_sysinit, NULL); +#endif + +static device_method_t xenbus_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, xenbus_identify), + DEVMETHOD(device_probe, xenbus_probe), + DEVMETHOD(device_attach, bus_generic_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, xenbus_suspend), + DEVMETHOD(device_resume, xenbus_resume), + + /* Bus interface */ + DEVMETHOD(bus_print_child, bus_generic_print_child), +#if 0 + DEVMETHOD(bus_add_child, bus_generic_add_child), +#endif + DEVMETHOD(bus_read_ivar, bus_generic_read_ivar), + DEVMETHOD(bus_write_ivar, 
bus_generic_write_ivar), +#if 0 + DEVMETHOD(bus_set_resource, bus_generic_set_resource), + DEVMETHOD(bus_get_resource, bus_generic_get_resource), +#endif + DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), + DEVMETHOD(bus_release_resource, bus_generic_release_resource), +#if 0 + DEVMETHOD(bus_delete_resource, bus_generic_delete_resource), +#endif + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), + DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), + DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), + + { 0, 0 } +}; + + +static driver_t xenbus_driver = { + "xenbus", + xenbus_methods, + 1, /* no softc */ +}; +static devclass_t xenbus_devclass; + +DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0); + + + + + + + +/* + * Local variables: + * c-file-style: "bsd" + * indent-tabs-mode: t + * c-indent-level: 4 + * c-basic-offset: 8 + * tab-width: 4 + * End: + */ Index: drivers/xenbus/xenbus_xs.c =================================================================== RCS file: drivers/xenbus/xenbus_xs.c diff -N drivers/xenbus/xenbus_xs.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ drivers/xenbus/xenbus_xs.c 26 Dec 2005 20:43:02 -0000 @@ -0,0 +1,877 @@ +/****************************************************************************** + * xenbus_xs.c + * + * This is the kernel equivalent of the "xs" library. We don't need everything + * and we use xenbus_comms for communication. 
+ * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + + +#define DEBUGGING + +#define EXPORT_SYMBOL(x) +#define kmalloc(size, unused) malloc(size, M_DEVBUF, M_WAITOK) +#define kfree(ptr) free(ptr, M_DEVBUF) +#define BUG_ON PANIC_IF +#define semaphore sema +#define rw_semaphore sema +typedef struct mtx spinlock_t; +#define DECLARE_MUTEX(lock) struct sema lock +#ifndef DEBUGGING +#define DEFINE_SPINLOCK(lock) struct mtx lock +#define spin_lock mtx_lock +#define spin_unlock mtx_unlock +#define down sema_wait +#define up sema_post +#define down_read sema_wait +#define up_read sema_post +#define down_write sema_wait +#define up_write sema_post +#else +#define DEFINE_SPINLOCK(lock) +#define spin_lock(a) +#define spin_unlock(a) +#define down(a) +#define up(a) +#define down_read(a) +#define up_read(a) +#define down_write(a) +#define up_write(a) +#endif + +#define u32 uint32_t +#define list_del(head, ent) TAILQ_REMOVE(head, ent, list) +#define simple_strtoul strtoul +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) +#define list_empty TAILQ_EMPTY +#define wake_up wakeup + +#define streq(a, b) (strcmp((a), (b)) == 0) + +struct kvec { + const void *iov_base; + size_t iov_len; +}; + +struct xs_stored_msg { + TAILQ_ENTRY(xs_stored_msg) list; + + struct xsd_sockmsg hdr; + + union { + /* Queued replies. */ + struct { + char *body; + } reply; + + /* Queued watch events. */ + struct { + struct xenbus_watch *handle; + char **vec; + unsigned int vec_size; + } watch; + } u; +}; + +struct xs_handle { + /* A list of replies. Currently only one will ever be outstanding. */ + TAILQ_HEAD(xs_handle_list, xs_stored_msg) reply_list; + spinlock_t reply_lock; + int reply_waitq; + + /* One request at a time. */ + struct semaphore request_mutex; + + /* Protect transactions against save/restore. 
*/ + struct rw_semaphore suspend_mutex; +}; + +static struct xs_handle xs_state; + +/* List of registered watches, and a lock to protect it. */ +static LIST_HEAD(watch_list_head, xenbus_watch) watches; +#ifndef DEBUGGING +static DEFINE_SPINLOCK(watches_lock); +#endif +/* List of pending watch callback events, and a lock to protect it. */ +static TAILQ_HEAD(event_list_head, xs_stored_msg) watch_events; +#ifndef DEBUGGING +static DEFINE_SPINLOCK(watch_events_lock); +#endif +/* + * Details of the xenwatch callback kernel thread. The thread waits on the + * watch_events_waitq for work to do (queued on watch_events list). When it + * wakes up it acquires the xenwatch_mutex before reading the list and + * carrying out work. + */ +static pid_t xenwatch_pid; +/* static */ DECLARE_MUTEX(xenwatch_mutex); +static int watch_events_waitq; + +static int get_error(const char *errorstring) +{ + unsigned int i; + + for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) { + if (i == ARRAY_SIZE(xsd_errors) - 1) { + printk("XENBUS xen store gave: unknown error %s", + errorstring); + return EINVAL; + } + } + return xsd_errors[i].errnum; +} + +static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) +{ + struct xs_stored_msg *msg; + char *body; + + spin_lock(&xs_state.reply_lock); + + while (list_empty(&xs_state.reply_list)) { + spin_unlock(&xs_state.reply_lock); + wait_event_interruptible(&xs_state.reply_waitq, + !list_empty(&xs_state.reply_list)); + spin_lock(&xs_state.reply_lock); + } + + msg = TAILQ_FIRST(&xs_state.reply_list); + list_del(&xs_state.reply_list, msg); + + spin_unlock(&xs_state.reply_lock); + + *type = msg->hdr.type; + if (len) + *len = msg->hdr.len; + body = msg->u.reply.body; + + kfree(msg); + + return body; +} + +#if 0 +/* Emergency write. 
UNUSED*/ +void xenbus_debug_write(const char *str, unsigned int count) +{ + struct xsd_sockmsg msg = { 0 }; + + msg.type = XS_DEBUG; + msg.len = sizeof("print") + count + 1; + + down(&xs_state.request_mutex); + xb_write(&msg, sizeof(msg)); + xb_write("print", sizeof("print")); + xb_write(str, count); + xb_write("", 1); + up(&xs_state.request_mutex); +} + +#endif +void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) +{ + void *ret; + struct xsd_sockmsg req_msg = *msg; + int err; + + if (req_msg.type == XS_TRANSACTION_START) + down_read(&xs_state.suspend_mutex); + + down(&xs_state.request_mutex); + + err = xb_write(msg, sizeof(*msg) + msg->len); + if (err) { + msg->type = XS_ERROR; + ret = ERR_PTR(err); + } else { + ret = read_reply(&msg->type, &msg->len); + } + + up(&xs_state.request_mutex); + + if ((msg->type == XS_TRANSACTION_END) || + ((req_msg.type == XS_TRANSACTION_START) && + (msg->type == XS_ERROR))) + up_read(&xs_state.suspend_mutex); + + return ret; +} + +/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. 
*/
+static void *xs_talkv(struct xenbus_transaction *t,
+		      enum xsd_sockmsg_type type,
+		      const struct kvec *iovec,
+		      unsigned int num_vecs,
+		      unsigned int *len)
+{	/*
+	 * Builds one request of the given type from the num_vecs segments in
+	 * iovec, writes the header and then the segments with xb_write(), and
+	 * waits in read_reply() for the answer.  On success the reply body is
+	 * returned; read_reply() stores its length in *len when len is
+	 * non-NULL.
+	 */
+	struct xsd_sockmsg msg;
+	void *ret = NULL;
+	unsigned int i;
+	int err;
+
+	msg.tx_id = (u32)(unsigned long)t;	/* transaction handle is sent as a 32-bit id */
+	msg.req_id = 0;
+	msg.type = type;
+	msg.len = 0;	/* payload length is the sum of all segment lengths */
+	for (i = 0; i < num_vecs; i++)
+		msg.len += iovec[i].iov_len;
+
+	down(&xs_state.request_mutex);	/* released on every path below */
+
+	err = xb_write(&msg, sizeof(msg));	/* header first */
+	if (err) {
+		up(&xs_state.request_mutex);
+		return ERR_PTR(err);
+	}
+
+	for (i = 0; i < num_vecs; i++) {	/* then each segment, in order */
+		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);;	/* second ';' is an empty statement */
+		if (err) {
+			up(&xs_state.request_mutex);
+			return ERR_PTR(err);
+		}
+	}
+
+	ret = read_reply(&msg.type, len);	/* msg.type now holds the reply type */
+
+	up(&xs_state.request_mutex);
+
+	if (IS_ERR(ret))
+		return ret;
+
+	if (msg.type == XS_ERROR) {
+		err = get_error(ret);	/* reply body is an error name; get_error()
+					 * maps it to a number via xsd_errors */
+		kfree(ret);
+		return ERR_PTR(-err);	/* NOTE(review): negated here, while the
+					 * xb_write() failures above pass err
+					 * through unnegated -- confirm which
+					 * sign the IS_ERR()/PTR_ERR() users
+					 * expect. */
+	}
+
+	BUG_ON(msg.type != type);	/* a non-error reply must echo the request type */
+	return ret;
+}
+
+/*
+ * Simplified version of xs_talkv: single message.
+ * The payload is the given string including its terminating NUL.
+ */
+static void *xs_single(struct xenbus_transaction *t,
+		       enum xsd_sockmsg_type type,
+		       const char *string,
+		       unsigned int *len)
+{
+	struct kvec iovec;
+
+	iovec.iov_base = (const void *)string;
+	iovec.iov_len = strlen(string) + 1;
+	return xs_talkv(t, type, &iovec, 1, len);
+}
+
+/*
+ * Many commands only need an ack, don't care what it says.
+ * An error pointer is converted with PTR_ERR() and returned; any other
+ * reply is freed and 0 is returned.
+ */
+static int xs_error(char *reply)
+{
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+	kfree(reply);
+	return 0;
+}
+/*
+ * Count the NUL-terminated strings packed into the first len bytes of
+ * strings.  Each step advances by strlen() + 1, so the scan relies on a
+ * terminating NUL being present for the last string.
+ */
+static unsigned int count_strings(const char *strings, unsigned int len)
+{
+	unsigned int num;
+	const char *p;
+
+	for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
+		num++;
+
+	return num;
+}
+
+/* Return the path to dir with /name appended. Buffer must be kfree()'ed. 
*/ +static char *join(const char *dir, const char *name) +{ + char *buffer; + + buffer = kmalloc(strlen(dir) + strlen("/") + strlen(name) + 1, + GFP_KERNEL); + if (buffer == NULL) + return ERR_PTR(ENOMEM); + + strcpy(buffer, dir); + if (!streq(name, "")) { + strcat(buffer, "/"); + strcat(buffer, name); + } + + return buffer; +} + +static char **split(char *strings, unsigned int len, unsigned int *num) +{ + char *p, **ret; + + /* Count the strings. */ + *num = count_strings(strings, len); + + /* Transfer to one big alloc for easy freeing. */ + ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL); + if (!ret) { + kfree(strings); + return ERR_PTR(ENOMEM); + } + memcpy(&ret[*num], strings, len); + kfree(strings); + + strings = (char *)&ret[*num]; + for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) + ret[(*num)++] = p; + + return ret; +} + +char **xenbus_directory(struct xenbus_transaction *t, + const char *dir, const char *node, unsigned int *num) +{ + char *strings, *path; + unsigned int len; + + path = join(dir, node); + if (IS_ERR(path)) + return (char **)path; + + strings = xs_single(t, XS_DIRECTORY, path, &len); + kfree(path); + if (IS_ERR(strings)) + return (char **)strings; + + return split(strings, len, num); +} +EXPORT_SYMBOL(xenbus_directory); + +/* Check if a path exists. Return 1 if it does. */ +int xenbus_exists(struct xenbus_transaction *t, + const char *dir, const char *node) +{ + char **d; + int dir_n; + + d = xenbus_directory(t, dir, node, &dir_n); + if (IS_ERR(d)) + return 0; + kfree(d); + return 1; +} +EXPORT_SYMBOL(xenbus_exists); + +/* Get the value of a single file. + * Returns a kmalloced value: call free() on it after use. + * len indicates length in bytes. 
+ */
+void *xenbus_read(struct xenbus_transaction *t,
+		  const char *dir, const char *node, unsigned int *len)
+{
+	char *path;
+	void *ret;
+
+	path = join(dir, node);
+	if (IS_ERR(path))
+		return (void *)path;	/* hand the error pointer from join() straight back */
+
+	ret = xs_single(t, XS_READ, path, len);
+	kfree(path);	/* the path is only needed for the request itself */
+	return ret;
+}
+EXPORT_SYMBOL(xenbus_read);
+
+/* Write the value of a single file.
+ * Returns -err on failure.
+ * NOTE(review): the value actually returned is whatever PTR_ERR() yields
+ * for the error pointer from join() or xs_talkv(); join() builds its error
+ * from a positive ENOMEM -- confirm the sign callers should test for.
+ */
+int xenbus_write(struct xenbus_transaction *t,
+		 const char *dir, const char *node, const char *string)
+{
+	char *path;
+	struct kvec iovec[2];
+	int ret;
+
+	path = join(dir, node);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	iovec[0].iov_base = path;
+	iovec[0].iov_len = strlen(path) + 1;	/* path goes out with its NUL */
+	iovec[1].iov_base = string;
+	iovec[1].iov_len = strlen(string);	/* value goes out without a NUL */
+
+	ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
+	kfree(path);
+	return ret;
+}
+EXPORT_SYMBOL(xenbus_write);
+
+/*
+ * Create a new directory.
+ * Sends XS_MKDIR for dir/node and returns the result of xs_error() on the
+ * reply: 0 when the reply is not an error pointer.
+ */
+int xenbus_mkdir(struct xenbus_transaction *t,
+		 const char *dir, const char *node)
+{
+	char *path;
+	int ret;
+
+	path = join(dir, node);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
+	kfree(path);
+	return ret;
+}
+EXPORT_SYMBOL(xenbus_mkdir);
+
+/*
+ * Destroy a file or directory (directories must be empty).
+ * Sends XS_RM for dir/node and returns the result of xs_error() on the
+ * reply: 0 when the reply is not an error pointer.
+ */
+int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node)
+{
+	char *path;
+	int ret;
+
+	path = join(dir, node);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	ret = xs_error(xs_single(t, XS_RM, path, NULL));
+	kfree(path);
+	return ret;
+}
+EXPORT_SYMBOL(xenbus_rm);
+
+/* Start a transaction: changes by others will not be seen during this
+ * transaction, and changes will not be visible to others until end. 
+ */ +struct xenbus_transaction *xenbus_transaction_start(void) +{ + char *id_str; + unsigned long id; + + down_read(&xs_state.suspend_mutex); + + id_str = xs_single(NULL, XS_TRANSACTION_START, "", NULL); + if (IS_ERR(id_str)) { + up_read(&xs_state.suspend_mutex); + return (struct xenbus_transaction *)id_str; + } + + id = simple_strtoul(id_str, NULL, 0); + kfree(id_str); + + return (struct xenbus_transaction *)id; +} +EXPORT_SYMBOL(xenbus_transaction_start); + +/* End a transaction. + * If abandon is true, transaction is discarded instead of committed. + */ +int xenbus_transaction_end(struct xenbus_transaction *t, int abort) +{ + char abortstr[2]; + int err; + + if (abort) + strcpy(abortstr, "F"); + else + strcpy(abortstr, "T"); + + err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); + + up_read(&xs_state.suspend_mutex); + + return err; +} +EXPORT_SYMBOL(xenbus_transaction_end); + +/* Single read and scanf: returns -errno or num scanned. */ +int xenbus_scanf(struct xenbus_transaction *t, + const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; + char *val; + + val = xenbus_read(t, dir, node, NULL); + if (IS_ERR(val)) + return PTR_ERR(val); + + va_start(ap, fmt); + ret = vsscanf(val, fmt, ap); + va_end(ap); + kfree(val); + /* Distinctive errno. */ + if (ret == 0) + return ERANGE; + return ret; +} +EXPORT_SYMBOL(xenbus_scanf); + +/* Single printf and write: returns -errno or 0. */ +int xenbus_printf(struct xenbus_transaction *t, + const char *dir, const char *node, const char *fmt, ...) 
+{ + va_list ap; + int ret; +#define PRINTF_BUFFER_SIZE 4096 + char *printf_buffer; + + printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); + if (printf_buffer == NULL) + return ENOMEM; + + va_start(ap, fmt); + ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap); + va_end(ap); + + BUG_ON(ret > PRINTF_BUFFER_SIZE-1); + ret = xenbus_write(t, dir, node, printf_buffer); + + kfree(printf_buffer); + + return ret; +} +EXPORT_SYMBOL(xenbus_printf); + +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ +int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...) +{ + va_list ap; + const char *name; + int ret = 0; + + va_start(ap, dir); + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { + const char *fmt = va_arg(ap, char *); + void *result = va_arg(ap, void *); + char *p; + + p = xenbus_read(t, dir, name, NULL); + if (IS_ERR(p)) { + ret = PTR_ERR(p); + break; + } + if (fmt) { + if (sscanf(p, fmt, result) == 0) + ret = EINVAL; + kfree(p); + } else + *(char **)result = p; + } + va_end(ap); + return ret; +} +EXPORT_SYMBOL(xenbus_gather); + +static int xs_watch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(NULL, XS_WATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + +static int xs_unwatch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(NULL, XS_UNWATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + +static struct xenbus_watch *find_watch(const char *token) +{ + struct xenbus_watch *i, *cmp; + + cmp = (void *)simple_strtoul(token, NULL, 16); + + LIST_FOREACH(i, &watches, list) + if (i == cmp) + return i; + + return NULL; +} + +/* Register callback to watch this node. 
*/ +int register_xenbus_watch(struct xenbus_watch *watch) +{ + /* Pointer in ascii is the token. */ + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + + down_read(&xs_state.suspend_mutex); + + spin_lock(&watches_lock); + BUG_ON(find_watch(token) != NULL); + LIST_INSERT_HEAD(&watches, watch, list); + spin_unlock(&watches_lock); + + err = xs_watch(watch->node, token); + + /* Ignore errors due to multiple registration. */ + if ((err != 0) && (err != EEXIST)) { + spin_lock(&watches_lock); + LIST_REMOVE(watch, list); + spin_unlock(&watches_lock); + } + + up_read(&xs_state.suspend_mutex); + + return err; +} +EXPORT_SYMBOL(register_xenbus_watch); + +void unregister_xenbus_watch(struct xenbus_watch *watch) +{ + struct xs_stored_msg *msg, *tmp; + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + + down_read(&xs_state.suspend_mutex); + + spin_lock(&watches_lock); + BUG_ON(!find_watch(token)); + LIST_REMOVE(watch, list); + spin_unlock(&watches_lock); + + err = xs_unwatch(watch->node, token); + if (err) + printk("XENBUS Failed to release watch %s: %i\n", + watch->node, err); + + up_read(&xs_state.suspend_mutex); + + /* Cancel pending watch events. */ + spin_lock(&watch_events_lock); + TAILQ_FOREACH_SAFE(msg, &watch_events, list, tmp) { + if (msg->u.watch.handle != watch) + continue; + list_del(&watch_events, msg); + kfree(msg->u.watch.vec); + kfree(msg); + } + spin_unlock(&watch_events_lock); + + /* Flush any currently-executing callback, unless we are it. :-) */ + if (curproc->p_pid != xenwatch_pid) { + down(&xenwatch_mutex); + up(&xenwatch_mutex); + } +} +EXPORT_SYMBOL(unregister_xenbus_watch); + +void xs_suspend(void) +{ + down_write(&xs_state.suspend_mutex); + down(&xs_state.request_mutex); +} + +void xs_resume(void) +{ + struct xenbus_watch *watch; + char token[sizeof(watch) * 2 + 1]; + + up(&xs_state.request_mutex); + + /* No need for watches_lock: the suspend_mutex is sufficient. 
*/ + LIST_FOREACH(watch, &watches, list) { + sprintf(token, "%lX", (long)watch); + xs_watch(watch->node, token); + } + + up_write(&xs_state.suspend_mutex); +} + +static void xenwatch_thread(void *unused) +{ + struct xs_stored_msg *msg; + + for (;;) { + wait_event_interruptible(&watch_events_waitq, + !list_empty(&watch_events)); + + down(&xenwatch_mutex); + + spin_lock(&watch_events_lock); + msg = TAILQ_FIRST(&watch_events); + if (msg) + list_del(&watch_events, msg); + spin_unlock(&watch_events_lock); + + if (msg != NULL) { + + msg->u.watch.handle->callback( + msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + kfree(msg->u.watch.vec); + kfree(msg); + } + + up(&xenwatch_mutex); + } +} + +static int process_msg(void) +{ + struct xs_stored_msg *msg; + char *body; + int err; + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (msg == NULL) + return ENOMEM; + + err = xb_read(&msg->hdr, sizeof(msg->hdr)); + if (err) { + kfree(msg); + return err; + } + + body = kmalloc(msg->hdr.len + 1, GFP_KERNEL); + if (body == NULL) { + kfree(msg); + return ENOMEM; + } + + err = xb_read(body, msg->hdr.len); + if (err) { + kfree(body); + kfree(msg); + return err; + } + body[msg->hdr.len] = '\0'; + + if (msg->hdr.type == XS_WATCH_EVENT) { + msg->u.watch.vec = split(body, msg->hdr.len, + &msg->u.watch.vec_size); + if (IS_ERR(msg->u.watch.vec)) { + kfree(msg); + return PTR_ERR(msg->u.watch.vec); + } + + spin_lock(&watches_lock); + msg->u.watch.handle = find_watch( + msg->u.watch.vec[XS_WATCH_TOKEN]); + if (msg->u.watch.handle != NULL) { + spin_lock(&watch_events_lock); + TAILQ_INSERT_TAIL(&watch_events, msg, list); + wake_up(&watch_events_waitq); + spin_unlock(&watch_events_lock); + } else { + kfree(msg->u.watch.vec); + kfree(msg); + } + spin_unlock(&watches_lock); + } else { + msg->u.reply.body = body; + spin_lock(&xs_state.reply_lock); + TAILQ_INSERT_TAIL(&xs_state.reply_list, msg, list); + spin_unlock(&xs_state.reply_lock); + wake_up(&xs_state.reply_waitq); 
+ } + + return 0; +} +/* Reader thread: calls process_msg() forever and prints any error it returns. */ +static void xenbus_thread(void *unused) +{ + int err; + + for (;;) { + err = process_msg(); + if (err) + printf("XENBUS error %d while reading " + "message\n", err); + } +} +/* + * Set up the xenstore client: initialise the reply queue, its lock and the + * request/suspend semaphores, bring up the shared ring with xb_init_comms(), + * then start the xenwatch and xenbus kernel threads. The pid of the + * xenwatch thread is recorded in xenwatch_pid. + * + * Returns 0 on success, or the first error reported by xb_init_comms() or + * kthread_create(). + * + * NOTE(review): both semaphores are created with an initial count of 0 -- + * confirm this is what the down()/up() wrappers used on them expect. + */ +int xs_init(void) +{ + int err; + struct proc *p; + + TAILQ_INIT(&xs_state.reply_list); + mtx_init(&xs_state.reply_lock, "state reply", NULL, MTX_SPIN); + sema_init(&xs_state.request_mutex, 0, "xenstore request"); + sema_init(&xs_state.suspend_mutex, 0, "xenstore suspend"); + /* Initialize the shared memory rings to talk to xenstored */ + err = xb_init_comms(); + if (err) + return err; + + err = kthread_create(xenwatch_thread, NULL, &p, + RFHIGHPID, 0, "xenwatch"); + if (err) + return err; + xenwatch_pid = p->p_pid; + + err = kthread_create(xenbus_thread, NULL, NULL, + RFHIGHPID, 0, "xenbus"); + + return err; +} + +/* + * Local variables: + * c-file-style: "bsd" + * indent-tabs-mode: t + * c-indent-level: 4 + * c-basic-offset: 8 + * tab-width: 4 + * End: + */ Index: i386-xen/clock.c =================================================================== RCS file: i386-xen/clock.c diff -N i386-xen/clock.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ i386-xen/clock.c 8 Dec 2005 12:07:43 -0000 @@ -0,0 +1,650 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz and Don Ahn. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3.
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 + */ + +#include +__FBSDID("$FreeBSD: src/sys/i386/isa/clock.c,v 1.207 2003/11/13 10:02:12 phk Exp $"); + +/* #define DELAYDEBUG */ +/* + * Routines to handle clock hardware. 
+ */ + +/* + * inittodr, settodr and support routines written + * by Christoph Robitschko + * + * reintroduced and updated by Chris Stenton 8/10/94 + */ + +#include "opt_clock.h" +#include "opt_isa.h" +#include "opt_mca.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#if defined(SMP) +#include +#endif +#include + +#include +#include +#include + +/* XEN specific defines */ +#include +#include /* needed by machine/pmap.h */ +#include /* needed by machine/pmap.h */ +#include /* needed by xen-os.h */ +#include +#include /* needed by xenfunc.h */ +#include + +/* + * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we + * can use a simple formula for leap years. + */ +#define LEAPYEAR(y) (((u_int)(y) % 4 == 0) ? 1 : 0) +#define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) + +int adjkerntz; /* local offset from GMT in seconds */ +int clkintr_pending; +int disable_rtc_set = 1; /* disable resettodr() if != 0 */ +int pscnt = 1; +int psdiv = 1; +int statclock_disable; +#ifndef TIMER_FREQ +#define TIMER_FREQ 1193182 +#endif +u_int timer_freq = TIMER_FREQ; /* overwritten with the TSC rate in startrtclock() */ +struct mtx clock_lock; + + +static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; + +/* Values for timerX_state: */ +#define RELEASED 0 +#define RELEASE_PENDING 1 +#define ACQUIRED 2 +#define ACQUIRE_PENDING 3 + +/* Cached *multiplier* to convert TSC counts to microseconds. + * (see the equation below). + * Equal to 2^32 * (1 / (clocks per usec) ). + * Initialized in startrtclock(). + */ +static unsigned long fast_gettimeoffset_quotient; + +/* These are periodically updated in shared_info, and then copied here.
*/ +static uint32_t shadow_tsc_stamp; +static uint64_t shadow_system_time; +static struct timeval shadow_tv; /* NOTE(review): nothing in clock.c assigns shadow_tsc_stamp, shadow_system_time or shadow_tv; the values read from Xen are copied into the per-CPU shadow_time record below instead. */ +/* do_div(n, base): divide the 64-bit lvalue n by the 32-bit base in place (i386 divl) and evaluate to the remainder. */ +#define do_div(n,base) ({ \ + unsigned long __upper, __low, __high, __mod, __base; \ + __base = (base); \ + __asm("":"=a" (__low), "=d" (__high):"A" (n)); \ + __upper = __high; \ + if (__high) { \ + __upper = __high % (__base); \ + __high = __high / (__base); \ + } \ + __asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \ + __asm("":"=A" (n):"a" (__low),"d" (__high)); \ + __mod; \ +}) +/* Per-CPU emulation: DEFINE_PER_CPU declares one object named per_cpu__<name>; per_cpu() evaluates and discards its cpu argument and always yields that single object. */ +#define DEFINE_PER_CPU(type, name) \ + __typeof__(type) per_cpu__##name + +#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var)) + +/* These are periodically updated in shared_info, and then copied here. */ +struct shadow_time_info { + uint64_t tsc_timestamp; /* TSC at last update of time vals. */ + uint64_t system_timestamp; /* Time, in nanosecs, since boot. */ + uint32_t tsc_to_nsec_mul; /* copy of Xen's tsc_to_system_mul */ + uint32_t tsc_to_usec_mul; /* tsc_to_nsec_mul / 1000 */ + int tsc_shift; /* shift applied to a TSC delta before scaling */ + uint32_t version; /* Xen's version field at the time of the copy */ +}; +static uint64_t processed_system_time;/* System time (ns) at last processing. */ +static DEFINE_PER_CPU(uint64_t, processed_system_time); +static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); + +/* Nanoseconds per clock tick. NOTE(review): this expression is unsigned long long, so a signed value compared against it is converted to unsigned. */ +#define NS_PER_TICK (1000000000ULL/hz) +/* Read the 64-bit time-stamp counter into val. */ +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + + +/* convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_mhz * 10^6)) + * ns = cycles * (10^3 / cpu_mhz) + * + * Then we use scaling math (suggested by george@mvista.com) to get: + * ns = cycles * (10^3 * SC / cpu_mhz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. + * -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */ +static unsigned long cyc2ns_scale; +#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ +/* Precompute cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR) / cpu_mhz. The division is an integer one, so cpu_mhz must be non-zero. */ +static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +{ + cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; +} +/* Convert a TSC cycle count to nanoseconds using the precomputed cyc2ns_scale. */ +static inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; +} + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. + * + * delta is first shifted right by -shift when shift is negative, left by + * shift otherwise, and then multiplied by mul_frac in inline assembly. + */ +static inline uint64_t +scale_delta(uint64_t delta, uint32_t mul_frac, int shift) +{ + uint64_t product; +#ifdef __i386__ + uint32_t tmp1, tmp2; +#endif + + if ( shift < 0 ) + delta >>= -shift; + else + delta <<= shift; + +#ifdef __i386__ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "add %4,%%eax ; " + "xor %5,%5 ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), "2" (mul_frac) ); +#else + __asm__ ( + "mul %%rdx ; shrd $32,%%rdx,%%rax" + : "=a" (product) : "0" (delta), "d" ((uint64_t)mul_frac) ); +#endif + + return product; +} +/* Nanoseconds elapsed since shadow->tsc_timestamp: the current TSC minus that stamp, scaled with the shadow's tsc_to_nsec_mul and tsc_shift. */ +static uint64_t get_nsec_offset(struct shadow_time_info *shadow) +{ + uint64_t now, delta; + rdtscll(now); + delta = now - shadow->tsc_timestamp; + return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); +} + +/* + * Reads a consistent set of time-base values from Xen, into a shadow data + * area. Must be called with the xtime_lock held for writing.
+ */ +static void __get_time_values_from_xen(void) +{ + shared_info_t *s = HYPERVISOR_shared_info; + struct vcpu_time_info *src; + struct shadow_time_info *dst; + + src = &s->vcpu_info[smp_processor_id()].time; + dst = &per_cpu(shadow_time, smp_processor_id()); + + do { + dst->version = src->version; + rmb(); + dst->tsc_timestamp = src->tsc_timestamp; + dst->system_timestamp = src->system_time; + dst->tsc_to_nsec_mul = src->tsc_to_system_mul; + dst->tsc_shift = src->tsc_shift; + rmb(); + } + while ((src->version & 1) | (dst->version ^ src->version)); + + dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000; +} + +static inline int time_values_up_to_date(int cpu) +{ + struct vcpu_time_info *src; + struct shadow_time_info *dst; + + src = &HYPERVISOR_shared_info->vcpu_info[cpu].time; + dst = &per_cpu(shadow_time, cpu); + + return (dst->version == src->version); +} + +static void (*timer_func)(struct clockframe *frame) = hardclock; + +static unsigned xen_get_timecount(struct timecounter *tc); + +static struct timecounter xen_timecounter = { + xen_get_timecount, /* get_timecount */ + 0, /* no poll_pps */ + ~0u, /* counter_mask */ + 0, /* frequency */ + "ixen", /* name */ + 0 /* quality */ +}; + + +static void +clkintr(struct clockframe *frame) +{ + int64_t delta_cpu, delta; + int cpu = smp_processor_id(); + struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); + long ticks = 0; + + do { + __get_time_values_from_xen(); + + delta = delta_cpu = + shadow->system_timestamp + get_nsec_offset(shadow); + + delta -= processed_system_time; + delta_cpu -= per_cpu(processed_system_time, cpu); + + } while (!time_values_up_to_date(cpu)); + + if (unlikely(delta < (int64_t)-1000000) || unlikely(delta_cpu < 0)) { + printk("Timer ISR: Time went backwards: %lld\n", delta); + return; + } + + /* Process elapsed ticks since last call. */ + while ( delta >= NS_PER_TICK ) + { + ticks++; + delta -= NS_PER_TICK; + processed_system_time += NS_PER_TICK; + } + /* Local CPU jiffy work. 
*/ + while (delta_cpu >= NS_PER_TICK) { + delta_cpu -= NS_PER_TICK; + per_cpu(processed_system_time, cpu) += NS_PER_TICK; +#if 0 + update_process_times(user_mode(regs)); + profile_tick(CPU_PROFILING, regs); +#endif + } + if (ticks > 0) { + if (frame) timer_func(frame); + } + + if (cpu != 0) + return; + /* + * Take synchronised time from Xen once a minute if we're not + * synchronised ourselves, and we haven't chosen to keep an independent + * time base. + */ + + /* XXX TODO */ +} + +#include "opt_ddb.h" +/* + * Return the low 32 bits of the TSC stamp most recently copied from Xen + * for this CPU. + * + * __get_time_values_from_xen() refreshes the per-CPU shadow_time record. + * The file-scope shadow_tsc_stamp is never assigned anywhere in this file, + * so returning it yielded 0 on every call and the polling loop in DELAY() + * could never make progress. + */ +static uint32_t +getit(void) +{ + __get_time_values_from_xen(); + return (uint32_t)per_cpu(shadow_time, smp_processor_id()).tsc_timestamp; +} + +/* + * Wait "n" microseconds. + * Relies on timer 1 counting down from (timer_freq / hz) + * Note: timer had better have been programmed before this is first used! + */ +void +DELAY(int n) +{ + int delta, ticks_left; + uint32_t tick, prev_tick; +#ifdef DELAYDEBUG + int getit_calls = 1; + int n1; + static int state = 0; + + if (state == 0) { + state = 1; + for (n1 = 1; n1 <= 10000000; n1 *= 10) + DELAY(n1); + state = 2; + } + if (state == 1) + printf("DELAY(%d)...", n); +#endif + /* + * Read the counter first, so that the rest of the setup overhead is + * counted. Guess the initial overhead is 20 usec (on most systems it + * takes about 1.5 usec for each of the i/o's in getit(). The loop + * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The + * multiplications and divisions to scale the count take a while). + * + * However, if ddb is active then use a fake counter since reading + * the i8254 counter involves acquiring a lock. ddb must not go + * locking for many reasons, but it calls here for at least atkbd + * input. + */ + prev_tick = getit(); + + n -= 0; /* XXX actually guess no initial overhead */ + /* + * Calculate (n * (timer_freq / 1e6)) without using floating point + * and without any avoidable overflows. + */ + if (n <= 0) + ticks_left = 0; + else if (n < 256) + /* + * Use fixed point to avoid a slow division by 1000000.
+ * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. + * 2^15 is the first power of 2 that gives exact results + * for n between 0 and 256. + */ + ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; /* NOTE(review): the constant assumes a 1193182 Hz counter, while startrtclock() overwrites timer_freq -- confirm both branches use the same rate. */ + else + /* + * Don't bother using fixed point, although gcc-2.7.2 + * generates particularly poor code for the long long + * division, since even the slow way will complete long + * before the delay is up (unless we're interrupted). + */ + ticks_left = ((u_int)n * (long long)timer_freq + 999999) + / 1000000; + + /* Poll getit() and subtract the counter advance until the budget is used up. */ + while (ticks_left > 0) { + tick = getit(); +#ifdef DELAYDEBUG + ++getit_calls; +#endif + delta = tick - prev_tick; + prev_tick = tick; + if (delta < 0) { + /* + * Guard against timer0_max_count being wrong. + * This shouldn't happen in normal operation, + * but it may happen if set_timer_freq() is + * traced. + */ + /* delta += timer0_max_count; ??? */ + if (delta < 0) + delta = 0; + } + ticks_left -= delta; + } +#ifdef DELAYDEBUG + if (state == 1) + printf(" %d calls to getit() at %d usec each\n", + getit_calls, (n + 5) / getit_calls); +#endif +} + +/* Stub: both arguments are ignored and 0 is always returned. */ +int +sysbeep(int pitch, int period) +{ + return (0); +} + +/* + * Restore all the timers non-atomically (XXX: should be atomically). + * + * This function is called from pmtimer_resume() to restore all the timers. + * This should not be necessary, but there are broken laptops that do not + * restore all the timers on resume. + */ +void +timer_restore(void) +{ + /* Get timebases for new environment. */ + __get_time_values_from_xen(); + + /* Reset our own concept of passage of system time.
*/ + /* + * Take the value from the per-CPU shadow that + * __get_time_values_from_xen() has just refreshed. The file-scope + * shadow_system_time is never written in this file, so using it reset + * the tick accounting to 0. Both the global and the per-CPU counters + * are reset, as cpu_initclocks() does at boot. + */ + processed_system_time = + per_cpu(shadow_time, smp_processor_id()).system_timestamp; + per_cpu(processed_system_time, smp_processor_id()) = + processed_system_time; +} + +/* + * Derive the CPU frequency from the tsc_to_system_mul / tsc_shift values + * Xen publishes for VCPU 0, precompute the TSC conversion factors + * (fast_gettimeoffset_quotient, cyc2ns_scale) and register the Xen + * timecounter running at that frequency. + */ +void +startrtclock() +{ + unsigned long long alarm; + uint64_t __cpu_khz; + uint32_t cpu_khz; + struct vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_info[0].time; + + __cpu_khz = 1000000ULL << 32; + do_div(__cpu_khz, info->tsc_to_system_mul); + if ( info->tsc_shift < 0 ) + cpu_khz = __cpu_khz << -info->tsc_shift; + else + cpu_khz = __cpu_khz >> info->tsc_shift; + + printk("Xen reported: %lu.%03lu MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + + /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz = + (2^32 * 1 / (clocks/us)) */ + { + unsigned long eax=0, edx=1000; + __asm__("divl %2" + :"=a" (fast_gettimeoffset_quotient), "=d" (edx) + :"r" (cpu_khz), + "0" (eax), "1" (edx)); + } + + set_cyc2ns_scale(cpu_khz/1000); + timer_freq = tsc_freq = xen_timecounter.tc_frequency = cpu_khz * 1000; + tc_init(&xen_timecounter); + + + rdtscll(alarm); +} + +/* + * Initialize the time of day register, based on the time base which is, e.g. + * from a filesystem. + * + * NOTE(review): shadow_tv is never assigned in this file, so the "badly + * off" branch below compares against, and sets the clock to, a zeroed + * timeval -- confirm where the wall-clock time from Xen should come from. + */ +void +inittodr(time_t base) +{ + int s, y; + struct timespec ts; + + s = splclock(); + if (base) { + ts.tv_sec = base; + ts.tv_nsec = 0; + tc_setclock(&ts); + } + + y = time_second - shadow_tv.tv_sec; + if (y <= -2 || y >= 2) { + /* badly off, adjust it */ + ts.tv_sec = shadow_tv.tv_sec; + ts.tv_nsec = shadow_tv.tv_usec * 1000; + tc_setclock(&ts); + } + splx(s); +} + +/* + * Write system time back to RTC. Not supported for guest domains. + */ +void +resettodr() +{ +} + + +/* + * Start clocks running. + */ +void +cpu_initclocks(void) +{ + int diag; + int time_irq = bind_virq_to_irq(VIRQ_TIMER); + + if ((diag = intr_add_handler("clk", time_irq, + (driver_intr_t *)clkintr, NULL, + INTR_TYPE_CLK | INTR_FAST, NULL))) { + panic("failed to register clock interrupt: %d\n", diag); + } + + /* should fast clock be enabled ?
*/ + + /* + * Initialize xen values. The refreshed time base lives in the per-CPU + * shadow_time record; the file-scope shadow_system_time is never + * written in this file, so seeding from it always started the tick + * accounting at 0. + */ + __get_time_values_from_xen(); + processed_system_time = per_cpu(shadow_time, 0).system_timestamp; + per_cpu(processed_system_time, 0) = processed_system_time; + +} + +#ifdef SMP +/* + * Per-AP clock setup: seed this CPU's processed-time counter, bind the + * timer VIRQ to an IRQ, remember it in the per-CPU data and attach + * clkintr() as its handler. + */ +void +ap_cpu_initclocks(void) +{ + int irq; + int cpu = smp_processor_id(); + + /* Seed from the CPU 0 shadow rather than the never-written static. */ + per_cpu(processed_system_time, cpu) = + per_cpu(shadow_time, 0).system_timestamp; + + irq = bind_virq_to_irq(VIRQ_TIMER); + PCPU_SET(time_irq, irq); + PANIC_IF(intr_add_handler("clk", irq, (driver_intr_t *)clkintr, + NULL, INTR_TYPE_CLK | INTR_FAST, NULL)); +} +#endif + +/* Profiling clock is not available; only prints a notice. */ +void +cpu_startprofclock(void) +{ + + printf("cpu_startprofclock: profiling clock is not supported\n"); +} + +/* Profiling clock is not available; only prints a notice. */ +void +cpu_stopprofclock(void) +{ + + printf("cpu_stopprofclock: profiling clock is not supported\n"); +} + +/* + * Timecounter read routine: refresh the shadow time values and return the + * low 32 bits of the TSC stamp copied from Xen. The file-scope + * shadow_tsc_stamp previously returned here is never assigned, so the + * counter never advanced. + */ +static uint32_t +xen_get_timecount(struct timecounter *tc) +{ + __get_time_values_from_xen(); + return (uint32_t)per_cpu(shadow_time, smp_processor_id()).tsc_timestamp; +} + +/* + * Track behavior of cur_timer->get_offset() functionality in timer_tsc.c + */ + +#if 0 +static uint32_t +xen_get_offset(void) +{ + register unsigned long eax, edx; + + /* Read the Time Stamp Counter */ + + rdtsc(eax,edx); + + /* .. relative to previous jiffy (32 bits is enough) */ + eax -= shadow_tsc_stamp; + + /* + * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient + * = (tsc_low delta) * (usecs_per_clock) + * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) + * + * Using a mull instead of a divl saves up to 31 clock cycles + * in the critical path.
+ */ + + __asm__("mull %2" + :"=a" (eax), "=d" (edx) + :"rm" (fast_gettimeoffset_quotient), + "0" (eax)); + + /* our adjusted time offset in microseconds */ + return edx; +} +#endif /* 0 */ +void +idle_block(void) +{ /* Ask Xen for a timer event one tick past processed_system_time; only when that request returns 0 issue the SCHEDOP_block scheduler hypercall. */ + if (HYPERVISOR_set_timer_op(processed_system_time + NS_PER_TICK) == 0) + HYPERVISOR_sched_op(SCHEDOP_block, 0); +} Index: i386-xen/evtchn.c =================================================================== RCS file: i386-xen/evtchn.c diff -N i386-xen/evtchn.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ i386-xen/evtchn.c 23 Dec 2005 03:48:01 -0000 @@ -0,0 +1,789 @@ +/****************************************************************************** + * evtchn.c + * + * Communication via Xen event channels. + * + * Copyright (c) 2002-2004, K A Fraser + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Index of the least-significant set bit of word, via the x86 bsfl instruction. Callers in this file only pass non-zero words. */ +static inline unsigned long __ffs(unsigned long word) +{ + __asm__("bsfl %1,%0" + :"=r" (word) + :"rm" (word)); + return word; +} + +static struct mtx irq_mapping_update_lock; + +#define TODO printf("%s: not implemented!\n", __func__) + +#ifdef CONFIG_SMP + +static uint8_t cpu_evtchn[NR_EVENT_CHANNELS]; +static uint32_t cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/32]; + +#define active_evtchns(cpu,sh,idx) \ + ((sh)->evtchn_pending[idx] & \ + cpu_evtchn_mask[cpu][idx] & \ + ~(sh)->evtchn_mask[idx]) +/* Move channel chn from the mask of the CPU it was recorded on to the mask of cpu, and record the new owner in cpu_evtchn. */ +void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) +{ + clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]); + set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]); + cpu_evtchn[chn] = cpu; +} + +#else + +#define active_evtchns(cpu,sh,idx) \ + ((sh)->evtchn_pending[idx] & \ + ~(sh)->evtchn_mask[idx]) + +void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) +{ /* Without CONFIG_SMP there is no per-CPU mask to maintain. */ +} +#endif +/* IRQ <-> event-channel mappings.
*/ +static int evtchn_to_irq[NR_EVENT_CHANNELS]; +static int irq_to_evtchn[NR_IRQS]; + +static int virq_to_irq[MAX_VIRT_CPUS][NR_VIRQS]; +static int ipi_to_evtchn[MAX_VIRT_CPUS][NR_VIRQS]; + + +/* Reference counts for bindings to IRQs. */ +static int irq_bindcount[NR_IRQS]; + +#define VALID_EVTCHN(_chn) ((_chn) >= 0) + +/* + * Force a proper event-channel callback from Xen after clearing the + * callback mask. We do this in a very simple manner, by making a call + * down into Xen. The pending flag will be checked by Xen on return. + */ +void force_evtchn_callback(void) +{ + (void)HYPERVISOR_xen_version(0, NULL); +} + +/* + * Event-channel upcall entry point. + * + * Clears this VCPU's evtchn_upcall_pending flag, atomically fetches and + * zeroes the pending selector word, and for every selected word dispatches + * each active channel: to the handlers of the IRQ recorded in + * evtchn_to_irq[], or to evtchn_device_upcall() when the entry is -1. + * + * The inner loop re-reads the active set on every iteration, so it only + * ends once the dispatched handlers have cleared or masked the channel. + */ +void +evtchn_do_upcall(struct trapframe *frame) +{ + unsigned long l1, l2; + unsigned int l1i, l2i, port; + int irq, cpu = smp_processor_id(); + shared_info_t *s = HYPERVISOR_shared_info; + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; + + vcpu_info->evtchn_upcall_pending = 0; + + /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ + l1 = xen_xchg(&vcpu_info->evtchn_pending_sel, 0); + + while ( l1 != 0 ) + { + + l1i = __ffs(l1); + /* 1UL: avoid shifting a signed int into its sign bit for bit 31. */ + l1 &= ~(1UL << l1i); + + while ( (l2 = active_evtchns(cpu, s, l1i)) != 0 ) + { + + l2i = __ffs(l2); + l2 &= ~(1UL << l2i); + + /* Each selector bit covers a 32-channel word. */ + port = (l1i << 5) + l2i; + if ( (irq = evtchn_to_irq[port]) != -1 ) { + struct intsrc *isrc = intr_lookup_source(irq); + intr_execute_handlers(isrc, frame); + } else { + evtchn_device_upcall(port); + } + } + + } +} + + +/* + * Return the lowest IRQ number whose bind count is zero; panics when all + * NR_IRQS entries are in use. + */ +static int +find_unbound_irq(void) +{ + int irq; + + for ( irq = 0; irq < NR_IRQS; irq++ ) + if ( irq_bindcount[irq] == 0 ) + break; + + if ( irq == NR_IRQS ) + panic("No available IRQ to bind to: increase NR_IRQS!\n"); + + return irq; +} + +/* + * Bind virtual IRQ `virq' on the current CPU to an IRQ number and return + * it. An existing binding is reused; otherwise an event channel is + * requested from Xen and mapped to a free IRQ. The bind count of the IRQ + * is incremented either way. Panics if the hypercall fails. + */ +int +bind_virq_to_irq(int virq) +{ + evtchn_op_t op; + int evtchn, irq; + + mtx_lock(&irq_mapping_update_lock); + + if ( (irq = PCPU_GET(virq_to_irq)[virq]) == -1 ) + { + op.cmd = EVTCHNOP_bind_virq; + op.u.bind_virq.virq = virq; + op.u.bind_virq.vcpu = PCPU_GET(cpuid); + if ( HYPERVISOR_event_channel_op(&op) != 0 ) + panic("Failed to
bind virtual IRQ %d\n", virq); + evtchn = op.u.bind_virq.port; + + irq = find_unbound_irq(); + evtchn_to_irq[evtchn] = irq; + irq_to_evtchn[irq] = evtchn; + + PCPU_GET(virq_to_irq)[virq] = irq; + bind_evtchn_to_cpu(evtchn, smp_processor_id()); + } + + irq_bindcount[irq]++; + + mtx_unlock(&irq_mapping_update_lock); + + return irq; +} +/* + * Drop one reference to the IRQ bound to `virq' on the current CPU. When + * the bind count reaches zero the event channel is closed through Xen + * (panicking on failure) and the evtchn/irq/virq mappings are reset to -1. + * + * NOTE(review): irq and evtchn are looked up before + * irq_mapping_update_lock is taken -- confirm no concurrent rebinding can + * happen in between. + */ +void +unbind_virq_from_irq(int virq) +{ + evtchn_op_t op = { .cmd = EVTCHNOP_close }; + int irq = PCPU_GET(virq_to_irq)[virq]; + int evtchn = irq_to_evtchn[irq]; + + mtx_lock(&irq_mapping_update_lock); + + if ( --irq_bindcount[irq] == 0 ) + { + op.u.close.port = evtchn; + if ( HYPERVISOR_event_channel_op(&op) != 0 ) + panic("Failed to unbind virtual IRQ %d\n", virq); + + /* + * This is a slight hack. Interdomain ports can be allocated directly + * by userspace, and at that point they get bound by Xen to vcpu 0. We + * therefore need to make sure that if we get an event on an event + * channel we don't know about vcpu 0 handles it. Binding channels to + * vcpu 0 when closing them achieves this. + */ + bind_evtchn_to_cpu(evtchn, 0); + evtchn_to_irq[evtchn] = -1; + irq_to_evtchn[irq] = -1; + PCPU_GET(virq_to_irq)[virq] = -1; + } + + mtx_unlock(&irq_mapping_update_lock); +} + + +/* This is only used when restoring a vcpu from an xm save. The ipi is + expected to have been bound before we suspended, and so all of the + xenolinux state is set up; we only need to restore the Xen side of + things. The irq number has to be the same, but the evtchn number can + change. */ +void +_bind_ipi_to_irq(int ipi, int vcpu, int irq) +{ + evtchn_op_t op; + int evtchn; + + mtx_lock(&irq_mapping_update_lock); + + op.cmd = EVTCHNOP_bind_ipi; + if ( HYPERVISOR_event_channel_op(&op) != 0 ) + panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu); + ev