From 74e919d2307d9014400c818b82e752c623a4da94 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Dec 2020 10:52:21 +0100 Subject: um: virtio: clean up a comment There's no 'simtime' device, because implementing that through virtio was just too much complexity. Clean up the comment that still refers to it. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/virtio_uml.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 5d957b7e7fd5..515d648d3c3d 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -914,8 +914,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT /* * When we get an interrupt, we must bounce it through the simulation - * calendar (the simtime device), except for the simtime device itself - * since that's part of the simulation control. + * calendar (the time-travel=ext:... socket). */ if (time_travel_mode == TT_MODE_EXTERNAL && callback) { info->callback = callback; -- cgit v1.2.3 From 10c2b5aeb21c439251e5e828bf1362f89ab3de49 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Dec 2020 10:52:22 +0100 Subject: um: virtio: fix handling of messages without payload If we have a message without payload, we call full_read() with len set to 0, which causes it to return -ECONNRESET. Catch this case and explicitly return 0 for it so we can actually use the zero-size config-changed message. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/virtio_uml.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 515d648d3c3d..ef620caed8e7 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -97,6 +97,9 @@ static int full_read(int fd, void *buf, int len, bool abortable) { int rc; + if (!len) + return 0; + do { rc = os_read_file(fd, buf, len); if (rc > 0) { -- cgit v1.2.3 From 9b84512cfe601759f66ee594b2d5aa07788251ea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Dec 2020 10:52:23 +0100 Subject: um: virtio: disable VQs during suspend If the system is suspended, the device shouldn't be able to send anything to it. Disable virtqueues in suspend to simulate this, and as we might be only using s2idle (kernel services are still on), prevent sending anything on them as well. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/virtio_uml.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'arch') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index ef620caed8e7..9db9d00b2dc2 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -65,6 +65,7 @@ struct virtio_uml_vq_info { vq_callback_t *callback; struct time_travel_event defer; #endif + bool suspended; }; extern unsigned long long physmem_size, highmem; @@ -725,6 +726,9 @@ static bool vu_notify(struct virtqueue *vq) const uint64_t n = 1; int rc; + if (info->suspended) + return true; + time_travel_propagate_time(); if (info->kick_fd < 0) { @@ -1288,6 +1292,36 @@ static const struct of_device_id virtio_uml_match[] = { }; MODULE_DEVICE_TABLE(of, virtio_uml_match); +static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + struct virtio_uml_vq_info *info = vq->priv; + + info->suspended = true; + vhost_user_set_vring_enable(vu_dev, vq->index, false); + } + + return 0; +} + +static int virtio_uml_resume(struct platform_device *pdev) +{ + struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + struct virtio_uml_vq_info *info = vq->priv; + + info->suspended = false; + vhost_user_set_vring_enable(vu_dev, vq->index, true); + } + + return 0; +} + static struct platform_driver virtio_uml_driver = { .probe = virtio_uml_probe, .remove = virtio_uml_remove, @@ -1295,6 +1329,8 @@ static struct platform_driver virtio_uml_driver = { .name = "virtio-uml", .of_match_table = virtio_uml_match, }, + .suspend = virtio_uml_suspend, + .resume = virtio_uml_resume, }; static int __init virtio_uml_init(void) -- cgit v1.2.3 From c8177aba37cac6b6dd0e5511fde9fc2d9e7f2f38 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Dec 2020 10:52:24 +0100 Subject: um: time-travel: rework interrupt handling in ext mode In external time-travel mode, where time is controlled via the controller application socket, interrupt handling is a little tricky. For example on virtio, the following happens: * we receive a message (that requires an ACK) on the vhost-user socket * we add a time-travel event to handle the interrupt (this causes communication on the time socket) * we ACK the original vhost-user message * we then handle the interrupt once the event is triggered This protocol ensures that the sender of the interrupt only continues to run in the simulation when the time-travel event has been added. So far, this was only done in the virtio driver, but it was actually wrong, because only virtqueue interrupts were handled this way, and config change interrupts were handled immediately. Additionally, the messages were actually handled in the real Linux interrupt handler, but Linux interrupt handlers are part of the simulation and shouldn't run while there's no time event. To really do this properly and only handle all kinds of interrupts in the time-travel event when we are scheduled to run in the simulation, rework this to plug in to the lower interrupt layers in UML directly: Add a um_request_irq_tt() function that let's a time-travel aware driver request an interrupt with an additional timetravel_handler() that is called outside of the context of the simulation, to handle the message only. It then adds an event to the time-travel calendar if necessary, and no "real" Linux code runs outside of the time simulation. This also hooks in with suspend/resume properly now, since this new timetravel_handler() can run while Linux is suspended and interrupts are disabled, and decide to wake up (or not) the system based on the message it received. Importantly in this case, it ACKs the message before the system even resumes and interrupts are re-enabled, thus allowing the simulation to progress properly. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/virtio_uml.c | 92 +++++++++--------- arch/um/include/linux/time-internal.h | 6 ++ arch/um/include/shared/irq_kern.h | 60 ++++++++++++ arch/um/kernel/irq.c | 171 +++++++++++++++++++++++++++++----- arch/um/kernel/time.c | 7 ++ 5 files changed, 267 insertions(+), 69 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 9db9d00b2dc2..bda3ab468938 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -55,16 +55,14 @@ struct virtio_uml_device { u64 protocol_features; u8 status; u8 registered:1; + + u8 config_changed_irq:1; + uint64_t vq_irq_vq_map; }; struct virtio_uml_vq_info { int kick_fd, call_fd; char name[32]; -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT - struct virtqueue *vq; - vq_callback_t *callback; - struct time_travel_event defer; -#endif bool suspended; }; @@ -351,9 +349,9 @@ static void vhost_user_reply(struct virtio_uml_device *vu_dev, rc, size); } -static irqreturn_t vu_req_interrupt(int irq, void *data) +static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev, + struct time_travel_event *ev) { - struct virtio_uml_device *vu_dev = data; struct virtqueue *vq; int response = 1; struct { @@ -371,14 +369,14 @@ static irqreturn_t vu_req_interrupt(int irq, void *data) switch (msg.msg.header.request) { case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG: - virtio_config_changed(&vu_dev->vdev); + vu_dev->config_changed_irq = true; response = 0; break; case VHOST_USER_SLAVE_VRING_CALL: virtio_device_for_each_vq((&vu_dev->vdev), vq) { if (vq->index == msg.msg.payload.vring_state.index) { response = 0; - vring_interrupt(0 /* ignored */, vq); + vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index); break; } } @@ -392,12 +390,45 @@ static irqreturn_t vu_req_interrupt(int irq, void *data) msg.msg.header.request); } + if (ev) + time_travel_add_irq_event(ev); + if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY) vhost_user_reply(vu_dev, &msg.msg, response); return IRQ_HANDLED; } +static irqreturn_t vu_req_interrupt(int irq, void *data) +{ + struct virtio_uml_device *vu_dev = data; + irqreturn_t ret = IRQ_HANDLED; + + if (!um_irq_timetravel_handler_used()) + ret = vu_req_read_message(vu_dev, NULL); + + if (vu_dev->vq_irq_vq_map) { + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index)) + vring_interrupt(0 /* ignored */, vq); + } + vu_dev->vq_irq_vq_map = 0; + } else if (vu_dev->config_changed_irq) { + virtio_config_changed(&vu_dev->vdev); + vu_dev->config_changed_irq = false; + } + + return ret; +} + +static void vu_req_interrupt_comm_handler(int irq, int fd, void *data, + struct time_travel_event *ev) +{ + vu_req_read_message(data, ev); +} + static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev) { int rc, req_fds[2]; @@ -408,9 +439,10 @@ static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev) return rc; vu_dev->req_fd = req_fds[0]; - rc = um_request_irq(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ, - vu_req_interrupt, IRQF_SHARED, - vu_dev->pdev->name, vu_dev); + rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ, + vu_req_interrupt, IRQF_SHARED, + vu_dev->pdev->name, vu_dev, + vu_req_interrupt_comm_handler); if (rc < 0) goto err_close; @@ -882,23 +914,6 @@ out: return rc; } -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT -static void vu_defer_irq_handle(struct time_travel_event *d) -{ - struct virtio_uml_vq_info *info; - - info = container_of(d, struct virtio_uml_vq_info, defer); - info->callback(info->vq); -} - -static void vu_defer_irq_callback(struct virtqueue *vq) -{ - struct virtio_uml_vq_info *info = vq->priv; - - time_travel_add_irq_event(&info->defer); -} -#endif - static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, unsigned index, vq_callback_t *callback, const char *name, bool ctx) @@ -918,18 +933,6 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name, pdev->id, name); -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT - /* - * When we get an interrupt, we must bounce it through the simulation - * calendar (the time-travel=ext:... socket). - */ - if (time_travel_mode == TT_MODE_EXTERNAL && callback) { - info->callback = callback; - callback = vu_defer_irq_callback; - time_travel_set_event_fn(&info->defer, vu_defer_irq_handle); - } -#endif - vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true, ctx, vu_notify, callback, info->name); if (!vq) { @@ -938,9 +941,6 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, } vq->priv = info; num = virtqueue_get_vring_size(vq); -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT - info->vq = vq; -#endif if (vu_dev->protocol_features & BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) { @@ -999,6 +999,10 @@ static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, int i, queue_idx = 0, rc; struct virtqueue *vq; + /* not supported for now */ + if (WARN_ON(nvqs > 64)) + return -EINVAL; + rc = vhost_user_set_mem_table(vu_dev); if (rc) return rc; diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h index 68e45e950137..8688e16e832b 100644 --- a/arch/um/include/linux/time-internal.h +++ b/arch/um/include/linux/time-internal.h @@ -7,6 +7,7 @@ #ifndef __TIMER_INTERNAL_H__ #define __TIMER_INTERNAL_H__ #include +#include #define TIMER_MULTIPLIER 256 #define TIMER_MIN_DELTA 500 @@ -74,6 +75,11 @@ static inline void time_travel_propagate_time(void) static inline void time_travel_wait_readable(int fd) { } + +static inline void time_travel_add_irq_event(struct time_travel_event *e) +{ + WARN_ON(1); +} #endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ /* diff --git a/arch/um/include/shared/irq_kern.h b/arch/um/include/shared/irq_kern.h index 7807de593bda..f2dc817abb7c 100644 --- a/arch/um/include/shared/irq_kern.h +++ b/arch/um/include/shared/irq_kern.h @@ -7,6 +7,7 @@ #define __IRQ_KERN_H__ #include +#include #include #include "irq_user.h" @@ -15,5 +16,64 @@ int um_request_irq(int irq, int fd, enum um_irq_type type, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +/** + * um_request_irq_tt - request an IRQ with timetravel handler + * + * @irq: the IRQ number, or %UM_IRQ_ALLOC + * @fd: The file descriptor to request an IRQ for + * @type: read or write + * @handler: the (generic style) IRQ handler + * @irqflags: Linux IRQ flags + * @devname: name for this to show + * @dev_id: data pointer to pass to the IRQ handler + * @timetravel_handler: the timetravel interrupt handler, invoked with the IRQ + * number, fd, dev_id and time-travel event pointer. + * + * Returns: The interrupt number assigned or a negative error. + * + * Note that the timetravel handler is invoked only if the time_travel_mode is + * %TT_MODE_EXTERNAL, and then it is invoked even while the system is suspended! + * This function must call time_travel_add_irq_event() for the event passed with + * an appropriate delay, before sending an ACK on the socket it was invoked for. + * + * If this was called while the system is suspended, then adding the event will + * cause the system to resume. + * + * Since this function will almost certainly have to handle the FD's condition, + * a read will consume the message, and after that it is up to the code using + * it to pass such a message to the @handler in whichever way it can. + * + * If time_travel_mode is not %TT_MODE_EXTERNAL the @timetravel_handler will + * not be invoked at all and the @handler must handle the FD becoming + * readable (or writable) instead. Use um_irq_timetravel_handler_used() to + * distinguish these cases. + * + * See virtio_uml.c for an example. + */ +int um_request_irq_tt(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, + void (*timetravel_handler)(int, int, void *, + struct time_travel_event *)); +#else +static inline +int um_request_irq_tt(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, + void (*timetravel_handler)(int, int, void *, + struct time_travel_event *)) +{ + return um_request_irq(irq, fd, type, handler, irqflags, + devname, dev_id); +} +#endif + +static inline bool um_irq_timetravel_handler_used(void) +{ + return time_travel_mode == TT_MODE_EXTERNAL; +} + void um_free_irq(int irq, void *dev_id); #endif diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 3741d2380060..82af5191e73d 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include extern void free_irqs(void); @@ -38,6 +38,12 @@ struct irq_reg { bool active; bool pending; bool wakeup; +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + bool pending_on_resume; + void (*timetravel_handler)(int, int, void *, + struct time_travel_event *); + struct time_travel_event event; +#endif }; struct irq_entry { @@ -51,6 +57,7 @@ struct irq_entry { static DEFINE_SPINLOCK(irq_lock); static LIST_HEAD(active_fds); static DECLARE_BITMAP(irqs_allocated, NR_IRQS); +static bool irqs_suspended; static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs) { @@ -74,9 +81,65 @@ static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs) } } -void sigio_handler_suspend(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +static void irq_event_handler(struct time_travel_event *ev) { - /* nothing */ + struct irq_reg *reg = container_of(ev, struct irq_reg, event); + + /* do nothing if suspended - just to cause a wakeup */ + if (irqs_suspended) + return; + + generic_handle_irq(reg->irq); +} + +static bool irq_do_timetravel_handler(struct irq_entry *entry, + enum um_irq_type t) +{ + struct irq_reg *reg = &entry->reg[t]; + + if (!reg->timetravel_handler) + return false; + + /* prevent nesting - we'll get it again later when we SIGIO ourselves */ + if (reg->pending_on_resume) + return true; + + reg->timetravel_handler(reg->irq, entry->fd, reg->id, ®->event); + + if (!reg->event.pending) + return false; + + if (irqs_suspended) + reg->pending_on_resume = true; + return true; +} +#else +static bool irq_do_timetravel_handler(struct irq_entry *entry, + enum um_irq_type t) +{ + return false; +} +#endif + +static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t, + struct uml_pt_regs *regs) +{ + struct irq_reg *reg = &entry->reg[t]; + + if (!reg->events) + return; + + if (os_epoll_triggered(idx, reg->events) <= 0) + return; + + if (irq_do_timetravel_handler(entry, t)) + return; + + if (irqs_suspended) + return; + + irq_io_loop(reg, regs); } void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) @@ -84,6 +147,9 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) struct irq_entry *irq_entry; int n, i; + if (irqs_suspended && !um_irq_timetravel_handler_used()) + return; + while (1) { /* This is now lockless - epoll keeps back-referencesto the irqs * which have trigger it so there is no need to walk the irq @@ -105,19 +171,13 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) irq_entry = os_epoll_get_data_pointer(i); - for (t = 0; t < NUM_IRQ_TYPES; t++) { - int events = irq_entry->reg[t].events; - - if (!events) - continue; - - if (os_epoll_triggered(i, events) > 0) - irq_io_loop(&irq_entry->reg[t], regs); - } + for (t = 0; t < NUM_IRQ_TYPES; t++) + sigio_reg_handler(i, irq_entry, t, regs); } } - free_irqs(); + if (!irqs_suspended) + free_irqs(); } static struct irq_entry *get_irq_entry_by_fd(int fd) @@ -169,7 +229,9 @@ static void update_or_free_irq_entry(struct irq_entry *entry) free_irq_entry(entry, false); } -static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id) +static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id, + void (*timetravel_handler)(int, int, void *, + struct time_travel_event *)) { struct irq_entry *irq_entry; int err, events = os_event_mask(type); @@ -206,6 +268,13 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id) irq_entry->reg[type].active = true; irq_entry->reg[type].events = events; +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + if (um_irq_timetravel_handler_used()) { + irq_entry->reg[type].timetravel_handler = timetravel_handler; + irq_entry->reg[type].event.fn = irq_event_handler; + } +#endif + WARN_ON(!update_irq_entry(irq_entry)); spin_unlock_irqrestore(&irq_lock, flags); @@ -339,9 +408,12 @@ void um_free_irq(int irq, void *dev) } EXPORT_SYMBOL(um_free_irq); -int um_request_irq(int irq, int fd, enum um_irq_type type, - irq_handler_t handler, unsigned long irqflags, - const char *devname, void *dev_id) +static int +_um_request_irq(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, + void (*timetravel_handler)(int, int, void *, + struct time_travel_event *)) { int err; @@ -360,7 +432,7 @@ int um_request_irq(int irq, int fd, enum um_irq_type type, return -ENOSPC; if (fd != -1) { - err = activate_fd(irq, fd, type, dev_id); + err = activate_fd(irq, fd, type, dev_id, timetravel_handler); if (err) goto error; } @@ -374,20 +446,41 @@ error: clear_bit(irq, irqs_allocated); return err; } + +int um_request_irq(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id) +{ + return _um_request_irq(irq, fd, type, handler, irqflags, + devname, dev_id, NULL); +} EXPORT_SYMBOL(um_request_irq); +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +int um_request_irq_tt(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, + void (*timetravel_handler)(int, int, void *, + struct time_travel_event *)) +{ + return _um_request_irq(irq, fd, type, handler, irqflags, + devname, dev_id, timetravel_handler); +} +EXPORT_SYMBOL(um_request_irq_tt); +#endif + #ifdef CONFIG_PM_SLEEP void um_irqs_suspend(void) { struct irq_entry *entry; unsigned long flags; - sig_info[SIGIO] = sigio_handler_suspend; + irqs_suspended = true; spin_lock_irqsave(&irq_lock, flags); list_for_each_entry(entry, &active_fds, list) { enum um_irq_type t; - bool wake = false; + bool clear = true; for (t = 0; t < NUM_IRQ_TYPES; t++) { if (!entry->reg[t].events) @@ -400,13 +493,17 @@ void um_irqs_suspend(void) * any FDs that should be suspended. */ if (entry->reg[t].wakeup || - entry->reg[t].irq == SIGIO_WRITE_IRQ) { - wake = true; + entry->reg[t].irq == SIGIO_WRITE_IRQ +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + || entry->reg[t].timetravel_handler +#endif + ) { + clear = false; break; } } - if (!wake) { + if (clear) { entry->suspended = true; os_clear_fd_async(entry->fd); entry->sigio_workaround = @@ -421,7 +518,31 @@ void um_irqs_resume(void) struct irq_entry *entry; unsigned long flags; - spin_lock_irqsave(&irq_lock, flags); + + local_irq_save(flags); +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + /* + * We don't need to lock anything here since we're in resume + * and nothing else is running, but have disabled IRQs so we + * don't try anything else with the interrupt list from there. + */ + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type t; + + for (t = 0; t < NUM_IRQ_TYPES; t++) { + struct irq_reg *reg = &entry->reg[t]; + + if (reg->pending_on_resume) { + irq_enter(); + generic_handle_irq(reg->irq); + irq_exit(); + reg->pending_on_resume = false; + } + } + } +#endif + + spin_lock(&irq_lock); list_for_each_entry(entry, &active_fds, list) { if (entry->suspended) { int err = os_set_fd_async(entry->fd); @@ -437,7 +558,7 @@ void um_irqs_resume(void) } spin_unlock_irqrestore(&irq_lock, flags); - sig_info[SIGIO] = sigio_handler; + irqs_suspended = false; send_sigio_to_self(); } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 315248b03941..8144406b6ea3 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -278,6 +278,7 @@ static void __time_travel_add_event(struct time_travel_event *e, { struct time_travel_event *tmp; bool inserted = false; + unsigned long flags; if (e->pending) return; @@ -285,6 +286,7 @@ static void __time_travel_add_event(struct time_travel_event *e, e->pending = true; e->time = time; + local_irq_save(flags); list_for_each_entry(tmp, &time_travel_events, list) { /* * Add the new entry before one with higher time, @@ -307,6 +309,7 @@ static void __time_travel_add_event(struct time_travel_event *e, tmp = time_travel_first_event(); time_travel_ext_update_request(tmp->time); time_travel_next_event = tmp->time; + local_irq_restore(flags); } static void time_travel_add_event(struct time_travel_event *e, @@ -383,10 +386,14 @@ static void time_travel_deliver_event(struct time_travel_event *e) static bool time_travel_del_event(struct time_travel_event *e) { + unsigned long flags; + if (!e->pending) return false; + local_irq_save(flags); list_del(&e->list); e->pending = false; + local_irq_restore(flags); return true; } -- cgit v1.2.3 From 1fcf9da389018d0d81509ec6419a3fff14f3ebfd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Dec 2020 10:52:25 +0100 Subject: um: virtio: allow devices to be configured for wakeup With all the IRQ machinery being in place, we can allow virtio devices to additionally be configured as wakeup sources, in which case basically any interrupt from them wakes us up. Note that this requires a call FD because the VQs are all disabled. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/virtio_uml.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index bda3ab468938..91ddf74ca888 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -55,6 +55,7 @@ struct virtio_uml_device { u64 protocol_features; u8 status; u8 registered:1; + u8 suspended:1; u8 config_changed_irq:1; uint64_t vq_irq_vq_map; @@ -390,7 +391,7 @@ static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev, msg.msg.header.request); } - if (ev) + if (ev && !vu_dev->suspended) time_travel_add_irq_event(ev); if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY) @@ -1135,6 +1136,8 @@ static int virtio_uml_probe(struct platform_device *pdev) platform_set_drvdata(pdev, vu_dev); + device_set_wakeup_capable(&vu_dev->vdev.dev, true); + rc = register_virtio_device(&vu_dev->vdev); if (rc) put_device(&vu_dev->vdev.dev); @@ -1308,7 +1311,12 @@ static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state) vhost_user_set_vring_enable(vu_dev, vq->index, false); } - return 0; + if (!device_may_wakeup(&vu_dev->vdev.dev)) { + vu_dev->suspended = true; + return 0; + } + + return irq_set_irq_wake(vu_dev->irq, 1); } static int virtio_uml_resume(struct platform_device *pdev) @@ -1323,7 +1331,12 @@ static int virtio_uml_resume(struct platform_device *pdev) vhost_user_set_vring_enable(vu_dev, vq->index, true); } - return 0; + vu_dev->suspended = false; + + if (!device_may_wakeup(&vu_dev->vdev.dev)) + return 0; + + return irq_set_irq_wake(vu_dev->irq, 0); } static struct platform_driver virtio_uml_driver = { -- cgit v1.2.3 From 3a5f4154741f9e0a6fad06020d07533b76e0057e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Dec 2020 11:47:55 +0000 Subject: um: fix spelling mistake in Kconfig "privleges" -> "privileges" There is a spelling mistake in the Kconfig help text. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Richard Weinberger --- arch/um/drivers/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index 2e7b8e0e7194..58e3ffc17b7d 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -323,7 +323,7 @@ config UML_NET_SLIRP frames. In general, slirp allows the UML the same IP connectivity to the outside world that the host user is permitted, and unlike other transports, SLiRP works without the need of root level - privleges, setuid binaries, or SLIP devices on the host. This + privileges, setuid binaries, or SLIP devices on the host. This also means not every type of connection is possible, but most situations can be accommodated with carefully crafted slirp commands that can be passed along as part of the network device's -- cgit v1.2.3 From 731ecea3e5495aa6bd3cb8587f5267cf5e4220e2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 Dec 2020 14:07:58 +0000 Subject: mm: Remove arch_remap() and mm-arch-hooks.h powerpc was the last provider of arch_remap() and the last user of mm-arch-hooks.h. Since commit 526a9c4a7234 ("powerpc/vdso: Provide vdso_remap()"), arch_remap() hence mm-arch-hooks.h are not used anymore. Remove them. Signed-off-by: Christophe Leroy Signed-off-by: Richard Weinberger --- arch/um/include/asm/Kbuild | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 1c63b260ecc4..314979467db1 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -14,7 +14,6 @@ generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h generic-y += mcs_spinlock.h -generic-y += mm-arch-hooks.h generic-y += mmiowb.h generic-y += module.lds.h generic-y += param.h -- cgit v1.2.3 From e1e22d0d9183aaaf65acf0cb529cb51ddbc12e08 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Jan 2021 22:07:41 +0100 Subject: um: print register names in wait_for_stub Since we're basically debugging the userspace (it runs in ptrace) it's useful to dump out the registers - but they're not readable, so if something goes wrong it's hard to say what. Print the names of registers in the register dump so it's easier to look at. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/os-Linux/skas/process.c | 55 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 0621d521208e..ed4bbffe8d7a 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -28,6 +28,54 @@ int is_skas_winch(int pid, int fd, void *data) return pid == getpgrp(); } +static const char *ptrace_reg_name(int idx) +{ +#define R(n) case HOST_##n: return #n + + switch (idx) { +#ifdef __x86_64__ + R(BX); + R(CX); + R(DI); + R(SI); + R(DX); + R(BP); + R(AX); + R(R8); + R(R9); + R(R10); + R(R11); + R(R12); + R(R13); + R(R14); + R(R15); + R(ORIG_AX); + R(CS); + R(SS); + R(EFLAGS); +#elif defined(__i386__) + R(IP); + R(SP); + R(EFLAGS); + R(AX); + R(BX); + R(CX); + R(DX); + R(SI); + R(DI); + R(BP); + R(CS); + R(SS); + R(DS); + R(FS); + R(ES); + R(GS); + R(ORIG_AX); +#endif + } + return ""; +} + static int ptrace_dump_regs(int pid) { unsigned long regs[MAX_REG_NR]; @@ -37,8 +85,11 @@ static int ptrace_dump_regs(int pid) return -errno; printk(UM_KERN_ERR "Stub registers -\n"); - for (i = 0; i < ARRAY_SIZE(regs); i++) - printk(UM_KERN_ERR "\t%d - %lx\n", i, regs[i]); + for (i = 0; i < ARRAY_SIZE(regs); i++) { + const char *regname = ptrace_reg_name(i); + + printk(UM_KERN_ERR "\t%s\t(%2d): %lx\n", regname, i, regs[i]); + } return 0; } -- cgit v1.2.3 From 47da29763ec9a153b9b685bff9db659e4e09e494 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Jan 2021 22:08:02 +0100 Subject: um: mm: check more comprehensively for stub changes If userspace tries to change the stub, we need to kill it, because otherwise it can escape the virtual machine. In a few cases the stub checks weren't good, e.g. if userspace just tries to mmap(0x100000 - 0x1000, 0x3000, ...) it could succeed to get a new private/anonymous mapping replacing the stubs. Fix this by checking everywhere, and checking for _overlap_, not just direct changes. Cc: stable@vger.kernel.org Fixes: 3963333fe676 ("uml: cover stubs with a VMA") Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/kernel/tlb.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 61776790cd67..89468da6bf88 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -125,6 +125,9 @@ static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, struct host_vm_op *last; int fd = -1, ret = 0; + if (virt + len > STUB_START && virt < STUB_END) + return -EINVAL; + if (hvc->userspace) fd = phys_mapping(phys, &offset); else @@ -162,7 +165,7 @@ static int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; - if ((addr >= STUB_START) && (addr < STUB_END)) + if (addr + len > STUB_START && addr < STUB_END) return -EINVAL; if (hvc->index != 0) { @@ -192,6 +195,9 @@ static int add_mprotect(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; + if (addr + len > STUB_START && addr < STUB_END) + return -EINVAL; + if (hvc->index != 0) { last = &hvc->ops[hvc->index - 1]; if ((last->type == MPROTECT) && @@ -472,6 +478,10 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) struct mm_id *mm_id; address &= PAGE_MASK; + + if (address >= STUB_START && address < STUB_END) + goto kill; + pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) goto kill; -- cgit v1.2.3 From a7d48886cacf8b426e0079bca9639d2657cf2d38 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Jan 2021 22:08:03 +0100 Subject: um: defer killing userspace on page table update failures In some cases we can get to fix_range_common() with mmap_sem held, and in others we get there without it being held. For example, we get there with it held from sys_mprotect(), and without it held from fork_handler(). Avoid any issues in this and simply defer killing the task until it runs the next time. Do it on the mm so that another task that shares the same mm can't continue running afterwards. Cc: stable@vger.kernel.org Fixes: 468f65976a8d ("um: Fix hung task in fix_range_common()") Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/include/shared/skas/mm_id.h | 1 + arch/um/kernel/tlb.c | 7 +++---- arch/um/os-Linux/skas/process.c | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h index 4337b4ced095..e82e203f5f41 100644 --- a/arch/um/include/shared/skas/mm_id.h +++ b/arch/um/include/shared/skas/mm_id.h @@ -12,6 +12,7 @@ struct mm_id { int pid; } u; unsigned long stack; + int kill; }; #endif diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 89468da6bf88..5be1b0da9f3b 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -352,12 +352,11 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, /* This is not an else because ret is modified above */ if (ret) { + struct mm_id *mm_idp = ¤t->mm->context.id; + printk(KERN_ERR "fix_range_common: failed, killing current " "process: %d\n", task_tgid_vnr(current)); - /* We are under mmap_lock, release it such that current can terminate */ - mmap_write_unlock(current->mm); - force_sig(SIGKILL); - do_signal(¤t->thread.regs); + mm_idp->kill = 1; } } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index ed4bbffe8d7a..d910e25c273e 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -300,6 +300,7 @@ static int userspace_tramp(void *stack) } int userspace_pid[NR_CPUS]; +int kill_userspace_mm[NR_CPUS]; /** * start_userspace() - prepare a new userspace process @@ -393,6 +394,8 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) interrupt_end(); while (1) { + if (kill_userspace_mm[0]) + fatal_sigsegv(); /* * This can legitimately fail if the process loads a @@ -714,4 +717,5 @@ void reboot_skas(void) void __switch_mm(struct mm_id *mm_idp) { userspace_pid[0] = mm_idp->u.pid; + kill_userspace_mm[0] = mm_idp->kill; } -- cgit v1.2.3 From 84b2789d61156db0224724806b20110c0d34b07c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Jan 2021 22:09:42 +0100 Subject: um: separate child and parent errors in clone stub If the two are mixed up, then it looks as though the parent returned an error if the child failed (before) the mmap(), and then the resulting process never gets killed. Fix this by splitting the child and parent errors, reporting and using them appropriately. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/include/shared/skas/stub-data.h | 2 +- arch/um/kernel/skas/clone.c | 25 +++++++++++-------------- arch/um/os-Linux/skas/process.c | 23 +++++++++++++---------- arch/x86/um/shared/sysdep/stub_32.h | 2 +- arch/x86/um/shared/sysdep/stub_64.h | 2 +- 5 files changed, 27 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h index 6b01d97a9386..5e3ade3fb38b 100644 --- a/arch/um/include/shared/skas/stub-data.h +++ b/arch/um/include/shared/skas/stub-data.h @@ -11,7 +11,7 @@ struct stub_data { unsigned long offset; int fd; - long err; + long parent_err, child_err; }; #endif diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index bfb70c456b30..7c592c788cbf 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -24,29 +24,26 @@ void __attribute__ ((__section__ (".__syscall_stub"))) stub_clone_handler(void) { - struct stub_data *data = (struct stub_data *) STUB_DATA; + int stack; + struct stub_data *data = (void *) ((unsigned long)&stack & ~(UM_KERN_PAGE_SIZE - 1)); long err; err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, - STUB_DATA + UM_KERN_PAGE_SIZE / 2 - sizeof(void *)); - if (err != 0) - goto out; + (unsigned long)data + UM_KERN_PAGE_SIZE / 2 - sizeof(void *)); + if (err) { + data->parent_err = err; + goto done; + } err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); - if (err) - goto out; + if (err) { + data->child_err = err; + goto done; + } remap_stack(data->fd, data->offset); goto done; - out: - /* - * save current result. - * Parent: pid; - * child: retcode of mmap already saved and it jumps around this - * assignment - */ - data->err = err; done: trap_myself(); } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index d910e25c273e..623b0aeadf4c 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -545,8 +545,14 @@ int copy_context_skas0(unsigned long new_stack, int pid) * and child's mmap2 calls */ *data = ((struct stub_data) { - .offset = MMAP_OFFSET(new_offset), - .fd = new_fd + .offset = MMAP_OFFSET(new_offset), + .fd = new_fd, + .parent_err = -ESRCH, + .child_err = 0, + }); + + *child_data = ((struct stub_data) { + .child_err = -ESRCH, }); err = ptrace_setregs(pid, thread_regs); @@ -564,9 +570,6 @@ int copy_context_skas0(unsigned long new_stack, int pid) return err; } - /* set a well known return code for detection of child write failure */ - child_data->err = 12345678; - /* * Wait, until parent has finished its work: read child's pid from * parent's stack, and check, if bad result. @@ -581,7 +584,7 @@ int copy_context_skas0(unsigned long new_stack, int pid) wait_stub_done(pid); - pid = data->err; + pid = data->parent_err; if (pid < 0) { printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports " "error %d\n", -pid); @@ -593,10 +596,10 @@ int copy_context_skas0(unsigned long new_stack, int pid) * child's stack and check it. */ wait_stub_done(pid); - if (child_data->err != STUB_DATA) { - printk(UM_KERN_ERR "copy_context_skas0 - stub-child reports " - "error %ld\n", child_data->err); - err = child_data->err; + if (child_data->child_err != STUB_DATA) { + printk(UM_KERN_ERR "copy_context_skas0 - stub-child %d reports " + "error %ld\n", pid, data->child_err); + err = data->child_err; goto out_kill; } diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h index 51fd256c75f0..8ea69211e53c 100644 --- a/arch/x86/um/shared/sysdep/stub_32.h +++ b/arch/x86/um/shared/sysdep/stub_32.h @@ -86,7 +86,7 @@ static inline void remap_stack(int fd, unsigned long offset) "d" (PROT_READ | PROT_WRITE), "S" (MAP_FIXED | MAP_SHARED), "D" (fd), "a" (offset), - "i" (&((struct stub_data *) STUB_DATA)->err) + "i" (&((struct stub_data *) STUB_DATA)->child_err) : "memory"); } diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h index 994df93c5ed3..b7b8b8e4359d 100644 --- a/arch/x86/um/shared/sysdep/stub_64.h +++ b/arch/x86/um/shared/sysdep/stub_64.h @@ -92,7 +92,7 @@ static inline void remap_stack(long fd, unsigned long offset) "d" (PROT_READ | PROT_WRITE), "g" (MAP_FIXED | MAP_SHARED), "g" (fd), "g" (offset), - "i" (&((struct stub_data *) STUB_DATA)->err) + "i" (&((struct stub_data *) STUB_DATA)->child_err) : __syscall_clobber, "r10", "r8", "r9" ); } -- cgit v1.2.3 From 9f0b4807a44ff81cf59421c8a86641efec586610 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Jan 2021 22:09:43 +0100 Subject: um: rework userspace stubs to not hard-code stub location The userspace stacks mostly have a stack (and in the case of the syscall stub we can just set their stack pointer) that points to the location of the stub data page already. Rework the stubs to use the stack pointer to derive the start of the data page, rather than requiring it to be hard-coded. In the clone stub, also integrate the int3 into the stack remap, since we really must not use the stack while we remap it. This prepares for putting the stub at a variable location that's not part of the normal address space of the userspace processes running inside the UML machine. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/include/shared/as-layout.h | 16 ++++----------- arch/um/include/shared/common-offsets.h | 6 ++++++ arch/um/kernel/skas/clone.c | 3 +-- arch/um/os-Linux/skas/mem.c | 2 ++ arch/x86/um/shared/sysdep/stub_32.h | 33 ++++++++++++++++++++---------- arch/x86/um/shared/sysdep/stub_64.h | 36 ++++++++++++++++++++++----------- arch/x86/um/stub_32.S | 17 ++++++++++------ arch/x86/um/stub_64.S | 5 ++--- arch/x86/um/stub_segv.c | 5 +++-- 9 files changed, 75 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h index 5f286ef2721b..56408bf3480d 100644 --- a/arch/um/include/shared/as-layout.h +++ b/arch/um/include/shared/as-layout.h @@ -20,18 +20,10 @@ * 'UL' and other type specifiers unilaterally. We * use the following macros to deal with this. */ - -#ifdef __ASSEMBLY__ -#define _UML_AC(X, Y) (Y) -#else -#define __UML_AC(X, Y) (X(Y)) -#define _UML_AC(X, Y) __UML_AC(X, Y) -#endif - -#define STUB_START _UML_AC(, 0x100000) -#define STUB_CODE _UML_AC((unsigned long), STUB_START) -#define STUB_DATA _UML_AC((unsigned long), STUB_CODE + UM_KERN_PAGE_SIZE) -#define STUB_END _UML_AC((unsigned long), STUB_DATA + UM_KERN_PAGE_SIZE) +#define STUB_START 0x100000UL +#define STUB_CODE STUB_START +#define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE) +#define STUB_END (STUB_DATA + UM_KERN_PAGE_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 16a51a8c800f..edc90ab73734 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* for use by sys-$SUBARCH/kernel-offsets.c */ +#include DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); @@ -43,3 +44,8 @@ DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT); #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT); #endif + +/* for stub */ +DEFINE(UML_STUB_FIELD_OFFSET, offsetof(struct stub_data, offset)); +DEFINE(UML_STUB_FIELD_CHILD_ERR, offsetof(struct stub_data, child_err)); +DEFINE(UML_STUB_FIELD_FD, offsetof(struct stub_data, fd)); diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index 7c592c788cbf..592cdb138441 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -41,8 +41,7 @@ stub_clone_handler(void) goto done; } - remap_stack(data->fd, data->offset); - goto done; + remap_stack_and_trap(); done: trap_myself(); diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index c546d16f8dfe..3b4975ee67e2 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -40,6 +40,8 @@ static int __init init_syscall_regs(void) syscall_regs[REGS_IP_INDEX] = STUB_CODE + ((unsigned long) batch_syscall_stub - (unsigned long) __syscall_stub_start); + syscall_regs[REGS_SP_INDEX] = STUB_DATA; + return 0; } diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h index 8ea69211e53c..c3891c1ada26 100644 --- a/arch/x86/um/shared/sysdep/stub_32.h +++ b/arch/x86/um/shared/sysdep/stub_32.h @@ -7,8 +7,8 @@ #define __SYSDEP_STUB_H #include +#include -#define STUB_SYSCALL_RET EAX #define STUB_MMAP_NR __NR_mmap2 #define MMAP_OFFSET(o) ((o) >> UM_KERN_PAGE_SHIFT) @@ -77,17 +77,28 @@ static inline void trap_myself(void) __asm("int3"); } -static inline void remap_stack(int fd, unsigned long offset) +static void inline remap_stack_and_trap(void) { - __asm__ volatile ("movl %%eax,%%ebp ; movl %0,%%eax ; int $0x80 ;" - "movl %7, %%ebx ; movl %%eax, (%%ebx)" - : : "g" (STUB_MMAP_NR), "b" (STUB_DATA), - "c" (UM_KERN_PAGE_SIZE), - "d" (PROT_READ | PROT_WRITE), - "S" (MAP_FIXED | MAP_SHARED), "D" (fd), - "a" (offset), - "i" (&((struct stub_data *) STUB_DATA)->child_err) - : "memory"); + __asm__ volatile ( + "movl %%esp,%%ebx ;" + "andl %0,%%ebx ;" + "movl %1,%%eax ;" + "movl %%ebx,%%edi ; addl %2,%%edi ; movl (%%edi),%%edi ;" + "movl %%ebx,%%ebp ; addl %3,%%ebp ; movl (%%ebp),%%ebp ;" + "int $0x80 ;" + "addl %4,%%ebx ; movl %%eax, (%%ebx) ;" + "int $3" + : : + "g" (~(UM_KERN_PAGE_SIZE - 1)), + "g" (STUB_MMAP_NR), + "g" (UML_STUB_FIELD_FD), + "g" (UML_STUB_FIELD_OFFSET), + "g" (UML_STUB_FIELD_CHILD_ERR), + "c" (UM_KERN_PAGE_SIZE), + "d" (PROT_READ | PROT_WRITE), + "S" (MAP_FIXED | MAP_SHARED) + : + "memory"); } #endif diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h index b7b8b8e4359d..6e2626b77a2e 100644 --- a/arch/x86/um/shared/sysdep/stub_64.h +++ b/arch/x86/um/shared/sysdep/stub_64.h @@ -7,8 +7,8 @@ #define __SYSDEP_STUB_H #include +#include -#define STUB_SYSCALL_RET PT_INDEX(RAX) #define STUB_MMAP_NR __NR_mmap #define MMAP_OFFSET(o) (o) @@ -82,18 +82,30 @@ static inline void trap_myself(void) __asm("int3"); } -static inline void remap_stack(long fd, unsigned long offset) +static inline void remap_stack_and_trap(void) { - __asm__ volatile ("movq %4,%%r10 ; movq %5,%%r8 ; " - "movq %6, %%r9; " __syscall "; movq %7, %%rbx ; " - "movq %%rax, (%%rbx)": - : "a" (STUB_MMAP_NR), "D" (STUB_DATA), - "S" (UM_KERN_PAGE_SIZE), - "d" (PROT_READ | PROT_WRITE), - "g" (MAP_FIXED | MAP_SHARED), "g" (fd), - "g" (offset), - "i" (&((struct stub_data *) STUB_DATA)->child_err) - : __syscall_clobber, "r10", "r8", "r9" ); + __asm__ volatile ( + "movq %0,%%rax ;" + "movq %%rsp,%%rdi ;" + "andq %1,%%rdi ;" + "movq %2,%%r10 ;" + "movq %%rdi,%%r8 ; addq %3,%%r8 ; movq (%%r8),%%r8 ;" + "movq %%rdi,%%r9 ; addq %4,%%r9 ; movq (%%r9),%%r9 ;" + __syscall ";" + "movq %%rsp,%%rdi ; andq %1,%%rdi ;" + "addq %5,%%rdi ; movq %%rax, (%%rdi) ;" + "int3" + : : + "g" (STUB_MMAP_NR), + "g" (~(UM_KERN_PAGE_SIZE - 1)), + "g" (MAP_FIXED | MAP_SHARED), + "g" (UML_STUB_FIELD_FD), + "g" (UML_STUB_FIELD_OFFSET), + "g" (UML_STUB_FIELD_CHILD_ERR), + "S" (UM_KERN_PAGE_SIZE), + "d" (PROT_READ | PROT_WRITE) + : + __syscall_clobber, "r10", "r8", "r9"); } #endif diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S index a193e88536a9..8291899e6aaf 100644 --- a/arch/x86/um/stub_32.S +++ b/arch/x86/um/stub_32.S @@ -5,21 +5,22 @@ .globl batch_syscall_stub batch_syscall_stub: - /* load pointer to first operation */ - mov $(STUB_DATA+8), %esp - + /* %esp comes in as "top of page" */ + mov %esp, %ecx + /* %esp has pointer to first operation */ + add $8, %esp again: /* load length of additional data */ mov 0x0(%esp), %eax /* if(length == 0) : end of list */ /* write possible 0 to header */ - mov %eax, STUB_DATA+4 + mov %eax, 0x4(%ecx) cmpl $0, %eax jz done /* save current pointer */ - mov %esp, STUB_DATA+4 + mov %esp, 0x4(%ecx) /* skip additional data */ add %eax, %esp @@ -38,6 +39,10 @@ again: /* execute syscall */ int $0x80 + /* restore top of page pointer in %ecx */ + mov %esp, %ecx + andl $(~UM_KERN_PAGE_SIZE) + 1, %ecx + /* check return value */ pop %ebx cmp %ebx, %eax @@ -45,7 +50,7 @@ again: done: /* save return value */ - mov %eax, STUB_DATA + mov %eax, (%ecx) /* stop */ int3 diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S index 8a95c5b2eaf9..f3404640197a 100644 --- a/arch/x86/um/stub_64.S +++ b/arch/x86/um/stub_64.S @@ -4,9 +4,8 @@ .section .__syscall_stub, "ax" .globl batch_syscall_stub batch_syscall_stub: - mov $(STUB_DATA), %rbx - /* load pointer to first operation */ - mov %rbx, %rsp + /* %rsp has the pointer to first operation */ + mov %rsp, %rbx add $0x10, %rsp again: /* load length of additional data */ diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c index 27361cbb7ca9..21836eaf1725 100644 --- a/arch/x86/um/stub_segv.c +++ b/arch/x86/um/stub_segv.c @@ -11,10 +11,11 @@ void __attribute__ ((__section__ (".__syscall_stub"))) stub_segv_handler(int sig, siginfo_t *info, void *p) { + int stack; ucontext_t *uc = p; + struct faultinfo *f = (void *)(((unsigned long)&stack) & ~(UM_KERN_PAGE_SIZE - 1)); - GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA), - &uc->uc_mcontext); + GET_FAULTINFO_FROM_MC(*f, &uc->uc_mcontext); trap_myself(); } -- cgit v1.2.3 From bfc58e2b98e99737409cd9f4d86a79677c5b887c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Jan 2021 22:09:44 +0100 Subject: um: remove process stub VMA This mostly reverts the old commit 3963333fe676 ("uml: cover stubs with a VMA") which had added a VMA to the existing PTEs. However, there's no real reason to have the PTEs in the first place and the VMA cannot be 'fixed' in place, which leads to bugs that userspace could try to unmap them and be forcefully killed, or such. Also, there's a bit of an ugly hole in userspace's address space. Simplify all this: just install the stub code/page at the top of the (inner) address space, i.e. put it just above TASK_SIZE. The pages are simply hard-coded to be mapped in the userspace process we use to implement an mm context, and they're out of reach of the inner mmap/munmap/mprotect etc. since they're above TASK_SIZE. Getting rid of the VMA also makes vma_merge() no longer hit one of the VM_WARN_ON()s there because we installed a VMA while the code assumes the stack VMA is the first one. It also removes a lockdep warning about mmap_sem usage since we no longer have uml_setup_stubs() and thus no longer need to do any manipulation that would require mmap_sem in activate_mm(). Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/include/asm/Kbuild | 1 + arch/um/include/asm/mmu_context.h | 29 +------------ arch/um/include/shared/as-layout.h | 3 +- arch/um/kernel/exec.c | 4 +- arch/um/kernel/skas/mmu.c | 87 -------------------------------------- arch/um/kernel/tlb.c | 15 ------- arch/um/kernel/um_arch.c | 5 +++ arch/um/os-Linux/skas/process.c | 4 -- arch/x86/um/os-Linux/task_size.c | 2 +- 9 files changed, 11 insertions(+), 139 deletions(-) (limited to 'arch') diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 314979467db1..a58811dc054c 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -25,3 +25,4 @@ generic-y += topology.h generic-y += trace_clock.h generic-y += word-at-a-time.h generic-y += kprobes.h +generic-y += mm_hooks.h diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index f8a100770691..68e2eb9cfb47 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -10,33 +10,9 @@ #include #include +#include #include -extern void uml_setup_stubs(struct mm_struct *mm); -/* - * Needed since we do not use the asm-generic/mm_hooks.h: - */ -static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) -{ - uml_setup_stubs(mm); - return 0; -} -extern void arch_exit_mmap(struct mm_struct *mm); -static inline void arch_unmap(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ -} -static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, - bool write, bool execute, bool foreign) -{ - /* by default, allow everything */ - return true; -} - -/* - * end asm-generic/mm_hooks.h functions - */ - extern void force_flush_all(void); #define activate_mm activate_mm @@ -47,9 +23,6 @@ static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) * when the new ->mm is used for the first time. */ __switch_mm(&new->context.id); - mmap_write_lock_nested(new, SINGLE_DEPTH_NESTING); - uml_setup_stubs(new); - mmap_write_unlock(new); } static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h index 56408bf3480d..9a0bd648d872 100644 --- a/arch/um/include/shared/as-layout.h +++ b/arch/um/include/shared/as-layout.h @@ -20,7 +20,7 @@ * 'UL' and other type specifiers unilaterally. We * use the following macros to deal with this. */ -#define STUB_START 0x100000UL +#define STUB_START stub_start #define STUB_CODE STUB_START #define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE) #define STUB_END (STUB_DATA + UM_KERN_PAGE_SIZE) @@ -46,6 +46,7 @@ extern unsigned long long highmem; extern unsigned long brk_start; extern unsigned long host_task_size; +extern unsigned long stub_start; extern int linux_main(int argc, char **argv); extern void uml_finishsetup(void); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index e8fd5d540b05..4d8498100341 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -26,9 +26,7 @@ void flush_thread(void) arch_flush_thread(¤t->thread.arch); - ret = unmap(¤t->mm->context.id, 0, STUB_START, 0, &data); - ret = ret || unmap(¤t->mm->context.id, STUB_END, - host_task_size - STUB_END, 1, &data); + ret = unmap(¤t->mm->context.id, 0, TASK_SIZE, 1, &data); if (ret) { printk(KERN_ERR "flush_thread - clearing address space failed, " "err = %d\n", ret); diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index d9961163da66..125df465e8ea 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -14,47 +14,6 @@ #include #include -static int init_stub_pte(struct mm_struct *mm, unsigned long proc, - unsigned long kernel) -{ - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = pgd_offset(mm, proc); - - p4d = p4d_alloc(mm, pgd, proc); - if (!p4d) - goto out; - - pud = pud_alloc(mm, p4d, proc); - if (!pud) - goto out_pud; - - pmd = pmd_alloc(mm, pud, proc); - if (!pmd) - goto out_pmd; - - pte = pte_alloc_map(mm, pmd, proc); - if (!pte) - goto out_pte; - - *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); - *pte = pte_mkread(*pte); - return 0; - - out_pte: - pmd_free(mm, pmd); - out_pmd: - pud_free(mm, pud); - out_pud: - p4d_free(mm, p4d); - out: - return -ENOMEM; -} - int init_new_context(struct task_struct *task, struct mm_struct *mm) { struct mm_context *from_mm = NULL; @@ -98,52 +57,6 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) return ret; } -void uml_setup_stubs(struct mm_struct *mm) -{ - int err, ret; - - ret = init_stub_pte(mm, STUB_CODE, - (unsigned long) __syscall_stub_start); - if (ret) - goto out; - - ret = init_stub_pte(mm, STUB_DATA, mm->context.id.stack); - if (ret) - goto out; - - mm->context.stub_pages[0] = virt_to_page(__syscall_stub_start); - mm->context.stub_pages[1] = virt_to_page(mm->context.id.stack); - - /* dup_mmap already holds mmap_lock */ - err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START, - VM_READ | VM_MAYREAD | VM_EXEC | - VM_MAYEXEC | VM_DONTCOPY | VM_PFNMAP, - mm->context.stub_pages); - if (err) { - printk(KERN_ERR "install_special_mapping returned %d\n", err); - goto out; - } - return; - -out: - force_sigsegv(SIGSEGV); -} - -void arch_exit_mmap(struct mm_struct *mm) -{ - pte_t *pte; - - pte = virt_to_pte(mm, STUB_CODE); - if (pte != NULL) - pte_clear(mm, STUB_CODE, pte); - - pte = virt_to_pte(mm, STUB_DATA); - if (pte == NULL) - return; - - pte_clear(mm, STUB_DATA, pte); -} - void destroy_context(struct mm_struct *mm) { struct mm_context *mmu = &mm->context; diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 5be1b0da9f3b..bc38f79ca3a3 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -125,9 +125,6 @@ static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, struct host_vm_op *last; int fd = -1, ret = 0; - if (virt + len > STUB_START && virt < STUB_END) - return -EINVAL; - if (hvc->userspace) fd = phys_mapping(phys, &offset); else @@ -165,9 +162,6 @@ static int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; - if (addr + len > STUB_START && addr < STUB_END) - return -EINVAL; - if (hvc->index != 0) { last = &hvc->ops[hvc->index - 1]; if ((last->type == MUNMAP) && @@ -195,9 +189,6 @@ static int add_mprotect(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; - if (addr + len > STUB_START && addr < STUB_END) - return -EINVAL; - if (hvc->index != 0) { last = &hvc->ops[hvc->index - 1]; if ((last->type == MPROTECT) && @@ -232,9 +223,6 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, pte = pte_offset_kernel(pmd, addr); do { - if ((addr >= STUB_START) && (addr < STUB_END)) - continue; - r = pte_read(*pte); w = pte_write(*pte); x = pte_exec(*pte); @@ -478,9 +466,6 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) address &= PAGE_MASK; - if (address >= STUB_START && address < STUB_END) - goto kill; - pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) goto kill; diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 80e2660782a0..74e07e748a9b 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -249,6 +249,7 @@ void uml_finishsetup(void) } /* Set during early boot */ +unsigned long stub_start; unsigned long task_size; EXPORT_SYMBOL(task_size); @@ -283,6 +284,10 @@ int __init linux_main(int argc, char **argv) add_arg(DEFAULT_COMMAND_LINE_CONSOLE); host_task_size = os_get_top_address(); + /* reserve two pages for the stubs */ + host_task_size -= 2 * PAGE_SIZE; + stub_start = host_task_size; + /* * TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps * out diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 623b0aeadf4c..fba674fac8b7 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -251,10 +251,6 @@ static int userspace_tramp(void *stack) signal(SIGTERM, SIG_DFL); signal(SIGWINCH, SIG_IGN); - /* - * This has a pte, but it can't be mapped in with the usual - * tlb_flush mechanism because this is part of that mechanism - */ fd = phys_mapping(to_phys(__syscall_stub_start), &offset); addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c index e62174638f00..1dc9adc20b1c 100644 --- a/arch/x86/um/os-Linux/task_size.c +++ b/arch/x86/um/os-Linux/task_size.c @@ -145,7 +145,7 @@ out: unsigned long os_get_top_address(void) { /* The old value of CONFIG_TOP_ADDR */ - return 0x7fc0000000; + return 0x7fc0002000; } #endif -- cgit v1.2.3 From dde8b58d512703d396e02427de1053b4d912aa42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jan 2021 13:12:18 +0100 Subject: um: add a pseudo RTC Add a pseudo RTC that simply is able to send an alarm signal waking up the system at a given time in the future. Since apparently timerfd_create() FDs don't support SIGIO, we use the sigio-creating helper thread, which just learned to do suspend/resume properly in the previous patch. For time-travel mode, OTOH, just add an event at the specified time in the future, and that's already sufficient to wake up the system at that point in time since suspend will just be in an "endless wait". For s2idle support also call pm_system_wakeup(). Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/Kconfig | 11 ++ arch/um/drivers/Makefile | 2 + arch/um/drivers/rtc.h | 15 +++ arch/um/drivers/rtc_kern.c | 211 ++++++++++++++++++++++++++++++++++ arch/um/drivers/rtc_user.c | 80 +++++++++++++ arch/um/include/linux/time-internal.h | 11 ++ arch/um/kernel/time.c | 10 +- 7 files changed, 339 insertions(+), 1 deletion(-) create mode 100644 arch/um/drivers/rtc.h create mode 100644 arch/um/drivers/rtc_kern.c create mode 100644 arch/um/drivers/rtc_user.c (limited to 'arch') diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index 58e3ffc17b7d..03ba34b61115 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -346,3 +346,14 @@ config VIRTIO_UML help This driver provides support for virtio based paravirtual device drivers over vhost-user sockets. + +config UML_RTC + bool "UML RTC driver" + depends on RTC_CLASS + # there's no use in this if PM_SLEEP isn't enabled ... + depends on PM_SLEEP + help + When PM_SLEEP is configured, it may be desirable to wake up using + rtcwake, especially in time-travel mode. This driver enables that + by providing a fake RTC clock that causes a wakeup at the right + time. diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 2a249f619467..dcc64a02f81f 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -17,6 +17,7 @@ hostaudio-objs := hostaudio_kern.o ubd-objs := ubd_kern.o ubd_user.o port-objs := port_kern.o port_user.o harddog-objs := harddog_kern.o harddog_user.o +rtc-objs := rtc_kern.o rtc_user.o LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a) @@ -62,6 +63,7 @@ obj-$(CONFIG_UML_WATCHDOG) += harddog.o obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o obj-$(CONFIG_UML_RANDOM) += random.o obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o +obj-$(CONFIG_UML_RTC) += rtc.o # pcap_user.o must be added explicitly. USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o diff --git a/arch/um/drivers/rtc.h b/arch/um/drivers/rtc.h new file mode 100644 index 000000000000..95e41c7d35c4 --- /dev/null +++ b/arch/um/drivers/rtc.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg + */ +#ifndef __UM_RTC_H__ +#define __UM_RTC_H__ + +int uml_rtc_start(bool timetravel); +int uml_rtc_enable_alarm(unsigned long long delta_seconds); +void uml_rtc_disable_alarm(void); +void uml_rtc_stop(bool timetravel); +void uml_rtc_send_timetravel_alarm(void); + +#endif /* __UM_RTC_H__ */ diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c new file mode 100644 index 000000000000..97ceb205cfe6 --- /dev/null +++ b/arch/um/drivers/rtc_kern.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "rtc.h" + +static time64_t uml_rtc_alarm_time; +static bool uml_rtc_alarm_enabled; +static struct rtc_device *uml_rtc; +static int uml_rtc_irq_fd, uml_rtc_irq; + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + +static void uml_rtc_time_travel_alarm(struct time_travel_event *ev) +{ + uml_rtc_send_timetravel_alarm(); +} + +static struct time_travel_event uml_rtc_alarm_event = { + .fn = uml_rtc_time_travel_alarm, +}; +#endif + +static int uml_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct timespec64 ts; + + /* Use this to get correct time in time-travel mode */ + read_persistent_clock64(&ts); + rtc_time64_to_tm(timespec64_to_ktime(ts) / NSEC_PER_SEC, tm); + + return 0; +} + +static int uml_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + rtc_time64_to_tm(uml_rtc_alarm_time, &alrm->time); + alrm->enabled = uml_rtc_alarm_enabled; + + return 0; +} + +static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) +{ + unsigned long long secs; + + if (!enable && !uml_rtc_alarm_enabled) + return 0; + + uml_rtc_alarm_enabled = enable; + + secs = uml_rtc_alarm_time - ktime_get_real_seconds(); + + if (time_travel_mode == TT_MODE_OFF) { + if (!enable) { + uml_rtc_disable_alarm(); + return 0; + } + + /* enable or update */ + return uml_rtc_enable_alarm(secs); + } else { + time_travel_del_event(¨_rtc_alarm_event); + + if (enable) + time_travel_add_event_rel(¨_rtc_alarm_event, + secs * NSEC_PER_SEC); + } + + return 0; +} + +static int uml_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + uml_rtc_alarm_irq_enable(dev, 0); + uml_rtc_alarm_time = rtc_tm_to_time64(&alrm->time); + uml_rtc_alarm_irq_enable(dev, alrm->enabled); + + return 0; +} + +static const struct rtc_class_ops uml_rtc_ops = { + .read_time = uml_rtc_read_time, + .read_alarm = uml_rtc_read_alarm, + .alarm_irq_enable = uml_rtc_alarm_irq_enable, + .set_alarm = uml_rtc_set_alarm, +}; + +static irqreturn_t uml_rtc_interrupt(int irq, void *data) +{ + unsigned long long c = 0; + + /* alarm triggered, it's now off */ + uml_rtc_alarm_enabled = false; + + os_read_file(uml_rtc_irq_fd, &c, sizeof(c)); + WARN_ON(c == 0); + + pm_system_wakeup(); + rtc_update_irq(uml_rtc, 1, RTC_IRQF | RTC_AF); + + return IRQ_HANDLED; +} + +static int uml_rtc_setup(void) +{ + int err; + + err = uml_rtc_start(time_travel_mode != TT_MODE_OFF); + if (WARN(err < 0, "err = %d\n", err)) + return err; + + uml_rtc_irq_fd = err; + + err = um_request_irq(UM_IRQ_ALLOC, uml_rtc_irq_fd, IRQ_READ, + uml_rtc_interrupt, 0, "rtc", NULL); + if (err < 0) { + uml_rtc_stop(time_travel_mode != TT_MODE_OFF); + return err; + } + + irq_set_irq_wake(err, 1); + + uml_rtc_irq = err; + return 0; +} + +static void uml_rtc_cleanup(void) +{ + um_free_irq(uml_rtc_irq, NULL); + uml_rtc_stop(time_travel_mode != TT_MODE_OFF); +} + +static int uml_rtc_probe(struct platform_device *pdev) +{ + int err; + + err = uml_rtc_setup(); + if (err) + return err; + + uml_rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(uml_rtc)) { + err = PTR_ERR(uml_rtc); + goto cleanup; + } + + uml_rtc->ops = ¨_rtc_ops; + + device_init_wakeup(&pdev->dev, 1); + + err = devm_rtc_register_device(uml_rtc); + if (err) + goto cleanup; + + return 0; +cleanup: + uml_rtc_cleanup(); + return err; +} + +static int uml_rtc_remove(struct platform_device *pdev) +{ + device_init_wakeup(&pdev->dev, 0); + uml_rtc_cleanup(); + return 0; +} + +static struct platform_driver uml_rtc_driver = { + .probe = uml_rtc_probe, + .remove = uml_rtc_remove, + .driver = { + .name = "uml-rtc", + }, +}; + +static int __init uml_rtc_init(void) +{ + struct platform_device *pdev; + int err; + + err = platform_driver_register(¨_rtc_driver); + if (err) + return err; + + pdev = platform_device_alloc("uml-rtc", 0); + if (!pdev) { + err = -ENOMEM; + goto unregister; + } + + err = platform_device_add(pdev); + if (err) + goto unregister; + return 0; + +unregister: + platform_device_put(pdev); + platform_driver_unregister(¨_rtc_driver); + return err; +} +device_initcall(uml_rtc_init); diff --git a/arch/um/drivers/rtc_user.c b/arch/um/drivers/rtc_user.c new file mode 100644 index 000000000000..4016bc1d577e --- /dev/null +++ b/arch/um/drivers/rtc_user.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "rtc.h" + +static int uml_rtc_irq_fds[2]; + +void uml_rtc_send_timetravel_alarm(void) +{ + unsigned long long c = 1; + + CATCH_EINTR(write(uml_rtc_irq_fds[1], &c, sizeof(c))); +} + +int uml_rtc_start(bool timetravel) +{ + int err; + + if (timetravel) { + int err = os_pipe(uml_rtc_irq_fds, 1, 1); + if (err) + goto fail; + } else { + uml_rtc_irq_fds[0] = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC); + if (uml_rtc_irq_fds[0] < 0) { + err = -errno; + goto fail; + } + + /* apparently timerfd won't send SIGIO, use workaround */ + sigio_broken(uml_rtc_irq_fds[0]); + err = add_sigio_fd(uml_rtc_irq_fds[0]); + if (err < 0) { + close(uml_rtc_irq_fds[0]); + goto fail; + } + } + + return uml_rtc_irq_fds[0]; +fail: + uml_rtc_stop(timetravel); + return err; +} + +int uml_rtc_enable_alarm(unsigned long long delta_seconds) +{ + struct itimerspec it = { + .it_value = { + .tv_sec = delta_seconds, + }, + }; + + if (timerfd_settime(uml_rtc_irq_fds[0], 0, &it, NULL)) + return -errno; + return 0; +} + +void uml_rtc_disable_alarm(void) +{ + uml_rtc_enable_alarm(0); +} + +void uml_rtc_stop(bool timetravel) +{ + if (timetravel) + os_close_file(uml_rtc_irq_fds[1]); + else + ignore_sigio_fd(uml_rtc_irq_fds[0]); + os_close_file(uml_rtc_irq_fds[0]); +} diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h index 8688e16e832b..759956ab0108 100644 --- a/arch/um/include/linux/time-internal.h +++ b/arch/um/include/linux/time-internal.h @@ -55,6 +55,9 @@ static inline void time_travel_wait_readable(int fd) } void time_travel_add_irq_event(struct time_travel_event *e); +void time_travel_add_event_rel(struct time_travel_event *e, + unsigned long long delay_ns); +bool time_travel_del_event(struct time_travel_event *e); #else struct time_travel_event { }; @@ -80,6 +83,14 @@ static inline void time_travel_add_irq_event(struct time_travel_event *e) { WARN_ON(1); } + +/* + * not inlines so the data structure need not exist, + * cause linker failures + */ +extern void time_travel_not_configured(void); +#define time_travel_add_event_rel(...) time_travel_not_configured() +#define time_travel_del_event(...) time_travel_not_configured() #endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ /* diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 8144406b6ea3..e0cdb9694fb8 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -321,6 +321,12 @@ static void time_travel_add_event(struct time_travel_event *e, __time_travel_add_event(e, time); } +void time_travel_add_event_rel(struct time_travel_event *e, + unsigned long long delay_ns) +{ + time_travel_add_event(e, time_travel_time + delay_ns); +} + void time_travel_periodic_timer(struct time_travel_event *e) { time_travel_add_event(&time_travel_timer_event, @@ -384,7 +390,7 @@ static void time_travel_deliver_event(struct time_travel_event *e) } } -static bool time_travel_del_event(struct time_travel_event *e) +bool time_travel_del_event(struct time_travel_event *e) { unsigned long flags; @@ -594,6 +600,8 @@ extern u64 time_travel_ext_req(u32 op, u64 time); /* these are empty macros so the struct/fn need not exist */ #define time_travel_add_event(e, time) do { } while (0) +/* externally not usable - redefine here so we can */ +#undef time_travel_del_event #define time_travel_del_event(e) do { } while (0) #endif -- cgit v1.2.3 From cc3ac20fc265ea498c57c3cab0e228553f8d92d3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Feb 2021 13:22:51 +0100 Subject: um: io.h: include This may be needed for size_t if something doesn't get it included elsewhere before including , so add the include. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/include/asm/io.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/um/include/asm/io.h b/arch/um/include/asm/io.h index cef03e3aa0f9..6ce18d343997 100644 --- a/arch/um/include/asm/io.h +++ b/arch/um/include/asm/io.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_UM_IO_H #define _ASM_UM_IO_H +#include #define ioremap ioremap static inline void __iomem *ioremap(phys_addr_t offset, size_t size) -- cgit v1.2.3 From ddad5187fc2a12cb84c9d1ac8ecb816708a2986b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Feb 2021 13:23:02 +0100 Subject: um: irq.h: include This will get the (no-op) definition of irq_canonicalize() which some code might want. We could define that ourselves, but it seems like we'd likely want generic extensions in the future, if any. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/include/asm/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h index 547bff7b3a89..3f5d3e8228fc 100644 --- a/arch/um/include/asm/irq.h +++ b/arch/um/include/asm/irq.h @@ -33,4 +33,5 @@ #define NR_IRQS 64 +#include #endif -- cgit v1.2.3