From c1a15d08f497150a91ba4e61bab54b8f5c8b49b9 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 17 Apr 2013 20:18:55 +0200 Subject: xen-blkback: print stats about persistent grants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Roger Pau Monné Cc: xen-devel@lists.xen.org Cc: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index dd5b2fed97e9..f7526dbb204d 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -382,10 +382,12 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { pr_info("xen-blkback (%s): oo %3llu | rd %4llu | wr %4llu | f %4llu" - " | ds %4llu\n", + " | ds %4llu | pg: %4u/%4u\n", current->comm, blkif->st_oo_req, blkif->st_rd_req, blkif->st_wr_req, - blkif->st_f_req, blkif->st_ds_req); + blkif->st_f_req, blkif->st_ds_req, + blkif->persistent_gnt_c, + max_mapped_grant_pages(blkif->blk_protocol)); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); blkif->st_rd_req = 0; blkif->st_wr_req = 0; -- cgit v1.2.3-58-ga151 From c6cc142dac52e62e1e8a2aff5de1300202b96c66 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 17 Apr 2013 20:18:56 +0200 Subject: xen-blkback: use balloon pages for all mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using balloon pages for all granted pages allows us to simplify the logic in blkback, especially in the xen_blkbk_map function, since now we can decide if we want to map a grant persistently or not after we have actually mapped it. This could not be done before because persistent grants used ballooned pages, whereas non-persistent grants used pages from the kernel. This patch also introduces several changes, the first one is that the list of free pages is no longer global, now each blkback instance has it's own list of free pages that can be used to map grants. Also, a run time parameter (max_buffer_pages) has been added in order to tune the maximum number of free pages each blkback instance will keep in it's buffer. Signed-off-by: Roger Pau Monné Cc: xen-devel@lists.xen.org Cc: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk --- Documentation/ABI/stable/sysfs-bus-xen-backend | 8 + drivers/block/xen-blkback/blkback.c | 286 ++++++++++++++----------- drivers/block/xen-blkback/common.h | 5 + drivers/block/xen-blkback/xenbus.c | 3 + 4 files changed, 181 insertions(+), 121 deletions(-) (limited to 'drivers') diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend index 3d5951c8bf5f..e04afe0cca99 100644 --- a/Documentation/ABI/stable/sysfs-bus-xen-backend +++ b/Documentation/ABI/stable/sysfs-bus-xen-backend @@ -73,3 +73,11 @@ KernelVersion: 3.0 Contact: Konrad Rzeszutek Wilk Description: Number of sectors written by the frontend. + +What: /sys/module/xen_blkback/parameters/max_buffer_pages +Date: March 2013 +KernelVersion: 3.10 +Contact: Roger Pau Monné +Description: + Maximum number of free pages to keep in each block + backend buffer. diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index f7526dbb204d..8245c6bb9539 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -63,6 +63,21 @@ static int xen_blkif_reqs = 64; module_param_named(reqs, xen_blkif_reqs, int, 0); MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); +/* + * Maximum number of unused free pages to keep in the internal buffer. + * Setting this to a value too low will reduce memory used in each backend, + * but can have a performance penalty. + * + * A sane value is xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST, but can + * be set to a lower value that might degrade performance on some intensive + * IO workloads. + */ + +static int xen_blkif_max_buffer_pages = 704; +module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644); +MODULE_PARM_DESC(max_buffer_pages, +"Maximum number of free pages to keep in each block backend buffer"); + /* Run-time switchable: /sys/module/blkback/parameters/ */ static unsigned int log_stats; module_param(log_stats, int, 0644); @@ -82,10 +97,14 @@ struct pending_req { int status; struct list_head free_list; DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; #define BLKBACK_INVALID_HANDLE (~0) +/* Number of free pages to remove on each call to free_xenballooned_pages */ +#define NUM_BATCH_FREE_PAGES 10 + struct xen_blkbk { struct pending_req *pending_reqs; /* List of all 'pending_req' available */ @@ -93,8 +112,6 @@ struct xen_blkbk { /* And its spinlock. */ spinlock_t pending_free_lock; wait_queue_head_t pending_free_wq; - /* The list of all pages that are available. */ - struct page **pending_pages; /* And the grant handles that are available. */ grant_handle_t *pending_grant_handles; }; @@ -143,14 +160,66 @@ static inline int vaddr_pagenr(struct pending_req *req, int seg) BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; } -#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] +static inline int get_free_page(struct xen_blkif *blkif, struct page **page) +{ + unsigned long flags; + + spin_lock_irqsave(&blkif->free_pages_lock, flags); + if (list_empty(&blkif->free_pages)) { + BUG_ON(blkif->free_pages_num != 0); + spin_unlock_irqrestore(&blkif->free_pages_lock, flags); + return alloc_xenballooned_pages(1, page, false); + } + BUG_ON(blkif->free_pages_num == 0); + page[0] = list_first_entry(&blkif->free_pages, struct page, lru); + list_del(&page[0]->lru); + blkif->free_pages_num--; + spin_unlock_irqrestore(&blkif->free_pages_lock, flags); -static inline unsigned long vaddr(struct pending_req *req, int seg) + return 0; +} + +static inline void put_free_pages(struct xen_blkif *blkif, struct page **page, + int num) { - unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); - return (unsigned long)pfn_to_kaddr(pfn); + unsigned long flags; + int i; + + spin_lock_irqsave(&blkif->free_pages_lock, flags); + for (i = 0; i < num; i++) + list_add(&page[i]->lru, &blkif->free_pages); + blkif->free_pages_num += num; + spin_unlock_irqrestore(&blkif->free_pages_lock, flags); +} + +static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num) +{ + /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */ + struct page *page[NUM_BATCH_FREE_PAGES]; + unsigned int num_pages = 0; + unsigned long flags; + + spin_lock_irqsave(&blkif->free_pages_lock, flags); + while (blkif->free_pages_num > num) { + BUG_ON(list_empty(&blkif->free_pages)); + page[num_pages] = list_first_entry(&blkif->free_pages, + struct page, lru); + list_del(&page[num_pages]->lru); + blkif->free_pages_num--; + if (++num_pages == NUM_BATCH_FREE_PAGES) { + spin_unlock_irqrestore(&blkif->free_pages_lock, flags); + free_xenballooned_pages(num_pages, page); + spin_lock_irqsave(&blkif->free_pages_lock, flags); + num_pages = 0; + } + } + spin_unlock_irqrestore(&blkif->free_pages_lock, flags); + if (num_pages != 0) + free_xenballooned_pages(num_pages, page); } +#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) + #define pending_handle(_req, _seg) \ (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) @@ -170,7 +239,7 @@ static void make_response(struct xen_blkif *blkif, u64 id, (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL) -static void add_persistent_gnt(struct rb_root *root, +static int add_persistent_gnt(struct rb_root *root, struct persistent_gnt *persistent_gnt) { struct rb_node **new = &(root->rb_node), *parent = NULL; @@ -186,14 +255,15 @@ static void add_persistent_gnt(struct rb_root *root, else if (persistent_gnt->gnt > this->gnt) new = &((*new)->rb_right); else { - pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n"); - BUG(); + pr_alert_ratelimited(DRV_PFX " trying to add a gref that's already in the tree\n"); + return -EINVAL; } } /* Add new node and rebalance tree. */ rb_link_node(&(persistent_gnt->node), parent, new); rb_insert_color(&(persistent_gnt->node), root); + return 0; } static struct persistent_gnt *get_persistent_gnt(struct rb_root *root, @@ -215,7 +285,8 @@ static struct persistent_gnt *get_persistent_gnt(struct rb_root *root, return NULL; } -static void free_persistent_gnts(struct rb_root *root, unsigned int num) +static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, + unsigned int num) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -240,7 +311,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); BUG_ON(ret); - free_xenballooned_pages(segs_to_unmap, pages); + put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; } @@ -422,13 +493,19 @@ int xen_blkif_schedule(void *arg) if (do_block_io_op(blkif)) blkif->waiting_reqs = 1; + /* Shrink if we have more than xen_blkif_max_buffer_pages */ + shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages); + if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); } + /* Since we are shutting down remove all pages from the buffer */ + shrink_free_pagepool(blkif, 0 /* All */); + /* Free all persistent grant pages */ if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) - free_persistent_gnts(&blkif->persistent_gnts, + free_persistent_gnts(blkif, &blkif->persistent_gnts, blkif->persistent_gnt_c); BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); @@ -457,23 +534,25 @@ static void xen_blkbk_unmap(struct pending_req *req) struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; grant_handle_t handle; + struct xen_blkif *blkif = req->blkif; int ret; for (i = 0; i < req->nr_pages; i++) { if (!test_bit(i, req->unmap_seg)) continue; handle = pending_handle(req, i); + pages[invcount] = req->pages[i]; if (handle == BLKBACK_INVALID_HANDLE) continue; - gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), + gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[invcount]), GNTMAP_host_map, handle); pending_handle(req, i) = BLKBACK_INVALID_HANDLE; - pages[invcount] = virt_to_page(vaddr(req, i)); invcount++; } ret = gnttab_unmap_refs(unmap, NULL, pages, invcount); BUG_ON(ret); + put_free_pages(blkif, pages, invcount); } static int xen_blkbk_map(struct blkif_request *req, @@ -487,8 +566,7 @@ static int xen_blkbk_map(struct blkif_request *req, struct persistent_gnt *persistent_gnt = NULL; struct xen_blkif *blkif = pending_req->blkif; phys_addr_t addr = 0; - int i, j; - bool new_map; + int i, seg_idx, new_map_idx; int nseg = req->u.rw.nr_segments; int segs_to_map = 0; int ret = 0; @@ -517,68 +595,16 @@ static int xen_blkbk_map(struct blkif_request *req, * We are using persistent grants and * the grant is already mapped */ - new_map = false; - } else if (use_persistent_gnts && - blkif->persistent_gnt_c < - max_mapped_grant_pages(blkif->blk_protocol)) { - /* - * We are using persistent grants, the grant is - * not mapped but we have room for it - */ - new_map = true; - persistent_gnt = kmalloc( - sizeof(struct persistent_gnt), - GFP_KERNEL); - if (!persistent_gnt) - return -ENOMEM; - if (alloc_xenballooned_pages(1, &persistent_gnt->page, - false)) { - kfree(persistent_gnt); - return -ENOMEM; - } - persistent_gnt->gnt = req->u.rw.seg[i].gref; - persistent_gnt->handle = BLKBACK_INVALID_HANDLE; - - pages_to_gnt[segs_to_map] = - persistent_gnt->page; - addr = (unsigned long) pfn_to_kaddr( - page_to_pfn(persistent_gnt->page)); - - add_persistent_gnt(&blkif->persistent_gnts, - persistent_gnt); - blkif->persistent_gnt_c++; - pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n", - persistent_gnt->gnt, blkif->persistent_gnt_c, - max_mapped_grant_pages(blkif->blk_protocol)); - } else { - /* - * We are either using persistent grants and - * hit the maximum limit of grants mapped, - * or we are not using persistent grants. - */ - if (use_persistent_gnts && - !blkif->vbd.overflow_max_grants) { - blkif->vbd.overflow_max_grants = 1; - pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n", - blkif->domid, blkif->vbd.handle); - } - new_map = true; - pages[i] = blkbk->pending_page(pending_req, i); - addr = vaddr(pending_req, i); - pages_to_gnt[segs_to_map] = - blkbk->pending_page(pending_req, i); - } - - if (persistent_gnt) { pages[i] = persistent_gnt->page; persistent_gnts[i] = persistent_gnt; } else { + if (get_free_page(blkif, &pages[i])) + goto out_of_memory; + addr = vaddr(pages[i]); + pages_to_gnt[segs_to_map] = pages[i]; persistent_gnts[i] = NULL; - } - - if (new_map) { flags = GNTMAP_host_map; - if (!persistent_gnt && + if (!use_persistent_gnts && (pending_req->operation != BLKIF_OP_READ)) flags |= GNTMAP_readonly; gnttab_set_map_op(&map[segs_to_map++], addr, @@ -598,48 +624,81 @@ static int xen_blkbk_map(struct blkif_request *req, * the page from the other domain. */ bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); - for (i = 0, j = 0; i < nseg; i++) { - if (!persistent_gnts[i] || - persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) { + for (seg_idx = 0, new_map_idx = 0; seg_idx < nseg; seg_idx++) { + if (!persistent_gnts[seg_idx]) { /* This is a newly mapped grant */ - BUG_ON(j >= segs_to_map); - if (unlikely(map[j].status != 0)) { + BUG_ON(new_map_idx >= segs_to_map); + if (unlikely(map[new_map_idx].status != 0)) { pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); - map[j].handle = BLKBACK_INVALID_HANDLE; + pending_handle(pending_req, seg_idx) = BLKBACK_INVALID_HANDLE; ret |= 1; - if (persistent_gnts[i]) { - rb_erase(&persistent_gnts[i]->node, - &blkif->persistent_gnts); - blkif->persistent_gnt_c--; - kfree(persistent_gnts[i]); - persistent_gnts[i] = NULL; - } + new_map_idx++; + /* + * No need to set unmap_seg bit, since + * we can not unmap this grant because + * the handle is invalid. + */ + continue; } + pending_handle(pending_req, seg_idx) = map[new_map_idx].handle; + } else { + /* This grant is persistent and already mapped */ + goto next; } - if (persistent_gnts[i]) { - if (persistent_gnts[i]->handle == - BLKBACK_INVALID_HANDLE) { + if (use_persistent_gnts && + blkif->persistent_gnt_c < + max_mapped_grant_pages(blkif->blk_protocol)) { + /* + * We are using persistent grants, the grant is + * not mapped but we have room for it + */ + persistent_gnt = kmalloc(sizeof(struct persistent_gnt), + GFP_KERNEL); + if (!persistent_gnt) { /* - * If this is a new persistent grant - * save the handler + * If we don't have enough memory to + * allocate the persistent_gnt struct + * map this grant non-persistenly */ - persistent_gnts[i]->handle = map[j++].handle; + goto next_unmap; } - pending_handle(pending_req, i) = - persistent_gnts[i]->handle; - - if (ret) - continue; - } else { - pending_handle(pending_req, i) = map[j++].handle; - bitmap_set(pending_req->unmap_seg, i, 1); - - if (ret) - continue; + persistent_gnt->gnt = map[new_map_idx].ref; + persistent_gnt->handle = map[new_map_idx].handle; + persistent_gnt->page = pages[seg_idx]; + if (add_persistent_gnt(&blkif->persistent_gnts, + persistent_gnt)) { + kfree(persistent_gnt); + persistent_gnt = NULL; + goto next_unmap; + } + blkif->persistent_gnt_c++; + pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n", + persistent_gnt->gnt, blkif->persistent_gnt_c, + max_mapped_grant_pages(blkif->blk_protocol)); + new_map_idx++; + goto next; + } + if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) { + blkif->vbd.overflow_max_grants = 1; + pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n", + blkif->domid, blkif->vbd.handle); } - seg[i].offset = (req->u.rw.seg[i].first_sect << 9); +next_unmap: + /* + * We could not map this grant persistently, so use it as + * a non-persistent grant. + */ + bitmap_set(pending_req->unmap_seg, seg_idx, 1); + new_map_idx++; +next: + seg[seg_idx].offset = (req->u.rw.seg[seg_idx].first_sect << 9); } return ret; + +out_of_memory: + pr_alert(DRV_PFX "%s: out of memory\n", __func__); + put_free_pages(blkif, pages_to_gnt, segs_to_map); + return -ENOMEM; } static int dispatch_discard_io(struct xen_blkif *blkif, @@ -863,7 +922,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, int operation; struct blk_plug plug; bool drain = false; - struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page **pages = pending_req->pages; switch (req->operation) { case BLKIF_OP_READ: @@ -1090,22 +1149,14 @@ static int __init xen_blkif_init(void) xen_blkif_reqs, GFP_KERNEL); blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * mmap_pages, GFP_KERNEL); - blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * - mmap_pages, GFP_KERNEL); - if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || - !blkbk->pending_pages) { + if (!blkbk->pending_reqs || !blkbk->pending_grant_handles) { rc = -ENOMEM; goto out_of_memory; } for (i = 0; i < mmap_pages; i++) { blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - blkbk->pending_pages[i] = alloc_page(GFP_KERNEL); - if (blkbk->pending_pages[i] == NULL) { - rc = -ENOMEM; - goto out_of_memory; - } } rc = xen_blkif_interface_init(); if (rc) @@ -1130,13 +1181,6 @@ static int __init xen_blkif_init(void) failed_init: kfree(blkbk->pending_reqs); kfree(blkbk->pending_grant_handles); - if (blkbk->pending_pages) { - for (i = 0; i < mmap_pages; i++) { - if (blkbk->pending_pages[i]) - __free_page(blkbk->pending_pages[i]); - } - kfree(blkbk->pending_pages); - } kfree(blkbk); blkbk = NULL; return rc; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 60103e2517ba..6c73c3855e65 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -220,6 +220,11 @@ struct xen_blkif { struct rb_root persistent_gnts; unsigned int persistent_gnt_c; + /* buffer of free pages to map grant refs */ + spinlock_t free_pages_lock; + int free_pages_num; + struct list_head free_pages; + /* statistics */ unsigned long st_print; unsigned long long st_rd_req; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 8bfd1bcf95ec..24f7f6d87717 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -118,6 +118,9 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) blkif->st_print = jiffies; init_waitqueue_head(&blkif->waiting_to_free); blkif->persistent_gnts.rb_node = NULL; + spin_lock_init(&blkif->free_pages_lock); + INIT_LIST_HEAD(&blkif->free_pages); + blkif->free_pages_num = 0; return blkif; } -- cgit v1.2.3-58-ga151 From 3f3aad5e6686ed49242bbf86de378b39f119ec9d Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 17 Apr 2013 20:18:57 +0200 Subject: xen-blkback: implement LRU mechanism for persistent grants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This mechanism allows blkback to change the number of grants persistently mapped at run time. The algorithm uses a simple LRU mechanism that removes (if needed) the persistent grants that have not been used since the last LRU run, or if all grants have been used it removes the first grants in the list (that are not in use). The algorithm allows the user to change the maximum number of persistent grants, by changing max_persistent_grants in sysfs. Since we are storing the persistent grants used inside the request struct (to be able to mark them as "unused" when unmapping), we no longer need the bitmap (unmap_seg). Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xen.org Signed-off-by: Konrad Rzeszutek Wilk --- Documentation/ABI/stable/sysfs-bus-xen-backend | 10 + drivers/block/xen-blkback/blkback.c | 287 ++++++++++++++++++++----- drivers/block/xen-blkback/common.h | 18 ++ drivers/block/xen-blkback/xenbus.c | 2 + 4 files changed, 260 insertions(+), 57 deletions(-) (limited to 'drivers') diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend index e04afe0cca99..947db11350bc 100644 --- a/Documentation/ABI/stable/sysfs-bus-xen-backend +++ b/Documentation/ABI/stable/sysfs-bus-xen-backend @@ -81,3 +81,13 @@ Contact: Roger Pau Monné Description: Maximum number of free pages to keep in each block backend buffer. + +What: /sys/module/xen_blkback/parameters/max_persistent_grants +Date: March 2013 +KernelVersion: 3.10 +Contact: Roger Pau Monné +Description: + Maximum number of grants to map persistently in + blkback. If the frontend tries to use more than + max_persistent_grants, the LRU kicks in and starts + removing 5% of max_persistent_grants every 100ms. diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 8245c6bb9539..17052f74ebe5 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -78,6 +78,36 @@ module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644); MODULE_PARM_DESC(max_buffer_pages, "Maximum number of free pages to keep in each block backend buffer"); +/* + * Maximum number of grants to map persistently in blkback. For maximum + * performance this should be the total numbers of grants that can be used + * to fill the ring, but since this might become too high, specially with + * the use of indirect descriptors, we set it to a value that provides good + * performance without using too much memory. + * + * When the list of persistent grants is full we clean it up using a LRU + * algorithm. + */ + +static int xen_blkif_max_pgrants = 352; +module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644); +MODULE_PARM_DESC(max_persistent_grants, + "Maximum number of grants to map persistently"); + +/* + * The LRU mechanism to clean the lists of persistent grants needs to + * be executed periodically. The time interval between consecutive executions + * of the purge mechanism is set in ms. + */ +#define LRU_INTERVAL 100 + +/* + * When the persistent grants list is full we will remove unused grants + * from the list. The percent number of grants to be removed at each LRU + * execution. + */ +#define LRU_PERCENT_CLEAN 5 + /* Run-time switchable: /sys/module/blkback/parameters/ */ static unsigned int log_stats; module_param(log_stats, int, 0644); @@ -96,8 +126,8 @@ struct pending_req { unsigned short operation; int status; struct list_head free_list; - DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; #define BLKBACK_INVALID_HANDLE (~0) @@ -118,36 +148,6 @@ struct xen_blkbk { static struct xen_blkbk *blkbk; -/* - * Maximum number of grant pages that can be mapped in blkback. - * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of - * pages that blkback will persistently map. - * Currently, this is: - * RING_SIZE = 32 (for all known ring types) - * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11 - * sizeof(struct persistent_gnt) = 48 - * So the maximum memory used to store the grants is: - * 32 * 11 * 48 = 16896 bytes - */ -static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol) -{ - switch (protocol) { - case BLKIF_PROTOCOL_NATIVE: - return __CONST_RING_SIZE(blkif, PAGE_SIZE) * - BLKIF_MAX_SEGMENTS_PER_REQUEST; - case BLKIF_PROTOCOL_X86_32: - return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) * - BLKIF_MAX_SEGMENTS_PER_REQUEST; - case BLKIF_PROTOCOL_X86_64: - return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) * - BLKIF_MAX_SEGMENTS_PER_REQUEST; - default: - BUG(); - } - return 0; -} - - /* * Little helpful macro to figure out the index and virtual address of the * pending_pages[..]. For each 'pending_req' we have have up to @@ -239,13 +239,29 @@ static void make_response(struct xen_blkif *blkif, u64 id, (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL) -static int add_persistent_gnt(struct rb_root *root, +/* + * We don't need locking around the persistent grant helpers + * because blkback uses a single-thread for each backed, so we + * can be sure that this functions will never be called recursively. + * + * The only exception to that is put_persistent_grant, that can be called + * from interrupt context (by xen_blkbk_unmap), so we have to use atomic + * bit operations to modify the flags of a persistent grant and to count + * the number of used grants. + */ +static int add_persistent_gnt(struct xen_blkif *blkif, struct persistent_gnt *persistent_gnt) { - struct rb_node **new = &(root->rb_node), *parent = NULL; + struct rb_node **new = NULL, *parent = NULL; struct persistent_gnt *this; + if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) { + if (!blkif->vbd.overflow_max_grants) + blkif->vbd.overflow_max_grants = 1; + return -EBUSY; + } /* Figure out where to put new node */ + new = &blkif->persistent_gnts.rb_node; while (*new) { this = container_of(*new, struct persistent_gnt, node); @@ -260,18 +276,23 @@ static int add_persistent_gnt(struct rb_root *root, } } + bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE); + set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); /* Add new node and rebalance tree. */ rb_link_node(&(persistent_gnt->node), parent, new); - rb_insert_color(&(persistent_gnt->node), root); + rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts); + blkif->persistent_gnt_c++; + atomic_inc(&blkif->persistent_gnt_in_use); return 0; } -static struct persistent_gnt *get_persistent_gnt(struct rb_root *root, +static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif, grant_ref_t gref) { struct persistent_gnt *data; - struct rb_node *node = root->rb_node; + struct rb_node *node = NULL; + node = blkif->persistent_gnts.rb_node; while (node) { data = container_of(node, struct persistent_gnt, node); @@ -279,12 +300,29 @@ static struct persistent_gnt *get_persistent_gnt(struct rb_root *root, node = node->rb_left; else if (gref > data->gnt) node = node->rb_right; - else + else { + if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) { + pr_alert_ratelimited(DRV_PFX " requesting a grant already in use\n"); + return NULL; + } + set_bit(PERSISTENT_GNT_ACTIVE, data->flags); + atomic_inc(&blkif->persistent_gnt_in_use); return data; + } } return NULL; } +static void put_persistent_gnt(struct xen_blkif *blkif, + struct persistent_gnt *persistent_gnt) +{ + if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) + pr_alert_ratelimited(DRV_PFX " freeing a grant already unused"); + set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); + clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); + atomic_dec(&blkif->persistent_gnt_in_use); +} + static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, unsigned int num) { @@ -322,6 +360,129 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, BUG_ON(num != 0); } +static void unmap_purged_grants(struct work_struct *work) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct persistent_gnt *persistent_gnt; + int ret, segs_to_unmap = 0; + struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work); + + while(!list_empty(&blkif->persistent_purge_list)) { + persistent_gnt = list_first_entry(&blkif->persistent_purge_list, + struct persistent_gnt, + remove_node); + list_del(&persistent_gnt->remove_node); + + gnttab_set_unmap_op(&unmap[segs_to_unmap], + vaddr(persistent_gnt->page), + GNTMAP_host_map, + persistent_gnt->handle); + + pages[segs_to_unmap] = persistent_gnt->page; + + if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) { + ret = gnttab_unmap_refs(unmap, NULL, pages, + segs_to_unmap); + BUG_ON(ret); + put_free_pages(blkif, pages, segs_to_unmap); + segs_to_unmap = 0; + } + kfree(persistent_gnt); + } + if (segs_to_unmap > 0) { + ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); + BUG_ON(ret); + put_free_pages(blkif, pages, segs_to_unmap); + } +} + +static void purge_persistent_gnt(struct xen_blkif *blkif) +{ + struct persistent_gnt *persistent_gnt; + struct rb_node *n; + unsigned int num_clean, total; + bool scan_used = false; + struct rb_root *root; + + if (blkif->persistent_gnt_c < xen_blkif_max_pgrants || + (blkif->persistent_gnt_c == xen_blkif_max_pgrants && + !blkif->vbd.overflow_max_grants)) { + return; + } + + if (work_pending(&blkif->persistent_purge_work)) { + pr_alert_ratelimited(DRV_PFX "Scheduled work from previous purge is still pending, cannot purge list\n"); + return; + } + + num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; + num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean; + num_clean = min(blkif->persistent_gnt_c, num_clean); + if (num_clean > + (blkif->persistent_gnt_c - + atomic_read(&blkif->persistent_gnt_in_use))) + return; + + /* + * At this point, we can assure that there will be no calls + * to get_persistent_grant (because we are executing this code from + * xen_blkif_schedule), there can only be calls to put_persistent_gnt, + * which means that the number of currently used grants will go down, + * but never up, so we will always be able to remove the requested + * number of grants. + */ + + total = num_clean; + + pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean); + + INIT_LIST_HEAD(&blkif->persistent_purge_list); + root = &blkif->persistent_gnts; +purge_list: + foreach_grant_safe(persistent_gnt, n, root, node) { + BUG_ON(persistent_gnt->handle == + BLKBACK_INVALID_HANDLE); + + if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) + continue; + if (!scan_used && + (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags))) + continue; + + rb_erase(&persistent_gnt->node, root); + list_add(&persistent_gnt->remove_node, + &blkif->persistent_purge_list); + if (--num_clean == 0) + goto finished; + } + /* + * If we get here it means we also need to start cleaning + * grants that were used since last purge in order to cope + * with the requested num + */ + if (!scan_used) { + pr_debug(DRV_PFX "Still missing %u purged frames\n", num_clean); + scan_used = true; + goto purge_list; + } +finished: + /* Remove the "used" flag from all the persistent grants */ + foreach_grant_safe(persistent_gnt, n, root, node) { + BUG_ON(persistent_gnt->handle == + BLKBACK_INVALID_HANDLE); + clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); + } + blkif->persistent_gnt_c -= (total - num_clean); + blkif->vbd.overflow_max_grants = 0; + + /* We can defer this work */ + INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants); + schedule_work(&blkif->persistent_purge_work); + pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total); + return; +} + /* * Retrieve from the 'pending_reqs' a free pending_req structure to be used. */ @@ -453,12 +614,12 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { pr_info("xen-blkback (%s): oo %3llu | rd %4llu | wr %4llu | f %4llu" - " | ds %4llu | pg: %4u/%4u\n", + " | ds %4llu | pg: %4u/%4d\n", current->comm, blkif->st_oo_req, blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req, blkif->st_ds_req, blkif->persistent_gnt_c, - max_mapped_grant_pages(blkif->blk_protocol)); + xen_blkif_max_pgrants); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); blkif->st_rd_req = 0; blkif->st_wr_req = 0; @@ -470,6 +631,7 @@ int xen_blkif_schedule(void *arg) { struct xen_blkif *blkif = arg; struct xen_vbd *vbd = &blkif->vbd; + unsigned long timeout; xen_blkif_get(blkif); @@ -479,13 +641,21 @@ int xen_blkif_schedule(void *arg) if (unlikely(vbd->size != vbd_sz(vbd))) xen_vbd_resize(blkif); - wait_event_interruptible( + timeout = msecs_to_jiffies(LRU_INTERVAL); + + timeout = wait_event_interruptible_timeout( blkif->wq, - blkif->waiting_reqs || kthread_should_stop()); - wait_event_interruptible( + blkif->waiting_reqs || kthread_should_stop(), + timeout); + if (timeout == 0) + goto purge_gnt_list; + timeout = wait_event_interruptible_timeout( blkbk->pending_free_wq, !list_empty(&blkbk->pending_free) || - kthread_should_stop()); + kthread_should_stop(), + timeout); + if (timeout == 0) + goto purge_gnt_list; blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ @@ -493,6 +663,13 @@ int xen_blkif_schedule(void *arg) if (do_block_io_op(blkif)) blkif->waiting_reqs = 1; +purge_gnt_list: + if (blkif->vbd.feature_gnt_persistent && + time_after(jiffies, blkif->next_lru)) { + purge_persistent_gnt(blkif); + blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL); + } + /* Shrink if we have more than xen_blkif_max_buffer_pages */ shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages); @@ -538,8 +715,10 @@ static void xen_blkbk_unmap(struct pending_req *req) int ret; for (i = 0; i < req->nr_pages; i++) { - if (!test_bit(i, req->unmap_seg)) + if (req->persistent_gnts[i] != NULL) { + put_persistent_gnt(blkif, req->persistent_gnts[i]); continue; + } handle = pending_handle(req, i); pages[invcount] = req->pages[i]; if (handle == BLKBACK_INVALID_HANDLE) @@ -561,8 +740,8 @@ static int xen_blkbk_map(struct blkif_request *req, struct page *pages[]) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct persistent_gnt **persistent_gnts = pending_req->persistent_gnts; struct persistent_gnt *persistent_gnt = NULL; struct xen_blkif *blkif = pending_req->blkif; phys_addr_t addr = 0; @@ -574,9 +753,6 @@ static int xen_blkbk_map(struct blkif_request *req, use_persistent_gnts = (blkif->vbd.feature_gnt_persistent); - BUG_ON(blkif->persistent_gnt_c > - max_mapped_grant_pages(pending_req->blkif->blk_protocol)); - /* * Fill out preq.nr_sects with proper amount of sectors, and setup * assign map[..] with the PFN of the page in our domain with the @@ -587,7 +763,7 @@ static int xen_blkbk_map(struct blkif_request *req, if (use_persistent_gnts) persistent_gnt = get_persistent_gnt( - &blkif->persistent_gnts, + blkif, req->u.rw.seg[i].gref); if (persistent_gnt) { @@ -623,7 +799,6 @@ static int xen_blkbk_map(struct blkif_request *req, * so that when we access vaddr(pending_req,i) it has the contents of * the page from the other domain. */ - bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); for (seg_idx = 0, new_map_idx = 0; seg_idx < nseg; seg_idx++) { if (!persistent_gnts[seg_idx]) { /* This is a newly mapped grant */ @@ -646,11 +821,10 @@ static int xen_blkbk_map(struct blkif_request *req, goto next; } if (use_persistent_gnts && - blkif->persistent_gnt_c < - max_mapped_grant_pages(blkif->blk_protocol)) { + blkif->persistent_gnt_c < xen_blkif_max_pgrants) { /* * We are using persistent grants, the grant is - * not mapped but we have room for it + * not mapped but we might have room for it. */ persistent_gnt = kmalloc(sizeof(struct persistent_gnt), GFP_KERNEL); @@ -665,16 +839,16 @@ static int xen_blkbk_map(struct blkif_request *req, persistent_gnt->gnt = map[new_map_idx].ref; persistent_gnt->handle = map[new_map_idx].handle; persistent_gnt->page = pages[seg_idx]; - if (add_persistent_gnt(&blkif->persistent_gnts, + if (add_persistent_gnt(blkif, persistent_gnt)) { kfree(persistent_gnt); persistent_gnt = NULL; goto next_unmap; } - blkif->persistent_gnt_c++; + persistent_gnts[seg_idx] = persistent_gnt; pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n", persistent_gnt->gnt, blkif->persistent_gnt_c, - max_mapped_grant_pages(blkif->blk_protocol)); + xen_blkif_max_pgrants); new_map_idx++; goto next; } @@ -688,7 +862,6 @@ next_unmap: * We could not map this grant persistently, so use it as * a non-persistent grant. */ - bitmap_set(pending_req->unmap_seg, seg_idx, 1); new_map_idx++; next: seg[seg_idx].offset = (req->u.rw.seg[seg_idx].first_sect << 9); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 6c73c3855e65..af9bed48f773 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -182,12 +182,23 @@ struct xen_vbd { struct backend_info; +/* Number of available flags */ +#define PERSISTENT_GNT_FLAGS_SIZE 2 +/* This persistent grant is currently in use */ +#define PERSISTENT_GNT_ACTIVE 0 +/* + * This persistent grant has been used, this flag is set when we remove the + * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently. + */ +#define PERSISTENT_GNT_WAS_ACTIVE 1 struct persistent_gnt { struct page *page; grant_ref_t gnt; grant_handle_t handle; + DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE); struct rb_node node; + struct list_head remove_node; }; struct xen_blkif { @@ -219,6 +230,12 @@ struct xen_blkif { /* tree to store persistent grants */ struct rb_root persistent_gnts; unsigned int persistent_gnt_c; + atomic_t persistent_gnt_in_use; + unsigned long next_lru; + + /* used by the kworker that offload work from the persistent purge */ + struct list_head persistent_purge_list; + struct work_struct persistent_purge_work; /* buffer of free pages to map grant refs */ spinlock_t free_pages_lock; @@ -262,6 +279,7 @@ int xen_blkif_xenbus_init(void); irqreturn_t xen_blkif_be_int(int irq, void *dev_id); int xen_blkif_schedule(void *arg); +int xen_blkif_purge_persistent(void *arg); int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, struct backend_info *be, int state); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 24f7f6d87717..e0fd92a2a4cd 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -98,6 +98,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) err = PTR_ERR(blkif->xenblkd); blkif->xenblkd = NULL; xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); + return; } } @@ -121,6 +122,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) spin_lock_init(&blkif->free_pages_lock); INIT_LIST_HEAD(&blkif->free_pages); blkif->free_pages_num = 0; + atomic_set(&blkif->persistent_gnt_in_use, 0); return blkif; } -- cgit v1.2.3-58-ga151 From bb6acb289fbaac0e99eb552abdefc80a2186ef3f Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 17 Apr 2013 20:18:58 +0200 Subject: xen-blkback: move pending handles list from blkbk to pending_req MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moving grant ref handles from blkbk to pending_req will allow us to get rid of the shared blkbk structure. Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xen.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 17052f74ebe5..ae7dc92ad3cf 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -128,6 +128,7 @@ struct pending_req { struct list_head free_list; struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + grant_handle_t grant_handles[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; #define BLKBACK_INVALID_HANDLE (~0) @@ -142,8 +143,6 @@ struct xen_blkbk { /* And its spinlock. */ spinlock_t pending_free_lock; wait_queue_head_t pending_free_wq; - /* And the grant handles that are available. */ - grant_handle_t *pending_grant_handles; }; static struct xen_blkbk *blkbk; @@ -221,7 +220,7 @@ static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num) #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) #define pending_handle(_req, _seg) \ - (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) + (_req->grant_handles[_seg]) static int do_block_io_op(struct xen_blkif *blkif); @@ -1304,7 +1303,7 @@ static void make_response(struct xen_blkif *blkif, u64 id, static int __init xen_blkif_init(void) { - int i, mmap_pages; + int i; int rc = 0; if (!xen_domain()) @@ -1316,21 +1315,15 @@ static int __init xen_blkif_init(void) return -ENOMEM; } - mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) * xen_blkif_reqs, GFP_KERNEL); - blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * - mmap_pages, GFP_KERNEL); - if (!blkbk->pending_reqs || !blkbk->pending_grant_handles) { + if (!blkbk->pending_reqs) { rc = -ENOMEM; goto out_of_memory; } - for (i = 0; i < mmap_pages; i++) { - blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - } rc = xen_blkif_interface_init(); if (rc) goto failed_init; @@ -1353,7 +1346,6 @@ static int __init xen_blkif_init(void) pr_alert(DRV_PFX "%s: out of memory\n", __func__); failed_init: kfree(blkbk->pending_reqs); - kfree(blkbk->pending_grant_handles); kfree(blkbk); blkbk = NULL; return rc; -- cgit v1.2.3-58-ga151 From bf0720c48c7cefd127ed2329e6d0e40b39fa4d0e Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 17 Apr 2013 20:18:59 +0200 Subject: xen-blkback: make the queue of free requests per backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the last dependency from blkbk by moving the list of free requests to blkif. This change reduces the contention on the list of available requests. Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xen.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 123 ++++++------------------------------ drivers/block/xen-blkback/common.h | 30 +++++++++ drivers/block/xen-blkback/xenbus.c | 26 ++++++++ 3 files changed, 74 insertions(+), 105 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index ae7dc92ad3cf..90a57552f4b7 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -49,20 +49,6 @@ #include #include "common.h" -/* - * These are rather arbitrary. They are fairly large because adjacent requests - * pulled from a communication ring are quite likely to end up being part of - * the same scatter/gather request at the disc. - * - * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** - * - * This will increase the chances of being able to write whole tracks. - * 64 should be enough to keep us competitive with Linux. - */ -static int xen_blkif_reqs = 64; -module_param_named(reqs, xen_blkif_reqs, int, 0); -MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); - /* * Maximum number of unused free pages to keep in the internal buffer. * Setting this to a value too low will reduce memory used in each backend, @@ -112,53 +98,11 @@ MODULE_PARM_DESC(max_persistent_grants, static unsigned int log_stats; module_param(log_stats, int, 0644); -/* - * Each outstanding request that we've passed to the lower device layers has a - * 'pending_req' allocated to it. Each buffer_head that completes decrements - * the pendcnt towards zero. When it hits zero, the specified domain has a - * response queued for it, with the saved 'id' passed back. - */ -struct pending_req { - struct xen_blkif *blkif; - u64 id; - int nr_pages; - atomic_t pendcnt; - unsigned short operation; - int status; - struct list_head free_list; - struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - grant_handle_t grant_handles[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -}; - #define BLKBACK_INVALID_HANDLE (~0) /* Number of free pages to remove on each call to free_xenballooned_pages */ #define NUM_BATCH_FREE_PAGES 10 -struct xen_blkbk { - struct pending_req *pending_reqs; - /* List of all 'pending_req' available */ - struct list_head pending_free; - /* And its spinlock. */ - spinlock_t pending_free_lock; - wait_queue_head_t pending_free_wq; -}; - -static struct xen_blkbk *blkbk; - -/* - * Little helpful macro to figure out the index and virtual address of the - * pending_pages[..]. For each 'pending_req' we have have up to - * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through - * 10 and would index in the pending_pages[..]. - */ -static inline int vaddr_pagenr(struct pending_req *req, int seg) -{ - return (req - blkbk->pending_reqs) * - BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; -} - static inline int get_free_page(struct xen_blkif *blkif, struct page **page) { unsigned long flags; @@ -485,18 +429,18 @@ finished: /* * Retrieve from the 'pending_reqs' a free pending_req structure to be used. */ -static struct pending_req *alloc_req(void) +static struct pending_req *alloc_req(struct xen_blkif *blkif) { struct pending_req *req = NULL; unsigned long flags; - spin_lock_irqsave(&blkbk->pending_free_lock, flags); - if (!list_empty(&blkbk->pending_free)) { - req = list_entry(blkbk->pending_free.next, struct pending_req, + spin_lock_irqsave(&blkif->pending_free_lock, flags); + if (!list_empty(&blkif->pending_free)) { + req = list_entry(blkif->pending_free.next, struct pending_req, free_list); list_del(&req->free_list); } - spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); + spin_unlock_irqrestore(&blkif->pending_free_lock, flags); return req; } @@ -504,17 +448,17 @@ static struct pending_req *alloc_req(void) * Return the 'pending_req' structure back to the freepool. We also * wake up the thread if it was waiting for a free page. */ -static void free_req(struct pending_req *req) +static void free_req(struct xen_blkif *blkif, struct pending_req *req) { unsigned long flags; int was_empty; - spin_lock_irqsave(&blkbk->pending_free_lock, flags); - was_empty = list_empty(&blkbk->pending_free); - list_add(&req->free_list, &blkbk->pending_free); - spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); + spin_lock_irqsave(&blkif->pending_free_lock, flags); + was_empty = list_empty(&blkif->pending_free); + list_add(&req->free_list, &blkif->pending_free); + spin_unlock_irqrestore(&blkif->pending_free_lock, flags); if (was_empty) - wake_up(&blkbk->pending_free_wq); + wake_up(&blkif->pending_free_wq); } /* @@ -649,8 +593,8 @@ int xen_blkif_schedule(void *arg) if (timeout == 0) goto purge_gnt_list; timeout = wait_event_interruptible_timeout( - blkbk->pending_free_wq, - !list_empty(&blkbk->pending_free) || + blkif->pending_free_wq, + !list_empty(&blkif->pending_free) || kthread_should_stop(), timeout); if (timeout == 0) @@ -907,7 +851,7 @@ static int dispatch_other_io(struct xen_blkif *blkif, struct blkif_request *req, struct pending_req *pending_req) { - free_req(pending_req); + free_req(blkif, pending_req); make_response(blkif, req->u.other.id, req->operation, BLKIF_RSP_EOPNOTSUPP); return -EIO; @@ -967,7 +911,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) if (atomic_read(&pending_req->blkif->drain)) complete(&pending_req->blkif->drain_complete); } - free_req(pending_req); + free_req(pending_req->blkif, pending_req); } } @@ -1010,7 +954,7 @@ __do_block_io_op(struct xen_blkif *blkif) break; } - pending_req = alloc_req(); + pending_req = alloc_req(blkif); if (NULL == pending_req) { blkif->st_oo_req++; more_to_do = 1; @@ -1044,7 +988,7 @@ __do_block_io_op(struct xen_blkif *blkif) goto done; break; case BLKIF_OP_DISCARD: - free_req(pending_req); + free_req(blkif, pending_req); if (dispatch_discard_io(blkif, &req)) goto done; break; @@ -1246,7 +1190,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, fail_response: /* Haven't submitted any bio's yet. */ make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR); - free_req(pending_req); + free_req(blkif, pending_req); msleep(1); /* back off a bit */ return -EIO; @@ -1303,51 +1247,20 @@ static void make_response(struct xen_blkif *blkif, u64 id, static int __init xen_blkif_init(void) { - int i; int rc = 0; if (!xen_domain()) return -ENODEV; - blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); - if (!blkbk) { - pr_alert(DRV_PFX "%s: out of memory!\n", __func__); - return -ENOMEM; - } - - - blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) * - xen_blkif_reqs, GFP_KERNEL); - - if (!blkbk->pending_reqs) { - rc = -ENOMEM; - goto out_of_memory; - } - rc = xen_blkif_interface_init(); if (rc) goto failed_init; - INIT_LIST_HEAD(&blkbk->pending_free); - spin_lock_init(&blkbk->pending_free_lock); - init_waitqueue_head(&blkbk->pending_free_wq); - - for (i = 0; i < xen_blkif_reqs; i++) - list_add_tail(&blkbk->pending_reqs[i].free_list, - &blkbk->pending_free); - rc = xen_blkif_xenbus_init(); if (rc) goto failed_init; - return 0; - - out_of_memory: - pr_alert(DRV_PFX "%s: out of memory\n", __func__); failed_init: - kfree(blkbk->pending_reqs); - kfree(blkbk); - blkbk = NULL; return rc; } diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index af9bed48f773..e33fafa0facd 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -192,6 +192,9 @@ struct backend_info; */ #define PERSISTENT_GNT_WAS_ACTIVE 1 +/* Number of requests that we can fit in a ring */ +#define XEN_BLKIF_REQS 32 + struct persistent_gnt { struct page *page; grant_ref_t gnt; @@ -242,6 +245,14 @@ struct xen_blkif { int free_pages_num; struct list_head free_pages; + /* Allocation of pending_reqs */ + struct pending_req *pending_reqs; + /* List of all 'pending_req' available */ + struct list_head pending_free; + /* And its spinlock. */ + spinlock_t pending_free_lock; + wait_queue_head_t pending_free_wq; + /* statistics */ unsigned long st_print; unsigned long long st_rd_req; @@ -255,6 +266,25 @@ struct xen_blkif { wait_queue_head_t waiting_to_free; }; +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. + */ +struct pending_req { + struct xen_blkif *blkif; + u64 id; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; + struct list_head free_list; + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + grant_handle_t grant_handles[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; + #define vbd_sz(_v) ((_v)->bdev->bd_part ? \ (_v)->bdev->bd_part->nr_sects : \ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index e0fd92a2a4cd..1f1ade6d6e09 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -105,6 +105,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) static struct xen_blkif *xen_blkif_alloc(domid_t domid) { struct xen_blkif *blkif; + int i; blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL); if (!blkif) @@ -124,6 +125,21 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) blkif->free_pages_num = 0; atomic_set(&blkif->persistent_gnt_in_use, 0); + blkif->pending_reqs = kcalloc(XEN_BLKIF_REQS, + sizeof(blkif->pending_reqs[0]), + GFP_KERNEL); + if (!blkif->pending_reqs) { + kmem_cache_free(xen_blkif_cachep, blkif); + return ERR_PTR(-ENOMEM); + } + INIT_LIST_HEAD(&blkif->pending_free); + spin_lock_init(&blkif->pending_free_lock); + init_waitqueue_head(&blkif->pending_free_wq); + + for (i = 0; i < XEN_BLKIF_REQS; i++) + list_add_tail(&blkif->pending_reqs[i].free_list, + &blkif->pending_free); + return blkif; } @@ -203,8 +219,18 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif) static void xen_blkif_free(struct xen_blkif *blkif) { + struct pending_req *req; + int i = 0; + if (!atomic_dec_and_test(&blkif->refcnt)) BUG(); + + /* Check that there is no request in use */ + list_for_each_entry(req, &blkif->pending_free, free_list) + i++; + BUG_ON(i != XEN_BLKIF_REQS); + + kfree(blkif->pending_reqs); kmem_cache_free(xen_blkif_cachep, blkif); } -- cgit v1.2.3-58-ga151 From 31552ee32df89f97a61766cee51b8dabb1ae3f4f Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 17 Apr 2013 20:19:00 +0200 Subject: xen-blkback: expand map/unmap functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preparatory change for implementing indirect descriptors. Change xen_blkbk_{map/unmap} in order to be able to map/unmap a random amount of grants (previously it was limited to BLKIF_MAX_SEGMENTS_PER_REQUEST). Also, remove the usage of pending_req in the map/unmap functions, so we can map/unmap grants without needing to pass a pending_req. Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xen.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 141 ++++++++++++++++++++++-------------- 1 file changed, 86 insertions(+), 55 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 90a57552f4b7..356722f65f88 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -163,10 +163,6 @@ static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num) #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) -#define pending_handle(_req, _seg) \ - (_req->grant_handles[_seg]) - - static int do_block_io_op(struct xen_blkif *blkif); static int dispatch_rw_block_io(struct xen_blkif *blkif, struct blkif_request *req, @@ -648,50 +644,57 @@ struct seg_buf { * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. */ -static void xen_blkbk_unmap(struct pending_req *req) +static void xen_blkbk_unmap(struct xen_blkif *blkif, + grant_handle_t handles[], + struct page *pages[], + struct persistent_gnt *persistent_gnts[], + int num) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; - grant_handle_t handle; - struct xen_blkif *blkif = req->blkif; int ret; - for (i = 0; i < req->nr_pages; i++) { - if (req->persistent_gnts[i] != NULL) { - put_persistent_gnt(blkif, req->persistent_gnts[i]); + for (i = 0; i < num; i++) { + if (persistent_gnts[i] != NULL) { + put_persistent_gnt(blkif, persistent_gnts[i]); continue; } - handle = pending_handle(req, i); - pages[invcount] = req->pages[i]; - if (handle == BLKBACK_INVALID_HANDLE) + if (handles[i] == BLKBACK_INVALID_HANDLE) continue; - gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[invcount]), - GNTMAP_host_map, handle); - pending_handle(req, i) = BLKBACK_INVALID_HANDLE; - invcount++; + unmap_pages[invcount] = pages[i]; + gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]), + GNTMAP_host_map, handles[i]); + handles[i] = BLKBACK_INVALID_HANDLE; + if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) { + ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, + invcount); + BUG_ON(ret); + put_free_pages(blkif, unmap_pages, invcount); + invcount = 0; + } + } + if (invcount) { + ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); + BUG_ON(ret); + put_free_pages(blkif, unmap_pages, invcount); } - - ret = gnttab_unmap_refs(unmap, NULL, pages, invcount); - BUG_ON(ret); - put_free_pages(blkif, pages, invcount); } -static int xen_blkbk_map(struct blkif_request *req, - struct pending_req *pending_req, - struct seg_buf seg[], - struct page *pages[]) +static int xen_blkbk_map(struct xen_blkif *blkif, grant_ref_t grefs[], + struct persistent_gnt *persistent_gnts[], + grant_handle_t handles[], + struct page *pages[], + int num, bool ro) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct persistent_gnt **persistent_gnts = pending_req->persistent_gnts; struct persistent_gnt *persistent_gnt = NULL; - struct xen_blkif *blkif = pending_req->blkif; phys_addr_t addr = 0; int i, seg_idx, new_map_idx; - int nseg = req->u.rw.nr_segments; int segs_to_map = 0; int ret = 0; + int last_map = 0, map_until = 0; int use_persistent_gnts; use_persistent_gnts = (blkif->vbd.feature_gnt_persistent); @@ -701,13 +704,14 @@ static int xen_blkbk_map(struct blkif_request *req, * assign map[..] with the PFN of the page in our domain with the * corresponding grant reference for each page. */ - for (i = 0; i < nseg; i++) { +again: + for (i = map_until; i < num; i++) { uint32_t flags; if (use_persistent_gnts) persistent_gnt = get_persistent_gnt( blkif, - req->u.rw.seg[i].gref); + grefs[i]); if (persistent_gnt) { /* @@ -723,13 +727,15 @@ static int xen_blkbk_map(struct blkif_request *req, pages_to_gnt[segs_to_map] = pages[i]; persistent_gnts[i] = NULL; flags = GNTMAP_host_map; - if (!use_persistent_gnts && - (pending_req->operation != BLKIF_OP_READ)) + if (!use_persistent_gnts && ro) flags |= GNTMAP_readonly; gnttab_set_map_op(&map[segs_to_map++], addr, - flags, req->u.rw.seg[i].gref, + flags, grefs[i], blkif->domid); } + map_until = i + 1; + if (segs_to_map == BLKIF_MAX_SEGMENTS_PER_REQUEST) + break; } if (segs_to_map) { @@ -742,26 +748,19 @@ static int xen_blkbk_map(struct blkif_request *req, * so that when we access vaddr(pending_req,i) it has the contents of * the page from the other domain. */ - for (seg_idx = 0, new_map_idx = 0; seg_idx < nseg; seg_idx++) { + for (seg_idx = last_map, new_map_idx = 0; seg_idx < map_until; seg_idx++) { if (!persistent_gnts[seg_idx]) { /* This is a newly mapped grant */ BUG_ON(new_map_idx >= segs_to_map); if (unlikely(map[new_map_idx].status != 0)) { pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); - pending_handle(pending_req, seg_idx) = BLKBACK_INVALID_HANDLE; + handles[seg_idx] = BLKBACK_INVALID_HANDLE; ret |= 1; - new_map_idx++; - /* - * No need to set unmap_seg bit, since - * we can not unmap this grant because - * the handle is invalid. - */ - continue; + goto next; } - pending_handle(pending_req, seg_idx) = map[new_map_idx].handle; + handles[seg_idx] = map[new_map_idx].handle; } else { - /* This grant is persistent and already mapped */ - goto next; + continue; } if (use_persistent_gnts && blkif->persistent_gnt_c < xen_blkif_max_pgrants) { @@ -777,7 +776,7 @@ static int xen_blkbk_map(struct blkif_request *req, * allocate the persistent_gnt struct * map this grant non-persistenly */ - goto next_unmap; + goto next; } persistent_gnt->gnt = map[new_map_idx].ref; persistent_gnt->handle = map[new_map_idx].handle; @@ -786,13 +785,12 @@ static int xen_blkbk_map(struct blkif_request *req, persistent_gnt)) { kfree(persistent_gnt); persistent_gnt = NULL; - goto next_unmap; + goto next; } persistent_gnts[seg_idx] = persistent_gnt; pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n", persistent_gnt->gnt, blkif->persistent_gnt_c, xen_blkif_max_pgrants); - new_map_idx++; goto next; } if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) { @@ -800,15 +798,18 @@ static int xen_blkbk_map(struct blkif_request *req, pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n", blkif->domid, blkif->vbd.handle); } -next_unmap: /* * We could not map this grant persistently, so use it as * a non-persistent grant. */ - new_map_idx++; next: - seg[seg_idx].offset = (req->u.rw.seg[seg_idx].first_sect << 9); + new_map_idx++; } + segs_to_map = 0; + last_map = map_until; + if (map_until != num) + goto again; + return ret; out_of_memory: @@ -817,6 +818,31 @@ out_of_memory: return -ENOMEM; } +static int xen_blkbk_map_seg(struct blkif_request *req, + struct pending_req *pending_req, + struct seg_buf seg[], + struct page *pages[]) +{ + int i, rc; + grant_ref_t grefs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + + for (i = 0; i < req->u.rw.nr_segments; i++) + grefs[i] = req->u.rw.seg[i].gref; + + rc = xen_blkbk_map(pending_req->blkif, grefs, + pending_req->persistent_gnts, + pending_req->grant_handles, pending_req->pages, + req->u.rw.nr_segments, + (pending_req->operation != BLKIF_OP_READ)); + if (rc) + return rc; + + for (i = 0; i < req->u.rw.nr_segments; i++) + seg[i].offset = (req->u.rw.seg[i].first_sect << 9); + + return 0; +} + static int dispatch_discard_io(struct xen_blkif *blkif, struct blkif_request *req) { @@ -903,7 +929,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) * the proper response on the ring. */ if (atomic_dec_and_test(&pending_req->pendcnt)) { - xen_blkbk_unmap(pending_req); + xen_blkbk_unmap(pending_req->blkif, pending_req->grant_handles, + pending_req->pages, + pending_req->persistent_gnts, + pending_req->nr_pages); make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); xen_blkif_put(pending_req->blkif); @@ -1125,7 +1154,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (xen_blkbk_map(req, pending_req, seg, pages)) + if (xen_blkbk_map_seg(req, pending_req, seg, pages)) goto fail_flush; /* @@ -1186,7 +1215,9 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, return 0; fail_flush: - xen_blkbk_unmap(pending_req); + xen_blkbk_unmap(blkif, pending_req->grant_handles, + pending_req->pages, pending_req->persistent_gnts, + pending_req->nr_pages); fail_response: /* Haven't submitted any bio's yet. */ make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR); -- cgit v1.2.3-58-ga151 From 402b27f9f2c22309d5bb285628765bc27b82fcf5 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Thu, 18 Apr 2013 16:06:54 +0200 Subject: xen-block: implement indirect descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Indirect descriptors introduce a new block operation (BLKIF_OP_INDIRECT) that passes grant references instead of segments in the request. This grant references are filled with arrays of blkif_request_segment_aligned, this way we can send more segments in a request. The proposed implementation sets the maximum number of indirect grefs (frames filled with blkif_request_segment_aligned) to 256 in the backend and 32 in the frontend. The value in the frontend has been chosen experimentally, and the backend value has been set to a sane value that allows expanding the maximum number of indirect descriptors in the frontend if needed. The migration code has changed from the previous implementation, in which we simply remapped the segments on the shared ring. Now the maximum number of segments allowed in a request can change depending on the backend, so we have to requeue all the requests in the ring and in the queue and split the bios in them if they are bigger than the new maximum number of segments. [v2: Fixed minor comments by Konrad. [v1: Added padding to make the indirect request 64bit aligned. Added some BUGs, comments; fixed number of indirect pages in blkif_get_x86_{32/64}_req. Added description about the indirect operation in blkif.h] Signed-off-by: Roger Pau Monné [v3: Fixed spaces and tabs mix ups] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 134 +++++++--- drivers/block/xen-blkback/common.h | 98 +++++++- drivers/block/xen-blkback/xenbus.c | 7 + drivers/block/xen-blkfront.c | 490 +++++++++++++++++++++++++++++------- include/xen/interface/io/blkif.h | 53 ++++ 5 files changed, 657 insertions(+), 125 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 356722f65f88..1ebc0aa0f0e4 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -59,7 +59,7 @@ * IO workloads. */ -static int xen_blkif_max_buffer_pages = 704; +static int xen_blkif_max_buffer_pages = 1024; module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644); MODULE_PARM_DESC(max_buffer_pages, "Maximum number of free pages to keep in each block backend buffer"); @@ -75,7 +75,7 @@ MODULE_PARM_DESC(max_buffer_pages, * algorithm. */ -static int xen_blkif_max_pgrants = 352; +static int xen_blkif_max_pgrants = 1056; module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644); MODULE_PARM_DESC(max_persistent_grants, "Maximum number of grants to map persistently"); @@ -636,10 +636,6 @@ purge_gnt_list: return 0; } -struct seg_buf { - unsigned int offset; - unsigned int nsec; -}; /* * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. @@ -818,29 +814,69 @@ out_of_memory: return -ENOMEM; } -static int xen_blkbk_map_seg(struct blkif_request *req, - struct pending_req *pending_req, +static int xen_blkbk_map_seg(struct pending_req *pending_req, struct seg_buf seg[], struct page *pages[]) { - int i, rc; - grant_ref_t grefs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int rc; - for (i = 0; i < req->u.rw.nr_segments; i++) - grefs[i] = req->u.rw.seg[i].gref; - - rc = xen_blkbk_map(pending_req->blkif, grefs, + rc = xen_blkbk_map(pending_req->blkif, pending_req->grefs, pending_req->persistent_gnts, pending_req->grant_handles, pending_req->pages, - req->u.rw.nr_segments, + pending_req->nr_pages, (pending_req->operation != BLKIF_OP_READ)); - if (rc) - return rc; - for (i = 0; i < req->u.rw.nr_segments; i++) - seg[i].offset = (req->u.rw.seg[i].first_sect << 9); + return rc; +} - return 0; +static int xen_blkbk_parse_indirect(struct blkif_request *req, + struct pending_req *pending_req, + struct seg_buf seg[], + struct phys_req *preq) +{ + struct persistent_gnt **persistent = + pending_req->indirect_persistent_gnts; + struct page **pages = pending_req->indirect_pages; + struct xen_blkif *blkif = pending_req->blkif; + int indirect_grefs, rc, n, nseg, i; + struct blkif_request_segment_aligned *segments = NULL; + + nseg = pending_req->nr_pages; + indirect_grefs = INDIRECT_PAGES(nseg); + BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST); + + rc = xen_blkbk_map(blkif, req->u.indirect.indirect_grefs, + persistent, pending_req->indirect_handles, + pages, indirect_grefs, true); + if (rc) + goto unmap; + + for (n = 0, i = 0; n < nseg; n++) { + if ((n % SEGS_PER_INDIRECT_FRAME) == 0) { + /* Map indirect segments */ + if (segments) + kunmap_atomic(segments); + segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]); + } + i = n % SEGS_PER_INDIRECT_FRAME; + pending_req->grefs[n] = segments[i].gref; + seg[n].nsec = segments[i].last_sect - + segments[i].first_sect + 1; + seg[n].offset = (segments[i].first_sect << 9); + if ((segments[i].last_sect >= (PAGE_SIZE >> 9)) || + (segments[i].last_sect < segments[i].first_sect)) { + rc = -EINVAL; + goto unmap; + } + preq->nr_sects += seg[n].nsec; + } + +unmap: + if (segments) + kunmap_atomic(segments); + xen_blkbk_unmap(blkif, pending_req->indirect_handles, + pages, persistent, indirect_grefs); + return rc; } static int dispatch_discard_io(struct xen_blkif *blkif, @@ -1013,6 +1049,7 @@ __do_block_io_op(struct xen_blkif *blkif) case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_FLUSH_DISKCACHE: + case BLKIF_OP_INDIRECT: if (dispatch_rw_block_io(blkif, &req, pending_req)) goto done; break; @@ -1059,17 +1096,28 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, struct pending_req *pending_req) { struct phys_req preq; - struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct seg_buf *seg = pending_req->seg; unsigned int nseg; struct bio *bio = NULL; - struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct bio **biolist = pending_req->biolist; int i, nbio = 0; int operation; struct blk_plug plug; bool drain = false; struct page **pages = pending_req->pages; + unsigned short req_operation; + + req_operation = req->operation == BLKIF_OP_INDIRECT ? + req->u.indirect.indirect_op : req->operation; + if ((req->operation == BLKIF_OP_INDIRECT) && + (req_operation != BLKIF_OP_READ) && + (req_operation != BLKIF_OP_WRITE)) { + pr_debug(DRV_PFX "Invalid indirect operation (%u)\n", + req_operation); + goto fail_response; + } - switch (req->operation) { + switch (req_operation) { case BLKIF_OP_READ: blkif->st_rd_req++; operation = READ; @@ -1091,33 +1139,47 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } /* Check that the number of segments is sane. */ - nseg = req->u.rw.nr_segments; + nseg = req->operation == BLKIF_OP_INDIRECT ? + req->u.indirect.nr_segments : req->u.rw.nr_segments; if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || - unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { + unlikely((req->operation != BLKIF_OP_INDIRECT) && + (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) || + unlikely((req->operation == BLKIF_OP_INDIRECT) && + (nseg > MAX_INDIRECT_SEGMENTS))) { pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", nseg); /* Haven't submitted any bio's yet. */ goto fail_response; } - preq.sector_number = req->u.rw.sector_number; preq.nr_sects = 0; pending_req->blkif = blkif; pending_req->id = req->u.rw.id; - pending_req->operation = req->operation; + pending_req->operation = req_operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; - for (i = 0; i < nseg; i++) { - seg[i].nsec = req->u.rw.seg[i].last_sect - - req->u.rw.seg[i].first_sect + 1; - if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || - (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) + if (req->operation != BLKIF_OP_INDIRECT) { + preq.dev = req->u.rw.handle; + preq.sector_number = req->u.rw.sector_number; + for (i = 0; i < nseg; i++) { + pending_req->grefs[i] = req->u.rw.seg[i].gref; + seg[i].nsec = req->u.rw.seg[i].last_sect - + req->u.rw.seg[i].first_sect + 1; + seg[i].offset = (req->u.rw.seg[i].first_sect << 9); + if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (req->u.rw.seg[i].last_sect < + req->u.rw.seg[i].first_sect)) + goto fail_response; + preq.nr_sects += seg[i].nsec; + } + } else { + preq.dev = req->u.indirect.handle; + preq.sector_number = req->u.indirect.sector_number; + if (xen_blkbk_parse_indirect(req, pending_req, seg, &preq)) goto fail_response; - preq.nr_sects += seg[i].nsec; - } if (xen_vbd_translate(&preq, blkif, operation) != 0) { @@ -1154,7 +1216,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (xen_blkbk_map_seg(req, pending_req, seg, pages)) + if (xen_blkbk_map_seg(pending_req, seg, pages)) goto fail_flush; /* @@ -1220,7 +1282,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, pending_req->nr_pages); fail_response: /* Haven't submitted any bio's yet. */ - make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR); + make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR); free_req(blkif, pending_req); msleep(1); /* back off a bit */ return -EIO; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index e33fafa0facd..1ac53da8410f 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -50,6 +50,19 @@ __func__, __LINE__, ##args) +/* + * This is the maximum number of segments that would be allowed in indirect + * requests. This value will also be passed to the frontend. + */ +#define MAX_INDIRECT_SEGMENTS 256 + +#define SEGS_PER_INDIRECT_FRAME \ + (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) +#define MAX_INDIRECT_PAGES \ + ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) +#define INDIRECT_PAGES(_segs) \ + ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) + /* Not a real protocol. Used to generate ring structs which contain * the elements common to all protocols only. This way we get a * compiler-checkable way to use common struct elements, so we can @@ -83,12 +96,31 @@ struct blkif_x86_32_request_other { uint64_t id; /* private guest value, echoed in resp */ } __attribute__((__packed__)); +struct blkif_x86_32_request_indirect { + uint8_t indirect_op; + uint16_t nr_segments; + uint64_t id; + blkif_sector_t sector_number; + blkif_vdev_t handle; + uint16_t _pad1; + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; + /* + * The maximum number of indirect segments (and pages) that will + * be used is determined by MAX_INDIRECT_SEGMENTS, this value + * is also exported to the guest (via xenstore + * feature-max-indirect-segments entry), so the frontend knows how + * many indirect segments the backend supports. + */ + uint64_t _pad2; /* make it 64 byte aligned */ +} __attribute__((__packed__)); + struct blkif_x86_32_request { uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_x86_32_request_rw rw; struct blkif_x86_32_request_discard discard; struct blkif_x86_32_request_other other; + struct blkif_x86_32_request_indirect indirect; } u; } __attribute__((__packed__)); @@ -127,12 +159,32 @@ struct blkif_x86_64_request_other { uint64_t id; /* private guest value, echoed in resp */ } __attribute__((__packed__)); +struct blkif_x86_64_request_indirect { + uint8_t indirect_op; + uint16_t nr_segments; + uint32_t _pad1; /* offsetof(blkif_..,u.indirect.id)==8 */ + uint64_t id; + blkif_sector_t sector_number; + blkif_vdev_t handle; + uint16_t _pad2; + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; + /* + * The maximum number of indirect segments (and pages) that will + * be used is determined by MAX_INDIRECT_SEGMENTS, this value + * is also exported to the guest (via xenstore + * feature-max-indirect-segments entry), so the frontend knows how + * many indirect segments the backend supports. + */ + uint32_t _pad3; /* make it 64 byte aligned */ +} __attribute__((__packed__)); + struct blkif_x86_64_request { uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_x86_64_request_rw rw; struct blkif_x86_64_request_discard discard; struct blkif_x86_64_request_other other; + struct blkif_x86_64_request_indirect indirect; } u; } __attribute__((__packed__)); @@ -266,6 +318,11 @@ struct xen_blkif { wait_queue_head_t waiting_to_free; }; +struct seg_buf { + unsigned long offset; + unsigned int nsec; +}; + /* * Each outstanding request that we've passed to the lower device layers has a * 'pending_req' allocated to it. Each buffer_head that completes decrements @@ -280,9 +337,16 @@ struct pending_req { unsigned short operation; int status; struct list_head free_list; - struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - grant_handle_t grant_handles[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *pages[MAX_INDIRECT_SEGMENTS]; + struct persistent_gnt *persistent_gnts[MAX_INDIRECT_SEGMENTS]; + grant_handle_t grant_handles[MAX_INDIRECT_SEGMENTS]; + grant_ref_t grefs[MAX_INDIRECT_SEGMENTS]; + /* Indirect descriptors */ + struct persistent_gnt *indirect_persistent_gnts[MAX_INDIRECT_PAGES]; + struct page *indirect_pages[MAX_INDIRECT_PAGES]; + grant_handle_t indirect_handles[MAX_INDIRECT_PAGES]; + struct seg_buf seg[MAX_INDIRECT_SEGMENTS]; + struct bio *biolist[MAX_INDIRECT_SEGMENTS]; }; @@ -321,7 +385,7 @@ struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); static inline void blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src) { - int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j; dst->operation = src->operation; switch (src->operation) { case BLKIF_OP_READ: @@ -344,6 +408,18 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; + case BLKIF_OP_INDIRECT: + dst->u.indirect.indirect_op = src->u.indirect.indirect_op; + dst->u.indirect.nr_segments = src->u.indirect.nr_segments; + dst->u.indirect.handle = src->u.indirect.handle; + dst->u.indirect.id = src->u.indirect.id; + dst->u.indirect.sector_number = src->u.indirect.sector_number; + barrier(); + j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments)); + for (i = 0; i < j; i++) + dst->u.indirect.indirect_grefs[i] = + src->u.indirect.indirect_grefs[i]; + break; default: /* * Don't know how to translate this op. Only get the @@ -357,7 +433,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, static inline void blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src) { - int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j; dst->operation = src->operation; switch (src->operation) { case BLKIF_OP_READ: @@ -380,6 +456,18 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; + case BLKIF_OP_INDIRECT: + dst->u.indirect.indirect_op = src->u.indirect.indirect_op; + dst->u.indirect.nr_segments = src->u.indirect.nr_segments; + dst->u.indirect.handle = src->u.indirect.handle; + dst->u.indirect.id = src->u.indirect.id; + dst->u.indirect.sector_number = src->u.indirect.sector_number; + barrier(); + j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments)); + for (i = 0; i < j; i++) + dst->u.indirect.indirect_grefs[i] = + src->u.indirect.indirect_grefs[i]; + break; default: /* * Don't know how to translate this op. Only get the diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 1f1ade6d6e09..afab208c54e3 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -107,6 +107,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) struct xen_blkif *blkif; int i; + BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST); + blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL); if (!blkif) return ERR_PTR(-ENOMEM); @@ -709,6 +711,11 @@ again: dev->nodename); goto abort; } + err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u", + MAX_INDIRECT_SEGMENTS); + if (err) + dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)", + dev->nodename, err); err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(&be->blkif->vbd)); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index a894f88762d8..82d63d5b1750 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -74,12 +74,27 @@ struct grant { struct blk_shadow { struct blkif_request req; struct request *request; - struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct grant **grants_used; + struct grant **indirect_grants; +}; + +struct split_bio { + struct bio *bio; + atomic_t pending; + int err; }; static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; +/* + * Maximum number of segments in indirect requests, the actual value used by + * the frontend driver is the minimum of this value and the value provided + * by the backend driver. + */ + +static unsigned int xen_blkif_max_segments = 32; + #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) /* @@ -98,7 +113,7 @@ struct blkfront_info enum blkif_state connected; int ring_ref; struct blkif_front_ring ring; - struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct scatterlist *sg; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; @@ -114,6 +129,7 @@ struct blkfront_info unsigned int discard_granularity; unsigned int discard_alignment; unsigned int feature_persistent:1; + unsigned int max_indirect_segments; int is_ready; }; @@ -142,6 +158,13 @@ static DEFINE_SPINLOCK(minor_lock); #define DEV_NAME "xvd" /* name in /dev */ +#define SEGS_PER_INDIRECT_FRAME \ + (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) +#define INDIRECT_GREFS(_segs) \ + ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) + +static int blkfront_setup_indirect(struct blkfront_info *info); + static int get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; @@ -358,7 +381,8 @@ static int blkif_queue_request(struct request *req) struct blkif_request *ring_req; unsigned long id; unsigned int fsect, lsect; - int i, ref; + int i, ref, n; + struct blkif_request_segment_aligned *segments = NULL; /* * Used to store if we are able to queue the request by just using @@ -369,21 +393,27 @@ static int blkif_queue_request(struct request *req) grant_ref_t gref_head; struct grant *gnt_list_entry = NULL; struct scatterlist *sg; + int nseg, max_grefs; if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return 1; - /* Check if we have enought grants to allocate a requests */ - if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) { + max_grefs = info->max_indirect_segments ? + info->max_indirect_segments + + INDIRECT_GREFS(info->max_indirect_segments) : + BLKIF_MAX_SEGMENTS_PER_REQUEST; + + /* Check if we have enough grants to allocate a requests */ + if (info->persistent_gnts_c < max_grefs) { new_persistent_gnts = 1; if (gnttab_alloc_grant_references( - BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c, + max_grefs - info->persistent_gnts_c, &gref_head) < 0) { gnttab_request_free_callback( &info->callback, blkif_restart_queue_callback, info, - BLKIF_MAX_SEGMENTS_PER_REQUEST); + max_grefs); return 1; } } else @@ -394,42 +424,67 @@ static int blkif_queue_request(struct request *req) id = get_id_from_freelist(info); info->shadow[id].request = req; - ring_req->u.rw.id = id; - ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); - ring_req->u.rw.handle = info->handle; - - ring_req->operation = rq_data_dir(req) ? - BLKIF_OP_WRITE : BLKIF_OP_READ; - - if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { - /* - * Ideally we can do an unordered flush-to-disk. In case the - * backend onlysupports barriers, use that. A barrier request - * a superset of FUA, so we can implement it the same - * way. (It's also a FLUSH+FUA, since it is - * guaranteed ordered WRT previous writes.) - */ - ring_req->operation = info->flush_op; - } - if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) { - /* id, sector_number and handle are set above. */ ring_req->operation = BLKIF_OP_DISCARD; ring_req->u.discard.nr_sectors = blk_rq_sectors(req); + ring_req->u.discard.id = id; + ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req); if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; else ring_req->u.discard.flag = 0; } else { - ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, - info->sg); - BUG_ON(ring_req->u.rw.nr_segments > - BLKIF_MAX_SEGMENTS_PER_REQUEST); - - for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { + BUG_ON(info->max_indirect_segments == 0 && + req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); + BUG_ON(info->max_indirect_segments && + req->nr_phys_segments > info->max_indirect_segments); + nseg = blk_rq_map_sg(req->q, req, info->sg); + ring_req->u.rw.id = id; + if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) { + /* + * The indirect operation can only be a BLKIF_OP_READ or + * BLKIF_OP_WRITE + */ + BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA)); + ring_req->operation = BLKIF_OP_INDIRECT; + ring_req->u.indirect.indirect_op = rq_data_dir(req) ? + BLKIF_OP_WRITE : BLKIF_OP_READ; + ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req); + ring_req->u.indirect.handle = info->handle; + ring_req->u.indirect.nr_segments = nseg; + } else { + ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); + ring_req->u.rw.handle = info->handle; + ring_req->operation = rq_data_dir(req) ? + BLKIF_OP_WRITE : BLKIF_OP_READ; + if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { + /* + * Ideally we can do an unordered flush-to-disk. In case the + * backend onlysupports barriers, use that. A barrier request + * a superset of FUA, so we can implement it the same + * way. (It's also a FLUSH+FUA, since it is + * guaranteed ordered WRT previous writes.) + */ + ring_req->operation = info->flush_op; + } + ring_req->u.rw.nr_segments = nseg; + } + for_each_sg(info->sg, sg, nseg, i) { fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; + if ((ring_req->operation == BLKIF_OP_INDIRECT) && + (i % SEGS_PER_INDIRECT_FRAME == 0)) { + if (segments) + kunmap_atomic(segments); + + n = i / SEGS_PER_INDIRECT_FRAME; + gnt_list_entry = get_grant(&gref_head, info); + info->shadow[id].indirect_grants[n] = gnt_list_entry; + segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); + ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref; + } + gnt_list_entry = get_grant(&gref_head, info); ref = gnt_list_entry->gref; @@ -441,8 +496,7 @@ static int blkif_queue_request(struct request *req) BUG_ON(sg->offset + sg->length > PAGE_SIZE); - shared_data = kmap_atomic( - pfn_to_page(gnt_list_entry->pfn)); + shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); bvec_data = kmap_atomic(sg_page(sg)); /* @@ -461,13 +515,23 @@ static int blkif_queue_request(struct request *req) kunmap_atomic(bvec_data); kunmap_atomic(shared_data); } - - ring_req->u.rw.seg[i] = - (struct blkif_request_segment) { - .gref = ref, - .first_sect = fsect, - .last_sect = lsect }; + if (ring_req->operation != BLKIF_OP_INDIRECT) { + ring_req->u.rw.seg[i] = + (struct blkif_request_segment) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; + } else { + n = i % SEGS_PER_INDIRECT_FRAME; + segments[n] = + (struct blkif_request_segment_aligned) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; + } } + if (segments) + kunmap_atomic(segments); } info->ring.req_prod_pvt++; @@ -542,7 +606,8 @@ wait: flush_requests(info); } -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, + unsigned int segments) { struct request_queue *rq; struct blkfront_info *info = gd->private_data; @@ -571,7 +636,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) blk_queue_max_segment_size(rq, PAGE_SIZE); /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + blk_queue_max_segments(rq, segments); /* Make sure buffer addresses are sector-aligned. */ blk_queue_dma_alignment(rq, 511); @@ -588,13 +653,16 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) static void xlvbd_flush(struct blkfront_info *info) { blk_queue_flush(info->rq, info->feature_flush); - printk(KERN_INFO "blkfront: %s: %s: %s %s\n", + printk(KERN_INFO "blkfront: %s: %s: %s %s %s %s %s\n", info->gd->disk_name, info->flush_op == BLKIF_OP_WRITE_BARRIER ? "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? "flush diskcache" : "barrier or flush"), - info->feature_flush ? "enabled" : "disabled", - info->feature_persistent ? "using persistent grants" : ""); + info->feature_flush ? "enabled;" : "disabled;", + "persistent grants:", + info->feature_persistent ? "enabled;" : "disabled;", + "indirect descriptors:", + info->max_indirect_segments ? "enabled;" : "disabled;"); } static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) @@ -734,7 +802,9 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, gd->driverfs_dev = &(info->xbdev->dev); set_capacity(gd, capacity); - if (xlvbd_init_blk_queue(gd, sector_size)) { + if (xlvbd_init_blk_queue(gd, sector_size, + info->max_indirect_segments ? : + BLKIF_MAX_SEGMENTS_PER_REQUEST)) { del_gendisk(gd); goto release; } @@ -818,6 +888,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) { struct grant *persistent_gnt; struct grant *n; + int i, j, segs; /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&info->io_lock); @@ -843,6 +914,47 @@ static void blkif_free(struct blkfront_info *info, int suspend) } BUG_ON(info->persistent_gnts_c != 0); + kfree(info->sg); + info->sg = NULL; + for (i = 0; i < BLK_RING_SIZE; i++) { + /* + * Clear persistent grants present in requests already + * on the shared ring + */ + if (!info->shadow[i].request) + goto free_shadow; + + segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ? + info->shadow[i].req.u.indirect.nr_segments : + info->shadow[i].req.u.rw.nr_segments; + for (j = 0; j < segs; j++) { + persistent_gnt = info->shadow[i].grants_used[j]; + gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + __free_page(pfn_to_page(persistent_gnt->pfn)); + kfree(persistent_gnt); + } + + if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT) + /* + * If this is not an indirect operation don't try to + * free indirect segments + */ + goto free_shadow; + + for (j = 0; j < INDIRECT_GREFS(segs); j++) { + persistent_gnt = info->shadow[i].indirect_grants[j]; + gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + __free_page(pfn_to_page(persistent_gnt->pfn)); + kfree(persistent_gnt); + } + +free_shadow: + kfree(info->shadow[i].grants_used); + info->shadow[i].grants_used = NULL; + kfree(info->shadow[i].indirect_grants); + info->shadow[i].indirect_grants = NULL; + } + /* No more gnttab callback work. */ gnttab_cancel_free_callback(&info->callback); spin_unlock_irq(&info->io_lock); @@ -873,6 +985,10 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, char *bvec_data; void *shared_data; unsigned int offset = 0; + int nseg; + + nseg = s->req.operation == BLKIF_OP_INDIRECT ? + s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments; if (bret->operation == BLKIF_OP_READ) { /* @@ -885,7 +1001,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); if (bvec->bv_offset < offset) i++; - BUG_ON(i >= s->req.u.rw.nr_segments); + BUG_ON(i >= nseg); shared_data = kmap_atomic( pfn_to_page(s->grants_used[i]->pfn)); bvec_data = bvec_kmap_irq(bvec, &flags); @@ -897,10 +1013,16 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, } } /* Add the persistent grant into the list of free grants */ - for (i = 0; i < s->req.u.rw.nr_segments; i++) { + for (i = 0; i < nseg; i++) { list_add(&s->grants_used[i]->node, &info->persistent_gnts); info->persistent_gnts_c++; } + if (s->req.operation == BLKIF_OP_INDIRECT) { + for (i = 0; i < INDIRECT_GREFS(nseg); i++) { + list_add(&s->indirect_grants[i]->node, &info->persistent_gnts); + info->persistent_gnts_c++; + } + } } static irqreturn_t blkif_interrupt(int irq, void *dev_id) @@ -1034,14 +1156,6 @@ static int setup_blkring(struct xenbus_device *dev, SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); - sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); - - /* Allocate memory for grants */ - err = fill_grant_buffer(info, BLK_RING_SIZE * - BLKIF_MAX_SEGMENTS_PER_REQUEST); - if (err) - goto fail; - err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); if (err < 0) { free_page((unsigned long)sring); @@ -1223,13 +1337,84 @@ static int blkfront_probe(struct xenbus_device *dev, return 0; } +/* + * This is a clone of md_trim_bio, used to split a bio into smaller ones + */ +static void trim_bio(struct bio *bio, int offset, int size) +{ + /* 'bio' is a cloned bio which we need to trim to match + * the given offset and size. + * This requires adjusting bi_sector, bi_size, and bi_io_vec + */ + int i; + struct bio_vec *bvec; + int sofar = 0; + + size <<= 9; + if (offset == 0 && size == bio->bi_size) + return; + + bio->bi_sector += offset; + bio->bi_size = size; + offset <<= 9; + clear_bit(BIO_SEG_VALID, &bio->bi_flags); + + while (bio->bi_idx < bio->bi_vcnt && + bio->bi_io_vec[bio->bi_idx].bv_len <= offset) { + /* remove this whole bio_vec */ + offset -= bio->bi_io_vec[bio->bi_idx].bv_len; + bio->bi_idx++; + } + if (bio->bi_idx < bio->bi_vcnt) { + bio->bi_io_vec[bio->bi_idx].bv_offset += offset; + bio->bi_io_vec[bio->bi_idx].bv_len -= offset; + } + /* avoid any complications with bi_idx being non-zero*/ + if (bio->bi_idx) { + memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, + (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec)); + bio->bi_vcnt -= bio->bi_idx; + bio->bi_idx = 0; + } + /* Make sure vcnt and last bv are not too big */ + bio_for_each_segment(bvec, bio, i) { + if (sofar + bvec->bv_len > size) + bvec->bv_len = size - sofar; + if (bvec->bv_len == 0) { + bio->bi_vcnt = i; + break; + } + sofar += bvec->bv_len; + } +} + +static void split_bio_end(struct bio *bio, int error) +{ + struct split_bio *split_bio = bio->bi_private; + + if (error) + split_bio->err = error; + + if (atomic_dec_and_test(&split_bio->pending)) { + split_bio->bio->bi_phys_segments = 0; + bio_endio(split_bio->bio, split_bio->err); + kfree(split_bio); + } + bio_put(bio); +} static int blkif_recover(struct blkfront_info *info) { int i; - struct blkif_request *req; + struct request *req, *n; struct blk_shadow *copy; - int j; + int rc; + struct bio *bio, *cloned_bio; + struct bio_list bio_list, merge_bio; + unsigned int segs, offset; + int pending, size; + struct split_bio *split_bio; + struct list_head requests; /* Stage 1: Make a safe copy of the shadow state. */ copy = kmemdup(info->shadow, sizeof(info->shadow), @@ -1244,36 +1429,64 @@ static int blkif_recover(struct blkfront_info *info) info->shadow_free = info->ring.req_prod_pvt; info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; - /* Stage 3: Find pending requests and requeue them. */ + rc = blkfront_setup_indirect(info); + if (rc) { + kfree(copy); + return rc; + } + + segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; + blk_queue_max_segments(info->rq, segs); + bio_list_init(&bio_list); + INIT_LIST_HEAD(&requests); for (i = 0; i < BLK_RING_SIZE; i++) { /* Not in use? */ if (!copy[i].request) continue; - /* Grab a request slot and copy shadow state into it. */ - req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); - *req = copy[i].req; - - /* We get a new request id, and must reset the shadow state. */ - req->u.rw.id = get_id_from_freelist(info); - memcpy(&info->shadow[req->u.rw.id], ©[i], sizeof(copy[i])); - - if (req->operation != BLKIF_OP_DISCARD) { - /* Rewrite any grant references invalidated by susp/resume. */ - for (j = 0; j < req->u.rw.nr_segments; j++) - gnttab_grant_foreign_access_ref( - req->u.rw.seg[j].gref, - info->xbdev->otherend_id, - pfn_to_mfn(copy[i].grants_used[j]->pfn), - 0); + /* + * Get the bios in the request so we can re-queue them. + */ + if (copy[i].request->cmd_flags & + (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { + /* + * Flush operations don't contain bios, so + * we need to requeue the whole request + */ + list_add(©[i].request->queuelist, &requests); + continue; } - info->shadow[req->u.rw.id].req = *req; - - info->ring.req_prod_pvt++; + merge_bio.head = copy[i].request->bio; + merge_bio.tail = copy[i].request->biotail; + bio_list_merge(&bio_list, &merge_bio); + copy[i].request->bio = NULL; + blk_put_request(copy[i].request); } kfree(copy); + /* + * Empty the queue, this is important because we might have + * requests in the queue with more segments than what we + * can handle now. + */ + spin_lock_irq(&info->io_lock); + while ((req = blk_fetch_request(info->rq)) != NULL) { + if (req->cmd_flags & + (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { + list_add(&req->queuelist, &requests); + continue; + } + merge_bio.head = req->bio; + merge_bio.tail = req->biotail; + bio_list_merge(&bio_list, &merge_bio); + req->bio = NULL; + if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) + pr_alert("diskcache flush request found!\n"); + __blk_put_request(info->rq, req); + } + spin_unlock_irq(&info->io_lock); + xenbus_switch_state(info->xbdev, XenbusStateConnected); spin_lock_irq(&info->io_lock); @@ -1281,14 +1494,50 @@ static int blkif_recover(struct blkfront_info *info) /* Now safe for us to use the shared ring */ info->connected = BLKIF_STATE_CONNECTED; - /* Send off requeued requests */ - flush_requests(info); - /* Kick any other new requests queued since we resumed */ kick_pending_request_queues(info); + list_for_each_entry_safe(req, n, &requests, queuelist) { + /* Requeue pending requests (flush or discard) */ + list_del_init(&req->queuelist); + BUG_ON(req->nr_phys_segments > segs); + blk_requeue_request(info->rq, req); + } spin_unlock_irq(&info->io_lock); + while ((bio = bio_list_pop(&bio_list)) != NULL) { + /* Traverse the list of pending bios and re-queue them */ + if (bio_segments(bio) > segs) { + /* + * This bio has more segments than what we can + * handle, we have to split it. + */ + pending = (bio_segments(bio) + segs - 1) / segs; + split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO); + BUG_ON(split_bio == NULL); + atomic_set(&split_bio->pending, pending); + split_bio->bio = bio; + for (i = 0; i < pending; i++) { + offset = (i * segs * PAGE_SIZE) >> 9; + size = min((unsigned int)(segs * PAGE_SIZE) >> 9, + (unsigned int)(bio->bi_size >> 9) - offset); + cloned_bio = bio_clone(bio, GFP_NOIO); + BUG_ON(cloned_bio == NULL); + trim_bio(cloned_bio, offset, size); + cloned_bio->bi_private = split_bio; + cloned_bio->bi_end_io = split_bio_end; + submit_bio(cloned_bio->bi_rw, cloned_bio); + } + /* + * Now we have to wait for all those smaller bios to + * end, so we can also end the "parent" bio. + */ + continue; + } + /* We don't need to split this bio */ + submit_bio(bio->bi_rw, bio); + } + return 0; } @@ -1308,8 +1557,12 @@ static int blkfront_resume(struct xenbus_device *dev) blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); err = talk_to_blkback(dev, info); - if (info->connected == BLKIF_STATE_SUSPENDED && !err) - err = blkif_recover(info); + + /* + * We have to wait for the backend to switch to + * connected state, since we want to read which + * features it supports. + */ return err; } @@ -1387,6 +1640,61 @@ static void blkfront_setup_discard(struct blkfront_info *info) kfree(type); } +static int blkfront_setup_indirect(struct blkfront_info *info) +{ + unsigned int indirect_segments, segs; + int err, i; + + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-max-indirect-segments", "%u", &indirect_segments, + NULL); + if (err) { + info->max_indirect_segments = 0; + segs = BLKIF_MAX_SEGMENTS_PER_REQUEST; + } else { + info->max_indirect_segments = min(indirect_segments, + xen_blkif_max_segments); + segs = info->max_indirect_segments; + } + info->sg = kzalloc(sizeof(info->sg[0]) * segs, GFP_KERNEL); + if (info->sg == NULL) + goto out_of_memory; + sg_init_table(info->sg, segs); + + err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE); + if (err) + goto out_of_memory; + + for (i = 0; i < BLK_RING_SIZE; i++) { + info->shadow[i].grants_used = kzalloc( + sizeof(info->shadow[i].grants_used[0]) * segs, + GFP_NOIO); + if (info->max_indirect_segments) + info->shadow[i].indirect_grants = kzalloc( + sizeof(info->shadow[i].indirect_grants[0]) * + INDIRECT_GREFS(segs), + GFP_NOIO); + if ((info->shadow[i].grants_used == NULL) || + (info->max_indirect_segments && + (info->shadow[i].indirect_grants == NULL))) + goto out_of_memory; + } + + + return 0; + +out_of_memory: + kfree(info->sg); + info->sg = NULL; + for (i = 0; i < BLK_RING_SIZE; i++) { + kfree(info->shadow[i].grants_used); + info->shadow[i].grants_used = NULL; + kfree(info->shadow[i].indirect_grants); + info->shadow[i].indirect_grants = NULL; + } + return -ENOMEM; +} + /* * Invoked when the backend is finally 'ready' (and has told produced * the details about the physical device - #sectors, size, etc). @@ -1414,8 +1722,15 @@ static void blkfront_connect(struct blkfront_info *info) set_capacity(info->gd, sectors); revalidate_disk(info->gd); - /* fall through */ + return; case BLKIF_STATE_SUSPENDED: + /* + * If we are recovering from suspension, we need to wait + * for the backend to announce it's features before + * reconnecting, at least we need to know if the backend + * supports indirect descriptors, and how many. + */ + blkif_recover(info); return; default: @@ -1483,6 +1798,13 @@ static void blkfront_connect(struct blkfront_info *info) else info->feature_persistent = persistent; + err = blkfront_setup_indirect(info); + if (err) { + xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s", + info->xbdev->otherend); + return; + } + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index ffd4652de91c..65e12099ef89 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -102,6 +102,30 @@ typedef uint64_t blkif_sector_t; */ #define BLKIF_OP_DISCARD 5 +/* + * Recognized if "feature-max-indirect-segments" in present in the backend + * xenbus info. The "feature-max-indirect-segments" node contains the maximum + * number of segments allowed by the backend per request. If the node is + * present, the frontend might use blkif_request_indirect structs in order to + * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The + * maximum number of indirect segments is fixed by the backend, but the + * frontend can issue requests with any number of indirect segments as long as + * it's less than the number provided by the backend. The indirect_grefs field + * in blkif_request_indirect should be filled by the frontend with the + * grant references of the pages that are holding the indirect segments. + * This pages are filled with an array of blkif_request_segment_aligned + * that hold the information about the segments. The number of indirect + * pages to use is determined by the maximum number of segments + * a indirect request contains. Every indirect page can contain a maximum + * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)), + * so to calculate the number of indirect pages to use we have to do + * ceil(indirect_segments/512). + * + * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* + * create the "feature-max-indirect-segments" node! + */ +#define BLKIF_OP_INDIRECT 6 + /* * Maximum scatter/gather segments per request. * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. @@ -109,6 +133,16 @@ typedef uint64_t blkif_sector_t; */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 +#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 + +struct blkif_request_segment_aligned { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; + uint16_t _pad; /* padding to make it 8 bytes, so it's cache-aligned */ +} __attribute__((__packed__)); + struct blkif_request_rw { uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ @@ -147,12 +181,31 @@ struct blkif_request_other { uint64_t id; /* private guest value, echoed in resp */ } __attribute__((__packed__)); +struct blkif_request_indirect { + uint8_t indirect_op; + uint16_t nr_segments; +#ifdef CONFIG_X86_64 + uint32_t _pad1; /* offsetof(blkif_...,u.indirect.id) == 8 */ +#endif + uint64_t id; + blkif_sector_t sector_number; + blkif_vdev_t handle; + uint16_t _pad2; + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; +#ifdef CONFIG_X86_64 + uint32_t _pad3; /* make it 64 byte aligned */ +#else + uint64_t _pad3; /* make it 64 byte aligned */ +#endif +} __attribute__((__packed__)); + struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_request_rw rw; struct blkif_request_discard discard; struct blkif_request_other other; + struct blkif_request_indirect indirect; } u; } __attribute__((__packed__)); -- cgit v1.2.3-58-ga151 From bb642e8315fd573795e8b6fa9b9629064d73add1 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Thu, 2 May 2013 10:21:17 +0200 Subject: xen-blkback: allocate list of pending reqs in small chunks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allocate pending requests in smaller chunks instead of allocating them all at the same time. This change also removes the global array of pending_reqs, it is no longer necessay. Variables related to the grant mapping have been grouped into a struct called "grant_page", this allows to allocate them in smaller chunks, and also improves memory locality. Signed-off-by: Roger Pau Monné Reported-by: Sander Eikelenboom Tested-by: Sander Eikelenboom Reviewed-by: David Vrabel Cc: David Vrabel Cc: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 92 ++++++++++++++++--------------------- drivers/block/xen-blkback/common.h | 18 ++++---- drivers/block/xen-blkback/xenbus.c | 74 ++++++++++++++++++++++------- 3 files changed, 106 insertions(+), 78 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 1ebc0aa0f0e4..e79ab4559233 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -641,9 +641,7 @@ purge_gnt_list: * used in the 'pending_req'. */ static void xen_blkbk_unmap(struct xen_blkif *blkif, - grant_handle_t handles[], - struct page *pages[], - struct persistent_gnt *persistent_gnts[], + struct grant_page *pages[], int num) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -652,16 +650,16 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif, int ret; for (i = 0; i < num; i++) { - if (persistent_gnts[i] != NULL) { - put_persistent_gnt(blkif, persistent_gnts[i]); + if (pages[i]->persistent_gnt != NULL) { + put_persistent_gnt(blkif, pages[i]->persistent_gnt); continue; } - if (handles[i] == BLKBACK_INVALID_HANDLE) + if (pages[i]->handle == BLKBACK_INVALID_HANDLE) continue; - unmap_pages[invcount] = pages[i]; - gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]), - GNTMAP_host_map, handles[i]); - handles[i] = BLKBACK_INVALID_HANDLE; + unmap_pages[invcount] = pages[i]->page; + gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page), + GNTMAP_host_map, pages[i]->handle); + pages[i]->handle = BLKBACK_INVALID_HANDLE; if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) { ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); @@ -677,10 +675,8 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif, } } -static int xen_blkbk_map(struct xen_blkif *blkif, grant_ref_t grefs[], - struct persistent_gnt *persistent_gnts[], - grant_handle_t handles[], - struct page *pages[], +static int xen_blkbk_map(struct xen_blkif *blkif, + struct grant_page *pages[], int num, bool ro) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -707,26 +703,26 @@ again: if (use_persistent_gnts) persistent_gnt = get_persistent_gnt( blkif, - grefs[i]); + pages[i]->gref); if (persistent_gnt) { /* * We are using persistent grants and * the grant is already mapped */ - pages[i] = persistent_gnt->page; - persistent_gnts[i] = persistent_gnt; + pages[i]->page = persistent_gnt->page; + pages[i]->persistent_gnt = persistent_gnt; } else { - if (get_free_page(blkif, &pages[i])) + if (get_free_page(blkif, &pages[i]->page)) goto out_of_memory; - addr = vaddr(pages[i]); - pages_to_gnt[segs_to_map] = pages[i]; - persistent_gnts[i] = NULL; + addr = vaddr(pages[i]->page); + pages_to_gnt[segs_to_map] = pages[i]->page; + pages[i]->persistent_gnt = NULL; flags = GNTMAP_host_map; if (!use_persistent_gnts && ro) flags |= GNTMAP_readonly; gnttab_set_map_op(&map[segs_to_map++], addr, - flags, grefs[i], + flags, pages[i]->gref, blkif->domid); } map_until = i + 1; @@ -745,16 +741,16 @@ again: * the page from the other domain. */ for (seg_idx = last_map, new_map_idx = 0; seg_idx < map_until; seg_idx++) { - if (!persistent_gnts[seg_idx]) { + if (!pages[seg_idx]->persistent_gnt) { /* This is a newly mapped grant */ BUG_ON(new_map_idx >= segs_to_map); if (unlikely(map[new_map_idx].status != 0)) { pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); - handles[seg_idx] = BLKBACK_INVALID_HANDLE; + pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE; ret |= 1; goto next; } - handles[seg_idx] = map[new_map_idx].handle; + pages[seg_idx]->handle = map[new_map_idx].handle; } else { continue; } @@ -776,14 +772,14 @@ again: } persistent_gnt->gnt = map[new_map_idx].ref; persistent_gnt->handle = map[new_map_idx].handle; - persistent_gnt->page = pages[seg_idx]; + persistent_gnt->page = pages[seg_idx]->page; if (add_persistent_gnt(blkif, persistent_gnt)) { kfree(persistent_gnt); persistent_gnt = NULL; goto next; } - persistent_gnts[seg_idx] = persistent_gnt; + pages[seg_idx]->persistent_gnt = persistent_gnt; pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n", persistent_gnt->gnt, blkif->persistent_gnt_c, xen_blkif_max_pgrants); @@ -814,15 +810,11 @@ out_of_memory: return -ENOMEM; } -static int xen_blkbk_map_seg(struct pending_req *pending_req, - struct seg_buf seg[], - struct page *pages[]) +static int xen_blkbk_map_seg(struct pending_req *pending_req) { int rc; - rc = xen_blkbk_map(pending_req->blkif, pending_req->grefs, - pending_req->persistent_gnts, - pending_req->grant_handles, pending_req->pages, + rc = xen_blkbk_map(pending_req->blkif, pending_req->segments, pending_req->nr_pages, (pending_req->operation != BLKIF_OP_READ)); @@ -834,9 +826,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, struct seg_buf seg[], struct phys_req *preq) { - struct persistent_gnt **persistent = - pending_req->indirect_persistent_gnts; - struct page **pages = pending_req->indirect_pages; + struct grant_page **pages = pending_req->indirect_pages; struct xen_blkif *blkif = pending_req->blkif; int indirect_grefs, rc, n, nseg, i; struct blkif_request_segment_aligned *segments = NULL; @@ -845,9 +835,10 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, indirect_grefs = INDIRECT_PAGES(nseg); BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST); - rc = xen_blkbk_map(blkif, req->u.indirect.indirect_grefs, - persistent, pending_req->indirect_handles, - pages, indirect_grefs, true); + for (i = 0; i < indirect_grefs; i++) + pages[i]->gref = req->u.indirect.indirect_grefs[i]; + + rc = xen_blkbk_map(blkif, pages, indirect_grefs, true); if (rc) goto unmap; @@ -856,10 +847,10 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, /* Map indirect segments */ if (segments) kunmap_atomic(segments); - segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]); + segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page); } i = n % SEGS_PER_INDIRECT_FRAME; - pending_req->grefs[n] = segments[i].gref; + pending_req->segments[n]->gref = segments[i].gref; seg[n].nsec = segments[i].last_sect - segments[i].first_sect + 1; seg[n].offset = (segments[i].first_sect << 9); @@ -874,8 +865,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, unmap: if (segments) kunmap_atomic(segments); - xen_blkbk_unmap(blkif, pending_req->indirect_handles, - pages, persistent, indirect_grefs); + xen_blkbk_unmap(blkif, pages, indirect_grefs); return rc; } @@ -965,9 +955,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) * the proper response on the ring. */ if (atomic_dec_and_test(&pending_req->pendcnt)) { - xen_blkbk_unmap(pending_req->blkif, pending_req->grant_handles, - pending_req->pages, - pending_req->persistent_gnts, + xen_blkbk_unmap(pending_req->blkif, + pending_req->segments, pending_req->nr_pages); make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); @@ -1104,7 +1093,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, int operation; struct blk_plug plug; bool drain = false; - struct page **pages = pending_req->pages; + struct grant_page **pages = pending_req->segments; unsigned short req_operation; req_operation = req->operation == BLKIF_OP_INDIRECT ? @@ -1165,7 +1154,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, preq.dev = req->u.rw.handle; preq.sector_number = req->u.rw.sector_number; for (i = 0; i < nseg; i++) { - pending_req->grefs[i] = req->u.rw.seg[i].gref; + pages[i]->gref = req->u.rw.seg[i].gref; seg[i].nsec = req->u.rw.seg[i].last_sect - req->u.rw.seg[i].first_sect + 1; seg[i].offset = (req->u.rw.seg[i].first_sect << 9); @@ -1216,7 +1205,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (xen_blkbk_map_seg(pending_req, seg, pages)) + if (xen_blkbk_map_seg(pending_req)) goto fail_flush; /* @@ -1228,7 +1217,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, for (i = 0; i < nseg; i++) { while ((bio == NULL) || (bio_add_page(bio, - pages[i], + pages[i]->page, seg[i].nsec << 9, seg[i].offset) == 0)) { @@ -1277,8 +1266,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, return 0; fail_flush: - xen_blkbk_unmap(blkif, pending_req->grant_handles, - pending_req->pages, pending_req->persistent_gnts, + xen_blkbk_unmap(blkif, pending_req->segments, pending_req->nr_pages); fail_response: /* Haven't submitted any bio's yet. */ diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 1ac53da8410f..c6b4cb9af6c2 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -297,8 +297,6 @@ struct xen_blkif { int free_pages_num; struct list_head free_pages; - /* Allocation of pending_reqs */ - struct pending_req *pending_reqs; /* List of all 'pending_req' available */ struct list_head pending_free; /* And its spinlock. */ @@ -323,6 +321,13 @@ struct seg_buf { unsigned int nsec; }; +struct grant_page { + struct page *page; + struct persistent_gnt *persistent_gnt; + grant_handle_t handle; + grant_ref_t gref; +}; + /* * Each outstanding request that we've passed to the lower device layers has a * 'pending_req' allocated to it. Each buffer_head that completes decrements @@ -337,14 +342,9 @@ struct pending_req { unsigned short operation; int status; struct list_head free_list; - struct page *pages[MAX_INDIRECT_SEGMENTS]; - struct persistent_gnt *persistent_gnts[MAX_INDIRECT_SEGMENTS]; - grant_handle_t grant_handles[MAX_INDIRECT_SEGMENTS]; - grant_ref_t grefs[MAX_INDIRECT_SEGMENTS]; + struct grant_page *segments[MAX_INDIRECT_SEGMENTS]; /* Indirect descriptors */ - struct persistent_gnt *indirect_persistent_gnts[MAX_INDIRECT_PAGES]; - struct page *indirect_pages[MAX_INDIRECT_PAGES]; - grant_handle_t indirect_handles[MAX_INDIRECT_PAGES]; + struct grant_page *indirect_pages[MAX_INDIRECT_PAGES]; struct seg_buf seg[MAX_INDIRECT_SEGMENTS]; struct bio *biolist[MAX_INDIRECT_SEGMENTS]; }; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index afab208c54e3..4a4749c78942 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -105,7 +105,8 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) static struct xen_blkif *xen_blkif_alloc(domid_t domid) { struct xen_blkif *blkif; - int i; + struct pending_req *req, *n; + int i, j; BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST); @@ -127,22 +128,51 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) blkif->free_pages_num = 0; atomic_set(&blkif->persistent_gnt_in_use, 0); - blkif->pending_reqs = kcalloc(XEN_BLKIF_REQS, - sizeof(blkif->pending_reqs[0]), - GFP_KERNEL); - if (!blkif->pending_reqs) { - kmem_cache_free(xen_blkif_cachep, blkif); - return ERR_PTR(-ENOMEM); - } INIT_LIST_HEAD(&blkif->pending_free); + + for (i = 0; i < XEN_BLKIF_REQS; i++) { + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + goto fail; + list_add_tail(&req->free_list, + &blkif->pending_free); + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { + req->segments[j] = kzalloc(sizeof(*req->segments[0]), + GFP_KERNEL); + if (!req->segments[j]) + goto fail; + } + for (j = 0; j < MAX_INDIRECT_PAGES; j++) { + req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]), + GFP_KERNEL); + if (!req->indirect_pages[j]) + goto fail; + } + } spin_lock_init(&blkif->pending_free_lock); init_waitqueue_head(&blkif->pending_free_wq); - for (i = 0; i < XEN_BLKIF_REQS; i++) - list_add_tail(&blkif->pending_reqs[i].free_list, - &blkif->pending_free); - return blkif; + +fail: + list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { + list_del(&req->free_list); + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { + if (!req->segments[j]) + break; + kfree(req->segments[j]); + } + for (j = 0; j < MAX_INDIRECT_PAGES; j++) { + if (!req->indirect_pages[j]) + break; + kfree(req->indirect_pages[j]); + } + kfree(req); + } + + kmem_cache_free(xen_blkif_cachep, blkif); + + return ERR_PTR(-ENOMEM); } static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, @@ -221,18 +251,28 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif) static void xen_blkif_free(struct xen_blkif *blkif) { - struct pending_req *req; - int i = 0; + struct pending_req *req, *n; + int i = 0, j; if (!atomic_dec_and_test(&blkif->refcnt)) BUG(); /* Check that there is no request in use */ - list_for_each_entry(req, &blkif->pending_free, free_list) + list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { + list_del(&req->free_list); + + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) + kfree(req->segments[j]); + + for (j = 0; j < MAX_INDIRECT_PAGES; j++) + kfree(req->indirect_pages[j]); + + kfree(req); i++; - BUG_ON(i != XEN_BLKIF_REQS); + } + + WARN_ON(i != XEN_BLKIF_REQS); - kfree(blkif->pending_reqs); kmem_cache_free(xen_blkif_cachep, blkif); } -- cgit v1.2.3-58-ga151 From b7649158a0d241f8d53d13ff7441858539e16656 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Thu, 2 May 2013 10:58:50 +0200 Subject: xen-blkfront: use a different scatterlist for each request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In blkif_queue_request blkfront iterates over the scatterlist in order to set the segments of the request, and in blkif_completion blkfront iterates over the raw request, which makes it hard to know the exact position of the source and destination memory positions. This can be solved by allocating a scatterlist for each request, that will be keep until the request is finished, allowing us to copy the data back to the original memory without having to iterate over the raw request. Oracle-Bug: 16660413 - LARGE ASYNCHRONOUS READS APPEAR BROKEN ON 2.6.39-400 CC: stable@vger.kernel.org Signed-off-by: Roger Pau Monné Reported-and-Tested-by: Anne Milicia Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 82d63d5b1750..bac8cf31319b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -76,6 +76,7 @@ struct blk_shadow { struct request *request; struct grant **grants_used; struct grant **indirect_grants; + struct scatterlist *sg; }; struct split_bio { @@ -113,7 +114,6 @@ struct blkfront_info enum blkif_state connected; int ring_ref; struct blkif_front_ring ring; - struct scatterlist *sg; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; @@ -438,7 +438,7 @@ static int blkif_queue_request(struct request *req) req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); BUG_ON(info->max_indirect_segments && req->nr_phys_segments > info->max_indirect_segments); - nseg = blk_rq_map_sg(req->q, req, info->sg); + nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg); ring_req->u.rw.id = id; if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) { /* @@ -469,7 +469,7 @@ static int blkif_queue_request(struct request *req) } ring_req->u.rw.nr_segments = nseg; } - for_each_sg(info->sg, sg, nseg, i) { + for_each_sg(info->shadow[id].sg, sg, nseg, i) { fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; @@ -914,8 +914,6 @@ static void blkif_free(struct blkfront_info *info, int suspend) } BUG_ON(info->persistent_gnts_c != 0); - kfree(info->sg); - info->sg = NULL; for (i = 0; i < BLK_RING_SIZE; i++) { /* * Clear persistent grants present in requests already @@ -953,6 +951,8 @@ free_shadow: info->shadow[i].grants_used = NULL; kfree(info->shadow[i].indirect_grants); info->shadow[i].indirect_grants = NULL; + kfree(info->shadow[i].sg); + info->shadow[i].sg = NULL; } /* No more gnttab callback work. */ @@ -979,12 +979,9 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, struct blkif_response *bret) { int i = 0; - struct bio_vec *bvec; - struct req_iterator iter; - unsigned long flags; + struct scatterlist *sg; char *bvec_data; void *shared_data; - unsigned int offset = 0; int nseg; nseg = s->req.operation == BLKIF_OP_INDIRECT ? @@ -997,19 +994,16 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, * than PAGE_SIZE, we have to keep track of the current offset, * to be sure we are copying the data from the right shared page. */ - rq_for_each_segment(bvec, s->request, iter) { - BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); - if (bvec->bv_offset < offset) - i++; - BUG_ON(i >= nseg); + for_each_sg(s->sg, sg, nseg, i) { + BUG_ON(sg->offset + sg->length > PAGE_SIZE); shared_data = kmap_atomic( pfn_to_page(s->grants_used[i]->pfn)); - bvec_data = bvec_kmap_irq(bvec, &flags); - memcpy(bvec_data, shared_data + bvec->bv_offset, - bvec->bv_len); - bvec_kunmap_irq(bvec_data, &flags); + bvec_data = kmap_atomic(sg_page(sg)); + memcpy(bvec_data + sg->offset, + shared_data + sg->offset, + sg->length); + kunmap_atomic(bvec_data); kunmap_atomic(shared_data); - offset = bvec->bv_offset + bvec->bv_len; } } /* Add the persistent grant into the list of free grants */ @@ -1656,10 +1650,6 @@ static int blkfront_setup_indirect(struct blkfront_info *info) xen_blkif_max_segments); segs = info->max_indirect_segments; } - info->sg = kzalloc(sizeof(info->sg[0]) * segs, GFP_KERNEL); - if (info->sg == NULL) - goto out_of_memory; - sg_init_table(info->sg, segs); err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE); if (err) @@ -1669,26 +1659,29 @@ static int blkfront_setup_indirect(struct blkfront_info *info) info->shadow[i].grants_used = kzalloc( sizeof(info->shadow[i].grants_used[0]) * segs, GFP_NOIO); + info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * segs, GFP_NOIO); if (info->max_indirect_segments) info->shadow[i].indirect_grants = kzalloc( sizeof(info->shadow[i].indirect_grants[0]) * INDIRECT_GREFS(segs), GFP_NOIO); if ((info->shadow[i].grants_used == NULL) || + (info->shadow[i].sg == NULL) || (info->max_indirect_segments && (info->shadow[i].indirect_grants == NULL))) goto out_of_memory; + sg_init_table(info->shadow[i].sg, segs); } return 0; out_of_memory: - kfree(info->sg); - info->sg = NULL; for (i = 0; i < BLK_RING_SIZE; i++) { kfree(info->shadow[i].grants_used); info->shadow[i].grants_used = NULL; + kfree(info->shadow[i].sg); + info->shadow[i].sg = NULL; kfree(info->shadow[i].indirect_grants); info->shadow[i].indirect_grants = NULL; } -- cgit v1.2.3-58-ga151 From 2d5dc3ba853344f39a41ae5bdb0a337b2ecaafa6 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 15 May 2013 10:39:34 -0400 Subject: xen-blkfront: Introduce a 'max' module parameter to alter the amount of indirect segments. The max module parameter (by default 32) is the maximum number of segments that the frontend will negotiate with the backend for indirect descriptors. Higher value means more potential throughput but more memory usage. The backend picks the minimum of the frontend and its default backend value. Signed-off-by: Konrad Rzeszutek Wilk --- Documentation/ABI/testing/sysfs-driver-xen-blkfront | 10 ++++++++++ drivers/block/xen-blkfront.c | 2 ++ 2 files changed, 12 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-driver-xen-blkfront (limited to 'drivers') diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkfront b/Documentation/ABI/testing/sysfs-driver-xen-blkfront new file mode 100644 index 000000000000..c0a6cb7eb314 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-xen-blkfront @@ -0,0 +1,10 @@ +What: /sys/module/xen_blkfront/parameters/max +Date: June 2013 +KernelVersion: 3.11 +Contact: Konrad Rzeszutek Wilk +Description: + Maximum number of segments that the frontend will negotiate + with the backend for indirect descriptors. The default value + is 32 - higher value means more potential throughput but more + memory usage. The backend picks the minimum of the frontend + and its default backend value. diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index bac8cf31319b..08bdfc3b2de2 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -95,6 +95,8 @@ static const struct block_device_operations xlvbd_block_fops; */ static unsigned int xen_blkif_max_segments = 32; +module_param_named(max, xen_blkif_max_segments, int, S_IRUGO); +MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)"); #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) -- cgit v1.2.3-58-ga151 From 7c4d7d710f7eb499ec483f25acc28b53adaa3260 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Mon, 13 May 2013 16:28:15 +0200 Subject: xen/blkback: Use physical sector size for setup Currently xen-blkback passes the logical sector size over xenbus and xen-blkfront sets up the paravirt disk with that logical block size. But newer drives usually have the logical sector size set to 512 for compatibility reasons and would show the actual sector size only in physical sector size. This results in the device being partitioned and accessed in dom0 with the correct sector size, but the guest thinks 512 bytes is the correct block size. And that results in poor performance. To fix this, blkback gets modified to pass also physical-sector-size over xenbus and blkfront to use both values to set up the paravirt disk. I did not just change the passed in sector-size because I am not sure having a bigger logical sector size than the physical one is valid (and that would happen if a newer dom0 kernel hits an older domU kernel). Also this way a domU set up before should still be accessible (just some tools might detect the unaligned setup). [v2: Make xenbus write failure non-fatal] [v3: Use xenbus_scanf instead of xenbus_gather] [v4: Rebased against segment changes] Signed-off-by: Stefan Bader Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 5 +++++ drivers/block/xen-blkfront.c | 21 ++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 4a4749c78942..7b06f943371c 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -782,6 +782,11 @@ again: dev->nodename); goto abort; } + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", + bdev_physical_block_size(be->blkif->vbd.bdev)); + if (err) + xenbus_dev_error(dev, err, "writing %s/physical-sector-size", + dev->nodename); err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 08bdfc3b2de2..1a0f67c10ec7 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -609,6 +609,7 @@ wait: } static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, + unsigned int physical_sector_size, unsigned int segments) { struct request_queue *rq; @@ -631,6 +632,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, sector_size); + blk_queue_physical_block_size(rq, physical_sector_size); blk_queue_max_hw_sectors(rq, 512); /* Each segment in a request is up to an aligned page in size. */ @@ -737,7 +739,8 @@ static char *encode_disk_name(char *ptr, unsigned int n) static int xlvbd_alloc_gendisk(blkif_sector_t capacity, struct blkfront_info *info, - u16 vdisk_info, u16 sector_size) + u16 vdisk_info, u16 sector_size, + unsigned int physical_sector_size) { struct gendisk *gd; int nr_minors = 1; @@ -804,7 +807,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, gd->driverfs_dev = &(info->xbdev->dev); set_capacity(gd, capacity); - if (xlvbd_init_blk_queue(gd, sector_size, + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size, info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST)) { del_gendisk(gd); @@ -1698,6 +1701,7 @@ static void blkfront_connect(struct blkfront_info *info) { unsigned long long sectors; unsigned long sector_size; + unsigned int physical_sector_size; unsigned int binfo; int err; int barrier, flush, discard, persistent; @@ -1747,6 +1751,16 @@ static void blkfront_connect(struct blkfront_info *info) return; } + /* + * physcial-sector-size is a newer field, so old backends may not + * provide this. Assume physical sector size to be the same as + * sector_size in that case. + */ + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "physical-sector-size", "%u", &physical_sector_size); + if (err != 1) + physical_sector_size = sector_size; + info->feature_flush = 0; info->flush_op = 0; @@ -1800,7 +1814,8 @@ static void blkfront_connect(struct blkfront_info *info) return; } - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, + physical_sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", info->xbdev->otherend); -- cgit v1.2.3-58-ga151 From 604c499cbbcc3d5fe5fb8d53306aa0fae1990109 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 16 Jan 2013 11:33:52 -0500 Subject: xen/blkback: Check device permissions before allowing OP_DISCARD We need to make sure that the device is not RO or that the request is not past the number of sectors we want to issue the DISCARD operation for. This fixes CVE-2013-2140. Cc: stable@vger.kernel.org Acked-by: Jan Beulich Acked-by: Ian Campbell [v1: Made it pr_warn instead of pr_debug] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index e79ab4559233..4119bcdefd1a 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -876,7 +876,18 @@ static int dispatch_discard_io(struct xen_blkif *blkif, int status = BLKIF_RSP_OKAY; struct block_device *bdev = blkif->vbd.bdev; unsigned long secure; + struct phys_req preq; + + preq.sector_number = req->u.discard.sector_number; + preq.nr_sects = req->u.discard.nr_sectors; + err = xen_vbd_translate(&preq, blkif, WRITE); + if (err) { + pr_warn(DRV_PFX "access denied: DISCARD [%llu->%llu] on dev=%04x\n", + preq.sector_number, + preq.sector_number + preq.nr_sects, blkif->vbd.pdevice); + goto fail_response; + } blkif->st_ds_req++; xen_blkif_get(blkif); @@ -887,7 +898,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif, err = blkdev_issue_discard(bdev, req->u.discard.sector_number, req->u.discard.nr_sectors, GFP_KERNEL, secure); - +fail_response: if (err == -EOPNOTSUPP) { pr_debug(DRV_PFX "discard op failed, not supported\n"); status = BLKIF_RSP_EOPNOTSUPP; -- cgit v1.2.3-58-ga151 From 8e3f8755545cc4a7f4da8e9ef76d6d32e0dca576 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 23 Jan 2013 16:54:32 -0500 Subject: xen/blkback: Check for insane amounts of request on the ring (v6). Check that the ring does not have an insane amount of requests (more than there could fit on the ring). If we detect this case we will stop processing the requests and wait until the XenBus disconnects the ring. The existing check RING_REQUEST_CONS_OVERFLOW which checks for how many responses we have created in the past (rsp_prod_pvt) vs requests consumed (req_cons) and whether said difference is greater or equal to the size of the ring, does not catch this case. Wha the condition does check if there is a need to process more as we still have a backlog of responses to finish. Note that both of those values (rsp_prod_pvt and req_cons) are not exposed on the shared ring. To understand this problem a mini crash course in ring protocol response/request updates is in place. There are four entries: req_prod and rsp_prod; req_event and rsp_event to track the ring entries. We are only concerned about the first two - which set the tone of this bug. The req_prod is a value incremented by frontend for each request put on the ring. Conversely the rsp_prod is a value incremented by the backend for each response put on the ring (rsp_prod gets set by rsp_prod_pvt when pushing the responses on the ring). Both values can wrap and are modulo the size of the ring (in block case that is 32). Please see RING_GET_REQUEST and RING_GET_RESPONSE for the more details. The culprit here is that if the difference between the req_prod and req_cons is greater than the ring size we have a problem. Fortunately for us, the '__do_block_io_op' loop: rc = blk_rings->common.req_cons; rp = blk_rings->common.sring->req_prod; while (rc != rp) { .. blk_rings->common.req_cons = ++rc; /* before make_response() */ } will loop up to the point when rc == rp. The macros inside of the loop (RING_GET_REQUEST) is smart and is indexing based on the modulo of the ring size. If the frontend has provided a bogus req_prod value we will loop until the 'rc == rp' - which means we could be processing already processed requests (or responses) often. The reason the RING_REQUEST_CONS_OVERFLOW is not helping here is b/c it only tracks how many responses we have internally produced and whether we would should process more. The astute reader will notice that the macro RING_REQUEST_CONS_OVERFLOW provides two arguments - more on this later. For example, if we were to enter this function with these values: blk_rings->common.sring->req_prod = X+31415 (X is the value from the last time __do_block_io_op was called). blk_rings->common.req_cons = X blk_rings->common.rsp_prod_pvt = X The RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, blk_rings->common.req_cons) is doing: req_cons - rsp_prod_pvt >= 32 Which is, X - X >= 32 or 0 >= 32 And that is false, so we continue on looping (this bug). If we re-use said macro RING_REQUEST_CONS_OVERFLOW and pass in the rp instead (sring->req_prod) of rc, the this macro can do the check: req_prod - rsp_prov_pvt >= 32 Which is, X + 31415 - X >= 32 , or 31415 >= 32 which is true, so we can error out and break out of the function. Unfortunatly the difference between rsp_prov_pvt and req_prod can be at 32 (which would error out in the macro). This condition exists when the backend is lagging behind with the responses and still has not finished responding to all of them (so make_response has not been called), and the rsp_prov_pvt + 32 == req_cons. This ends up with us not being able to use said macro. Hence introducing a new macro called RING_REQUEST_PROD_OVERFLOW which does a simple check of: req_prod - rsp_prod_pvt > RING_SIZE And with the X values from above: X + 31415 - X > 32 Returns true. Also not that if the ring is full (which is where the RING_REQUEST_CONS_OVERFLOW triggered), we would not hit the same condition: X + 32 - X > 32 Which is false. Lets use that macro. Note that in v5 of this patchset the macro was different - we used an earlier version. Cc: stable@vger.kernel.org [v1: Move the check outside the loop] [v2: Add a pr_warn as suggested by David] [v3: Use RING_REQUEST_CONS_OVERFLOW as suggested by Jan] [v4: Move wake_up after kthread_stop as suggested by Jan] [v5: Use RING_REQUEST_PROD_OVERFLOW instead] [v6: Use RING_REQUEST_PROD_OVERFLOW - Jan's version] Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Jan Beulich gadsa --- drivers/block/xen-blkback/blkback.c | 13 ++++++++++++- drivers/block/xen-blkback/common.h | 2 ++ drivers/block/xen-blkback/xenbus.c | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 4119bcdefd1a..ea158fe0c9a4 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -571,6 +571,7 @@ int xen_blkif_schedule(void *arg) struct xen_blkif *blkif = arg; struct xen_vbd *vbd = &blkif->vbd; unsigned long timeout; + int ret; xen_blkif_get(blkif); @@ -599,8 +600,12 @@ int xen_blkif_schedule(void *arg) blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ - if (do_block_io_op(blkif)) + ret = do_block_io_op(blkif); + if (ret > 0) blkif->waiting_reqs = 1; + if (ret == -EACCES) + wait_event_interruptible(blkif->shutdown_wq, + kthread_should_stop()); purge_gnt_list: if (blkif->vbd.feature_gnt_persistent && @@ -1009,6 +1014,12 @@ __do_block_io_op(struct xen_blkif *blkif) rp = blk_rings->common.sring->req_prod; rmb(); /* Ensure we see queued requests up to 'rp'. */ + if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) { + rc = blk_rings->common.rsp_prod_pvt; + pr_warn(DRV_PFX "Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n", + rp, rc, rp - rc, blkif->vbd.pdevice); + return -EACCES; + } while (rc != rp) { if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index c6b4cb9af6c2..8d8807563d99 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -314,6 +314,8 @@ struct xen_blkif { unsigned long long st_wr_sect; wait_queue_head_t waiting_to_free; + /* Thread shutdown wait queue. */ + wait_queue_head_t shutdown_wq; }; struct seg_buf { diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 7b06f943371c..2e5b69d612ac 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -151,6 +151,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) } spin_lock_init(&blkif->pending_free_lock); init_waitqueue_head(&blkif->pending_free_wq); + init_waitqueue_head(&blkif->shutdown_wq); return blkif; @@ -231,6 +232,7 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif) { if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); + wake_up(&blkif->shutdown_wq); blkif->xenblkd = NULL; } -- cgit v1.2.3-58-ga151 From a3299ab18591d36ad5622f5064619123c439b779 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 18 Jun 2013 14:34:54 -0500 Subject: rsxx: Individual workqueues for interruptible events. Giving all interrupt based events their own workqueue to complete tasks on. This fixes a bug that would cause creg commands to timeout if too many are issued at once. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 23 ++++++++++++++++++++--- drivers/block/rsxx/cregs.c | 5 +++++ drivers/block/rsxx/rsxx_priv.h | 2 ++ 3 files changed, 27 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 5af21f2db29c..774f810c6a9c 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -163,12 +163,13 @@ static irqreturn_t rsxx_isr(int irq, void *pdata) } if (isr & CR_INTR_CREG) { - schedule_work(&card->creg_ctrl.done_work); + queue_work(card->creg_ctrl.creg_wq, + &card->creg_ctrl.done_work); handled++; } if (isr & CR_INTR_EVENT) { - schedule_work(&card->event_work); + queue_work(card->event_wq, &card->event_work); rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); handled++; } @@ -610,7 +611,11 @@ static int rsxx_pci_probe(struct pci_dev *dev, } /************* Setup Processor Command Interface *************/ - rsxx_creg_setup(card); + st = rsxx_creg_setup(card); + if (st) { + dev_err(CARD_TO_DEV(card), "Failed to setup creg interface.\n"); + goto failed_creg_setup; + } spin_lock_irq(&card->irq_lock); rsxx_enable_ier_and_isr(card, CR_INTR_CREG); @@ -650,6 +655,12 @@ static int rsxx_pci_probe(struct pci_dev *dev, } /************* Setup Card Event Handler *************/ + card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event"); + if (!card->event_wq) { + dev_err(CARD_TO_DEV(card), "Failed card event setup.\n"); + goto failed_event_handler; + } + INIT_WORK(&card->event_work, card_event_handler); st = rsxx_setup_dev(card); @@ -688,9 +699,15 @@ static int rsxx_pci_probe(struct pci_dev *dev, return 0; failed_create_dev: + destroy_workqueue(card->event_wq); + card->event_wq = NULL; +failed_event_handler: rsxx_dma_destroy(card); failed_dma_setup: failed_compatiblity_check: + destroy_workqueue(card->creg_ctrl.creg_wq); + card->creg_ctrl.creg_wq = NULL; +failed_creg_setup: spin_lock_irq(&card->irq_lock); rsxx_disable_ier_and_isr(card, CR_INTR_ALL); spin_unlock_irq(&card->irq_lock); diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index 4b5c020a0a65..4914464c19fc 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -727,6 +727,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card) { card->creg_ctrl.active_cmd = NULL; + card->creg_ctrl.creg_wq = + create_singlethread_workqueue(DRIVER_NAME"_creg"); + if (!card->creg_ctrl.creg_wq) + return -ENOMEM; + INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done); mutex_init(&card->creg_ctrl.reset_lock); INIT_LIST_HEAD(&card->creg_ctrl.queue); diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index 382e8bf5c03b..0dd62d966772 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -134,6 +134,7 @@ struct rsxx_cardinfo { spinlock_t lock; bool active; struct creg_cmd *active_cmd; + struct workqueue_struct *creg_wq; struct work_struct done_work; struct list_head queue; unsigned int q_depth; @@ -154,6 +155,7 @@ struct rsxx_cardinfo { int buf_len; } log; + struct workqueue_struct *event_wq; struct work_struct event_work; unsigned int state; u64 size8; -- cgit v1.2.3-58-ga151 From 0ab4743ebc18c23bddf3e288cfc6221ec71533ac Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 18 Jun 2013 14:36:26 -0500 Subject: rsxx: Restructured DMA cancel scheme. Before, DMAs would never be cancelled if there was a data stall or an EEH Permenant failure which would cause an unrecoverable I/O hang. The DMA cancellation mechanism has been modified to fix these issues and allows DMAs to be cancelled during the above mentioned events. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 17 ++++- drivers/block/rsxx/dev.c | 6 +- drivers/block/rsxx/dma.c | 161 +++++++++++++++++++---------------------- drivers/block/rsxx/rsxx_priv.h | 4 +- 4 files changed, 95 insertions(+), 93 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 774f810c6a9c..aca3f198e5cd 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -368,15 +368,26 @@ static void rsxx_eeh_failure(struct pci_dev *dev) { struct rsxx_cardinfo *card = pci_get_drvdata(dev); int i; + int cnt = 0; dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n"); card->eeh_state = 1; + card->halt = 1; + + for (i = 0; i < card->n_targets; i++) { + spin_lock_bh(&card->ctrl[i].queue_lock); + cnt = rsxx_cleanup_dma_queue(&card->ctrl[i], + &card->ctrl[i].queue); + spin_unlock_bh(&card->ctrl[i].queue_lock); - for (i = 0; i < card->n_targets; i++) - del_timer_sync(&card->ctrl[i].activity_timer); + cnt += rsxx_dma_cancel(&card->ctrl[i]); - rsxx_eeh_cancel_dmas(card); + if (cnt) + dev_info(CARD_TO_DEV(card), + "Freed %d queued DMAs on channel %d\n", + cnt, card->ctrl[i].id); + } } static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card) diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 4346d17d2949..604ad2dafa43 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -155,7 +155,8 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card, atomic_set(&meta->error, 1); if (atomic_dec_and_test(&meta->pending_dmas)) { - disk_stats_complete(card, meta->bio, meta->start_time); + if (!card->eeh_state && card->gendisk) + disk_stats_complete(card, meta->bio, meta->start_time); bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0); kmem_cache_free(bio_meta_pool, meta); @@ -196,7 +197,8 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio) atomic_set(&bio_meta->pending_dmas, 0); bio_meta->start_time = jiffies; - disk_stats_start(card, bio); + if (!unlikely(card->halt)) + disk_stats_start(card, bio); dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n", bio_data_dir(bio) ? 'W' : 'R', bio_meta, diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 0607513cfb41..213e40e4bd92 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -245,6 +245,22 @@ static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl, kmem_cache_free(rsxx_dma_pool, dma); } +int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, + struct list_head *q) +{ + struct rsxx_dma *dma; + struct rsxx_dma *tmp; + int cnt = 0; + + list_for_each_entry_safe(dma, tmp, q, list) { + list_del(&dma->list); + rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); + cnt++; + } + + return cnt; +} + static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma) { @@ -252,9 +268,9 @@ static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl, * Requeued DMAs go to the front of the queue so they are issued * first. */ - spin_lock(&ctrl->queue_lock); + spin_lock_bh(&ctrl->queue_lock); list_add(&dma->list, &ctrl->queue); - spin_unlock(&ctrl->queue_lock); + spin_unlock_bh(&ctrl->queue_lock); } static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, @@ -329,6 +345,7 @@ static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, static void dma_engine_stalled(unsigned long data) { struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data; + int cnt; if (atomic_read(&ctrl->stats.hw_q_depth) == 0 || unlikely(ctrl->card->eeh_state)) @@ -349,6 +366,18 @@ static void dma_engine_stalled(unsigned long data) "DMA channel %d has stalled, faulting interface.\n", ctrl->id); ctrl->card->dma_fault = 1; + + /* Clean up the DMA queue */ + spin_lock(&ctrl->queue_lock); + cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue); + spin_unlock(&ctrl->queue_lock); + + cnt += rsxx_dma_cancel(ctrl); + + if (cnt) + dev_info(CARD_TO_DEV(ctrl->card), + "Freed %d queued DMAs on channel %d\n", + cnt, ctrl->id); } } @@ -368,22 +397,22 @@ static void rsxx_issue_dmas(struct work_struct *work) return; while (1) { - spin_lock(&ctrl->queue_lock); + spin_lock_bh(&ctrl->queue_lock); if (list_empty(&ctrl->queue)) { - spin_unlock(&ctrl->queue_lock); + spin_unlock_bh(&ctrl->queue_lock); break; } - spin_unlock(&ctrl->queue_lock); + spin_unlock_bh(&ctrl->queue_lock); tag = pop_tracker(ctrl->trackers); if (tag == -1) break; - spin_lock(&ctrl->queue_lock); + spin_lock_bh(&ctrl->queue_lock); dma = list_entry(ctrl->queue.next, struct rsxx_dma, list); list_del(&dma->list); ctrl->stats.sw_q_depth--; - spin_unlock(&ctrl->queue_lock); + spin_unlock_bh(&ctrl->queue_lock); /* * This will catch any DMAs that slipped in right before the @@ -520,33 +549,10 @@ static void rsxx_dma_done(struct work_struct *work) rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id)); spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); - spin_lock(&ctrl->queue_lock); + spin_lock_bh(&ctrl->queue_lock); if (ctrl->stats.sw_q_depth) queue_work(ctrl->issue_wq, &ctrl->issue_dma_work); - spin_unlock(&ctrl->queue_lock); -} - -static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card, - struct list_head *q) -{ - struct rsxx_dma *dma; - struct rsxx_dma *tmp; - int cnt = 0; - - list_for_each_entry_safe(dma, tmp, q, list) { - list_del(&dma->list); - - if (dma->dma_addr) - pci_unmap_page(card->dev, dma->dma_addr, - get_dma_size(dma), - (dma->cmd == HW_CMD_BLK_WRITE) ? - PCI_DMA_TODEVICE : - PCI_DMA_FROMDEVICE); - kmem_cache_free(rsxx_dma_pool, dma); - cnt++; - } - - return cnt; + spin_unlock_bh(&ctrl->queue_lock); } static int rsxx_queue_discard(struct rsxx_cardinfo *card, @@ -698,10 +704,10 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, for (i = 0; i < card->n_targets; i++) { if (!list_empty(&dma_list[i])) { - spin_lock(&card->ctrl[i].queue_lock); + spin_lock_bh(&card->ctrl[i].queue_lock); card->ctrl[i].stats.sw_q_depth += dma_cnt[i]; list_splice_tail(&dma_list[i], &card->ctrl[i].queue); - spin_unlock(&card->ctrl[i].queue_lock); + spin_unlock_bh(&card->ctrl[i].queue_lock); queue_work(card->ctrl[i].issue_wq, &card->ctrl[i].issue_dma_work); @@ -711,8 +717,11 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, return 0; bvec_err: - for (i = 0; i < card->n_targets; i++) - rsxx_cleanup_dma_queue(card, &dma_list[i]); + for (i = 0; i < card->n_targets; i++) { + spin_lock_bh(&card->ctrl[i].queue_lock); + rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i]); + spin_unlock_bh(&card->ctrl[i].queue_lock); + } return st; } @@ -918,13 +927,30 @@ failed_dma_setup: return st; } +int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl) +{ + struct rsxx_dma *dma; + int i; + int cnt = 0; + + /* Clean up issued DMAs */ + for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) { + dma = get_tracker_dma(ctrl->trackers, i); + if (dma) { + atomic_dec(&ctrl->stats.hw_q_depth); + rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); + push_tracker(ctrl->trackers, i); + cnt++; + } + } + + return cnt; +} void rsxx_dma_destroy(struct rsxx_cardinfo *card) { struct rsxx_dma_ctrl *ctrl; - struct rsxx_dma *dma; - int i, j; - int cnt = 0; + int i; for (i = 0; i < card->n_targets; i++) { ctrl = &card->ctrl[i]; @@ -943,33 +969,11 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card) del_timer_sync(&ctrl->activity_timer); /* Clean up the DMA queue */ - spin_lock(&ctrl->queue_lock); - cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue); - spin_unlock(&ctrl->queue_lock); + spin_lock_bh(&ctrl->queue_lock); + rsxx_cleanup_dma_queue(ctrl, &ctrl->queue); + spin_unlock_bh(&ctrl->queue_lock); - if (cnt) - dev_info(CARD_TO_DEV(card), - "Freed %d queued DMAs on channel %d\n", - cnt, i); - - /* Clean up issued DMAs */ - for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) { - dma = get_tracker_dma(ctrl->trackers, j); - if (dma) { - pci_unmap_page(card->dev, dma->dma_addr, - get_dma_size(dma), - (dma->cmd == HW_CMD_BLK_WRITE) ? - PCI_DMA_TODEVICE : - PCI_DMA_FROMDEVICE); - kmem_cache_free(rsxx_dma_pool, dma); - cnt++; - } - } - - if (cnt) - dev_info(CARD_TO_DEV(card), - "Freed %d pending DMAs on channel %d\n", - cnt, i); + rsxx_dma_cancel(ctrl); vfree(ctrl->trackers); @@ -1013,7 +1017,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) cnt++; } - spin_lock(&card->ctrl[i].queue_lock); + spin_lock_bh(&card->ctrl[i].queue_lock); list_splice(&issued_dmas[i], &card->ctrl[i].queue); atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth); @@ -1028,7 +1032,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); } - spin_unlock(&card->ctrl[i].queue_lock); + spin_unlock_bh(&card->ctrl[i].queue_lock); } kfree(issued_dmas); @@ -1036,30 +1040,13 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) return 0; } -void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card) -{ - struct rsxx_dma *dma; - struct rsxx_dma *tmp; - int i; - - for (i = 0; i < card->n_targets; i++) { - spin_lock(&card->ctrl[i].queue_lock); - list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) { - list_del(&dma->list); - - rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED); - } - spin_unlock(&card->ctrl[i].queue_lock); - } -} - int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card) { struct rsxx_dma *dma; int i; for (i = 0; i < card->n_targets; i++) { - spin_lock(&card->ctrl[i].queue_lock); + spin_lock_bh(&card->ctrl[i].queue_lock); list_for_each_entry(dma, &card->ctrl[i].queue, list) { dma->dma_addr = pci_map_page(card->dev, dma->page, dma->pg_off, get_dma_size(dma), @@ -1067,12 +1054,12 @@ int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card) PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); if (!dma->dma_addr) { - spin_unlock(&card->ctrl[i].queue_lock); + spin_unlock_bh(&card->ctrl[i].queue_lock); kmem_cache_free(rsxx_dma_pool, dma); return -ENOMEM; } } - spin_unlock(&card->ctrl[i].queue_lock); + spin_unlock_bh(&card->ctrl[i].queue_lock); } return 0; diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index 0dd62d966772..60b6ed6779ac 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -39,6 +39,7 @@ #include #include #include +#include #include "rsxx.h" #include "rsxx_cfg.h" @@ -374,6 +375,8 @@ typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card, int rsxx_dma_setup(struct rsxx_cardinfo *card); void rsxx_dma_destroy(struct rsxx_cardinfo *card); int rsxx_dma_init(void); +int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, struct list_head *q); +int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl); void rsxx_dma_cleanup(void); void rsxx_dma_queue_reset(struct rsxx_cardinfo *card); int rsxx_dma_configure(struct rsxx_cardinfo *card); @@ -384,7 +387,6 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, void *cb_data); int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl); int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card); -void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card); int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card); /***** cregs.c *****/ -- cgit v1.2.3-58-ga151 From 31a70bb4440c963e69ce210389d8119c70b5c39d Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 18 Jun 2013 14:38:26 -0500 Subject: rsxx: Fixes soft-lockup issues during DMAs. The workqueue mechanism has been reworked to prevent soft lockup issues from occuring by adding in mutex sychronization. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/dma.c | 35 +++++++++++++++++++++++++++-------- drivers/block/rsxx/rsxx_priv.h | 1 + 2 files changed, 28 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 213e40e4bd92..b485a65b8de1 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -381,15 +381,13 @@ static void dma_engine_stalled(unsigned long data) } } -static void rsxx_issue_dmas(struct work_struct *work) +static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl) { - struct rsxx_dma_ctrl *ctrl; struct rsxx_dma *dma; int tag; int cmds_pending = 0; struct hw_cmd *hw_cmd_buf; - ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); hw_cmd_buf = ctrl->cmd.buf; if (unlikely(ctrl->card->halt) || @@ -469,9 +467,8 @@ static void rsxx_issue_dmas(struct work_struct *work) } } -static void rsxx_dma_done(struct work_struct *work) +static void rsxx_dma_done(struct rsxx_dma_ctrl *ctrl) { - struct rsxx_dma_ctrl *ctrl; struct rsxx_dma *dma; unsigned long flags; u16 count; @@ -479,7 +476,6 @@ static void rsxx_dma_done(struct work_struct *work) u8 tag; struct hw_status *hw_st_buf; - ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work); hw_st_buf = ctrl->status.buf; if (unlikely(ctrl->card->halt) || @@ -555,6 +551,28 @@ static void rsxx_dma_done(struct work_struct *work) spin_unlock_bh(&ctrl->queue_lock); } +static void rsxx_schedule_issue(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + + ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); + + mutex_lock(&ctrl->work_lock); + rsxx_issue_dmas(ctrl); + mutex_unlock(&ctrl->work_lock); +} + +static void rsxx_schedule_done(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + + ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work); + + mutex_lock(&ctrl->work_lock); + rsxx_dma_done(ctrl); + mutex_unlock(&ctrl->work_lock); +} + static int rsxx_queue_discard(struct rsxx_cardinfo *card, struct list_head *q, unsigned int laddr, @@ -789,6 +807,7 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev, spin_lock_init(&ctrl->trackers->lock); spin_lock_init(&ctrl->queue_lock); + mutex_init(&ctrl->work_lock); INIT_LIST_HEAD(&ctrl->queue); setup_timer(&ctrl->activity_timer, dma_engine_stalled, @@ -802,8 +821,8 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev, if (!ctrl->done_wq) return -ENOMEM; - INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas); - INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done); + INIT_WORK(&ctrl->issue_dma_work, rsxx_schedule_issue); + INIT_WORK(&ctrl->dma_done_work, rsxx_schedule_done); st = rsxx_hw_buffers_init(dev, ctrl); if (st) diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index 60b6ed6779ac..c968a6918b6e 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -115,6 +115,7 @@ struct rsxx_dma_ctrl { struct timer_list activity_timer; struct dma_tracker_list *trackers; struct rsxx_dma_stats stats; + struct mutex work_lock; }; struct rsxx_cardinfo { -- cgit v1.2.3-58-ga151 From 7b379cc3785bfa827249a265548a055e934eaaea Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 18 Jun 2013 14:39:44 -0500 Subject: rsxx: Allow block size to be determined by configuration. Previously, the block size was determined by whether or not our Hardware could handle 512 byte accesses. Now, all of our Hardware can handle 512 and 4096 block sizes. This fix allows it to be user configurable. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/dev.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 604ad2dafa43..1fa7cccd6866 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -227,24 +227,6 @@ static bool rsxx_discard_supported(struct rsxx_cardinfo *card) return (pci_rev >= RSXX_DISCARD_SUPPORT); } -static unsigned short rsxx_get_logical_block_size( - struct rsxx_cardinfo *card) -{ - u32 capabilities = 0; - int st; - - st = rsxx_get_card_capabilities(card, &capabilities); - if (st) - dev_warn(CARD_TO_DEV(card), - "Failed reading card capabilities register\n"); - - /* Earlier firmware did not have support for 512 byte accesses */ - if (capabilities & CARD_CAP_SUBPAGE_WRITES) - return 512; - else - return RSXX_HW_BLK_SIZE; -} - int rsxx_attach_dev(struct rsxx_cardinfo *card) { mutex_lock(&card->dev_lock); @@ -307,7 +289,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) return -ENOMEM; } - blk_size = rsxx_get_logical_block_size(card); + blk_size = card->config.data.block_size; blk_queue_make_request(card->queue, rsxx_make_request); blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY); -- cgit v1.2.3-58-ga151 From fb065cd9e0058551b08d6d32ff0494848c9e213d Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 18 Jun 2013 14:42:36 -0500 Subject: rsxx: Adding in sync_start module paramenter. Before, the partition table would have to be reread because our card was attached before it transistioned out of it's 'starting' state. This change will cause the driver to wait to attach the device until the adapter is ready. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'drivers') diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index aca3f198e5cd..0f1be41ccfa8 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -39,6 +39,7 @@ #include "rsxx_cfg.h" #define NO_LEGACY 0 +#define SYNC_START_TIMEOUT (10 * 60) /* 10 minutes */ MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver"); MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM"); @@ -49,6 +50,11 @@ static unsigned int force_legacy = NO_LEGACY; module_param(force_legacy, uint, 0444); MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts"); +static unsigned int sync_start = 1; +module_param(sync_start, uint, 0444); +MODULE_PARM_DESC(sync_start, "On by Default: Driver load will not complete " + "until the card startup has completed."); + static DEFINE_IDA(rsxx_disk_ida); static DEFINE_SPINLOCK(rsxx_ida_lock); @@ -540,6 +546,7 @@ static int rsxx_pci_probe(struct pci_dev *dev, { struct rsxx_cardinfo *card; int st; + unsigned int sync_timeout; dev_info(&dev->dev, "PCI-Flash SSD discovered\n"); @@ -698,6 +705,33 @@ static int rsxx_pci_probe(struct pci_dev *dev, if (st) dev_crit(CARD_TO_DEV(card), "Failed issuing card startup\n"); + if (sync_start) { + sync_timeout = SYNC_START_TIMEOUT; + + dev_info(CARD_TO_DEV(card), + "Waiting for card to startup\n"); + + do { + ssleep(1); + sync_timeout--; + + rsxx_get_card_state(card, &card->state); + } while (sync_timeout && + (card->state == CARD_STATE_STARTING)); + + if (card->state == CARD_STATE_STARTING) { + dev_warn(CARD_TO_DEV(card), + "Card startup timed out\n"); + card->size8 = 0; + } else { + dev_info(CARD_TO_DEV(card), + "card state: %s\n", + rsxx_card_state_to_str(card->state)); + st = rsxx_get_card_size8(card, &card->size8); + if (st) + card->size8 = 0; + } + } } else if (card->state == CARD_STATE_GOOD || card->state == CARD_STATE_RD_ONLY_FAULT) { st = rsxx_get_card_size8(card, &card->size8); -- cgit v1.2.3-58-ga151 From f730e3dc6dc4698d55fd9bf6de33a5436900e9bd Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 18 Jun 2013 14:43:58 -0500 Subject: rsxx: Changing the adapter name to the official name. Changing the adapter name from FlashSystem-80 to the official name: Flash Adapter 900GB Full Height. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- MAINTAINERS | 2 +- drivers/block/Kconfig | 4 ++-- drivers/block/rsxx/core.c | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/MAINTAINERS b/MAINTAINERS index 3d7782b9f90d..75e52e341a67 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3297,7 +3297,7 @@ F: Documentation/firmware_class/ F: drivers/base/firmware*.c F: include/linux/firmware.h -FLASHSYSTEM DRIVER (IBM FlashSystem 70/80 PCI SSD Flash Card) +FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card) M: Joshua Morris M: Philip Kelleher S: Maintained diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index b81ddfea1da0..e07a5fd58ad7 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -532,11 +532,11 @@ config BLK_DEV_RBD If unsure, say N. config BLK_DEV_RSXX - tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver" + tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver" depends on PCI help Device driver for IBM's high speed PCIe SSD - storage devices: FlashSystem-70 and FlashSystem-80. + storage device: Flash Adapter 900GB Full Height. To compile this driver as a module, choose M here: the module will be called rsxx. diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 0f1be41ccfa8..bd763f426774 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -41,7 +41,7 @@ #define NO_LEGACY 0 #define SYNC_START_TIMEOUT (10 * 60) /* 10 minutes */ -MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver"); +MODULE_DESCRIPTION("IBM Flash Adapter 900GB Full Height Device Driver"); MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRIVER_VERSION); @@ -336,7 +336,7 @@ static int rsxx_eeh_frozen(struct pci_dev *dev) int i; int st; - dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n"); + dev_warn(&dev->dev, "IBM Flash Adapter PCI: preparing for slot reset.\n"); card->eeh_state = 1; rsxx_mask_interrupts(card); @@ -376,7 +376,7 @@ static void rsxx_eeh_failure(struct pci_dev *dev) int i; int cnt = 0; - dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n"); + dev_err(&dev->dev, "IBM Flash Adapter PCI: disabling failed card.\n"); card->eeh_state = 1; card->halt = 1; @@ -450,7 +450,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev) int st; dev_warn(&dev->dev, - "IBM FlashSystem PCI: recovering from slot reset.\n"); + "IBM Flash Adapter PCI: recovering from slot reset.\n"); st = pci_enable_device(dev); if (st) @@ -503,7 +503,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev) &card->ctrl[i].issue_dma_work); } - dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n"); + dev_info(&dev->dev, "IBM Flash Adapter PCI: recovery complete.\n"); return PCI_ERS_RESULT_RECOVERED; -- cgit v1.2.3-58-ga151 From 66bc600363acd0acae84e878e5a06e7b7a38c014 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 18 Jun 2013 14:46:04 -0500 Subject: rsxx: Fixes DLPAR add kernel panic if partition still mounted. A kernel panic would occur on a DLPAR add if there was a partition still mounted during the DLPAR remove. This bug fix will allow the user to unmount the partition and bring the driver back into a good state after the DLPAR add. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/dev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 1fa7cccd6866..a092f58db212 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -171,6 +171,9 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio) might_sleep(); + if (!card) + goto req_err; + if (unlikely(card->halt)) { st = -EFAULT; goto req_err; @@ -331,6 +334,7 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card) card->gendisk = NULL; blk_cleanup_queue(card->queue); + card->queue->queuedata = NULL; unregister_blkdev(card->major, DRIVER_NAME); } -- cgit v1.2.3-58-ga151 From 3eb8dcafb5a73041e2f3b4a39c057a58e4354d83 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 18 Jun 2013 14:48:38 -0500 Subject: rsxx: Adapter address space sanity check. Adding a sanity check to guarentee that DMAs outside of the device's address space will be errored out right away. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index a092f58db212..d7af441880be 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -174,6 +174,9 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio) if (!card) goto req_err; + if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(card->gendisk)) + goto req_err; + if (unlikely(card->halt)) { st = -EFAULT; goto req_err; -- cgit v1.2.3-58-ga151 From b8b225da139f5770d7689b189fd5debc58f4b35d Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 18 Jun 2013 14:49:48 -0500 Subject: rsxx: Adding EEH check inside cregs timeout. Unfortunaly, our CPU register path does not do any kind of EEH error checking. So to fix this issue, an ioread32 was added to the CPU register timeout code. This way, the driver can check to see if the timeout was caused by an EEH error or not. This is a dummy read. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/cregs.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers') diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index 4914464c19fc..926dce9c452f 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -431,6 +431,15 @@ static int __issue_creg_rw(struct rsxx_cardinfo *card, *hw_stat = completion.creg_status; if (completion.st) { + /* + * This read is needed to verify that there has not been any + * extreme errors that might have occurred, i.e. EEH. The + * function iowrite32 will not detect EEH errors, so it is + * necessary that we recover if such an error is the reason + * for the timeout. This is a dummy read. + */ + ioread32(card->regmap + SCRATCH); + dev_warn(CARD_TO_DEV(card), "creg command failed(%d x%08x)\n", completion.st, addr); -- cgit v1.2.3-58-ga151 From 62302508f2986720ad73494dd8037dff1c4f77d1 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 18 Jun 2013 14:50:48 -0500 Subject: rsxx: Fixes incorrect stats calculation. Fixing incorrect stats calculation during read retries. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/dma.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index b485a65b8de1..bed32f16b084 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -269,6 +269,7 @@ static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl, * first. */ spin_lock_bh(&ctrl->queue_lock); + ctrl->stats.sw_q_depth++; list_add(&dma->list, &ctrl->queue); spin_unlock_bh(&ctrl->queue_lock); } -- cgit v1.2.3-58-ga151 From 36f988e978f81ffa415df4d77bbcd8887917f25c Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 18 Jun 2013 14:52:21 -0500 Subject: rsxx: Adding in debugfs entries. Adding debugfs entries to help with debugging and testing and testing code. pci_regs: This entry will spit out all of the data stored on the BAR. stats: This entry will display all of the driver stats for each DMA channel. cram: This will allow read/write ability to the CRAM address space on our adapter's CPU. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 275 +++++++++++++++++++++++++++++++++++++++++ drivers/block/rsxx/rsxx_priv.h | 3 + 2 files changed, 278 insertions(+) (limited to 'drivers') diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index bd763f426774..6e85e21445eb 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include @@ -58,6 +60,274 @@ MODULE_PARM_DESC(sync_start, "On by Default: Driver load will not complete " static DEFINE_IDA(rsxx_disk_ida); static DEFINE_SPINLOCK(rsxx_ida_lock); +/* --------------------Debugfs Setup ------------------- */ + +struct rsxx_cram { + u32 f_pos; + u32 offset; + void *i_private; +}; + +static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p) +{ + struct rsxx_cardinfo *card = m->private; + + seq_printf(m, "HWID 0x%08x\n", + ioread32(card->regmap + HWID)); + seq_printf(m, "SCRATCH 0x%08x\n", + ioread32(card->regmap + SCRATCH)); + seq_printf(m, "IER 0x%08x\n", + ioread32(card->regmap + IER)); + seq_printf(m, "IPR 0x%08x\n", + ioread32(card->regmap + IPR)); + seq_printf(m, "CREG_CMD 0x%08x\n", + ioread32(card->regmap + CREG_CMD)); + seq_printf(m, "CREG_ADD 0x%08x\n", + ioread32(card->regmap + CREG_ADD)); + seq_printf(m, "CREG_CNT 0x%08x\n", + ioread32(card->regmap + CREG_CNT)); + seq_printf(m, "CREG_STAT 0x%08x\n", + ioread32(card->regmap + CREG_STAT)); + seq_printf(m, "CREG_DATA0 0x%08x\n", + ioread32(card->regmap + CREG_DATA0)); + seq_printf(m, "CREG_DATA1 0x%08x\n", + ioread32(card->regmap + CREG_DATA1)); + seq_printf(m, "CREG_DATA2 0x%08x\n", + ioread32(card->regmap + CREG_DATA2)); + seq_printf(m, "CREG_DATA3 0x%08x\n", + ioread32(card->regmap + CREG_DATA3)); + seq_printf(m, "CREG_DATA4 0x%08x\n", + ioread32(card->regmap + CREG_DATA4)); + seq_printf(m, "CREG_DATA5 0x%08x\n", + ioread32(card->regmap + CREG_DATA5)); + seq_printf(m, "CREG_DATA6 0x%08x\n", + ioread32(card->regmap + CREG_DATA6)); + seq_printf(m, "CREG_DATA7 0x%08x\n", + ioread32(card->regmap + CREG_DATA7)); + seq_printf(m, "INTR_COAL 0x%08x\n", + ioread32(card->regmap + INTR_COAL)); + seq_printf(m, "HW_ERROR 0x%08x\n", + ioread32(card->regmap + HW_ERROR)); + seq_printf(m, "DEBUG0 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG0)); + seq_printf(m, "DEBUG1 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG1)); + seq_printf(m, "DEBUG2 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG2)); + seq_printf(m, "DEBUG3 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG3)); + seq_printf(m, "DEBUG4 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG4)); + seq_printf(m, "DEBUG5 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG5)); + seq_printf(m, "DEBUG6 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG6)); + seq_printf(m, "DEBUG7 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG7)); + seq_printf(m, "RECONFIG 0x%08x\n", + ioread32(card->regmap + PCI_RECONFIG)); + + return 0; +} + +static int rsxx_attr_stats_show(struct seq_file *m, void *p) +{ + struct rsxx_cardinfo *card = m->private; + int i; + + for (i = 0; i < card->n_targets; i++) { + seq_printf(m, "Ctrl %d CRC Errors = %d\n", + i, card->ctrl[i].stats.crc_errors); + seq_printf(m, "Ctrl %d Hard Errors = %d\n", + i, card->ctrl[i].stats.hard_errors); + seq_printf(m, "Ctrl %d Soft Errors = %d\n", + i, card->ctrl[i].stats.soft_errors); + seq_printf(m, "Ctrl %d Writes Issued = %d\n", + i, card->ctrl[i].stats.writes_issued); + seq_printf(m, "Ctrl %d Writes Failed = %d\n", + i, card->ctrl[i].stats.writes_failed); + seq_printf(m, "Ctrl %d Reads Issued = %d\n", + i, card->ctrl[i].stats.reads_issued); + seq_printf(m, "Ctrl %d Reads Failed = %d\n", + i, card->ctrl[i].stats.reads_failed); + seq_printf(m, "Ctrl %d Reads Retried = %d\n", + i, card->ctrl[i].stats.reads_retried); + seq_printf(m, "Ctrl %d Discards Issued = %d\n", + i, card->ctrl[i].stats.discards_issued); + seq_printf(m, "Ctrl %d Discards Failed = %d\n", + i, card->ctrl[i].stats.discards_failed); + seq_printf(m, "Ctrl %d DMA SW Errors = %d\n", + i, card->ctrl[i].stats.dma_sw_err); + seq_printf(m, "Ctrl %d DMA HW Faults = %d\n", + i, card->ctrl[i].stats.dma_hw_fault); + seq_printf(m, "Ctrl %d DMAs Cancelled = %d\n", + i, card->ctrl[i].stats.dma_cancelled); + seq_printf(m, "Ctrl %d SW Queue Depth = %d\n", + i, card->ctrl[i].stats.sw_q_depth); + seq_printf(m, "Ctrl %d HW Queue Depth = %d\n", + i, atomic_read(&card->ctrl[i].stats.hw_q_depth)); + } + + return 0; +} + +static int rsxx_attr_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, rsxx_attr_stats_show, inode->i_private); +} + +static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file) +{ + return single_open(file, rsxx_attr_pci_regs_show, inode->i_private); +} + +static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct rsxx_cram *info = fp->private_data; + struct rsxx_cardinfo *card = info->i_private; + char *buf; + int st; + + buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + info->f_pos = (u32)*ppos + info->offset; + + st = rsxx_creg_read(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1); + if (st) + return st; + + st = copy_to_user(ubuf, buf, cnt); + if (st) + return st; + + info->offset += cnt; + + kfree(buf); + + return cnt; +} + +static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct rsxx_cram *info = fp->private_data; + struct rsxx_cardinfo *card = info->i_private; + char *buf; + int st; + + buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + st = copy_from_user(buf, ubuf, cnt); + if (st) + return st; + + info->f_pos = (u32)*ppos + info->offset; + + st = rsxx_creg_write(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1); + if (st) + return st; + + info->offset += cnt; + + kfree(buf); + + return cnt; +} + +static int rsxx_cram_open(struct inode *inode, struct file *file) +{ + struct rsxx_cram *info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->i_private = inode->i_private; + info->f_pos = file->f_pos; + file->private_data = info; + + return 0; +} + +static int rsxx_cram_release(struct inode *inode, struct file *file) +{ + struct rsxx_cram *info = file->private_data; + + if (!info) + return 0; + + kfree(info); + file->private_data = NULL; + + return 0; +} + +static const struct file_operations debugfs_cram_fops = { + .owner = THIS_MODULE, + .open = rsxx_cram_open, + .read = rsxx_cram_read, + .write = rsxx_cram_write, + .release = rsxx_cram_release, +}; + +static const struct file_operations debugfs_stats_fops = { + .owner = THIS_MODULE, + .open = rsxx_attr_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations debugfs_pci_regs_fops = { + .owner = THIS_MODULE, + .open = rsxx_attr_pci_regs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card) +{ + struct dentry *debugfs_stats; + struct dentry *debugfs_pci_regs; + struct dentry *debugfs_cram; + + card->debugfs_dir = debugfs_create_dir(card->gendisk->disk_name, NULL); + if (IS_ERR_OR_NULL(card->debugfs_dir)) + goto failed_debugfs_dir; + + debugfs_stats = debugfs_create_file("stats", S_IRUGO, + card->debugfs_dir, card, + &debugfs_stats_fops); + if (IS_ERR_OR_NULL(debugfs_stats)) + goto failed_debugfs_stats; + + debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO, + card->debugfs_dir, card, + &debugfs_pci_regs_fops); + if (IS_ERR_OR_NULL(debugfs_pci_regs)) + goto failed_debugfs_pci_regs; + + debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR, + card->debugfs_dir, card, + &debugfs_cram_fops); + if (IS_ERR_OR_NULL(debugfs_cram)) + goto failed_debugfs_cram; + + return; +failed_debugfs_cram: + debugfs_remove(debugfs_pci_regs); +failed_debugfs_pci_regs: + debugfs_remove(debugfs_stats); +failed_debugfs_stats: + debugfs_remove(card->debugfs_dir); +failed_debugfs_dir: + card->debugfs_dir = NULL; +} + /*----------------- Interrupt Control & Handling -------------------*/ static void rsxx_mask_interrupts(struct rsxx_cardinfo *card) @@ -741,6 +1011,9 @@ static int rsxx_pci_probe(struct pci_dev *dev, rsxx_attach_dev(card); + /************* Setup Debugfs *************/ + rsxx_debugfs_dev_new(card); + return 0; failed_create_dev: @@ -818,6 +1091,8 @@ static void rsxx_pci_remove(struct pci_dev *dev) /* Prevent work_structs from re-queuing themselves. */ card->halt = 1; + debugfs_remove_recursive(card->debugfs_dir); + free_irq(dev->irq, card); if (!force_legacy) diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index c968a6918b6e..5ad5055a4104 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -185,6 +185,8 @@ struct rsxx_cardinfo { int n_targets; struct rsxx_dma_ctrl *ctrl; + + struct dentry *debugfs_dir; }; enum rsxx_pci_regmap { @@ -287,6 +289,7 @@ enum rsxx_creg_addr { CREG_ADD_CAPABILITIES = 0x80001050, CREG_ADD_LOG = 0x80002000, CREG_ADD_NUM_TARGETS = 0x80003000, + CREG_ADD_CRAM = 0xA0000000, CREG_ADD_CONFIG = 0xB0000000, }; -- cgit v1.2.3-58-ga151 From 2d9105433ff471d2c688817e98804029c074a623 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Fri, 21 Jun 2013 12:56:53 +0200 Subject: xen-blkback: workaround compiler bug in gcc 4.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code generat with gcc (GCC) 4.1.2 20080704 (Red Hat 4.1.2-54) creates an unbound loop for the second foreach_grant_safe loop in purge_persistent_gnt. The workaround is to avoid having this second loop and instead perform all the work inside the first loop by adding a new variable, clean_used, that will be set when all the desired persistent grants have been removed and we need to iterate over the remaining ones to remove the WAS_ACTIVE flag. Signed-off-by: Roger Pau Monné Reported-by: Tom O'Neill Reported-by: Konrad Rzeszutek Wilk Cc: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index ea158fe0c9a4..4662217c61be 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -341,7 +341,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) struct persistent_gnt *persistent_gnt; struct rb_node *n; unsigned int num_clean, total; - bool scan_used = false; + bool scan_used = false, clean_used = false; struct rb_root *root; if (blkif->persistent_gnt_c < xen_blkif_max_pgrants || @@ -358,9 +358,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean; num_clean = min(blkif->persistent_gnt_c, num_clean); - if (num_clean > - (blkif->persistent_gnt_c - - atomic_read(&blkif->persistent_gnt_in_use))) + if ((num_clean == 0) || + (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use)))) return; /* @@ -383,6 +382,11 @@ purge_list: BUG_ON(persistent_gnt->handle == BLKBACK_INVALID_HANDLE); + if (clean_used) { + clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); + continue; + } + if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) continue; if (!scan_used && @@ -400,18 +404,18 @@ purge_list: * grants that were used since last purge in order to cope * with the requested num */ - if (!scan_used) { + if (!scan_used && !clean_used) { pr_debug(DRV_PFX "Still missing %u purged frames\n", num_clean); scan_used = true; goto purge_list; } finished: - /* Remove the "used" flag from all the persistent grants */ - foreach_grant_safe(persistent_gnt, n, root, node) { - BUG_ON(persistent_gnt->handle == - BLKBACK_INVALID_HANDLE); - clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); + if (!clean_used) { + pr_debug(DRV_PFX "Finished scanning for grants to clean, removing used flag\n"); + clean_used = true; + goto purge_list; } + blkif->persistent_gnt_c -= (total - num_clean); blkif->vbd.overflow_max_grants = 0; -- cgit v1.2.3-58-ga151 From 294caaf29c26cfed3b446fb46393b8b39ea1c0d3 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Fri, 21 Jun 2013 12:56:54 +0200 Subject: xen-blkfront: set blk_queue_max_hw_sectors correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that indirect segments are enabled blk_queue_max_hw_sectors must be set to match the maximum number of sectors we can handle in a request. Signed-off-by: Roger Pau Monné Reported-by: Felipe Franciosi Cc: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 1a0f67c10ec7..2e1ee348ffe1 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -633,7 +633,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, sector_size); blk_queue_physical_block_size(rq, physical_sector_size); - blk_queue_max_hw_sectors(rq, 512); + blk_queue_max_hw_sectors(rq, (segments * PAGE_SIZE) / 512); /* Each segment in a request is up to an aligned page in size. */ blk_queue_segment_boundary(rq, PAGE_SIZE - 1); -- cgit v1.2.3-58-ga151 From 1e0f7a21b2fffc70f27cc4a454c60321501045b1 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Sat, 22 Jun 2013 09:59:17 +0200 Subject: xen-blkback: check the number of iovecs before allocating a bios MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the introduction of indirect segments we can receive requests with a number of segments bigger than the maximum number of allowed iovecs in a bios, so make sure that blkback doesn't try to allocate a bios with more iovecs than BIO_MAX_PAGES Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 4662217c61be..bf4b9d282c04 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1247,7 +1247,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, seg[i].nsec << 9, seg[i].offset) == 0)) { - bio = bio_alloc(GFP_KERNEL, nseg-i); + int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES); + bio = bio_alloc(GFP_KERNEL, nr_iovecs); if (unlikely(bio == NULL)) goto fail_put_bio; -- cgit v1.2.3-58-ga151 From 5c694129c8db6d89c9be109049a16510b2f70f6d Mon Sep 17 00:00:00 2001 From: Kumar Amit Mehta Date: Tue, 28 May 2013 00:31:15 -0700 Subject: md: bcache: io.c: fix a potential NULL pointer dereference bio_alloc_bioset returns NULL on failure. This fix adds a missing check for potential NULL pointer dereferencing. Signed-off-by: Kumar Amit Mehta Signed-off-by: Kent Overstreet --- drivers/md/bcache/io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 48efd4dea645..d285cd49104c 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -97,6 +97,8 @@ struct bio *bch_bio_split(struct bio *bio, int sectors, if (bio->bi_rw & REQ_DISCARD) { ret = bio_alloc_bioset(gfp, 1, bs); + if (!ret) + return NULL; idx = 0; goto out; } -- cgit v1.2.3-58-ga151 From bbc77aa7fb72e616edcb1b165c293e47c6d4d0cf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 28 May 2013 21:53:19 -0700 Subject: bcache: fix a spurious gcc complaint, use scnprintf An old version of gcc was complaining about using a const int as the size of a stack allocated array. Which should be fine - but using ARRAY_SIZE() is better, anyways. Also, refactor the code to use scnprintf(). Signed-off-by: Kent Overstreet --- drivers/md/bcache/sysfs.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 4d9cca47e4c6..29228b8a6ffe 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -665,12 +665,10 @@ SHOW(__bch_cache) int cmp(const void *l, const void *r) { return *((uint16_t *) r) - *((uint16_t *) l); } - /* Number of quantiles we compute */ - const unsigned nq = 31; - size_t n = ca->sb.nbuckets, i, unused, btree; uint64_t sum = 0; - uint16_t q[nq], *p, *cached; + /* Compute 31 quantiles */ + uint16_t q[31], *p, *cached; ssize_t ret; cached = p = vmalloc(ca->sb.nbuckets * sizeof(uint16_t)); @@ -703,26 +701,29 @@ SHOW(__bch_cache) if (n) do_div(sum, n); - for (i = 0; i < nq; i++) - q[i] = INITIAL_PRIO - cached[n * (i + 1) / (nq + 1)]; + for (i = 0; i < ARRAY_SIZE(q); i++) + q[i] = INITIAL_PRIO - cached[n * (i + 1) / + (ARRAY_SIZE(q) + 1)]; vfree(p); - ret = snprintf(buf, PAGE_SIZE, - "Unused: %zu%%\n" - "Metadata: %zu%%\n" - "Average: %llu\n" - "Sectors per Q: %zu\n" - "Quantiles: [", - unused * 100 / (size_t) ca->sb.nbuckets, - btree * 100 / (size_t) ca->sb.nbuckets, sum, - n * ca->sb.bucket_size / (nq + 1)); - - for (i = 0; i < nq && ret < (ssize_t) PAGE_SIZE; i++) - ret += snprintf(buf + ret, PAGE_SIZE - ret, - i < nq - 1 ? "%u " : "%u]\n", q[i]); - - buf[PAGE_SIZE - 1] = '\0'; + ret = scnprintf(buf, PAGE_SIZE, + "Unused: %zu%%\n" + "Metadata: %zu%%\n" + "Average: %llu\n" + "Sectors per Q: %zu\n" + "Quantiles: [", + unused * 100 / (size_t) ca->sb.nbuckets, + btree * 100 / (size_t) ca->sb.nbuckets, sum, + n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1)); + + for (i = 0; i < ARRAY_SIZE(q); i++) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "%u ", q[i]); + ret--; + + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "]\n"); + return ret; } -- cgit v1.2.3-58-ga151 From a9dd53adbb84c12f769a862ba2c80404873c2c99 Mon Sep 17 00:00:00 2001 From: Gabriel de Perthuis Date: Sat, 4 May 2013 12:19:41 +0200 Subject: bcache: Warn when a device is already registered. Signed-off-by: Gabriel de Perthuis Signed-off-by: Kent Overstreet --- drivers/md/bcache/super.c | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f88e2b653a3f..3de5626919ef 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1786,6 +1786,36 @@ static ssize_t register_bcache(struct kobject *, struct kobj_attribute *, kobj_attribute_write(register, register_bcache); kobj_attribute_write(register_quiet, register_bcache); +static bool bch_is_open_backing(struct block_device *bdev) { + struct cache_set *c, *tc; + struct cached_dev *dc, *t; + + list_for_each_entry_safe(c, tc, &bch_cache_sets, list) + list_for_each_entry_safe(dc, t, &c->cached_devs, list) + if (dc->bdev == bdev) + return true; + list_for_each_entry_safe(dc, t, &uncached_devices, list) + if (dc->bdev == bdev) + return true; + return false; +} + +static bool bch_is_open_cache(struct block_device *bdev) { + struct cache_set *c, *tc; + struct cache *ca; + unsigned i; + + list_for_each_entry_safe(c, tc, &bch_cache_sets, list) + for_each_cache(ca, c, i) + if (ca->bdev == bdev) + return true; + return false; +} + +static bool bch_is_open(struct block_device *bdev) { + return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); +} + static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, const char *buffer, size_t size) { @@ -1810,8 +1840,13 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); if (IS_ERR(bdev)) { - if (bdev == ERR_PTR(-EBUSY)) - err = "device busy"; + if (bdev == ERR_PTR(-EBUSY)) { + bdev = lookup_bdev(strim(path)); + if (!IS_ERR(bdev) && bch_is_open(bdev)) + err = "device already registered"; + else + err = "device busy"; + } goto err; } -- cgit v1.2.3-58-ga151 From 119ba0f82839cd80eaef3e6991988f1403965d5b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 24 Apr 2013 19:01:12 -0700 Subject: bcache: Convert allocator thread to kthread Using a workqueue when we just want a single thread is a bit silly. Signed-off-by: Kent Overstreet --- drivers/md/bcache/alloc.c | 34 ++++++++++++++++++++++------------ drivers/md/bcache/bcache.h | 17 +++++++++++------ drivers/md/bcache/btree.c | 6 +++--- drivers/md/bcache/super.c | 19 +++++++------------ 4 files changed, 43 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 048f2947e08b..38428f46ea74 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -63,6 +63,7 @@ #include "bcache.h" #include "btree.h" +#include #include #define MAX_IN_FLIGHT_DISCARDS 8U @@ -151,7 +152,7 @@ static void discard_finish(struct work_struct *w) mutex_unlock(&ca->set->bucket_lock); closure_wake_up(&ca->set->bucket_wait); - wake_up(&ca->set->alloc_wait); + wake_up_process(ca->alloc_thread); closure_put(&ca->set->cl); } @@ -358,30 +359,26 @@ static void invalidate_buckets(struct cache *ca) #define allocator_wait(ca, cond) \ do { \ - DEFINE_WAIT(__wait); \ - \ while (1) { \ - prepare_to_wait(&ca->set->alloc_wait, \ - &__wait, TASK_INTERRUPTIBLE); \ + set_current_state(TASK_INTERRUPTIBLE); \ if (cond) \ break; \ \ mutex_unlock(&(ca)->set->bucket_lock); \ if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) { \ - finish_wait(&ca->set->alloc_wait, &__wait); \ - closure_return(cl); \ + closure_put(&ca->set->cl); \ + return 0; \ } \ \ schedule(); \ mutex_lock(&(ca)->set->bucket_lock); \ } \ - \ - finish_wait(&ca->set->alloc_wait, &__wait); \ + __set_current_state(TASK_RUNNING); \ } while (0) -void bch_allocator_thread(struct closure *cl) +static int bch_allocator_thread(void *arg) { - struct cache *ca = container_of(cl, struct cache, alloc); + struct cache *ca = arg; mutex_lock(&ca->set->bucket_lock); @@ -442,7 +439,7 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl) { long r = -1; again: - wake_up(&ca->set->alloc_wait); + wake_up_process(ca->alloc_thread); if (fifo_used(&ca->free) > ca->watermark[watermark] && fifo_pop(&ca->free, r)) { @@ -552,6 +549,19 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, /* Init */ +int bch_cache_allocator_start(struct cache *ca) +{ + ca->alloc_thread = kthread_create(bch_allocator_thread, + ca, "bcache_allocator"); + if (IS_ERR(ca->alloc_thread)) + return PTR_ERR(ca->alloc_thread); + + closure_get(&ca->set->cl); + wake_up_process(ca->alloc_thread); + + return 0; +} + void bch_cache_allocator_exit(struct cache *ca) { struct discard *d; diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d3e15b42a4ab..166c8ddc0be4 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -565,8 +565,7 @@ struct cache { unsigned watermark[WATERMARK_MAX]; - struct closure alloc; - struct workqueue_struct *alloc_workqueue; + struct task_struct *alloc_thread; struct closure prio; struct prio_set *disk_buckets; @@ -703,9 +702,6 @@ struct cache_set { /* For the btree cache */ struct shrinker shrink; - /* For the allocator itself */ - wait_queue_head_t alloc_wait; - /* For the btree cache and anything allocation related */ struct mutex bucket_lock; @@ -1173,6 +1169,15 @@ static inline uint8_t bucket_disk_gen(struct bucket *b) static struct kobj_attribute ksysfs_##n = \ __ATTR(n, S_IWUSR|S_IRUSR, show, store) +static inline void wake_up_allocators(struct cache_set *c) +{ + struct cache *ca; + unsigned i; + + for_each_cache(ca, c, i) + wake_up_process(ca->alloc_thread); +} + /* Forward declarations */ void bch_writeback_queue(struct cached_dev *); @@ -1193,7 +1198,6 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned); uint8_t bch_inc_gen(struct cache *, struct bucket *); void bch_rescale_priorities(struct cache_set *, int); bool bch_bucket_add_unused(struct cache *, struct bucket *); -void bch_allocator_thread(struct closure *); long bch_bucket_alloc(struct cache *, unsigned, struct closure *); void bch_bucket_free(struct cache_set *, struct bkey *); @@ -1244,6 +1248,7 @@ int bch_btree_cache_alloc(struct cache_set *); void bch_cached_dev_writeback_init(struct cached_dev *); void bch_moving_init_cache_set(struct cache_set *); +int bch_cache_allocator_start(struct cache *ca); void bch_cache_allocator_exit(struct cache *ca); int bch_cache_allocator_init(struct cache *ca); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 7a5658f04e62..45b88fbffbe0 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -273,7 +273,7 @@ static void btree_complete_write(struct btree *b, struct btree_write *w) { if (w->prio_blocked && !atomic_sub_return(w->prio_blocked, &b->c->prio_blocked)) - wake_up(&b->c->alloc_wait); + wake_up_allocators(b->c); if (w->journal) { atomic_dec_bug(w->journal); @@ -984,7 +984,7 @@ static void btree_node_free(struct btree *b, struct btree_op *op) if (b->prio_blocked && !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked)) - wake_up(&b->c->alloc_wait); + wake_up_allocators(b->c); b->prio_blocked = 0; @@ -1547,7 +1547,7 @@ static void bch_btree_gc(struct closure *cl) blktrace_msg_all(c, "Finished gc"); trace_bcache_gc_end(c->sb.set_uuid); - wake_up(&c->alloc_wait); + wake_up_allocators(c); continue_at(cl, bch_moving_gc, bch_gc_wq); } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3de5626919ef..aaeda235fc75 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1282,7 +1282,7 @@ static void cache_set_flush(struct closure *cl) /* Shut down allocator threads */ set_bit(CACHE_SET_STOPPING_2, &c->flags); - wake_up(&c->alloc_wait); + wake_up_allocators(c); bch_cache_accounting_destroy(&c->accounting); @@ -1373,7 +1373,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) c->btree_pages = max_t(int, c->btree_pages / 4, BTREE_MAX_PAGES); - init_waitqueue_head(&c->alloc_wait); mutex_init(&c->bucket_lock); mutex_init(&c->fill_lock); mutex_init(&c->sort_lock); @@ -1496,9 +1495,10 @@ static void run_cache_set(struct cache_set *c) */ bch_journal_next(&c->journal); + err = "error starting allocator thread"; for_each_cache(ca, c, i) - closure_call(&ca->alloc, bch_allocator_thread, - system_wq, &c->cl); + if (bch_cache_allocator_start(ca)) + goto err; /* * First place it's safe to allocate: btree_check() and @@ -1531,17 +1531,16 @@ static void run_cache_set(struct cache_set *c) bch_btree_gc_finish(c); + err = "error starting allocator thread"; for_each_cache(ca, c, i) - closure_call(&ca->alloc, bch_allocator_thread, - ca->alloc_workqueue, &c->cl); + if (bch_cache_allocator_start(ca)) + goto err; mutex_lock(&c->bucket_lock); for_each_cache(ca, c, i) bch_prio_write(ca); mutex_unlock(&c->bucket_lock); - wake_up(&c->alloc_wait); - err = "cannot allocate new UUID bucket"; if (__uuid_write(c)) goto err_unlock_gc; @@ -1673,9 +1672,6 @@ void bch_cache_release(struct kobject *kobj) bio_split_pool_free(&ca->bio_split_hook); - if (ca->alloc_workqueue) - destroy_workqueue(ca->alloc_workqueue); - free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca))); kfree(ca->prio_buckets); vfree(ca->buckets); @@ -1723,7 +1719,6 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) * 2, GFP_KERNEL)) || !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) || - !(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) || bio_split_pool_init(&ca->bio_split_hook)) return -ENOMEM; -- cgit v1.2.3-58-ga151 From 5794351146199b9ac67a5ab1beab82be8bfd7b5d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 25 Apr 2013 13:58:35 -0700 Subject: bcache: Refactor btree io The most significant change is that btree reads are now done synchronously, instead of asynchronously and doing the post read stuff from a workqueue. This was originally done because we can't block on IO under generic_make_request(). But - we already have a mechanism to punt cache lookups to workqueue if needed, so if we just use that we don't have to deal with the complexity of doing things asynchronously. The main benefit is this makes the locking situation saner; we can hold our write lock on the btree node until we're finished reading it, and we don't need that btree_node_read_done() flag anymore. Also, for writes, btree_write() was broken out into btree_node_write() and btree_leaf_dirty() - the old code with the boolean argument was dumb and confusing. The prio_blocked mechanism was improved a bit too, now the only counter is in struct btree_write, we don't mess with transfering a count from struct btree anymore. This required changing garbage collection to block prios at the start and unblock when it finishes, which is cleaner than what it was doing anyways (the old code had mostly the same effect, but was doing it in a convoluted way) And the btree iter btree_node_read_done() uses was converted to a real mempool. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 5 +- drivers/md/bcache/btree.c | 274 ++++++++++++++++++++------------------------ drivers/md/bcache/btree.h | 19 +-- drivers/md/bcache/debug.c | 4 +- drivers/md/bcache/journal.c | 2 +- drivers/md/bcache/super.c | 12 +- 6 files changed, 140 insertions(+), 176 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 166c8ddc0be4..ad4957b52f10 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -819,10 +819,9 @@ struct cache_set { /* * A btree node on disk could have too many bsets for an iterator to fit - * on the stack - this is a single element mempool for btree_read_work() + * on the stack - have to dynamically allocate them */ - struct mutex fill_lock; - struct btree_iter *fill_iter; + mempool_t *fill_iter; /* * btree_sort() is a merge sort and requires temporary space - single diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 45b88fbffbe0..aaec186f7ba6 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -134,44 +134,17 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i) return crc ^ 0xffffffffffffffffULL; } -static void btree_bio_endio(struct bio *bio, int error) +void bch_btree_node_read_done(struct btree *b) { - struct closure *cl = bio->bi_private; - struct btree *b = container_of(cl, struct btree, io.cl); - - if (error) - set_btree_node_io_error(b); - - bch_bbio_count_io_errors(b->c, bio, error, (bio->bi_rw & WRITE) - ? "writing btree" : "reading btree"); - closure_put(cl); -} - -static void btree_bio_init(struct btree *b) -{ - BUG_ON(b->bio); - b->bio = bch_bbio_alloc(b->c); - - b->bio->bi_end_io = btree_bio_endio; - b->bio->bi_private = &b->io.cl; -} - -void bch_btree_read_done(struct closure *cl) -{ - struct btree *b = container_of(cl, struct btree, io.cl); - struct bset *i = b->sets[0].data; - struct btree_iter *iter = b->c->fill_iter; const char *err = "bad btree header"; - BUG_ON(b->nsets || b->written); - - bch_bbio_free(b->bio, b->c); - b->bio = NULL; + struct bset *i = b->sets[0].data; + struct btree_iter *iter; - mutex_lock(&b->c->fill_lock); + iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT); + iter->size = b->c->sb.bucket_size / b->c->sb.block_size; iter->used = 0; - if (btree_node_io_error(b) || - !i->seq) + if (!i->seq) goto err; for (; @@ -228,17 +201,8 @@ void bch_btree_read_done(struct closure *cl) if (b->written < btree_blocks(b)) bch_bset_init_next(b); out: - - mutex_unlock(&b->c->fill_lock); - - spin_lock(&b->c->btree_read_time_lock); - bch_time_stats_update(&b->c->btree_read_time, b->io_start_time); - spin_unlock(&b->c->btree_read_time_lock); - - smp_wmb(); /* read_done is our write lock */ - set_btree_node_read_done(b); - - closure_return(cl); + mempool_free(iter, b->c->fill_iter); + return; err: set_btree_node_io_error(b); bch_cache_set_error(b->c, "%s at bucket %zu, block %zu, %u keys", @@ -247,26 +211,51 @@ err: goto out; } -void bch_btree_read(struct btree *b) +static void btree_node_read_endio(struct bio *bio, int error) { - BUG_ON(b->nsets || b->written); + struct closure *cl = bio->bi_private; + closure_put(cl); +} - if (!closure_trylock(&b->io.cl, &b->c->cl)) - BUG(); +void bch_btree_node_read(struct btree *b) +{ + uint64_t start_time = local_clock(); + struct closure cl; + struct bio *bio; - b->io_start_time = local_clock(); + closure_init_stack(&cl); + pr_debug("%s", pbtree(b)); - btree_bio_init(b); - b->bio->bi_rw = REQ_META|READ_SYNC; - b->bio->bi_size = KEY_SIZE(&b->key) << 9; + bio = bch_bbio_alloc(b->c); + bio->bi_rw = REQ_META|READ_SYNC; + bio->bi_size = KEY_SIZE(&b->key) << 9; + bio->bi_end_io = btree_node_read_endio; + bio->bi_private = &cl; - bch_bio_map(b->bio, b->sets[0].data); + bch_bio_map(bio, b->sets[0].data); - pr_debug("%s", pbtree(b)); - trace_bcache_btree_read(b->bio); - bch_submit_bbio(b->bio, b->c, &b->key, 0); + trace_bcache_btree_read(bio); + bch_submit_bbio(bio, b->c, &b->key, 0); + closure_sync(&cl); - continue_at(&b->io.cl, bch_btree_read_done, system_wq); + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + set_btree_node_io_error(b); + + bch_bbio_free(bio, b->c); + + if (btree_node_io_error(b)) + goto err; + + bch_btree_node_read_done(b); + + spin_lock(&b->c->btree_read_time_lock); + bch_time_stats_update(&b->c->btree_read_time, start_time); + spin_unlock(&b->c->btree_read_time_lock); + + return; +err: + bch_cache_set_error(b->c, "io error reading bucket %lu", + PTR_BUCKET_NR(b->c, &b->key, 0)); } static void btree_complete_write(struct btree *b, struct btree_write *w) @@ -280,15 +269,11 @@ static void btree_complete_write(struct btree *b, struct btree_write *w) __closure_wake_up(&b->c->journal.wait); } - if (w->owner) - closure_put(w->owner); - w->prio_blocked = 0; w->journal = NULL; - w->owner = NULL; } -static void __btree_write_done(struct closure *cl) +static void __btree_node_write_done(struct closure *cl) { struct btree *b = container_of(cl, struct btree, io.cl); struct btree_write *w = btree_prev_write(b); @@ -304,7 +289,7 @@ static void __btree_write_done(struct closure *cl) closure_return(cl); } -static void btree_write_done(struct closure *cl) +static void btree_node_write_done(struct closure *cl) { struct btree *b = container_of(cl, struct btree, io.cl); struct bio_vec *bv; @@ -313,10 +298,22 @@ static void btree_write_done(struct closure *cl) __bio_for_each_segment(bv, b->bio, n, 0) __free_page(bv->bv_page); - __btree_write_done(cl); + __btree_node_write_done(cl); } -static void do_btree_write(struct btree *b) +static void btree_node_write_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + struct btree *b = container_of(cl, struct btree, io.cl); + + if (error) + set_btree_node_io_error(b); + + bch_bbio_count_io_errors(b->c, bio, error, "writing btree"); + closure_put(cl); +} + +static void do_btree_node_write(struct btree *b) { struct closure *cl = &b->io.cl; struct bset *i = b->sets[b->nsets].data; @@ -325,7 +322,11 @@ static void do_btree_write(struct btree *b) i->version = BCACHE_BSET_VERSION; i->csum = btree_csum_set(b, i); - btree_bio_init(b); + BUG_ON(b->bio); + b->bio = bch_bbio_alloc(b->c); + + b->bio->bi_end_io = btree_node_write_endio; + b->bio->bi_private = &b->io.cl; b->bio->bi_rw = REQ_META|WRITE_SYNC; b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); bch_bio_map(b->bio, i); @@ -345,7 +346,7 @@ static void do_btree_write(struct btree *b) trace_bcache_btree_write(b->bio); bch_submit_bbio(b->bio, b->c, &k.key, 0); - continue_at(cl, btree_write_done, NULL); + continue_at(cl, btree_node_write_done, NULL); } else { b->bio->bi_vcnt = 0; bch_bio_map(b->bio, i); @@ -354,26 +355,30 @@ static void do_btree_write(struct btree *b) bch_submit_bbio(b->bio, b->c, &k.key, 0); closure_sync(cl); - __btree_write_done(cl); + __btree_node_write_done(cl); } } -static void __btree_write(struct btree *b) +void bch_btree_node_write(struct btree *b, struct closure *parent) { struct bset *i = b->sets[b->nsets].data; BUG_ON(current->bio_list); + BUG_ON(b->written >= btree_blocks(b)); + BUG_ON(b->written && !i->keys); + BUG_ON(b->sets->data->seq != i->seq); - closure_lock(&b->io, &b->c->cl); cancel_delayed_work(&b->work); + /* If caller isn't waiting for write, parent refcount is cache set */ + closure_lock(&b->io, parent ?: &b->c->cl); + clear_bit(BTREE_NODE_dirty, &b->flags); change_bit(BTREE_NODE_write_idx, &b->flags); bch_check_key_order(b, i); - BUG_ON(b->written && !i->keys); - do_btree_write(b); + do_btree_node_write(b); pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys); @@ -387,37 +392,31 @@ static void __btree_write(struct btree *b) bch_bset_init_next(b); } -static void btree_write_work(struct work_struct *w) +static void btree_node_write_work(struct work_struct *w) { struct btree *b = container_of(to_delayed_work(w), struct btree, work); - down_write(&b->lock); + rw_lock(true, b, b->level); if (btree_node_dirty(b)) - __btree_write(b); - up_write(&b->lock); + bch_btree_node_write(b, NULL); + rw_unlock(true, b); } -void bch_btree_write(struct btree *b, bool now, struct btree_op *op) +static void bch_btree_leaf_dirty(struct btree *b, struct btree_op *op) { struct bset *i = b->sets[b->nsets].data; struct btree_write *w = btree_current_write(b); - BUG_ON(b->written && - (b->written >= btree_blocks(b) || - i->seq != b->sets[0].data->seq || - !i->keys)); + BUG_ON(!b->written); + BUG_ON(!i->keys); - if (!btree_node_dirty(b)) { - set_btree_node_dirty(b); - queue_delayed_work(btree_io_wq, &b->work, - msecs_to_jiffies(30000)); - } + if (!btree_node_dirty(b)) + queue_delayed_work(btree_io_wq, &b->work, 30 * HZ); - w->prio_blocked += b->prio_blocked; - b->prio_blocked = 0; + set_btree_node_dirty(b); - if (op && op->journal && !b->level) { + if (op && op->journal) { if (w->journal && journal_pin_cmp(b->c, w, op)) { atomic_dec_bug(w->journal); @@ -430,23 +429,10 @@ void bch_btree_write(struct btree *b, bool now, struct btree_op *op) } } - if (current->bio_list) - return; - /* Force write if set is too big */ - if (now || - b->level || - set_bytes(i) > PAGE_SIZE - 48) { - if (op && now) { - /* Must wait on multiple writes */ - BUG_ON(w->owner); - w->owner = &op->cl; - closure_get(&op->cl); - } - - __btree_write(b); - } - BUG_ON(!b->written); + if (set_bytes(i) > PAGE_SIZE - 48 && + !current->bio_list) + bch_btree_node_write(b, NULL); } /* @@ -559,7 +545,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c, init_rwsem(&b->lock); lockdep_set_novalidate_class(&b->lock); INIT_LIST_HEAD(&b->list); - INIT_DELAYED_WORK(&b->work, btree_write_work); + INIT_DELAYED_WORK(&b->work, btree_node_write_work); b->c = c; closure_init_unlocked(&b->io); @@ -582,7 +568,7 @@ static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order) BUG_ON(btree_node_dirty(b) && !b->sets[0].data); if (cl && btree_node_dirty(b)) - bch_btree_write(b, true, NULL); + bch_btree_node_write(b, NULL); if (cl) closure_wait_event_async(&b->io.wait, cl, @@ -905,6 +891,9 @@ retry: b = mca_find(c, k); if (!b) { + if (current->bio_list) + return ERR_PTR(-EAGAIN); + mutex_lock(&c->bucket_lock); b = mca_alloc(c, k, level, &op->cl); mutex_unlock(&c->bucket_lock); @@ -914,7 +903,7 @@ retry: if (IS_ERR(b)) return b; - bch_btree_read(b); + bch_btree_node_read(b); if (!write) downgrade_write(&b->lock); @@ -937,15 +926,12 @@ retry: for (; i <= b->nsets; i++) prefetch(b->sets[i].data); - if (!closure_wait_event(&b->io.wait, &op->cl, - btree_node_read_done(b))) { - rw_unlock(write, b); - b = ERR_PTR(-EAGAIN); - } else if (btree_node_io_error(b)) { + if (btree_node_io_error(b)) { rw_unlock(write, b); - b = ERR_PTR(-EIO); - } else - BUG_ON(!b->written); + return ERR_PTR(-EIO); + } + + BUG_ON(!b->written); return b; } @@ -959,7 +945,7 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level) mutex_unlock(&c->bucket_lock); if (!IS_ERR_OR_NULL(b)) { - bch_btree_read(b); + bch_btree_node_read(b); rw_unlock(true, b); } } @@ -982,12 +968,6 @@ static void btree_node_free(struct btree *b, struct btree_op *op) btree_complete_write(b, btree_current_write(b)); clear_bit(BTREE_NODE_dirty, &b->flags); - if (b->prio_blocked && - !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked)) - wake_up_allocators(b->c); - - b->prio_blocked = 0; - cancel_delayed_work(&b->work); mutex_lock(&b->c->bucket_lock); @@ -1028,7 +1008,6 @@ retry: goto retry; } - set_btree_node_read_done(b); b->accessed = 1; bch_bset_init_next(b); @@ -1166,14 +1145,11 @@ static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k, if (!IS_ERR_OR_NULL(n)) { swap(b, n); + __bkey_put(b->c, &b->key); memcpy(k->ptr, b->key.ptr, sizeof(uint64_t) * KEY_PTRS(&b->key)); - __bkey_put(b->c, &b->key); - atomic_inc(&b->c->prio_blocked); - b->prio_blocked++; - btree_node_free(n, op); up_write(&n->lock); } @@ -1293,14 +1269,9 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, void write(struct btree *r) { if (!r->written) - bch_btree_write(r, true, op); - else if (btree_node_dirty(r)) { - BUG_ON(btree_current_write(r)->owner); - btree_current_write(r)->owner = writes; - closure_get(writes); - - bch_btree_write(r, true, NULL); - } + bch_btree_node_write(r, &op->cl); + else if (btree_node_dirty(r)) + bch_btree_node_write(r, writes); up_write(&r->lock); } @@ -1386,9 +1357,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op, ret = btree_gc_recurse(b, op, writes, gc); if (!b->written || btree_node_dirty(b)) { - atomic_inc(&b->c->prio_blocked); - b->prio_blocked++; - bch_btree_write(b, true, n ? op : NULL); + bch_btree_node_write(b, n ? &op->cl : NULL); } if (!IS_ERR_OR_NULL(n)) { @@ -1508,8 +1477,8 @@ static void bch_btree_gc(struct closure *cl) struct gc_stat stats; struct closure writes; struct btree_op op; - uint64_t start_time = local_clock(); + trace_bcache_gc_start(c->sb.set_uuid); blktrace_msg_all(c, "Starting gc"); @@ -1520,6 +1489,8 @@ static void bch_btree_gc(struct closure *cl) btree_gc_start(c); + atomic_inc(&c->prio_blocked); + ret = btree_root(gc_root, c, &op, &writes, &stats); closure_sync(&op.cl); closure_sync(&writes); @@ -1537,6 +1508,9 @@ static void bch_btree_gc(struct closure *cl) available = bch_btree_gc_finish(c); + atomic_dec(&c->prio_blocked); + wake_up_allocators(c); + bch_time_stats_update(&c->btree_gc_time, start_time); stats.key_bytes *= sizeof(uint64_t); @@ -1544,10 +1518,9 @@ static void bch_btree_gc(struct closure *cl) stats.data <<= 9; stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets; memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat)); - blktrace_msg_all(c, "Finished gc"); + blktrace_msg_all(c, "Finished gc"); trace_bcache_gc_end(c->sb.set_uuid); - wake_up_allocators(c); continue_at(cl, bch_moving_gc, bch_gc_wq); } @@ -1857,7 +1830,7 @@ merged: op_type(op), pbtree(b), pkey(k)); if (b->level && !KEY_OFFSET(k)) - b->prio_blocked++; + btree_current_write(b)->prio_blocked++; pr_debug("%s for %s at %s: %s", status, op_type(op), pbtree(b), pkey(k)); @@ -1907,7 +1880,6 @@ bool bch_btree_insert_check_key(struct btree *b, struct btree_op *op, BUG_ON(op->type != BTREE_INSERT); BUG_ON(!btree_insert_key(b, op, &tmp.k)); - bch_btree_write(b, false, NULL); ret = true; out: downgrade_write(&b->lock); @@ -1967,18 +1939,18 @@ static int btree_split(struct btree *b, struct btree_op *op) bkey_copy_key(&n2->key, &b->key); bch_keylist_add(&op->keys, &n2->key); - bch_btree_write(n2, true, op); + bch_btree_node_write(n2, &op->cl); rw_unlock(true, n2); } else bch_btree_insert_keys(n1, op); bch_keylist_add(&op->keys, &n1->key); - bch_btree_write(n1, true, op); + bch_btree_node_write(n1, &op->cl); if (n3) { bkey_copy_key(&n3->key, &MAX_KEY); bch_btree_insert_keys(n3, op); - bch_btree_write(n3, true, op); + bch_btree_node_write(n3, &op->cl); closure_sync(&op->cl); bch_btree_set_root(n3); @@ -2082,8 +2054,12 @@ static int bch_btree_insert_recurse(struct btree *b, struct btree_op *op, BUG_ON(write_block(b) != b->sets[b->nsets].data); - if (bch_btree_insert_keys(b, op)) - bch_btree_write(b, false, op); + if (bch_btree_insert_keys(b, op)) { + if (!b->level) + bch_btree_leaf_dirty(b, op); + else + bch_btree_node_write(b, &op->cl); + } } return 0; diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index af4a7092a28c..809bd77847a2 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -102,7 +102,6 @@ #include "debug.h" struct btree_write { - struct closure *owner; atomic_t *journal; /* If btree_split() frees a btree node, it writes a new pointer to that @@ -142,16 +141,12 @@ struct btree { */ struct bset_tree sets[MAX_BSETS]; - /* Used to refcount bio splits, also protects b->bio */ + /* For outstanding btree writes, used as a lock - protects write_idx */ struct closure_with_waitlist io; - /* Gets transferred to w->prio_blocked - see the comment there */ - int prio_blocked; - struct list_head list; struct delayed_work work; - uint64_t io_start_time; struct btree_write writes[2]; struct bio *bio; }; @@ -164,13 +159,11 @@ static inline void set_btree_node_ ## flag(struct btree *b) \ { set_bit(BTREE_NODE_ ## flag, &b->flags); } \ enum btree_flags { - BTREE_NODE_read_done, BTREE_NODE_io_error, BTREE_NODE_dirty, BTREE_NODE_write_idx, }; -BTREE_FLAG(read_done); BTREE_FLAG(io_error); BTREE_FLAG(dirty); BTREE_FLAG(write_idx); @@ -293,9 +286,7 @@ static inline void rw_unlock(bool w, struct btree *b) #ifdef CONFIG_BCACHE_EDEBUG unsigned i; - if (w && - b->key.ptr[0] && - btree_node_read_done(b)) + if (w && b->key.ptr[0]) for (i = 0; i <= b->nsets; i++) bch_check_key_order(b, b->sets[i].data); #endif @@ -370,9 +361,9 @@ static inline bool should_split(struct btree *b) > btree_blocks(b)); } -void bch_btree_read_done(struct closure *); -void bch_btree_read(struct btree *); -void bch_btree_write(struct btree *b, bool now, struct btree_op *op); +void bch_btree_node_read(struct btree *); +void bch_btree_node_read_done(struct btree *); +void bch_btree_node_write(struct btree *, struct closure *); void bch_cannibalize_unlock(struct cache_set *, struct closure *); void bch_btree_set_root(struct btree *); diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 89fd5204924e..ae6096c6845d 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -144,7 +144,7 @@ void bch_btree_verify(struct btree *b, struct bset *new) v->written = 0; v->level = b->level; - bch_btree_read(v); + bch_btree_node_read(v); closure_wait_event(&v->io.wait, &cl, atomic_read(&b->io.cl.remaining) == -1); @@ -512,7 +512,7 @@ static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a, bch_btree_sort(b); fill->written = 0; - bch_btree_read_done(&fill->io.cl); + bch_btree_node_read_done(fill); if (b->sets[0].data->keys != fill->sets[0].data->keys || memcmp(b->sets[0].data->start, diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 8c8dfdcd9d4c..970d819d4350 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -384,7 +384,7 @@ out: return; found: if (btree_node_dirty(best)) - bch_btree_write(best, true, NULL); + bch_btree_node_write(best, NULL); rw_unlock(true, best); } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index aaeda235fc75..e53f89988b08 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1255,9 +1255,10 @@ static void cache_set_free(struct closure *cl) free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); free_pages((unsigned long) c->sort, ilog2(bucket_pages(c))); - kfree(c->fill_iter); if (c->bio_split) bioset_free(c->bio_split); + if (c->fill_iter) + mempool_destroy(c->fill_iter); if (c->bio_meta) mempool_destroy(c->bio_meta); if (c->search) @@ -1295,7 +1296,7 @@ static void cache_set_flush(struct closure *cl) /* Should skip this if we're unregistering because of an error */ list_for_each_entry(b, &c->btree_cache, list) if (btree_node_dirty(b)) - bch_btree_write(b, true, NULL); + bch_btree_node_write(b, NULL); closure_return(cl); } @@ -1374,7 +1375,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) BTREE_MAX_PAGES); mutex_init(&c->bucket_lock); - mutex_init(&c->fill_lock); mutex_init(&c->sort_lock); spin_lock_init(&c->sort_time_lock); closure_init_unlocked(&c->sb_write); @@ -1400,8 +1400,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) !(c->bio_meta = mempool_create_kmalloc_pool(2, sizeof(struct bbio) + sizeof(struct bio_vec) * bucket_pages(c))) || + !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || - !(c->fill_iter = kmalloc(iter_size, GFP_KERNEL)) || !(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) || !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || bch_journal_alloc(c) || @@ -1409,8 +1409,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) bch_open_buckets_alloc(c)) goto err; - c->fill_iter->size = sb->bucket_size / sb->block_size; - c->congested_read_threshold_us = 2000; c->congested_write_threshold_us = 20000; c->error_limit = 8 << IO_ERROR_SHIFT; @@ -1551,7 +1549,7 @@ static void run_cache_set(struct cache_set *c) goto err_unlock_gc; bkey_copy_key(&c->root->key, &MAX_KEY); - bch_btree_write(c->root, true, &op); + bch_btree_node_write(c->root, &op.cl); bch_btree_set_root(c->root); rw_unlock(true, c->root); -- cgit v1.2.3-58-ga151 From c37511b863f36c1cc6e18440717fd4cc0e881b8a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 26 Apr 2013 15:39:55 -0700 Subject: bcache: Fix/revamp tracepoints The tracepoints were reworked to be more sensible, and fixed a null pointer deref in one of the tracepoints. Converted some of the pr_debug()s to tracepoints - this is partly a performance optimization; it used to be that with DEBUG or CONFIG_DYNAMIC_DEBUG pr_debug() was an empty macro; but at some point it was changed to an empty inline function. Some of the pr_debug() statements had rather expensive function calls as part of the arguments, so this code was getting run unnecessarily even on non debug kernels - in some fast paths, too. Signed-off-by: Kent Overstreet --- drivers/md/bcache/alloc.c | 10 +- drivers/md/bcache/bcache.h | 20 --- drivers/md/bcache/bset.h | 4 + drivers/md/bcache/btree.c | 47 +++--- drivers/md/bcache/io.c | 2 + drivers/md/bcache/journal.c | 14 +- drivers/md/bcache/movinggc.c | 12 +- drivers/md/bcache/request.c | 65 ++++---- drivers/md/bcache/request.h | 2 +- drivers/md/bcache/super.c | 2 +- drivers/md/bcache/sysfs.c | 1 + drivers/md/bcache/trace.c | 45 ++++-- drivers/md/bcache/util.h | 2 - drivers/md/bcache/writeback.c | 10 +- include/trace/events/bcache.h | 352 +++++++++++++++++++++++++++++------------- 15 files changed, 367 insertions(+), 221 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 38428f46ea74..b54b73b9b2b7 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -65,6 +65,7 @@ #include #include +#include #define MAX_IN_FLIGHT_DISCARDS 8U @@ -351,10 +352,7 @@ static void invalidate_buckets(struct cache *ca) break; } - pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu", - fifo_used(&ca->free), ca->free.size, - fifo_used(&ca->free_inc), ca->free_inc.size, - fifo_used(&ca->unused), ca->unused.size); + trace_bcache_alloc_invalidate(ca); } #define allocator_wait(ca, cond) \ @@ -473,9 +471,7 @@ again: return r; } - pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu", - atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free), - fifo_used(&ca->free_inc), fifo_used(&ca->unused)); + trace_bcache_alloc_fail(ca); if (cl) { closure_wait(&ca->set->bucket_wait, cl); diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index ad4957b52f10..59c15e09e4dd 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -178,7 +178,6 @@ #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ #include -#include #include #include #include @@ -901,8 +900,6 @@ static inline unsigned local_clock_us(void) return local_clock() >> 10; } -#define MAX_BSETS 4U - #define BTREE_PRIO USHRT_MAX #define INITIAL_PRIO 32768 @@ -1107,23 +1104,6 @@ static inline void __bkey_put(struct cache_set *c, struct bkey *k) atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin); } -/* Blktrace macros */ - -#define blktrace_msg(c, fmt, ...) \ -do { \ - struct request_queue *q = bdev_get_queue(c->bdev); \ - if (q) \ - blk_add_trace_msg(q, fmt, ##__VA_ARGS__); \ -} while (0) - -#define blktrace_msg_all(s, fmt, ...) \ -do { \ - struct cache *_c; \ - unsigned i; \ - for_each_cache(_c, (s), i) \ - blktrace_msg(_c, fmt, ##__VA_ARGS__); \ -} while (0) - static inline void cached_dev_put(struct cached_dev *dc) { if (atomic_dec_and_test(&dc->count)) diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 57a9cff41546..ae115a253d73 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -1,6 +1,8 @@ #ifndef _BCACHE_BSET_H #define _BCACHE_BSET_H +#include + /* * BKEYS: * @@ -142,6 +144,8 @@ /* Btree key comparison/iteration */ +#define MAX_BSETS 4U + struct btree_iter { size_t size, used; struct btree_iter_set { diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index aaec186f7ba6..218d486259a3 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -223,8 +223,9 @@ void bch_btree_node_read(struct btree *b) struct closure cl; struct bio *bio; + trace_bcache_btree_read(b); + closure_init_stack(&cl); - pr_debug("%s", pbtree(b)); bio = bch_bbio_alloc(b->c); bio->bi_rw = REQ_META|READ_SYNC; @@ -234,7 +235,6 @@ void bch_btree_node_read(struct btree *b) bch_bio_map(bio, b->sets[0].data); - trace_bcache_btree_read(bio); bch_submit_bbio(bio, b->c, &b->key, 0); closure_sync(&cl); @@ -343,7 +343,6 @@ static void do_btree_node_write(struct btree *b) memcpy(page_address(bv->bv_page), base + j * PAGE_SIZE, PAGE_SIZE); - trace_bcache_btree_write(b->bio); bch_submit_bbio(b->bio, b->c, &k.key, 0); continue_at(cl, btree_node_write_done, NULL); @@ -351,7 +350,6 @@ static void do_btree_node_write(struct btree *b) b->bio->bi_vcnt = 0; bch_bio_map(b->bio, i); - trace_bcache_btree_write(b->bio); bch_submit_bbio(b->bio, b->c, &k.key, 0); closure_sync(cl); @@ -363,10 +361,13 @@ void bch_btree_node_write(struct btree *b, struct closure *parent) { struct bset *i = b->sets[b->nsets].data; + trace_bcache_btree_write(b); + BUG_ON(current->bio_list); BUG_ON(b->written >= btree_blocks(b)); BUG_ON(b->written && !i->keys); BUG_ON(b->sets->data->seq != i->seq); + bch_check_key_order(b, i); cancel_delayed_work(&b->work); @@ -376,12 +377,8 @@ void bch_btree_node_write(struct btree *b, struct closure *parent) clear_bit(BTREE_NODE_dirty, &b->flags); change_bit(BTREE_NODE_write_idx, &b->flags); - bch_check_key_order(b, i); - do_btree_node_write(b); - pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys); - b->written += set_blocks(i, b->c); atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); @@ -752,6 +749,8 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k, int ret = -ENOMEM; struct btree *i; + trace_bcache_btree_cache_cannibalize(c); + if (!cl) return ERR_PTR(-ENOMEM); @@ -770,7 +769,6 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k, return ERR_PTR(-EAGAIN); } - /* XXX: tracepoint */ c->try_harder = cl; c->try_harder_start = local_clock(); retry: @@ -956,13 +954,14 @@ static void btree_node_free(struct btree *b, struct btree_op *op) { unsigned i; + trace_bcache_btree_node_free(b); + /* * The BUG_ON() in btree_node_get() implies that we must have a write * lock on parent to free or even invalidate a node */ BUG_ON(op->lock <= b->level); BUG_ON(b == b->c->root); - pr_debug("bucket %s", pbtree(b)); if (btree_node_dirty(b)) btree_complete_write(b, btree_current_write(b)); @@ -1012,12 +1011,16 @@ retry: bch_bset_init_next(b); mutex_unlock(&c->bucket_lock); + + trace_bcache_btree_node_alloc(b); return b; err_free: bch_bucket_free(c, &k.key); __bkey_put(c, &k.key); err: mutex_unlock(&c->bucket_lock); + + trace_bcache_btree_node_alloc_fail(b); return b; } @@ -1254,7 +1257,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op, btree_node_free(r->b, op); up_write(&r->b->lock); - pr_debug("coalesced %u nodes", nodes); + trace_bcache_btree_gc_coalesce(nodes); gc->nodes--; nodes--; @@ -1479,8 +1482,7 @@ static void bch_btree_gc(struct closure *cl) struct btree_op op; uint64_t start_time = local_clock(); - trace_bcache_gc_start(c->sb.set_uuid); - blktrace_msg_all(c, "Starting gc"); + trace_bcache_gc_start(c); memset(&stats, 0, sizeof(struct gc_stat)); closure_init_stack(&writes); @@ -1496,9 +1498,7 @@ static void bch_btree_gc(struct closure *cl) closure_sync(&writes); if (ret) { - blktrace_msg_all(c, "Stopped gc"); pr_warn("gc failed!"); - continue_at(cl, bch_btree_gc, bch_gc_wq); } @@ -1519,8 +1519,7 @@ static void bch_btree_gc(struct closure *cl) stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets; memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat)); - blktrace_msg_all(c, "Finished gc"); - trace_bcache_gc_end(c->sb.set_uuid); + trace_bcache_gc_end(c); continue_at(cl, bch_moving_gc, bch_gc_wq); } @@ -1901,12 +1900,11 @@ static int btree_split(struct btree *b, struct btree_op *op) split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5; - pr_debug("%ssplitting at %s keys %i", split ? "" : "not ", - pbtree(b), n1->sets[0].data->keys); - if (split) { unsigned keys = 0; + trace_bcache_btree_node_split(b, n1->sets[0].data->keys); + n2 = bch_btree_node_alloc(b->c, b->level, &op->cl); if (IS_ERR(n2)) goto err_free1; @@ -1941,8 +1939,11 @@ static int btree_split(struct btree *b, struct btree_op *op) bch_keylist_add(&op->keys, &n2->key); bch_btree_node_write(n2, &op->cl); rw_unlock(true, n2); - } else + } else { + trace_bcache_btree_node_compact(b, n1->sets[0].data->keys); + bch_btree_insert_keys(n1, op); + } bch_keylist_add(&op->keys, &n1->key); bch_btree_node_write(n1, &op->cl); @@ -2117,6 +2118,8 @@ void bch_btree_set_root(struct btree *b) { unsigned i; + trace_bcache_btree_set_root(b); + BUG_ON(!b->written); for (i = 0; i < KEY_PTRS(&b->key); i++) @@ -2130,7 +2133,6 @@ void bch_btree_set_root(struct btree *b) __bkey_put(b->c, &b->key); bch_journal_meta(b->c, NULL); - pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0)); } /* Cache lookup */ @@ -2216,7 +2218,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op, n->bi_end_io = bch_cache_read_endio; n->bi_private = &s->cl; - trace_bcache_cache_hit(n); __bch_submit_bbio(n, b->c); } diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index d285cd49104c..0f6d69658b61 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -9,6 +9,8 @@ #include "bset.h" #include "debug.h" +#include + static void bch_bi_idx_hack_endio(struct bio *bio, int error) { struct bio *p = bio->bi_private; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 970d819d4350..5ca22149b749 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -9,6 +9,8 @@ #include "debug.h" #include "request.h" +#include + /* * Journal replay/recovery: * @@ -300,7 +302,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list, for (k = i->j.start; k < end(&i->j); k = bkey_next(k)) { - pr_debug("%s", pkey(k)); + trace_bcache_journal_replay_key(k); + bkey_copy(op->keys.top, k); bch_keylist_push(&op->keys); @@ -712,7 +715,8 @@ void bch_journal(struct closure *cl) spin_lock(&c->journal.lock); if (journal_full(&c->journal)) { - /* XXX: tracepoint */ + trace_bcache_journal_full(c); + closure_wait(&c->journal.wait, cl); journal_reclaim(c); @@ -728,13 +732,15 @@ void bch_journal(struct closure *cl) if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS || b > c->journal.blocks_free) { - /* XXX: If we were inserting so many keys that they won't fit in + trace_bcache_journal_entry_full(c); + + /* + * XXX: If we were inserting so many keys that they won't fit in * an _empty_ journal write, we'll deadlock. For now, handle * this in bch_keylist_realloc() - but something to think about. */ BUG_ON(!w->data->keys); - /* XXX: tracepoint */ BUG_ON(!closure_wait(&w->wait, cl)); closure_flush(&c->journal.io); diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 8589512c972e..04f6b97ffda6 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -9,6 +9,8 @@ #include "debug.h" #include "request.h" +#include + struct moving_io { struct keybuf_key *w; struct search s; @@ -49,9 +51,8 @@ static void write_moving_finish(struct closure *cl) while (bv-- != bio->bi_io_vec) __free_page(bv->bv_page); - pr_debug("%s %s", io->s.op.insert_collision - ? "collision moving" : "moved", - pkey(&io->w->key)); + if (io->s.op.insert_collision) + trace_bcache_gc_copy_collision(&io->w->key); bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w); @@ -94,8 +95,6 @@ static void write_moving(struct closure *cl) struct moving_io *io = container_of(s, struct moving_io, s); if (!s->error) { - trace_bcache_write_moving(&io->bio.bio); - moving_init(io); io->bio.bio.bi_sector = KEY_START(&io->w->key); @@ -122,7 +121,6 @@ static void read_moving_submit(struct closure *cl) struct moving_io *io = container_of(s, struct moving_io, s); struct bio *bio = &io->bio.bio; - trace_bcache_read_moving(bio); bch_submit_bbio(bio, s->op.c, &io->w->key, 0); continue_at(cl, write_moving, bch_gc_wq); @@ -162,7 +160,7 @@ static void read_moving(struct closure *cl) if (bch_bio_alloc_pages(bio, GFP_KERNEL)) goto err; - pr_debug("%s", pkey(&w->key)); + trace_bcache_gc_copy(&w->key); closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index e5ff12e52d5b..695469958c1e 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -530,10 +530,9 @@ static void bch_insert_data_loop(struct closure *cl) if (KEY_CSUM(k)) bio_csum(n, k); - pr_debug("%s", pkey(k)); + trace_bcache_cache_insert(k); bch_keylist_push(&op->keys); - trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev); n->bi_rw |= REQ_WRITE; bch_submit_bbio(n, op->c, k, 0); } while (n != bio); @@ -784,11 +783,8 @@ static void request_read_error(struct closure *cl) int i; if (s->recoverable) { - /* The cache read failed, but we can retry from the backing - * device. - */ - pr_debug("recovering at sector %llu", - (uint64_t) s->orig_bio->bi_sector); + /* Retry from the backing device: */ + trace_bcache_read_retry(s->orig_bio); s->error = 0; bv = s->bio.bio.bi_io_vec; @@ -806,7 +802,6 @@ static void request_read_error(struct closure *cl) /* XXX: invalidate cache */ - trace_bcache_read_retry(&s->bio.bio); closure_bio_submit(&s->bio.bio, &s->cl, s->d); } @@ -899,6 +894,7 @@ static void request_read_done_bh(struct closure *cl) struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip); + trace_bcache_read(s->orig_bio, !s->cache_miss, s->op.skip); if (s->error) continue_at_nobarrier(cl, request_read_error, bcache_wq); @@ -969,7 +965,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, s->cache_miss = miss; bio_get(s->op.cache_bio); - trace_bcache_cache_miss(s->orig_bio); closure_bio_submit(s->op.cache_bio, &s->cl, s->d); return ret; @@ -1040,15 +1035,15 @@ static void request_write(struct cached_dev *dc, struct search *s) if (should_writeback(dc, s->orig_bio)) s->writeback = true; + trace_bcache_write(s->orig_bio, s->writeback, s->op.skip); + if (!s->writeback) { s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, dc->disk.bio_split); - trace_bcache_writethrough(s->orig_bio); closure_bio_submit(bio, cl, s->d); } else { s->op.cache_bio = bio; - trace_bcache_writeback(s->orig_bio); bch_writeback_add(dc, bio_sectors(bio)); } out: @@ -1058,7 +1053,6 @@ skip: s->op.skip = true; s->op.cache_bio = s->orig_bio; bio_get(s->op.cache_bio); - trace_bcache_write_skip(s->orig_bio); if ((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(dc->bdev))) @@ -1088,9 +1082,10 @@ static void request_nodata(struct cached_dev *dc, struct search *s) /* Cached devices - read & write stuff */ -int bch_get_congested(struct cache_set *c) +unsigned bch_get_congested(struct cache_set *c) { int i; + long rand; if (!c->congested_read_threshold_us && !c->congested_write_threshold_us) @@ -1106,7 +1101,13 @@ int bch_get_congested(struct cache_set *c) i += CONGESTED_MAX; - return i <= 0 ? 1 : fract_exp_two(i, 6); + if (i > 0) + i = fract_exp_two(i, 6); + + rand = get_random_int(); + i -= bitmap_weight(&rand, BITS_PER_LONG); + + return i > 0 ? i : 1; } static void add_sequential(struct task_struct *t) @@ -1126,10 +1127,8 @@ static void check_should_skip(struct cached_dev *dc, struct search *s) { struct cache_set *c = s->op.c; struct bio *bio = &s->bio.bio; - - long rand; - int cutoff = bch_get_congested(c); unsigned mode = cache_mode(dc, bio); + unsigned sectors, congested = bch_get_congested(c); if (atomic_read(&dc->disk.detaching) || c->gc_stats.in_use > CUTOFF_CACHE_ADD || @@ -1147,17 +1146,14 @@ static void check_should_skip(struct cached_dev *dc, struct search *s) goto skip; } - if (!cutoff) { - cutoff = dc->sequential_cutoff >> 9; + if (!congested && !dc->sequential_cutoff) + goto rescale; - if (!cutoff) - goto rescale; - - if (mode == CACHE_MODE_WRITEBACK && - (bio->bi_rw & REQ_WRITE) && - (bio->bi_rw & REQ_SYNC)) - goto rescale; - } + if (!congested && + mode == CACHE_MODE_WRITEBACK && + (bio->bi_rw & REQ_WRITE) && + (bio->bi_rw & REQ_SYNC)) + goto rescale; if (dc->sequential_merge) { struct io *i; @@ -1192,12 +1188,19 @@ found: add_sequential(s->task); } - rand = get_random_int(); - cutoff -= bitmap_weight(&rand, BITS_PER_LONG); + sectors = max(s->task->sequential_io, + s->task->sequential_io_avg) >> 9; - if (cutoff <= (int) (max(s->task->sequential_io, - s->task->sequential_io_avg) >> 9)) + if (dc->sequential_cutoff && + sectors >= dc->sequential_cutoff >> 9) { + trace_bcache_bypass_sequential(s->orig_bio); goto skip; + } + + if (congested && sectors >= congested) { + trace_bcache_bypass_congested(s->orig_bio); + goto skip; + } rescale: bch_rescale_priorities(c, bio_sectors(bio)); diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index 254d9ab5707c..57dc4784f4f4 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -30,7 +30,7 @@ struct search { }; void bch_cache_read_endio(struct bio *, int); -int bch_get_congested(struct cache_set *); +unsigned bch_get_congested(struct cache_set *); void bch_insert_data(struct closure *cl); void bch_btree_insert_async(struct closure *); void bch_cache_read_endio(struct bio *, int); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e53f89988b08..47bc13745068 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -11,6 +11,7 @@ #include "debug.h" #include "request.h" +#include #include #include #include @@ -543,7 +544,6 @@ void bch_prio_write(struct cache *ca) pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free), fifo_used(&ca->free_inc), fifo_used(&ca->unused)); - blktrace_msg(ca, "Starting priorities: " buckets_free(ca)); for (i = prio_buckets(ca) - 1; i >= 0; --i) { long bucket; diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 29228b8a6ffe..f5c2d8695230 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -10,6 +10,7 @@ #include "btree.h" #include "request.h" +#include #include static const char * const cache_replacement_policies[] = { diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c index 983f9bb411bc..7f4f38aa16ae 100644 --- a/drivers/md/bcache/trace.c +++ b/drivers/md/bcache/trace.c @@ -2,6 +2,7 @@ #include "btree.h" #include "request.h" +#include #include #define CREATE_TRACE_POINTS @@ -9,18 +10,42 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_sequential); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_congested); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_replay_key); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_full); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_entry_full); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_cache_cannibalize); + EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc_fail); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_free); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_gc_coalesce); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback_collision); diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 577393e38c3a..e02780545f12 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -15,8 +15,6 @@ struct closure; -#include - #ifdef CONFIG_BCACHE_EDEBUG #define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 2714ed3991d1..82f6d4577be2 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -10,6 +10,8 @@ #include "btree.h" #include "debug.h" +#include + static struct workqueue_struct *dirty_wq; static void read_dirty(struct closure *); @@ -236,10 +238,12 @@ static void write_dirty_finish(struct closure *cl) for (i = 0; i < KEY_PTRS(&w->key); i++) atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin); - pr_debug("clearing %s", pkey(&w->key)); bch_btree_insert(&op, dc->disk.c); closure_sync(&op.cl); + if (op.insert_collision) + trace_bcache_writeback_collision(&w->key); + atomic_long_inc(op.insert_collision ? &dc->disk.c->writeback_keys_failed : &dc->disk.c->writeback_keys_done); @@ -275,7 +279,6 @@ static void write_dirty(struct closure *cl) io->bio.bi_bdev = io->dc->bdev; io->bio.bi_end_io = dirty_endio; - trace_bcache_write_dirty(&io->bio); closure_bio_submit(&io->bio, cl, &io->dc->disk); continue_at(cl, write_dirty_finish, dirty_wq); @@ -296,7 +299,6 @@ static void read_dirty_submit(struct closure *cl) { struct dirty_io *io = container_of(cl, struct dirty_io, cl); - trace_bcache_read_dirty(&io->bio); closure_bio_submit(&io->bio, cl, &io->dc->disk); continue_at(cl, write_dirty, dirty_wq); @@ -352,7 +354,7 @@ static void read_dirty(struct closure *cl) if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL)) goto err_free; - pr_debug("%s", pkey(&w->key)); + trace_bcache_writeback(&w->key); closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index 3cc5a0b278c3..c9952b36fcea 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -9,9 +9,7 @@ struct search; DECLARE_EVENT_CLASS(bcache_request, - TP_PROTO(struct search *s, struct bio *bio), - TP_ARGS(s, bio), TP_STRUCT__entry( @@ -22,7 +20,6 @@ DECLARE_EVENT_CLASS(bcache_request, __field(dev_t, orig_sector ) __field(unsigned int, nr_sector ) __array(char, rwbs, 6 ) - __array(char, comm, TASK_COMM_LEN ) ), TP_fast_assign( @@ -33,36 +30,66 @@ DECLARE_EVENT_CLASS(bcache_request, __entry->orig_sector = bio->bi_sector - 16; __entry->nr_sector = bio->bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), - TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d @ %llu)", + TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm, - __entry->orig_major, __entry->orig_minor, + __entry->rwbs, (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->orig_major, __entry->orig_minor, (unsigned long long)__entry->orig_sector) ); -DEFINE_EVENT(bcache_request, bcache_request_start, +DECLARE_EVENT_CLASS(bkey, + TP_PROTO(struct bkey *k), + TP_ARGS(k), - TP_PROTO(struct search *s, struct bio *bio), + TP_STRUCT__entry( + __field(u32, size ) + __field(u32, inode ) + __field(u64, offset ) + __field(bool, dirty ) + ), - TP_ARGS(s, bio) + TP_fast_assign( + __entry->inode = KEY_INODE(k); + __entry->offset = KEY_OFFSET(k); + __entry->size = KEY_SIZE(k); + __entry->dirty = KEY_DIRTY(k); + ), + + TP_printk("%u:%llu len %u dirty %u", __entry->inode, + __entry->offset, __entry->size, __entry->dirty) ); -DEFINE_EVENT(bcache_request, bcache_request_end, +DECLARE_EVENT_CLASS(btree_node, + TP_PROTO(struct btree *b), + TP_ARGS(b), + + TP_STRUCT__entry( + __field(size_t, bucket ) + ), + + TP_fast_assign( + __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0); + ), + TP_printk("bucket %zu", __entry->bucket) +); + +/* request.c */ + +DEFINE_EVENT(bcache_request, bcache_request_start, TP_PROTO(struct search *s, struct bio *bio), + TP_ARGS(s, bio) +); +DEFINE_EVENT(bcache_request, bcache_request_end, + TP_PROTO(struct search *s, struct bio *bio), TP_ARGS(s, bio) ); DECLARE_EVENT_CLASS(bcache_bio, - TP_PROTO(struct bio *bio), - TP_ARGS(bio), TP_STRUCT__entry( @@ -70,7 +97,6 @@ DECLARE_EVENT_CLASS(bcache_bio, __field(sector_t, sector ) __field(unsigned int, nr_sector ) __array(char, rwbs, 6 ) - __array(char, comm, TASK_COMM_LEN ) ), TP_fast_assign( @@ -78,191 +104,295 @@ DECLARE_EVENT_CLASS(bcache_bio, __entry->sector = bio->bi_sector; __entry->nr_sector = bio->bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_printk("%d,%d %s %llu + %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, __entry->nr_sector) ); - -DEFINE_EVENT(bcache_bio, bcache_passthrough, - +DEFINE_EVENT(bcache_bio, bcache_bypass_sequential, TP_PROTO(struct bio *bio), - TP_ARGS(bio) ); -DEFINE_EVENT(bcache_bio, bcache_cache_hit, - +DEFINE_EVENT(bcache_bio, bcache_bypass_congested, TP_PROTO(struct bio *bio), - TP_ARGS(bio) ); -DEFINE_EVENT(bcache_bio, bcache_cache_miss, +TRACE_EVENT(bcache_read, + TP_PROTO(struct bio *bio, bool hit, bool bypass), + TP_ARGS(bio, hit, bypass), - TP_PROTO(struct bio *bio), + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(sector_t, sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __field(bool, cache_hit ) + __field(bool, bypass ) + ), - TP_ARGS(bio) + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + __entry->cache_hit = hit; + __entry->bypass = bypass; + ), + + TP_printk("%d,%d %s %llu + %u hit %u bypass %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->cache_hit, __entry->bypass) ); -DEFINE_EVENT(bcache_bio, bcache_read_retry, +TRACE_EVENT(bcache_write, + TP_PROTO(struct bio *bio, bool writeback, bool bypass), + TP_ARGS(bio, writeback, bypass), - TP_PROTO(struct bio *bio), + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(sector_t, sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __field(bool, writeback ) + __field(bool, bypass ) + ), - TP_ARGS(bio) -); + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + __entry->writeback = writeback; + __entry->bypass = bypass; + ), -DEFINE_EVENT(bcache_bio, bcache_writethrough, + TP_printk("%d,%d %s %llu + %u hit %u bypass %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->writeback, __entry->bypass) +); +DEFINE_EVENT(bcache_bio, bcache_read_retry, TP_PROTO(struct bio *bio), - TP_ARGS(bio) ); -DEFINE_EVENT(bcache_bio, bcache_writeback, +DEFINE_EVENT(bkey, bcache_cache_insert, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_PROTO(struct bio *bio), +/* Journal */ - TP_ARGS(bio) -); +DECLARE_EVENT_CLASS(cache_set, + TP_PROTO(struct cache_set *c), + TP_ARGS(c), -DEFINE_EVENT(bcache_bio, bcache_write_skip, + TP_STRUCT__entry( + __array(char, uuid, 16 ) + ), - TP_PROTO(struct bio *bio), + TP_fast_assign( + memcpy(__entry->uuid, c->sb.set_uuid, 16); + ), - TP_ARGS(bio) + TP_printk("%pU", __entry->uuid) ); -DEFINE_EVENT(bcache_bio, bcache_btree_read, - - TP_PROTO(struct bio *bio), +DEFINE_EVENT(bkey, bcache_journal_replay_key, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_ARGS(bio) +DEFINE_EVENT(cache_set, bcache_journal_full, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) ); -DEFINE_EVENT(bcache_bio, bcache_btree_write, +DEFINE_EVENT(cache_set, bcache_journal_entry_full, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) +); +DEFINE_EVENT(bcache_bio, bcache_journal_write, TP_PROTO(struct bio *bio), - TP_ARGS(bio) ); -DEFINE_EVENT(bcache_bio, bcache_write_dirty, +/* Btree */ - TP_PROTO(struct bio *bio), +DEFINE_EVENT(cache_set, bcache_btree_cache_cannibalize, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) +); - TP_ARGS(bio) +DEFINE_EVENT(btree_node, bcache_btree_read, + TP_PROTO(struct btree *b), + TP_ARGS(b) ); -DEFINE_EVENT(bcache_bio, bcache_read_dirty, +TRACE_EVENT(bcache_btree_write, + TP_PROTO(struct btree *b), + TP_ARGS(b), - TP_PROTO(struct bio *bio), + TP_STRUCT__entry( + __field(size_t, bucket ) + __field(unsigned, block ) + __field(unsigned, keys ) + ), - TP_ARGS(bio) + TP_fast_assign( + __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0); + __entry->block = b->written; + __entry->keys = b->sets[b->nsets].data->keys; + ), + + TP_printk("bucket %zu", __entry->bucket) ); -DEFINE_EVENT(bcache_bio, bcache_write_moving, +DEFINE_EVENT(btree_node, bcache_btree_node_alloc, + TP_PROTO(struct btree *b), + TP_ARGS(b) +); - TP_PROTO(struct bio *bio), +DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail, + TP_PROTO(struct btree *b), + TP_ARGS(b) +); - TP_ARGS(bio) +DEFINE_EVENT(btree_node, bcache_btree_node_free, + TP_PROTO(struct btree *b), + TP_ARGS(b) ); -DEFINE_EVENT(bcache_bio, bcache_read_moving, +TRACE_EVENT(bcache_btree_gc_coalesce, + TP_PROTO(unsigned nodes), + TP_ARGS(nodes), - TP_PROTO(struct bio *bio), + TP_STRUCT__entry( + __field(unsigned, nodes ) + ), - TP_ARGS(bio) -); + TP_fast_assign( + __entry->nodes = nodes; + ), -DEFINE_EVENT(bcache_bio, bcache_journal_write, + TP_printk("coalesced %u nodes", __entry->nodes) +); - TP_PROTO(struct bio *bio), +DEFINE_EVENT(cache_set, bcache_gc_start, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) +); - TP_ARGS(bio) +DEFINE_EVENT(cache_set, bcache_gc_end, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) ); -DECLARE_EVENT_CLASS(bcache_cache_bio, +DEFINE_EVENT(bkey, bcache_gc_copy, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_PROTO(struct bio *bio, - sector_t orig_sector, - struct block_device* orig_bdev), +DEFINE_EVENT(bkey, bcache_gc_copy_collision, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_ARGS(bio, orig_sector, orig_bdev), +DECLARE_EVENT_CLASS(btree_split, + TP_PROTO(struct btree *b, unsigned keys), + TP_ARGS(b, keys), TP_STRUCT__entry( - __field(dev_t, dev ) - __field(dev_t, orig_dev ) - __field(sector_t, sector ) - __field(sector_t, orig_sector ) - __field(unsigned int, nr_sector ) - __array(char, rwbs, 6 ) - __array(char, comm, TASK_COMM_LEN ) + __field(size_t, bucket ) + __field(unsigned, keys ) ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->orig_dev = orig_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->orig_sector = orig_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0); + __entry->keys = keys; ), - TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d %llu)", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm, - MAJOR(__entry->orig_dev), MINOR(__entry->orig_dev), - (unsigned long long)__entry->orig_sector) + TP_printk("bucket %zu keys %u", __entry->bucket, __entry->keys) ); -DEFINE_EVENT(bcache_cache_bio, bcache_cache_insert, - - TP_PROTO(struct bio *bio, - sector_t orig_sector, - struct block_device *orig_bdev), +DEFINE_EVENT(btree_split, bcache_btree_node_split, + TP_PROTO(struct btree *b, unsigned keys), + TP_ARGS(b, keys) +); - TP_ARGS(bio, orig_sector, orig_bdev) +DEFINE_EVENT(btree_split, bcache_btree_node_compact, + TP_PROTO(struct btree *b, unsigned keys), + TP_ARGS(b, keys) ); -DECLARE_EVENT_CLASS(bcache_gc, +DEFINE_EVENT(btree_node, bcache_btree_set_root, + TP_PROTO(struct btree *b), + TP_ARGS(b) +); - TP_PROTO(uint8_t *uuid), +/* Allocator */ - TP_ARGS(uuid), +TRACE_EVENT(bcache_alloc_invalidate, + TP_PROTO(struct cache *ca), + TP_ARGS(ca), TP_STRUCT__entry( - __field(uint8_t *, uuid) + __field(unsigned, free ) + __field(unsigned, free_inc ) + __field(unsigned, free_inc_size ) + __field(unsigned, unused ) ), TP_fast_assign( - __entry->uuid = uuid; + __entry->free = fifo_used(&ca->free); + __entry->free_inc = fifo_used(&ca->free_inc); + __entry->free_inc_size = ca->free_inc.size; + __entry->unused = fifo_used(&ca->unused); ), - TP_printk("%pU", __entry->uuid) + TP_printk("free %u free_inc %u/%u unused %u", __entry->free, + __entry->free_inc, __entry->free_inc_size, __entry->unused) ); +TRACE_EVENT(bcache_alloc_fail, + TP_PROTO(struct cache *ca), + TP_ARGS(ca), -DEFINE_EVENT(bcache_gc, bcache_gc_start, + TP_STRUCT__entry( + __field(unsigned, free ) + __field(unsigned, free_inc ) + __field(unsigned, unused ) + __field(unsigned, blocked ) + ), - TP_PROTO(uint8_t *uuid), + TP_fast_assign( + __entry->free = fifo_used(&ca->free); + __entry->free_inc = fifo_used(&ca->free_inc); + __entry->unused = fifo_used(&ca->unused); + __entry->blocked = atomic_read(&ca->set->prio_blocked); + ), - TP_ARGS(uuid) + TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free, + __entry->free_inc, __entry->unused, __entry->blocked) ); -DEFINE_EVENT(bcache_gc, bcache_gc_end, +/* Background writeback */ - TP_PROTO(uint8_t *uuid), +DEFINE_EVENT(bkey, bcache_writeback, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_ARGS(uuid) +DEFINE_EVENT(bkey, bcache_writeback_collision, + TP_PROTO(struct bkey *k), + TP_ARGS(k) ); #endif /* _TRACE_BCACHE_H */ -- cgit v1.2.3-58-ga151 From 85b1492ee113486d871de7676a61f506a43ca475 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 14 May 2013 20:33:16 -0700 Subject: bcache: Rip out pkey()/pbtree() Old gcc doesnt like the struct hack, and it is kind of ugly. So finish off the work to convert pr_debug() statements to tracepoints, and delete pkey()/pbtree(). Signed-off-by: Kent Overstreet --- drivers/md/bcache/bset.c | 16 ++++++++++++---- drivers/md/bcache/btree.c | 21 ++++++--------------- drivers/md/bcache/btree.h | 7 +++++++ drivers/md/bcache/debug.c | 40 +++++++++++++++++++++++++--------------- drivers/md/bcache/debug.h | 11 ++--------- drivers/md/bcache/super.c | 5 +++-- drivers/md/bcache/trace.c | 2 ++ include/trace/events/bcache.h | 33 +++++++++++++++++++++++++++++++++ 8 files changed, 90 insertions(+), 45 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index cb4578a327b9..e9399ed7f688 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -78,6 +78,7 @@ struct bkey *bch_keylist_pop(struct keylist *l) bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) { unsigned i; + char buf[80]; if (level && (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))) goto bad; @@ -102,7 +103,8 @@ bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) return false; bad: - cache_bug(c, "spotted bad key %s: %s", pkey(k), bch_ptr_status(c, k)); + bch_bkey_to_text(buf, sizeof(buf), k); + cache_bug(c, "spotted bad key %s: %s", buf, bch_ptr_status(c, k)); return true; } @@ -162,10 +164,16 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k) #ifdef CONFIG_BCACHE_EDEBUG bug: mutex_unlock(&b->c->bucket_lock); - btree_bug(b, + + { + char buf[80]; + + bch_bkey_to_text(buf, sizeof(buf), k); + btree_bug(b, "inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i", - pkey(k), PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), - g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); + buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), + g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); + } return true; #endif } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 218d486259a3..53a0f4ef4e32 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1770,7 +1770,7 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, { struct bset *i = b->sets[b->nsets].data; struct bkey *m, *prev; - const char *status = "insert"; + unsigned status = BTREE_INSERT_STATUS_INSERT; BUG_ON(bkey_cmp(k, &b->key) > 0); BUG_ON(b->level && !KEY_PTRS(k)); @@ -1803,17 +1803,17 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, goto insert; /* prev is in the tree, if we merge we're done */ - status = "back merging"; + status = BTREE_INSERT_STATUS_BACK_MERGE; if (prev && bch_bkey_try_merge(b, prev, k)) goto merged; - status = "overwrote front"; + status = BTREE_INSERT_STATUS_OVERWROTE; if (m != end(i) && KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m)) goto copy; - status = "front merge"; + status = BTREE_INSERT_STATUS_FRONT_MERGE; if (m != end(i) && bch_bkey_try_merge(b, k, m)) goto copy; @@ -1823,16 +1823,12 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, insert: shift_keys(b, m, k); copy: bkey_copy(m, k); merged: - bch_check_keys(b, "%s for %s at %s: %s", status, - op_type(op), pbtree(b), pkey(k)); - bch_check_key_order_msg(b, i, "%s for %s at %s: %s", status, - op_type(op), pbtree(b), pkey(k)); + bch_check_keys(b, "%u for %s", status, op_type(op)); if (b->level && !KEY_OFFSET(k)) btree_current_write(b)->prio_blocked++; - pr_debug("%s for %s at %s: %s", status, - op_type(op), pbtree(b), pkey(k)); + trace_bcache_btree_insert_key(b, k, op->type, status); return true; } @@ -2234,9 +2230,6 @@ int bch_btree_search_recurse(struct btree *b, struct btree_op *op) struct btree_iter iter; bch_btree_iter_init(b, &iter, &KEY(op->inode, bio->bi_sector, 0)); - pr_debug("at %s searching for %u:%llu", pbtree(b), op->inode, - (uint64_t) bio->bi_sector); - do { k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); if (!k) { @@ -2302,8 +2295,6 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, if (buf->key_predicate(buf, k)) { struct keybuf_key *w; - pr_debug("%s", pkey(k)); - spin_lock(&buf->lock); w = array_alloc(&buf->freelist); diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 809bd77847a2..2b016b93cad4 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -271,6 +271,13 @@ struct btree_op { BKEY_PADDED(replace); }; +enum { + BTREE_INSERT_STATUS_INSERT, + BTREE_INSERT_STATUS_BACK_MERGE, + BTREE_INSERT_STATUS_OVERWROTE, + BTREE_INSERT_STATUS_FRONT_MERGE, +}; + void bch_btree_op_init_stack(struct btree_op *); static inline void rw_lock(bool w, struct btree *b, int level) diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index ae6096c6845d..82e3a07771ec 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -47,11 +47,10 @@ const char *bch_ptr_status(struct cache_set *c, const struct bkey *k) return ""; } -struct keyprint_hack bch_pkey(const struct bkey *k) +int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k) { unsigned i = 0; - struct keyprint_hack r; - char *out = r.s, *end = r.s + KEYHACK_SIZE; + char *out = buf, *end = buf + size; #define p(...) (out += scnprintf(out, end - out, __VA_ARGS__)) @@ -75,16 +74,14 @@ struct keyprint_hack bch_pkey(const struct bkey *k) if (KEY_CSUM(k)) p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]); #undef p - return r; + return out - buf; } -struct keyprint_hack bch_pbtree(const struct btree *b) +int bch_btree_to_text(char *buf, size_t size, const struct btree *b) { - struct keyprint_hack r; - - snprintf(r.s, 40, "%zu level %i/%i", PTR_BUCKET_NR(b->c, &b->key, 0), - b->level, b->c->root ? b->c->root->level : -1); - return r; + return scnprintf(buf, size, "%zu level %i/%i", + PTR_BUCKET_NR(b->c, &b->key, 0), + b->level, b->c->root ? b->c->root->level : -1); } #if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG) @@ -100,10 +97,12 @@ static void dump_bset(struct btree *b, struct bset *i) { struct bkey *k; unsigned j; + char buf[80]; for (k = i->start; k < end(i); k = bkey_next(k)) { + bch_bkey_to_text(buf, sizeof(buf), k); printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b), - (uint64_t *) k - i->d, i->keys, pkey(k)); + (uint64_t *) k - i->d, i->keys, buf); for (j = 0; j < KEY_PTRS(k); j++) { size_t n = PTR_BUCKET_NR(b->c, k, j); @@ -252,6 +251,7 @@ static void vdump_bucket_and_panic(struct btree *b, const char *fmt, va_list args) { unsigned i; + char buf[80]; console_lock(); @@ -262,7 +262,8 @@ static void vdump_bucket_and_panic(struct btree *b, const char *fmt, console_unlock(); - panic("at %s\n", pbtree(b)); + bch_btree_to_text(buf, sizeof(buf), b); + panic("at %s\n", buf); } void bch_check_key_order_msg(struct btree *b, struct bset *i, @@ -337,6 +338,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf, { struct dump_iterator *i = file->private_data; ssize_t ret = 0; + char kbuf[80]; while (size) { struct keybuf_key *w; @@ -359,7 +361,8 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf, if (!w) break; - i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", pkey(&w->key)); + bch_bkey_to_text(kbuf, sizeof(kbuf), &w->key); + i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", kbuf); bch_keybuf_del(&i->keys, w); } @@ -526,10 +529,17 @@ static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a, k < end(i); k = bkey_next(k), l = bkey_next(l)) if (bkey_cmp(k, l) || - KEY_SIZE(k) != KEY_SIZE(l)) + KEY_SIZE(k) != KEY_SIZE(l)) { + char buf1[80]; + char buf2[80]; + + bch_bkey_to_text(buf1, sizeof(buf1), k); + bch_bkey_to_text(buf2, sizeof(buf2), l); + pr_err("key %zi differs: %s != %s", (uint64_t *) k - i->d, - pkey(k), pkey(l)); + buf1, buf2); + } for (j = 0; j < 3; j++) { pr_err("**** Set %i ****", j); diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h index f9378a218148..1c39b5a2489b 100644 --- a/drivers/md/bcache/debug.h +++ b/drivers/md/bcache/debug.h @@ -3,15 +3,8 @@ /* Btree/bkey debug printing */ -#define KEYHACK_SIZE 80 -struct keyprint_hack { - char s[KEYHACK_SIZE]; -}; - -struct keyprint_hack bch_pkey(const struct bkey *k); -struct keyprint_hack bch_pbtree(const struct btree *b); -#define pkey(k) (&bch_pkey(k).s[0]) -#define pbtree(b) (&bch_pbtree(b).s[0]) +int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k); +int bch_btree_to_text(char *buf, size_t size, const struct btree *b); #ifdef CONFIG_BCACHE_EDEBUG diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 47bc13745068..f24c2e0cbb1c 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -343,6 +343,7 @@ static void uuid_io(struct cache_set *c, unsigned long rw, struct closure *cl = &c->uuid_write.cl; struct uuid_entry *u; unsigned i; + char buf[80]; BUG_ON(!parent); closure_lock(&c->uuid_write, parent); @@ -363,8 +364,8 @@ static void uuid_io(struct cache_set *c, unsigned long rw, break; } - pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", - pkey(&c->uuid_bucket)); + bch_bkey_to_text(buf, sizeof(buf), k); + pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", buf); for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) if (!bch_is_zero(u->uuid, 16)) diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c index 7f4f38aa16ae..f7b6c197f90f 100644 --- a/drivers/md/bcache/trace.c +++ b/drivers/md/bcache/trace.c @@ -40,6 +40,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_insert_key); + EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root); diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index c9952b36fcea..5ebda976ea93 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -305,6 +305,39 @@ DEFINE_EVENT(bkey, bcache_gc_copy_collision, TP_ARGS(k) ); +TRACE_EVENT(bcache_btree_insert_key, + TP_PROTO(struct btree *b, struct bkey *k, unsigned op, unsigned status), + TP_ARGS(b, k, op, status), + + TP_STRUCT__entry( + __field(u64, btree_node ) + __field(u32, btree_level ) + __field(u32, inode ) + __field(u64, offset ) + __field(u32, size ) + __field(u8, dirty ) + __field(u8, op ) + __field(u8, status ) + ), + + TP_fast_assign( + __entry->btree_node = PTR_BUCKET_NR(b->c, &b->key, 0); + __entry->btree_level = b->level; + __entry->inode = KEY_INODE(k); + __entry->offset = KEY_OFFSET(k); + __entry->size = KEY_SIZE(k); + __entry->dirty = KEY_DIRTY(k); + __entry->op = op; + __entry->status = status; + ), + + TP_printk("%u for %u at %llu(%u): %u:%llu len %u dirty %u", + __entry->status, __entry->op, + __entry->btree_node, __entry->btree_level, + __entry->inode, __entry->offset, + __entry->size, __entry->dirty) +); + DECLARE_EVENT_CLASS(btree_split, TP_PROTO(struct btree *b, unsigned keys), TP_ARGS(b, keys), -- cgit v1.2.3-58-ga151 From 6ded34d1a54c046a45db071d3cb7b37bd0a4a31f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 11 May 2013 15:59:37 -0700 Subject: bcache: Improve lazy sorting The old lazy sorting code was kind of hacky - rewrite in a way that mathematically makes more sense; the idea is that the size of the sets of keys in a btree node should increase by a more or less fixed ratio from smallest to biggest. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/bset.c | 40 +++++++++++++++++++++++----------------- drivers/md/bcache/super.c | 2 ++ 3 files changed, 26 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 59c15e09e4dd..6fa5a1e33c49 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -828,6 +828,7 @@ struct cache_set { */ struct mutex sort_lock; struct bset *sort; + unsigned sort_crit_factor; /* List of buckets we're currently writing data to */ struct list_head data_buckets; diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index e9399ed7f688..a0f190ac17a4 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -1092,33 +1092,39 @@ void bch_btree_sort_into(struct btree *b, struct btree *new) new->sets->size = 0; } +#define SORT_CRIT (4096 / sizeof(uint64_t)) + void bch_btree_sort_lazy(struct btree *b) { - if (b->nsets) { - unsigned i, j, keys = 0, total; - - for (i = 0; i <= b->nsets; i++) - keys += b->sets[i].data->keys; + unsigned crit = SORT_CRIT; + int i; - total = keys; + /* Don't sort if nothing to do */ + if (!b->nsets) + goto out; - for (j = 0; j < b->nsets; j++) { - if (keys * 2 < total || - keys < 1000) { - bch_btree_sort_partial(b, j); - return; - } + /* If not a leaf node, always sort */ + if (b->level) { + bch_btree_sort(b); + return; + } - keys -= b->sets[j].data->keys; - } + for (i = b->nsets - 1; i >= 0; --i) { + crit *= b->c->sort_crit_factor; - /* Must sort if b->nsets == 3 or we'll overflow */ - if (b->nsets >= (MAX_BSETS - 1) - b->level) { - bch_btree_sort(b); + if (b->sets[i].data->keys < crit) { + bch_btree_sort_partial(b, i); return; } } + /* Sort if we'd overflow */ + if (b->nsets + 1 == MAX_BSETS) { + bch_btree_sort(b); + return; + } + +out: bset_build_written_tree(b); } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f24c2e0cbb1c..3c8474161e8e 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1375,6 +1375,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) c->btree_pages = max_t(int, c->btree_pages / 4, BTREE_MAX_PAGES); + c->sort_crit_factor = int_sqrt(c->btree_pages); + mutex_init(&c->bucket_lock); mutex_init(&c->sort_lock); spin_lock_init(&c->sort_time_lock); -- cgit v1.2.3-58-ga151 From 444fc0b6b167ed164e7436621a9d095e042644dd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 11 May 2013 17:07:26 -0700 Subject: bcache: Initialize sectors_dirty when attaching Previously, dirty_data wouldn't get initialized until the first garbage collection... which was a bit of a problem for background writeback (as the PD controller keys off of it) and also confusing for users. This is also prep work for making background writeback aware of raid5/6 stripes. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 2 +- drivers/md/bcache/btree.c | 29 +---------------------------- drivers/md/bcache/super.c | 1 + drivers/md/bcache/writeback.c | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 6fa5a1e33c49..d099d8894c2f 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -438,7 +438,6 @@ struct bcache_device { atomic_t detaching; atomic_long_t sectors_dirty; - unsigned long sectors_dirty_gc; unsigned long sectors_dirty_last; long sectors_dirty_derivative; @@ -1225,6 +1224,7 @@ void bch_cache_set_stop(struct cache_set *); struct cache_set *bch_cache_set_alloc(struct cache_sb *); void bch_btree_cache_free(struct cache_set *); int bch_btree_cache_alloc(struct cache_set *); +void bch_sectors_dirty_init(struct cached_dev *); void bch_cached_dev_writeback_init(struct cached_dev *); void bch_moving_init_cache_set(struct cache_set *); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 53a0f4ef4e32..230c3a6d9be2 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1119,11 +1119,8 @@ static int btree_gc_mark_node(struct btree *b, unsigned *keys, gc->nkeys++; gc->data += KEY_SIZE(k); - if (KEY_DIRTY(k)) { + if (KEY_DIRTY(k)) gc->dirty += KEY_SIZE(k); - if (d) - d->sectors_dirty_gc += KEY_SIZE(k); - } } for (t = b->sets; t <= &b->sets[b->nsets]; t++) @@ -1377,7 +1374,6 @@ static void btree_gc_start(struct cache_set *c) { struct cache *ca; struct bucket *b; - struct bcache_device **d; unsigned i; if (!c->gc_mark_valid) @@ -1395,12 +1391,6 @@ static void btree_gc_start(struct cache_set *c) SET_GC_MARK(b, GC_MARK_RECLAIMABLE); } - for (d = c->devices; - d < c->devices + c->nr_uuids; - d++) - if (*d) - (*d)->sectors_dirty_gc = 0; - mutex_unlock(&c->bucket_lock); } @@ -1409,7 +1399,6 @@ size_t bch_btree_gc_finish(struct cache_set *c) size_t available = 0; struct bucket *b; struct cache *ca; - struct bcache_device **d; unsigned i; mutex_lock(&c->bucket_lock); @@ -1452,22 +1441,6 @@ size_t bch_btree_gc_finish(struct cache_set *c) } } - for (d = c->devices; - d < c->devices + c->nr_uuids; - d++) - if (*d) { - unsigned long last = - atomic_long_read(&((*d)->sectors_dirty)); - long difference = (*d)->sectors_dirty_gc - last; - - pr_debug("sectors dirty off by %li", difference); - - (*d)->sectors_dirty_last += difference; - - atomic_long_set(&((*d)->sectors_dirty), - (*d)->sectors_dirty_gc); - } - mutex_unlock(&c->bucket_lock); return available; } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3c8474161e8e..dbfa1c38e85e 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -961,6 +961,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) atomic_set(&dc->count, 1); if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { + bch_sectors_dirty_init(dc); atomic_set(&dc->has_dirty, 1); atomic_inc(&dc->count); bch_writeback_queue(dc); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 82f6d4577be2..553949eefd51 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -377,6 +377,42 @@ err: refill_dirty(cl); } +/* Init */ + +static int bch_btree_sectors_dirty_init(struct btree *b, struct btree_op *op, + struct cached_dev *dc) +{ + struct bkey *k; + struct btree_iter iter; + + bch_btree_iter_init(b, &iter, &KEY(dc->disk.id, 0, 0)); + while ((k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad))) + if (!b->level) { + if (KEY_INODE(k) > dc->disk.id) + break; + + if (KEY_DIRTY(k)) + atomic_long_add(KEY_SIZE(k), + &dc->disk.sectors_dirty); + } else { + btree(sectors_dirty_init, k, b, op, dc); + if (KEY_INODE(k) > dc->disk.id) + break; + + cond_resched(); + } + + return 0; +} + +void bch_sectors_dirty_init(struct cached_dev *dc) +{ + struct btree_op op; + + bch_btree_op_init_stack(&op); + btree_root(sectors_dirty_init, dc->disk.c, &op, dc); +} + void bch_cached_dev_writeback_init(struct cached_dev *dc) { closure_init_unlocked(&dc->writeback); -- cgit v1.2.3-58-ga151 From 279afbad4e54acbd61bf88a54a73af3bbfdeb5dd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 5 Jun 2013 06:21:07 -0700 Subject: bcache: Track dirty data by stripe To make background writeback aware of raid5/6 stripes, we first need to track the amount of dirty data within each stripe - we do this by breaking up the existing sectors_dirty into per stripe atomic_ts Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 10 ++++------ drivers/md/bcache/btree.c | 20 +++++++++++++------- drivers/md/bcache/request.c | 3 ++- drivers/md/bcache/super.c | 32 ++++++++++++++++++++++++++++---- drivers/md/bcache/sysfs.c | 5 +++-- drivers/md/bcache/writeback.c | 40 ++++++++++++++++++++++++++++++++++------ drivers/md/bcache/writeback.h | 21 +++++++++++++++++++++ 7 files changed, 105 insertions(+), 26 deletions(-) create mode 100644 drivers/md/bcache/writeback.h (limited to 'drivers') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d099d8894c2f..dbddef0cdb59 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -437,7 +437,10 @@ struct bcache_device { /* If nonzero, we're detaching/unregistering from cache set */ atomic_t detaching; - atomic_long_t sectors_dirty; + uint64_t nr_stripes; + unsigned stripe_size_bits; + atomic_t *stripe_sectors_dirty; + unsigned long sectors_dirty_last; long sectors_dirty_derivative; @@ -1159,9 +1162,6 @@ static inline void wake_up_allocators(struct cache_set *c) /* Forward declarations */ -void bch_writeback_queue(struct cached_dev *); -void bch_writeback_add(struct cached_dev *, unsigned); - void bch_count_io_errors(struct cache *, int, const char *); void bch_bbio_count_io_errors(struct cache_set *, struct bio *, int, const char *); @@ -1224,8 +1224,6 @@ void bch_cache_set_stop(struct cache_set *); struct cache_set *bch_cache_set_alloc(struct cache_sb *); void bch_btree_cache_free(struct cache_set *); int bch_btree_cache_alloc(struct cache_set *); -void bch_sectors_dirty_init(struct cached_dev *); -void bch_cached_dev_writeback_init(struct cached_dev *); void bch_moving_init_cache_set(struct cache_set *); int bch_cache_allocator_start(struct cache *ca); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 230c3a6d9be2..b93cf56260a4 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -24,6 +24,7 @@ #include "btree.h" #include "debug.h" #include "request.h" +#include "writeback.h" #include #include @@ -1599,14 +1600,14 @@ static bool fix_overlapping_extents(struct btree *b, struct btree_iter *iter, struct btree_op *op) { - void subtract_dirty(struct bkey *k, int sectors) + void subtract_dirty(struct bkey *k, uint64_t offset, int sectors) { - struct bcache_device *d = b->c->devices[KEY_INODE(k)]; - - if (KEY_DIRTY(k) && d) - atomic_long_sub(sectors, &d->sectors_dirty); + if (KEY_DIRTY(k)) + bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k), + offset, -sectors); } + uint64_t old_offset; unsigned old_size, sectors_found = 0; while (1) { @@ -1618,6 +1619,7 @@ static bool fix_overlapping_extents(struct btree *b, if (bkey_cmp(k, &START_KEY(insert)) <= 0) continue; + old_offset = KEY_START(k); old_size = KEY_SIZE(k); /* @@ -1673,7 +1675,7 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *top; - subtract_dirty(k, KEY_SIZE(insert)); + subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert)); if (bkey_written(b, k)) { /* @@ -1720,7 +1722,7 @@ static bool fix_overlapping_extents(struct btree *b, } } - subtract_dirty(k, old_size - KEY_SIZE(k)); + subtract_dirty(k, old_offset, old_size - KEY_SIZE(k)); } check_failed: @@ -1796,6 +1798,10 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, insert: shift_keys(b, m, k); copy: bkey_copy(m, k); merged: + if (KEY_DIRTY(k)) + bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k), + KEY_START(k), KEY_SIZE(k)); + bch_check_keys(b, "%u for %s", status, op_type(op)); if (b->level && !KEY_OFFSET(k)) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 695469958c1e..017c95fced8e 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -10,6 +10,7 @@ #include "btree.h" #include "debug.h" #include "request.h" +#include "writeback.h" #include #include @@ -1044,7 +1045,7 @@ static void request_write(struct cached_dev *dc, struct search *s) closure_bio_submit(bio, cl, s->d); } else { s->op.cache_bio = bio; - bch_writeback_add(dc, bio_sectors(bio)); + bch_writeback_add(dc); } out: closure_call(&s->op.cl, bch_insert_data, NULL, cl); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index dbfa1c38e85e..8c73f0c7f28a 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -10,6 +10,7 @@ #include "btree.h" #include "debug.h" #include "request.h" +#include "writeback.h" #include #include @@ -744,13 +745,35 @@ static void bcache_device_free(struct bcache_device *d) mempool_destroy(d->unaligned_bvec); if (d->bio_split) bioset_free(d->bio_split); + if (is_vmalloc_addr(d->stripe_sectors_dirty)) + vfree(d->stripe_sectors_dirty); + else + kfree(d->stripe_sectors_dirty); closure_debug_destroy(&d->cl); } -static int bcache_device_init(struct bcache_device *d, unsigned block_size) +static int bcache_device_init(struct bcache_device *d, unsigned block_size, + sector_t sectors) { struct request_queue *q; + size_t n; + + if (!d->stripe_size_bits) + d->stripe_size_bits = 31; + + d->nr_stripes = round_up(sectors, 1 << d->stripe_size_bits) >> + d->stripe_size_bits; + + if (!d->nr_stripes || d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) + return -ENOMEM; + + n = d->nr_stripes * sizeof(atomic_t); + d->stripe_sectors_dirty = n < PAGE_SIZE << 6 + ? kzalloc(n, GFP_KERNEL) + : vzalloc(n); + if (!d->stripe_sectors_dirty) + return -ENOMEM; if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || !(d->unaligned_bvec = mempool_create_kmalloc_pool(1, @@ -760,6 +783,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size) !(q = blk_alloc_queue(GFP_KERNEL))) return -ENOMEM; + set_capacity(d->disk, sectors); snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor); d->disk->major = bcache_major; @@ -1047,7 +1071,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size) hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); } - ret = bcache_device_init(&dc->disk, block_size); + ret = bcache_device_init(&dc->disk, block_size, + dc->bdev->bd_part->nr_sects - dc->sb.data_offset); if (ret) return ret; @@ -1146,11 +1171,10 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) kobject_init(&d->kobj, &bch_flash_dev_ktype); - if (bcache_device_init(d, block_bytes(c))) + if (bcache_device_init(d, block_bytes(c), u->sectors)) goto err; bcache_device_attach(d, c, u - c->uuids); - set_capacity(d->disk, u->sectors); bch_flash_dev_request_init(d); add_disk(d->disk); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index f5c2d8695230..cf8d91ec3238 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -9,6 +9,7 @@ #include "sysfs.h" #include "btree.h" #include "request.h" +#include "writeback.h" #include #include @@ -128,7 +129,7 @@ SHOW(__bch_cached_dev) char derivative[20]; char target[20]; bch_hprint(dirty, - atomic_long_read(&dc->disk.sectors_dirty) << 9); + bcache_dev_sectors_dirty(&dc->disk) << 9); bch_hprint(derivative, dc->writeback_rate_derivative << 9); bch_hprint(target, dc->writeback_rate_target << 9); @@ -144,7 +145,7 @@ SHOW(__bch_cached_dev) } sysfs_hprint(dirty_data, - atomic_long_read(&dc->disk.sectors_dirty) << 9); + bcache_dev_sectors_dirty(&dc->disk) << 9); var_printf(sequential_merge, "%i"); var_hprint(sequential_cutoff); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 553949eefd51..dd815475c524 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -9,6 +9,7 @@ #include "bcache.h" #include "btree.h" #include "debug.h" +#include "writeback.h" #include @@ -38,7 +39,7 @@ static void __update_writeback_rate(struct cached_dev *dc) int change = 0; int64_t error; - int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty); + int64_t dirty = bcache_dev_sectors_dirty(&dc->disk); int64_t derivative = dirty - dc->disk.sectors_dirty_last; dc->disk.sectors_dirty_last = dirty; @@ -183,10 +184,8 @@ void bch_writeback_queue(struct cached_dev *dc) } } -void bch_writeback_add(struct cached_dev *dc, unsigned sectors) +void bch_writeback_add(struct cached_dev *dc) { - atomic_long_add(sectors, &dc->disk.sectors_dirty); - if (!atomic_read(&dc->has_dirty) && !atomic_xchg(&dc->has_dirty, 1)) { atomic_inc(&dc->count); @@ -205,6 +204,34 @@ void bch_writeback_add(struct cached_dev *dc, unsigned sectors) } } +void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, + uint64_t offset, int nr_sectors) +{ + struct bcache_device *d = c->devices[inode]; + unsigned stripe_size, stripe_offset; + uint64_t stripe; + + if (!d) + return; + + stripe_size = 1 << d->stripe_size_bits; + stripe = offset >> d->stripe_size_bits; + stripe_offset = offset & (stripe_size - 1); + + while (nr_sectors) { + int s = min_t(unsigned, abs(nr_sectors), + stripe_size - stripe_offset); + + if (nr_sectors < 0) + s = -s; + + atomic_add(s, d->stripe_sectors_dirty + stripe); + nr_sectors -= s; + stripe_offset = 0; + stripe++; + } +} + /* Background writeback - IO loop */ static void dirty_io_destructor(struct closure *cl) @@ -392,8 +419,9 @@ static int bch_btree_sectors_dirty_init(struct btree *b, struct btree_op *op, break; if (KEY_DIRTY(k)) - atomic_long_add(KEY_SIZE(k), - &dc->disk.sectors_dirty); + bcache_dev_sectors_dirty_add(b->c, dc->disk.id, + KEY_START(k), + KEY_SIZE(k)); } else { btree(sectors_dirty_init, k, b, op, dc); if (KEY_INODE(k) > dc->disk.id) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h new file mode 100644 index 000000000000..5ce9771df047 --- /dev/null +++ b/drivers/md/bcache/writeback.h @@ -0,0 +1,21 @@ +#ifndef _BCACHE_WRITEBACK_H +#define _BCACHE_WRITEBACK_H + +static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) +{ + uint64_t i, ret = 0; + + for (i = 0; i < d->nr_stripes; i++) + ret += atomic_read(d->stripe_sectors_dirty + i); + + return ret; +} + +void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); +void bch_writeback_queue(struct cached_dev *); +void bch_writeback_add(struct cached_dev *); + +void bch_sectors_dirty_init(struct cached_dev *dc); +void bch_cached_dev_writeback_init(struct cached_dev *); + +#endif -- cgit v1.2.3-58-ga151 From 72c270612bd33192fa836ad0f2939af1ca218292 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 5 Jun 2013 06:24:39 -0700 Subject: bcache: Write out full stripes Now that we're tracking dirty data per stripe, we can add two optimizations for raid5/6: * If a stripe is already dirty, force writes to that stripe to writeback mode - to help build up full stripes of dirty data * When flushing dirty data, preferentially write out full stripes first if there are any. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 3 +-- drivers/md/bcache/btree.c | 19 ++++++++++--------- drivers/md/bcache/btree.h | 9 +++++---- drivers/md/bcache/debug.c | 4 ++-- drivers/md/bcache/movinggc.c | 5 +++-- drivers/md/bcache/request.c | 23 +++++++--------------- drivers/md/bcache/sysfs.c | 8 ++++++++ drivers/md/bcache/writeback.c | 44 +++++++++++++++++++++++++++++++++++++++++-- drivers/md/bcache/writeback.h | 43 ++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 121 insertions(+), 37 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index dbddef0cdb59..342ba86c6e4f 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -387,8 +387,6 @@ struct keybuf_key { typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *); struct keybuf { - keybuf_pred_fn *key_predicate; - struct bkey last_scanned; spinlock_t lock; @@ -532,6 +530,7 @@ struct cached_dev { unsigned sequential_merge:1; unsigned verify:1; + unsigned partial_stripes_expensive:1; unsigned writeback_metadata:1; unsigned writeback_running:1; unsigned char writeback_percent; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index b93cf56260a4..09fb8a2f43da 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2252,7 +2252,8 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l, } static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, - struct keybuf *buf, struct bkey *end) + struct keybuf *buf, struct bkey *end, + keybuf_pred_fn *pred) { struct btree_iter iter; bch_btree_iter_init(b, &iter, &buf->last_scanned); @@ -2271,7 +2272,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, if (bkey_cmp(&buf->last_scanned, end) >= 0) break; - if (buf->key_predicate(buf, k)) { + if (pred(buf, k)) { struct keybuf_key *w; spin_lock(&buf->lock); @@ -2290,7 +2291,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, if (!k) break; - btree(refill_keybuf, k, b, op, buf, end); + btree(refill_keybuf, k, b, op, buf, end, pred); /* * Might get an error here, but can't really do anything * and it'll get logged elsewhere. Just read what we @@ -2308,7 +2309,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, } void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, - struct bkey *end) + struct bkey *end, keybuf_pred_fn *pred) { struct bkey start = buf->last_scanned; struct btree_op op; @@ -2316,7 +2317,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, cond_resched(); - btree_root(refill_keybuf, c, &op, buf, end); + btree_root(refill_keybuf, c, &op, buf, end, pred); closure_sync(&op.cl); pr_debug("found %s keys from %llu:%llu to %llu:%llu", @@ -2402,7 +2403,8 @@ struct keybuf_key *bch_keybuf_next(struct keybuf *buf) struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c, struct keybuf *buf, - struct bkey *end) + struct bkey *end, + keybuf_pred_fn *pred) { struct keybuf_key *ret; @@ -2416,15 +2418,14 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c, break; } - bch_refill_keybuf(c, buf, end); + bch_refill_keybuf(c, buf, end, pred); } return ret; } -void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn) +void bch_keybuf_init(struct keybuf *buf) { - buf->key_predicate = fn; buf->last_scanned = MAX_KEY; buf->keys = RB_ROOT; diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 2b016b93cad4..f66d69a7baf1 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -391,13 +391,14 @@ void bch_moving_gc(struct closure *); int bch_btree_check(struct cache_set *, struct btree_op *); uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *); -void bch_keybuf_init(struct keybuf *, keybuf_pred_fn *); -void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *); +void bch_keybuf_init(struct keybuf *); +void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *, + keybuf_pred_fn *); bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *, struct bkey *); void bch_keybuf_del(struct keybuf *, struct keybuf_key *); struct keybuf_key *bch_keybuf_next(struct keybuf *); -struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, - struct keybuf *, struct bkey *); +struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *, + struct bkey *, keybuf_pred_fn *); #endif diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 82e3a07771ec..1c8fd319846e 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -357,7 +357,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf, if (i->bytes) break; - w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY); + w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY, dump_pred); if (!w) break; @@ -380,7 +380,7 @@ static int bch_dump_open(struct inode *inode, struct file *file) file->private_data = i; i->c = c; - bch_keybuf_init(&i->keys, dump_pred); + bch_keybuf_init(&i->keys); i->keys.last_scanned = KEY(0, 0, 0); return 0; diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 04f6b97ffda6..a241e9fd4f7f 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -136,7 +136,8 @@ static void read_moving(struct closure *cl) /* XXX: if we error, background writeback could stall indefinitely */ while (!test_bit(CACHE_SET_STOPPING, &c->flags)) { - w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY); + w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, + &MAX_KEY, moving_pred); if (!w) break; @@ -248,5 +249,5 @@ void bch_moving_gc(struct closure *cl) void bch_moving_init_cache_set(struct cache_set *c) { - bch_keybuf_init(&c->moving_gc_keys, moving_pred); + bch_keybuf_init(&c->moving_gc_keys); } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 017c95fced8e..17bd59704eba 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -22,8 +22,6 @@ #define CUTOFF_CACHE_ADD 95 #define CUTOFF_CACHE_READA 90 -#define CUTOFF_WRITEBACK 50 -#define CUTOFF_WRITEBACK_SYNC 75 struct kmem_cache *bch_search_cache; @@ -998,17 +996,6 @@ static void cached_dev_write_complete(struct closure *cl) cached_dev_bio_complete(cl); } -static bool should_writeback(struct cached_dev *dc, struct bio *bio) -{ - unsigned threshold = (bio->bi_rw & REQ_SYNC) - ? CUTOFF_WRITEBACK_SYNC - : CUTOFF_WRITEBACK; - - return !atomic_read(&dc->disk.detaching) && - cache_mode(dc, bio) == CACHE_MODE_WRITEBACK && - dc->disk.c->gc_stats.in_use < threshold; -} - static void request_write(struct cached_dev *dc, struct search *s) { struct closure *cl = &s->cl; @@ -1030,12 +1017,16 @@ static void request_write(struct cached_dev *dc, struct search *s) if (bio->bi_rw & REQ_DISCARD) goto skip; + if (should_writeback(dc, s->orig_bio, + cache_mode(dc, bio), + s->op.skip)) { + s->op.skip = false; + s->writeback = true; + } + if (s->op.skip) goto skip; - if (should_writeback(dc, s->orig_bio)) - s->writeback = true; - trace_bcache_write(s->orig_bio, s->writeback, s->op.skip); if (!s->writeback) { diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index cf8d91ec3238..70c6dff0d0cd 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -81,6 +81,9 @@ rw_attribute(writeback_rate_p_term_inverse); rw_attribute(writeback_rate_d_smooth); read_attribute(writeback_rate_debug); +read_attribute(stripe_size); +read_attribute(partial_stripes_expensive); + rw_attribute(synchronous); rw_attribute(journal_delay_ms); rw_attribute(discard); @@ -147,6 +150,9 @@ SHOW(__bch_cached_dev) sysfs_hprint(dirty_data, bcache_dev_sectors_dirty(&dc->disk) << 9); + sysfs_hprint(stripe_size, (1 << dc->disk.stripe_size_bits) << 9); + var_printf(partial_stripes_expensive, "%u"); + var_printf(sequential_merge, "%i"); var_hprint(sequential_cutoff); var_hprint(readahead); @@ -286,6 +292,8 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_writeback_rate_d_smooth, &sysfs_writeback_rate_debug, &sysfs_dirty_data, + &sysfs_stripe_size, + &sysfs_partial_stripes_expensive, &sysfs_sequential_cutoff, &sysfs_sequential_merge, &sysfs_clear_stats, diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index dd815475c524..d81ee5ccc726 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -108,6 +108,31 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k) return KEY_DIRTY(k); } +static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k) +{ + uint64_t stripe; + unsigned nr_sectors = KEY_SIZE(k); + struct cached_dev *dc = container_of(buf, struct cached_dev, + writeback_keys); + unsigned stripe_size = 1 << dc->disk.stripe_size_bits; + + if (!KEY_DIRTY(k)) + return false; + + stripe = KEY_START(k) >> dc->disk.stripe_size_bits; + while (1) { + if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) != + stripe_size) + return false; + + if (nr_sectors <= stripe_size) + return true; + + nr_sectors -= stripe_size; + stripe++; + } +} + static void dirty_init(struct keybuf_key *w) { struct dirty_io *io = w->private; @@ -152,7 +177,22 @@ static void refill_dirty(struct closure *cl) searched_from_start = true; } - bch_refill_keybuf(dc->disk.c, buf, &end); + if (dc->partial_stripes_expensive) { + uint64_t i; + + for (i = 0; i < dc->disk.nr_stripes; i++) + if (atomic_read(dc->disk.stripe_sectors_dirty + i) == + 1 << dc->disk.stripe_size_bits) + goto full_stripes; + + goto normal_refill; +full_stripes: + bch_refill_keybuf(dc->disk.c, buf, &end, + dirty_full_stripe_pred); + } else { +normal_refill: + bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); + } if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) { /* Searched the entire btree - delay awhile */ @@ -446,7 +486,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) closure_init_unlocked(&dc->writeback); init_rwsem(&dc->writeback_lock); - bch_keybuf_init(&dc->writeback_keys, dirty_pred); + bch_keybuf_init(&dc->writeback_keys); dc->writeback_metadata = true; dc->writeback_running = true; diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 5ce9771df047..c91f61bb95b6 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -1,6 +1,9 @@ #ifndef _BCACHE_WRITEBACK_H #define _BCACHE_WRITEBACK_H +#define CUTOFF_WRITEBACK 40 +#define CUTOFF_WRITEBACK_SYNC 70 + static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) { uint64_t i, ret = 0; @@ -11,6 +14,46 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } +static inline bool bcache_dev_stripe_dirty(struct bcache_device *d, + uint64_t offset, + unsigned nr_sectors) +{ + uint64_t stripe = offset >> d->stripe_size_bits; + + while (1) { + if (atomic_read(d->stripe_sectors_dirty + stripe)) + return true; + + if (nr_sectors <= 1 << d->stripe_size_bits) + return false; + + nr_sectors -= 1 << d->stripe_size_bits; + stripe++; + } +} + +static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, + unsigned cache_mode, bool would_skip) +{ + unsigned in_use = dc->disk.c->gc_stats.in_use; + + if (cache_mode != CACHE_MODE_WRITEBACK || + atomic_read(&dc->disk.detaching) || + in_use > CUTOFF_WRITEBACK_SYNC) + return false; + + if (dc->partial_stripes_expensive && + bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector, + bio_sectors(bio))) + return true; + + if (would_skip) + return false; + + return bio->bi_rw & REQ_SYNC || + in_use <= CUTOFF_WRITEBACK; +} + void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); void bch_writeback_queue(struct cached_dev *); void bch_writeback_add(struct cached_dev *); -- cgit v1.2.3-58-ga151 From a25c32bedeff3573b53572b87bcafe10ec5b75a9 Mon Sep 17 00:00:00 2001 From: Gabriel de Perthuis Date: Fri, 7 Jun 2013 23:27:01 +0200 Subject: bcache: Send a uevent with a cached device's UUID Signed-off-by: Gabriel de Perthuis --- drivers/md/bcache/super.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 8c73f0c7f28a..a104dad2c7fa 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -825,6 +825,11 @@ static void calc_cached_dev_sectors(struct cache_set *c) void bch_cached_dev_run(struct cached_dev *dc) { struct bcache_device *d = &dc->disk; + char *env[] = { + "DRIVER=bcache", + kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid), + NULL + }; if (atomic_xchg(&dc->running, 1)) return; @@ -841,10 +846,11 @@ void bch_cached_dev_run(struct cached_dev *dc) add_disk(d->disk); bd_link_disk_holder(dc->bdev, dc->disk.disk); -#if 0 - char *env[] = { "SYMLINK=label" , NULL }; + /* won't show up in the uevent file, use udevadm monitor -e instead + * only class / kset properties are persistent */ kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); -#endif + kfree(env[1]); + if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) pr_debug("error creating sysfs link"); -- cgit v1.2.3-58-ga151 From ab9e14002e271eba41f7f9ab7e9b03cac4adc22d Mon Sep 17 00:00:00 2001 From: Gabriel de Perthuis Date: Sun, 9 Jun 2013 00:54:48 +0200 Subject: bcache: Send label uevents Signed-off-by: Gabriel de Perthuis Signed-off-by: Kent Overstreet --- drivers/md/bcache/super.c | 9 ++++++++- drivers/md/bcache/sysfs.c | 9 +++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a104dad2c7fa..cff2d182dfb0 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -825,12 +825,18 @@ static void calc_cached_dev_sectors(struct cache_set *c) void bch_cached_dev_run(struct cached_dev *dc) { struct bcache_device *d = &dc->disk; + char buf[SB_LABEL_SIZE + 1]; char *env[] = { "DRIVER=bcache", kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid), - NULL + NULL, + NULL, }; + memcpy(buf, dc->sb.label, SB_LABEL_SIZE); + buf[SB_LABEL_SIZE] = '\0'; + env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf); + if (atomic_xchg(&dc->running, 1)) return; @@ -850,6 +856,7 @@ void bch_cached_dev_run(struct cached_dev *dc) * only class / kset properties are persistent */ kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); kfree(env[1]); + kfree(env[2]); if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 70c6dff0d0cd..dd3f00a42729 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -178,6 +178,7 @@ STORE(__cached_dev) disk.kobj); unsigned v = size; struct cache_set *c; + struct kobj_uevent_env *env; #define d_strtoul(var) sysfs_strtoul(var, dc->var) #define d_strtoi_h(var) sysfs_hatoi(var, dc->var) @@ -222,6 +223,7 @@ STORE(__cached_dev) } if (attr == &sysfs_label) { + /* note: endlines are preserved */ memcpy(dc->sb.label, buf, SB_LABEL_SIZE); bch_write_bdev_super(dc, NULL); if (dc->disk.c) { @@ -229,6 +231,13 @@ STORE(__cached_dev) buf, SB_LABEL_SIZE); bch_uuid_write(dc->disk.c); } + env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); + add_uevent_var(env, "DRIVER=bcache"); + add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid), + add_uevent_var(env, "CACHED_LABEL=%s", buf); + kobject_uevent_env( + &disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, env->envp); + kfree(env); } if (attr == &sysfs_attach) { -- cgit v1.2.3-58-ga151 From 26ea8f9239a062a47cfef38ea9c63409906366ff Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 25 Jun 2013 16:50:03 +0200 Subject: drbd: Do not sleep inside rcu Signed-off-by: Andreas Gruenbacher Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4222affff488..adee58e19e83 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1039,6 +1039,8 @@ randomize: rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { kref_get(&mdev->kref); + rcu_read_unlock(); + /* Prevent a race between resync-handshake and * being promoted to Primary. * @@ -1049,8 +1051,6 @@ randomize: mutex_lock(mdev->state_mutex); mutex_unlock(mdev->state_mutex); - rcu_read_unlock(); - if (discard_my_data) set_bit(DISCARD_MY_DATA, &mdev->flags); else -- cgit v1.2.3-58-ga151 From 6110d70bdf99f9d0448f1f61798542e3b123b42a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 25 Jun 2013 16:50:04 +0200 Subject: drbd: fix error return code in drbd_init() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Lars Ellenberg Signed-off-by: Andreas Gruenbacher Signed-off-by: Philipp Reisner Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a5dca6affcbb..49040a336949 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2762,8 +2762,6 @@ int __init drbd_init(void) /* * allocate all necessary structs */ - err = -ENOMEM; - init_waitqueue_head(&drbd_pp_wait); drbd_proc = NULL; /* play safe for drbd_cleanup */ @@ -2773,6 +2771,7 @@ int __init drbd_init(void) if (err) goto fail; + err = -ENOMEM; drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); if (!drbd_proc) { printk(KERN_ERR "drbd: unable to register proc file\n"); @@ -2803,7 +2802,6 @@ int __init drbd_init(void) fail: drbd_cleanup(); if (err == -ENOMEM) - /* currently always the case */ printk(KERN_ERR "drbd: ran out of memory\n"); else printk(KERN_ERR "drbd: initialization failure\n"); -- cgit v1.2.3-58-ga151 From f9eb7bf424e766e00bbc6d69fd7eaaf4bd003cf9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 25 Jun 2013 16:50:05 +0200 Subject: drbd: Fix rcu_read_lock balance on error path Signed-off-by: Andreas Gruenbacher Signed-off-by: Philipp Reisner Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9e3f441e7e84..0936d6aabef9 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2658,7 +2658,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, const struct sib_info *sib) { struct state_info *si = NULL; /* for sizeof(si->member); */ - struct net_conf *nc; struct nlattr *nla; int got_ldev; int err = 0; @@ -2688,13 +2687,19 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, goto nla_put_failure; rcu_read_lock(); - if (got_ldev) - if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive)) - goto nla_put_failure; + if (got_ldev) { + struct disk_conf *disk_conf; - nc = rcu_dereference(mdev->tconn->net_conf); - if (nc) - err = net_conf_to_skb(skb, nc, exclude_sensitive); + disk_conf = rcu_dereference(mdev->ldev->disk_conf); + err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive); + } + if (!err) { + struct net_conf *nc; + + nc = rcu_dereference(mdev->tconn->net_conf); + if (nc) + err = net_conf_to_skb(skb, nc, exclude_sensitive); + } rcu_read_unlock(); if (err) goto nla_put_failure; -- cgit v1.2.3-58-ga151 From 28e448bb30d0f3fc7daa652d2d3a30adaf9e171b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 25 Jun 2013 16:50:06 +0200 Subject: drbd: Ignore the exit code of a fence-peer handler if it returns too late In case the connection was established and lost again before the a fence-peer handler returns, ignore the exit code of this instance. (And use the exit code of the later started instance) Signed-off-by: Andreas Gruenbacher Signed-off-by: Philipp Reisner Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_nl.c | 15 +++++++++++++-- drivers/block/drbd/drbd_state.c | 4 +++- 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f943aacfdad8..f104328d6325 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -832,6 +832,7 @@ struct drbd_tconn { /* is a resource from the config file */ unsigned susp_nod:1; /* IO suspended because no data */ unsigned susp_fen:1; /* IO suspended because fence peer handler runs */ struct mutex cstate_mutex; /* Protects graceful disconnects */ + unsigned int connect_cnt; /* Inc each time a connection is established */ unsigned long flags; struct net_conf *net_conf; /* content protected by rcu */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0936d6aabef9..e25803b447ff 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -417,6 +417,7 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn) bool conn_try_outdate_peer(struct drbd_tconn *tconn) { + unsigned int connect_cnt; union drbd_state mask = { }; union drbd_state val = { }; enum drbd_fencing_p fp; @@ -428,6 +429,10 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn) return false; } + spin_lock_irq(&tconn->req_lock); + connect_cnt = tconn->connect_cnt; + spin_unlock_irq(&tconn->req_lock); + fp = highest_fencing_policy(tconn); switch (fp) { case FP_NOT_AVAIL: @@ -492,8 +497,14 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn) here, because we might were able to re-establish the connection in the meantime. */ spin_lock_irq(&tconn->req_lock); - if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) - _conn_request_state(tconn, mask, val, CS_VERBOSE); + if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) { + if (tconn->connect_cnt != connect_cnt) + /* In case the connection was established and droped + while the fence-peer handler was running, ignore it */ + conn_info(tconn, "Ignoring fence-peer exit code\n"); + else + _conn_request_state(tconn, mask, val, CS_VERBOSE); + } spin_unlock_irq(&tconn->req_lock); return conn_highest_pdsk(tconn) <= D_OUTDATED; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 90c5be2b1d30..216d47b7e88b 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1115,8 +1115,10 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, drbd_thread_restart_nowait(&mdev->tconn->receiver); /* Resume AL writing if we get a connection */ - if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) + if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { drbd_resume_al(mdev); + mdev->tconn->connect_cnt++; + } /* remember last attach time so request_timer_fn() won't * kill newly established sessions while we are still trying to thaw -- cgit v1.2.3-58-ga151 From e96c96333fe5a4f252cc4e1d7edde8ee7dce7dfe Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 25 Jun 2013 16:50:07 +0200 Subject: drbd: Constants should be UPPERCASE Signed-off-by: Andreas Gruenbacher Signed-off-by: Philipp Reisner Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 7 ++++++- drivers/block/drbd/drbd_nl.c | 20 ++++++++++---------- drivers/block/drbd/drbd_receiver.c | 6 +++--- 3 files changed, 19 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f104328d6325..4519d63350fb 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1467,7 +1467,12 @@ extern void drbd_suspend_io(struct drbd_conf *mdev); extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int); -enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; +enum determine_dev_size { + DS_ERROR = -1, + DS_UNCHANGED = 0, + DS_SHRUNK = 1, + DS_GREW = 2 +}; extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index e25803b447ff..45d127522e0a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -835,7 +835,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds char ppb[10]; int md_moved, la_size_changed; - enum determine_dev_size rv = unchanged; + enum determine_dev_size rv = DS_UNCHANGED; /* race: * application request passes inc_ap_bio, @@ -878,7 +878,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds "Leaving size unchanged at size = %lu KB\n", (unsigned long)size); } - rv = dev_size_error; + rv = DS_ERROR; } /* racy, see comments above. */ drbd_set_my_capacity(mdev, size); @@ -886,7 +886,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), (unsigned long long)size>>1); } - if (rv == dev_size_error) + if (rv == DS_ERROR) goto out; la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect); @@ -905,16 +905,16 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write, "size changed", BM_LOCKED_MASK); if (err) { - rv = dev_size_error; + rv = DS_ERROR; goto out; } drbd_md_mark_dirty(mdev); } if (size > la_size_sect) - rv = grew; + rv = DS_GREW; if (size < la_size_sect) - rv = shrunk; + rv = DS_SHRUNK; out: lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); @@ -1619,10 +1619,10 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) set_bit(USE_DEGR_WFC_T, &mdev->flags); dd = drbd_determine_dev_size(mdev, 0); - if (dd == dev_size_error) { + if (dd == DS_ERROR) { retcode = ERR_NOMEM_BITMAP; goto force_diskless_dec; - } else if (dd == grew) + } else if (dd == DS_GREW) set_bit(RESYNC_AFTER_NEG, &mdev->flags); if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) || @@ -2387,13 +2387,13 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) dd = drbd_determine_dev_size(mdev, ddsf); drbd_md_sync(mdev); put_ldev(mdev); - if (dd == dev_size_error) { + if (dd == DS_ERROR) { retcode = ERR_NOMEM_BITMAP; goto fail; } if (mdev->state.conn == C_CONNECTED) { - if (dd == grew) + if (dd == DS_GREW) set_bit(RESIZE_PENDING, &mdev->flags); drbd_send_uuids(mdev); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index adee58e19e83..26852b87b034 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3545,7 +3545,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; struct p_sizes *p = pi->data; - enum determine_dev_size dd = unchanged; + enum determine_dev_size dd = DS_UNCHANGED; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ enum dds_flags ddsf; @@ -3619,7 +3619,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) if (get_ldev(mdev)) { dd = drbd_determine_dev_size(mdev, ddsf); put_ldev(mdev); - if (dd == dev_size_error) + if (dd == DS_ERROR) return -EIO; drbd_md_sync(mdev); } else { @@ -3647,7 +3647,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) drbd_send_sizes(mdev, 0, ddsf); } if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) || - (dd == grew && mdev->state.conn == C_CONNECTED)) { + (dd == DS_GREW && mdev->state.conn == C_CONNECTED)) { if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.disk >= D_INCONSISTENT) { if (ddsf & DDSF_NO_RESYNC) -- cgit v1.2.3-58-ga151 From d752b2696072ed52fd5afab08b601e2220a3b87e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 25 Jun 2013 16:50:08 +0200 Subject: drbd: Allow online change of al-stripes and al-stripe-size Allow to change the AL layout with an resize operation. For that the reisze command gets two new fields: al_stripes and al_stripe_size. In order to make the operation crash save: 1) Lock out all IO and MD-IO 2) Write the super block with MDF_PRIMARY_IND clear 3) write the bitmap to the new location (all zeros, since we allow only while connected) 4) Initialize the new AL-area 5) Write the super block with the restored MDF_PRIMARY_IND. 6) Unfreeze all IO Since the AL-layout has no influence on the protocol, this operation needs to be beforemed on both sides of a resource (if intended). Signed-off-by: Andreas Gruenbacher Signed-off-by: Philipp Reisner Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 21 ++++++ drivers/block/drbd/drbd_int.h | 7 +- drivers/block/drbd/drbd_main.c | 57 ++++++++------- drivers/block/drbd/drbd_nl.c | 137 ++++++++++++++++++++++++++++++------- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd.h | 6 +- include/linux/drbd_genl.h | 2 + include/linux/drbd_limits.h | 9 +++ 8 files changed, 188 insertions(+), 53 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 6608076dc39e..28c73ca320a8 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -659,6 +659,27 @@ void drbd_al_shrink(struct drbd_conf *mdev) wake_up(&mdev->al_wait); } +int drbd_initialize_al(struct drbd_conf *mdev, void *buffer) +{ + struct al_transaction_on_disk *al = buffer; + struct drbd_md *md = &mdev->ldev->md; + sector_t al_base = md->md_offset + md->al_offset; + int al_size_4k = md->al_stripes * md->al_stripe_size_4k; + int i; + + memset(al, 0, 4096); + al->magic = cpu_to_be32(DRBD_AL_MAGIC); + al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED); + al->crc32c = cpu_to_be32(crc32c(0, al, 4096)); + + for (i = 0; i < al_size_4k; i++) { + int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 8, WRITE); + if (err) + return err; + } + return 0; +} + static int w_update_odbm(struct drbd_work *w, int unused) { struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4519d63350fb..2d7f608d181c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1133,6 +1133,7 @@ extern void drbd_mdev_cleanup(struct drbd_conf *mdev); void drbd_print_uuids(struct drbd_conf *mdev, const char *text); extern void conn_md_sync(struct drbd_tconn *tconn); +extern void drbd_md_write(struct drbd_conf *mdev, void *buffer); extern void drbd_md_sync(struct drbd_conf *mdev); extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); @@ -1468,12 +1469,15 @@ extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int); enum determine_dev_size { + DS_ERROR_SHRINK = -3, + DS_ERROR_SPACE_MD = -2, DS_ERROR = -1, DS_UNCHANGED = 0, DS_SHRUNK = 1, DS_GREW = 2 }; -extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); +extern enum determine_dev_size +drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, @@ -1639,6 +1643,7 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, #define drbd_set_out_of_sync(mdev, sector, size) \ __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) extern void drbd_al_shrink(struct drbd_conf *mdev); +extern int drbd_initialize_al(struct drbd_conf *, void *); /* drbd_nl.c */ /* state info broadcast */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 49040a336949..55635edf563b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2879,34 +2879,14 @@ struct meta_data_on_disk { u8 reserved_u8[4096 - (7*8 + 10*4)]; } __packed; -/** - * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set - * @mdev: DRBD device. - */ -void drbd_md_sync(struct drbd_conf *mdev) + + +void drbd_md_write(struct drbd_conf *mdev, void *b) { - struct meta_data_on_disk *buffer; + struct meta_data_on_disk *buffer = b; sector_t sector; int i; - /* Don't accidentally change the DRBD meta data layout. */ - BUILD_BUG_ON(UI_SIZE != 4); - BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096); - - del_timer(&mdev->md_sync_timer); - /* timer may be rearmed by drbd_md_mark_dirty() now. */ - if (!test_and_clear_bit(MD_DIRTY, &mdev->flags)) - return; - - /* We use here D_FAILED and not D_ATTACHING because we try to write - * metadata even if we detach due to a disk failure! */ - if (!get_ldev_if_state(mdev, D_FAILED)) - return; - - buffer = drbd_md_get_buffer(mdev); - if (!buffer) - goto out; - memset(buffer, 0, sizeof(*buffer)); buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); @@ -2935,6 +2915,35 @@ void drbd_md_sync(struct drbd_conf *mdev) dev_err(DEV, "meta data update failed!\n"); drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); } +} + +/** + * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set + * @mdev: DRBD device. + */ +void drbd_md_sync(struct drbd_conf *mdev) +{ + struct meta_data_on_disk *buffer; + + /* Don't accidentally change the DRBD meta data layout. */ + BUILD_BUG_ON(UI_SIZE != 4); + BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096); + + del_timer(&mdev->md_sync_timer); + /* timer may be rearmed by drbd_md_mark_dirty() now. */ + if (!test_and_clear_bit(MD_DIRTY, &mdev->flags)) + return; + + /* We use here D_FAILED and not D_ATTACHING because we try to write + * metadata even if we detach due to a disk failure! */ + if (!get_ldev_if_state(mdev, D_FAILED)) + return; + + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; + + drbd_md_write(mdev, buffer); /* Update mdev->ldev->md.la_size_sect, * since we updated it on metadata. */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 45d127522e0a..8cc1e640f485 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -827,12 +827,17 @@ void drbd_resume_io(struct drbd_conf *mdev) * Returns 0 on success, negative return values indicate errors. * You should call drbd_md_sync() after calling this function. */ -enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) +enum determine_dev_size +drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct resize_parms *rs) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ sector_t la_size_sect, u_size; + struct drbd_md *md = &mdev->ldev->md; + u32 prev_al_stripe_size_4k; + u32 prev_al_stripes; sector_t size; char ppb[10]; + void *buffer; int md_moved, la_size_changed; enum determine_dev_size rv = DS_UNCHANGED; @@ -847,6 +852,11 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds * still lock the act_log to not trigger ASSERTs there. */ drbd_suspend_io(mdev); + buffer = drbd_md_get_buffer(mdev); /* Lock meta-data IO */ + if (!buffer) { + drbd_resume_io(mdev); + return DS_ERROR; + } /* no wait necessary anymore, actually we could assert that */ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); @@ -855,7 +865,17 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds prev_size = mdev->ldev->md.md_size_sect; la_size_sect = mdev->ldev->md.la_size_sect; - /* TODO: should only be some assert here, not (re)init... */ + if (rs) { + /* rs is non NULL if we should change the AL layout only */ + + prev_al_stripes = md->al_stripes; + prev_al_stripe_size_4k = md->al_stripe_size_4k; + + md->al_stripes = rs->al_stripes; + md->al_stripe_size_4k = rs->al_stripe_size / 4; + md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4; + } + drbd_md_set_sector_offsets(mdev, mdev->ldev); rcu_read_lock(); @@ -863,6 +883,21 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds rcu_read_unlock(); size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED); + if (size < la_size_sect) { + if (rs && u_size == 0) { + /* Remove "rs &&" later. This check should always be active, but + right now the receiver expects the permissive behavior */ + dev_warn(DEV, "Implicit shrink not allowed. " + "Use --size=%llus for explicit shrink.\n", + (unsigned long long)size); + rv = DS_ERROR_SHRINK; + } + if (u_size > size) + rv = DS_ERROR_SPACE_MD; + if (rv != DS_UNCHANGED) + goto err_out; + } + if (drbd_get_capacity(mdev->this_bdev) != size || drbd_bm_capacity(mdev) != size) { int err; @@ -886,38 +921,57 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), (unsigned long long)size>>1); } - if (rv == DS_ERROR) - goto out; + if (rv <= DS_ERROR) + goto err_out; la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect); md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev) || prev_size != mdev->ldev->md.md_size_sect; - if (la_size_changed || md_moved) { - int err; + if (la_size_changed || md_moved || rs) { + u32 prev_flags; drbd_al_shrink(mdev); /* All extents inactive. */ + + prev_flags = md->flags; + md->flags &= ~MDF_PRIMARY_IND; + drbd_md_write(mdev, buffer); + dev_info(DEV, "Writing the whole bitmap, %s\n", la_size_changed && md_moved ? "size changed and md moved" : la_size_changed ? "size changed" : "md moved"); /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ - err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write, - "size changed", BM_LOCKED_MASK); - if (err) { - rv = DS_ERROR; - goto out; - } - drbd_md_mark_dirty(mdev); + drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write, + "size changed", BM_LOCKED_MASK); + drbd_initialize_al(mdev, buffer); + + md->flags = prev_flags; + drbd_md_write(mdev, buffer); + + if (rs) + dev_info(DEV, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n", + md->al_stripes, md->al_stripe_size_4k * 4); } if (size > la_size_sect) rv = DS_GREW; if (size < la_size_sect) rv = DS_SHRUNK; -out: + + if (0) { + err_out: + if (rs) { + md->al_stripes = prev_al_stripes; + md->al_stripe_size_4k = prev_al_stripe_size_4k; + md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k; + + drbd_md_set_sector_offsets(mdev, mdev->ldev); + } + } lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); + drbd_md_put_buffer(mdev); drbd_resume_io(mdev); return rv; @@ -1618,8 +1672,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) set_bit(USE_DEGR_WFC_T, &mdev->flags); - dd = drbd_determine_dev_size(mdev, 0); - if (dd == DS_ERROR) { + dd = drbd_determine_dev_size(mdev, 0, NULL); + if (dd <= DS_ERROR) { retcode = ERR_NOMEM_BITMAP; goto force_diskless_dec; } else if (dd == DS_GREW) @@ -2316,6 +2370,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) struct drbd_conf *mdev; enum drbd_ret_code retcode; enum determine_dev_size dd; + bool change_al_layout = false; enum dds_flags ddsf; sector_t u_size; int err; @@ -2326,31 +2381,33 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail; + mdev = adm_ctx.mdev; + if (!get_ldev(mdev)) { + retcode = ERR_NO_DISK; + goto fail; + } + memset(&rs, 0, sizeof(struct resize_parms)); + rs.al_stripes = mdev->ldev->md.al_stripes; + rs.al_stripe_size = mdev->ldev->md.al_stripe_size_4k * 4; if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { err = resize_parms_from_attrs(&rs, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); - goto fail; + goto fail_ldev; } } - mdev = adm_ctx.mdev; if (mdev->state.conn > C_CONNECTED) { retcode = ERR_RESIZE_RESYNC; - goto fail; + goto fail_ldev; } if (mdev->state.role == R_SECONDARY && mdev->state.peer == R_SECONDARY) { retcode = ERR_NO_PRIMARY; - goto fail; - } - - if (!get_ldev(mdev)) { - retcode = ERR_NO_DISK; - goto fail; + goto fail_ldev; } if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) { @@ -2369,6 +2426,28 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) } } + if (mdev->ldev->md.al_stripes != rs.al_stripes || + mdev->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) { + u32 al_size_k = rs.al_stripes * rs.al_stripe_size; + + if (al_size_k > (16 * 1024 * 1024)) { + retcode = ERR_MD_LAYOUT_TOO_BIG; + goto fail_ldev; + } + + if (al_size_k < MD_32kB_SECT/2) { + retcode = ERR_MD_LAYOUT_TOO_SMALL; + goto fail_ldev; + } + + if (mdev->state.conn != C_CONNECTED) { + retcode = ERR_MD_LAYOUT_CONNECTED; + goto fail_ldev; + } + + change_al_layout = true; + } + if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); @@ -2384,12 +2463,18 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) } ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); - dd = drbd_determine_dev_size(mdev, ddsf); + dd = drbd_determine_dev_size(mdev, ddsf, change_al_layout ? &rs : NULL); drbd_md_sync(mdev); put_ldev(mdev); if (dd == DS_ERROR) { retcode = ERR_NOMEM_BITMAP; goto fail; + } else if (dd == DS_ERROR_SPACE_MD) { + retcode = ERR_MD_LAYOUT_NO_FIT; + goto fail; + } else if (dd == DS_ERROR_SHRINK) { + retcode = ERR_IMPLICIT_SHRINK; + goto fail; } if (mdev->state.conn == C_CONNECTED) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 26852b87b034..cc29cd3bf78b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3617,7 +3617,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(mdev)) { - dd = drbd_determine_dev_size(mdev, ddsf); + dd = drbd_determine_dev_size(mdev, ddsf, NULL); put_ldev(mdev); if (dd == DS_ERROR) return -EIO; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 1b4d4ee1168f..de7d74ab3de6 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -177,7 +177,11 @@ enum drbd_ret_code { ERR_NEED_APV_100 = 163, ERR_NEED_ALLOW_TWO_PRI = 164, ERR_MD_UNCLEAN = 165, - + ERR_MD_LAYOUT_CONNECTED = 166, + ERR_MD_LAYOUT_TOO_BIG = 167, + ERR_MD_LAYOUT_TOO_SMALL = 168, + ERR_MD_LAYOUT_NO_FIT = 169, + ERR_IMPLICIT_SHRINK = 170, /* insert new ones above this line */ AFTER_LAST_ERR_CODE }; diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index d0d8fac8a6e4..e8c44572b8cb 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -181,6 +181,8 @@ GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, __u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size) __flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force) __flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync) + __u32_field_def(4, 0 /* OPTIONAL */, al_stripes, DRBD_AL_STRIPES_DEF) + __u32_field_def(5, 0 /* OPTIONAL */, al_stripe_size, DRBD_AL_STRIPE_SIZE_DEF) ) GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 1fedf2b17cc8..17e50bb00521 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -215,4 +215,13 @@ #define DRBD_ALWAYS_ASBP_DEF 0 #define DRBD_USE_RLE_DEF 1 +#define DRBD_AL_STRIPES_MIN 1 +#define DRBD_AL_STRIPES_MAX 1024 +#define DRBD_AL_STRIPES_DEF 1 +#define DRBD_AL_STRIPES_SCALE '1' + +#define DRBD_AL_STRIPE_SIZE_MIN 4 +#define DRBD_AL_STRIPE_SIZE_MAX 16777216 +#define DRBD_AL_STRIPE_SIZE_DEF 32 +#define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */ #endif -- cgit v1.2.3-58-ga151 From e49c7c374e7aacd1f04ecbc21d9dbbeeea4a77d6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 26 Jun 2013 17:25:38 -0700 Subject: bcache: FUA fixes Journal writes need to be marked FUA, not just REQ_FLUSH. And btree node writes have... weird ordering requirements. Signed-off-by: Kent Overstreet --- drivers/md/bcache/btree.c | 25 ++++++++++++++++++++++--- drivers/md/bcache/journal.c | 2 +- drivers/md/bcache/request.c | 13 ++++++++++++- 3 files changed, 35 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 09fb8a2f43da..a6ad49ac5f2b 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -328,10 +328,25 @@ static void do_btree_node_write(struct btree *b) b->bio->bi_end_io = btree_node_write_endio; b->bio->bi_private = &b->io.cl; - b->bio->bi_rw = REQ_META|WRITE_SYNC; - b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); + b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; + b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); bch_bio_map(b->bio, i); + /* + * If we're appending to a leaf node, we don't technically need FUA - + * this write just needs to be persisted before the next journal write, + * which will be marked FLUSH|FUA. + * + * Similarly if we're writing a new btree root - the pointer is going to + * be in the next journal entry. + * + * But if we're writing a new btree node (that isn't a root) or + * appending to a non leaf btree node, we need either FUA or a flush + * when we write the parent with the new pointer. FUA is cheaper than a + * flush, and writes appending to leaf nodes aren't blocking anything so + * just make all btree node writes FUA to keep things sane. + */ + bkey_copy(&k.key, &b->key); SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); @@ -2092,6 +2107,9 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c) void bch_btree_set_root(struct btree *b) { unsigned i; + struct closure cl; + + closure_init_stack(&cl); trace_bcache_btree_set_root(b); @@ -2107,7 +2125,8 @@ void bch_btree_set_root(struct btree *b) b->c->root = b; __bkey_put(b->c, &b->key); - bch_journal_meta(b->c, NULL); + bch_journal_meta(b->c, &cl); + closure_sync(&cl); } /* Cache lookup */ diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 5ca22149b749..4b250667bb7f 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -620,7 +620,7 @@ static void journal_write_unlocked(struct closure *cl) bio_reset(bio); bio->bi_sector = PTR_OFFSET(k, i); bio->bi_bdev = ca->bdev; - bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH; + bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA; bio->bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 17bd59704eba..bcdf1f782c3e 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1035,8 +1035,19 @@ static void request_write(struct cached_dev *dc, struct search *s) closure_bio_submit(bio, cl, s->d); } else { - s->op.cache_bio = bio; bch_writeback_add(dc); + + if (s->op.flush_journal) { + /* Also need to send a flush to the backing device */ + s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, + dc->disk.bio_split); + + bio->bi_size = 0; + bio->bi_vcnt = 0; + closure_bio_submit(bio, cl, s->d); + } else { + s->op.cache_bio = bio; + } } out: closure_call(&s->op.cl, bch_insert_data, NULL, cl); -- cgit v1.2.3-58-ga151 From 36c9ea9837c1cb21c778781495101eaff7e5eb56 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 3 Jun 2013 13:04:56 -0700 Subject: bcache: Document shrinker reserve better Signed-off-by: Kent Overstreet --- drivers/md/bcache/btree.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index a6ad49ac5f2b..04636a70ffaa 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -622,6 +622,13 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) else if (!mutex_trylock(&c->bucket_lock)) return -1; + /* + * It's _really_ critical that we don't free too many btree nodes - we + * have to always leave ourselves a reserve. The reserve is how we + * guarantee that allocating memory for a new btree node can always + * succeed, so that inserting keys into the btree can always succeed and + * IO can always make forward progress: + */ nr /= c->btree_pages; nr = min_t(unsigned long, nr, mca_can_free(c)); -- cgit v1.2.3-58-ga151 From f3059a54610f6c516c0942d58b9435921768ce2d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 15 May 2013 17:13:45 -0700 Subject: bcache: Delete fuzz tester This code has rotted and it hasn't been used in ages anyways. Signed-off-by: Kent Overstreet --- drivers/md/bcache/btree.c | 4 +- drivers/md/bcache/btree.h | 2 - drivers/md/bcache/debug.c | 148 ---------------------------------------------- 3 files changed, 2 insertions(+), 152 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 04636a70ffaa..e0cca3673d0f 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -135,7 +135,7 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i) return crc ^ 0xffffffffffffffffULL; } -void bch_btree_node_read_done(struct btree *b) +static void bch_btree_node_read_done(struct btree *b) { const char *err = "bad btree header"; struct bset *i = b->sets[0].data; @@ -1834,7 +1834,7 @@ merged: return true; } -bool bch_btree_insert_keys(struct btree *b, struct btree_op *op) +static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op) { bool ret = false; struct bkey *k; diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index f66d69a7baf1..3333d3723633 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -369,7 +369,6 @@ static inline bool should_split(struct btree *b) } void bch_btree_node_read(struct btree *); -void bch_btree_node_read_done(struct btree *); void bch_btree_node_write(struct btree *, struct closure *); void bch_cannibalize_unlock(struct cache_set *, struct closure *); @@ -378,7 +377,6 @@ struct btree *bch_btree_node_alloc(struct cache_set *, int, struct closure *); struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, int, struct btree_op *); -bool bch_btree_insert_keys(struct btree *, struct btree_op *); bool bch_btree_insert_check_key(struct btree *, struct btree_op *, struct bio *); int bch_btree_insert(struct btree_op *, struct cache_set *); diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 1c8fd319846e..ba2ceee0fdbb 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -412,149 +412,6 @@ void bch_debug_init_cache_set(struct cache_set *c) #endif -/* Fuzz tester has rotted: */ -#if 0 - -static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a, - const char *buffer, size_t size) -{ - void dump(struct btree *b) - { - struct bset *i; - - for (i = b->sets[0].data; - index(i, b) < btree_blocks(b) && - i->seq == b->sets[0].data->seq; - i = ((void *) i) + set_blocks(i, b->c) * block_bytes(b->c)) - dump_bset(b, i); - } - - struct cache_sb *sb; - struct cache_set *c; - struct btree *all[3], *b, *fill, *orig; - int j; - - struct btree_op op; - bch_btree_op_init_stack(&op); - - sb = kzalloc(sizeof(struct cache_sb), GFP_KERNEL); - if (!sb) - return -ENOMEM; - - sb->bucket_size = 128; - sb->block_size = 4; - - c = bch_cache_set_alloc(sb); - if (!c) - return -ENOMEM; - - for (j = 0; j < 3; j++) { - BUG_ON(list_empty(&c->btree_cache)); - all[j] = list_first_entry(&c->btree_cache, struct btree, list); - list_del_init(&all[j]->list); - - all[j]->key = KEY(0, 0, c->sb.bucket_size); - bkey_copy_key(&all[j]->key, &MAX_KEY); - } - - b = all[0]; - fill = all[1]; - orig = all[2]; - - while (1) { - for (j = 0; j < 3; j++) - all[j]->written = all[j]->nsets = 0; - - bch_bset_init_next(b); - - while (1) { - struct bset *i = write_block(b); - struct bkey *k = op.keys.top; - unsigned rand; - - bkey_init(k); - rand = get_random_int(); - - op.type = rand & 1 - ? BTREE_INSERT - : BTREE_REPLACE; - rand >>= 1; - - SET_KEY_SIZE(k, bucket_remainder(c, rand)); - rand >>= c->bucket_bits; - rand &= 1024 * 512 - 1; - rand += c->sb.bucket_size; - SET_KEY_OFFSET(k, rand); -#if 0 - SET_KEY_PTRS(k, 1); -#endif - bch_keylist_push(&op.keys); - bch_btree_insert_keys(b, &op); - - if (should_split(b) || - set_blocks(i, b->c) != - __set_blocks(i, i->keys + 15, b->c)) { - i->csum = csum_set(i); - - memcpy(write_block(fill), - i, set_bytes(i)); - - b->written += set_blocks(i, b->c); - fill->written = b->written; - if (b->written == btree_blocks(b)) - break; - - bch_btree_sort_lazy(b); - bch_bset_init_next(b); - } - } - - memcpy(orig->sets[0].data, - fill->sets[0].data, - btree_bytes(c)); - - bch_btree_sort(b); - fill->written = 0; - bch_btree_node_read_done(fill); - - if (b->sets[0].data->keys != fill->sets[0].data->keys || - memcmp(b->sets[0].data->start, - fill->sets[0].data->start, - b->sets[0].data->keys * sizeof(uint64_t))) { - struct bset *i = b->sets[0].data; - struct bkey *k, *l; - - for (k = i->start, - l = fill->sets[0].data->start; - k < end(i); - k = bkey_next(k), l = bkey_next(l)) - if (bkey_cmp(k, l) || - KEY_SIZE(k) != KEY_SIZE(l)) { - char buf1[80]; - char buf2[80]; - - bch_bkey_to_text(buf1, sizeof(buf1), k); - bch_bkey_to_text(buf2, sizeof(buf2), l); - - pr_err("key %zi differs: %s != %s", - (uint64_t *) k - i->d, - buf1, buf2); - } - - for (j = 0; j < 3; j++) { - pr_err("**** Set %i ****", j); - dump(all[j]); - } - panic("\n"); - } - - pr_info("fuzz complete: %i keys", b->sets[0].data->keys); - } -} - -kobj_attribute_write(fuzz, btree_fuzz); -#endif - void bch_debug_exit(void) { if (!IS_ERR_OR_NULL(debug)) @@ -564,11 +421,6 @@ void bch_debug_exit(void) int __init bch_debug_init(struct kobject *kobj) { int ret = 0; -#if 0 - ret = sysfs_create_file(kobj, &ksysfs_fuzz.attr); - if (ret) - return ret; -#endif debug = debugfs_create_dir("bcache", NULL); return ret; -- cgit v1.2.3-58-ga151 From 8e51e414a3c6d92ef2cc41720c67342a8e2c0bf7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Jun 2013 18:15:57 -0700 Subject: bcache: Use standard utility code Some of bcache's utility code has made it into the rest of the kernel, so drop the bcache versions. Bcache used to have a workaround for allocating from a bio set under generic_make_request() (if you allocated more than once, the bios you already allocated would get stuck on current->bio_list when you submitted, and you'd risk deadlock) - bcache would mask out __GFP_WAIT when allocating bios under generic_make_request() so that allocation could fail and it could retry from workqueue. But bio_alloc_bioset() has a workaround now, so we can drop this hack and the associated error handling. Signed-off-by: Kent Overstreet --- drivers/md/bcache/btree.c | 7 +--- drivers/md/bcache/debug.c | 2 +- drivers/md/bcache/io.c | 64 +++++++++++-------------------- drivers/md/bcache/movinggc.c | 7 ++-- drivers/md/bcache/request.c | 87 +++++++++---------------------------------- drivers/md/bcache/util.c | 17 --------- drivers/md/bcache/util.h | 4 -- drivers/md/bcache/writeback.c | 7 ++-- 8 files changed, 51 insertions(+), 144 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index e0cca3673d0f..15b58239c683 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -350,7 +350,7 @@ static void do_btree_node_write(struct btree *b) bkey_copy(&k.key, &b->key); SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); - if (!bch_bio_alloc_pages(b->bio, GFP_NOIO)) { + if (!bio_alloc_pages(b->bio, GFP_NOIO)) { int j; struct bio_vec *bv; void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); @@ -1865,7 +1865,7 @@ bool bch_btree_insert_check_key(struct btree *b, struct btree_op *op, should_split(b)) goto out; - op->replace = KEY(op->inode, bio_end(bio), bio_sectors(bio)); + op->replace = KEY(op->inode, bio_end_sector(bio), bio_sectors(bio)); SET_KEY_PTRS(&op->replace, 1); get_random_bytes(&op->replace.ptr[0], sizeof(uint64_t)); @@ -2194,9 +2194,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op, KEY_OFFSET(k) - bio->bi_sector); n = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); - if (!n) - return -EAGAIN; - if (n == bio) op->lookup_done = true; diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index ba2ceee0fdbb..88e6411eab4f 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -199,7 +199,7 @@ void bch_data_verify(struct search *s) if (!check) return; - if (bch_bio_alloc_pages(check, GFP_NOIO)) + if (bio_alloc_pages(check, GFP_NOIO)) goto out_put; check->bi_rw = READ_SYNC; diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 0f6d69658b61..9056632995b1 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -68,13 +68,6 @@ static void bch_generic_make_request_hack(struct bio *bio) * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a * bvec boundry; it is the caller's responsibility to ensure that @bio is not * freed before the split. - * - * If bch_bio_split() is running under generic_make_request(), it's not safe to - * allocate more than one bio from the same bio set. Therefore, if it is running - * under generic_make_request() it masks out __GFP_WAIT when doing the - * allocation. The caller must check for failure if there's any possibility of - * it being called from under generic_make_request(); it is then the caller's - * responsibility to retry from a safe context (by e.g. punting to workqueue). */ struct bio *bch_bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) @@ -85,15 +78,6 @@ struct bio *bch_bio_split(struct bio *bio, int sectors, BUG_ON(sectors <= 0); - /* - * If we're being called from underneath generic_make_request() and we - * already allocated any bios from this bio set, we risk deadlock if we - * use the mempool. So instead, we possibly fail and let the caller punt - * to workqueue or somesuch and retry in a safe context. - */ - if (current->bio_list) - gfp &= ~__GFP_WAIT; - if (sectors >= bio_sectors(bio)) return bio; @@ -164,17 +148,18 @@ static unsigned bch_bio_max_sectors(struct bio *bio) struct request_queue *q = bdev_get_queue(bio->bi_bdev); unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES, queue_max_segments(q)); - struct bio_vec *bv, *end = bio_iovec(bio) + - min_t(int, bio_segments(bio), max_segments); if (bio->bi_rw & REQ_DISCARD) return min(ret, q->limits.max_discard_sectors); if (bio_segments(bio) > max_segments || q->merge_bvec_fn) { + struct bio_vec *bv; + int i, seg = 0; + ret = 0; - for (bv = bio_iovec(bio); bv < end; bv++) { + bio_for_each_segment(bv, bio, i) { struct bvec_merge_data bvm = { .bi_bdev = bio->bi_bdev, .bi_sector = bio->bi_sector, @@ -182,10 +167,14 @@ static unsigned bch_bio_max_sectors(struct bio *bio) .bi_rw = bio->bi_rw, }; + if (seg == max_segments) + break; + if (q->merge_bvec_fn && q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len) break; + seg++; ret += bv->bv_len >> 9; } } @@ -222,30 +211,10 @@ static void bch_bio_submit_split_endio(struct bio *bio, int error) closure_put(cl); } -static void __bch_bio_submit_split(struct closure *cl) -{ - struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); - struct bio *bio = s->bio, *n; - - do { - n = bch_bio_split(bio, bch_bio_max_sectors(bio), - GFP_NOIO, s->p->bio_split); - if (!n) - continue_at(cl, __bch_bio_submit_split, system_wq); - - n->bi_end_io = bch_bio_submit_split_endio; - n->bi_private = cl; - - closure_get(cl); - bch_generic_make_request_hack(n); - } while (n != bio); - - continue_at(cl, bch_bio_submit_split_done, NULL); -} - void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) { struct bio_split_hook *s; + struct bio *n; if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD)) goto submit; @@ -254,6 +223,7 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) goto submit; s = mempool_alloc(p->bio_split_hook, GFP_NOIO); + closure_init(&s->cl, NULL); s->bio = bio; s->p = p; @@ -261,8 +231,18 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) s->bi_private = bio->bi_private; bio_get(bio); - closure_call(&s->cl, __bch_bio_submit_split, NULL, NULL); - return; + do { + n = bch_bio_split(bio, bch_bio_max_sectors(bio), + GFP_NOIO, s->p->bio_split); + + n->bi_end_io = bch_bio_submit_split_endio; + n->bi_private = &s->cl; + + closure_get(&s->cl); + bch_generic_make_request_hack(n); + } while (n != bio); + + continue_at(&s->cl, bch_bio_submit_split_done, NULL); submit: bch_generic_make_request_hack(bio); } diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index a241e9fd4f7f..1a3b4f4786c3 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -46,9 +46,10 @@ static void write_moving_finish(struct closure *cl) { struct moving_io *io = container_of(cl, struct moving_io, s.cl); struct bio *bio = &io->bio.bio; - struct bio_vec *bv = bio_iovec_idx(bio, bio->bi_vcnt); + struct bio_vec *bv; + int i; - while (bv-- != bio->bi_io_vec) + bio_for_each_segment_all(bv, bio, i) __free_page(bv->bv_page); if (io->s.op.insert_collision) @@ -158,7 +159,7 @@ static void read_moving(struct closure *cl) bio->bi_rw = READ; bio->bi_end_io = read_moving_endio; - if (bch_bio_alloc_pages(bio, GFP_KERNEL)) + if (bio_alloc_pages(bio, GFP_KERNEL)) goto err; trace_bcache_gc_copy(&w->key); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index bcdf1f782c3e..b6e74d3c8faf 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -509,10 +509,6 @@ static void bch_insert_data_loop(struct closure *cl) goto err; n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split); - if (!n) { - __bkey_put(op->c, k); - continue_at(cl, bch_insert_data_loop, bcache_wq); - } n->bi_end_io = bch_insert_data_endio; n->bi_private = cl; @@ -821,53 +817,13 @@ static void request_read_done(struct closure *cl) */ if (s->op.cache_bio) { - struct bio_vec *src, *dst; - unsigned src_offset, dst_offset, bytes; - void *dst_ptr; - bio_reset(s->op.cache_bio); s->op.cache_bio->bi_sector = s->cache_miss->bi_sector; s->op.cache_bio->bi_bdev = s->cache_miss->bi_bdev; s->op.cache_bio->bi_size = s->cache_bio_sectors << 9; bch_bio_map(s->op.cache_bio, NULL); - src = bio_iovec(s->op.cache_bio); - dst = bio_iovec(s->cache_miss); - src_offset = src->bv_offset; - dst_offset = dst->bv_offset; - dst_ptr = kmap(dst->bv_page); - - while (1) { - if (dst_offset == dst->bv_offset + dst->bv_len) { - kunmap(dst->bv_page); - dst++; - if (dst == bio_iovec_idx(s->cache_miss, - s->cache_miss->bi_vcnt)) - break; - - dst_offset = dst->bv_offset; - dst_ptr = kmap(dst->bv_page); - } - - if (src_offset == src->bv_offset + src->bv_len) { - src++; - if (src == bio_iovec_idx(s->op.cache_bio, - s->op.cache_bio->bi_vcnt)) - BUG(); - - src_offset = src->bv_offset; - } - - bytes = min(dst->bv_offset + dst->bv_len - dst_offset, - src->bv_offset + src->bv_len - src_offset); - - memcpy(dst_ptr + dst_offset, - page_address(src->bv_page) + src_offset, - bytes); - - src_offset += bytes; - dst_offset += bytes; - } + bio_copy_data(s->cache_miss, s->op.cache_bio); bio_put(s->cache_miss); s->cache_miss = NULL; @@ -912,9 +868,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, struct bio *miss; miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); - if (!miss) - return -EAGAIN; - if (miss == bio) s->op.lookup_done = true; @@ -933,8 +886,9 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, reada = min(dc->readahead >> 9, sectors - bio_sectors(miss)); - if (bio_end(miss) + reada > bdev_sectors(miss->bi_bdev)) - reada = bdev_sectors(miss->bi_bdev) - bio_end(miss); + if (bio_end_sector(miss) + reada > bdev_sectors(miss->bi_bdev)) + reada = bdev_sectors(miss->bi_bdev) - + bio_end_sector(miss); } s->cache_bio_sectors = bio_sectors(miss) + reada; @@ -958,7 +912,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, goto out_put; bch_bio_map(s->op.cache_bio, NULL); - if (bch_bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) + if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) goto out_put; s->cache_miss = miss; @@ -1002,7 +956,7 @@ static void request_write(struct cached_dev *dc, struct search *s) struct bio *bio = &s->bio.bio; struct bkey start, end; start = KEY(dc->disk.id, bio->bi_sector, 0); - end = KEY(dc->disk.id, bio_end(bio), 0); + end = KEY(dc->disk.id, bio_end_sector(bio), 0); bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, &start, &end); @@ -1176,7 +1130,7 @@ found: if (i->sequential + bio->bi_size > i->sequential) i->sequential += bio->bi_size; - i->last = bio_end(bio); + i->last = bio_end_sector(bio); i->jiffies = jiffies + msecs_to_jiffies(5000); s->task->sequential_io = i->sequential; @@ -1294,30 +1248,25 @@ void bch_cached_dev_request_init(struct cached_dev *dc) static int flash_dev_cache_miss(struct btree *b, struct search *s, struct bio *bio, unsigned sectors) { + struct bio_vec *bv; + int i; + /* Zero fill bio */ - while (bio->bi_idx != bio->bi_vcnt) { - struct bio_vec *bv = bio_iovec(bio); + bio_for_each_segment(bv, bio, i) { unsigned j = min(bv->bv_len >> 9, sectors); void *p = kmap(bv->bv_page); memset(p + bv->bv_offset, 0, j << 9); kunmap(bv->bv_page); - bv->bv_len -= j << 9; - bv->bv_offset += j << 9; - - if (bv->bv_len) - return 0; - - bio->bi_sector += j; - bio->bi_size -= j << 9; - - bio->bi_idx++; - sectors -= j; + sectors -= j; } - s->op.lookup_done = true; + bio_advance(bio, min(sectors << 9, bio->bi_size)); + + if (!bio->bi_size) + s->op.lookup_done = true; return 0; } @@ -1344,8 +1293,8 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) closure_call(&s->op.cl, btree_read_async, NULL, cl); } else if (bio_has_data(bio) || s->op.skip) { bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, - &KEY(d->id, bio->bi_sector, 0), - &KEY(d->id, bio_end(bio), 0)); + &KEY(d->id, bio->bi_sector, 0), + &KEY(d->id, bio_end_sector(bio), 0)); s->writeback = true; s->op.cache_bio = bio; diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index da3a99e85b1e..98eb81159a22 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -228,23 +228,6 @@ start: bv->bv_len = min_t(size_t, PAGE_SIZE - bv->bv_offset, } } -int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp) -{ - int i; - struct bio_vec *bv; - - bio_for_each_segment(bv, bio, i) { - bv->bv_page = alloc_page(gfp); - if (!bv->bv_page) { - while (bv-- != bio->bi_io_vec + bio->bi_idx) - __free_page(bv->bv_page); - return -ENOMEM; - } - } - - return 0; -} - /* * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any * use permitted, subject to terms of PostgreSQL license; see.) diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index e02780545f12..1ae2a73ad85f 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -564,12 +564,8 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) return x; } -#define bio_end(bio) ((bio)->bi_sector + bio_sectors(bio)) - void bch_bio_map(struct bio *bio, void *base); -int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp); - static inline sector_t bdev_sectors(struct block_device *bdev) { return bdev->bd_inode->i_size >> 9; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index d81ee5ccc726..22cbff551628 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -285,9 +285,10 @@ static void write_dirty_finish(struct closure *cl) struct dirty_io *io = container_of(cl, struct dirty_io, cl); struct keybuf_key *w = io->bio.bi_private; struct cached_dev *dc = io->dc; - struct bio_vec *bv = bio_iovec_idx(&io->bio, io->bio.bi_vcnt); + struct bio_vec *bv; + int i; - while (bv-- != io->bio.bi_io_vec) + bio_for_each_segment_all(bv, &io->bio, i) __free_page(bv->bv_page); /* This is kind of a dumb way of signalling errors. */ @@ -418,7 +419,7 @@ static void read_dirty(struct closure *cl) io->bio.bi_rw = READ; io->bio.bi_end_io = read_dirty_endio; - if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL)) + if (bio_alloc_pages(&io->bio, GFP_KERNEL)) goto err_free; trace_bcache_writeback(&w->key); -- cgit v1.2.3-58-ga151 From 060810d7abaabcab282e062c595871d661561400 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 8 Jul 2013 14:15:51 +1000 Subject: drm/nouveau: fix locking issues in page flipping paths b580c9e2b7ba5030a795aa2fb73b796523d65a78 introduced additional problems while trying to solve issues that became apparent while porting to the new reservation stuff. The major problem was that the the previously mentioned patch took the client mutex earlier than previously, but the pinning of new_bo can can potentially cause a buffer move, which would result in attempting to acquire the same mutex again. This commit attempts to fix that "fix". Thanks to Maarten for the tips on keeping lockdep happy and cooking :) Reported-by: Maarten Lankhorst Signed-off-by: Ben Skeggs Acked-by: Maarten Lankhorst --- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- drivers/gpu/drm/nouveau/nouveau_display.c | 53 ++++++++++++------------------- drivers/gpu/drm/nouveau/nouveau_drm.c | 3 -- 3 files changed, 22 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 4b1afb131380..85fed108d7e4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -973,7 +973,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr, struct ttm_mem_reg *old_mem = &bo->mem; int ret; - mutex_lock(&chan->cli->mutex); + mutex_lock_nested(&chan->cli->mutex, SINGLE_DEPTH_NESTING); /* create temporary vmas for the transfer and attach them to the * old nouveau_mem node, these will get cleaned up after ttm has diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 708b2d1c0037..61fdef8eac49 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -524,9 +524,12 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct nouveau_page_flip_state *s; struct nouveau_channel *chan = NULL; struct nouveau_fence *fence; - struct list_head res; - struct ttm_validate_buffer res_val[2]; + struct ttm_validate_buffer resv[2] = { + { .bo = &old_bo->bo }, + { .bo = &new_bo->bo }, + }; struct ww_acquire_ctx ticket; + LIST_HEAD(res); int ret; if (!drm->channel) @@ -545,27 +548,19 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, chan = drm->channel; spin_unlock(&old_bo->bo.bdev->fence_lock); - mutex_lock(&chan->cli->mutex); - if (new_bo != old_bo) { ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM); - if (likely(!ret)) { - res_val[0].bo = &old_bo->bo; - res_val[1].bo = &new_bo->bo; - INIT_LIST_HEAD(&res); - list_add_tail(&res_val[0].head, &res); - list_add_tail(&res_val[1].head, &res); - ret = ttm_eu_reserve_buffers(&ticket, &res); - if (ret) - nouveau_bo_unpin(new_bo); - } - } else - ret = ttm_bo_reserve(&new_bo->bo, false, false, false, 0); + if (ret) + goto fail_free; - if (ret) { - mutex_unlock(&chan->cli->mutex); - goto fail_free; + list_add(&resv[1].head, &res); } + list_add(&resv[0].head, &res); + + mutex_lock(&chan->cli->mutex); + ret = ttm_eu_reserve_buffers(&ticket, &res); + if (ret) + goto fail_unpin; /* Initialize a page flip struct */ *s = (struct nouveau_page_flip_state) @@ -576,10 +571,8 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, /* Emit a page flip */ if (nv_device(drm->device)->card_type >= NV_50) { ret = nv50_display_flip_next(crtc, fb, chan, 0); - if (ret) { - mutex_unlock(&chan->cli->mutex); + if (ret) goto fail_unreserve; - } } ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence); @@ -590,22 +583,18 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, /* Update the crtc struct and cleanup */ crtc->fb = fb; - if (old_bo != new_bo) { - ttm_eu_fence_buffer_objects(&ticket, &res, fence); + ttm_eu_fence_buffer_objects(&ticket, &res, fence); + if (old_bo != new_bo) nouveau_bo_unpin(old_bo); - } else { - nouveau_bo_fence(new_bo, fence); - ttm_bo_unreserve(&new_bo->bo); - } nouveau_fence_unref(&fence); return 0; fail_unreserve: - if (old_bo != new_bo) { - ttm_eu_backoff_reservation(&ticket, &res); + ttm_eu_backoff_reservation(&ticket, &res); +fail_unpin: + mutex_unlock(&chan->cli->mutex); + if (old_bo != new_bo) nouveau_bo_unpin(new_bo); - } else - ttm_bo_unreserve(&new_bo->bo); fail_free: kfree(s); return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 218a4b522fe5..4eca52b8d573 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -284,8 +284,6 @@ static int nouveau_drm_probe(struct pci_dev *pdev, return 0; } -static struct lock_class_key drm_client_lock_class_key; - static int nouveau_drm_load(struct drm_device *dev, unsigned long flags) { @@ -297,7 +295,6 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) ret = nouveau_cli_create(pdev, "DRM", sizeof(*drm), (void**)&drm); if (ret) return ret; - lockdep_set_class(&drm->client.mutex, &drm_client_lock_class_key); dev->dev_private = drm; drm->dev = dev; -- cgit v1.2.3-58-ga151 From fdfb8332651db7a280851dfccfc4f0cff4bcd052 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 8 Jul 2013 14:50:54 +1000 Subject: drm/nouveau: fix some error-path leaks in fbcon handling code Signed-off-by: Maarten Lankhorst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_display.c | 16 ++++++++++------ drivers/gpu/drm/nouveau/nouveau_fbcon.c | 1 + 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 61fdef8eac49..907d20ef6d4d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -138,7 +138,7 @@ nouveau_user_framebuffer_create(struct drm_device *dev, { struct nouveau_framebuffer *nouveau_fb; struct drm_gem_object *gem; - int ret; + int ret = -ENOMEM; gem = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]); if (!gem) @@ -146,15 +146,19 @@ nouveau_user_framebuffer_create(struct drm_device *dev, nouveau_fb = kzalloc(sizeof(struct nouveau_framebuffer), GFP_KERNEL); if (!nouveau_fb) - return ERR_PTR(-ENOMEM); + goto err_unref; ret = nouveau_framebuffer_init(dev, nouveau_fb, mode_cmd, nouveau_gem_object(gem)); - if (ret) { - drm_gem_object_unreference(gem); - return ERR_PTR(ret); - } + if (ret) + goto err; return &nouveau_fb->base; + +err: + kfree(nouveau_fb); +err_unref: + drm_gem_object_unreference(gem); + return ERR_PTR(ret); } static const struct drm_mode_config_funcs nouveau_mode_config_funcs = { diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 9352010030e9..4c1bc061fae2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -385,6 +385,7 @@ out_unlock: mutex_unlock(&dev->struct_mutex); if (chan) nouveau_bo_vma_del(nvbo, &fbcon->nouveau_fb.vma); + nouveau_bo_unmap(nvbo); out_unpin: nouveau_bo_unpin(nvbo); out_unref: -- cgit v1.2.3-58-ga151 From 8dda53fca24789acf891ef31a3c5e03332b43cce Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 9 Jul 2013 12:35:55 +1000 Subject: drm/nv50/kms: fix pin refcnt leaks Weren't critical previously, the buffers would go away anyway. But with recent changes to core drm/ttm lockdep will get pissed off now, so let's fix it. Reported-by: Maarten Lankhorst Signed-off-by: Ben Skeggs Acked-by: Maarten Lankhorst --- drivers/gpu/drm/nouveau/nv50_display.c | 46 ++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 54dc6355b0c2..8b40a36c1b57 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -355,6 +355,7 @@ struct nv50_oimm { struct nv50_head { struct nouveau_crtc base; + struct nouveau_bo *image; struct nv50_curs curs; struct nv50_sync sync; struct nv50_ovly ovly; @@ -517,9 +518,10 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, { struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb); struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + struct nv50_head *head = nv50_head(crtc); struct nv50_sync *sync = nv50_sync(crtc); - int head = nv_crtc->index, ret; u32 *push; + int ret; swap_interval <<= 4; if (swap_interval == 0) @@ -537,7 +539,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, return ret; BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2); - OUT_RING (chan, NvEvoSema0 + head); + OUT_RING (chan, NvEvoSema0 + nv_crtc->index); OUT_RING (chan, sync->addr ^ 0x10); BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1); OUT_RING (chan, sync->data + 1); @@ -546,7 +548,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, OUT_RING (chan, sync->data); } else if (chan && nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) { - u64 addr = nv84_fence_crtc(chan, head) + sync->addr; + u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr; ret = RING_SPACE(chan, 12); if (ret) return ret; @@ -565,7 +567,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); } else if (chan) { - u64 addr = nv84_fence_crtc(chan, head) + sync->addr; + u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr; ret = RING_SPACE(chan, 10); if (ret) return ret; @@ -630,6 +632,8 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, evo_mthd(push, 0x0080, 1); evo_data(push, 0x00000000); evo_kick(push, sync); + + nouveau_bo_ref(nv_fb->nvbo, &head->image); return 0; } @@ -1038,18 +1042,17 @@ static int nv50_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb) { struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->fb); + struct nv50_head *head = nv50_head(crtc); int ret; ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM); - if (ret) - return ret; - - if (old_fb) { - nvfb = nouveau_framebuffer(old_fb); - nouveau_bo_unpin(nvfb->nvbo); + if (ret == 0) { + if (head->image) + nouveau_bo_unpin(head->image); + nouveau_bo_ref(nvfb->nvbo, &head->image); } - return 0; + return ret; } static int @@ -1198,6 +1201,15 @@ nv50_crtc_lut_load(struct drm_crtc *crtc) } } +static void +nv50_crtc_disable(struct drm_crtc *crtc) +{ + struct nv50_head *head = nv50_head(crtc); + if (head->image) + nouveau_bo_unpin(head->image); + nouveau_bo_ref(NULL, &head->image); +} + static int nv50_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv, uint32_t handle, uint32_t width, uint32_t height) @@ -1271,18 +1283,29 @@ nv50_crtc_destroy(struct drm_crtc *crtc) struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); struct nv50_disp *disp = nv50_disp(crtc->dev); struct nv50_head *head = nv50_head(crtc); + nv50_dmac_destroy(disp->core, &head->ovly.base); nv50_pioc_destroy(disp->core, &head->oimm.base); nv50_dmac_destroy(disp->core, &head->sync.base); nv50_pioc_destroy(disp->core, &head->curs.base); + + /*XXX: this shouldn't be necessary, but the core doesn't call + * disconnect() during the cleanup paths + */ + if (head->image) + nouveau_bo_unpin(head->image); + nouveau_bo_ref(NULL, &head->image); + nouveau_bo_unmap(nv_crtc->cursor.nvbo); if (nv_crtc->cursor.nvbo) nouveau_bo_unpin(nv_crtc->cursor.nvbo); nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo); + nouveau_bo_unmap(nv_crtc->lut.nvbo); if (nv_crtc->lut.nvbo) nouveau_bo_unpin(nv_crtc->lut.nvbo); nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo); + drm_crtc_cleanup(crtc); kfree(crtc); } @@ -1296,6 +1319,7 @@ static const struct drm_crtc_helper_funcs nv50_crtc_hfunc = { .mode_set_base = nv50_crtc_mode_set_base, .mode_set_base_atomic = nv50_crtc_mode_set_base_atomic, .load_lut = nv50_crtc_lut_load, + .disable = nv50_crtc_disable, }; static const struct drm_crtc_funcs nv50_crtc_func = { -- cgit v1.2.3-58-ga151 From 4f3855997c5d2bf6443853800eb1a03f61ad9140 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Sun, 7 Jul 2013 10:37:35 +0200 Subject: drm/nouveau: do not unpin in nouveau_gem_object_del This should no longer be required, and is harmful for framebuffer pinning. Also add a warning if unpin causes the pin count to drop below 0. Signed-off-by: Maarten Lankhorst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bo.c | 7 +++++-- drivers/gpu/drm/nouveau/nouveau_gem.c | 6 ------ 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 85fed108d7e4..824a98811de6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -148,6 +148,7 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) if (unlikely(nvbo->gem)) DRM_ERROR("bo %p still attached to GEM object\n", bo); + WARN_ON(nvbo->pin_refcnt > 0); nv10_bo_put_tile_region(dev, nvbo->tile, NULL); kfree(nvbo); } @@ -340,13 +341,15 @@ nouveau_bo_unpin(struct nouveau_bo *nvbo) { struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct ttm_buffer_object *bo = &nvbo->bo; - int ret; + int ret, ref; ret = ttm_bo_reserve(bo, false, false, false, 0); if (ret) return ret; - if (--nvbo->pin_refcnt) + ref = --nvbo->pin_refcnt; + WARN_ON_ONCE(ref < 0); + if (ref) goto out; nouveau_bo_placement_set(nvbo, bo->mem.placement, 0); diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index e72d09c068a8..830cb7bad922 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -50,12 +50,6 @@ nouveau_gem_object_del(struct drm_gem_object *gem) return; nvbo->gem = NULL; - /* Lockdep hates you for doing reserve with gem object lock held */ - if (WARN_ON_ONCE(nvbo->pin_refcnt)) { - nvbo->pin_refcnt = 1; - nouveau_bo_unpin(nvbo); - } - if (gem->import_attach) drm_prime_gem_destroy(gem, nvbo->bo.sg); -- cgit v1.2.3-58-ga151 From 06b237ef3903a0a5e33785105ea5203153323dd8 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Sun, 7 Jul 2013 10:53:37 +0200 Subject: drm/nouveau: bump fence timeout to 15 seconds calim didn't like 150 seconds timeout, so lower the timeout for him. 15 seconds should still be plenty. Signed-off-by: Maarten Lankhorst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 1680d9187bab..be3149932c2d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -143,7 +143,7 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan) int ret; fence->channel = chan; - fence->timeout = jiffies + (3 * DRM_HZ); + fence->timeout = jiffies + (15 * DRM_HZ); fence->sequence = ++fctx->sequence; ret = fctx->emit(fence); -- cgit v1.2.3-58-ga151 From 00fc6f6f731efb7b76b839598e494b01890d901d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 9 Jul 2013 14:20:15 +1000 Subject: drm/nouveau: use dedicated channel for async moves on GT/GF chipsets. The moves themselves were generally async to graphics previously, with the exception that if the "main" channel is used to synchronise a page flip at the same time, it can end up blocked for a noticable amount of time for large buffer moves. Not really critical, and there's better ways of handling this, but they are all rather invasive, so this is fine for now. Based on a patch by Maarten Lankhorst addressing the same issue. Reported-by: Maarten Lankhorst Signed-off-by: Ben Skeggs Acked-by: Maarten Lankhorst --- drivers/gpu/drm/nouveau/nouveau_bo.c | 6 +++--- drivers/gpu/drm/nouveau/nouveau_drm.c | 12 ++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 824a98811de6..459a44550ce5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -581,7 +581,7 @@ nve0_bo_move_init(struct nouveau_channel *chan, u32 handle) int ret = RING_SPACE(chan, 2); if (ret == 0) { BEGIN_NVC0(chan, NvSubCopy, 0x0000, 1); - OUT_RING (chan, handle); + OUT_RING (chan, handle & 0x0000ffff); FIRE_RING (chan); } return ret; @@ -1017,7 +1017,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm) struct ttm_mem_reg *, struct ttm_mem_reg *); int (*init)(struct nouveau_channel *, u32 handle); } _methods[] = { - { "COPY", 0, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init }, + { "COPY", 4, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init }, { "GRCE", 0, 0xa0b5, nve0_bo_move_copy, nvc0_bo_move_init }, { "COPY1", 5, 0x90b8, nvc0_bo_move_copy, nvc0_bo_move_init }, { "COPY0", 4, 0x90b5, nvc0_bo_move_copy, nvc0_bo_move_init }, @@ -1037,7 +1037,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm) struct nouveau_channel *chan; u32 handle = (mthd->engine << 16) | mthd->oclass; - if (mthd->init == nve0_bo_move_init) + if (mthd->engine) chan = drm->cechan; else chan = drm->channel; diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 4eca52b8d573..61972668fd05 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -192,6 +192,18 @@ nouveau_accel_init(struct nouveau_drm *drm) arg0 = NVE0_CHANNEL_IND_ENGINE_GR; arg1 = 1; + } else + if (device->chipset >= 0xa3 && + device->chipset != 0xaa && + device->chipset != 0xac) { + ret = nouveau_channel_new(drm, &drm->client, NVDRM_DEVICE, + NVDRM_CHAN + 1, NvDmaFB, NvDmaTT, + &drm->cechan); + if (ret) + NV_ERROR(drm, "failed to create ce channel, %d\n", ret); + + arg0 = NvDmaFB; + arg1 = NvDmaTT; } else { arg0 = NvDmaFB; arg1 = NvDmaTT; -- cgit v1.2.3-58-ga151 From 9b234db37825860b833ba110673301e2009f0546 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Sun, 7 Jul 2013 10:45:43 +0200 Subject: drm/nouveau: add falcon interrupt handler This prevents 100% cpu usage on fermi cards when the exit interrupt from the secret scrubber is not acked. Signed-off-by: Maarten Lankhorst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c | 1 + drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c | 1 + drivers/gpu/drm/nouveau/core/engine/falcon.c | 19 +++++++++++++++++++ drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c | 1 + drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c | 1 + drivers/gpu/drm/nouveau/core/engine/vp/nve0.c | 1 + drivers/gpu/drm/nouveau/core/include/engine/falcon.h | 2 ++ 7 files changed, 26 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c index 262c9f5f5f60..ce860de43e61 100644 --- a/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c @@ -90,6 +90,7 @@ nvc0_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine, return ret; nv_subdev(priv)->unit = 0x00008000; + nv_subdev(priv)->intr = nouveau_falcon_intr; nv_engine(priv)->cclass = &nvc0_bsp_cclass; nv_engine(priv)->sclass = nvc0_bsp_sclass; return 0; diff --git a/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c index c46882c83982..ba6aeca0285e 100644 --- a/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c @@ -90,6 +90,7 @@ nve0_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine, return ret; nv_subdev(priv)->unit = 0x00008000; + nv_subdev(priv)->intr = nouveau_falcon_intr; nv_engine(priv)->cclass = &nve0_bsp_cclass; nv_engine(priv)->sclass = nve0_bsp_sclass; return 0; diff --git a/drivers/gpu/drm/nouveau/core/engine/falcon.c b/drivers/gpu/drm/nouveau/core/engine/falcon.c index 3c7a31f7590e..e03fc8e4dc1d 100644 --- a/drivers/gpu/drm/nouveau/core/engine/falcon.c +++ b/drivers/gpu/drm/nouveau/core/engine/falcon.c @@ -23,6 +23,25 @@ #include #include +void +nouveau_falcon_intr(struct nouveau_subdev *subdev) +{ + struct nouveau_falcon *falcon = (void *)subdev; + u32 dispatch = nv_ro32(falcon, 0x01c); + u32 intr = nv_ro32(falcon, 0x008) & dispatch & ~(dispatch >> 16); + + if (intr & 0x00000010) { + nv_debug(falcon, "ucode halted\n"); + nv_wo32(falcon, 0x004, 0x00000010); + intr &= ~0x00000010; + } + + if (intr) { + nv_error(falcon, "unhandled intr 0x%08x\n", intr); + nv_wo32(falcon, 0x004, intr); + } +} + u32 _nouveau_falcon_rd32(struct nouveau_object *object, u64 addr) { diff --git a/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c index 98072c1ff360..73719aaa62d6 100644 --- a/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c @@ -90,6 +90,7 @@ nvc0_ppp_ctor(struct nouveau_object *parent, struct nouveau_object *engine, return ret; nv_subdev(priv)->unit = 0x00000002; + nv_subdev(priv)->intr = nouveau_falcon_intr; nv_engine(priv)->cclass = &nvc0_ppp_cclass; nv_engine(priv)->sclass = nvc0_ppp_sclass; return 0; diff --git a/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c index 1879229b60eb..ac1f62aace72 100644 --- a/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c @@ -90,6 +90,7 @@ nvc0_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine, return ret; nv_subdev(priv)->unit = 0x00020000; + nv_subdev(priv)->intr = nouveau_falcon_intr; nv_engine(priv)->cclass = &nvc0_vp_cclass; nv_engine(priv)->sclass = nvc0_vp_sclass; return 0; diff --git a/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c index d28ecbf7bc49..d4c3108479c9 100644 --- a/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c @@ -90,6 +90,7 @@ nve0_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine, return ret; nv_subdev(priv)->unit = 0x00020000; + nv_subdev(priv)->intr = nouveau_falcon_intr; nv_engine(priv)->cclass = &nve0_vp_cclass; nv_engine(priv)->sclass = nve0_vp_sclass; return 0; diff --git a/drivers/gpu/drm/nouveau/core/include/engine/falcon.h b/drivers/gpu/drm/nouveau/core/include/engine/falcon.h index 1edec386ab36..181aa7da524d 100644 --- a/drivers/gpu/drm/nouveau/core/include/engine/falcon.h +++ b/drivers/gpu/drm/nouveau/core/include/engine/falcon.h @@ -72,6 +72,8 @@ int nouveau_falcon_create_(struct nouveau_object *, struct nouveau_object *, struct nouveau_oclass *, u32, bool, const char *, const char *, int, void **); +void nouveau_falcon_intr(struct nouveau_subdev *subdev); + #define _nouveau_falcon_dtor _nouveau_engine_dtor int _nouveau_falcon_init(struct nouveau_object *); int _nouveau_falcon_fini(struct nouveau_object *, bool); -- cgit v1.2.3-58-ga151 From 0108bc808107b97e101b15af9705729626be6447 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Sun, 7 Jul 2013 10:40:19 +0200 Subject: drm/nouveau: do not allow negative sizes for now The API allows up to 64-bits allocations, but size is handled as int inside nouveau almost everywhere. Until this is fixed it's better to prevent negative sizes. The 256 kB before INT_MAX is paranoia, because of the large page aligning below that could flip it above INT_MAX. Signed-off-by: Maarten Lankhorst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bo.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 459a44550ce5..4e7ee5f4155c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -198,6 +198,12 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, size_t acc_size; int ret; int type = ttm_bo_type_device; + int max_size = INT_MAX & ~((1 << drm->client.base.vm->vmm->lpg_shift) - 1); + + if (size <= 0 || size > max_size) { + nv_warn(drm, "skipped size %x\n", (u32)size); + return -EINVAL; + } if (sg) type = ttm_bo_type_sg; -- cgit v1.2.3-58-ga151 From 9c23b7d3d6bda41e2a27375df705485523a96dc8 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Wed, 10 Jul 2013 06:26:13 +0000 Subject: crypto: caam - Fixed the memory out of bound overwrite issue When kernel is compiled with CONFIG_SLUB_DEBUG=y and CRYPTO_MANAGER_DISABLE_TESTS=n, during kernel bootup, the kernel reports error given below. The root cause is that in function hash_digest_key(), for allocating descriptor, insufficient memory was being allocated. The required number of descriptor words apart from input and output pointers are 8 (instead of 6). ============================================================================= BUG dma-kmalloc-32 (Not tainted): Redzone overwritten ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: 0xdec5dec0-0xdec5dec3. First byte 0x0 instead of 0xcc INFO: Allocated in ahash_setkey+0x60/0x594 age=7 cpu=1 pid=1257 __kmalloc+0x154/0x1b4 ahash_setkey+0x60/0x594 test_hash+0x260/0x5a0 alg_test_hash+0x48/0xb0 alg_test+0x84/0x228 cryptomgr_test+0x4c/0x54 kthread+0x98/0x9c ret_from_kernel_thread+0x64/0x6c INFO: Slab 0xc0bd0ba0 objects=19 used=2 fp=0xdec5d0d0 flags=0x0081 INFO: Object 0xdec5dea0 @offset=3744 fp=0x5c200014 Bytes b4 dec5de90: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ Object dec5dea0: b0 80 00 0a 84 41 00 0d f0 40 00 00 00 67 3f c0 .....A...@...g?. Object dec5deb0: 00 00 00 50 2c 14 00 50 f8 40 00 00 1e c5 d0 00 ...P,..P.@...... Redzone dec5dec0: 00 00 00 14 .... Padding dec5df68: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ Call Trace: [dec65b60] [c00071b4] show_stack+0x4c/0x168 (unreliable) [dec65ba0] [c00d4ec8] check_bytes_and_report+0xe4/0x11c [dec65bd0] [c00d507c] check_object+0x17c/0x23c [dec65bf0] [c0550a00] free_debug_processing+0xf4/0x294 [dec65c20] [c0550bdc] __slab_free+0x3c/0x294 [dec65c80] [c03f0744] ahash_setkey+0x4e0/0x594 [dec65cd0] [c01ef138] test_hash+0x260/0x5a0 [dec65e50] [c01ef4c0] alg_test_hash+0x48/0xb0 [dec65e70] [c01eecc4] alg_test+0x84/0x228 [dec65ee0] [c01ec640] cryptomgr_test+0x4c/0x54 [dec65ef0] [c005adc0] kthread+0x98/0x9c [dec65f40] [c000e1ac] ret_from_kernel_thread+0x64/0x6c FIX dma-kmalloc-32: Restoring 0xdec5dec0-0xdec5dec3=0xcc Change-Id: I0c7a1048053e811025d1c3b487940f87345c8f5d Signed-off-by: Vakul Garg CC: #3.9 Reviewed-by: Geanta Neag Horia Ioan-B05471 Reviewed-by: Fleming Andrew-AFLEMING Tested-by: Fleming Andrew-AFLEMING Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamhash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 5996521a1caf..84573b4d6f92 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -429,7 +429,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, dma_addr_t src_dma, dst_dma; int ret = 0; - desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); + desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); if (!desc) { dev_err(jrdev, "unable to allocate key input memory\n"); return -ENOMEM; -- cgit v1.2.3-58-ga151 From 2e26af79b7e24e9644a6c16ad4dca61501fb4b3f Mon Sep 17 00:00:00 2001 From: Asias He Date: Tue, 7 May 2013 14:54:33 +0800 Subject: vhost-net: Always access vq->private_data under vq mutex Signed-off-by: Asias He Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 027be91db139..99f8d63491aa 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -346,12 +346,11 @@ static void handle_tx(struct vhost_net *net) struct vhost_net_ubuf_ref *uninitialized_var(ubufs); bool zcopy, zcopy_used; - /* TODO: check that we are running from vhost_worker? */ - sock = rcu_dereference_check(vq->private_data, 1); + mutex_lock(&vq->mutex); + sock = vq->private_data; if (!sock) - return; + goto out; - mutex_lock(&vq->mutex); vhost_disable_notify(&net->dev, vq); hdr_size = nvq->vhost_hlen; @@ -461,7 +460,7 @@ static void handle_tx(struct vhost_net *net) break; } } - +out: mutex_unlock(&vq->mutex); } @@ -570,14 +569,14 @@ static void handle_rx(struct vhost_net *net) s16 headcount; size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; - /* TODO: check that we are running from vhost_worker? */ - struct socket *sock = rcu_dereference_check(vq->private_data, 1); - - if (!sock) - return; + struct socket *sock; mutex_lock(&vq->mutex); + sock = vq->private_data; + if (!sock) + goto out; vhost_disable_notify(&net->dev, vq); + vhost_hlen = nvq->vhost_hlen; sock_hlen = nvq->sock_hlen; @@ -652,7 +651,7 @@ static void handle_rx(struct vhost_net *net) break; } } - +out: mutex_unlock(&vq->mutex); } -- cgit v1.2.3-58-ga151 From e7802212ea4bbbd5db99181942a19ab36ca4b914 Mon Sep 17 00:00:00 2001 From: Asias He Date: Tue, 7 May 2013 14:54:35 +0800 Subject: vhost-scsi: Always access vq->private_data under vq mutex Signed-off-by: Asias He Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 4264840ef7dc..45365396dbbc 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -896,19 +896,15 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) int head, ret; u8 target; + mutex_lock(&vq->mutex); /* * We can handle the vq only after the endpoint is setup by calling the * VHOST_SCSI_SET_ENDPOINT ioctl. - * - * TODO: Check that we are running from vhost_worker which acts - * as read-side critical section for vhost kind of RCU. - * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h */ - vs_tpg = rcu_dereference_check(vq->private_data, 1); + vs_tpg = vq->private_data; if (!vs_tpg) - return; + goto out; - mutex_lock(&vq->mutex); vhost_disable_notify(&vs->dev, vq); for (;;) { @@ -1058,6 +1054,7 @@ err_free: vhost_scsi_free_cmd(cmd); err_cmd: vhost_scsi_send_bad_target(vs, vq, head, out); +out: mutex_unlock(&vq->mutex); } -- cgit v1.2.3-58-ga151 From 22fa90c7fb479694d6affebc049d21f06b714be6 Mon Sep 17 00:00:00 2001 From: Asias He Date: Tue, 7 May 2013 14:54:36 +0800 Subject: vhost: Remove custom vhost rcu usage Now, vq->private_data is always accessed under vq mutex. No need to play the vhost rcu trick. Signed-off-by: Asias He Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 16 ++++++---------- drivers/vhost/scsi.c | 6 ++---- drivers/vhost/test.c | 6 ++---- drivers/vhost/vhost.h | 10 ++-------- 4 files changed, 12 insertions(+), 26 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 99f8d63491aa..969a85960e9f 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -749,8 +748,7 @@ static int vhost_net_enable_vq(struct vhost_net *n, struct vhost_poll *poll = n->poll + (nvq - n->vqs); struct socket *sock; - sock = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); + sock = vq->private_data; if (!sock) return 0; @@ -763,10 +761,9 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct socket *sock; mutex_lock(&vq->mutex); - sock = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); + sock = vq->private_data; vhost_net_disable_vq(n, vq); - rcu_assign_pointer(vq->private_data, NULL); + vq->private_data = NULL; mutex_unlock(&vq->mutex); return sock; } @@ -922,8 +919,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } /* start polling new socket */ - oldsock = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); + oldsock = vq->private_data; if (sock != oldsock) { ubufs = vhost_net_ubuf_alloc(vq, sock && vhost_sock_zcopy(sock)); @@ -933,7 +929,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } vhost_net_disable_vq(n, vq); - rcu_assign_pointer(vq->private_data, sock); + vq->private_data = sock; r = vhost_init_used(vq); if (r) goto err_used; @@ -967,7 +963,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) return 0; err_used: - rcu_assign_pointer(vq->private_data, oldsock); + vq->private_data = oldsock; vhost_net_enable_vq(n, vq); if (ubufs) vhost_net_ubuf_put_wait_and_free(ubufs); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 45365396dbbc..35ab0ce98414 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1223,9 +1223,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, sizeof(vs->vs_vhost_wwpn)); for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { vq = &vs->vqs[i].vq; - /* Flushing the vhost_work acts as synchronize_rcu */ mutex_lock(&vq->mutex); - rcu_assign_pointer(vq->private_data, vs_tpg); + vq->private_data = vs_tpg; vhost_init_used(vq); mutex_unlock(&vq->mutex); } @@ -1304,9 +1303,8 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, if (match) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { vq = &vs->vqs[i].vq; - /* Flushing the vhost_work acts as synchronize_rcu */ mutex_lock(&vq->mutex); - rcu_assign_pointer(vq->private_data, NULL); + vq->private_data = NULL; mutex_unlock(&vq->mutex); } } diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index a73ea217f24d..339eae85859a 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -200,9 +199,8 @@ static long vhost_test_run(struct vhost_test *n, int test) priv = test ? n : NULL; /* start polling new socket */ - oldpriv = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); - rcu_assign_pointer(vq->private_data, priv); + oldpriv = vq->private_data; + vq->private_data = priv; r = vhost_init_used(&n->vqs[index]); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 42298cd23c73..4465ed5f316d 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -103,14 +103,8 @@ struct vhost_virtqueue { struct iovec iov[UIO_MAXIOV]; struct iovec *indirect; struct vring_used_elem *heads; - /* We use a kind of RCU to access private pointer. - * All readers access it from worker, which makes it possible to - * flush the vhost_work instead of synchronize_rcu. Therefore readers do - * not need to call rcu_read_lock/rcu_read_unlock: the beginning of - * vhost_work execution acts instead of rcu_read_lock() and the end of - * vhost_work execution acts instead of rcu_read_unlock(). - * Writers use virtqueue mutex. */ - void __rcu *private_data; + /* Protected by virtqueue mutex. */ + void *private_data; /* Log write descriptors */ void __user *log_base; struct vhost_log *log; -- cgit v1.2.3-58-ga151 From 0692282181e248328c226c2520994fc06dfd65bf Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 11 Jul 2013 13:35:40 +0200 Subject: drm/i915: fix up readout of the lvds dither bit on gen2/3 It's in the PFIT_CONTROL register, but very much associated with the lvds encoder. So move the readout for it (in the case of an otherwise disabled pfit) from the pipe to the lvds encoder's get_config function. Otherwise we get a pipe state mismatch if we use pipe B for a non-lvds output and we've left the dither bit enabled behind us. This can happen if the BIOS has set the bit (some seem to unconditionally do that, even in the complete absence of an lvds port), but not enabled pipe B at boot-up. Then we won't clear the pfit control register since we can only touch that if the pfit is associated with our pipe in the crtc configuration - we could trample over the pfit state of the other pipe otherwise since it's shared. Once pipe B is enabled we notice that the 6to8 dither bit is set and complain about the mismatch. Note that testing indicates that we don't actually need to set this bit when the pfit is disabled, dithering on 18bpp panels seems to work regardless. But ripping that code out is not something for a bugfix meant for -rc kernels. v2: While at it clarify the logic in i9xx_get_pfit_config, spurred by comments from Chris on irc. v3: Use Chris suggestion to make the control flow in i9xx_get_pfit_config easier to understand. v4: Kill the extra line, spotted by Chris. Reported-by: Knut Petersen Cc: Knut Petersen Cc: Chris Wilson References: http://lists.freedesktop.org/archives/intel-gfx/2013-July/030092.html Tested-by: Knut Petersen Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 11 ++++------- drivers/gpu/drm/i915/intel_lvds.c | 7 +++++++ 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 85f3eb74d2b7..c59335ce84f4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4913,22 +4913,19 @@ static void i9xx_get_pfit_config(struct intel_crtc *crtc, uint32_t tmp; tmp = I915_READ(PFIT_CONTROL); + if (!(tmp & PFIT_ENABLE)) + return; + /* Check whether the pfit is attached to our pipe. */ if (INTEL_INFO(dev)->gen < 4) { if (crtc->pipe != PIPE_B) return; - - /* gen2/3 store dither state in pfit control, needs to match */ - pipe_config->gmch_pfit.control = tmp & PANEL_8TO6_DITHER_ENABLE; } else { if ((tmp & PFIT_PIPE_MASK) != (crtc->pipe << PFIT_PIPE_SHIFT)) return; } - if (!(tmp & PFIT_ENABLE)) - return; - - pipe_config->gmch_pfit.control = I915_READ(PFIT_CONTROL); + pipe_config->gmch_pfit.control = tmp; pipe_config->gmch_pfit.pgm_ratios = I915_READ(PFIT_PGM_RATIOS); if (INTEL_INFO(dev)->gen < 5) pipe_config->gmch_pfit.lvds_border_bits = diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 2abb2d3c727b..12079983c8ab 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -109,6 +109,13 @@ static void intel_lvds_get_config(struct intel_encoder *encoder, flags |= DRM_MODE_FLAG_PVSYNC; pipe_config->adjusted_mode.flags |= flags; + + /* gen2/3 store dither state in pfit control, needs to match */ + if (INTEL_INFO(dev)->gen < 4) { + tmp = I915_READ(PFIT_CONTROL); + + pipe_config->gmch_pfit.control |= tmp & PANEL_8TO6_DITHER_ENABLE; + } } /* The LVDS pin pair needs to be on before the DPLLs are enabled. -- cgit v1.2.3-58-ga151 From 6aa8f1a6ca41c49721d2de4e048d3da8d06411f9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 18:04:21 -0700 Subject: bcache: Fix a dumb race In the far-too-complicated closure code - closures can have destructors, for probably dubious reasons; they get run after the closure is no longer waiting on anything but before dropping the parent ref, intended just for freeing whatever memory the closure is embedded in. Trouble is, when remaining goes to 0 and we've got nothing more to run - we also have to unlock the closure, setting remaining to -1. If there's a destructor, that unlock isn't doing anything - nobody could be trying to lock it if we're about to free it - but if the unlock _is needed... that check for a destructor was racy. Argh. Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/closure.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index bd05a9a8c7cf..9aba2017f0d1 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -66,16 +66,18 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) } else { struct closure *parent = cl->parent; struct closure_waitlist *wait = closure_waitlist(cl); + closure_fn *destructor = cl->fn; closure_debug_destroy(cl); + smp_mb(); atomic_set(&cl->remaining, -1); if (wait) closure_wake_up(wait); - if (cl->fn) - cl->fn(cl); + if (destructor) + destructor(cl); if (parent) closure_put(parent); -- cgit v1.2.3-58-ga151 From d2a65ce2ac224413b291307201c5dafb03aa90d7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Jul 2013 09:05:46 +0300 Subject: bcache: check for allocation failures There is a missing NULL check after the kzalloc(). Signed-off-by: Dan Carpenter --- drivers/md/bcache/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index dd3f00a42729..12a2c2846f99 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -232,6 +232,8 @@ STORE(__cached_dev) bch_uuid_write(dc->disk.c); } env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); + if (!env) + return -ENOMEM; add_uevent_var(env, "DRIVER=bcache"); add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid), add_uevent_var(env, "CACHED_LABEL=%s", buf); -- cgit v1.2.3-58-ga151 From 54d12f2b4fd0f218590d1490b41a18d0e2328a9a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 18:44:40 -0700 Subject: bcache: Advertise that flushes are supported Whoops - bcache's flush/FUA was mostly correct, but flushes get filtered out unless we say we support them... Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/request.c | 8 +++++++- drivers/md/bcache/super.c | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index b6e74d3c8faf..786a1a4f74d8 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -488,6 +488,12 @@ static void bch_insert_data_loop(struct closure *cl) bch_queue_gc(op->c); } + /* + * Journal writes are marked REQ_FLUSH; if the original write was a + * flush, it'll wait on the journal write. + */ + bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA); + do { unsigned i; struct bkey *k; @@ -710,7 +716,7 @@ static struct search *search_alloc(struct bio *bio, struct bcache_device *d) s->task = current; s->orig_bio = bio; s->write = (bio->bi_rw & REQ_WRITE) != 0; - s->op.flush_journal = (bio->bi_rw & REQ_FLUSH) != 0; + s->op.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0; s->op.skip = (bio->bi_rw & REQ_DISCARD) != 0; s->recoverable = 1; s->start_time = jiffies; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index cff2d182dfb0..728fdc673f31 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -806,6 +806,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags); set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags); + blk_queue_flush(q, REQ_FLUSH|REQ_FUA); + return 0; } -- cgit v1.2.3-58-ga151 From c9502ea4424b31728703d113fc6b30bfead14633 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 21:25:02 -0700 Subject: bcache: Fix a sysfs splat on shutdown If we stopped a bcache device when we were already detaching (or something like that), bcache_device_unlink() would try to remove a symlink from sysfs that was already gone because the bcache dev kobject had already been removed from sysfs. So keep track of whether we've removed stuff from sysfs. Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/super.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 342ba86c6e4f..68f1ded81ae0 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -434,6 +434,7 @@ struct bcache_device { /* If nonzero, we're detaching/unregistering from cache set */ atomic_t detaching; + int flush_done; uint64_t nr_stripes; unsigned stripe_size_bits; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 728fdc673f31..7a1dcdb2536e 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -706,7 +706,8 @@ static void bcache_device_detach(struct bcache_device *d) atomic_set(&d->detaching, 0); } - bcache_device_unlink(d); + if (!d->flush_done) + bcache_device_unlink(d); d->c->devices[d->id] = NULL; closure_put(&d->c->caching); @@ -1055,6 +1056,14 @@ static void cached_dev_flush(struct closure *cl) struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); struct bcache_device *d = &dc->disk; + mutex_lock(&bch_register_lock); + d->flush_done = 1; + + if (d->c) + bcache_device_unlink(d); + + mutex_unlock(&bch_register_lock); + bch_cache_accounting_destroy(&dc->accounting); kobject_del(&d->kobj); -- cgit v1.2.3-58-ga151 From 5caa52afc5abd1396e4af720469abb5843a71eb8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 21:03:25 -0700 Subject: bcache: Shutdown fix Stopping a cache set is supposed to make it stop attached backing devices, but somewhere along the way that code got lost. Fixing this mainly has the effect of fixing our reboot notifier. Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/super.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 7a1dcdb2536e..f6a62174e8f6 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1354,18 +1354,22 @@ static void cache_set_flush(struct closure *cl) static void __cache_set_unregister(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, caching); - struct cached_dev *dc, *t; + struct cached_dev *dc; size_t i; mutex_lock(&bch_register_lock); - if (test_bit(CACHE_SET_UNREGISTERING, &c->flags)) - list_for_each_entry_safe(dc, t, &c->cached_devs, list) - bch_cached_dev_detach(dc); - for (i = 0; i < c->nr_uuids; i++) - if (c->devices[i] && UUID_FLASH_ONLY(&c->uuids[i])) - bcache_device_stop(c->devices[i]); + if (c->devices[i]) { + if (!UUID_FLASH_ONLY(&c->uuids[i]) && + test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { + dc = container_of(c->devices[i], + struct cached_dev, disk); + bch_cached_dev_detach(dc); + } else { + bcache_device_stop(c->devices[i]); + } + } mutex_unlock(&bch_register_lock); -- cgit v1.2.3-58-ga151 From faa5673617656ee58369a3cfe4a312cfcdc59c81 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 11 Jul 2013 22:42:14 -0700 Subject: bcache: Journal replay fix The journal replay code starts by finding something that looks like a valid journal entry, then it does a binary search over the unchecked region of the journal for the journal entries with the highest sequence numbers. Trouble is, the logic was wrong - journal_read_bucket() returns true if it found journal entries we need, but if the range of journal entries we're looking for loops around the end of the journal - in that case journal_read_bucket() could return true when it hadn't found the highest sequence number we'd seen yet, and in that case the binary search did the wrong thing. Whoops. Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/journal.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 4b250667bb7f..ba95ab84b2be 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -184,9 +184,14 @@ bsearch: pr_debug("starting binary search, l %u r %u", l, r); while (l + 1 < r) { + seq = list_entry(list->prev, struct journal_replay, + list)->j.seq; + m = (l + r) >> 1; + read_bucket(m); - if (read_bucket(m)) + if (seq != list_entry(list->prev, struct journal_replay, + list)->j.seq) l = m; else r = m; -- cgit v1.2.3-58-ga151 From 29ebf465b9050f241c4433a796a32e6c896a9dcd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 11 Jul 2013 19:43:21 -0700 Subject: bcache: Fix GC_SECTORS_USED() calculation Part of the job of garbage collection is to add up however many sectors of live data it finds in each bucket, but that doesn't work very well if it doesn't reset GC_SECTORS_USED() when it starts. Whoops. This wouldn't have broken anything horribly, but allocation tries to preferentially reclaim buckets that are mostly empty and that's not gonna work with an incorrect GC_SECTORS_USED() value. Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/btree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 15b58239c683..ee372884c405 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1410,8 +1410,10 @@ static void btree_gc_start(struct cache_set *c) for_each_cache(ca, c, i) for_each_bucket(b, ca) { b->gc_gen = b->gen; - if (!atomic_read(&b->pin)) + if (!atomic_read(&b->pin)) { SET_GC_MARK(b, GC_MARK_RECLAIMABLE); + SET_GC_SECTORS_USED(b, 0); + } } mutex_unlock(&c->bucket_lock); -- cgit v1.2.3-58-ga151 From 79826c35eb99cd3c0873b8396f45fa26c87fb0b0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 18:31:58 -0700 Subject: bcache: Allocation kthread fixes The alloc kthread should've been using try_to_freeze() - and also there was the potential for the alloc kthread to get woken up after it had shut down, which would have been bad. Signed-off-by: Kent Overstreet --- drivers/md/bcache/alloc.c | 18 ++++++++---------- drivers/md/bcache/bcache.h | 4 ---- drivers/md/bcache/super.c | 11 +++++++---- 3 files changed, 15 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index b54b73b9b2b7..e45f5575fd4d 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -63,6 +63,7 @@ #include "bcache.h" #include "btree.h" +#include #include #include #include @@ -363,11 +364,10 @@ do { \ break; \ \ mutex_unlock(&(ca)->set->bucket_lock); \ - if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) { \ - closure_put(&ca->set->cl); \ + if (kthread_should_stop()) \ return 0; \ - } \ \ + try_to_freeze(); \ schedule(); \ mutex_lock(&(ca)->set->bucket_lock); \ } \ @@ -547,14 +547,12 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, int bch_cache_allocator_start(struct cache *ca) { - ca->alloc_thread = kthread_create(bch_allocator_thread, - ca, "bcache_allocator"); - if (IS_ERR(ca->alloc_thread)) - return PTR_ERR(ca->alloc_thread); - - closure_get(&ca->set->cl); - wake_up_process(ca->alloc_thread); + struct task_struct *k = kthread_run(bch_allocator_thread, + ca, "bcache_allocator"); + if (IS_ERR(k)) + return PTR_ERR(k); + ca->alloc_thread = k; return 0; } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 68f1ded81ae0..b39f6f0b45f2 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -664,13 +664,9 @@ struct gc_stat { * CACHE_SET_STOPPING always gets set first when we're closing down a cache set; * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. * flushing dirty data). - * - * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down - * the allocation thread. */ #define CACHE_SET_UNREGISTERING 0 #define CACHE_SET_STOPPING 1 -#define CACHE_SET_STOPPING_2 2 struct cache_set { struct closure cl; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f6a62174e8f6..547c4c57b052 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -1329,11 +1330,9 @@ static void cache_set_free(struct closure *cl) static void cache_set_flush(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, caching); + struct cache *ca; struct btree *b; - - /* Shut down allocator threads */ - set_bit(CACHE_SET_STOPPING_2, &c->flags); - wake_up_allocators(c); + unsigned i; bch_cache_accounting_destroy(&c->accounting); @@ -1348,6 +1347,10 @@ static void cache_set_flush(struct closure *cl) if (btree_node_dirty(b)) bch_btree_node_write(b, NULL); + for_each_cache(ca, c, i) + if (ca->alloc_thread) + kthread_stop(ca->alloc_thread); + closure_return(cl); } -- cgit v1.2.3-58-ga151 From 21d8a4756af5fdf4a42e79a77cf3b6f52678d443 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 12 Jul 2013 08:07:30 +0200 Subject: drm/i915: fix pfit regression for non-autoscaled resolutions I.e. for letter/pillarboxing. For those cases we need to adjust the mode a bit, but Jesse gmch pfit refactoring in commit 2dd24552cab40ea829ba3fda890eeafd2c4816d8 Author: Jesse Barnes Date: Thu Apr 25 12:55:01 2013 -0700 drm/i915: factor out GMCH panel fitting code and use for eDP v3 broke that by reordering the computation of the gmch pfit state with the block of code that prepared the adjusted mode for it and told the modeset core not to overwrite the adjusted mode with default settings. We might want to switch around the core code to just fill in defaults, but this code predates the pipe_config modeset rework. And in the old crtc helpers we did not have a suitable spot to do this. Cc: Mika Kuoppala Cc: Jesse Barnes Cc: Hans de Bruin Reported-and-tested-by: Hans de Bruin Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lvds.c | 5 +---- drivers/gpu/drm/i915/intel_panel.c | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 12079983c8ab..9a797c987537 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -297,14 +297,11 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder, intel_pch_panel_fitting(intel_crtc, pipe_config, intel_connector->panel.fitting_mode); - return true; } else { intel_gmch_panel_fitting(intel_crtc, pipe_config, intel_connector->panel.fitting_mode); - } - drm_mode_set_crtcinfo(adjusted_mode, 0); - pipe_config->timings_set = true; + } /* * XXX: It would be nice to support lower refresh rates on the diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 80bea1d3209f..45010bb5d132 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -194,6 +194,9 @@ void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc, adjusted_mode->vdisplay == mode->vdisplay) goto out; + drm_mode_set_crtcinfo(adjusted_mode, 0); + pipe_config->timings_set = true; + switch (fitting_mode) { case DRM_MODE_SCALE_CENTER: /* -- cgit v1.2.3-58-ga151 From bcf53de4e60d9000b82f541d654529e2902a4c2c Mon Sep 17 00:00:00 2001 From: Stéphane Marchesin Date: Fri, 12 Jul 2013 13:54:41 -0700 Subject: drm/i915: Preserve the DDI_A_4_LANES bit from the bios MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise the DDI_A_4_LANES bit gets lost and we can't use > 2 lanes on eDP. This fixes eDP on hsw with > 2 lanes. Also s/port_reversal/saved_port_bits/ since the current name is confusing. Signed-off-by: Stéphane Marchesin Reviewed-by: Paulo Zanoni Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 10 ++++++---- drivers/gpu/drm/i915/intel_drv.h | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 324211ac9c55..b042ee5c4070 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -301,7 +301,7 @@ static void intel_ddi_mode_set(struct drm_encoder *encoder, struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); - intel_dp->DP = intel_dig_port->port_reversal | + intel_dp->DP = intel_dig_port->saved_port_bits | DDI_BUF_CTL_ENABLE | DDI_BUF_EMP_400MV_0DB_HSW; intel_dp->DP |= DDI_PORT_WIDTH(intel_dp->lane_count); @@ -1109,7 +1109,8 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder) * enabling the port. */ I915_WRITE(DDI_BUF_CTL(port), - intel_dig_port->port_reversal | DDI_BUF_CTL_ENABLE); + intel_dig_port->saved_port_bits | + DDI_BUF_CTL_ENABLE); } else if (type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); @@ -1347,8 +1348,9 @@ void intel_ddi_init(struct drm_device *dev, enum port port) intel_encoder->get_config = intel_ddi_get_config; intel_dig_port->port = port; - intel_dig_port->port_reversal = I915_READ(DDI_BUF_CTL(port)) & - DDI_BUF_PORT_REVERSAL; + intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & + (DDI_BUF_PORT_REVERSAL | + DDI_A_4_LANES); intel_dig_port->dp.output_reg = DDI_BUF_CTL(port); intel_encoder->type = INTEL_OUTPUT_UNKNOWN; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index c8c9b6f48230..b7d6e09456ce 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -504,7 +504,7 @@ struct intel_dp { struct intel_digital_port { struct intel_encoder base; enum port port; - u32 port_reversal; + u32 saved_port_bits; struct intel_dp dp; struct intel_hdmi hdmi; }; -- cgit v1.2.3-58-ga151 From f3ed0a17f0292300b3caca32d823ecd32554a667 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 11 Jul 2013 09:50:30 -0700 Subject: Thermal: x86 package temp thermal crash On systems with no package MSR support this caused crash as there is a bug in the logic to check presence of DTHERM and PTS feature together. Added a change so that when there is no PTS support, module doesn't get loaded. Even if some CPU comes online with the PTS feature disabled, and other CPUs has this support, this patch will still prevent such MSR accesses. Signed-off-by: Srinivas Pandruvada Reported-by: Daniel Walker Signed-off-by: Zhang Rui --- drivers/thermal/x86_pkg_temp_thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 5de56f671a9d..034604c021d6 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -511,7 +511,7 @@ static int get_core_online(unsigned int cpu) /* Check if there is already an instance for this package */ if (!phdev) { - if (!cpu_has(c, X86_FEATURE_DTHERM) && + if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS)) return -ENODEV; if (pkg_temp_thermal_device_add(cpu)) @@ -562,7 +562,7 @@ static struct notifier_block pkg_temp_thermal_notifier __refdata = { }; static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = { - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM }, + { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_PTS }, {} }; MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids); -- cgit v1.2.3-58-ga151 From c7c1b3112e643d290471abd0f3b27ffeec6f28dd Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 18 Jun 2013 21:09:12 +0800 Subject: Thermal: x86_pkg_temp: fix krealloc() misuse in in pkg_temp_thermal_device_add() If krealloc() returns NULL, it doesn't free the original. So any code of the form 'foo = krealloc(foo, ...);' is almost certainly a bug. Signed-off-by: Wei Yongjun Signed-off-by: Zhang Rui --- drivers/thermal/x86_pkg_temp_thermal.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 034604c021d6..cf172d50b5d3 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -394,6 +394,7 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) char buffer[30]; int thres_count; u32 eax, ebx, ecx, edx; + u8 *temp; cpuid(6, &eax, &ebx, &ecx, &edx); thres_count = ebx & 0x07; @@ -417,13 +418,14 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) spin_lock(&pkg_work_lock); if (topology_physical_package_id(cpu) > max_phy_id) max_phy_id = topology_physical_package_id(cpu); - pkg_work_scheduled = krealloc(pkg_work_scheduled, - (max_phy_id+1) * sizeof(u8), GFP_ATOMIC); - if (!pkg_work_scheduled) { + temp = krealloc(pkg_work_scheduled, + (max_phy_id+1) * sizeof(u8), GFP_ATOMIC); + if (!temp) { spin_unlock(&pkg_work_lock); err = -ENOMEM; goto err_ret_free; } + pkg_work_scheduled = temp; pkg_work_scheduled[topology_physical_package_id(cpu)] = 0; spin_unlock(&pkg_work_lock); -- cgit v1.2.3-58-ga151 From cdcedd6981194e511cc206887db661d016069d68 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 15 Jul 2013 12:36:35 +0300 Subject: usb: dwc3: gadget: don't prevent gadget from being probed if we fail In case we fail our ->udc_start() callback, we should be ready to accept another modprobe following the failed one. We had forgotten to clear dwc->gadget_driver back to NULL and, because of that, we were preventing gadget driver modprobe from being retried. Cc: Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b5e5b35df49c..f77083fedc68 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1584,6 +1584,7 @@ err1: __dwc3_gadget_ep_disable(dwc->eps[0]); err0: + dwc->gadget_driver = NULL; spin_unlock_irqrestore(&dwc->lock, flags); return ret; -- cgit v1.2.3-58-ga151 From ae40d64b1f2db93d7b092e6425a2f716289fbd09 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jun 2013 13:27:27 +0200 Subject: usb: gadget: at91_udc: call at91udc_of_init only when needed This avoids a build error in at91sam9261_9g10_defconfig: drivers/usb/gadget/at91_udc.c: In function 'at91udc_probe': drivers/usb/gadget/at91_udc.c:1685:34: warning: 'flags' may be used uninitialized in this function [-Wmaybe-uninitialized] board->vbus_active_low = (flags & OF_GPIO_ACTIVE_LOW) ? 1 : 0; ^ drivers/usb/gadget/at91_udc.c:1678:21: note: 'flags' was declared here enum of_gpio_flags flags; ^ Making the call to at91udc_of_init conditinal also reduces the object code size without sacrificing build coverage. Signed-off-by: Arnd Bergmann Cc: Felipe Balbi Cc: Nicolas Ferre Signed-off-by: Felipe Balbi --- drivers/usb/gadget/at91_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c index 073b938f9135..2cbab1c9ff58 100644 --- a/drivers/usb/gadget/at91_udc.c +++ b/drivers/usb/gadget/at91_udc.c @@ -1725,7 +1725,7 @@ static int at91udc_probe(struct platform_device *pdev) /* init software state */ udc = &controller; udc->gadget.dev.parent = dev; - if (pdev->dev.of_node) + if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) at91udc_of_init(udc, pdev->dev.of_node); else memcpy(&udc->board, dev->platform_data, -- cgit v1.2.3-58-ga151 From 88ae742347831c47846ca7343e786abd6635752b Mon Sep 17 00:00:00 2001 From: Yuan-Hsin Chen Date: Fri, 21 Jun 2013 14:50:47 +0000 Subject: usb: gadget: fotg210-udc: remove __init and __exit Remove __init and __exit from probe() and remove() and would also fix the section mismatch issue. Signed-off-by: Yuan-Hsin Chen Signed-off-by: Felipe Balbi --- drivers/usb/gadget/fotg210-udc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/gadget/fotg210-udc.c b/drivers/usb/gadget/fotg210-udc.c index cce5535b1dc6..10cd18ddd0d4 100644 --- a/drivers/usb/gadget/fotg210-udc.c +++ b/drivers/usb/gadget/fotg210-udc.c @@ -1074,7 +1074,7 @@ static struct usb_gadget_ops fotg210_gadget_ops = { .udc_stop = fotg210_udc_stop, }; -static int __exit fotg210_udc_remove(struct platform_device *pdev) +static int fotg210_udc_remove(struct platform_device *pdev) { struct fotg210_udc *fotg210 = dev_get_drvdata(&pdev->dev); @@ -1088,7 +1088,7 @@ static int __exit fotg210_udc_remove(struct platform_device *pdev) return 0; } -static int __init fotg210_udc_probe(struct platform_device *pdev) +static int fotg210_udc_probe(struct platform_device *pdev) { struct resource *res, *ires; struct fotg210_udc *fotg210 = NULL; -- cgit v1.2.3-58-ga151 From 7628083227b6bc4a7e33d7c381d7a4e558424b6b Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Tue, 25 Jun 2013 10:12:56 +0200 Subject: usb: gadget: at91_udc: prepare clk before calling enable Replace clk_enable/disable with clk_prepare_enable/disable_unprepare to avoid common clk framework warnings. Signed-off-by: Boris BREZILLON Signed-off-by: Felipe Balbi --- drivers/usb/gadget/at91_udc.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c index 2cbab1c9ff58..d9a6add0c852 100644 --- a/drivers/usb/gadget/at91_udc.c +++ b/drivers/usb/gadget/at91_udc.c @@ -870,8 +870,8 @@ static void clk_on(struct at91_udc *udc) if (udc->clocked) return; udc->clocked = 1; - clk_enable(udc->iclk); - clk_enable(udc->fclk); + clk_prepare_enable(udc->iclk); + clk_prepare_enable(udc->fclk); } static void clk_off(struct at91_udc *udc) @@ -880,8 +880,8 @@ static void clk_off(struct at91_udc *udc) return; udc->clocked = 0; udc->gadget.speed = USB_SPEED_UNKNOWN; - clk_disable(udc->fclk); - clk_disable(udc->iclk); + clk_disable_unprepare(udc->fclk); + clk_disable_unprepare(udc->iclk); } /* @@ -1782,12 +1782,14 @@ static int at91udc_probe(struct platform_device *pdev) } /* don't do anything until we have both gadget driver and VBUS */ - clk_enable(udc->iclk); + retval = clk_prepare_enable(udc->iclk); + if (retval) + goto fail1; at91_udp_write(udc, AT91_UDP_TXVC, AT91_UDP_TXVC_TXVDIS); at91_udp_write(udc, AT91_UDP_IDR, 0xffffffff); /* Clear all pending interrupts - UDP may be used by bootloader. */ at91_udp_write(udc, AT91_UDP_ICR, 0xffffffff); - clk_disable(udc->iclk); + clk_disable_unprepare(udc->iclk); /* request UDC and maybe VBUS irqs */ udc->udp_irq = platform_get_irq(pdev, 0); -- cgit v1.2.3-58-ga151 From 1974d494dea05ea227cb42f5e918828801e237aa Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Thu, 27 Jun 2013 01:08:11 +0800 Subject: usb: dwc3: fix wrong bit mask in dwc3_event_type Per dwc3 2.50a spec, the is_devspec bit is used to distinguish the Device Endpoint-Specific Event or Device-Specific Event (DEVT). If the bit is 1, the event is represented Device-Specific Event, then use [7:1] bits as Device Specific Event to marked the type. It has 7 bits, and we can see the reserved8_31 variable name which means from 8 to 31 bits marked reserved, actually there are 24 bits not 25 bits between that. And 1 + 7 + 24 = 32, the event size is 4 byes. So in dwc3_event_type, the bit mask should be: is_devspec [0] 1 bit type [7:1] 7 bits reserved8_31 [31:8] 24 bits This patch should be backported to kernels as old as 3.2, that contain the commit 72246da40f3719af3bfd104a2365b32537c27d83 "usb: Introduce DesignWare USB3 DRD Driver". Cc: Signed-off-by: Huang Rui Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index b69d322e3cab..27dad993b007 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -759,8 +759,8 @@ struct dwc3 { struct dwc3_event_type { u32 is_devspec:1; - u32 type:6; - u32 reserved8_31:25; + u32 type:7; + u32 reserved8_31:24; } __packed; #define DWC3_DEPEVT_XFERCOMPLETE 0x01 -- cgit v1.2.3-58-ga151 From 315955d707b50c8aad20a32ec0dd4c9fe243cabe Mon Sep 17 00:00:00 2001 From: Ruchika Kharwar Date: Thu, 4 Jul 2013 00:59:34 -0500 Subject: usb: dwc3: fix the error returned with usb3_phy failure When there is an error with the usb3_phy probe or absence, the error returned is erroneously for usb2_phy. Cc: Signed-off-by: Ruchika Kharwar Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index c35d49d39b76..358375e0b291 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -450,7 +450,7 @@ static int dwc3_probe(struct platform_device *pdev) } if (IS_ERR(dwc->usb3_phy)) { - ret = PTR_ERR(dwc->usb2_phy); + ret = PTR_ERR(dwc->usb3_phy); /* * if -ENXIO is returned, it means PHY layer wasn't -- cgit v1.2.3-58-ga151 From 5257a6332e9d02ed7a94381f553b9c05437baa5d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 4 Jul 2013 21:45:02 +0800 Subject: usb: gadget: mv_u3d_core: fix to pass correct device identity to free_irq() free_irq() expects the same device identity that was passed to corresponding request_irq(), otherwise the IRQ is not freed. Signed-off-by: Wei Yongjun Signed-off-by: Felipe Balbi --- drivers/usb/gadget/mv_u3d_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/gadget/mv_u3d_core.c b/drivers/usb/gadget/mv_u3d_core.c index 07fdb3eaf48a..ec6a2d290398 100644 --- a/drivers/usb/gadget/mv_u3d_core.c +++ b/drivers/usb/gadget/mv_u3d_core.c @@ -1776,7 +1776,7 @@ static int mv_u3d_remove(struct platform_device *dev) kfree(u3d->eps); if (u3d->irq) - free_irq(u3d->irq, &dev->dev); + free_irq(u3d->irq, u3d); if (u3d->cap_regs) iounmap(u3d->cap_regs); @@ -1974,7 +1974,7 @@ static int mv_u3d_probe(struct platform_device *dev) return 0; err_unregister: - free_irq(u3d->irq, &dev->dev); + free_irq(u3d->irq, u3d); err_request_irq: err_get_irq: kfree(u3d->status_req); -- cgit v1.2.3-58-ga151 From 8047806e64ea7b33fcede5b93f7276568a6119e8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 8 Jul 2013 22:47:37 -0700 Subject: usb: renesas_usbhs: gadget: remove extra check on udc_stop usb_gadget_ops :: udc_stop might be called with driver = NULL since 511f3c5326eabe1ece35202a404c24c0aeacc246 (usb: gadget: udc-core: fix a regression during gadget driver unbinding) Because of that, 2nd times insmod goes fail. This patch fixes it up. Reported-by: Yusuke Goda Signed-off-by: Kuninori Morimoto Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/mod_gadget.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index ed4949faa70d..805940c37353 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -855,10 +855,6 @@ static int usbhsg_gadget_stop(struct usb_gadget *gadget, struct usbhsg_gpriv *gpriv = usbhsg_gadget_to_gpriv(gadget); struct usbhs_priv *priv = usbhsg_gpriv_to_priv(gpriv); - if (!driver || - !driver->unbind) - return -EINVAL; - usbhsg_try_stop(priv, USBHSG_STATUS_REGISTERD); gpriv->driver = NULL; -- cgit v1.2.3-58-ga151 From 9d140f796f76d6a11fd27f07838285096e09b63b Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Tue, 9 Jul 2013 08:14:39 +0200 Subject: usb: gadget: Kconfig: Fix configfs-based RNDIS function build USB_CONFIGFS_RNDIS depends on USB_U_RNDIS. Select it. Acked-by: Michal Nazarewicz Reported-by: Fengguang Wu Signed-off-by: Andrzej Pietrasiewicz Signed-off-by: Kyungmin Park Signed-off-by: Felipe Balbi --- drivers/usb/gadget/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 62f6802f6e0f..5fce99a5d99e 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -639,6 +639,7 @@ config USB_CONFIGFS_RNDIS depends on USB_CONFIGFS depends on NET select USB_U_ETHER + select USB_U_RNDIS select USB_F_RNDIS help Microsoft Windows XP bundles the "Remote NDIS" (RNDIS) protocol, -- cgit v1.2.3-58-ga151 From 8744303eef570b4adc19a406b11418d21baa1c3c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 11 Jul 2013 15:54:00 +0200 Subject: usb: dwc3: USB_DWC3 should depend on HAS_DMA If NO_DMA=y: drivers/built-in.o: In function `dwc3_free_one_event_buffer': drivers/usb/dwc3/core.c:132: undefined reference to `dma_free_coherent' drivers/built-in.o: In function `dwc3_alloc_one_event_buffer': drivers/usb/dwc3/core.c:154: undefined reference to `dma_alloc_coherent' drivers/built-in.o: In function `dma_set_coherent_mask': include/linux/dma-mapping.h:93: undefined reference to `dma_supported' drivers/built-in.o: In function `dwc3_free_trb_pool': drivers/usb/dwc3/gadget.c:407: undefined reference to `dma_free_coherent' drivers/built-in.o: In function `dwc3_gadget_exit': drivers/usb/dwc3/gadget.c:2693: undefined reference to `dma_free_coherent' drivers/built-in.o: In function `dwc3_alloc_trb_pool': drivers/usb/dwc3/gadget.c:391: undefined reference to `dma_alloc_coherent' drivers/built-in.o: In function `dwc3_gadget_init': drivers/usb/dwc3/gadget.c:2598: undefined reference to `dma_alloc_coherent' drivers/usb/dwc3/gadget.c:2667: undefined reference to `dma_free_coherent' drivers/usb/dwc3/gadget.c:2674: undefined reference to `dma_free_coherent' drivers/usb/dwc3/gadget.c:2678: undefined reference to `dma_free_coherent' Signed-off-by: Geert Uytterhoeven Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig index 757aa18027d0..2378958ea63e 100644 --- a/drivers/usb/dwc3/Kconfig +++ b/drivers/usb/dwc3/Kconfig @@ -1,6 +1,6 @@ config USB_DWC3 tristate "DesignWare USB3 DRD Core Support" - depends on (USB || USB_GADGET) && GENERIC_HARDIRQS + depends on (USB || USB_GADGET) && GENERIC_HARDIRQS && HAS_DMA select USB_XHCI_PLATFORM if USB_SUPPORT && USB_XHCI_HCD help Say Y or M here if your system has a Dual Role SuperSpeed -- cgit v1.2.3-58-ga151 From 4713aec180b666a78bc658490870b61f6846d8a6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 11 Jul 2013 15:54:02 +0200 Subject: usb: gadget: USB_MV_UDC should depend on HAS_DMA If NO_DMA=y: drivers/built-in.o: In function `done': drivers/usb/gadget/mv_udc_core.c:239: undefined reference to `dma_pool_free' drivers/built-in.o: In function `build_dtd': drivers/usb/gadget/mv_udc_core.c:371: undefined reference to `dma_pool_alloc' drivers/built-in.o: In function `udc_prime_status': drivers/usb/gadget/mv_udc_core.c:1465: undefined reference to `dma_map_single' drivers/built-in.o: In function `mv_udc_remove': drivers/usb/gadget/mv_udc_core.c:2087: undefined reference to `dma_pool_destroy' drivers/usb/gadget/mv_udc_core.c:2090: undefined reference to `dma_free_coherent' drivers/built-in.o: In function `mv_udc_probe': drivers/usb/gadget/mv_udc_core.c:2190: undefined reference to `dma_alloc_coherent' drivers/usb/gadget/mv_udc_core.c:2201: undefined reference to `dma_pool_create' drivers/usb/gadget/mv_udc_core.c:2315: undefined reference to `dma_pool_destroy' drivers/usb/gadget/mv_udc_core.c:2317: undefined reference to `dma_free_coherent' Signed-off-by: Geert Uytterhoeven Signed-off-by: Felipe Balbi --- drivers/usb/gadget/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 5fce99a5d99e..1fd0e17d3706 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -328,7 +328,7 @@ config USB_S3C_HSUDC config USB_MV_UDC tristate "Marvell USB2.0 Device Controller" - depends on GENERIC_HARDIRQS + depends on GENERIC_HARDIRQS && HAS_DMA help Marvell Socs (including PXA and MMP series) include a high speed USB2.0 OTG controller, which can be configured as high speed or -- cgit v1.2.3-58-ga151 From bfcbd020396202e7795d78f8d87845e4e225c3b1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 11 Jul 2013 15:54:03 +0200 Subject: usb: gadget: USB_FOTG210_UDC should depend on HAS_DMA If NO_DMA=y: drivers/built-in.o: In function `fotg210_start_dma': drivers/usb/gadget/fotg210-udc.c:354: undefined reference to `dma_map_single' drivers/usb/gadget/fotg210-udc.c:357: undefined reference to `dma_mapping_error' drivers/usb/gadget/fotg210-udc.c:362: undefined reference to `dma_sync_single_for_cpu' drivers/usb/gadget/fotg210-udc.c:376: undefined reference to `dma_unmap_single' Signed-off-by: Geert Uytterhoeven Signed-off-by: Felipe Balbi --- drivers/usb/gadget/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 1fd0e17d3706..2c4e978cef04 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -193,6 +193,7 @@ config USB_FUSB300 Faraday usb device controller FUSB300 driver config USB_FOTG210_UDC + depends on HAS_DMA tristate "Faraday FOTG210 USB Peripheral Controller" help Faraday USB2.0 OTG controller which can be configured as -- cgit v1.2.3-58-ga151 From 91f6b84739e29069670f54651340d2c20e4e7399 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 11 Jul 2013 15:54:04 +0200 Subject: usb: gadget: USB_MV_U3D should depend on HAS_DMA If NO_DMA=y: drivers/built-in.o: In function `mv_u3d_done': drivers/usb/gadget/mv_u3d_core.c:206: undefined reference to `dma_pool_free' drivers/usb/gadget/mv_u3d_core.c:209: undefined reference to `dma_unmap_single' drivers/built-in.o: In function `mv_u3d_build_trb_one': drivers/usb/gadget/mv_u3d_core.c:311: undefined reference to `dma_pool_alloc' drivers/built-in.o: In function `mv_u3d_req_to_trb': drivers/usb/gadget/mv_u3d_core.c:480: undefined reference to `dma_map_single' drivers/built-in.o: In function `mv_u3d_remove': drivers/usb/gadget/mv_u3d_core.c:1770: undefined reference to `dma_pool_destroy' drivers/usb/gadget/mv_u3d_core.c:1773: undefined reference to `dma_free_coherent' drivers/built-in.o: In function `mv_u3d_probe': drivers/usb/gadget/mv_u3d_core.c:1880: undefined reference to `dma_alloc_coherent' drivers/usb/gadget/mv_u3d_core.c:1890: undefined reference to `dma_pool_create' drivers/usb/gadget/mv_u3d_core.c:1984: undefined reference to `dma_pool_destroy' drivers/usb/gadget/mv_u3d_core.c:1986: undefined reference to `dma_free_coherent' Signed-off-by: Geert Uytterhoeven Signed-off-by: Felipe Balbi --- drivers/usb/gadget/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 2c4e978cef04..8e9368330b10 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -336,6 +336,7 @@ config USB_MV_UDC full speed USB peripheral. config USB_MV_U3D + depends on HAS_DMA tristate "MARVELL PXA2128 USB 3.0 controller" help MARVELL PXA2128 Processor series include a super speed USB3.0 device -- cgit v1.2.3-58-ga151 From 908b961326b2ecfbf494c9b2f206847d925f269d Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 12 Jul 2013 11:06:21 -0400 Subject: usb: gadget: don't fail when DMA isn't present When CONFIG_HAS_DMA isn't enabled, the UDC core gets build errors: drivers/built-in.o: In function `dma_set_coherent_mask': include/linux/dma-mapping.h:93: undefined reference to `dma_supported' include/linux/dma-mapping.h:93: undefined reference to `dma_supported' drivers/built-in.o: In function `usb_gadget_unmap_request': drivers/usb/gadget/udc-core.c:91: undefined reference to `dma_unmap_sg' drivers/usb/gadget/udc-core.c:96: undefined reference to `dma_unmap_single' drivers/built-in.o: In function `usb_gadget_map_request': drivers/usb/gadget/udc-core.c:62: undefined reference to `dma_map_sg' drivers/usb/gadget/udc-core.c:71: undefined reference to `dma_map_single' drivers/usb/gadget/udc-core.c:74: undefined reference to `dma_mapping_error' Prevent this by protecting the DMA API routines with preprocessor tests. Signed-off-by: Alan Stern CC: Geert Uytterhoeven Acked-by: Alexander Shishkin Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc-core.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c index ffd8fa541101..c28ac9872030 100644 --- a/drivers/usb/gadget/udc-core.c +++ b/drivers/usb/gadget/udc-core.c @@ -50,6 +50,8 @@ static DEFINE_MUTEX(udc_lock); /* ------------------------------------------------------------------------- */ +#ifdef CONFIG_HAS_DMA + int usb_gadget_map_request(struct usb_gadget *gadget, struct usb_request *req, int is_in) { @@ -99,6 +101,8 @@ void usb_gadget_unmap_request(struct usb_gadget *gadget, } EXPORT_SYMBOL_GPL(usb_gadget_unmap_request); +#endif /* CONFIG_HAS_DMA */ + /* ------------------------------------------------------------------------- */ void usb_gadget_set_state(struct usb_gadget *gadget, @@ -194,9 +198,11 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, dev_set_name(&gadget->dev, "gadget"); gadget->dev.parent = parent; +#ifdef CONFIG_HAS_DMA dma_set_coherent_mask(&gadget->dev, parent->coherent_dma_mask); gadget->dev.dma_parms = parent->dma_parms; gadget->dev.dma_mask = parent->dma_mask; +#endif if (release) gadget->dev.release = release; -- cgit v1.2.3-58-ga151 From 24e6bfd9d9d4e2f34dee59f0b477b72bdda5f800 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 15 Jul 2013 14:06:54 +0530 Subject: usb: phy: samsung: Fix an error message typo The error message is common to both clk_get functions. Update it accordingly. Signed-off-by: Sachin Kamat Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-samsung-usb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/phy/phy-samsung-usb2.c b/drivers/usb/phy/phy-samsung-usb2.c index 1011c16ade7e..758b86d0fcb3 100644 --- a/drivers/usb/phy/phy-samsung-usb2.c +++ b/drivers/usb/phy/phy-samsung-usb2.c @@ -388,7 +388,7 @@ static int samsung_usb2phy_probe(struct platform_device *pdev) clk = devm_clk_get(dev, "otg"); if (IS_ERR(clk)) { - dev_err(dev, "Failed to get otg clock\n"); + dev_err(dev, "Failed to get usbhost/otg clock\n"); return PTR_ERR(clk); } -- cgit v1.2.3-58-ga151 From 690c70bab1ebab0782cf9c316b726e2dddebc411 Mon Sep 17 00:00:00 2001 From: Ruchika Kharwar Date: Thu, 4 Jul 2013 00:21:19 -0500 Subject: usb: phy: omap-usb3: fix dpll clock index Correction of the omap_usb3_dpll_params array when the sys_clk_rate is 20MHz. Signed-off-by: Nikhil Devshatwar Signed-off-by: Ruchika Kharwar Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-omap-usb3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/phy/phy-omap-usb3.c b/drivers/usb/phy/phy-omap-usb3.c index efe6e1464f45..a2fb30bbb971 100644 --- a/drivers/usb/phy/phy-omap-usb3.c +++ b/drivers/usb/phy/phy-omap-usb3.c @@ -71,9 +71,9 @@ static struct usb_dpll_params omap_usb3_dpll_params[NUM_SYS_CLKS] = { {1250, 5, 4, 20, 0}, /* 12 MHz */ {3125, 20, 4, 20, 0}, /* 16.8 MHz */ {1172, 8, 4, 20, 65537}, /* 19.2 MHz */ + {1000, 7, 4, 10, 0}, /* 20 MHz */ {1250, 12, 4, 20, 0}, /* 26 MHz */ {3125, 47, 4, 20, 92843}, /* 38.4 MHz */ - {1000, 7, 4, 10, 0}, /* 20 MHz */ }; -- cgit v1.2.3-58-ga151 From 94e791f522595e2d5276e6d27a5b3b57f4e1cd8d Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 15 Jul 2013 15:38:52 -0700 Subject: Thermal: x86_pkg_temp: Limit number of pkg temp zones Although it is unlikley that physical package id is set to some arbitary number, it is better to prevent in anycase. Since package temp zones use this in thermal zone type and for allocation, added a limit. Signed-off-by: Srinivas Pandruvada Signed-off-by: Zhang Rui --- drivers/thermal/x86_pkg_temp_thermal.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index cf172d50b5d3..810143a6aa33 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -54,6 +54,8 @@ MODULE_PARM_DESC(notify_delay_ms, * is some wrong values returned by cpuid for number of thresholds. */ #define MAX_NUMBER_OF_TRIPS 2 +/* Limit number of package temp zones */ +#define MAX_PKG_TEMP_ZONE_IDS 256 struct phy_dev_entry { struct list_head list; @@ -401,6 +403,9 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) if (!thres_count) return -ENODEV; + if (topology_physical_package_id(cpu) > MAX_PKG_TEMP_ZONE_IDS) + return -ENODEV; + thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS); err = get_tj_max(cpu, &tj_max); -- cgit v1.2.3-58-ga151 From eac27f04a71e1f39f196f7e520d16dcefc955d77 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Thu, 11 Jul 2013 21:15:57 -0400 Subject: ata: Fix DVD not dectected at some platform with Wellsburg PCH There is a patch b55f84e2d527182e7c611d466cd0bb6ddce201de "ata_piix: Fix DVD not dectected at some Haswell platforms" to fix an issue of DVD not recognized on Haswell Desktop platform with Lynx Point. Recently, it is also found the same issue at some platformas with Wellsburg PCH. So deliver a similar patch to fix it by disables 32bit PIO in IDE mode. Signed-off-by: Youquan Song Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- drivers/ata/ata_piix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index b52a10c8eeb9..513ad7ed0c99 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -330,7 +330,7 @@ static const struct pci_device_id piix_pci_tbl[] = { /* SATA Controller IDE (Wellsburg) */ { 0x8086, 0x8d00, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb }, /* SATA Controller IDE (Wellsburg) */ - { 0x8086, 0x8d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, + { 0x8086, 0x8d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_snb }, /* SATA Controller IDE (Wellsburg) */ { 0x8086, 0x8d60, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb }, /* SATA Controller IDE (Wellsburg) */ -- cgit v1.2.3-58-ga151 From e583d9db9960cf40e0bc8afee4946baa9d71596e Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 11 Jul 2013 14:58:04 -0400 Subject: USB: global suspend and remote wakeup don't mix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hub driver was recently changed to use "global" suspend for system suspend transitions on non-SuperSpeed buses. This means that we don't suspend devices individually by setting the suspend feature on the upstream hub port; instead devices all go into suspend automatically when the root hub stops transmitting packets. The idea was to save time and to avoid certain kinds of wakeup races. Now it turns out that many hubs are buggy; they don't relay wakeup requests from a downstream port to their upstream port if the downstream port's suspend feature is not set (depending on the speed of the downstream port, whether or not the hub is enabled for remote wakeup, and possibly other factors). We can't have hubs dropping wakeup requests. Therefore this patch goes partway back to the old policy: It sets the suspend feature for a port if the device attached to that port or any of its descendants is enabled for wakeup. People will still be able to benefit from the time savings if they don't care about wakeup and leave it disabled on all their devices. In order to accomplish this, the patch adds a new field to the usb_hub structure: wakeup_enabled_descendants is a count of how many devices below a suspended hub are enabled for remote wakeup. A corresponding new subroutine determines the number of wakeup-enabled devices at or below an arbitrary suspended USB device. This should be applied to the 3.10 stable kernel. Signed-off-by: Alan Stern Reported-and-tested-by: Toralf Förster Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 39 +++++++++++++++++++++++++++++++-------- drivers/usb/core/hub.h | 3 +++ 2 files changed, 34 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 4191db32f12c..c857471519e3 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2848,6 +2848,15 @@ static int usb_disable_function_remotewakeup(struct usb_device *udev) USB_CTRL_SET_TIMEOUT); } +/* Count of wakeup-enabled devices at or below udev */ +static unsigned wakeup_enabled_descendants(struct usb_device *udev) +{ + struct usb_hub *hub = usb_hub_to_struct_hub(udev); + + return udev->do_remote_wakeup + + (hub ? hub->wakeup_enabled_descendants : 0); +} + /* * usb_port_suspend - suspend a usb device's upstream port * @udev: device that's no longer in active use, not a root hub @@ -2888,8 +2897,8 @@ static int usb_disable_function_remotewakeup(struct usb_device *udev) * Linux (2.6) currently has NO mechanisms to initiate that: no khubd * timer, no SRP, no requests through sysfs. * - * If Runtime PM isn't enabled or used, non-SuperSpeed devices really get - * suspended only when their bus goes into global suspend (i.e., the root + * If Runtime PM isn't enabled or used, non-SuperSpeed devices may not get + * suspended until their bus goes into global suspend (i.e., the root * hub is suspended). Nevertheless, we change @udev->state to * USB_STATE_SUSPENDED as this is the device's "logical" state. The actual * upstream port setting is stored in @udev->port_is_suspended. @@ -2960,15 +2969,21 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) /* see 7.1.7.6 */ if (hub_is_superspeed(hub->hdev)) status = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_U3); - else if (PMSG_IS_AUTO(msg)) - status = set_port_feature(hub->hdev, port1, - USB_PORT_FEAT_SUSPEND); + /* * For system suspend, we do not need to enable the suspend feature * on individual USB-2 ports. The devices will automatically go * into suspend a few ms after the root hub stops sending packets. * The USB 2.0 spec calls this "global suspend". + * + * However, many USB hubs have a bug: They don't relay wakeup requests + * from a downstream port if the port's suspend feature isn't on. + * Therefore we will turn on the suspend feature if udev or any of its + * descendants is enabled for remote wakeup. */ + else if (PMSG_IS_AUTO(msg) || wakeup_enabled_descendants(udev) > 0) + status = set_port_feature(hub->hdev, port1, + USB_PORT_FEAT_SUSPEND); else { really_suspend = false; status = 0; @@ -3003,15 +3018,16 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) if (!PMSG_IS_AUTO(msg)) status = 0; } else { - /* device has up to 10 msec to fully suspend */ dev_dbg(&udev->dev, "usb %ssuspend, wakeup %d\n", (PMSG_IS_AUTO(msg) ? "auto-" : ""), udev->do_remote_wakeup); - usb_set_device_state(udev, USB_STATE_SUSPENDED); if (really_suspend) { udev->port_is_suspended = 1; + + /* device has up to 10 msec to fully suspend */ msleep(10); } + usb_set_device_state(udev, USB_STATE_SUSPENDED); } /* @@ -3293,7 +3309,11 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg) unsigned port1; int status; - /* Warn if children aren't already suspended */ + /* + * Warn if children aren't already suspended. + * Also, add up the number of wakeup-enabled descendants. + */ + hub->wakeup_enabled_descendants = 0; for (port1 = 1; port1 <= hdev->maxchild; port1++) { struct usb_device *udev; @@ -3303,6 +3323,9 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg) if (PMSG_IS_AUTO(msg)) return -EBUSY; } + if (udev) + hub->wakeup_enabled_descendants += + wakeup_enabled_descendants(udev); } if (hdev->do_remote_wakeup && hub->quirk_check_port_auto_suspend) { diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index 6508e02b3dac..4e4790dea343 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -59,6 +59,9 @@ struct usb_hub { struct usb_tt tt; /* Transaction Translator */ unsigned mA_per_port; /* current for each child */ +#ifdef CONFIG_PM + unsigned wakeup_enabled_descendants; +#endif unsigned limited_power:1; unsigned quiescing:1; -- cgit v1.2.3-58-ga151 From 84672369ffb98a51d4ddf74c20a23636da3ad615 Mon Sep 17 00:00:00 2001 From: Fernando Soto Date: Fri, 14 Jun 2013 23:13:35 +0000 Subject: Drivers: hv: vmbus: incorrect device name is printed when child device is unregistered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whenever a device is unregistered in vmbus_device_unregister (drivers/hv/vmbus_drv.c), the device name in the log message may contain garbage as the memory has already been freed by the time pr_info is called. Log example: [ 3149.170475] hv_vmbus: child device àõsèè0_5 unregistered By logging the message just before calling device_unregister, the correct device name is printed: [ 3145.034652] hv_vmbus: child device vmbus_0_5 unregistered Also changing register & unregister messages to debug to avoid unnecessarily cluttering the kernel log. Signed-off-by: Fernando M Soto Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a2464bf07c49..e8e071fc1d6d 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -690,7 +690,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) if (ret) pr_err("Unable to register child device\n"); else - pr_info("child device %s registered\n", + pr_debug("child device %s registered\n", dev_name(&child_device_obj->device)); return ret; @@ -702,14 +702,14 @@ int vmbus_device_register(struct hv_device *child_device_obj) */ void vmbus_device_unregister(struct hv_device *device_obj) { + pr_debug("child device %s unregistered\n", + dev_name(&device_obj->device)); + /* * Kick off the process of unregistering the device. * This will call vmbus_remove() and eventually vmbus_device_release() */ device_unregister(&device_obj->device); - - pr_info("child device %s unregistered\n", - dev_name(&device_obj->device)); } -- cgit v1.2.3-58-ga151 From ed07ec93e83ec471d365ce084e43ad90fd205903 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sun, 14 Jul 2013 22:38:11 -0700 Subject: Drivers: hv: balloon: Fix a bug in the hot-add code As we hot-add 128 MB chunks of memory, we wait to ensure that the memory is onlined before attempting to hot-add the next chunk. If the udev rule for memory hot-add is not executed within the allowed time, we would rollback the state and abort further hot-add. Since the hot-add has succeeded and the only failure is that the memory is not onlined within the allowed time, we should not be rolling back the state. Fix this bug. Signed-off-by: K. Y. Srinivasan Cc: Stable Acked-by: Jason Wang Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 4c605c70ebf9..61b7351df1d4 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -562,7 +562,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, struct hv_hotadd_state *has) { int ret = 0; - int i, nid, t; + int i, nid; unsigned long start_pfn; unsigned long processed_pfn; unsigned long total_pfn = pfn_count; @@ -607,14 +607,11 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, /* * Wait for the memory block to be onlined. + * Since the hot add has succeeded, it is ok to + * proceed even if the pages in the hot added region + * have not been "onlined" within the allowed time. */ - t = wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ); - if (t == 0) { - pr_info("hot_add memory timedout\n"); - has->ha_end_pfn -= HA_CHUNK; - has->covered_end_pfn -= processed_pfn; - break; - } + wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ); } -- cgit v1.2.3-58-ga151 From c5e2254f8d63a6654149aa32ac5f2b7dd66a976d Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sun, 14 Jul 2013 22:38:12 -0700 Subject: Drivers: hv: balloon: Do not post pressure status if interrupted When we are posting pressure status, we may get interrupted and handle the un-balloon operation. In this case just don't post the status as we know the pressure status is stale. Signed-off-by: K. Y. Srinivasan Cc: Stable Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 61b7351df1d4..deb5c25305af 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -975,6 +975,14 @@ static void post_status(struct hv_dynmem_device *dm) dm->num_pages_ballooned + compute_balloon_floor(); + /* + * If our transaction ID is no longer current, just don't + * send the status. This can happen if we were interrupted + * after we picked our transaction ID. + */ + if (status.hdr.trans_id != atomic_read(&trans_id)) + return; + vmbus_sendpacket(dm->dev->channel, &status, sizeof(struct dm_status), (unsigned long)NULL, -- cgit v1.2.3-58-ga151 From 7dcd2677ea912573d9ed4bcd629b0023b2d11505 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 17 Jul 2013 10:22:58 +0400 Subject: drm/i915: fix long-standing SNB regression in power consumption after resume v2 This patch fixes regression in power consumtion of sandy bridge gpu, which exists since v3.6 Sometimes after resuming from s2ram gpu starts thinking that it's extremely busy. After that it never reaches rc6 state. Bug exists since kernel v3.6: commit b4ae3f22d238617ca11610b29fde16cf8c0bc6e0 Author: Jesse Barnes Date: Thu Jun 14 11:04:48 2012 -0700 drm/i915: load boot context at driver init time For some reason RC6 is already enabled at the beginning of resuming process. Following initliaztion breaks some internal state and confuses RPS engine. This patch disables RC6 at the beginnig of resume and initialization. I've rearranged initialization sequence, because intel_disable_gt_powersave() needs initialized force_wake_get/put and some locks from the dev_priv. Note: The culprit in the initialization sequence seems to be the write to MBCTL added in the above mentioned commit. The first version of this patch just held a forcewake reference across the clock gating init functions, which seems to have been enought to gather quite a few positive test reports. But since that smelled a bit like ad-hoc duct-tape v2 now just disables rps/rc6 across the entire hw setup. References: https://bugs.freedesktop.org/show_bug.cgi?id=54089 References: https://bugzilla.kernel.org/show_bug.cgi?id=58971 References: https://patchwork.kernel.org/patch/2827634/ (patch v1) Signed-off-by: Konstantin Khlebnikov Cc: Daniel Vetter Cc: Chris Wilson Cc: Jesse Barnes [danvet: Add note about v1 vs. v2 of this patch and use standard layout for the commit citation. Also add the tested-bys from v1 and a cc: stable.] Cc: stable@vger.kernel.org (Note: tiny conflict due to the addition of the backlight lock in 3.11) Tested-by: Alexander Kaltsas (v1) Tested-by: rocko (v1) Tested-by: JohnMB (v1) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 18 +++++++++--------- drivers/gpu/drm/i915/intel_pm.c | 5 +++-- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index adb319b53ecd..5c0663f58aff 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1495,6 +1495,14 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) dev_priv->dev = dev; dev_priv->info = info; + spin_lock_init(&dev_priv->irq_lock); + spin_lock_init(&dev_priv->gpu_error.lock); + spin_lock_init(&dev_priv->rps.lock); + spin_lock_init(&dev_priv->backlight.lock); + mutex_init(&dev_priv->dpio_lock); + mutex_init(&dev_priv->rps.hw_lock); + mutex_init(&dev_priv->modeset_restore_lock); + i915_dump_device_info(dev_priv); if (i915_get_bridge_dev(dev)) { @@ -1586,6 +1594,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) intel_irq_init(dev); intel_gt_init(dev); + intel_gt_reset(dev); /* Try to make sure MCHBAR is enabled before poking at it */ intel_setup_mchbar(dev); @@ -1610,15 +1619,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!IS_I945G(dev) && !IS_I945GM(dev)) pci_enable_msi(dev->pdev); - spin_lock_init(&dev_priv->irq_lock); - spin_lock_init(&dev_priv->gpu_error.lock); - spin_lock_init(&dev_priv->rps.lock); - spin_lock_init(&dev_priv->backlight.lock); - mutex_init(&dev_priv->dpio_lock); - - mutex_init(&dev_priv->rps.hw_lock); - mutex_init(&dev_priv->modeset_restore_lock); - dev_priv->num_plane = 1; if (IS_VALLEYVIEW(dev)) dev_priv->num_plane = 2; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d10e6735771f..828c426b4b92 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5487,6 +5487,9 @@ void intel_gt_reset(struct drm_device *dev) if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) __gen6_gt_force_wake_mt_reset(dev_priv); } + + /* BIOS often leaves RC6 enabled, but disable it for hw init */ + intel_disable_gt_powersave(dev); } void intel_gt_init(struct drm_device *dev) @@ -5495,8 +5498,6 @@ void intel_gt_init(struct drm_device *dev) spin_lock_init(&dev_priv->gt_lock); - intel_gt_reset(dev); - if (IS_VALLEYVIEW(dev)) { dev_priv->gt.force_wake_get = vlv_force_wake_get; dev_priv->gt.force_wake_put = vlv_force_wake_put; -- cgit v1.2.3-58-ga151 From 35c95375f69ceec721fea67a0532bc17ceb5cf64 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 17 Jul 2013 06:55:04 +0200 Subject: drm/i915: Sanitize shared dpll state There seems to be no limit to the amount of gunk the firmware can leave behind. Some platforms leave pch dplls on which are not in active use at all. The example in the bug report is a Apple Macbook Pro. Note that this escape scrunity of the hw state checker until we've tried to use this enabled, but unused pll since we did only check for the inverse case of a in-used, but disabled pll. v2: Add a WARN in the pll state checker which would have caught this case. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=66952 Reported-and-tested-by: shui yangwei Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c59335ce84f4..76b01c16a0e8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8314,6 +8314,8 @@ check_shared_dpll_state(struct drm_device *dev) pll->active, pll->refcount); WARN(pll->active && !pll->on, "pll in active use but not on in sw tracking\n"); + WARN(pll->on && !pll->active, + "pll in on but not on in use in sw tracking\n"); WARN(pll->on != active, "pll on state mismatch (expected %i, found %i)\n", pll->on, active); @@ -9814,8 +9816,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) } pll->refcount = pll->active; - DRM_DEBUG_KMS("%s hw state readout: refcount %i\n", - pll->name, pll->refcount); + DRM_DEBUG_KMS("%s hw state readout: refcount %i, on %i\n", + pll->name, pll->refcount, pll->on); } list_for_each_entry(encoder, &dev->mode_config.encoder_list, @@ -9866,6 +9868,7 @@ void intel_modeset_setup_hw_state(struct drm_device *dev, struct drm_plane *plane; struct intel_crtc *crtc; struct intel_encoder *encoder; + int i; intel_modeset_readout_hw_state(dev); @@ -9881,6 +9884,18 @@ void intel_modeset_setup_hw_state(struct drm_device *dev, intel_dump_pipe_config(crtc, &crtc->config, "[setup_hw_state]"); } + for (i = 0; i < dev_priv->num_shared_dpll; i++) { + struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i]; + + if (!pll->on || pll->active) + continue; + + DRM_DEBUG_KMS("%s enabled but not in use, disabling\n", pll->name); + + pll->disable(dev_priv, pll); + pll->on = false; + } + if (force_restore) { /* * We need to use raw interfaces for restoring state to avoid -- cgit v1.2.3-58-ga151 From 2e57f47d317dd035b18634b0c602272529368fcc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Jul 2013 12:14:40 +0100 Subject: drm/i915: Fix dereferencing invalid connectors in is_crtc_connector_off() In commit e3de42b68478a8c95dd27520e9adead2af9477a5 Author: Imre Deak Date: Fri May 3 19:44:07 2013 +0200 drm/i915: force full modeset if the connector is in DPMS OFF mode a new function was added that walked over the set of connectors to see if any of the currently associated CRTC was switched off. This function walked an array of connectors, rather than the array of pointers to connectors contained in the drm_mode_set - i.e. it was dereferencing far past the end of the first connector. This only becomes an issue if we attempt to use a clone mode (i.e. more than one connector per CRTC) such that set->num_connectors > 1. Reported-by: Timo Aaltonen Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=65927 Signed-off-by: Chris Wilson Cc: Imre Deak Cc: Egbert Eich Cc: Jesse Barnes Cc: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 76b01c16a0e8..76b034be2c7c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8540,15 +8540,20 @@ static void intel_set_config_restore_state(struct drm_device *dev, } static bool -is_crtc_connector_off(struct drm_crtc *crtc, struct drm_connector *connectors, - int num_connectors) +is_crtc_connector_off(struct drm_mode_set *set) { int i; - for (i = 0; i < num_connectors; i++) - if (connectors[i].encoder && - connectors[i].encoder->crtc == crtc && - connectors[i].dpms != DRM_MODE_DPMS_ON) + if (set->num_connectors == 0) + return false; + + if (WARN_ON(set->connectors == NULL)) + return false; + + for (i = 0; i < set->num_connectors; i++) + if (set->connectors[i]->encoder && + set->connectors[i]->encoder->crtc == set->crtc && + set->connectors[i]->dpms != DRM_MODE_DPMS_ON) return true; return false; @@ -8561,10 +8566,8 @@ intel_set_config_compute_mode_changes(struct drm_mode_set *set, /* We should be able to check here if the fb has the same properties * and then just flip_or_move it */ - if (set->connectors != NULL && - is_crtc_connector_off(set->crtc, *set->connectors, - set->num_connectors)) { - config->mode_changed = true; + if (is_crtc_connector_off(set)) { + config->mode_changed = true; } else if (set->crtc->fb != set->fb) { /* If we have no fb then treat it as a full mode set */ if (set->crtc->fb == NULL) { -- cgit v1.2.3-58-ga151 From 9a9b1c618d835e7a6a4097eb47ee47991202a3c1 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Thu, 18 Jul 2013 09:48:40 +0200 Subject: ASoC: atmel-ssc: remove clk_disable_unprepare call from critical section clk_prepare/unprepare (and indirectly clk_prepare_enable/disable_unprepare) may sleep and thus cannot be called in critical section. This patch fix a bug introduced by commit 6f0d94790efe9f4481bbd7c174ef0e9b5e5db7c4 where clk_disable_unprepare was called with user_lock hold. Signed-off-by: Boris BREZILLON Signed-off-by: Mark Brown --- drivers/misc/atmel-ssc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c index f7b90661e321..e068a76a5f6f 100644 --- a/drivers/misc/atmel-ssc.c +++ b/drivers/misc/atmel-ssc.c @@ -66,14 +66,19 @@ EXPORT_SYMBOL(ssc_request); void ssc_free(struct ssc_device *ssc) { + bool disable_clk = true; + spin_lock(&user_lock); - if (ssc->user) { + if (ssc->user) ssc->user--; - clk_disable_unprepare(ssc->clk); - } else { + else { + disable_clk = false; dev_dbg(&ssc->pdev->dev, "device already free\n"); } spin_unlock(&user_lock); + + if (disable_clk) + clk_disable_unprepare(ssc->clk); } EXPORT_SYMBOL(ssc_free); -- cgit v1.2.3-58-ga151 From 94a335dba34ff47cad3d6d0c29b452d43a1be3c8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 17 Jul 2013 14:51:28 +0200 Subject: drm/i915: correctly restore fences with objects attached MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid stalls we delay tiling changes and especially hold of committing the new fence state for as long as possible. Synchronization points are in the execbuf code and in our gtt fault handler. Unfortunately we've missed that tricky detail when adding proper fence restore code in commit 19b2dbde5732170a03bd82cc8bd442cf88d856f7 Author: Chris Wilson Date: Wed Jun 12 10:15:12 2013 +0100 drm/i915: Restore fences after resume and GPU resets The result was that we've restored fences for objects with no tiling, since the object<->fence link still existed after resume. Now that wouldn't have been too bad since any subsequent access would have fixed things up, but if we've changed from tiled to untiled real havoc happened: The tiling stride is stored -1 in the fence register, so a stride of 0 resulted in all 1s in the top 32bits, and so a completely bogus fence spanning everything from the start of the object to the top of the GTT. The tell-tale in the register dumps looks like: FENCE START 2: 0x0214d001 FENCE END 2: 0xfffff3ff Bit 11 isn't set since the hw doesn't store it, even when writing all 1s (at least on my snb here). To prevent such a gaffle in the future add a sanity check for fences with an untiled object attached in i915_gem_write_fence. v2: Fix the WARN, spotted by Chris. v3: Trying to reuse get_fences looked ugly and obfuscated the code. Instead reuse update_fence and to make it really dtrt also move the fence dirty state clearing into update_fence. Cc: Chris Wilson Cc: Stéphane Marchesin Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=60530 Cc: stable@vger.kernel.org (for 3.10 only) Reviewed-by: Chris Wilson Tested-by: Matthew Garrett Tested-by: Björn Bidar Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 97afd2639fb6..d9e2208cfe98 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2258,7 +2258,17 @@ void i915_gem_restore_fences(struct drm_device *dev) for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; - i915_gem_write_fence(dev, i, reg->obj); + + /* + * Commit delayed tiling changes if we have an object still + * attached to the fence, otherwise just clear the fence. + */ + if (reg->obj) { + i915_gem_object_update_fence(reg->obj, reg, + reg->obj->tiling_mode); + } else { + i915_gem_write_fence(dev, i, NULL); + } } } @@ -2795,6 +2805,10 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) mb(); + WARN(obj && (!obj->stride || !obj->tiling_mode), + "bogus fence setup with stride: 0x%x, tiling mode: %i\n", + obj->stride, obj->tiling_mode); + switch (INTEL_INFO(dev)->gen) { case 7: case 6: @@ -2836,6 +2850,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, fence->obj = NULL; list_del_init(&fence->lru_list); } + obj->fence_dirty = false; } static int @@ -2965,7 +2980,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) return 0; i915_gem_object_update_fence(obj, reg, enable); - obj->fence_dirty = false; return 0; } -- cgit v1.2.3-58-ga151 From e85843bec6c2ea7c10ec61238396891cc2b753a9 Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Fri, 19 Jul 2013 15:02:01 -0700 Subject: drm/i915: quirk no PCH_PWM_ENABLE for Dell XPS13 backlight BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=47941 BugLink: https://bugs.launchpad.net/bugs/1163720 BugLink: https://bugs.launchpad.net/bugs/1162026 Some machines suffer from non-functional backlight controls if BLM_PCH_PWM_ENABLE is set, so provide a quirk to avoid doing so. Apply this quirk to Dell XPS 13 models. Tested-by: Eric Griffith Tested-by: Kent Baxley Cc: # v3.8+ Signed-off-by: Kamal Mostafa Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 16 ++++++++++++++++ drivers/gpu/drm/i915/intel_panel.c | 3 ++- 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a416645bcd23..204c3eca0fb2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -555,6 +555,7 @@ enum intel_sbi_destination { #define QUIRK_PIPEA_FORCE (1<<0) #define QUIRK_LVDS_SSC_DISABLE (1<<1) #define QUIRK_INVERT_BRIGHTNESS (1<<2) +#define QUIRK_NO_PCH_PWM_ENABLE (1<<3) struct intel_fbdev; struct intel_fbc_work; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 76b034be2c7c..5fb305840db8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9400,6 +9400,17 @@ static void quirk_invert_brightness(struct drm_device *dev) DRM_INFO("applying inverted panel brightness quirk\n"); } +/* + * Some machines (Dell XPS13) suffer broken backlight controls if + * BLM_PCH_PWM_ENABLE is set. + */ +static void quirk_no_pcm_pwm_enable(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + dev_priv->quirks |= QUIRK_NO_PCH_PWM_ENABLE; + DRM_INFO("applying no-PCH_PWM_ENABLE quirk\n"); +} + struct intel_quirk { int device; int subsystem_vendor; @@ -9469,6 +9480,11 @@ static struct intel_quirk intel_quirks[] = { /* Acer Aspire 4736Z */ { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness }, + + /* Dell XPS13 HD Sandy Bridge */ + { 0x0116, 0x1028, 0x052e, quirk_no_pcm_pwm_enable }, + /* Dell XPS13 HD and XPS13 FHD Ivy Bridge */ + { 0x0166, 0x1028, 0x058b, quirk_no_pcm_pwm_enable }, }; static void intel_init_quirks(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 45010bb5d132..67e2c1f1c9a8 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -583,7 +583,8 @@ void intel_panel_enable_backlight(struct drm_device *dev, POSTING_READ(reg); I915_WRITE(reg, tmp | BLM_PWM_ENABLE); - if (HAS_PCH_SPLIT(dev)) { + if (HAS_PCH_SPLIT(dev) && + !(dev_priv->quirks & QUIRK_NO_PCH_PWM_ENABLE)) { tmp = I915_READ(BLC_PWM_PCH_CTL1); tmp |= BLM_PCH_PWM_ENABLE; tmp &= ~BLM_PCH_OVERRIDE_ENABLE; -- cgit v1.2.3-58-ga151 From a7cd1b8fea2f341b626b255d9898a5ca5fabbf0a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Jul 2013 20:36:51 +0100 Subject: drm/i915: Serialize almost all register access In theory, the different register blocks were meant to be only ever touched when holding either the struct_mutex, mode_config.lock or even a specific localised lock. This does not seem to be the case, and the hardware reacts extremely badly if we attempt to concurrently access two registers within the same cacheline. The HSD suggests that we only need to do this workaround for display range registers. However, upon review we need to serialize the multiple stages in our register write functions - if only for preemption protection. Irrespective of the hardware requirements, the current io functions are a little too loose with respect to the combination of pre- and post-condition testing that we do in conjunction with the actual io. As a result, we may be pre-empted and generate both false-postive and false-negative errors. Note well that this is a "90%" solution, there remains a few direct users of ioread/iowrite which will be fixed up in the next few patches. Since they are more invasive and that this simple change will prevent almost all lockups on Haswell, we kept this patch simple to facilitate backporting to stable. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=63914 Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f4af1ca0fb62..6ddc5677ea2f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1253,21 +1253,21 @@ hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg) #define __i915_read(x, y) \ u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \ + unsigned long irqflags; \ u##x val = 0; \ + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \ if (IS_GEN5(dev_priv->dev)) \ ilk_dummy_write(dev_priv); \ if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ - unsigned long irqflags; \ - spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \ if (dev_priv->forcewake_count == 0) \ dev_priv->gt.force_wake_get(dev_priv); \ val = read##y(dev_priv->regs + reg); \ if (dev_priv->forcewake_count == 0) \ dev_priv->gt.force_wake_put(dev_priv); \ - spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \ } else { \ val = read##y(dev_priv->regs + reg); \ } \ + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \ trace_i915_reg_rw(false, reg, val, sizeof(val)); \ return val; \ } @@ -1280,8 +1280,10 @@ __i915_read(64, q) #define __i915_write(x, y) \ void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \ + unsigned long irqflags; \ u32 __fifo_ret = 0; \ trace_i915_reg_rw(true, reg, val, sizeof(val)); \ + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \ if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ __fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \ } \ @@ -1293,6 +1295,7 @@ void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \ gen6_gt_check_fifodbg(dev_priv); \ } \ hsw_unclaimed_reg_check(dev_priv, reg); \ + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \ } __i915_write(8, b) __i915_write(16, w) -- cgit v1.2.3-58-ga151 From 0e970cec05635adbe7b686063e2548a8e4afb8f4 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 28 Jun 2013 17:27:02 +0200 Subject: gpio/omap: don't create an IRQ mapping for every GPIO on DT When a GPIO is defined as an interrupt line using Device Tree, a call to irq_create_of_mapping() is made that calls irq_create_mapping(). So, is not necessary to do the mapping for all OMAP GPIO lines and explicitly call irq_create_mapping() on the driver probe() when booting with Device Tree. Add a custom IRQ domain .map function handler that will be called by irq_create_mapping() to map the GPIO lines used as IRQ. This also allows to execute needed setup code such as configuring a GPIO as input and enabling the GPIO bank. Changes since v3: - Use bank->chip.of_node instead of_have_populated_dt() to check DT or legacy boot as suggested by Jean-Christophe PLAGNIOL-VILLARD Changes since v2: - Unconditionally do the IRQ setup in the .map() function and only call irq_create_mapping() in the gpio chip init to avoid code duplication as suggested by Grant Likely. Changes since v1: - Split the addition of the .map function handler and the automatic gpio request in two different patches. - Add GPIO IRQ setup logic to the irq domain mapping function. - Only call irq_create_mapping for every GPIO on legacy boot. - Only setup a GPIO IRQ on the .map function for DeviceTree boot. Signed-off-by: Javier Martinez Canillas Tested-by: Enric Balletbo i Serra Acked-by: Grant Likely Acked-by: Jean-Christophe PLAGNIOL-VILLARD Acked-by: Santosh Shilimkar Signed-off-by: Linus Walleij --- drivers/gpio/gpio-omap.c | 54 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index dfeb3a3a8f20..5e667ff91dc3 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1068,24 +1068,50 @@ static void omap_gpio_chip_init(struct gpio_bank *bank) gpiochip_add(&bank->chip); - for (j = 0; j < bank->width; j++) { - int irq = irq_create_mapping(bank->domain, j); - irq_set_lockdep_class(irq, &gpio_lock_class); - irq_set_chip_data(irq, bank); - if (bank->is_mpuio) { - omap_mpuio_alloc_gc(bank, irq, bank->width); - } else { - irq_set_chip_and_handler(irq, &gpio_irq_chip, - handle_simple_irq); - set_irq_flags(irq, IRQF_VALID); - } - } + /* + * REVISIT these explicit calls to irq_create_mapping() + * to do the GPIO to IRQ domain mapping for each GPIO in + * the bank can be removed once all OMAP platforms have + * been migrated to Device Tree boot only. + * Since in DT boot irq_create_mapping() is called from + * irq_create_of_mapping() only for the GPIO lines that + * are used as interrupts. + */ + if (!bank->chip.of_node) + for (j = 0; j < bank->width; j++) + irq_create_mapping(bank->domain, j); irq_set_chained_handler(bank->irq, gpio_irq_handler); irq_set_handler_data(bank->irq, bank); } static const struct of_device_id omap_gpio_match[]; +static int omap_gpio_irq_map(struct irq_domain *d, unsigned int virq, + irq_hw_number_t hwirq) +{ + struct gpio_bank *bank = d->host_data; + + if (!bank) + return -EINVAL; + + irq_set_lockdep_class(virq, &gpio_lock_class); + irq_set_chip_data(virq, bank); + if (bank->is_mpuio) { + omap_mpuio_alloc_gc(bank, virq, bank->width); + } else { + irq_set_chip_and_handler(virq, &gpio_irq_chip, + handle_simple_irq); + set_irq_flags(virq, IRQF_VALID); + } + + return 0; +} + +static struct irq_domain_ops omap_gpio_irq_ops = { + .xlate = irq_domain_xlate_onetwocell, + .map = omap_gpio_irq_map, +}; + static int omap_gpio_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1151,10 +1177,10 @@ static int omap_gpio_probe(struct platform_device *pdev) } bank->domain = irq_domain_add_legacy(node, bank->width, irq_base, - 0, &irq_domain_simple_ops, NULL); + 0, &omap_gpio_irq_ops, bank); #else bank->domain = irq_domain_add_linear(node, bank->width, - &irq_domain_simple_ops, NULL); + &omap_gpio_irq_ops, bank); #endif if (!bank->domain) { dev_err(dev, "Couldn't register an IRQ domain\n"); -- cgit v1.2.3-58-ga151 From b4419e1a15905191661ffe75ba2f9e649f5d565e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 28 Jun 2013 17:27:03 +0200 Subject: gpio/omap: auto request GPIO as input if used as IRQ via DT When an OMAP GPIO is used as an IRQ line, a call to gpio_request() has to be made to initialize the OMAP GPIO bank before a driver request the IRQ. Otherwise the call to request_irq() fails. Drives should not be aware of this neither care wether an IRQ line is a GPIO or not. They should just request the IRQ and this has to be handled by the irq_chip driver. With the current OMAP GPIO DT binding, if we define: gpio6: gpio@49058000 { compatible = "ti,omap3-gpio"; reg = <0x49058000 0x200>; interrupts = <34>; ti,hwmods = "gpio6"; gpio-controller; #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; }; interrupt-parent = <&gpio6>; interrupts = <16 8>; The GPIO is correctly mapped as an IRQ but a call to gpio_request() is never made. Since a call to the custom IRQ domain .map function handler is made for each GPIO used as an IRQ, the GPIO can be setup and configured as input there automatically. Changes since v3: - Use bank->chip.of_node instead of_have_populated_dt() to check DT or legacy boot as suggested by Jean-Christophe PLAGNIOL-VILLARD - Add a comment that this is just a temporary solution until and that it has to be removed once is handled by the IRQ core. Changes since v2: - Only make the call to gpio_request_one() conditional in the DT case as suggested by Grant Likely. Changes since v1: - Split the irq domain mapping function handler and the GPIO request in two different patches. Signed-off-by: Javier Martinez Canillas Tested-by: Enric Balletbo i Serra Acked-by: Grant Likely Acked-by: Jean-Christophe PLAGNIOL-VILLARD Acked-by: Santosh Shilimkar Signed-off-by: Linus Walleij --- drivers/gpio/gpio-omap.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers') diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 5e667ff91dc3..3a0c1606f885 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1090,6 +1090,8 @@ static int omap_gpio_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq) { struct gpio_bank *bank = d->host_data; + int gpio; + int ret; if (!bank) return -EINVAL; @@ -1104,6 +1106,22 @@ static int omap_gpio_irq_map(struct irq_domain *d, unsigned int virq, set_irq_flags(virq, IRQF_VALID); } + /* + * REVISIT most GPIO IRQ chip drivers need to call + * gpio_request() before a GPIO line can be used as an + * IRQ. Ideally this should be handled by the IRQ core + * but until then this has to be done on a per driver + * basis. Remove this once this is managed by the core. + */ + if (bank->chip.of_node) { + gpio = irq_to_gpio(bank, hwirq); + ret = gpio_request_one(gpio, GPIOF_IN, NULL); + if (ret) { + dev_err(bank->dev, "Could not request GPIO%d\n", gpio); + return ret; + } + } + return 0; } -- cgit v1.2.3-58-ga151 From 949eb1a4d29dc75e0b5b16b03747886b52ecf854 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 2 Jul 2013 21:46:30 +0200 Subject: gpio/omap: fix build error when OF_GPIO is not defined. The OMAP GPIO driver check if the chip has an associated Device Tree node using the struct gpio_chip of_node member. But this is only build if CONFIG_OF_GPIO is defined which leads to the following error when using omap1_defconfig: linux/drivers/gpio/gpio-omap.c: In function 'omap_gpio_chip_init': linux/drivers/gpio/gpio-omap.c:1080:17: error: 'struct gpio_chip' has no member named 'of_node' linux/drivers/gpio/gpio-omap.c: In function 'omap_gpio_irq_map': linux/drivers/gpio/gpio-omap.c:1116:16: error: 'struct gpio_chip' has no member named 'of_node' Reported-by: Kevin Hilman Signed-off-by: Javier Martinez Canillas Signed-off-by: Linus Walleij --- drivers/gpio/gpio-omap.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 3a0c1606f885..c57244ef428b 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1037,6 +1037,18 @@ omap_mpuio_alloc_gc(struct gpio_bank *bank, unsigned int irq_start, IRQ_NOREQUEST | IRQ_NOPROBE, 0); } +#if defined(CONFIG_OF_GPIO) +static inline bool omap_gpio_chip_boot_dt(struct gpio_chip *chip) +{ + return chip->of_node != NULL; +} +#else +static inline bool omap_gpio_chip_boot_dt(struct gpio_chip *chip) +{ + return false; +} +#endif + static void omap_gpio_chip_init(struct gpio_bank *bank) { int j; @@ -1077,7 +1089,7 @@ static void omap_gpio_chip_init(struct gpio_bank *bank) * irq_create_of_mapping() only for the GPIO lines that * are used as interrupts. */ - if (!bank->chip.of_node) + if (!omap_gpio_chip_boot_dt(&bank->chip)) for (j = 0; j < bank->width; j++) irq_create_mapping(bank->domain, j); irq_set_chained_handler(bank->irq, gpio_irq_handler); @@ -1113,7 +1125,7 @@ static int omap_gpio_irq_map(struct irq_domain *d, unsigned int virq, * but until then this has to be done on a per driver * basis. Remove this once this is managed by the core. */ - if (bank->chip.of_node) { + if (omap_gpio_chip_boot_dt(&bank->chip)) { gpio = irq_to_gpio(bank, hwirq); ret = gpio_request_one(gpio, GPIOF_IN, NULL); if (ret) { -- cgit v1.2.3-58-ga151 From afe8ce9b29c487ea7f62faa936b6abb84e0b8815 Mon Sep 17 00:00:00 2001 From: Rohit Vaswani Date: Thu, 18 Jul 2013 13:07:14 -0700 Subject: drivers: gpio: msm: Fix the error condition for reading ngpio of_property_read_u32 return 0 on success. The check was using a ! to return error. Fix the if condition. Signed-off-by: Rohit Vaswani Acked-by: Linus Walleij Reviewed-by: Pankaj Jangra Cc: "Bird, Tim" Signed-off-by: David Brown Signed-off-by: Linus Walleij --- drivers/gpio/gpio-msm-v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpio/gpio-msm-v2.c b/drivers/gpio/gpio-msm-v2.c index f4491a497cc8..c2fa77086eb5 100644 --- a/drivers/gpio/gpio-msm-v2.c +++ b/drivers/gpio/gpio-msm-v2.c @@ -378,7 +378,7 @@ static int msm_gpio_probe(struct platform_device *pdev) int ret, ngpio; struct resource *res; - if (!of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) { + if (of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) { dev_err(&pdev->dev, "%s: ngpio property missing\n", __func__); return -EINVAL; } -- cgit v1.2.3-58-ga151 From 181d1b9e31c668259d3798c521672afb8edd355c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 21 Jul 2013 13:16:24 +0200 Subject: drm/i915: fix up gt init sequence fallout The regression fix for gen6+ rps fallout commit 7dcd2677ea912573d9ed4bcd629b0023b2d11505 Author: Konstantin Khlebnikov Date: Wed Jul 17 10:22:58 2013 +0400 drm/i915: fix long-standing SNB regression in power consumption after resume unintentionally also changed the init sequence ordering between gt_init and gt_reset - we need to reset BIOS damage like leftover forcewake references before we run our own code. Otherwise we can get nasty dmesg noise like [drm:__gen6_gt_force_wake_mt_get] *ERROR* Timed out waiting for forcewake old ack to clear. again. Since _reset suggests that we first need to have stuff initialized (which isn't the case here) call it sanitze instead. While at it also block out the rps disable introduced by the above commit on ilk: We don't have any knowledge of ilk rps being broken in similar ways. And the disable functions uses the default hw state which is only read out when we're enabling rps. So essentially we've been writing random grabage into that register. Reported-by: Chris Wilson Cc: Chris Wilson Cc: Konstantin Khlebnikov Cc: Jesse Barnes Cc: stable@vger.kernel.org Tested-by: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 5 +++-- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 5c0663f58aff..abf158da9b30 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1593,8 +1593,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) intel_detect_pch(dev); intel_irq_init(dev); + intel_gt_sanitize(dev); intel_gt_init(dev); - intel_gt_reset(dev); /* Try to make sure MCHBAR is enabled before poking at it */ intel_setup_mchbar(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6ddc5677ea2f..45b3c030f483 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -706,7 +706,7 @@ static int i915_drm_thaw(struct drm_device *dev) { int error = 0; - intel_gt_reset(dev); + intel_gt_sanitize(dev); if (drm_core_check_feature(dev, DRIVER_MODESET)) { mutex_lock(&dev->struct_mutex); @@ -732,7 +732,7 @@ int i915_resume(struct drm_device *dev) pci_set_master(dev->pdev); - intel_gt_reset(dev); + intel_gt_sanitize(dev); /* * Platforms with opregion should have sane BIOS, older ones (gen3 and diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 204c3eca0fb2..d2ee3343c943 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1584,7 +1584,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged); extern void intel_irq_init(struct drm_device *dev); extern void intel_hpd_init(struct drm_device *dev); extern void intel_gt_init(struct drm_device *dev); -extern void intel_gt_reset(struct drm_device *dev); +extern void intel_gt_sanitize(struct drm_device *dev); void i915_error_state_free(struct kref *error_ref); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 828c426b4b92..6a347f54d39f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5476,7 +5476,7 @@ static void vlv_force_wake_put(struct drm_i915_private *dev_priv) gen6_gt_check_fifodbg(dev_priv); } -void intel_gt_reset(struct drm_device *dev) +void intel_gt_sanitize(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -5489,7 +5489,8 @@ void intel_gt_reset(struct drm_device *dev) } /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_disable_gt_powersave(dev); + if (INTEL_INFO(dev)->gen >= 6) + intel_disable_gt_powersave(dev); } void intel_gt_init(struct drm_device *dev) -- cgit v1.2.3-58-ga151 From 42a708c9328b1b23ed5f415401975e37a64f2747 Mon Sep 17 00:00:00 2001 From: Qipan Li Date: Thu, 4 Jul 2013 15:55:25 +0800 Subject: pinctrl: sirf: fix the pin number and mux bit for usp0 we missed a pin and related mux bit for usp pin group, this patch fixes it. Signed-off-by: Qipan Li Signed-off-by: Barry Song Signed-off-by: Linus Walleij --- drivers/pinctrl/sirf/pinctrl-atlas6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/pinctrl/sirf/pinctrl-atlas6.c b/drivers/pinctrl/sirf/pinctrl-atlas6.c index 1fa39a444171..c641be9c5b80 100644 --- a/drivers/pinctrl/sirf/pinctrl-atlas6.c +++ b/drivers/pinctrl/sirf/pinctrl-atlas6.c @@ -496,7 +496,7 @@ static const unsigned sdmmc5_pins[] = { 24, 25, 26 }; static const struct sirfsoc_muxmask usp0_muxmask[] = { { .group = 1, - .mask = BIT(19) | BIT(20) | BIT(21) | BIT(22), + .mask = BIT(19) | BIT(20) | BIT(21) | BIT(22) | BIT(23), }, }; @@ -507,7 +507,7 @@ static const struct sirfsoc_padmux usp0_padmux = { .funcval = 0, }; -static const unsigned usp0_pins[] = { 51, 52, 53, 54 }; +static const unsigned usp0_pins[] = { 51, 52, 53, 54, 55 }; static const struct sirfsoc_muxmask usp1_muxmask[] = { { -- cgit v1.2.3-58-ga151 From d58e9a02e1435744d80a99742d1135a6921318eb Mon Sep 17 00:00:00 2001 From: Qipan Li Date: Thu, 4 Jul 2013 15:55:26 +0800 Subject: pinctrl: sirf: add usp0_uart_nostreamctrl pin group for usp-uart without flowctrl this patch adds the lost pin group which supports to let USP0 to simulate a UART without hardware flow control. Signed-off-by: Qipan Li Signed-off-by: Barry Song Signed-off-by: Linus Walleij --- arch/arm/boot/dts/atlas6.dtsi | 6 ++++++ drivers/pinctrl/sirf/pinctrl-atlas6.c | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'drivers') diff --git a/arch/arm/boot/dts/atlas6.dtsi b/arch/arm/boot/dts/atlas6.dtsi index 9866cd736dee..537041253db7 100644 --- a/arch/arm/boot/dts/atlas6.dtsi +++ b/arch/arm/boot/dts/atlas6.dtsi @@ -485,6 +485,12 @@ sirf,function = "usp0"; }; }; + usp0_uart_nostreamctrl_pins_a: usp0@1 { + usp0 { + sirf,pins = "usp0_uart_nostreamctrl_grp"; + sirf,function = "usp0_uart_nostreamctrl"; + }; + }; usp1_pins_a: usp1@0 { usp1 { sirf,pins = "usp1grp"; diff --git a/drivers/pinctrl/sirf/pinctrl-atlas6.c b/drivers/pinctrl/sirf/pinctrl-atlas6.c index c641be9c5b80..867c9681763c 100644 --- a/drivers/pinctrl/sirf/pinctrl-atlas6.c +++ b/drivers/pinctrl/sirf/pinctrl-atlas6.c @@ -509,6 +509,19 @@ static const struct sirfsoc_padmux usp0_padmux = { static const unsigned usp0_pins[] = { 51, 52, 53, 54, 55 }; +static const struct sirfsoc_muxmask usp0_uart_nostreamctrl_muxmask[] = { + { + .group = 1, + .mask = BIT(20) | BIT(21), + }, +}; + +static const struct sirfsoc_padmux usp0_uart_nostreamctrl_padmux = { + .muxmask_counts = ARRAY_SIZE(usp0_uart_nostreamctrl_muxmask), + .muxmask = usp0_uart_nostreamctrl_muxmask, +}; + +static const unsigned usp0_uart_nostreamctrl_pins[] = { 52, 53 }; static const struct sirfsoc_muxmask usp1_muxmask[] = { { .group = 0, @@ -822,6 +835,8 @@ static const struct sirfsoc_pin_group sirfsoc_pin_groups[] = { SIRFSOC_PIN_GROUP("uart2grp", uart2_pins), SIRFSOC_PIN_GROUP("uart2_nostreamctrlgrp", uart2_nostreamctrl_pins), SIRFSOC_PIN_GROUP("usp0grp", usp0_pins), + SIRFSOC_PIN_GROUP("usp0_uart_nostreamctrl_grp", + usp0_uart_nostreamctrl_pins), SIRFSOC_PIN_GROUP("usp1grp", usp1_pins), SIRFSOC_PIN_GROUP("i2c0grp", i2c0_pins), SIRFSOC_PIN_GROUP("i2c1grp", i2c1_pins), @@ -862,6 +877,8 @@ static const char * const uart0grp[] = { "uart0grp" }; static const char * const uart1grp[] = { "uart1grp" }; static const char * const uart2grp[] = { "uart2grp" }; static const char * const uart2_nostreamctrlgrp[] = { "uart2_nostreamctrlgrp" }; +static const char * const usp0_uart_nostreamctrl_grp[] = { + "usp0_uart_nostreamctrl_grp" }; static const char * const usp0grp[] = { "usp0grp" }; static const char * const usp1grp[] = { "usp1grp" }; static const char * const i2c0grp[] = { "i2c0grp" }; @@ -904,6 +921,9 @@ static const struct sirfsoc_pmx_func sirfsoc_pmx_functions[] = { SIRFSOC_PMX_FUNCTION("uart2", uart2grp, uart2_padmux), SIRFSOC_PMX_FUNCTION("uart2_nostreamctrl", uart2_nostreamctrlgrp, uart2_nostreamctrl_padmux), SIRFSOC_PMX_FUNCTION("usp0", usp0grp, usp0_padmux), + SIRFSOC_PMX_FUNCTION("usp0_uart_nostreamctrl", + usp0_uart_nostreamctrl_grp, + usp0_uart_nostreamctrl_padmux), SIRFSOC_PMX_FUNCTION("usp1", usp1grp, usp1_padmux), SIRFSOC_PMX_FUNCTION("i2c0", i2c0grp, i2c0_padmux), SIRFSOC_PMX_FUNCTION("i2c1", i2c1grp, i2c1_padmux), -- cgit v1.2.3-58-ga151 From 25f397a429dfa43f22c278d0119a60a343aa568f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 19 Jul 2013 18:57:11 +0200 Subject: drm/crtc-helper: explicit DPMS on after modeset Atm the crtc helper implementation of set_config has really inconsisten semantics: If just an fb update is good enough, dpms state will be left as-is, but if we do a full modeset we force everything to dpms on. This change has already been applied to the i915 modeset code in commit e3de42b68478a8c95dd27520e9adead2af9477a5 Author: Imre Deak Date: Fri May 3 19:44:07 2013 +0200 drm/i915: force full modeset if the connector is in DPMS OFF mode which according to Greg KH seems to aim for a new record in most Bugzilla: links in a commit message. The history of this dpms forcing is pretty interesting. This patch here is an almost-revert of commit 811aaa55ba21ab37407018cfc01770d6b037d3fb Author: Keith Packard Date: Thu Feb 3 16:57:28 2011 -0800 drm: Only set DPMS ON when actually configuring a mode which fixed the bug of trying to dpms on disabled outputs, but introduced the new discrepancy between an fb update only and full modesets. The actual introduction of this goes back to commit bf9dc102e284a5aa78c73fc9d72e11d5ccd8669f Author: Keith Packard Date: Fri Nov 26 10:45:58 2010 -0800 drm: Set connector DPMS status to ON in drm_crtc_helper_set_config And if you'd dig around in the i915 driver code there's even more fun around forcing dpms on and losing our heads and temper of the resulting inconsistencies. Especially the DP re-training code had tons of funny stuff in it. v2: So v1 totally blew up on resume on my radeon system here. After much head-scraching I've figured out that the radeon resume functions resumes the console system _before_ it actually restores all the modeset state. And resuming the console systems means that fbdev doeas an immediate ->set_par call. Now up to this patch that ->set_par did absolutely nothing: All the old sw state from pre-suspend was still around (since the modeset reset wasn't done yet), which means that the set_config calls done as a result of the ->set_par where all treated as no-ops (despite that the real hw state was obviously something completely different). Since v1 of this patch just added a bunch of ->dpms calls if the crtc was enabled, those set_config calls suddenly stopped being no-ops. But because the hw state wasn't restored the ->dpms callbacks resulted in decent amounts of hilarity and eventual full hangs. Since I can't review all kms drivers for such tricky ordering constraints v2 opts for a different approach and forces a full modeset if the connector dpms state isnt' DPMS_ON. Since the ->dpms callbacks implemented by the modeset helpers update the connector->dpms property we have the same effect of ensuring that the pipe is ultimately turned on, even if we just end up updating the fb. This is the same approac we ended up using in the intel driver. Note that besides i915.ko only all other drivers eventually call drm_helper_connector_dpms with the exception of vmwgfx, which does not support dmps at all. v3: Dave Airlie merged the broken first version of this patch, so squash in the revert of commit 372835a8527f85b3eff20a18c2c339e827dfd4e4 Author: Daniel Vetter Date: Sat Jun 15 00:13:13 2013 +0200 drm/crtc-helper: explicit DPMS on after modeset Also fix up the spelling fail a bit in the commit message while at it. Cc: Dave Airlie Reviewed-by: Alex Deucher Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67043 Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 738a4294d820..6a647493ca7f 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -677,6 +677,11 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) /* don't break so fail path works correct */ fail = 1; break; + + if (connector->dpms != DRM_MODE_DPMS_ON) { + DRM_DEBUG_KMS("connector dpms not on, full mode switch\n"); + mode_changed = true; + } } } @@ -754,6 +759,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) ret = -EINVAL; goto fail; } + DRM_DEBUG_KMS("Setting connector DPMS state to on\n"); + for (i = 0; i < set->num_connectors; i++) { + DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id, + drm_get_connector_name(set->connectors[i])); + set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON); + } } drm_helper_disable_unused_functions(dev); } else if (fb_changed) { @@ -771,22 +782,6 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) } } - /* - * crtc set_config helpers implicit set the crtc and all connected - * encoders to DPMS on for a full mode set. But for just an fb update it - * doesn't do that. To not confuse userspace, do an explicit DPMS_ON - * unconditionally. This will also ensure driver internal dpms state is - * consistent again. - */ - if (set->crtc->enabled) { - DRM_DEBUG_KMS("Setting connector DPMS state to on\n"); - for (i = 0; i < set->num_connectors; i++) { - DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id, - drm_get_connector_name(set->connectors[i])); - set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON); - } - } - kfree(save_connectors); kfree(save_encoders); kfree(save_crtcs); -- cgit v1.2.3-58-ga151 From ace120dcf23b3bbba00d797a898481997381052f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 16 Jul 2013 14:02:28 -0400 Subject: Thermal: Fix lockup of cpu_down() Commit f1a18a105 "Thermal: CPU Package temperature thermal" had code that did a get_online_cpus(), run a loop and then do a put_online_cpus(). The problem is that the loop had an error exit that would skip the put_online_cpus() part. In the error exit part of the function, it also did a get_online_cpus(), run a loop and then put_online_cpus(). The only way to get to the error exit part is with get_online_cpus() already performed. If this error condition is hit, the system will be prevented from taking CPUs offline. The process taking the CPU offline will lock up hard. Removing the get_online_cpus() removes the lockup as the hotplug CPU refcount is back to zero. This was bisected with ktest. Signed-off-by: Steven Rostedt Signed-off-by: Zhang Rui --- drivers/thermal/x86_pkg_temp_thermal.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 810143a6aa33..f36950e4134f 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -599,7 +599,6 @@ static int __init pkg_temp_thermal_init(void) return 0; err_ret: - get_online_cpus(); for_each_online_cpu(i) put_core_offline(i); put_online_cpus(); -- cgit v1.2.3-58-ga151 From 90625070c4253377025878c4e82feed8b35c7116 Mon Sep 17 00:00:00 2001 From: Luiz Angelo Daros de Luca Date: Mon, 1 Jul 2013 23:56:25 -0300 Subject: usb: serial: cp210x: Add USB ID for Netgear Switches embedded serial adapter This adds NetGear Managed Switch M4100 series, M5300 series, M7100 series USB ID (0846:0110) to the cp210x driver. Without this, the serial adapter is not recognized in Linux. Description was obtained from an Netgear Eng. Signed-off-by: Luiz Angelo Daros de Luca Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index d6ef2f8da37d..55852fe13218 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -53,6 +53,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */ + { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ { USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */ -- cgit v1.2.3-58-ga151 From e7a6121f4929c17215f0cdca3726f4bf3e4e9529 Mon Sep 17 00:00:00 2001 From: Ren Bigcren Date: Tue, 2 Jul 2013 13:34:30 +0200 Subject: USB: storage: Add MicroVault Flash Drive to unusual_devs The device report an error capacity when read_capacity_16(). Using read_capacity_10() can get the correct capacity. Signed-off-by: Ren Bigcren Cc: Matthew Dharm Signed-off-by: Oskar Andero Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 1799335288bd..c015f2c16729 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -665,6 +665,13 @@ UNUSUAL_DEV( 0x054c, 0x016a, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_FIX_INQUIRY ), +/* Submitted by Ren Bigcren */ +UNUSUAL_DEV( 0x054c, 0x02a5, 0x0100, 0x0100, + "Sony Corp.", + "MicroVault Flash Drive", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_READ_CAPACITY_16 ), + /* floppy reports multiple luns */ UNUSUAL_DEV( 0x055d, 0x2020, 0x0000, 0x0210, "SAMSUNG", -- cgit v1.2.3-58-ga151 From 58fc90db8261b571c026bb8bf23aad48a7233118 Mon Sep 17 00:00:00 2001 From: "Jóhann B. Guðmundsson" Date: Thu, 4 Jul 2013 21:47:52 +0000 Subject: USB: misc: Add Manhattan Hi-Speed USB DVI Converter to sisusbvga MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jóhann B. Guðmundsson Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/sisusbvga/sisusb.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c index c21386ec5d35..de98906f786d 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.c +++ b/drivers/usb/misc/sisusbvga/sisusb.c @@ -3247,6 +3247,7 @@ static const struct usb_device_id sisusb_table[] = { { USB_DEVICE(0x0711, 0x0903) }, { USB_DEVICE(0x0711, 0x0918) }, { USB_DEVICE(0x0711, 0x0920) }, + { USB_DEVICE(0x0711, 0x0950) }, { USB_DEVICE(0x182d, 0x021c) }, { USB_DEVICE(0x182d, 0x0269) }, { } -- cgit v1.2.3-58-ga151 From c38e83b6cc2adf80e3f091fd92cfbeacc9748347 Mon Sep 17 00:00:00 2001 From: Daniil Bolsun Date: Fri, 19 Jul 2013 10:21:23 +0300 Subject: USB: option: append Petatel NP10T device to GSM modems list This patch was tested on 3.10.1 kernel. Same models of Petatel NP10T modems have different device IDs. Unfortunately they have no additional revision information on a board which may treat them as different devices. Currently I've seen only two NP10T devices with various IDs. Possibly Petatel NP10T list will be appended upon devices with new IDs will appear. Signed-off-by: Daniil Bolsun Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 5dd857de05b0..418746b557ad 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -444,7 +444,8 @@ static void option_instat_callback(struct urb *urb); /* Hyundai Petatel Inc. products */ #define PETATEL_VENDOR_ID 0x1ff4 -#define PETATEL_PRODUCT_NP10T 0x600e +#define PETATEL_PRODUCT_NP10T_600A 0x600a +#define PETATEL_PRODUCT_NP10T_600E 0x600e /* TP-LINK Incorporated products */ #define TPLINK_VENDOR_ID 0x2357 @@ -1329,7 +1330,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x00, 0x00) }, { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, - { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T) }, + { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600A) }, + { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) }, { USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CHANGHONG_VENDOR_ID, CHANGHONG_PRODUCT_CH690) }, -- cgit v1.2.3-58-ga151 From b579fa52f6be0b4157ca9cc5e94d44a2c89a7e95 Mon Sep 17 00:00:00 2001 From: Barry Grussling Date: Fri, 19 Jul 2013 14:46:12 -0700 Subject: usb: cp210x support SEL C662 Vendor/Device This patch adds support for the Schweitzer Engineering Laboratories C662 USB cable based off the CP210x driver. Signed-off-by: Barry Grussling Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 55852fe13218..f68024ba726a 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -149,6 +149,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ + { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */ { USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */ { USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */ -- cgit v1.2.3-58-ga151 From 7681156982026ebf7eafd7301eb0374d7648d068 Mon Sep 17 00:00:00 2001 From: Sami Rahman Date: Mon, 8 Jul 2013 14:28:55 -0400 Subject: USB: cp210x: add MMB and PI ZigBee USB Device Support Added support for MMB Networks and Planet Innovation Ingeni ZigBee USB devices using customized Silicon Labs' CP210x.c USB to UART bridge drivers with PIDs: 88A4, 88A5. Signed-off-by: Sami Rahman Tested-by: Sami Rahman Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index f68024ba726a..0eae4ba3760e 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -119,6 +119,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ + { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ + { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ -- cgit v1.2.3-58-ga151 From 47a64a13d54f6c669b00542848d5550be3d3310e Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 9 Jul 2013 17:03:50 +0300 Subject: USB: EHCI: Fix resume signalling on remote wakeup Set the ehci->resuming flag for the port we receive a remote wakeup on so that resume signalling can be completed. Without this, the root hub timer will not fire again to check if the resume was completed and there will be a never-ending wait on on the port. This effect is only observed if the HUB IRQ IN does not come after we have initiated the port resume. Signed-off-by: Roger Quadros Cc: stable Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hub.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index 2b702772d04d..6dce37555c4f 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -874,6 +874,7 @@ static int ehci_hub_control ( ehci->reset_done[wIndex] = jiffies + msecs_to_jiffies(20); usb_hcd_start_port_resume(&hcd->self, wIndex); + set_bit(wIndex, &ehci->resuming_ports); /* check the port again */ mod_timer(&ehci_to_hcd(ehci)->rh_timer, ehci->reset_done[wIndex]); -- cgit v1.2.3-58-ga151 From c0cdfaa0a5e7a346ac2f661f63f543cdc5f7cbbe Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 23 Jun 2013 15:50:07 +0800 Subject: of/irq: Avoid calling list_first_entry() for empty list list_first_entry() expects the list is not empty, we need to check if list is empty before calling list_first_entry(). Thus use list_first_entry_or_null() instead of list_first_entry(). Signed-off-by: Axel Lin Signed-off-by: Grant Likely --- drivers/of/irq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/of/irq.c b/drivers/of/irq.c index a3c1c5aae6a9..5c645c7227b8 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -482,8 +482,9 @@ void __init of_irq_init(const struct of_device_id *matches) } /* Get the next pending parent that might have children */ - desc = list_first_entry(&intc_parent_list, typeof(*desc), list); - if (list_empty(&intc_parent_list) || !desc) { + desc = list_first_entry_or_null(&intc_parent_list, + typeof(*desc), list); + if (!desc) { pr_err("of_irq_init: children remain, but no parents\n"); break; } -- cgit v1.2.3-58-ga151 From cf9e2368655d86cd800e4d9fe65a407b39d29373 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Jul 2013 12:24:10 +0200 Subject: of/irq: init struct resource to 0 in of_irq_to_resource() It almost does not matter because most users use only the ->start member of the struct. However if this struct is passed to a platform device which is then added via platform_device_add() then the ->parent member is also used. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Grant Likely --- drivers/of/irq.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 5c645c7227b8..1264923ade0f 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -345,6 +345,7 @@ int of_irq_to_resource(struct device_node *dev, int index, struct resource *r) if (r && irq) { const char *name = NULL; + memset(r, 0, sizeof(*r)); /* * Get optional "interrupts-names" property to add a name * to the resource. -- cgit v1.2.3-58-ga151 From 74b843512906e60be24ed6aafa2fa2876f294fab Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 9 Jul 2013 16:27:24 +0200 Subject: pinctrl: sh-pfc: fix SDHI0 VccQ regulator on sh73a0 with DT The PFC pinctrl driver on sh73a0 is also regiatering a VccQ regulator for SDHI0. However, its consumers list only included the platform-data based SDHI device name. When booted with DT SDHI0 couldn't enable VccQ and therefore was unusable. Fix this by adding a consumer with DT-based name. Signed-off-by: Guennadi Liakhovetski Acked-by: Simon Horman Acked-by: Laurent Pinchart Signed-off-by: Linus Walleij --- drivers/pinctrl/sh-pfc/pfc-sh73a0.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c index 7956df58d751..31f7d0e04aaa 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c @@ -3785,6 +3785,7 @@ static const struct regulator_desc sh73a0_vccq_mc0_desc = { static struct regulator_consumer_supply sh73a0_vccq_mc0_consumers[] = { REGULATOR_SUPPLY("vqmmc", "sh_mobile_sdhi.0"), + REGULATOR_SUPPLY("vqmmc", "ee100000.sdhi"), }; static const struct regulator_init_data sh73a0_vccq_mc0_init_data = { -- cgit v1.2.3-58-ga151 From 745a39a9e6b2901cd341af01fc9bac78a9649e23 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 18 Jul 2013 09:24:37 -0400 Subject: drm/radeon: wait for 3D idle before using CP DMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the 3D engine is idle before using CP DMA for bo copies. Signed-off-by: Alex Deucher Cc: Marek Olšák --- drivers/gpu/drm/radeon/r600.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 393880a09412..10f712e37003 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3166,7 +3166,7 @@ int r600_copy_cpdma(struct radeon_device *rdev, size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); - r = radeon_ring_lock(rdev, ring, num_loops * 6 + 21); + r = radeon_ring_lock(rdev, ring, num_loops * 6 + 24); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); radeon_semaphore_free(rdev, &sem, NULL); @@ -3181,6 +3181,9 @@ int r600_copy_cpdma(struct radeon_device *rdev, radeon_semaphore_free(rdev, &sem, NULL); } + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(ring, WAIT_3D_IDLE_bit); for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; if (cur_size_in_bytes > 0x1fffff) -- cgit v1.2.3-58-ga151 From 3e3e53f86bee87bb14474213c879595605e35112 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 18 Jul 2013 13:11:56 -0400 Subject: drm/radeon/vm: only align the pt base to 32k fixes: https://bugs.freedesktop.org/show_bug.cgi?id=67016 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_gart.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index d9d31a383276..6a51d943ccf4 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -466,7 +466,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) size += rdev->vm_manager.max_pfn * 8; size *= 2; r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - RADEON_VM_PTB_ALIGN(size), + RADEON_GPU_PAGE_ALIGN(size), RADEON_VM_PTB_ALIGN_SIZE, RADEON_GEM_DOMAIN_VRAM); if (r) { @@ -621,7 +621,7 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) } retry: - pd_size = RADEON_VM_PTB_ALIGN(radeon_vm_directory_size(rdev)); + pd_size = radeon_vm_directory_size(rdev); r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->page_directory, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false); @@ -953,8 +953,8 @@ static int radeon_vm_update_pdes(struct radeon_device *rdev, retry: r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->page_tables[pt_idx], - RADEON_VM_PTB_ALIGN(RADEON_VM_PTE_COUNT * 8), - RADEON_VM_PTB_ALIGN_SIZE, false); + RADEON_VM_PTE_COUNT * 8, + RADEON_GPU_PAGE_SIZE, false); if (r == -ENOMEM) { r = radeon_vm_evict(rdev, vm); -- cgit v1.2.3-58-ga151 From 34be8c9af7b8728465963740fc11136ae90dfc36 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 18 Jul 2013 11:13:53 -0400 Subject: drm/radeon: fix endian issues with DP handling (v3) The atom interpreter expects data in LE format, so swap the message buffer as apprioriate. v2: properly handle non-dw aligned byte counts. v3: properly handle remainder Signed-off-by: Alex Deucher Cc: Dong He Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/atombios_dp.c | 43 ++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 064023bed480..32501f6ec991 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -44,6 +44,41 @@ static char *pre_emph_names[] = { }; /***** radeon AUX functions *****/ + +/* Atom needs data in little endian format + * so swap as appropriate when copying data to + * or from atom. Note that atom operates on + * dw units. + */ +static void radeon_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) +{ +#ifdef __BIG_ENDIAN + u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ + u32 *dst32, *src32; + int i; + + memcpy(src_tmp, src, num_bytes); + src32 = (u32 *)src_tmp; + dst32 = (u32 *)dst_tmp; + if (to_le) { + for (i = 0; i < ((num_bytes + 3) / 4); i++) + dst32[i] = cpu_to_le32(src32[i]); + memcpy(dst, dst_tmp, num_bytes); + } else { + u8 dws = num_bytes & ~3; + for (i = 0; i < ((num_bytes + 3) / 4); i++) + dst32[i] = le32_to_cpu(src32[i]); + memcpy(dst, dst_tmp, dws); + if (num_bytes % 4) { + for (i = 0; i < (num_bytes % 4); i++) + dst[dws+i] = dst_tmp[dws+i]; + } + } +#else + memcpy(dst, src, num_bytes); +#endif +} + union aux_channel_transaction { PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION v1; PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2 v2; @@ -65,10 +100,10 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan, base = (unsigned char *)(rdev->mode_info.atom_context->scratch + 1); - memcpy(base, send, send_bytes); + radeon_copy_swap(base, send, send_bytes, true); - args.v1.lpAuxRequest = 0 + 4; - args.v1.lpDataOut = 16 + 4; + args.v1.lpAuxRequest = cpu_to_le16((u16)(0 + 4)); + args.v1.lpDataOut = cpu_to_le16((u16)(16 + 4)); args.v1.ucDataOutLen = 0; args.v1.ucChannelID = chan->rec.i2c_id; args.v1.ucDelay = delay / 10; @@ -102,7 +137,7 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan, recv_bytes = recv_size; if (recv && recv_size) - memcpy(recv, base + 16, recv_bytes); + radeon_copy_swap(recv, base + 16, recv_bytes, false); return recv_bytes; } -- cgit v1.2.3-58-ga151 From f7929f34fa0e0bb6736a2484fdc07d77a1653081 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Fri, 19 Jul 2013 21:08:48 +0200 Subject: drm/radeon: Another card with wrong primary dac adj Hello, got another card with "too bright" problem: Sapphire Radeon VE 7000 DDR (VGA+S-Video) lspci -vnn: 01:00.0 VGA compatible controller [0300]: Advanced Micro Devices [AMD] nee ATI RV100 QY [Radeon 7000/VE] [1002:5159] (prog-if 00 [VGA controller]) Subsystem: PC Partner Limited Sapphire Radeon VE 7000 DDR [174b:7c28] The patch below fixes the problem for this card. But I don't like the blacklist, couldn't some heuristic be used instead? The interesting thing is that the manufacturer is the same as the other card needing the same quirk. I wonder how many different types are broken this way. The "wrong" ps2_pdac_adj value that comes from BIOS on this card is 0x300. ==================== drm/radeon: Add primary dac adj quirk for Sapphire Radeon VE 7000 DDR Values from BIOS are wrong, causing too bright colors. Use default values instead. Signed-off-by: Ondrej Zary Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_combios.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 78edadc9e86b..8528b81cd64f 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -971,10 +971,14 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct } /* quirks */ + /* Radeon 7000 (RV100) */ + if (((dev->pdev->device == 0x5159) && + (dev->pdev->subsystem_vendor == 0x174B) && + (dev->pdev->subsystem_device == 0x7c28)) || /* Radeon 9100 (R200) */ - if ((dev->pdev->device == 0x514D) && + ((dev->pdev->device == 0x514D) && (dev->pdev->subsystem_vendor == 0x174B) && - (dev->pdev->subsystem_device == 0x7149)) { + (dev->pdev->subsystem_device == 0x7149))) { /* vbios value is bad, use the default */ found = 0; } -- cgit v1.2.3-58-ga151 From 03ed8cf9b28d886c64c7e705c7bb1a365fd8fb95 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 19 Jul 2013 17:44:43 -0400 Subject: drm/radeon: improve dac adjust heuristics for legacy pdac Hopefully avoid more quirks in the future due to bogus vbios dac data. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_combios.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 8528b81cd64f..485f82c9929d 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -965,8 +965,10 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct dac = RBIOS8(dac_info + 0x3) & 0xf; p_dac->ps2_pdac_adj = (bg << 8) | (dac); } - /* if the values are all zeros, use the table */ - if (p_dac->ps2_pdac_adj) + /* if the values are zeros, use the table */ + if ((dac == 0) || (bg == 0)) + found = 0; + else found = 1; } -- cgit v1.2.3-58-ga151 From cef1d00cd56f600121ad121875655ad410a001b8 Mon Sep 17 00:00:00 2001 From: Mark Kettenis Date: Sun, 21 Jul 2013 16:44:09 -0400 Subject: drm/radeon: fix combios tables on older cards Noticed that my old Radeon 7500 hung after printing drm: GPU not posted. posting now... when it wasn't selected as the primary card the BIOS. Some digging revealed that it was hanging in combios_parse_mmio_table() while parsing the ASIC INIT 3 table. Looking at the BIOS ROM for the card, it becomes obvious that there is no ASIC INIT 3 table in the BIOS. The code is just processing random garbage. No surprise it hangs! Why do I say that there is no ASIC INIT 3 table is the BIOS? This table is found through the MISC INFO table. The MISC INFO table can be found at offset 0x5e in the COMBIOS header. But the header is smaller than that. The COMBIOS header starts at offset 0x126. The standard PCI Data Structure (the bit that starts with 'PCIR') lives at offset 0x180. That means that the COMBIOS header can not be larger than 0x5a bytes and therefore cannot contain a MISC INFO table. I looked at a dozen or so BIOS images, some my own, some downloaded from: It is fairly obvious that the size of the COMBIOS header can be found at offset 0x6 of the header. Not sure if it is a 16-bit number or just an 8-bit number, but that doesn't really matter since the tables seems to be always smaller than 256 bytes. So I think combios_get_table_offset() should check if the requested table is present. This can be done by checking the offset against the size of the header. See the diff below. The diff is against the WIP OpenBSD codebase that roughly corresponds to Linux 3.8.13 at this point. But I don't think this bit of the code changed much since then. For what it is worth: Signed-off-by: Mark Kettenis Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_combios.c | 145 +++++++++----------------------- 1 file changed, 41 insertions(+), 104 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 485f82c9929d..68ce36056019 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -147,7 +147,7 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, enum radeon_combios_table_offset table) { struct radeon_device *rdev = dev->dev_private; - int rev; + int rev, size; uint16_t offset = 0, check_offset; if (!rdev->bios) @@ -156,174 +156,106 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, switch (table) { /* absolute offset tables */ case COMBIOS_ASIC_INIT_1_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0xc); - if (check_offset) - offset = check_offset; + check_offset = 0xc; break; case COMBIOS_BIOS_SUPPORT_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x14); - if (check_offset) - offset = check_offset; + check_offset = 0x14; break; case COMBIOS_DAC_PROGRAMMING_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x2a); - if (check_offset) - offset = check_offset; + check_offset = 0x2a; break; case COMBIOS_MAX_COLOR_DEPTH_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x2c); - if (check_offset) - offset = check_offset; + check_offset = 0x2c; break; case COMBIOS_CRTC_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x2e); - if (check_offset) - offset = check_offset; + check_offset = 0x2e; break; case COMBIOS_PLL_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x30); - if (check_offset) - offset = check_offset; + check_offset = 0x30; break; case COMBIOS_TV_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x32); - if (check_offset) - offset = check_offset; + check_offset = 0x32; break; case COMBIOS_DFP_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x34); - if (check_offset) - offset = check_offset; + check_offset = 0x34; break; case COMBIOS_HW_CONFIG_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x36); - if (check_offset) - offset = check_offset; + check_offset = 0x36; break; case COMBIOS_MULTIMEDIA_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x38); - if (check_offset) - offset = check_offset; + check_offset = 0x38; break; case COMBIOS_TV_STD_PATCH_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x3e); - if (check_offset) - offset = check_offset; + check_offset = 0x3e; break; case COMBIOS_LCD_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x40); - if (check_offset) - offset = check_offset; + check_offset = 0x40; break; case COMBIOS_MOBILE_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x42); - if (check_offset) - offset = check_offset; + check_offset = 0x42; break; case COMBIOS_PLL_INIT_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x46); - if (check_offset) - offset = check_offset; + check_offset = 0x46; break; case COMBIOS_MEM_CONFIG_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x48); - if (check_offset) - offset = check_offset; + check_offset = 0x48; break; case COMBIOS_SAVE_MASK_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x4a); - if (check_offset) - offset = check_offset; + check_offset = 0x4a; break; case COMBIOS_HARDCODED_EDID_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x4c); - if (check_offset) - offset = check_offset; + check_offset = 0x4c; break; case COMBIOS_ASIC_INIT_2_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x4e); - if (check_offset) - offset = check_offset; + check_offset = 0x4e; break; case COMBIOS_CONNECTOR_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x50); - if (check_offset) - offset = check_offset; + check_offset = 0x50; break; case COMBIOS_DYN_CLK_1_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x52); - if (check_offset) - offset = check_offset; + check_offset = 0x52; break; case COMBIOS_RESERVED_MEM_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x54); - if (check_offset) - offset = check_offset; + check_offset = 0x54; break; case COMBIOS_EXT_TMDS_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x58); - if (check_offset) - offset = check_offset; + check_offset = 0x58; break; case COMBIOS_MEM_CLK_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x5a); - if (check_offset) - offset = check_offset; + check_offset = 0x5a; break; case COMBIOS_EXT_DAC_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x5c); - if (check_offset) - offset = check_offset; + check_offset = 0x5c; break; case COMBIOS_MISC_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x5e); - if (check_offset) - offset = check_offset; + check_offset = 0x5e; break; case COMBIOS_CRT_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x60); - if (check_offset) - offset = check_offset; + check_offset = 0x60; break; case COMBIOS_INTEGRATED_SYSTEM_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x62); - if (check_offset) - offset = check_offset; + check_offset = 0x62; break; case COMBIOS_COMPONENT_VIDEO_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x64); - if (check_offset) - offset = check_offset; + check_offset = 0x64; break; case COMBIOS_FAN_SPEED_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x66); - if (check_offset) - offset = check_offset; + check_offset = 0x66; break; case COMBIOS_OVERDRIVE_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x68); - if (check_offset) - offset = check_offset; + check_offset = 0x68; break; case COMBIOS_OEM_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x6a); - if (check_offset) - offset = check_offset; + check_offset = 0x6a; break; case COMBIOS_DYN_CLK_2_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x6c); - if (check_offset) - offset = check_offset; + check_offset = 0x6c; break; case COMBIOS_POWER_CONNECTOR_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x6e); - if (check_offset) - offset = check_offset; + check_offset = 0x6e; break; case COMBIOS_I2C_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x70); - if (check_offset) - offset = check_offset; + check_offset = 0x70; break; /* relative offset tables */ case COMBIOS_ASIC_INIT_3_TABLE: /* offset from misc info */ @@ -439,11 +371,16 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, } break; default: + check_offset = 0; break; } - return offset; + size = RBIOS8(rdev->bios_header_start + 0x6); + /* check absolute offset tables */ + if (table < COMBIOS_ASIC_INIT_3_TABLE && check_offset && check_offset < size) + offset = RBIOS16(rdev->bios_header_start + check_offset); + return offset; } bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) -- cgit v1.2.3-58-ga151 From 42b9ab7ab798c1fc812bd2032cb506a93080130f Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 19 Jul 2013 15:56:41 +0900 Subject: libata: replace strict_strtol() with kstrtol() The usage of strict_strtol() is not preferred, because strict_strtol() is obsolete. Thus, kstrtol() should be used. Signed-off-by: Jingoo Han Signed-off-by: Tejun Heo --- drivers/ata/libata-scsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 83c08907e042..b1e880a3c3da 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -206,8 +206,10 @@ static ssize_t ata_scsi_park_store(struct device *device, unsigned long flags; int rc; - rc = strict_strtol(buf, 10, &input); - if (rc || input < -2) + rc = kstrtol(buf, 10, &input); + if (rc) + return rc; + if (input < -2) return -EINVAL; if (input > ATA_TMOUT_MAX_PARK) { rc = -EOVERFLOW; -- cgit v1.2.3-58-ga151 From 8cb440ab709fb1f7e3f71a6159ebab9403760472 Mon Sep 17 00:00:00 2001 From: Jean-Francois Moine Date: Mon, 15 Jul 2013 10:14:26 +0200 Subject: pinctrl: pinctrl-single: fix compile warning when no CONFIG_PM This warning has been introduced by the commit 0f9bc4bcdf4f pinctrl: single: adopt pinctrl sleep mode management Signed-off-by: Jean-Francois Moine Acked-by: Tony Lindgren Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-single.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 6866548fab31..7323cca440b5 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1483,6 +1483,7 @@ static int pcs_add_gpio_func(struct device_node *node, struct pcs_device *pcs) return ret; } +#ifdef CONFIG_PM static int pinctrl_single_suspend(struct platform_device *pdev, pm_message_t state) { @@ -1505,6 +1506,7 @@ static int pinctrl_single_resume(struct platform_device *pdev) return pinctrl_force_default(pcs->pctl); } +#endif static int pcs_probe(struct platform_device *pdev) { -- cgit v1.2.3-58-ga151 From bb9696192826a7d9279caf872e95b41bc26c7eff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Jul 2013 16:53:36 -0400 Subject: libata: make it clear that sata_inic162x is experimental sata_inic162x never reached a state where it's reliable enough for production use and data corruption is a relatively common occurrence. Make the driver generate warning about the issues and mark the Kconfig option as experimental. If the situation doesn't improve, we'd be better off making it depend on CONFIG_BROKEN. Let's wait for several cycles and see if the kernel message draws any attention. Signed-off-by: Tejun Heo Reported-by: Martin Braure de Calignon Reported-by: Ben Hutchings Reported-by: risc4all@yahoo.com --- drivers/ata/Kconfig | 2 +- drivers/ata/sata_inic162x.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 80dc988f01e4..5cddaf86cd0a 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -107,7 +107,7 @@ config SATA_FSL If unsure, say N. config SATA_INIC162X - tristate "Initio 162x SATA support" + tristate "Initio 162x SATA support (Very Experimental)" depends on PCI help This option enables support for Initio 162x Serial ATA. diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c index e45131748248..5c54d957370a 100644 --- a/drivers/ata/sata_inic162x.c +++ b/drivers/ata/sata_inic162x.c @@ -6,6 +6,18 @@ * * This file is released under GPL v2. * + * **** WARNING **** + * + * This driver never worked properly and unfortunately data corruption is + * relatively common. There isn't anyone working on the driver and there's + * no support from the vendor. Do not use this driver in any production + * environment. + * + * http://thread.gmane.org/gmane.linux.debian.devel.bugs.rc/378525/focus=54491 + * https://bugzilla.kernel.org/show_bug.cgi?id=60565 + * + * ***************** + * * This controller is eccentric and easily locks up if something isn't * right. Documentation is available at initio's website but it only * documents registers (not programming model). @@ -807,6 +819,8 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ata_print_version_once(&pdev->dev, DRV_VERSION); + dev_alert(&pdev->dev, "inic162x support is broken with common data corruption issues and will be disabled by default, contact linux-ide@vger.kernel.org if in production use\n"); + /* alloc host */ host = ata_host_alloc_pinfo(&pdev->dev, ppi, NR_PORTS); hpriv = devm_kzalloc(&pdev->dev, sizeof(*hpriv), GFP_KERNEL); -- cgit v1.2.3-58-ga151 From 2134ed4d614349b2b4e8d7bb593baa9179b8dd1e Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Thu, 18 Jul 2013 08:48:42 -0700 Subject: cpufreq / intel_pstate: Change to scale off of max P-state Change to using max P-state instead of max turbo P-state. This change resolves two issues. On a quiet system intel_pstate can fail to respond to a load change. On CPU SKUs that have a limited number of P-states and no turbo range intel_pstate fails to select the highest available P-state. This change is suitable for stable v3.9+ References: https://bugzilla.kernel.org/show_bug.cgi?id=59481 Reported-and-tested-by: Arjan van de Ven Reported-and-tested-by: dsmythies@telus.net Signed-off-by: Dirk Brandewie Acked-by: Viresh Kumar Cc: 3.9+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b012d7600e1a..7cde885011ed 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -103,10 +103,10 @@ struct pstate_adjust_policy { static struct pstate_adjust_policy default_policy = { .sample_rate_ms = 10, .deadband = 0, - .setpoint = 109, - .p_gain_pct = 17, + .setpoint = 97, + .p_gain_pct = 20, .d_gain_pct = 0, - .i_gain_pct = 4, + .i_gain_pct = 0, }; struct perf_limits { @@ -468,12 +468,12 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu) { int32_t busy_scaled; - int32_t core_busy, turbo_pstate, current_pstate; + int32_t core_busy, max_pstate, current_pstate; core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy); - turbo_pstate = int_tofp(cpu->pstate.turbo_pstate); + max_pstate = int_tofp(cpu->pstate.max_pstate); current_pstate = int_tofp(cpu->pstate.current_pstate); - busy_scaled = mul_fp(core_busy, div_fp(turbo_pstate, current_pstate)); + busy_scaled = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); return fp_toint(busy_scaled); } -- cgit v1.2.3-58-ga151 From 7d148ef51a657fd04036c3ed7803da600dd0d451 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 22 Jul 2013 18:02:39 +0200 Subject: drm/i915: fix hdmi portclock limits In commit 325b9d048810f7689ec644595061c0b700e64bce Author: Daniel Vetter Date: Fri Apr 19 11:24:33 2013 +0200 drm/i915: fixup 12bpc hdmi dotclock handling I've errornously claimed that we don't yet support the hdmi 1.4 dotclocks > 225 MHz on Haswell. But a bug report and a closer look at the wrpll table showed that we've supported port clocks up to 300MHz. With the new code to dynamically compute wrpll limits we should have no issues going up to the full 340 MHz range of hdmi 1.4, so let's just use that to fix this regression. That'll allow 4k over hdmi for free! v2: Drop the random hunk that somehow slipped in. v3: Cantiga has the original HDMI dotclock limit of 165MHz. And also patch up the mode filtering. To do so extract the dotclock limits into a little helper function. v4: Use 300MHz (from Bspec) instead of 340MHz (upper limit for hdmi 1.3), apparently hw is not required to be able to drive the highest dotclocks. Suggested by Damien. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67048 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67030 Tested-by: Andreas Reis (v2) Cc: Damien Lespiau Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_hdmi.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 98df2a0c85bd..2fd3fd5b943e 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -785,10 +785,22 @@ static void intel_disable_hdmi(struct intel_encoder *encoder) } } +static int hdmi_portclock_limit(struct intel_hdmi *hdmi) +{ + struct drm_device *dev = intel_hdmi_to_dev(hdmi); + + if (IS_G4X(dev)) + return 165000; + else if (IS_HASWELL(dev)) + return 300000; + else + return 225000; +} + static int intel_hdmi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - if (mode->clock > 165000) + if (mode->clock > hdmi_portclock_limit(intel_attached_hdmi(connector))) return MODE_CLOCK_HIGH; if (mode->clock < 20000) return MODE_CLOCK_LOW; @@ -806,6 +818,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_display_mode *adjusted_mode = &pipe_config->adjusted_mode; int clock_12bpc = pipe_config->requested_mode.clock * 3 / 2; + int portclock_limit = hdmi_portclock_limit(intel_hdmi); int desired_bpp; if (intel_hdmi->color_range_auto) { @@ -829,7 +842,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, * outputs. We also need to check that the higher clock still fits * within limits. */ - if (pipe_config->pipe_bpp > 8*3 && clock_12bpc <= 225000 + if (pipe_config->pipe_bpp > 8*3 && clock_12bpc <= portclock_limit && HAS_PCH_SPLIT(dev)) { DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n"); desired_bpp = 12*3; @@ -846,7 +859,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, pipe_config->pipe_bpp = desired_bpp; } - if (adjusted_mode->clock > 225000) { + if (adjusted_mode->clock > portclock_limit) { DRM_DEBUG_KMS("too high HDMI clock, rejecting mode\n"); return false; } -- cgit v1.2.3-58-ga151 From 96f15f29038e58e1b0a96483e2b369ff446becf1 Mon Sep 17 00:00:00 2001 From: Jeff Skirvin Date: Thu, 11 Jul 2013 17:18:58 -0700 Subject: [SCSI] isci: Fix a race condition in the SSP task management path This commit fixes a race condition in the isci driver abort task and SSP device task management path. The race is caused when an I/O termination in the SCU hardware is necessary because of an SSP target timeout condition, and the check of the I/O end state races against the HW-termination-driven end state. The failure of the race meant that no TMF was sent to the device to clean-up the pending I/O. Signed-off-by: Jeff Skirvin Reviewed-by: Lukasz Dorau Cc: Signed-off-by: James Bottomley --- drivers/scsi/isci/task.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c index 9bb020ac089c..0d30ca849e8f 100644 --- a/drivers/scsi/isci/task.c +++ b/drivers/scsi/isci/task.c @@ -491,6 +491,7 @@ int isci_task_abort_task(struct sas_task *task) struct isci_tmf tmf; int ret = TMF_RESP_FUNC_FAILED; unsigned long flags; + int target_done_already = 0; /* Get the isci_request reference from the task. Note that * this check does not depend on the pending request list @@ -505,9 +506,11 @@ int isci_task_abort_task(struct sas_task *task) /* If task is already done, the request isn't valid */ if (!(task->task_state_flags & SAS_TASK_STATE_DONE) && (task->task_state_flags & SAS_TASK_AT_INITIATOR) && - old_request) + old_request) { idev = isci_get_device(task->dev->lldd_dev); - + target_done_already = test_bit(IREQ_COMPLETE_IN_TARGET, + &old_request->flags); + } spin_unlock(&task->task_state_lock); spin_unlock_irqrestore(&ihost->scic_lock, flags); @@ -561,7 +564,7 @@ int isci_task_abort_task(struct sas_task *task) if (task->task_proto == SAS_PROTOCOL_SMP || sas_protocol_ata(task->task_proto) || - test_bit(IREQ_COMPLETE_IN_TARGET, &old_request->flags) || + target_done_already || test_bit(IDEV_GONE, &idev->flags)) { spin_unlock_irqrestore(&ihost->scic_lock, flags); -- cgit v1.2.3-58-ga151 From c3ccb1d7cf4c4549151876dd37c0944a682fd9e1 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Fri, 12 Jul 2013 14:47:51 -0400 Subject: [SCSI] qla2xxx: Properly set the tagging for commands. This fixes a regression where Xyratex controllers and disks were lost by the driver: https://bugzilla.kernel.org/show_bug.cgi?id=59601 Reported-by: Jack Hill Signed-off-by: Saurav Kashyap Signed-off-by: Giridhar Malavali Cc: Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_iocb.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 42ef481db942..ef0a5481b9dd 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -419,6 +419,8 @@ qla2x00_start_scsi(srb_t *sp) __constant_cpu_to_le16(CF_SIMPLE_TAG); break; } + } else { + cmd_pkt->control_flags = __constant_cpu_to_le16(CF_SIMPLE_TAG); } /* Load SCSI command packet. */ @@ -1307,11 +1309,11 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, fcp_cmnd->task_attribute = TSK_ORDERED; break; default: - fcp_cmnd->task_attribute = 0; + fcp_cmnd->task_attribute = TSK_SIMPLE; break; } } else { - fcp_cmnd->task_attribute = 0; + fcp_cmnd->task_attribute = TSK_SIMPLE; } cmd_pkt->fcp_rsp_dseg_len = 0; /* Let response come in status iocb */ @@ -1525,7 +1527,12 @@ qla24xx_start_scsi(srb_t *sp) case ORDERED_QUEUE_TAG: cmd_pkt->task = TSK_ORDERED; break; + default: + cmd_pkt->task = TSK_SIMPLE; + break; } + } else { + cmd_pkt->task = TSK_SIMPLE; } /* Load SCSI command packet. */ -- cgit v1.2.3-58-ga151 From c91bc6ccd13254826fdfceddba0f3b5e308aa93e Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Tue, 23 Jul 2013 11:54:10 +0800 Subject: ahci: fix Null pointer dereference in achi_host_active() commit b29900e6 (AHCI: Make distinct names for ports in /proc/interrupts) introuded a regression, which resulted Null pointer dereference for achi host with dummy ports. For ahci ports, when the port is dummy port, its private_data will be NULL, as ata_dummy_port_ops doesn't support ->port_start. changes in v2: use pp to check dummy ports, update comments Reported-and-tested-by: Alex Williamson Signed-off-by: Xiaotian Feng Signed-off-by: Tejun Heo Cc: Alexander Gordeev Cc: linux-ide@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- drivers/ata/ahci.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 5064f3ea20f1..db4380d70031 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1146,11 +1146,18 @@ int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis) return rc; for (i = 0; i < host->n_ports; i++) { + const char* desc; struct ahci_port_priv *pp = host->ports[i]->private_data; + /* pp is NULL for dummy ports */ + if (pp) + desc = pp->irq_desc; + else + desc = dev_driver_string(host->dev); + rc = devm_request_threaded_irq(host->dev, irq + i, ahci_hw_interrupt, ahci_thread_fn, IRQF_SHARED, - pp->irq_desc, host->ports[i]); + desc, host->ports[i]); if (rc) goto out_free_irqs; } -- cgit v1.2.3-58-ga151 From 085b513f97d8d799d28491239be4b451bcd8c2c5 Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Fri, 2 Nov 2012 09:38:34 -0400 Subject: [SCSI] sd: fix crash when UA received on DIF enabled device sd_prep_fn will allocate a larger CDB for the command via mempool_alloc for devices using DIF type 2 protection. This CDB was being freed in sd_done, which results in a kernel crash if the command is retried due to a UNIT ATTENTION. This change moves the code to free the larger CDB into sd_unprep_fn instead, which is invoked after the request is complete. It is no longer necessary to call scsi_print_command separately for this case as the ->cmnd will no longer be NULL in the normal code path. Also removed conditional test for DIF type 2 when freeing the larger CDB because the protection_type could have been changed via sysfs while the command was executing. Signed-off-by: Ewan D. Milne Acked-by: Martin K. Petersen Cc: Signed-off-by: James Bottomley --- drivers/scsi/sd.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 80f39b8b0223..86fcf2c313ad 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -838,10 +838,17 @@ static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq) static void sd_unprep_fn(struct request_queue *q, struct request *rq) { + struct scsi_cmnd *SCpnt = rq->special; + if (rq->cmd_flags & REQ_DISCARD) { free_page((unsigned long)rq->buffer); rq->buffer = NULL; } + if (SCpnt->cmnd != rq->cmd) { + mempool_free(SCpnt->cmnd, sd_cdb_pool); + SCpnt->cmnd = NULL; + SCpnt->cmd_len = 0; + } } /** @@ -1720,21 +1727,6 @@ static int sd_done(struct scsi_cmnd *SCpnt) if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt)) sd_dif_complete(SCpnt, good_bytes); - if (scsi_host_dif_capable(sdkp->device->host, sdkp->protection_type) - == SD_DIF_TYPE2_PROTECTION && SCpnt->cmnd != SCpnt->request->cmd) { - - /* We have to print a failed command here as the - * extended CDB gets freed before scsi_io_completion() - * is called. - */ - if (result) - scsi_print_command(SCpnt); - - mempool_free(SCpnt->cmnd, sd_cdb_pool); - SCpnt->cmnd = NULL; - SCpnt->cmd_len = 0; - } - return good_bytes; } -- cgit v1.2.3-58-ga151 From ca24763588844b14f019ffc45c7df6d9e8f932c5 Mon Sep 17 00:00:00 2001 From: "Alexandr \\\"Sky\\\" Ivanov" Date: Tue, 23 Jul 2013 17:46:40 +0400 Subject: USB: option: add D-Link DWM-152/C1 and DWM-156/C1 Adding support for D-Link DWM-152/C1 and DWM-156/C1 devices. DWM-152/C1: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=07d1 ProdID=3e01 Rev= 0.00 S: Product=USB Configuration S: SerialNumber=1234567890ABCDEF C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms DWM-156/C1: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=07d1 ProdID=3e02 Rev= 0.00 S: Product=DataCard Device S: SerialNumber=1234567890ABCDEF C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Alexandr Ivanov Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 418746b557ad..9cc40031c23c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1341,6 +1341,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d02, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- cgit v1.2.3-58-ga151 From a829abf8daa2dcf8223a9284b76d221e61130e13 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Jul 2013 17:51:20 +0200 Subject: ARM: pxa: propagate errors from regulator_enable() to pxamci The em_x270_mci_setpower() and em_x270_usb_hub_init() functions call regulator_enable(), which may return an error that must be checked. This changes the em_x270_usb_hub_init() function to bail out if it fails, and changes the pxamci_platform_data->setpower callback so that the a failed em_x270_mci_setpower call can be propagated by the pxamci driver into the mmc core. Signed-off-by: Arnd Bergmann Cc: Mike Rapoport Cc: Paul Gortmaker Cc: Mark Brown Cc: Haojian Zhuang Acked-by: Chris Ball [olof: fixed order of regulator_enable() and test in em_x270_usb_hub_init] Signed-off-by: Olof Johansson --- arch/arm/mach-pxa/em-x270.c | 17 +++++++++++++---- arch/arm/mach-pxa/mainstone.c | 3 ++- arch/arm/mach-pxa/pcm990-baseboard.c | 3 ++- arch/arm/mach-pxa/poodle.c | 4 +++- arch/arm/mach-pxa/spitz.c | 4 +++- arch/arm/mach-pxa/stargate2.c | 3 ++- drivers/mmc/host/pxamci.c | 2 +- include/linux/platform_data/mmc-pxamci.h | 2 +- 8 files changed, 27 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index f6726bb4eb95..3a3362fa793e 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -477,16 +477,24 @@ static int em_x270_usb_hub_init(void) /* USB Hub power-on and reset */ gpio_direction_output(usb_hub_reset, 1); gpio_direction_output(GPIO9_USB_VBUS_EN, 0); - regulator_enable(em_x270_usb_ldo); + err = regulator_enable(em_x270_usb_ldo); + if (err) + goto err_free_rst_gpio; + gpio_set_value(usb_hub_reset, 0); gpio_set_value(usb_hub_reset, 1); regulator_disable(em_x270_usb_ldo); - regulator_enable(em_x270_usb_ldo); + err = regulator_enable(em_x270_usb_ldo); + if (err) + goto err_free_rst_gpio; + gpio_set_value(usb_hub_reset, 0); gpio_set_value(GPIO9_USB_VBUS_EN, 1); return 0; +err_free_rst_gpio: + gpio_free(usb_hub_reset); err_free_vbus_gpio: gpio_free(GPIO9_USB_VBUS_EN); err_free_usb_ldo: @@ -592,7 +600,7 @@ err_irq: return err; } -static void em_x270_mci_setpower(struct device *dev, unsigned int vdd) +static int em_x270_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -600,10 +608,11 @@ static void em_x270_mci_setpower(struct device *dev, unsigned int vdd) int vdd_uV = (2000 + (vdd - __ffs(MMC_VDD_20_21)) * 100) * 1000; regulator_set_voltage(em_x270_sdio_ldo, vdd_uV, vdd_uV); - regulator_enable(em_x270_sdio_ldo); + return regulator_enable(em_x270_sdio_ldo); } else { regulator_disable(em_x270_sdio_ldo); } + return 0; } static void em_x270_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index d2c652318376..dd70343c8708 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -408,7 +408,7 @@ static int mainstone_mci_init(struct device *dev, irq_handler_t mstone_detect_in return err; } -static void mainstone_mci_setpower(struct device *dev, unsigned int vdd) +static int mainstone_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -420,6 +420,7 @@ static void mainstone_mci_setpower(struct device *dev, unsigned int vdd) printk(KERN_DEBUG "%s: off\n", __func__); MST_MSCWR1 &= ~MST_MSCWR1_MMC_ON; } + return 0; } static void mainstone_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c index fb7f1d1627dc..13e5b00eae90 100644 --- a/arch/arm/mach-pxa/pcm990-baseboard.c +++ b/arch/arm/mach-pxa/pcm990-baseboard.c @@ -335,7 +335,7 @@ static int pcm990_mci_init(struct device *dev, irq_handler_t mci_detect_int, return err; } -static void pcm990_mci_setpower(struct device *dev, unsigned int vdd) +static int pcm990_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data *p_d = dev->platform_data; u8 val; @@ -348,6 +348,7 @@ static void pcm990_mci_setpower(struct device *dev, unsigned int vdd) val &= ~PCM990_CTRL_MMC2PWR; pcm990_cpld_writeb(PCM990_CTRL_MMC2PWR, PCM990_CTRL_REG5); + return 0; } static void pcm990_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 711d37e26bd8..aedf053a1de5 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -258,7 +258,7 @@ err_free_2: return err; } -static void poodle_mci_setpower(struct device *dev, unsigned int vdd) +static int poodle_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -270,6 +270,8 @@ static void poodle_mci_setpower(struct device *dev, unsigned int vdd) gpio_set_value(POODLE_GPIO_SD_PWR1, 0); gpio_set_value(POODLE_GPIO_SD_PWR, 0); } + + return 0; } static void poodle_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 2125df0444e7..4c29173026e8 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -598,7 +598,7 @@ static inline void spitz_spi_init(void) {} * NOTE: The card detect interrupt isn't debounced so we delay it by 250ms to * give the card a chance to fully insert/eject. */ -static void spitz_mci_setpower(struct device *dev, unsigned int vdd) +static int spitz_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -606,6 +606,8 @@ static void spitz_mci_setpower(struct device *dev, unsigned int vdd) spitz_card_pwr_ctrl(SCOOP_CPR_SD_3V, SCOOP_CPR_SD_3V); else spitz_card_pwr_ctrl(SCOOP_CPR_SD_3V, 0x0); + + return 0; } static struct pxamci_platform_data spitz_mci_platform_data = { diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c index 88fde43c948c..62aea3e835f3 100644 --- a/arch/arm/mach-pxa/stargate2.c +++ b/arch/arm/mach-pxa/stargate2.c @@ -734,9 +734,10 @@ static int stargate2_mci_init(struct device *dev, * * Very simple control. Either it is on or off and is controlled by * a gpio pin */ -static void stargate2_mci_setpower(struct device *dev, unsigned int vdd) +static int stargate2_mci_setpower(struct device *dev, unsigned int vdd) { gpio_set_value(SG2_SD_POWER_ENABLE, !!vdd); + return 0; } static void stargate2_mci_exit(struct device *dev, void *data) diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 847b1996ce8e..2c5a91bb8ec3 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -128,7 +128,7 @@ static inline int pxamci_set_power(struct pxamci_host *host, !!on ^ host->pdata->gpio_power_invert); } if (!host->vcc && host->pdata && host->pdata->setpower) - host->pdata->setpower(mmc_dev(host->mmc), vdd); + return host->pdata->setpower(mmc_dev(host->mmc), vdd); return 0; } diff --git a/include/linux/platform_data/mmc-pxamci.h b/include/linux/platform_data/mmc-pxamci.h index 9eb515bb799d..1706b3597ce0 100644 --- a/include/linux/platform_data/mmc-pxamci.h +++ b/include/linux/platform_data/mmc-pxamci.h @@ -12,7 +12,7 @@ struct pxamci_platform_data { unsigned long detect_delay_ms; /* delay in millisecond before detecting cards after interrupt */ int (*init)(struct device *, irq_handler_t , void *); int (*get_ro)(struct device *); - void (*setpower)(struct device *, unsigned int); + int (*setpower)(struct device *, unsigned int); void (*exit)(struct device *, void *); int gpio_card_detect; /* gpio detecting card insertion */ int gpio_card_ro; /* gpio detecting read only toggle */ -- cgit v1.2.3-58-ga151 From 69acbaac303e8cb948801a9ddd0ac24e86cc4a1b Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 8 Jul 2013 13:36:19 +0100 Subject: staging: comedi: COMEDI_CANCEL ioctl should wake up read/write Comedi devices can do blocking read() or write() (or poll()) if an asynchronous command has been set up, blocking for data (for read()) or buffer space (for write()). Various events associated with the asynchronous command will wake up the blocked reader or writer (or poller). It is also possible to force the asynchronous command to terminate by issuing a `COMEDI_CANCEL` ioctl. That shuts down the asynchronous command, but does not currently wake up the blocked reader or writer (or poller). If the blocked task could be woken up, it would see that the command is no longer active and return. The caller of the `COMEDI_CANCEL` ioctl could attempt to wake up the blocked task by sending a signal, but that's a nasty workaround. Change `do_cancel_ioctl()` to wake up the wait queue after it returns from `do_cancel()`. `do_cancel()` can propagate an error return value from the low-level comedi driver's cancel routine, but it always shuts the command down regardless, so `do_cancel_ioctl()` can wake up he wait queue regardless of the return value from `do_cancel()`. Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_fops.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 8647518259f6..6cdef9d5f8d0 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -1705,6 +1705,7 @@ static int do_cancel_ioctl(struct comedi_device *dev, unsigned int arg, void *file) { struct comedi_subdevice *s; + int ret; if (arg >= dev->n_subdevices) return -EINVAL; @@ -1721,7 +1722,11 @@ static int do_cancel_ioctl(struct comedi_device *dev, unsigned int arg, if (s->busy != file) return -EBUSY; - return do_cancel(dev, s); + ret = do_cancel(dev, s); + if (comedi_get_subdevice_runflags(s) & SRF_USER) + wake_up_interruptible(&s->async->wait_head); + + return ret; } /* -- cgit v1.2.3-58-ga151 From 4b18f08be01a7b3c7b6df497137b6e3cb28adaa3 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 5 Jul 2013 16:49:34 +0100 Subject: staging: comedi: fix a race between do_cmd_ioctl() and read/write `do_cmd_ioctl()` is called with the comedi device's mutex locked to process the `COMEDI_CMD` ioctl to set up comedi's asynchronous command handling on a comedi subdevice. `comedi_read()` and `comedi_write()` are the `read` and `write` handlers for the comedi device, but do not lock the mutex (for performance reasons, as some things can hold the mutex for quite a long time). There is a race condition if `comedi_read()` or `comedi_write()` is running at the same time and for the same file object and comedi subdevice as `do_cmd_ioctl()`. `do_cmd_ioctl()` sets the subdevice's `busy` pointer to the file object way before it sets the `SRF_RUNNING` flag in the subdevice's `runflags` member. `comedi_read() and `comedi_write()` check the subdevice's `busy` pointer is pointing to the current file object, then if the `SRF_RUNNING` flag is not set, will call `do_become_nonbusy()` to shut down the asyncronous command. Bad things can happen if the asynchronous command is being shutdown and set up at the same time. To prevent the race, don't set the `busy` pointer until after the `SRF_RUNNING` flag has been set. Also, make sure the mutex is held in `comedi_read()` and `comedi_write()` while calling `do_become_nonbusy()` in order to avoid moving the race condition to a point within that function. Change some error handling `goto cleanup` statements in `do_cmd_ioctl()` to simple `return -ERRFOO` statements as a result of changing when the `busy` pointer is set. Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_fops.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 6cdef9d5f8d0..f4a197b2d1fd 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -1413,22 +1413,19 @@ static int do_cmd_ioctl(struct comedi_device *dev, DPRINTK("subdevice busy\n"); return -EBUSY; } - s->busy = file; /* make sure channel/gain list isn't too long */ if (cmd.chanlist_len > s->len_chanlist) { DPRINTK("channel/gain list too long %u > %d\n", cmd.chanlist_len, s->len_chanlist); - ret = -EINVAL; - goto cleanup; + return -EINVAL; } /* make sure channel/gain list isn't too short */ if (cmd.chanlist_len < 1) { DPRINTK("channel/gain list too short %u < 1\n", cmd.chanlist_len); - ret = -EINVAL; - goto cleanup; + return -EINVAL; } async->cmd = cmd; @@ -1438,8 +1435,7 @@ static int do_cmd_ioctl(struct comedi_device *dev, kmalloc(async->cmd.chanlist_len * sizeof(int), GFP_KERNEL); if (!async->cmd.chanlist) { DPRINTK("allocation failed\n"); - ret = -ENOMEM; - goto cleanup; + return -ENOMEM; } if (copy_from_user(async->cmd.chanlist, user_chanlist, @@ -1491,6 +1487,9 @@ static int do_cmd_ioctl(struct comedi_device *dev, comedi_set_subdevice_runflags(s, ~0, SRF_USER | SRF_RUNNING); + /* set s->busy _after_ setting SRF_RUNNING flag to avoid race with + * comedi_read() or comedi_write() */ + s->busy = file; ret = s->do_cmd(dev, s); if (ret == 0) return 0; @@ -2058,11 +2057,13 @@ static ssize_t comedi_write(struct file *file, const char __user *buf, if (!comedi_is_subdevice_running(s)) { if (count == 0) { + mutex_lock(&dev->mutex); if (comedi_is_subdevice_in_error(s)) retval = -EPIPE; else retval = 0; do_become_nonbusy(dev, s); + mutex_unlock(&dev->mutex); } break; } @@ -2161,11 +2162,13 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes, if (n == 0) { if (!comedi_is_subdevice_running(s)) { + mutex_lock(&dev->mutex); do_become_nonbusy(dev, s); if (comedi_is_subdevice_in_error(s)) retval = -EPIPE; else retval = 0; + mutex_unlock(&dev->mutex); break; } if (file->f_flags & O_NONBLOCK) { @@ -2203,9 +2206,11 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes, buf += n; break; /* makes device work like a pipe */ } - if (comedi_is_subdevice_idle(s) && - async->buf_read_count - async->buf_write_count == 0) { - do_become_nonbusy(dev, s); + if (comedi_is_subdevice_idle(s)) { + mutex_lock(&dev->mutex); + if (async->buf_read_count - async->buf_write_count == 0) + do_become_nonbusy(dev, s); + mutex_unlock(&dev->mutex); } set_current_state(TASK_RUNNING); remove_wait_queue(&async->wait_head, &wait); -- cgit v1.2.3-58-ga151 From c8145c56103099565be2e18aeac2a32f79361f3f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 22 Jul 2013 09:57:49 +0300 Subject: staging: frontier: use after free in disconnect() usb_alphatrack_delete() frees "dev" so we can't use it on that path. Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/frontier/alphatrack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/staging/frontier/alphatrack.c b/drivers/staging/frontier/alphatrack.c index 5590ebf1da15..817f837b240d 100644 --- a/drivers/staging/frontier/alphatrack.c +++ b/drivers/staging/frontier/alphatrack.c @@ -827,11 +827,11 @@ static void usb_alphatrack_disconnect(struct usb_interface *intf) mutex_unlock(&dev->mtx); usb_alphatrack_delete(dev); } else { + atomic_set(&dev->writes_pending, 0); dev->intf = NULL; mutex_unlock(&dev->mtx); } - atomic_set(&dev->writes_pending, 0); mutex_unlock(&disconnect_mutex); dev_info(&intf->dev, "Alphatrack Surface #%d now disconnected\n", -- cgit v1.2.3-58-ga151 From 3a349cee593a464b25f329a1b76635900187fdb0 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sun, 14 Jul 2013 13:29:48 +0200 Subject: staging: drm/imx: drop "select OF_VIDEOMODE" Commit ac4c1a9b33 ("staging: drm/imx: Add LDB support") added the DRM_IMX_LDB Kconfig entry. That entry selects OF_VIDEOMODE. But there is no Kconfig symbol named OF_VIDEOMODE. The select statement for that symbol is a nop. Drop it. Signed-off-by: Paul Bolle Acked-by: Sascha Hauer Signed-off-by: Greg Kroah-Hartman --- drivers/staging/imx-drm/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/staging/imx-drm/Kconfig b/drivers/staging/imx-drm/Kconfig index 22339059837f..bd0f2fd01db4 100644 --- a/drivers/staging/imx-drm/Kconfig +++ b/drivers/staging/imx-drm/Kconfig @@ -33,7 +33,6 @@ config DRM_IMX_TVE config DRM_IMX_LDB tristate "Support for LVDS displays" depends on DRM_IMX - select OF_VIDEOMODE help Choose this to enable the internal LVDS Display Bridge (LDB) found on i.MX53 and i.MX6 processors. -- cgit v1.2.3-58-ga151 From f2a6fed1ceaecf6054627f0ddbd4becf43c997fc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 22 Jul 2013 11:00:53 +0300 Subject: staging: gdm72xx: potential use after free in send_qos_list() Sometimes free_qos_entry() sometimes frees its argument. I have moved the dereference of "entry" ahead on line to avoid a use after free. Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gdm72xx/gdm_qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/staging/gdm72xx/gdm_qos.c b/drivers/staging/gdm72xx/gdm_qos.c index b795353e8348..cc3692439a5c 100644 --- a/drivers/staging/gdm72xx/gdm_qos.c +++ b/drivers/staging/gdm72xx/gdm_qos.c @@ -250,8 +250,8 @@ static void send_qos_list(struct nic *nic, struct list_head *head) list_for_each_entry_safe(entry, n, head, list) { list_del(&entry->list); - free_qos_entry(entry); gdm_wimax_send_tx(entry->skb, entry->dev); + free_qos_entry(entry); } } -- cgit v1.2.3-58-ga151 From 644d478793c6594277f8ae76954da4ace7ac6f96 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 26 Jun 2013 15:28:39 +0300 Subject: staging: zram: protect zram_reset_device() call Commit 9b3bb7abcdf2df0f1b2657e6cbc9d06bc2b3b36f (remove zram_sysfs file (v2)) accidentally made zram_reset_device() racy. Protect zram_reset_device() call with zram->lock. Signed-off-by: Sergey Senozhatsky Acked-by: Jerome Marchand Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zram/zram_drv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 82c7202fd5cc..e77fb6ea40c9 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -527,8 +527,11 @@ static void zram_reset_device(struct zram *zram) size_t index; struct zram_meta *meta; - if (!zram->init_done) + down_write(&zram->init_lock); + if (!zram->init_done) { + up_write(&zram->init_lock); return; + } meta = zram->meta; zram->init_done = 0; @@ -549,6 +552,7 @@ static void zram_reset_device(struct zram *zram) zram->disksize = 0; set_capacity(zram->disk, 0); + up_write(&zram->init_lock); } static void zram_init_device(struct zram *zram, struct zram_meta *meta) -- cgit v1.2.3-58-ga151 From 72bb99cfe9c57d2044445fb34bbc95b4c0bae6f2 Mon Sep 17 00:00:00 2001 From: Karlis Ogsts Date: Mon, 22 Jul 2013 13:51:42 -0700 Subject: staging: android: logger: Correct write offset reset on error In the situation that a writer fails to copy data from userspace it will reset the write offset to the value it had before it went to sleep. This discarding any messages written while aquiring the mutex. Therefore the reset offset needs to be retrieved after acquiring the mutex. Cc: Android Kernel Team Signed-off-by: Bjorn Andersson Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/logger.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c index 080abf2faf97..a8c344422a77 100644 --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -469,7 +469,7 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t ppos) { struct logger_log *log = file_get_log(iocb->ki_filp); - size_t orig = log->w_off; + size_t orig; struct logger_entry header; struct timespec now; ssize_t ret = 0; @@ -490,6 +490,8 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, mutex_lock(&log->mutex); + orig = log->w_off; + /* * Fix up any readers, pulling them forward to the first readable * entry after (what will be) the new write offset. We do this now -- cgit v1.2.3-58-ga151 From 53a983c4f8b4246f5ff00567411be55967b0350a Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 9 Jun 2013 09:23:16 -0700 Subject: [SCSI] mvsas: Fix kernel panic on tile due to unaligned data access slot->response is a 64 bit quantity (and accessed as such), but its alignment is only 32 bits. This doesn't cause a problem on x86, but apparently causes a kernel panic on Tile: Stack dump complete Kernel panic - not syncing: Kernel unalign fault running the idle task! Starting stack dump of tid 0, pid 0 (swapper) on cpu 1 at cycle 341586172541 frame 0: 0xfffffff700140ee0 dump_stack+0x0/0x20 (sp 0xfffffe43ffedf420) frame 1: 0xfffffff700283270 panic+0x150/0x3a0 (sp 0xfffffe43ffedf420) frame 2: 0xfffffff70012bff8 jit_bundle_gen+0xfd8/0x27e0 (sp 0xfffffe43ffedf4c8) frame 3: 0xfffffff7003b5b68 do_unaligned+0xc0/0x5a0 (sp 0xfffffe43ffedf710) frame 4: 0xfffffff70044ca78 handle_interrupt+0x270/0x278 (sp 0xfffffe43ffedf840) frame 5: 0xfffffff7002ac370 mvs_slot_complete+0x5f0/0x12a0 (sp 0xfffffe43ffedfa90) frame 6: 0xfffffff7002abec0 mvs_slot_complete+0x140/0x12a0 (sp 0xfffffe43ffedfa90) frame 7: 0xfffffff7005cc840 mvs_int_rx+0x140/0x2a0 (sp 0xfffffe43ffedfb00) frame 8: 0xfffffff7005bbaf0 mvs_94xx_isr+0xd8/0x2b8 (sp 0xfffffe43ffedfb68) frame 9: 0xfffffff700658ba0 mvs_tasklet+0x128/0x1f8 (sp 0xfffffe43ffedfba8) frame 10: 0xfffffff7003e8230 tasklet_action+0x178/0x2c8 (sp 0xfffffe43ffedfbe0) frame 11: 0xfffffff700103850 __do_softirq+0x210/0x398 (sp 0xfffffe43ffedfc40) frame 12: 0xfffffff700180308 do_softirq+0xc8/0x140 (sp 0xfffffe43ffedfcd8) frame 13: 0xfffffff7000bd7f0 irq_exit+0xb0/0x158 (sp 0xfffffe43ffedfcf0) frame 14: 0xfffffff70013fa58 tile_dev_intr+0x1d8/0x2f0 (sp 0xfffffe43ffedfd00) frame 15: 0xfffffff70044ca78 handle_interrupt+0x270/0x278 (sp 0xfffffe43ffedfd40) frame 16: 0xfffffff700143e68 _cpu_idle_nap+0x0/0x18 (sp 0xfffffe43ffedffb0) frame 17: 0xfffffff700482480 cpu_idle+0x310/0x428 (sp 0xfffffe43ffedffb0) Since the check is just for non-zero, split it to be two 32 bit accesses (preserving speed in the fast path) and do a get_unaligned() in the slow path. This is a modification of a wholly get_unaligned patch submitted by Paul Guo Reported-by: Paul Guo Signed-off-by: James Bottomley --- drivers/scsi/mvsas/mv_sas.c | 11 ++++++++--- drivers/scsi/mvsas/mv_sas.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index f14665a6293d..6b1b4e91e53f 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -1857,11 +1857,16 @@ int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags) goto out; } - /* error info record present */ - if (unlikely((rx_desc & RXQ_ERR) && (*(u64 *) slot->response))) { + /* + * error info record present; slot->response is 32 bit aligned but may + * not be 64 bit aligned, so check for zero in two 32 bit reads + */ + if (unlikely((rx_desc & RXQ_ERR) + && (*((u32 *)slot->response) + || *(((u32 *)slot->response) + 1)))) { mv_dprintk("port %d slot %d rx_desc %X has error info" "%016llX.\n", slot->port->sas_port.id, slot_idx, - rx_desc, (u64)(*(u64 *)slot->response)); + rx_desc, get_unaligned_le64(slot->response)); tstat->stat = mvs_slot_err(mvi, task, slot_idx); tstat->resp = SAS_TASK_COMPLETE; goto out; diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h index 60e2fb7f2dca..d6b19dc80bee 100644 --- a/drivers/scsi/mvsas/mv_sas.h +++ b/drivers/scsi/mvsas/mv_sas.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-58-ga151 From 88d84ac97378c2f1d5fec9af1e8b7d9a662d6b00 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 19 Jul 2013 12:28:25 +0200 Subject: EDAC: Fix lockdep splat Fix the following: BUG: key ffff88043bdd0330 not in .data! ------------[ cut here ]------------ WARNING: at kernel/lockdep.c:2987 lockdep_init_map+0x565/0x5a0() DEBUG_LOCKS_WARN_ON(1) Modules linked in: glue_helper sb_edac(+) edac_core snd acpi_cpufreq lrw gf128mul ablk_helper iTCO_wdt evdev i2c_i801 dcdbas button cryptd pcspkr iTCO_vendor_support usb_common lpc_ich mfd_core soundcore mperf processor microcode CPU: 2 PID: 599 Comm: modprobe Not tainted 3.10.0 #1 Hardware name: Dell Inc. Precision T3600/0PTTT9, BIOS A08 01/24/2013 0000000000000009 ffff880439a1d920 ffffffff8160a9a9 ffff880439a1d958 ffffffff8103d9e0 ffff88043af4a510 ffffffff81a16e11 0000000000000000 ffff88043bdd0330 0000000000000000 ffff880439a1d9b8 ffffffff8103dacc Call Trace: dump_stack warn_slowpath_common warn_slowpath_fmt lockdep_init_map ? trace_hardirqs_on_caller ? trace_hardirqs_on debug_mutex_init __mutex_init bus_register edac_create_sysfs_mci_device edac_mc_add_mc sbridge_probe pci_device_probe driver_probe_device __driver_attach ? driver_probe_device bus_for_each_dev driver_attach bus_add_driver driver_register __pci_register_driver ? 0xffffffffa0010fff sbridge_init ? 0xffffffffa0010fff do_one_initcall load_module ? unset_module_init_ro_nx SyS_init_module tracesys ---[ end trace d24a70b0d3ddf733 ]--- EDAC MC0: Giving out device to 'sbridge_edac.c' 'Sandy Bridge Socket#0': DEV 0000:3f:0e.0 EDAC sbridge: Driver loaded. What happens is that bus_register needs a statically allocated lock_key because the last is handed in to lockdep. However, struct mem_ctl_info embeds struct bus_type (the whole struct, not a pointer to it) and the whole thing gets dynamically allocated. Fix this by using a statically allocated struct bus_type for the MC bus. Signed-off-by: Borislav Petkov Acked-by: Mauro Carvalho Chehab Cc: Markus Trippelsdorf Cc: stable@kernel.org # v3.10 Signed-off-by: Tony Luck --- drivers/edac/edac_mc.c | 9 +++++++++ drivers/edac/edac_mc_sysfs.c | 28 +++++++++++++++------------- drivers/edac/i5100_edac.c | 2 +- include/linux/edac.h | 7 ++++++- 4 files changed, 31 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 27e86d938262..89e109022d78 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -48,6 +48,8 @@ static LIST_HEAD(mc_devices); */ static void const *edac_mc_owner; +static struct bus_type mc_bus[EDAC_MAX_MCS]; + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -723,6 +725,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) int ret = -EINVAL; edac_dbg(0, "\n"); + if (mci->mc_idx >= EDAC_MAX_MCS) { + pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx); + return -ENODEV; + } + #ifdef CONFIG_EDAC_DEBUG if (edac_debug_level >= 3) edac_mc_dump_mci(mci); @@ -762,6 +769,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) /* set load time so that error rate can be tracked */ mci->start_time = jiffies; + mci->bus = &mc_bus[mci->mc_idx]; + if (edac_create_sysfs_mci_device(mci)) { edac_mc_printk(mci, KERN_WARNING, "failed to create sysfs device\n"); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index ef15a7e613bc..e7c32c4f7837 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -370,7 +370,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci, return -ENODEV; csrow->dev.type = &csrow_attr_type; - csrow->dev.bus = &mci->bus; + csrow->dev.bus = mci->bus; device_initialize(&csrow->dev); csrow->dev.parent = &mci->dev; csrow->mci = mci; @@ -605,7 +605,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci, dimm->mci = mci; dimm->dev.type = &dimm_attr_type; - dimm->dev.bus = &mci->bus; + dimm->dev.bus = mci->bus; device_initialize(&dimm->dev); dimm->dev.parent = &mci->dev; @@ -975,11 +975,13 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) * The memory controller needs its own bus, in order to avoid * namespace conflicts at /sys/bus/edac. */ - mci->bus.name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); - if (!mci->bus.name) + mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); + if (!mci->bus->name) return -ENOMEM; - edac_dbg(0, "creating bus %s\n", mci->bus.name); - err = bus_register(&mci->bus); + + edac_dbg(0, "creating bus %s\n", mci->bus->name); + + err = bus_register(mci->bus); if (err < 0) return err; @@ -988,7 +990,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) device_initialize(&mci->dev); mci->dev.parent = mci_pdev; - mci->dev.bus = &mci->bus; + mci->dev.bus = mci->bus; dev_set_name(&mci->dev, "mc%d", mci->mc_idx); dev_set_drvdata(&mci->dev, mci); pm_runtime_forbid(&mci->dev); @@ -997,8 +999,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) err = device_add(&mci->dev); if (err < 0) { edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } @@ -1064,8 +1066,8 @@ fail: } fail2: device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } @@ -1098,8 +1100,8 @@ void edac_unregister_sysfs(struct mem_ctl_info *mci) { edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); } static void mc_attr_release(struct device *dev) diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index 1b635178cc44..157b934e8ce3 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -974,7 +974,7 @@ static int i5100_setup_debugfs(struct mem_ctl_info *mci) if (!i5100_debugfs) return -ENODEV; - priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs); + priv->debugfs = debugfs_create_dir(mci->bus->name, i5100_debugfs); if (!priv->debugfs) return -ENOMEM; diff --git a/include/linux/edac.h b/include/linux/edac.h index 0b763276f619..5c6d7fbaf89e 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -622,7 +622,7 @@ struct edac_raw_error_desc { */ struct mem_ctl_info { struct device dev; - struct bus_type bus; + struct bus_type *bus; struct list_head link; /* for global list of mem_ctl_info structs */ @@ -742,4 +742,9 @@ struct mem_ctl_info { #endif }; +/* + * Maximum number of memory controllers in the coherent fabric. + */ +#define EDAC_MAX_MCS 16 + #endif -- cgit v1.2.3-58-ga151 From 94190301ffa059c2d127b3a67ec5d161d5c62681 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 28 Jun 2013 17:15:25 +0200 Subject: usb: option: add TP-LINK MA260 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bjørn Mork Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 9cc40031c23c..bd02f257de13 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1334,6 +1334,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) }, { USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(TPLINK_VENDOR_ID, 0x9000), /* TP-Link MA260 */ + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CHANGHONG_VENDOR_ID, CHANGHONG_PRODUCT_CH690) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x02, 0x01) }, /* D-Link DWM-156 (variant) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x00, 0x00) }, /* D-Link DWM-156 (variant) */ -- cgit v1.2.3-58-ga151 From 0665f9f852b4ac05f2f62046a169f0f5b9212317 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 22 Jul 2013 14:37:39 +1000 Subject: drm/qxl: add delayed fb operations Due to the nature of qxl hw we cannot queue operations while in an irq context, so we queue these operations as best we can until atomic allocations fail, and dequeue them later in a work queue. Daniel looked over the locking on the list and agrees it should be sufficent. The atomic allocs use no warn, as the last thing we want if we haven't memory to allocate space for a printk in an irq context is more printks. Signed-off-by: Dave Airlie --- drivers/gpu/drm/qxl/qxl_drv.h | 1 + drivers/gpu/drm/qxl/qxl_fb.c | 184 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 164 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index aacb791464a3..6a4106fb4c21 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -314,6 +314,7 @@ struct qxl_device { struct workqueue_struct *gc_queue; struct work_struct gc_work; + struct work_struct fb_work; }; /* forward declaration for QXL_INFO_IO */ diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c index 76f39d88d684..88722f233430 100644 --- a/drivers/gpu/drm/qxl/qxl_fb.c +++ b/drivers/gpu/drm/qxl/qxl_fb.c @@ -37,12 +37,29 @@ #define QXL_DIRTY_DELAY (HZ / 30) +#define QXL_FB_OP_FILLRECT 1 +#define QXL_FB_OP_COPYAREA 2 +#define QXL_FB_OP_IMAGEBLIT 3 + +struct qxl_fb_op { + struct list_head head; + int op_type; + union { + struct fb_fillrect fr; + struct fb_copyarea ca; + struct fb_image ib; + } op; + void *img_data; +}; + struct qxl_fbdev { struct drm_fb_helper helper; struct qxl_framebuffer qfb; struct list_head fbdev_list; struct qxl_device *qdev; + spinlock_t delayed_ops_lock; + struct list_head delayed_ops; void *shadow; int size; @@ -164,8 +181,69 @@ static struct fb_deferred_io qxl_defio = { .deferred_io = qxl_deferred_io, }; -static void qxl_fb_fillrect(struct fb_info *info, - const struct fb_fillrect *fb_rect) +static void qxl_fb_delayed_fillrect(struct qxl_fbdev *qfbdev, + const struct fb_fillrect *fb_rect) +{ + struct qxl_fb_op *op; + unsigned long flags; + + op = kmalloc(sizeof(struct qxl_fb_op), GFP_ATOMIC | __GFP_NOWARN); + if (!op) + return; + + op->op.fr = *fb_rect; + op->img_data = NULL; + op->op_type = QXL_FB_OP_FILLRECT; + + spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags); + list_add_tail(&op->head, &qfbdev->delayed_ops); + spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags); +} + +static void qxl_fb_delayed_copyarea(struct qxl_fbdev *qfbdev, + const struct fb_copyarea *fb_copy) +{ + struct qxl_fb_op *op; + unsigned long flags; + + op = kmalloc(sizeof(struct qxl_fb_op), GFP_ATOMIC | __GFP_NOWARN); + if (!op) + return; + + op->op.ca = *fb_copy; + op->img_data = NULL; + op->op_type = QXL_FB_OP_COPYAREA; + + spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags); + list_add_tail(&op->head, &qfbdev->delayed_ops); + spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags); +} + +static void qxl_fb_delayed_imageblit(struct qxl_fbdev *qfbdev, + const struct fb_image *fb_image) +{ + struct qxl_fb_op *op; + unsigned long flags; + uint32_t size = fb_image->width * fb_image->height * (fb_image->depth >= 8 ? fb_image->depth / 8 : 1); + + op = kmalloc(sizeof(struct qxl_fb_op) + size, GFP_ATOMIC | __GFP_NOWARN); + if (!op) + return; + + op->op.ib = *fb_image; + op->img_data = (void *)(op + 1); + op->op_type = QXL_FB_OP_IMAGEBLIT; + + memcpy(op->img_data, fb_image->data, size); + + op->op.ib.data = op->img_data; + spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags); + list_add_tail(&op->head, &qfbdev->delayed_ops); + spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags); +} + +static void qxl_fb_fillrect_internal(struct fb_info *info, + const struct fb_fillrect *fb_rect) { struct qxl_fbdev *qfbdev = info->par; struct qxl_device *qdev = qfbdev->qdev; @@ -203,17 +281,28 @@ static void qxl_fb_fillrect(struct fb_info *info, qxl_draw_fill_rec.rect = rect; qxl_draw_fill_rec.color = color; qxl_draw_fill_rec.rop = rop; + + qxl_draw_fill(&qxl_draw_fill_rec); +} + +static void qxl_fb_fillrect(struct fb_info *info, + const struct fb_fillrect *fb_rect) +{ + struct qxl_fbdev *qfbdev = info->par; + struct qxl_device *qdev = qfbdev->qdev; + if (!drm_can_sleep()) { - qxl_io_log(qdev, - "%s: TODO use RCU, mysterious locks with spin_lock\n", - __func__); + qxl_fb_delayed_fillrect(qfbdev, fb_rect); + schedule_work(&qdev->fb_work); return; } - qxl_draw_fill(&qxl_draw_fill_rec); + /* make sure any previous work is done */ + flush_work(&qdev->fb_work); + qxl_fb_fillrect_internal(info, fb_rect); } -static void qxl_fb_copyarea(struct fb_info *info, - const struct fb_copyarea *region) +static void qxl_fb_copyarea_internal(struct fb_info *info, + const struct fb_copyarea *region) { struct qxl_fbdev *qfbdev = info->par; @@ -223,37 +312,89 @@ static void qxl_fb_copyarea(struct fb_info *info, region->dx, region->dy); } +static void qxl_fb_copyarea(struct fb_info *info, + const struct fb_copyarea *region) +{ + struct qxl_fbdev *qfbdev = info->par; + struct qxl_device *qdev = qfbdev->qdev; + + if (!drm_can_sleep()) { + qxl_fb_delayed_copyarea(qfbdev, region); + schedule_work(&qdev->fb_work); + return; + } + /* make sure any previous work is done */ + flush_work(&qdev->fb_work); + qxl_fb_copyarea_internal(info, region); +} + static void qxl_fb_imageblit_safe(struct qxl_fb_image *qxl_fb_image) { qxl_draw_opaque_fb(qxl_fb_image, 0); } +static void qxl_fb_imageblit_internal(struct fb_info *info, + const struct fb_image *image) +{ + struct qxl_fbdev *qfbdev = info->par; + struct qxl_fb_image qxl_fb_image; + + /* ensure proper order rendering operations - TODO: must do this + * for everything. */ + qxl_fb_image_init(&qxl_fb_image, qfbdev->qdev, info, image); + qxl_fb_imageblit_safe(&qxl_fb_image); +} + static void qxl_fb_imageblit(struct fb_info *info, const struct fb_image *image) { struct qxl_fbdev *qfbdev = info->par; struct qxl_device *qdev = qfbdev->qdev; - struct qxl_fb_image qxl_fb_image; if (!drm_can_sleep()) { - /* we cannot do any ttm_bo allocation since that will fail on - * ioremap_wc..__get_vm_area_node, so queue the work item - * instead This can happen from printk inside an interrupt - * context, i.e.: smp_apic_timer_interrupt..check_cpu_stall */ - qxl_io_log(qdev, - "%s: TODO use RCU, mysterious locks with spin_lock\n", - __func__); + qxl_fb_delayed_imageblit(qfbdev, image); + schedule_work(&qdev->fb_work); return; } + /* make sure any previous work is done */ + flush_work(&qdev->fb_work); + qxl_fb_imageblit_internal(info, image); +} - /* ensure proper order of rendering operations - TODO: must do this - * for everything. */ - qxl_fb_image_init(&qxl_fb_image, qfbdev->qdev, info, image); - qxl_fb_imageblit_safe(&qxl_fb_image); +static void qxl_fb_work(struct work_struct *work) +{ + struct qxl_device *qdev = container_of(work, struct qxl_device, fb_work); + unsigned long flags; + struct qxl_fb_op *entry, *tmp; + struct qxl_fbdev *qfbdev = qdev->mode_info.qfbdev; + + /* since the irq context just adds entries to the end of the + list dropping the lock should be fine, as entry isn't modified + in the operation code */ + spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags); + list_for_each_entry_safe(entry, tmp, &qfbdev->delayed_ops, head) { + spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags); + switch (entry->op_type) { + case QXL_FB_OP_FILLRECT: + qxl_fb_fillrect_internal(qfbdev->helper.fbdev, &entry->op.fr); + break; + case QXL_FB_OP_COPYAREA: + qxl_fb_copyarea_internal(qfbdev->helper.fbdev, &entry->op.ca); + break; + case QXL_FB_OP_IMAGEBLIT: + qxl_fb_imageblit_internal(qfbdev->helper.fbdev, &entry->op.ib); + break; + } + spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags); + list_del(&entry->head); + kfree(entry); + } + spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags); } int qxl_fb_init(struct qxl_device *qdev) { + INIT_WORK(&qdev->fb_work, qxl_fb_work); return 0; } @@ -536,7 +677,8 @@ int qxl_fbdev_init(struct qxl_device *qdev) qfbdev->qdev = qdev; qdev->mode_info.qfbdev = qfbdev; qfbdev->helper.funcs = &qxl_fb_helper_funcs; - + spin_lock_init(&qfbdev->delayed_ops_lock); + INIT_LIST_HEAD(&qfbdev->delayed_ops); ret = drm_fb_helper_init(qdev->ddev, &qfbdev->helper, qxl_num_crtc /* num_crtc - QXL supports just 1 */, QXLFB_CONN_LIMIT); -- cgit v1.2.3-58-ga151 From 4f49ec92be64ad1d96cf5d26fc8276f9849202a3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 23 Jul 2013 14:06:07 +1000 Subject: qxl: allow creation of pre-pinned objects and use for releases. In order to fix an issue with reservations we need to create the releases as pre-pinned objects, this changes the placement interface and bo creation interface to allow creating pinned objects to save nested reservations later. This is just a stepping stone to main fix which follows to actually fix how qxl deals with reservations. Signed-off-by: Dave Airlie --- drivers/gpu/drm/qxl/qxl_cmd.c | 2 +- drivers/gpu/drm/qxl/qxl_gem.c | 2 +- drivers/gpu/drm/qxl/qxl_ioctl.c | 2 +- drivers/gpu/drm/qxl/qxl_object.c | 23 +++++++++++------------ drivers/gpu/drm/qxl/qxl_object.h | 4 ++-- drivers/gpu/drm/qxl/qxl_release.c | 9 +++------ drivers/gpu/drm/qxl/qxl_ttm.c | 2 +- 7 files changed, 20 insertions(+), 24 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 93c2f2cceb51..4e6a62716618 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -266,7 +266,7 @@ int qxl_alloc_bo_reserved(struct qxl_device *qdev, unsigned long size, int ret; ret = qxl_bo_create(qdev, size, false /* not kernel - device */, - QXL_GEM_DOMAIN_VRAM, NULL, &bo); + false, QXL_GEM_DOMAIN_VRAM, NULL, &bo); if (ret) { DRM_ERROR("failed to allocate VRAM BO\n"); return ret; diff --git a/drivers/gpu/drm/qxl/qxl_gem.c b/drivers/gpu/drm/qxl/qxl_gem.c index a235693aabba..25e1777fb0a2 100644 --- a/drivers/gpu/drm/qxl/qxl_gem.c +++ b/drivers/gpu/drm/qxl/qxl_gem.c @@ -55,7 +55,7 @@ int qxl_gem_object_create(struct qxl_device *qdev, int size, /* At least align on page size */ if (alignment < PAGE_SIZE) alignment = PAGE_SIZE; - r = qxl_bo_create(qdev, size, kernel, initial_domain, surf, &qbo); + r = qxl_bo_create(qdev, size, kernel, false, initial_domain, surf, &qbo); if (r) { if (r != -ERESTARTSYS) DRM_ERROR( diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index 27f45e49250d..7448c5ea92e6 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -305,7 +305,7 @@ static int qxl_update_area_ioctl(struct drm_device *dev, void *data, goto out; if (!qobj->pin_count) { - qxl_ttm_placement_from_domain(qobj, qobj->type); + qxl_ttm_placement_from_domain(qobj, qobj->type, false); ret = ttm_bo_validate(&qobj->tbo, &qobj->placement, true, false); if (unlikely(ret)) diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c index 1191fe7788c9..50e7a6177167 100644 --- a/drivers/gpu/drm/qxl/qxl_object.c +++ b/drivers/gpu/drm/qxl/qxl_object.c @@ -51,20 +51,21 @@ bool qxl_ttm_bo_is_qxl_bo(struct ttm_buffer_object *bo) return false; } -void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain) +void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned) { u32 c = 0; + u32 pflag = pinned ? TTM_PL_FLAG_NO_EVICT : 0; qbo->placement.fpfn = 0; qbo->placement.lpfn = 0; qbo->placement.placement = qbo->placements; qbo->placement.busy_placement = qbo->placements; if (domain == QXL_GEM_DOMAIN_VRAM) - qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM; + qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM | pflag; if (domain == QXL_GEM_DOMAIN_SURFACE) - qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0; + qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0 | pflag; if (domain == QXL_GEM_DOMAIN_CPU) - qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM | pflag; if (!c) qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; qbo->placement.num_placement = c; @@ -73,7 +74,7 @@ void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain) int qxl_bo_create(struct qxl_device *qdev, - unsigned long size, bool kernel, u32 domain, + unsigned long size, bool kernel, bool pinned, u32 domain, struct qxl_surface *surf, struct qxl_bo **bo_ptr) { @@ -99,7 +100,7 @@ int qxl_bo_create(struct qxl_device *qdev, } bo->gem_base.driver_private = NULL; bo->type = domain; - bo->pin_count = 0; + bo->pin_count = pinned ? 1 : 0; bo->surface_id = 0; qxl_fence_init(qdev, &bo->fence); INIT_LIST_HEAD(&bo->list); @@ -107,7 +108,7 @@ int qxl_bo_create(struct qxl_device *qdev, if (surf) bo->surf = *surf; - qxl_ttm_placement_from_domain(bo, domain); + qxl_ttm_placement_from_domain(bo, domain, pinned); r = ttm_bo_init(&qdev->mman.bdev, &bo->tbo, size, type, &bo->placement, 0, !kernel, NULL, size, @@ -228,7 +229,7 @@ struct qxl_bo *qxl_bo_ref(struct qxl_bo *bo) int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr) { struct qxl_device *qdev = (struct qxl_device *)bo->gem_base.dev->dev_private; - int r, i; + int r; if (bo->pin_count) { bo->pin_count++; @@ -236,9 +237,7 @@ int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr) *gpu_addr = qxl_bo_gpu_offset(bo); return 0; } - qxl_ttm_placement_from_domain(bo, domain); - for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + qxl_ttm_placement_from_domain(bo, domain, true); r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (likely(r == 0)) { bo->pin_count = 1; @@ -350,7 +349,7 @@ int qxl_bo_list_add(struct qxl_reloc_list *reloc_list, struct qxl_bo *bo) return ret; if (!bo->pin_count) { - qxl_ttm_placement_from_domain(bo, bo->type); + qxl_ttm_placement_from_domain(bo, bo->type, false); ret = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); if (ret) diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h index ee7ad79ce781..116637f09347 100644 --- a/drivers/gpu/drm/qxl/qxl_object.h +++ b/drivers/gpu/drm/qxl/qxl_object.h @@ -88,7 +88,7 @@ static inline int qxl_bo_wait(struct qxl_bo *bo, u32 *mem_type, extern int qxl_bo_create(struct qxl_device *qdev, unsigned long size, - bool kernel, u32 domain, + bool kernel, bool pinned, u32 domain, struct qxl_surface *surf, struct qxl_bo **bo_ptr); extern int qxl_bo_kmap(struct qxl_bo *bo, void **ptr); @@ -99,7 +99,7 @@ extern struct qxl_bo *qxl_bo_ref(struct qxl_bo *bo); extern void qxl_bo_unref(struct qxl_bo **bo); extern int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr); extern int qxl_bo_unpin(struct qxl_bo *bo); -extern void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain); +extern void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned); extern bool qxl_ttm_bo_is_qxl_bo(struct ttm_buffer_object *bo); extern int qxl_bo_list_add(struct qxl_reloc_list *reloc_list, struct qxl_bo *bo); diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index b443d6751d5f..b7f1271c6014 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -118,7 +118,9 @@ static int qxl_release_bo_alloc(struct qxl_device *qdev, struct qxl_bo **bo) { int ret; - ret = qxl_bo_create(qdev, PAGE_SIZE, false, QXL_GEM_DOMAIN_VRAM, NULL, + /* pin releases bo's they are too messy to evict */ + ret = qxl_bo_create(qdev, PAGE_SIZE, false, true, + QXL_GEM_DOMAIN_VRAM, NULL, bo); return ret; } @@ -216,11 +218,6 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, mutex_unlock(&qdev->release_mutex); return ret; } - - /* pin releases bo's they are too messy to evict */ - ret = qxl_bo_reserve(qdev->current_release_bo[cur_idx], false); - qxl_bo_pin(qdev->current_release_bo[cur_idx], QXL_GEM_DOMAIN_VRAM, NULL); - qxl_bo_unreserve(qdev->current_release_bo[cur_idx]); } bo = qxl_bo_ref(qdev->current_release_bo[cur_idx]); diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 489cb8cece4d..1dfd84cda2a1 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -206,7 +206,7 @@ static void qxl_evict_flags(struct ttm_buffer_object *bo, return; } qbo = container_of(bo, struct qxl_bo, tbo); - qxl_ttm_placement_from_domain(qbo, QXL_GEM_DOMAIN_CPU); + qxl_ttm_placement_from_domain(qbo, QXL_GEM_DOMAIN_CPU, false); *placement = qbo->placement; } -- cgit v1.2.3-58-ga151 From 8002db6336dd361fc13214e9515fe5d52ff294ee Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 23 Jul 2013 14:16:42 +1000 Subject: qxl: convert qxl driver to proper use for reservations The recent addition of lockdep support to reservations and their subsequent use by TTM showed up a number of potential problems with the way qxl was using TTM objects. a) it was allocating objects, and reserving them later without validating underneath the reservation, which meant in extreme conditions the objects could be evicted before the reservation ever used them. b) it was reserving objects straight after allocating them, but with no ability to back off should the reservations fail. It now allocates the necessary objects then does a complete reservation pass on them to avoid deadlocks. c) it had two lists per release tracking objects, unnecessary complicating the reservation process. This patch removes the dual object tracking, adds reservations ticket support to the release and fence object handling. It then ports the internal fb drawing code and the userspace facing ioctl to use the new interfaces properly, along with cleanup up the error path handling in some codepaths. Signed-off-by: Dave Airlie --- drivers/gpu/drm/qxl/qxl_cmd.c | 40 ++--- drivers/gpu/drm/qxl/qxl_display.c | 70 ++++++--- drivers/gpu/drm/qxl/qxl_draw.c | 263 +++++++++++++++++++++---------- drivers/gpu/drm/qxl/qxl_drv.h | 75 ++++----- drivers/gpu/drm/qxl/qxl_fence.c | 10 +- drivers/gpu/drm/qxl/qxl_image.c | 111 ++++++++++--- drivers/gpu/drm/qxl/qxl_ioctl.c | 317 +++++++++++++++++++++----------------- drivers/gpu/drm/qxl/qxl_object.c | 49 +----- drivers/gpu/drm/qxl/qxl_object.h | 2 - drivers/gpu/drm/qxl/qxl_release.c | 203 +++++++++++++++--------- 10 files changed, 676 insertions(+), 464 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 4e6a62716618..eb89653a7a17 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -179,9 +179,10 @@ qxl_push_command_ring_release(struct qxl_device *qdev, struct qxl_release *relea uint32_t type, bool interruptible) { struct qxl_command cmd; + struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); cmd.type = type; - cmd.data = qxl_bo_physical_address(qdev, release->bos[0], release->release_offset); + cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset); return qxl_ring_push(qdev->command_ring, &cmd, interruptible); } @@ -191,9 +192,10 @@ qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *releas uint32_t type, bool interruptible) { struct qxl_command cmd; + struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); cmd.type = type; - cmd.data = qxl_bo_physical_address(qdev, release->bos[0], release->release_offset); + cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset); return qxl_ring_push(qdev->cursor_ring, &cmd, interruptible); } @@ -214,7 +216,6 @@ int qxl_garbage_collect(struct qxl_device *qdev) struct qxl_release *release; uint64_t id, next_id; int i = 0; - int ret; union qxl_release_info *info; while (qxl_ring_pop(qdev->release_ring, &id)) { @@ -224,17 +225,10 @@ int qxl_garbage_collect(struct qxl_device *qdev) if (release == NULL) break; - ret = qxl_release_reserve(qdev, release, false); - if (ret) { - qxl_io_log(qdev, "failed to reserve release on garbage collect %lld\n", id); - DRM_ERROR("failed to reserve release %lld\n", id); - } - info = qxl_release_map(qdev, release); next_id = info->next; qxl_release_unmap(qdev, release, info); - qxl_release_unreserve(qdev, release); QXL_INFO(qdev, "popped %lld, next %lld\n", id, next_id); @@ -259,7 +253,9 @@ int qxl_garbage_collect(struct qxl_device *qdev) return i; } -int qxl_alloc_bo_reserved(struct qxl_device *qdev, unsigned long size, +int qxl_alloc_bo_reserved(struct qxl_device *qdev, + struct qxl_release *release, + unsigned long size, struct qxl_bo **_bo) { struct qxl_bo *bo; @@ -271,15 +267,15 @@ int qxl_alloc_bo_reserved(struct qxl_device *qdev, unsigned long size, DRM_ERROR("failed to allocate VRAM BO\n"); return ret; } - ret = qxl_bo_reserve(bo, false); - if (unlikely(ret != 0)) + ret = qxl_release_list_add(release, bo); + if (ret) goto out_unref; *_bo = bo; return 0; out_unref: qxl_bo_unref(&bo); - return 0; + return ret; } static int wait_for_io_cmd_user(struct qxl_device *qdev, uint8_t val, long port, bool intr) @@ -503,6 +499,10 @@ int qxl_hw_surface_alloc(struct qxl_device *qdev, if (ret) return ret; + ret = qxl_release_reserve_list(release, true); + if (ret) + return ret; + cmd = (struct qxl_surface_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_SURFACE_CMD_CREATE; cmd->u.surface_create.format = surf->surf.format; @@ -524,14 +524,11 @@ int qxl_hw_surface_alloc(struct qxl_device *qdev, surf->surf_create = release; - /* no need to add a release to the fence for this bo, + /* no need to add a release to the fence for this surface bo, since it is only released when we ask to destroy the surface and it would never signal otherwise */ - qxl_fence_releaseable(qdev, release); - qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false); - - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); surf->hw_surf_alloc = true; spin_lock(&qdev->surf_id_idr_lock); @@ -573,12 +570,9 @@ int qxl_hw_surface_dealloc(struct qxl_device *qdev, cmd->surface_id = id; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_fence_releaseable(qdev, release); - qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false); - qxl_release_unreserve(qdev, release); - + qxl_release_fence_buffer_objects(release); return 0; } diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index f76f5dd7bfc4..835caba026d3 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -179,7 +179,7 @@ static void qxl_crtc_destroy(struct drm_crtc *crtc) kfree(qxl_crtc); } -static void +static int qxl_hide_cursor(struct qxl_device *qdev) { struct qxl_release *release; @@ -188,14 +188,22 @@ qxl_hide_cursor(struct qxl_device *qdev) ret = qxl_alloc_release_reserved(qdev, sizeof(*cmd), QXL_RELEASE_CURSOR_CMD, &release, NULL); + if (ret) + return ret; + + ret = qxl_release_reserve_list(release, true); + if (ret) { + qxl_release_free(qdev, release); + return ret; + } cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_HIDE; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_fence_releaseable(qdev, release); qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); + return 0; } static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, @@ -216,10 +224,8 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, int size = 64*64*4; int ret = 0; - if (!handle) { - qxl_hide_cursor(qdev); - return 0; - } + if (!handle) + return qxl_hide_cursor(qdev); obj = drm_gem_object_lookup(crtc->dev, file_priv, handle); if (!obj) { @@ -234,8 +240,9 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, goto out_unref; ret = qxl_bo_pin(user_bo, QXL_GEM_DOMAIN_CPU, NULL); + qxl_bo_unreserve(user_bo); if (ret) - goto out_unreserve; + goto out_unref; ret = qxl_bo_kmap(user_bo, &user_ptr); if (ret) @@ -246,14 +253,20 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, &release, NULL); if (ret) goto out_kunmap; - ret = qxl_alloc_bo_reserved(qdev, sizeof(struct qxl_cursor) + size, - &cursor_bo); + + ret = qxl_alloc_bo_reserved(qdev, release, sizeof(struct qxl_cursor) + size, + &cursor_bo); if (ret) goto out_free_release; - ret = qxl_bo_kmap(cursor_bo, (void **)&cursor); + + ret = qxl_release_reserve_list(release, false); if (ret) goto out_free_bo; + ret = qxl_bo_kmap(cursor_bo, (void **)&cursor); + if (ret) + goto out_backoff; + cursor->header.unique = 0; cursor->header.type = SPICE_CURSOR_TYPE_ALPHA; cursor->header.width = 64; @@ -269,11 +282,7 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, qxl_bo_kunmap(cursor_bo); - /* finish with the userspace bo */ qxl_bo_kunmap(user_bo); - qxl_bo_unpin(user_bo); - qxl_bo_unreserve(user_bo); - drm_gem_object_unreference_unlocked(obj); cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_SET; @@ -281,30 +290,35 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, cmd->u.set.position.y = qcrtc->cur_y; cmd->u.set.shape = qxl_bo_physical_address(qdev, cursor_bo, 0); - qxl_release_add_res(qdev, release, cursor_bo); cmd->u.set.visible = 1; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_fence_releaseable(qdev, release); qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); + + /* finish with the userspace bo */ + ret = qxl_bo_reserve(user_bo, false); + if (!ret) { + qxl_bo_unpin(user_bo); + qxl_bo_unreserve(user_bo); + } + drm_gem_object_unreference_unlocked(obj); - qxl_bo_unreserve(cursor_bo); qxl_bo_unref(&cursor_bo); return ret; + +out_backoff: + qxl_release_backoff_reserve_list(release); out_free_bo: qxl_bo_unref(&cursor_bo); out_free_release: - qxl_release_unreserve(qdev, release); qxl_release_free(qdev, release); out_kunmap: qxl_bo_kunmap(user_bo); out_unpin: qxl_bo_unpin(user_bo); -out_unreserve: - qxl_bo_unreserve(user_bo); out_unref: drm_gem_object_unreference_unlocked(obj); return ret; @@ -322,6 +336,14 @@ static int qxl_crtc_cursor_move(struct drm_crtc *crtc, ret = qxl_alloc_release_reserved(qdev, sizeof(*cmd), QXL_RELEASE_CURSOR_CMD, &release, NULL); + if (ret) + return ret; + + ret = qxl_release_reserve_list(release, true); + if (ret) { + qxl_release_free(qdev, release); + return ret; + } qcrtc->cur_x = x; qcrtc->cur_y = y; @@ -332,9 +354,9 @@ static int qxl_crtc_cursor_move(struct drm_crtc *crtc, cmd->u.position.y = qcrtc->cur_y; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_fence_releaseable(qdev, release); qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); + return 0; } diff --git a/drivers/gpu/drm/qxl/qxl_draw.c b/drivers/gpu/drm/qxl/qxl_draw.c index 3c8c3dbf9378..56e1d633875e 100644 --- a/drivers/gpu/drm/qxl/qxl_draw.c +++ b/drivers/gpu/drm/qxl/qxl_draw.c @@ -23,25 +23,29 @@ #include "qxl_drv.h" #include "qxl_object.h" +static int alloc_clips(struct qxl_device *qdev, + struct qxl_release *release, + unsigned num_clips, + struct qxl_bo **clips_bo) +{ + int size = sizeof(struct qxl_clip_rects) + sizeof(struct qxl_rect) * num_clips; + + return qxl_alloc_bo_reserved(qdev, release, size, clips_bo); +} + /* returns a pointer to the already allocated qxl_rect array inside * the qxl_clip_rects. This is *not* the same as the memory allocated * on the device, it is offset to qxl_clip_rects.chunk.data */ static struct qxl_rect *drawable_set_clipping(struct qxl_device *qdev, struct qxl_drawable *drawable, unsigned num_clips, - struct qxl_bo **clips_bo, - struct qxl_release *release) + struct qxl_bo *clips_bo) { struct qxl_clip_rects *dev_clips; int ret; - int size = sizeof(*dev_clips) + sizeof(struct qxl_rect) * num_clips; - ret = qxl_alloc_bo_reserved(qdev, size, clips_bo); - if (ret) - return NULL; - ret = qxl_bo_kmap(*clips_bo, (void **)&dev_clips); + ret = qxl_bo_kmap(clips_bo, (void **)&dev_clips); if (ret) { - qxl_bo_unref(clips_bo); return NULL; } dev_clips->num_rects = num_clips; @@ -51,21 +55,35 @@ static struct qxl_rect *drawable_set_clipping(struct qxl_device *qdev, return (struct qxl_rect *)dev_clips->chunk.data; } +static int +alloc_drawable(struct qxl_device *qdev, struct qxl_release **release) +{ + int ret; + ret = qxl_alloc_release_reserved(qdev, sizeof(struct qxl_drawable), + QXL_RELEASE_DRAWABLE, release, + NULL); + return ret; +} + +static void +free_drawable(struct qxl_device *qdev, struct qxl_release *release) +{ + qxl_release_free(qdev, release); +} + +/* release needs to be reserved at this point */ static int make_drawable(struct qxl_device *qdev, int surface, uint8_t type, const struct qxl_rect *rect, - struct qxl_release **release) + struct qxl_release *release) { struct qxl_drawable *drawable; - int i, ret; + int i; - ret = qxl_alloc_release_reserved(qdev, sizeof(*drawable), - QXL_RELEASE_DRAWABLE, release, - NULL); - if (ret) - return ret; + drawable = (struct qxl_drawable *)qxl_release_map(qdev, release); + if (!drawable) + return -ENOMEM; - drawable = (struct qxl_drawable *)qxl_release_map(qdev, *release); drawable->type = type; drawable->surface_id = surface; /* Only primary for now */ @@ -91,14 +109,23 @@ make_drawable(struct qxl_device *qdev, int surface, uint8_t type, drawable->bbox = *rect; drawable->mm_time = qdev->rom->mm_clock; - qxl_release_unmap(qdev, *release, &drawable->release_info); + qxl_release_unmap(qdev, release, &drawable->release_info); return 0; } -static int qxl_palette_create_1bit(struct qxl_bo **palette_bo, +static int alloc_palette_object(struct qxl_device *qdev, + struct qxl_release *release, + struct qxl_bo **palette_bo) +{ + return qxl_alloc_bo_reserved(qdev, release, + sizeof(struct qxl_palette) + sizeof(uint32_t) * 2, + palette_bo); +} + +static int qxl_palette_create_1bit(struct qxl_bo *palette_bo, + struct qxl_release *release, const struct qxl_fb_image *qxl_fb_image) { - struct qxl_device *qdev = qxl_fb_image->qdev; const struct fb_image *fb_image = &qxl_fb_image->fb_image; uint32_t visual = qxl_fb_image->visual; const uint32_t *pseudo_palette = qxl_fb_image->pseudo_palette; @@ -108,12 +135,7 @@ static int qxl_palette_create_1bit(struct qxl_bo **palette_bo, static uint64_t unique; /* we make no attempt to actually set this * correctly globaly, since that would require * tracking all of our palettes. */ - - ret = qxl_alloc_bo_reserved(qdev, - sizeof(struct qxl_palette) + sizeof(uint32_t) * 2, - palette_bo); - - ret = qxl_bo_kmap(*palette_bo, (void **)&pal); + ret = qxl_bo_kmap(palette_bo, (void **)&pal); pal->num_ents = 2; pal->unique = unique++; if (visual == FB_VISUAL_TRUECOLOR || visual == FB_VISUAL_DIRECTCOLOR) { @@ -126,7 +148,7 @@ static int qxl_palette_create_1bit(struct qxl_bo **palette_bo, } pal->ents[0] = bgcolor; pal->ents[1] = fgcolor; - qxl_bo_kunmap(*palette_bo); + qxl_bo_kunmap(palette_bo); return 0; } @@ -144,44 +166,63 @@ void qxl_draw_opaque_fb(const struct qxl_fb_image *qxl_fb_image, const char *src = fb_image->data; int depth = fb_image->depth; struct qxl_release *release; - struct qxl_bo *image_bo; struct qxl_image *image; int ret; - + struct qxl_drm_image *dimage; + struct qxl_bo *palette_bo = NULL; if (stride == 0) stride = depth * width / 8; + ret = alloc_drawable(qdev, &release); + if (ret) + return; + + ret = qxl_image_alloc_objects(qdev, release, + &dimage, + height, stride); + if (ret) + goto out_free_drawable; + + if (depth == 1) { + ret = alloc_palette_object(qdev, release, &palette_bo); + if (ret) + goto out_free_image; + } + + /* do a reservation run over all the objects we just allocated */ + ret = qxl_release_reserve_list(release, true); + if (ret) + goto out_free_palette; + rect.left = x; rect.right = x + width; rect.top = y; rect.bottom = y + height; - ret = make_drawable(qdev, 0, QXL_DRAW_COPY, &rect, &release); - if (ret) - return; + ret = make_drawable(qdev, 0, QXL_DRAW_COPY, &rect, release); + if (ret) { + qxl_release_backoff_reserve_list(release); + goto out_free_palette; + } - ret = qxl_image_create(qdev, release, &image_bo, - (const uint8_t *)src, 0, 0, - width, height, depth, stride); + ret = qxl_image_init(qdev, release, dimage, + (const uint8_t *)src, 0, 0, + width, height, depth, stride); if (ret) { - qxl_release_unreserve(qdev, release); + qxl_release_backoff_reserve_list(release); qxl_release_free(qdev, release); return; } if (depth == 1) { - struct qxl_bo *palette_bo; void *ptr; - ret = qxl_palette_create_1bit(&palette_bo, qxl_fb_image); - qxl_release_add_res(qdev, release, palette_bo); + ret = qxl_palette_create_1bit(palette_bo, release, qxl_fb_image); - ptr = qxl_bo_kmap_atomic_page(qdev, image_bo, 0); + ptr = qxl_bo_kmap_atomic_page(qdev, dimage->bo, 0); image = ptr; image->u.bitmap.palette = qxl_bo_physical_address(qdev, palette_bo, 0); - qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr); - qxl_bo_unreserve(palette_bo); - qxl_bo_unref(&palette_bo); + qxl_bo_kunmap_atomic_page(qdev, dimage->bo, ptr); } drawable = (struct qxl_drawable *)qxl_release_map(qdev, release); @@ -199,16 +240,20 @@ void qxl_draw_opaque_fb(const struct qxl_fb_image *qxl_fb_image, drawable->u.copy.mask.bitmap = 0; drawable->u.copy.src_bitmap = - qxl_bo_physical_address(qdev, image_bo, 0); + qxl_bo_physical_address(qdev, dimage->bo, 0); qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_release_add_res(qdev, release, image_bo); - qxl_bo_unreserve(image_bo); - qxl_bo_unref(&image_bo); - - qxl_fence_releaseable(qdev, release); qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); + +out_free_palette: + if (palette_bo) + qxl_bo_unref(&palette_bo); +out_free_image: + qxl_image_free_objects(qdev, dimage); +out_free_drawable: + if (ret) + free_drawable(qdev, release); } /* push a draw command using the given clipping rectangles as @@ -243,10 +288,14 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev, int depth = qxl_fb->base.bits_per_pixel; uint8_t *surface_base; struct qxl_release *release; - struct qxl_bo *image_bo; struct qxl_bo *clips_bo; + struct qxl_drm_image *dimage; int ret; + ret = alloc_drawable(qdev, &release); + if (ret) + return; + left = clips->x1; right = clips->x2; top = clips->y1; @@ -263,36 +312,52 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev, width = right - left; height = bottom - top; + + ret = alloc_clips(qdev, release, num_clips, &clips_bo); + if (ret) + goto out_free_drawable; + + ret = qxl_image_alloc_objects(qdev, release, + &dimage, + height, stride); + if (ret) + goto out_free_clips; + + /* do a reservation run over all the objects we just allocated */ + ret = qxl_release_reserve_list(release, true); + if (ret) + goto out_free_image; + drawable_rect.left = left; drawable_rect.right = right; drawable_rect.top = top; drawable_rect.bottom = bottom; + ret = make_drawable(qdev, 0, QXL_DRAW_COPY, &drawable_rect, - &release); + release); if (ret) - return; + goto out_release_backoff; ret = qxl_bo_kmap(bo, (void **)&surface_base); if (ret) - goto out_unref; + goto out_release_backoff; - ret = qxl_image_create(qdev, release, &image_bo, surface_base, - left, top, width, height, depth, stride); + + ret = qxl_image_init(qdev, release, dimage, surface_base, + left, top, width, height, depth, stride); qxl_bo_kunmap(bo); if (ret) - goto out_unref; + goto out_release_backoff; + + rects = drawable_set_clipping(qdev, drawable, num_clips, clips_bo); + if (!rects) + goto out_release_backoff; - rects = drawable_set_clipping(qdev, drawable, num_clips, &clips_bo, release); - if (!rects) { - qxl_bo_unref(&image_bo); - goto out_unref; - } drawable = (struct qxl_drawable *)qxl_release_map(qdev, release); drawable->clip.type = SPICE_CLIP_TYPE_RECTS; drawable->clip.data = qxl_bo_physical_address(qdev, clips_bo, 0); - qxl_release_add_res(qdev, release, clips_bo); drawable->u.copy.src_area.top = 0; drawable->u.copy.src_area.bottom = height; @@ -306,11 +371,9 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev, drawable->u.copy.mask.pos.y = 0; drawable->u.copy.mask.bitmap = 0; - drawable->u.copy.src_bitmap = qxl_bo_physical_address(qdev, image_bo, 0); + drawable->u.copy.src_bitmap = qxl_bo_physical_address(qdev, dimage->bo, 0); qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_release_add_res(qdev, release, image_bo); - qxl_bo_unreserve(image_bo); - qxl_bo_unref(&image_bo); + clips_ptr = clips; for (i = 0; i < num_clips; i++, clips_ptr += inc) { rects[i].left = clips_ptr->x1; @@ -319,17 +382,22 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev, rects[i].bottom = clips_ptr->y2; } qxl_bo_kunmap(clips_bo); - qxl_bo_unreserve(clips_bo); - qxl_bo_unref(&clips_bo); - qxl_fence_releaseable(qdev, release); qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); - qxl_release_unreserve(qdev, release); - return; + qxl_release_fence_buffer_objects(release); + +out_release_backoff: + if (ret) + qxl_release_backoff_reserve_list(release); +out_free_image: + qxl_image_free_objects(qdev, dimage); +out_free_clips: + qxl_bo_unref(&clips_bo); +out_free_drawable: + /* only free drawable on error */ + if (ret) + free_drawable(qdev, release); -out_unref: - qxl_release_unreserve(qdev, release); - qxl_release_free(qdev, release); } void qxl_draw_copyarea(struct qxl_device *qdev, @@ -342,22 +410,36 @@ void qxl_draw_copyarea(struct qxl_device *qdev, struct qxl_release *release; int ret; + ret = alloc_drawable(qdev, &release); + if (ret) + return; + + /* do a reservation run over all the objects we just allocated */ + ret = qxl_release_reserve_list(release, true); + if (ret) + goto out_free_release; + rect.left = dx; rect.top = dy; rect.right = dx + width; rect.bottom = dy + height; - ret = make_drawable(qdev, 0, QXL_COPY_BITS, &rect, &release); - if (ret) - return; + ret = make_drawable(qdev, 0, QXL_COPY_BITS, &rect, release); + if (ret) { + qxl_release_backoff_reserve_list(release); + goto out_free_release; + } drawable = (struct qxl_drawable *)qxl_release_map(qdev, release); drawable->u.copy_bits.src_pos.x = sx; drawable->u.copy_bits.src_pos.y = sy; - qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_fence_releaseable(qdev, release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); + +out_free_release: + if (ret) + free_drawable(qdev, release); } void qxl_draw_fill(struct qxl_draw_fill *qxl_draw_fill_rec) @@ -370,10 +452,21 @@ void qxl_draw_fill(struct qxl_draw_fill *qxl_draw_fill_rec) struct qxl_release *release; int ret; - ret = make_drawable(qdev, 0, QXL_DRAW_FILL, &rect, &release); + ret = alloc_drawable(qdev, &release); if (ret) return; + /* do a reservation run over all the objects we just allocated */ + ret = qxl_release_reserve_list(release, true); + if (ret) + goto out_free_release; + + ret = make_drawable(qdev, 0, QXL_DRAW_FILL, &rect, release); + if (ret) { + qxl_release_backoff_reserve_list(release); + goto out_free_release; + } + drawable = (struct qxl_drawable *)qxl_release_map(qdev, release); drawable->u.fill.brush.type = SPICE_BRUSH_TYPE_SOLID; drawable->u.fill.brush.u.color = color; @@ -384,7 +477,11 @@ void qxl_draw_fill(struct qxl_draw_fill *qxl_draw_fill_rec) drawable->u.fill.mask.bitmap = 0; qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_fence_releaseable(qdev, release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); + +out_free_release: + if (ret) + free_drawable(qdev, release); } diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 6a4106fb4c21..7e96f4f11738 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -42,6 +42,9 @@ #include #include +/* just for ttm_validate_buffer */ +#include + #include #include "qxl_dev.h" @@ -118,9 +121,9 @@ struct qxl_bo { uint32_t surface_id; struct qxl_fence fence; /* per bo fence - list of releases */ struct qxl_release *surf_create; - atomic_t reserve_count; }; #define gem_to_qxl_bo(gobj) container_of((gobj), struct qxl_bo, gem_base) +#define to_qxl_bo(tobj) container_of((tobj), struct qxl_bo, tbo) struct qxl_gem { struct mutex mutex; @@ -128,12 +131,7 @@ struct qxl_gem { }; struct qxl_bo_list { - struct list_head lhead; - struct qxl_bo *bo; -}; - -struct qxl_reloc_list { - struct list_head bos; + struct ttm_validate_buffer tv; }; struct qxl_crtc { @@ -195,10 +193,20 @@ enum { struct qxl_release { int id; int type; - int bo_count; uint32_t release_offset; uint32_t surface_release_id; - struct qxl_bo *bos[QXL_MAX_RES]; + struct ww_acquire_ctx ticket; + struct list_head bos; +}; + +struct qxl_drm_chunk { + struct list_head head; + struct qxl_bo *bo; +}; + +struct qxl_drm_image { + struct qxl_bo *bo; + struct list_head chunk_list; }; struct qxl_fb_image { @@ -434,12 +442,19 @@ int qxl_mmap(struct file *filp, struct vm_area_struct *vma); /* qxl image */ -int qxl_image_create(struct qxl_device *qdev, - struct qxl_release *release, - struct qxl_bo **image_bo, - const uint8_t *data, - int x, int y, int width, int height, - int depth, int stride); +int qxl_image_init(struct qxl_device *qdev, + struct qxl_release *release, + struct qxl_drm_image *dimage, + const uint8_t *data, + int x, int y, int width, int height, + int depth, int stride); +int +qxl_image_alloc_objects(struct qxl_device *qdev, + struct qxl_release *release, + struct qxl_drm_image **image_ptr, + int height, int stride); +void qxl_image_free_objects(struct qxl_device *qdev, struct qxl_drm_image *dimage); + void qxl_update_screen(struct qxl_device *qxl); /* qxl io operations (qxl_cmd.c) */ @@ -460,20 +475,15 @@ int qxl_ring_push(struct qxl_ring *ring, const void *new_elt, bool interruptible void qxl_io_flush_release(struct qxl_device *qdev); void qxl_io_flush_surfaces(struct qxl_device *qdev); -int qxl_release_reserve(struct qxl_device *qdev, - struct qxl_release *release, bool no_wait); -void qxl_release_unreserve(struct qxl_device *qdev, - struct qxl_release *release); union qxl_release_info *qxl_release_map(struct qxl_device *qdev, struct qxl_release *release); void qxl_release_unmap(struct qxl_device *qdev, struct qxl_release *release, union qxl_release_info *info); -/* - * qxl_bo_add_resource. - * - */ -void qxl_bo_add_resource(struct qxl_bo *main_bo, struct qxl_bo *resource); +int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo); +int qxl_release_reserve_list(struct qxl_release *release, bool no_intr); +void qxl_release_backoff_reserve_list(struct qxl_release *release); +void qxl_release_fence_buffer_objects(struct qxl_release *release); int qxl_alloc_surface_release_reserved(struct qxl_device *qdev, enum qxl_surface_cmd_type surface_cmd_type, @@ -482,15 +492,16 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev, int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, int type, struct qxl_release **release, struct qxl_bo **rbo); -int qxl_fence_releaseable(struct qxl_device *qdev, - struct qxl_release *release); + int qxl_push_command_ring_release(struct qxl_device *qdev, struct qxl_release *release, uint32_t type, bool interruptible); int qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *release, uint32_t type, bool interruptible); -int qxl_alloc_bo_reserved(struct qxl_device *qdev, unsigned long size, +int qxl_alloc_bo_reserved(struct qxl_device *qdev, + struct qxl_release *release, + unsigned long size, struct qxl_bo **_bo); /* qxl drawing commands */ @@ -511,15 +522,9 @@ void qxl_draw_copyarea(struct qxl_device *qdev, u32 sx, u32 sy, u32 dx, u32 dy); -uint64_t -qxl_release_alloc(struct qxl_device *qdev, int type, - struct qxl_release **ret); - void qxl_release_free(struct qxl_device *qdev, struct qxl_release *release); -void qxl_release_add_res(struct qxl_device *qdev, - struct qxl_release *release, - struct qxl_bo *bo); + /* used by qxl_debugfs_release */ struct qxl_release *qxl_release_from_id_locked(struct qxl_device *qdev, uint64_t id); @@ -562,7 +567,7 @@ void qxl_surface_evict(struct qxl_device *qdev, struct qxl_bo *surf, bool freein int qxl_update_surface(struct qxl_device *qdev, struct qxl_bo *surf); /* qxl_fence.c */ -int qxl_fence_add_release(struct qxl_fence *qfence, uint32_t rel_id); +void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id); int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id); int qxl_fence_init(struct qxl_device *qdev, struct qxl_fence *qfence); void qxl_fence_fini(struct qxl_fence *qfence); diff --git a/drivers/gpu/drm/qxl/qxl_fence.c b/drivers/gpu/drm/qxl/qxl_fence.c index 63c6715ad385..ae59e91cfb9a 100644 --- a/drivers/gpu/drm/qxl/qxl_fence.c +++ b/drivers/gpu/drm/qxl/qxl_fence.c @@ -49,17 +49,11 @@ For some reason every so often qxl hw fails to release, things go wrong. */ - - -int qxl_fence_add_release(struct qxl_fence *qfence, uint32_t rel_id) +/* must be called with the fence lock held */ +void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id) { - struct qxl_bo *bo = container_of(qfence, struct qxl_bo, fence); - - spin_lock(&bo->tbo.bdev->fence_lock); radix_tree_insert(&qfence->tree, rel_id, qfence); qfence->num_active_releases++; - spin_unlock(&bo->tbo.bdev->fence_lock); - return 0; } int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id) diff --git a/drivers/gpu/drm/qxl/qxl_image.c b/drivers/gpu/drm/qxl/qxl_image.c index cf856206996b..7fbcc35e8ad3 100644 --- a/drivers/gpu/drm/qxl/qxl_image.c +++ b/drivers/gpu/drm/qxl/qxl_image.c @@ -30,31 +30,100 @@ #include "qxl_object.h" static int -qxl_image_create_helper(struct qxl_device *qdev, +qxl_allocate_chunk(struct qxl_device *qdev, + struct qxl_release *release, + struct qxl_drm_image *image, + unsigned int chunk_size) +{ + struct qxl_drm_chunk *chunk; + int ret; + + chunk = kmalloc(sizeof(struct qxl_drm_chunk), GFP_KERNEL); + if (!chunk) + return -ENOMEM; + + ret = qxl_alloc_bo_reserved(qdev, release, chunk_size, &chunk->bo); + if (ret) { + kfree(chunk); + return ret; + } + + list_add_tail(&chunk->head, &image->chunk_list); + return 0; +} + +int +qxl_image_alloc_objects(struct qxl_device *qdev, struct qxl_release *release, - struct qxl_bo **image_bo, - const uint8_t *data, - int width, int height, - int depth, unsigned int hash, - int stride) + struct qxl_drm_image **image_ptr, + int height, int stride) +{ + struct qxl_drm_image *image; + int ret; + + image = kmalloc(sizeof(struct qxl_drm_image), GFP_KERNEL); + if (!image) + return -ENOMEM; + + INIT_LIST_HEAD(&image->chunk_list); + + ret = qxl_alloc_bo_reserved(qdev, release, sizeof(struct qxl_image), &image->bo); + if (ret) { + kfree(image); + return ret; + } + + ret = qxl_allocate_chunk(qdev, release, image, sizeof(struct qxl_data_chunk) + stride * height); + if (ret) { + qxl_bo_unref(&image->bo); + kfree(image); + return ret; + } + *image_ptr = image; + return 0; +} + +void qxl_image_free_objects(struct qxl_device *qdev, struct qxl_drm_image *dimage) { + struct qxl_drm_chunk *chunk, *tmp; + + list_for_each_entry_safe(chunk, tmp, &dimage->chunk_list, head) { + qxl_bo_unref(&chunk->bo); + kfree(chunk); + } + + qxl_bo_unref(&dimage->bo); + kfree(dimage); +} + +static int +qxl_image_init_helper(struct qxl_device *qdev, + struct qxl_release *release, + struct qxl_drm_image *dimage, + const uint8_t *data, + int width, int height, + int depth, unsigned int hash, + int stride) +{ + struct qxl_drm_chunk *drv_chunk; struct qxl_image *image; struct qxl_data_chunk *chunk; int i; int chunk_stride; int linesize = width * depth / 8; - struct qxl_bo *chunk_bo; - int ret; + struct qxl_bo *chunk_bo, *image_bo; void *ptr; /* Chunk */ /* FIXME: Check integer overflow */ /* TODO: variable number of chunks */ + + drv_chunk = list_first_entry(&dimage->chunk_list, struct qxl_drm_chunk, head); + + chunk_bo = drv_chunk->bo; chunk_stride = stride; /* TODO: should use linesize, but it renders wrong (check the bitmaps are sent correctly first) */ - ret = qxl_alloc_bo_reserved(qdev, sizeof(*chunk) + height * chunk_stride, - &chunk_bo); - + ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, 0); chunk = ptr; chunk->data_size = height * chunk_stride; @@ -102,7 +171,6 @@ qxl_image_create_helper(struct qxl_device *qdev, while (remain > 0) { page_base = out_offset & PAGE_MASK; page_offset = offset_in_page(out_offset); - size = min((int)(PAGE_SIZE - page_offset), remain); ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, page_base); @@ -116,14 +184,10 @@ qxl_image_create_helper(struct qxl_device *qdev, } } } - - qxl_bo_kunmap(chunk_bo); - /* Image */ - ret = qxl_alloc_bo_reserved(qdev, sizeof(*image), image_bo); - - ptr = qxl_bo_kmap_atomic_page(qdev, *image_bo, 0); + image_bo = dimage->bo; + ptr = qxl_bo_kmap_atomic_page(qdev, image_bo, 0); image = ptr; image->descriptor.id = 0; @@ -154,23 +218,20 @@ qxl_image_create_helper(struct qxl_device *qdev, image->u.bitmap.stride = chunk_stride; image->u.bitmap.palette = 0; image->u.bitmap.data = qxl_bo_physical_address(qdev, chunk_bo, 0); - qxl_release_add_res(qdev, release, chunk_bo); - qxl_bo_unreserve(chunk_bo); - qxl_bo_unref(&chunk_bo); - qxl_bo_kunmap_atomic_page(qdev, *image_bo, ptr); + qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr); return 0; } -int qxl_image_create(struct qxl_device *qdev, +int qxl_image_init(struct qxl_device *qdev, struct qxl_release *release, - struct qxl_bo **image_bo, + struct qxl_drm_image *dimage, const uint8_t *data, int x, int y, int width, int height, int depth, int stride) { data += y * stride + x * (depth / 8); - return qxl_image_create_helper(qdev, release, image_bo, data, + return qxl_image_init_helper(qdev, release, dimage, data, width, height, depth, 0, stride); } diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index 7448c5ea92e6..6de33563d6f1 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -68,55 +68,60 @@ static int qxl_map_ioctl(struct drm_device *dev, void *data, &qxl_map->offset); } +struct qxl_reloc_info { + int type; + struct qxl_bo *dst_bo; + uint32_t dst_offset; + struct qxl_bo *src_bo; + int src_offset; +}; + /* * dst must be validated, i.e. whole bo on vram/surfacesram (right now all bo's * are on vram). * *(dst + dst_off) = qxl_bo_physical_address(src, src_off) */ static void -apply_reloc(struct qxl_device *qdev, struct qxl_bo *dst, uint64_t dst_off, - struct qxl_bo *src, uint64_t src_off) +apply_reloc(struct qxl_device *qdev, struct qxl_reloc_info *info) { void *reloc_page; - - reloc_page = qxl_bo_kmap_atomic_page(qdev, dst, dst_off & PAGE_MASK); - *(uint64_t *)(reloc_page + (dst_off & ~PAGE_MASK)) = qxl_bo_physical_address(qdev, - src, src_off); - qxl_bo_kunmap_atomic_page(qdev, dst, reloc_page); + reloc_page = qxl_bo_kmap_atomic_page(qdev, info->dst_bo, info->dst_offset & PAGE_MASK); + *(uint64_t *)(reloc_page + (info->dst_offset & ~PAGE_MASK)) = qxl_bo_physical_address(qdev, + info->src_bo, + info->src_offset); + qxl_bo_kunmap_atomic_page(qdev, info->dst_bo, reloc_page); } static void -apply_surf_reloc(struct qxl_device *qdev, struct qxl_bo *dst, uint64_t dst_off, - struct qxl_bo *src) +apply_surf_reloc(struct qxl_device *qdev, struct qxl_reloc_info *info) { uint32_t id = 0; void *reloc_page; - if (src && !src->is_primary) - id = src->surface_id; + if (info->src_bo && !info->src_bo->is_primary) + id = info->src_bo->surface_id; - reloc_page = qxl_bo_kmap_atomic_page(qdev, dst, dst_off & PAGE_MASK); - *(uint32_t *)(reloc_page + (dst_off & ~PAGE_MASK)) = id; - qxl_bo_kunmap_atomic_page(qdev, dst, reloc_page); + reloc_page = qxl_bo_kmap_atomic_page(qdev, info->dst_bo, info->dst_offset & PAGE_MASK); + *(uint32_t *)(reloc_page + (info->dst_offset & ~PAGE_MASK)) = id; + qxl_bo_kunmap_atomic_page(qdev, info->dst_bo, reloc_page); } /* return holding the reference to this object */ static struct qxl_bo *qxlhw_handle_to_bo(struct qxl_device *qdev, struct drm_file *file_priv, uint64_t handle, - struct qxl_reloc_list *reloc_list) + struct qxl_release *release) { struct drm_gem_object *gobj; struct qxl_bo *qobj; int ret; gobj = drm_gem_object_lookup(qdev->ddev, file_priv, handle); - if (!gobj) { - DRM_ERROR("bad bo handle %lld\n", handle); + if (!gobj) return NULL; - } + qobj = gem_to_qxl_bo(gobj); - ret = qxl_bo_list_add(reloc_list, qobj); + ret = qxl_release_list_add(release, qobj); if (ret) return NULL; @@ -129,151 +134,177 @@ static struct qxl_bo *qxlhw_handle_to_bo(struct qxl_device *qdev, * However, the command as passed from user space must *not* contain the initial * QXLReleaseInfo struct (first XXX bytes) */ -static int qxl_execbuffer_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) +static int qxl_process_single_command(struct qxl_device *qdev, + struct drm_qxl_command *cmd, + struct drm_file *file_priv) { - struct qxl_device *qdev = dev->dev_private; - struct drm_qxl_execbuffer *execbuffer = data; - struct drm_qxl_command user_cmd; - int cmd_num; - struct qxl_bo *reloc_src_bo; - struct qxl_bo *reloc_dst_bo; - struct drm_qxl_reloc reloc; + struct qxl_reloc_info *reloc_info; + int release_type; + struct qxl_release *release; + struct qxl_bo *cmd_bo; void *fb_cmd; - int i, ret; - struct qxl_reloc_list reloc_list; + int i, j, ret, num_relocs; int unwritten; - uint32_t reloc_dst_offset; - INIT_LIST_HEAD(&reloc_list.bos); - for (cmd_num = 0; cmd_num < execbuffer->commands_num; ++cmd_num) { - struct qxl_release *release; - struct qxl_bo *cmd_bo; - int release_type; - struct drm_qxl_command *commands = - (struct drm_qxl_command *)(uintptr_t)execbuffer->commands; + switch (cmd->type) { + case QXL_CMD_DRAW: + release_type = QXL_RELEASE_DRAWABLE; + break; + case QXL_CMD_SURFACE: + case QXL_CMD_CURSOR: + default: + DRM_DEBUG("Only draw commands in execbuffers\n"); + return -EINVAL; + break; + } - if (DRM_COPY_FROM_USER(&user_cmd, &commands[cmd_num], - sizeof(user_cmd))) - return -EFAULT; - switch (user_cmd.type) { - case QXL_CMD_DRAW: - release_type = QXL_RELEASE_DRAWABLE; - break; - case QXL_CMD_SURFACE: - case QXL_CMD_CURSOR: - default: - DRM_DEBUG("Only draw commands in execbuffers\n"); - return -EINVAL; - break; - } + if (cmd->command_size > PAGE_SIZE - sizeof(union qxl_release_info)) + return -EINVAL; - if (user_cmd.command_size > PAGE_SIZE - sizeof(union qxl_release_info)) - return -EINVAL; + if (!access_ok(VERIFY_READ, + (void *)(unsigned long)cmd->command, + cmd->command_size)) + return -EFAULT; - if (!access_ok(VERIFY_READ, - (void *)(unsigned long)user_cmd.command, - user_cmd.command_size)) - return -EFAULT; + reloc_info = kmalloc(sizeof(struct qxl_reloc_info) * cmd->relocs_num, GFP_KERNEL); + if (!reloc_info) + return -ENOMEM; - ret = qxl_alloc_release_reserved(qdev, - sizeof(union qxl_release_info) + - user_cmd.command_size, - release_type, - &release, - &cmd_bo); - if (ret) - return ret; + ret = qxl_alloc_release_reserved(qdev, + sizeof(union qxl_release_info) + + cmd->command_size, + release_type, + &release, + &cmd_bo); + if (ret) + goto out_free_reloc; - /* TODO copy slow path code from i915 */ - fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_SIZE)); - unwritten = __copy_from_user_inatomic_nocache(fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_SIZE), (void *)(unsigned long)user_cmd.command, user_cmd.command_size); + /* TODO copy slow path code from i915 */ + fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_SIZE)); + unwritten = __copy_from_user_inatomic_nocache(fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_SIZE), (void *)(unsigned long)cmd->command, cmd->command_size); - { - struct qxl_drawable *draw = fb_cmd; + { + struct qxl_drawable *draw = fb_cmd; + draw->mm_time = qdev->rom->mm_clock; + } - draw->mm_time = qdev->rom->mm_clock; - } - qxl_bo_kunmap_atomic_page(qdev, cmd_bo, fb_cmd); - if (unwritten) { - DRM_ERROR("got unwritten %d\n", unwritten); - qxl_release_unreserve(qdev, release); - qxl_release_free(qdev, release); - return -EFAULT; + qxl_bo_kunmap_atomic_page(qdev, cmd_bo, fb_cmd); + if (unwritten) { + DRM_ERROR("got unwritten %d\n", unwritten); + ret = -EFAULT; + goto out_free_release; + } + + /* fill out reloc info structs */ + num_relocs = 0; + for (i = 0; i < cmd->relocs_num; ++i) { + struct drm_qxl_reloc reloc; + + if (DRM_COPY_FROM_USER(&reloc, + &((struct drm_qxl_reloc *)(uintptr_t)cmd->relocs)[i], + sizeof(reloc))) { + ret = -EFAULT; + goto out_free_bos; } - for (i = 0 ; i < user_cmd.relocs_num; ++i) { - if (DRM_COPY_FROM_USER(&reloc, - &((struct drm_qxl_reloc *)(uintptr_t)user_cmd.relocs)[i], - sizeof(reloc))) { - qxl_bo_list_unreserve(&reloc_list, true); - qxl_release_unreserve(qdev, release); - qxl_release_free(qdev, release); - return -EFAULT; - } + /* add the bos to the list of bos to validate - + need to validate first then process relocs? */ + if (reloc.reloc_type != QXL_RELOC_TYPE_BO && reloc.reloc_type != QXL_RELOC_TYPE_SURF) { + DRM_DEBUG("unknown reloc type %d\n", reloc_info[i].type); - /* add the bos to the list of bos to validate - - need to validate first then process relocs? */ - if (reloc.dst_handle) { - reloc_dst_bo = qxlhw_handle_to_bo(qdev, file_priv, - reloc.dst_handle, &reloc_list); - if (!reloc_dst_bo) { - qxl_bo_list_unreserve(&reloc_list, true); - qxl_release_unreserve(qdev, release); - qxl_release_free(qdev, release); - return -EINVAL; - } - reloc_dst_offset = 0; - } else { - reloc_dst_bo = cmd_bo; - reloc_dst_offset = release->release_offset; + ret = -EINVAL; + goto out_free_bos; + } + reloc_info[i].type = reloc.reloc_type; + + if (reloc.dst_handle) { + reloc_info[i].dst_bo = qxlhw_handle_to_bo(qdev, file_priv, + reloc.dst_handle, release); + if (!reloc_info[i].dst_bo) { + ret = -EINVAL; + reloc_info[i].src_bo = NULL; + goto out_free_bos; } - - /* reserve and validate the reloc dst bo */ - if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle > 0) { - reloc_src_bo = - qxlhw_handle_to_bo(qdev, file_priv, - reloc.src_handle, &reloc_list); - if (!reloc_src_bo) { - if (reloc_dst_bo != cmd_bo) - drm_gem_object_unreference_unlocked(&reloc_dst_bo->gem_base); - qxl_bo_list_unreserve(&reloc_list, true); - qxl_release_unreserve(qdev, release); - qxl_release_free(qdev, release); - return -EINVAL; - } - } else - reloc_src_bo = NULL; - if (reloc.reloc_type == QXL_RELOC_TYPE_BO) { - apply_reloc(qdev, reloc_dst_bo, reloc_dst_offset + reloc.dst_offset, - reloc_src_bo, reloc.src_offset); - } else if (reloc.reloc_type == QXL_RELOC_TYPE_SURF) { - apply_surf_reloc(qdev, reloc_dst_bo, reloc_dst_offset + reloc.dst_offset, reloc_src_bo); - } else { - DRM_ERROR("unknown reloc type %d\n", reloc.reloc_type); - return -EINVAL; + reloc_info[i].dst_offset = reloc.dst_offset; + } else { + reloc_info[i].dst_bo = cmd_bo; + reloc_info[i].dst_offset = reloc.dst_offset + release->release_offset; + } + num_relocs++; + + /* reserve and validate the reloc dst bo */ + if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle > 0) { + reloc_info[i].src_bo = + qxlhw_handle_to_bo(qdev, file_priv, + reloc.src_handle, release); + if (!reloc_info[i].src_bo) { + if (reloc_info[i].dst_bo != cmd_bo) + drm_gem_object_unreference_unlocked(&reloc_info[i].dst_bo->gem_base); + ret = -EINVAL; + goto out_free_bos; } + reloc_info[i].src_offset = reloc.src_offset; + } else { + reloc_info[i].src_bo = NULL; + reloc_info[i].src_offset = 0; + } + } - if (reloc_src_bo && reloc_src_bo != cmd_bo) { - qxl_release_add_res(qdev, release, reloc_src_bo); - drm_gem_object_unreference_unlocked(&reloc_src_bo->gem_base); - } + /* validate all buffers */ + ret = qxl_release_reserve_list(release, false); + if (ret) + goto out_free_bos; - if (reloc_dst_bo != cmd_bo) - drm_gem_object_unreference_unlocked(&reloc_dst_bo->gem_base); - } - qxl_fence_releaseable(qdev, release); + for (i = 0; i < cmd->relocs_num; ++i) { + if (reloc_info[i].type == QXL_RELOC_TYPE_BO) + apply_reloc(qdev, &reloc_info[i]); + else if (reloc_info[i].type == QXL_RELOC_TYPE_SURF) + apply_surf_reloc(qdev, &reloc_info[i]); + } - ret = qxl_push_command_ring_release(qdev, release, user_cmd.type, true); - if (ret == -ERESTARTSYS) { - qxl_release_unreserve(qdev, release); - qxl_release_free(qdev, release); - qxl_bo_list_unreserve(&reloc_list, true); + ret = qxl_push_command_ring_release(qdev, release, cmd->type, true); + if (ret) + qxl_release_backoff_reserve_list(release); + else + qxl_release_fence_buffer_objects(release); + +out_free_bos: + for (j = 0; j < num_relocs; j++) { + if (reloc_info[j].dst_bo != cmd_bo) + drm_gem_object_unreference_unlocked(&reloc_info[j].dst_bo->gem_base); + if (reloc_info[j].src_bo && reloc_info[j].src_bo != cmd_bo) + drm_gem_object_unreference_unlocked(&reloc_info[j].src_bo->gem_base); + } +out_free_release: + if (ret) + qxl_release_free(qdev, release); +out_free_reloc: + kfree(reloc_info); + return ret; +} + +static int qxl_execbuffer_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct qxl_device *qdev = dev->dev_private; + struct drm_qxl_execbuffer *execbuffer = data; + struct drm_qxl_command user_cmd; + int cmd_num; + int ret; + + for (cmd_num = 0; cmd_num < execbuffer->commands_num; ++cmd_num) { + + struct drm_qxl_command *commands = + (struct drm_qxl_command *)(uintptr_t)execbuffer->commands; + + if (DRM_COPY_FROM_USER(&user_cmd, &commands[cmd_num], + sizeof(user_cmd))) + return -EFAULT; + + ret = qxl_process_single_command(qdev, &user_cmd, file_priv); + if (ret) return ret; - } - qxl_release_unreserve(qdev, release); } - qxl_bo_list_unreserve(&reloc_list, 0); return 0; } diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c index 50e7a6177167..aa161cddd87e 100644 --- a/drivers/gpu/drm/qxl/qxl_object.c +++ b/drivers/gpu/drm/qxl/qxl_object.c @@ -104,7 +104,7 @@ int qxl_bo_create(struct qxl_device *qdev, bo->surface_id = 0; qxl_fence_init(qdev, &bo->fence); INIT_LIST_HEAD(&bo->list); - atomic_set(&bo->reserve_count, 0); + if (surf) bo->surf = *surf; @@ -316,53 +316,6 @@ int qxl_bo_check_id(struct qxl_device *qdev, struct qxl_bo *bo) return 0; } -void qxl_bo_list_unreserve(struct qxl_reloc_list *reloc_list, bool failed) -{ - struct qxl_bo_list *entry, *sf; - - list_for_each_entry_safe(entry, sf, &reloc_list->bos, lhead) { - qxl_bo_unreserve(entry->bo); - list_del(&entry->lhead); - kfree(entry); - } -} - -int qxl_bo_list_add(struct qxl_reloc_list *reloc_list, struct qxl_bo *bo) -{ - struct qxl_bo_list *entry; - int ret; - - list_for_each_entry(entry, &reloc_list->bos, lhead) { - if (entry->bo == bo) - return 0; - } - - entry = kmalloc(sizeof(struct qxl_bo_list), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - entry->bo = bo; - list_add(&entry->lhead, &reloc_list->bos); - - ret = qxl_bo_reserve(bo, false); - if (ret) - return ret; - - if (!bo->pin_count) { - qxl_ttm_placement_from_domain(bo, bo->type, false); - ret = ttm_bo_validate(&bo->tbo, &bo->placement, - true, false); - if (ret) - return ret; - } - - /* allocate a surface for reserved + validated buffers */ - ret = qxl_bo_check_id(bo->gem_base.dev->dev_private, bo); - if (ret) - return ret; - return 0; -} - int qxl_surf_evict(struct qxl_device *qdev) { return ttm_bo_evict_mm(&qdev->mman.bdev, TTM_PL_PRIV0); diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h index 116637f09347..8cb6167038e5 100644 --- a/drivers/gpu/drm/qxl/qxl_object.h +++ b/drivers/gpu/drm/qxl/qxl_object.h @@ -102,6 +102,4 @@ extern int qxl_bo_unpin(struct qxl_bo *bo); extern void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned); extern bool qxl_ttm_bo_is_qxl_bo(struct ttm_buffer_object *bo); -extern int qxl_bo_list_add(struct qxl_reloc_list *reloc_list, struct qxl_bo *bo); -extern void qxl_bo_list_unreserve(struct qxl_reloc_list *reloc_list, bool failed); #endif diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index b7f1271c6014..b61449e52cd5 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -38,7 +38,8 @@ static const int release_size_per_bo[] = { RELEASE_SIZE, SURFACE_RELEASE_SIZE, RELEASE_SIZE }; static const int releases_per_bo[] = { RELEASES_PER_BO, SURFACE_RELEASES_PER_BO, RELEASES_PER_BO }; -uint64_t + +static uint64_t qxl_release_alloc(struct qxl_device *qdev, int type, struct qxl_release **ret) { @@ -53,9 +54,9 @@ qxl_release_alloc(struct qxl_device *qdev, int type, return 0; } release->type = type; - release->bo_count = 0; release->release_offset = 0; release->surface_release_id = 0; + INIT_LIST_HEAD(&release->bos); idr_preload(GFP_KERNEL); spin_lock(&qdev->release_idr_lock); @@ -77,20 +78,20 @@ void qxl_release_free(struct qxl_device *qdev, struct qxl_release *release) { - int i; - - QXL_INFO(qdev, "release %d, type %d, %d bos\n", release->id, - release->type, release->bo_count); + struct qxl_bo_list *entry, *tmp; + QXL_INFO(qdev, "release %d, type %d\n", release->id, + release->type); if (release->surface_release_id) qxl_surface_id_dealloc(qdev, release->surface_release_id); - for (i = 0 ; i < release->bo_count; ++i) { + list_for_each_entry_safe(entry, tmp, &release->bos, tv.head) { + struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); QXL_INFO(qdev, "release %llx\n", - release->bos[i]->tbo.addr_space_offset + entry->tv.bo->addr_space_offset - DRM_FILE_OFFSET); - qxl_fence_remove_release(&release->bos[i]->fence, release->id); - qxl_bo_unref(&release->bos[i]); + qxl_fence_remove_release(&bo->fence, release->id); + qxl_bo_unref(&bo); } spin_lock(&qdev->release_idr_lock); idr_remove(&qdev->release_idr, release->id); @@ -98,22 +99,6 @@ qxl_release_free(struct qxl_device *qdev, kfree(release); } -void -qxl_release_add_res(struct qxl_device *qdev, struct qxl_release *release, - struct qxl_bo *bo) -{ - int i; - for (i = 0; i < release->bo_count; i++) - if (release->bos[i] == bo) - return; - - if (release->bo_count >= QXL_MAX_RES) { - DRM_ERROR("exceeded max resource on a qxl_release item\n"); - return; - } - release->bos[release->bo_count++] = qxl_bo_ref(bo); -} - static int qxl_release_bo_alloc(struct qxl_device *qdev, struct qxl_bo **bo) { @@ -125,58 +110,106 @@ static int qxl_release_bo_alloc(struct qxl_device *qdev, return ret; } -int qxl_release_reserve(struct qxl_device *qdev, - struct qxl_release *release, bool no_wait) +int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo) +{ + struct qxl_bo_list *entry; + + list_for_each_entry(entry, &release->bos, tv.head) { + if (entry->tv.bo == &bo->tbo) + return 0; + } + + entry = kmalloc(sizeof(struct qxl_bo_list), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + qxl_bo_ref(bo); + entry->tv.bo = &bo->tbo; + list_add_tail(&entry->tv.head, &release->bos); + return 0; +} + +static int qxl_release_validate_bo(struct qxl_bo *bo) { int ret; - if (atomic_inc_return(&release->bos[0]->reserve_count) == 1) { - ret = qxl_bo_reserve(release->bos[0], no_wait); + + if (!bo->pin_count) { + qxl_ttm_placement_from_domain(bo, bo->type, false); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, + true, false); if (ret) return ret; } + + /* allocate a surface for reserved + validated buffers */ + ret = qxl_bo_check_id(bo->gem_base.dev->dev_private, bo); + if (ret) + return ret; return 0; } -void qxl_release_unreserve(struct qxl_device *qdev, - struct qxl_release *release) +int qxl_release_reserve_list(struct qxl_release *release, bool no_intr) { - if (atomic_dec_and_test(&release->bos[0]->reserve_count)) - qxl_bo_unreserve(release->bos[0]); + int ret; + struct qxl_bo_list *entry; + + /* if only one object on the release its the release itself + since these objects are pinned no need to reserve */ + if (list_is_singular(&release->bos)) + return 0; + + ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos); + if (ret) + return ret; + + list_for_each_entry(entry, &release->bos, tv.head) { + struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); + + ret = qxl_release_validate_bo(bo); + if (ret) { + ttm_eu_backoff_reservation(&release->ticket, &release->bos); + return ret; + } + } + return 0; } +void qxl_release_backoff_reserve_list(struct qxl_release *release) +{ + /* if only one object on the release its the release itself + since these objects are pinned no need to reserve */ + if (list_is_singular(&release->bos)) + return; + + ttm_eu_backoff_reservation(&release->ticket, &release->bos); +} + + int qxl_alloc_surface_release_reserved(struct qxl_device *qdev, enum qxl_surface_cmd_type surface_cmd_type, struct qxl_release *create_rel, struct qxl_release **release) { - int ret; - if (surface_cmd_type == QXL_SURFACE_CMD_DESTROY && create_rel) { int idr_ret; + struct qxl_bo_list *entry = list_first_entry(&create_rel->bos, struct qxl_bo_list, tv.head); struct qxl_bo *bo; union qxl_release_info *info; /* stash the release after the create command */ idr_ret = qxl_release_alloc(qdev, QXL_RELEASE_SURFACE_CMD, release); - bo = qxl_bo_ref(create_rel->bos[0]); + bo = qxl_bo_ref(to_qxl_bo(entry->tv.bo)); (*release)->release_offset = create_rel->release_offset + 64; - qxl_release_add_res(qdev, *release, bo); + qxl_release_list_add(*release, bo); - ret = qxl_release_reserve(qdev, *release, false); - if (ret) { - DRM_ERROR("release reserve failed\n"); - goto out_unref; - } info = qxl_release_map(qdev, *release); info->id = idr_ret; qxl_release_unmap(qdev, *release, info); - -out_unref: qxl_bo_unref(&bo); - return ret; + return 0; } return qxl_alloc_release_reserved(qdev, sizeof(struct qxl_surface_cmd), @@ -189,7 +222,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, { struct qxl_bo *bo; int idr_ret; - int ret; + int ret = 0; union qxl_release_info *info; int cur_idx; @@ -228,36 +261,18 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, if (rbo) *rbo = bo; - qxl_release_add_res(qdev, *release, bo); - - ret = qxl_release_reserve(qdev, *release, false); mutex_unlock(&qdev->release_mutex); - if (ret) - goto out_unref; + + qxl_release_list_add(*release, bo); info = qxl_release_map(qdev, *release); info->id = idr_ret; qxl_release_unmap(qdev, *release, info); -out_unref: qxl_bo_unref(&bo); return ret; } -int qxl_fence_releaseable(struct qxl_device *qdev, - struct qxl_release *release) -{ - int i, ret; - for (i = 0; i < release->bo_count; i++) { - if (!release->bos[i]->tbo.sync_obj) - release->bos[i]->tbo.sync_obj = &release->bos[i]->fence; - ret = qxl_fence_add_release(&release->bos[i]->fence, release->id); - if (ret) - return ret; - } - return 0; -} - struct qxl_release *qxl_release_from_id_locked(struct qxl_device *qdev, uint64_t id) { @@ -270,10 +285,7 @@ struct qxl_release *qxl_release_from_id_locked(struct qxl_device *qdev, DRM_ERROR("failed to find id in release_idr\n"); return NULL; } - if (release->bo_count < 1) { - DRM_ERROR("read a released resource with 0 bos\n"); - return NULL; - } + return release; } @@ -282,9 +294,12 @@ union qxl_release_info *qxl_release_map(struct qxl_device *qdev, { void *ptr; union qxl_release_info *info; - struct qxl_bo *bo = release->bos[0]; + struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); + struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_SIZE); + if (!ptr) + return NULL; info = ptr + (release->release_offset & ~PAGE_SIZE); return info; } @@ -293,9 +308,51 @@ void qxl_release_unmap(struct qxl_device *qdev, struct qxl_release *release, union qxl_release_info *info) { - struct qxl_bo *bo = release->bos[0]; + struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); + struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); void *ptr; ptr = ((void *)info) - (release->release_offset & ~PAGE_SIZE); qxl_bo_kunmap_atomic_page(qdev, bo, ptr); } + +void qxl_release_fence_buffer_objects(struct qxl_release *release) +{ + struct ttm_validate_buffer *entry; + struct ttm_buffer_object *bo; + struct ttm_bo_global *glob; + struct ttm_bo_device *bdev; + struct ttm_bo_driver *driver; + struct qxl_bo *qbo; + + /* if only one object on the release its the release itself + since these objects are pinned no need to reserve */ + if (list_is_singular(&release->bos)) + return; + + bo = list_first_entry(&release->bos, struct ttm_validate_buffer, head)->bo; + bdev = bo->bdev; + driver = bdev->driver; + glob = bo->glob; + + spin_lock(&glob->lru_lock); + spin_lock(&bdev->fence_lock); + + list_for_each_entry(entry, &release->bos, head) { + bo = entry->bo; + qbo = to_qxl_bo(bo); + + if (!entry->bo->sync_obj) + entry->bo->sync_obj = &qbo->fence; + + qxl_fence_add_release_locked(&qbo->fence, release->id); + + ttm_bo_add_to_lru(bo); + ww_mutex_unlock(&bo->resv->lock); + entry->reserved = false; + } + spin_unlock(&bdev->fence_lock); + spin_unlock(&glob->lru_lock); + ww_acquire_fini(&release->ticket); +} + -- cgit v1.2.3-58-ga151 From 9310f61a94ec779b2919864c7e66241ff80fe96d Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 17 Jul 2013 15:13:14 +0300 Subject: mei: hbm: fix typo in error message writet -> write Tested-by: Shuah Khan Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hbm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c index f9296abcf02a..6127ab64bb39 100644 --- a/drivers/misc/mei/hbm.c +++ b/drivers/misc/mei/hbm.c @@ -167,7 +167,7 @@ int mei_hbm_start_req(struct mei_device *dev) dev->hbm_state = MEI_HBM_IDLE; if (mei_write_message(dev, mei_hdr, dev->wr_msg.data)) { - dev_err(&dev->pdev->dev, "version message writet failed\n"); + dev_err(&dev->pdev->dev, "version message write failed\n"); dev->dev_state = MEI_DEV_RESETTING; mei_reset(dev, 1); return -ENODEV; -- cgit v1.2.3-58-ga151 From 315a383ad7dbd484fafb93ef08038e3dbafbb7a8 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 17 Jul 2013 15:13:15 +0300 Subject: mei: me: fix reset state machine ME HW ready bit is down after hw reset was asserted or on error. Only on error we need to enter the reset flow, additional reset need to be prevented when reset was triggered during initialization , power up/down or a reset is already in progress Tested-by: Shuah Khan Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index e4f8dec4dc3c..a0e19e61e7df 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -483,7 +483,9 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) /* check if ME wants a reset */ if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING && - dev->dev_state != MEI_DEV_INITIALIZING) { + dev->dev_state != MEI_DEV_INITIALIZING && + dev->dev_state != MEI_DEV_POWER_DOWN && + dev->dev_state != MEI_DEV_POWER_UP) { dev_dbg(&dev->pdev->dev, "FW not ready.\n"); mei_reset(dev, 1); mutex_unlock(&dev->device_lock); -- cgit v1.2.3-58-ga151 From 99f22c4ef24cf87b0dae6aabe6b5e620b62961d9 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 17 Jul 2013 15:13:16 +0300 Subject: mei: don't have to clean the state on power up When powering up, we don't have to clean up the device state nothing is connected. Tested-by: Shuah Khan Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index ed1d75203af6..e6f16f83ecde 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -148,7 +148,8 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled) dev->hbm_state = MEI_HBM_IDLE; - if (dev->dev_state != MEI_DEV_INITIALIZING) { + if (dev->dev_state != MEI_DEV_INITIALIZING && + dev->dev_state != MEI_DEV_POWER_UP) { if (dev->dev_state != MEI_DEV_DISABLED && dev->dev_state != MEI_DEV_POWER_DOWN) dev->dev_state = MEI_DEV_RESETTING; -- cgit v1.2.3-58-ga151 From dab9bf41b23fe700c4a74133e41eb6a21706031e Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 17 Jul 2013 15:13:17 +0300 Subject: mei: me: fix waiting for hw ready 1. MEI_INTEROP_TIMEOUT is in seconds not in jiffies so we use mei_secs_to_jiffies macro While cold boot is fast this is relevant in resume 2. wait_event_interruptible_timeout can return with -ERESTARTSYS so do not override it with -ETIMEDOUT 3.Adjust error message Tested-by: Shuah Khan Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index a0e19e61e7df..b22c7e247225 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -239,14 +239,18 @@ static int mei_me_hw_ready_wait(struct mei_device *dev) if (mei_me_hw_is_ready(dev)) return 0; + dev->recvd_hw_ready = false; mutex_unlock(&dev->device_lock); err = wait_event_interruptible_timeout(dev->wait_hw_ready, - dev->recvd_hw_ready, MEI_INTEROP_TIMEOUT); + dev->recvd_hw_ready, + mei_secs_to_jiffies(MEI_INTEROP_TIMEOUT)); mutex_lock(&dev->device_lock); if (!err && !dev->recvd_hw_ready) { + if (!err) + err = -ETIMEDOUT; dev_err(&dev->pdev->dev, - "wait hw ready failed. status = 0x%x\n", err); - return -ETIMEDOUT; + "wait hw ready failed. status = %d\n", err); + return err; } dev->recvd_hw_ready = false; -- cgit v1.2.3-58-ga151 From c7b51bce636e4990662bb100bc17e1d4d6c02d34 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:54 +0800 Subject: powerpc/pci/hotplug: Don't need to remove from EEH cache twice Since pcibios_release_device() called by pci_stop_and_remove_bus_device() has removed the device from the EEH cache, we needn't do that again. Cc: Bjorn Helgaas Acked-by: Bjorn Helgaas Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- drivers/pci/hotplug/rpadlpar_core.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index b29e20b7862f..bb7af78e4eed 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -388,7 +388,6 @@ int dlpar_remove_pci_slot(char *drc_name, struct device_node *dn) /* Remove the EADS bridge device itself */ BUG_ON(!bus->self); pr_debug("PCI: Now removing bridge device %s\n", pci_name(bus->self)); - eeh_remove_bus_device(bus->self, true); pci_stop_and_remove_bus_device(bus->self); return 0; -- cgit v1.2.3-58-ga151 From 9e54eae23bc9cca0d8a955018c35b1250e09a73a Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Wed, 24 Jul 2013 14:15:29 +0800 Subject: ahci_imx: add ahci sata support on imx platforms imx6q contains one Synopsys AHCI SATA controller, But it can't share ahci_platform driver with other controllers because there are some misalignments of the generic AHCI controller - the bits definitions of the HBA registers, the Vendor Specific registers, the AHCI PHY clock and the AHCI signals adjustment window(GPR13 register). - CAP_SSS(bit20) of the HOST_CAP is writable, default value is '0', should be configured to be '1' - bit0 (only one AHCI SATA port on imx6q) of the HOST_PORTS_IMPL should be set to be '1'.(default 0) - One Vendor Specific register HOST_TIMER1MS(offset:0xe0) should be configured regarding to the frequency of AHB bus clock. - Configurations of the AHCI PHY clock, and the signal parameters of the GPR13 Setup its own ahci sata driver, contained the imx6q specific initialized codes, re-use the generic ahci_platform driver, and keep the generic ahci_platform driver clean as much as possible. tj: patch description reformatted Signed-off-by: Richard Zhu Reviewed-by: Shawn Guo Signed-off-by: Tejun Heo --- drivers/ata/Kconfig | 9 ++ drivers/ata/Makefile | 1 + drivers/ata/ahci_imx.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 246 insertions(+) create mode 100644 drivers/ata/ahci_imx.c (limited to 'drivers') diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 5cddaf86cd0a..a4af0a2a089c 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -97,6 +97,15 @@ config SATA_AHCI_PLATFORM If unsure, say N. +config AHCI_IMX + tristate "Freescale i.MX AHCI SATA support" + depends on SATA_AHCI_PLATFORM + help + This option enables support for the Freescale i.MX SoC's + onboard AHCI SATA. + + If unsure, say N. + config SATA_FSL tristate "Freescale 3.0Gbps SATA support" depends on FSL_SOC diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index c04d0fd038a3..46518c622460 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_SATA_INIC162X) += sata_inic162x.o obj-$(CONFIG_SATA_SIL24) += sata_sil24.o obj-$(CONFIG_SATA_DWC) += sata_dwc_460ex.o obj-$(CONFIG_SATA_HIGHBANK) += sata_highbank.o libahci.o +obj-$(CONFIG_AHCI_IMX) += ahci_imx.o # SFF w/ custom DMA obj-$(CONFIG_PDC_ADMA) += pdc_adma.o diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c new file mode 100644 index 000000000000..58debb0acc3a --- /dev/null +++ b/drivers/ata/ahci_imx.c @@ -0,0 +1,236 @@ +/* + * Freescale IMX AHCI SATA platform driver + * Copyright 2013 Freescale Semiconductor, Inc. + * + * based on the AHCI SATA platform driver by Jeff Garzik and Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "ahci.h" + +enum { + HOST_TIMER1MS = 0xe0, /* Timer 1-ms */ +}; + +struct imx_ahci_priv { + struct platform_device *ahci_pdev; + struct clk *sata_ref_clk; + struct clk *ahb_clk; + struct regmap *gpr; +}; + +static int imx6q_sata_init(struct device *dev, void __iomem *mmio) +{ + int ret = 0; + unsigned int reg_val; + struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); + + imxpriv->gpr = + syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); + if (IS_ERR(imxpriv->gpr)) { + dev_err(dev, "failed to find fsl,imx6q-iomux-gpr regmap\n"); + return PTR_ERR(imxpriv->gpr); + } + + ret = clk_prepare_enable(imxpriv->sata_ref_clk); + if (ret < 0) { + dev_err(dev, "prepare-enable sata_ref clock err:%d\n", ret); + return ret; + } + + /* + * set PHY Paremeters, two steps to configure the GPR13, + * one write for rest of parameters, mask of first write + * is 0x07fffffd, and the other one write for setting + * the mpll_clk_en. + */ + regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK + | IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK + | IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK + | IMX6Q_GPR13_SATA_SPD_MODE_MASK + | IMX6Q_GPR13_SATA_MPLL_SS_EN + | IMX6Q_GPR13_SATA_TX_ATTEN_MASK + | IMX6Q_GPR13_SATA_TX_BOOST_MASK + | IMX6Q_GPR13_SATA_TX_LVL_MASK + | IMX6Q_GPR13_SATA_TX_EDGE_RATE + , IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB + | IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M + | IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F + | IMX6Q_GPR13_SATA_SPD_MODE_3P0G + | IMX6Q_GPR13_SATA_MPLL_SS_EN + | IMX6Q_GPR13_SATA_TX_ATTEN_9_16 + | IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB + | IMX6Q_GPR13_SATA_TX_LVL_1_025_V); + regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_MPLL_CLK_EN, + IMX6Q_GPR13_SATA_MPLL_CLK_EN); + usleep_range(100, 200); + + /* + * Configure the HWINIT bits of the HOST_CAP and HOST_PORTS_IMPL, + * and IP vendor specific register HOST_TIMER1MS. + * Configure CAP_SSS (support stagered spin up). + * Implement the port0. + * Get the ahb clock rate, and configure the TIMER1MS register. + */ + reg_val = readl(mmio + HOST_CAP); + if (!(reg_val & HOST_CAP_SSS)) { + reg_val |= HOST_CAP_SSS; + writel(reg_val, mmio + HOST_CAP); + } + reg_val = readl(mmio + HOST_PORTS_IMPL); + if (!(reg_val & 0x1)) { + reg_val |= 0x1; + writel(reg_val, mmio + HOST_PORTS_IMPL); + } + + reg_val = clk_get_rate(imxpriv->ahb_clk) / 1000; + writel(reg_val, mmio + HOST_TIMER1MS); + + return 0; +} + +static void imx6q_sata_exit(struct device *dev) +{ + struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); + + regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_MPLL_CLK_EN, + !IMX6Q_GPR13_SATA_MPLL_CLK_EN); + clk_disable_unprepare(imxpriv->sata_ref_clk); +} + +static struct ahci_platform_data imx6q_sata_pdata = { + .init = imx6q_sata_init, + .exit = imx6q_sata_exit, +}; + +static const struct of_device_id imx_ahci_of_match[] = { + { .compatible = "fsl,imx6q-ahci", .data = &imx6q_sata_pdata}, + {}, +}; +MODULE_DEVICE_TABLE(of, imx_ahci_of_match); + +static int imx_ahci_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *mem, *irq, res[2]; + const struct of_device_id *of_id; + const struct ahci_platform_data *pdata = NULL; + struct imx_ahci_priv *imxpriv; + struct device *ahci_dev; + struct platform_device *ahci_pdev; + int ret; + + imxpriv = devm_kzalloc(dev, sizeof(*imxpriv), GFP_KERNEL); + if (!imxpriv) { + dev_err(dev, "can't alloc ahci_host_priv\n"); + return -ENOMEM; + } + + ahci_pdev = platform_device_alloc("ahci", -1); + if (!ahci_pdev) + return -ENODEV; + + ahci_dev = &ahci_pdev->dev; + ahci_dev->parent = dev; + + imxpriv->ahb_clk = devm_clk_get(dev, "ahb"); + if (IS_ERR(imxpriv->ahb_clk)) { + dev_err(dev, "can't get ahb clock.\n"); + ret = PTR_ERR(imxpriv->ahb_clk); + goto err_out; + } + + imxpriv->sata_ref_clk = devm_clk_get(dev, "sata_ref"); + if (IS_ERR(imxpriv->sata_ref_clk)) { + dev_err(dev, "can't get sata_ref clock.\n"); + ret = PTR_ERR(imxpriv->sata_ref_clk); + goto err_out; + } + + imxpriv->ahci_pdev = ahci_pdev; + platform_set_drvdata(pdev, imxpriv); + + of_id = of_match_device(imx_ahci_of_match, dev); + if (of_id) { + pdata = of_id->data; + } else { + ret = -EINVAL; + goto err_out; + } + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!mem || !irq) { + dev_err(dev, "no mmio/irq resource\n"); + ret = -ENOMEM; + goto err_out; + } + + res[0] = *mem; + res[1] = *irq; + + ahci_dev->coherent_dma_mask = DMA_BIT_MASK(32); + ahci_dev->dma_mask = &ahci_dev->coherent_dma_mask; + ahci_dev->of_node = dev->of_node; + + ret = platform_device_add_resources(ahci_pdev, res, 2); + if (ret) + goto err_out; + + ret = platform_device_add_data(ahci_pdev, pdata, sizeof(*pdata)); + if (ret) + goto err_out; + + ret = platform_device_add(ahci_pdev); + if (ret) { +err_out: + platform_device_put(ahci_pdev); + return ret; + } + + return 0; +} + +static int imx_ahci_remove(struct platform_device *pdev) +{ + struct imx_ahci_priv *imxpriv = platform_get_drvdata(pdev); + struct platform_device *ahci_pdev = imxpriv->ahci_pdev; + + platform_device_unregister(ahci_pdev); + return 0; +} + +static struct platform_driver imx_ahci_driver = { + .probe = imx_ahci_probe, + .remove = imx_ahci_remove, + .driver = { + .name = "ahci-imx", + .owner = THIS_MODULE, + .of_match_table = imx_ahci_of_match, + }, +}; +module_platform_driver(imx_ahci_driver); + +MODULE_DESCRIPTION("Freescale i.MX AHCI SATA platform driver"); +MODULE_AUTHOR("Richard Zhu "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ahci:imx"); -- cgit v1.2.3-58-ga151 From 932ef3685f287799fa844862d607a6b596ba5b9e Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 24 Jul 2013 14:34:08 +0900 Subject: staging: tidspbridge: replace strict_strtol() with kstrtos32() The usage of strict_strtol() is not preferred, because strict_strtol() is obsolete. Thus, kstrtos32() should be used in order to convert a string to s32. Also, error handling is added to get rid of a __must_check warning. This fixes a memory corruption bug as well. Signed-off-by: Jingoo Han Reviewed-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/tidspbridge/pmgr/dbll.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/tidspbridge/pmgr/dbll.c b/drivers/staging/tidspbridge/pmgr/dbll.c index c191ae203565..41e88abe47af 100644 --- a/drivers/staging/tidspbridge/pmgr/dbll.c +++ b/drivers/staging/tidspbridge/pmgr/dbll.c @@ -1120,8 +1120,11 @@ static int dbll_rmm_alloc(struct dynamic_loader_allocate *this, or DYN_EXTERNAL, then mem granularity information is present within the section name - only process if there are at least three tokens within the section name (just a minor optimization) */ - if (count >= 3) - strict_strtol(sz_last_token, 10, (long *)&req); + if (count >= 3) { + status = kstrtos32(sz_last_token, 10, &req); + if (status) + goto func_cont; + } if ((req == 0) || (req == 1)) { if (strcmp(sz_sec_last_token, "DYN_DARAM") == 0) { -- cgit v1.2.3-58-ga151 From 81b884c9dfe6753a78df0cbf4e96f095d718bd95 Mon Sep 17 00:00:00 2001 From: Lidza Louina Date: Wed, 24 Jul 2013 11:29:33 -0400 Subject: MAINTAINERS: Update the list of maintainers for staging/comedi driver. This patch updates the list of maintainers for the staging/comedi driver. Signed-off-by: Lidza Louina Acked-by: H Hartley Sweeten Acked-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- drivers/staging/comedi/TODO | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/MAINTAINERS b/MAINTAINERS index bf61e04291ab..6ddb33d5c653 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7812,7 +7812,7 @@ F: drivers/staging/asus_oled/ STAGING - COMEDI M: Ian Abbott -M: Mori Hess +M: H Hartley Sweeten S: Odd Fixes F: drivers/staging/comedi/ diff --git a/drivers/staging/comedi/TODO b/drivers/staging/comedi/TODO index b10f739b7e3e..fa8da9aada30 100644 --- a/drivers/staging/comedi/TODO +++ b/drivers/staging/comedi/TODO @@ -9,4 +9,4 @@ TODO: Please send patches to Greg Kroah-Hartman and copy: Ian Abbott - Frank Mori Hess + H Hartley Sweeten -- cgit v1.2.3-58-ga151 From e1be09808e030d57bb743618eb41e2bc31dd464c Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 24 Jul 2013 12:43:18 -0700 Subject: [SCSI] isci: fix breakage caused by >16byte CDB patch Oops, apparently no-one I cc'd at intel actually bothered to check this patch for the isci driver: commit e73823f7a2c921dcf068d34ea03bd682498d9e42 Author: James Bottomley Date: Tue May 7 15:38:18 2013 -0700 [SCSI] libsas: implement > 16 byte CDB support sci_swab32_cpy needs multiples of four, so for commands that aren't that, it's rounding the wrong way. fix by doing (len+3)/4 instead of len/4. Reported-by: Tony Luck Tested-by: Tony Luck Signed-off-by: James Bottomley --- drivers/scsi/isci/request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 7b082157eb79..99d2930b18c8 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -185,7 +185,7 @@ static void sci_io_request_build_ssp_command_iu(struct isci_request *ireq) cmd_iu->_r_c = 0; sci_swab32_cpy(&cmd_iu->cdb, task->ssp_task.cmd->cmnd, - task->ssp_task.cmd->cmd_len / sizeof(u32)); + (task->ssp_task.cmd->cmd_len+3) / sizeof(u32)); } static void sci_task_request_build_ssp_task_iu(struct isci_request *ireq) -- cgit v1.2.3-58-ga151 From 2652c2163d781676284b2af494a97e64d65a5ee6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 19 Jul 2013 08:53:13 +0300 Subject: TTY: snyclinkmp: calculating wrong addresses This is a static checker fix and I don't have a way to test it. But from the context it looks like this is a typo where SCABUFSIZE was intended instead of sizeof(SCABUFSIZE). SCABUFSIZE is 1024 and sizeof(int) is 4. I would suspect this is a bad bug. Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/tty/synclinkmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index ff171384ea52..dc6e96996ead 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -3478,7 +3478,7 @@ static int alloc_buf_list(SLMP_INFO *info) for ( i = 0; i < info->rx_buf_count; i++ ) { /* calculate and store physical address of this buffer entry */ info->rx_buf_list_ex[i].phys_entry = - info->buffer_list_phys + (i * sizeof(SCABUFSIZE)); + info->buffer_list_phys + (i * SCABUFSIZE); /* calculate and store physical address of */ /* next entry in cirular list of entries */ -- cgit v1.2.3-58-ga151 From 258a9fd17b92552637bc74776c11737e0472dc86 Mon Sep 17 00:00:00 2001 From: Henrik Nordström Date: Tue, 23 Jul 2013 21:06:07 +0200 Subject: tty/8250_early: Don't truncate last character of options The earlier change to use strlcpy uncovered a bug in the options argument length calculation causing last character to be truncated. This makes the actual console to be configured with incorrect baudrate when specifying the console using console=uart,... syntax. Bug symptom seen in kernel log output: Kernel command line: console=uart,mmio,0x90000000,115200 Early serial console at MMIO 0x90000000 (options '11520') which then results in a invalid baud rate 11520 instead of the expected 115200 when the console is switched to ttyS0 later in the boot process. Signed-off-by: Henrik Nordstrom Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_early.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c index 721904f8efa9..946ddd2b3a54 100644 --- a/drivers/tty/serial/8250/8250_early.c +++ b/drivers/tty/serial/8250/8250_early.c @@ -193,7 +193,8 @@ static int __init parse_options(struct early_serial8250_device *device, if (options) { options++; device->baud = simple_strtoul(options, NULL, 0); - length = min(strcspn(options, " "), sizeof(device->options)); + length = min(strcspn(options, " ") + 1, + sizeof(device->options)); strlcpy(device->options, options, length); } else { device->baud = probe_baud(port); -- cgit v1.2.3-58-ga151 From 3bf5d350586d98eb28ab7f86ffbd66518ffd95d8 Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Sun, 21 Jul 2013 10:43:26 +0800 Subject: serial: tegra: correct help message in Kconfig from 'ttyHS' to 'ttyTHS' ttyTHS is consistent with the name used in driver. Signed-off-by: Richard Zhao Acked-by: Stephen Warren Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 5e3d68917ffe..1456673bcca0 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -277,7 +277,7 @@ config SERIAL_TEGRA select SERIAL_CORE help Support for the on-chip UARTs on the NVIDIA Tegra series SOCs - providing /dev/ttyHS0, 1, 2, 3 and 4 (note, some machines may not + providing /dev/ttyTHS0, 1, 2, 3 and 4 (note, some machines may not provide all of these ports, depending on how the serial port are enabled). This driver uses the APB DMA to achieve higher baudrate and better performance. -- cgit v1.2.3-58-ga151 From 1fad56424f5ad3ce4973505a357212b2e2282b3f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Jun 2013 12:24:26 +0200 Subject: USB: ti_usb_3410_5052: fix dynamic-id matching The driver failed to take the dynamic ids into account when determining the device type and therefore all devices were detected as 2-port devices when using the dynamic-id interface. Match on the usb-serial-driver field instead of doing redundant id-table searches. Reported-by: Anders Hammarquist Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ti_usb_3410_5052.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 7182bb774b79..375b5a400b6f 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -371,7 +371,7 @@ static int ti_startup(struct usb_serial *serial) usb_set_serial_data(serial, tdev); /* determine device type */ - if (usb_match_id(serial->interface, ti_id_table_3410)) + if (serial->type == &ti_1port_device) tdev->td_is_3410 = 1; dev_dbg(&dev->dev, "%s - device type is %s\n", __func__, tdev->td_is_3410 ? "3410" : "5052"); -- cgit v1.2.3-58-ga151 From 63b5df963f52ccbab6fabedf05b7ac6b465789a4 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Thu, 25 Jul 2013 02:01:39 +0200 Subject: usb: serial: option: Add ONYX 3G device support This patch adds support for the ONYX 3G device (version 1) from ALFA NETWORK. Signed-off-by: Enrico Mioso Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index bd02f257de13..43093850045b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -783,6 +783,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) }, { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6280) }, /* BP3-USB & BP3-EXT HSDPA */ { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6008) }, -- cgit v1.2.3-58-ga151 From 0eb25bb027a100f5a9df8991f2f628e7d851bc1e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 24 Jul 2013 15:37:42 +1000 Subject: md/raid10: remove use-after-free bug. We always need to be careful when calling generic_make_request, as it can start a chain of events which might free something that we are using. Here is one place I wasn't careful enough. If the wbio2 is not in use, then it might get freed at the first generic_make_request call. So perform all necessary tests first. This bug was introduced in 3.3-rc3 (24afd80d99) and can cause an oops, so fix is suitable for any -stable since then. Cc: stable@vger.kernel.org (3.3+) Signed-off-by: NeilBrown --- drivers/md/raid10.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 957a719e8c2f..df7b0a06b0ea 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2290,12 +2290,18 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) d = r10_bio->devs[1].devnum; wbio = r10_bio->devs[1].bio; wbio2 = r10_bio->devs[1].repl_bio; + /* Need to test wbio2->bi_end_io before we call + * generic_make_request as if the former is NULL, + * the latter is free to free wbio2. + */ + if (wbio2 && !wbio2->bi_end_io) + wbio2 = NULL; if (wbio->bi_end_io) { atomic_inc(&conf->mirrors[d].rdev->nr_pending); md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio)); generic_make_request(wbio); } - if (wbio2 && wbio2->bi_end_io) { + if (wbio2) { atomic_inc(&conf->mirrors[d].replacement->nr_pending); md_sync_acct(conf->mirrors[d].replacement->bdev, bio_sectors(wbio2)); -- cgit v1.2.3-58-ga151 From f94c0b6658c7edea8bc19d13be321e3860a3fa54 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Jul 2013 12:57:21 +1000 Subject: md/raid5: fix interaction of 'replace' and 'recovery'. If a device in a RAID4/5/6 is being replaced while another is being recovered, then the writes to the replacement device currently don't happen, resulting in corruption when the replacement completes and the new drive takes over. This is because the replacement writes are only triggered when 's.replacing' is set and not when the similar 's.sync' is set (which is the case during resync and recovery - it means all devices need to be read). So schedule those writes when s.replacing is set as well. In this case we cannot use "STRIPE_INSYNC" to record that the replacement has happened as that is needed for recording that any parity calculation is complete. So introduce STRIPE_REPLACED to record if the replacement has happened. For safety we should also check that STRIPE_COMPUTE_RUN is not set. This has a similar effect to the "s.locked == 0" test. The latter ensure that now IO has been flagged but not started. The former checks if any parity calculation has been flagged by not started. We must wait for both of these to complete before triggering the 'replace'. Add a similar test to the subsequent check for "are we finished yet". This possibly isn't needed (is subsumed in the STRIPE_INSYNC test), but it makes it more obvious that the REPLACE will happen before we think we are finished. Finally if a NeedReplace device is not UPTODATE then that is an error. We really must trigger a warning. This bug was introduced in commit 9a3e1101b827a59ac9036a672f5fa8d5279d0fe2 (md/raid5: detect and handle replacements during recovery.) which introduced replacement for raid5. That was in 3.3-rc3, so any stable kernel since then would benefit from this fix. Cc: stable@vger.kernel.org (3.3+) Reported-by: qindehua <13691222965@163.com> Tested-by: qindehua Signed-off-by: NeilBrown --- drivers/md/raid5.c | 15 ++++++++++----- drivers/md/raid5.h | 1 + 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2bf094a587cb..78ea44336e75 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3462,6 +3462,7 @@ static void handle_stripe(struct stripe_head *sh) test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { set_bit(STRIPE_SYNCING, &sh->state); clear_bit(STRIPE_INSYNC, &sh->state); + clear_bit(STRIPE_REPLACED, &sh->state); } spin_unlock(&sh->stripe_lock); } @@ -3607,19 +3608,23 @@ static void handle_stripe(struct stripe_head *sh) handle_parity_checks5(conf, sh, &s, disks); } - if (s.replacing && s.locked == 0 - && !test_bit(STRIPE_INSYNC, &sh->state)) { + if ((s.replacing || s.syncing) && s.locked == 0 + && !test_bit(STRIPE_COMPUTE_RUN, &sh->state) + && !test_bit(STRIPE_REPLACED, &sh->state)) { /* Write out to replacement devices where possible */ for (i = 0; i < conf->raid_disks; i++) - if (test_bit(R5_UPTODATE, &sh->dev[i].flags) && - test_bit(R5_NeedReplace, &sh->dev[i].flags)) { + if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) { + WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags)); set_bit(R5_WantReplace, &sh->dev[i].flags); set_bit(R5_LOCKED, &sh->dev[i].flags); s.locked++; } - set_bit(STRIPE_INSYNC, &sh->state); + if (s.replacing) + set_bit(STRIPE_INSYNC, &sh->state); + set_bit(STRIPE_REPLACED, &sh->state); } if ((s.syncing || s.replacing) && s.locked == 0 && + !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && test_bit(STRIPE_INSYNC, &sh->state)) { md_done_sync(conf->mddev, STRIPE_SECTORS, 1); clear_bit(STRIPE_SYNCING, &sh->state); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index b0b663b119a8..70c49329ca9a 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -306,6 +306,7 @@ enum { STRIPE_SYNC_REQUESTED, STRIPE_SYNCING, STRIPE_INSYNC, + STRIPE_REPLACED, STRIPE_PREREAD_ACTIVE, STRIPE_DELAYED, STRIPE_DEGRADED, -- cgit v1.2.3-58-ga151 From 14c5cec5d0cd73e7e9d4fbea2bbfeea8f3ade871 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 25 Jul 2013 12:44:34 +0300 Subject: drm/i915: initialize gt_lock early with other spin locks commit 181d1b9e31c668259d3798c521672afb8edd355c Author: Daniel Vetter Date: Sun Jul 21 13:16:24 2013 +0200 drm/i915: fix up gt init sequence fallout moved dev_priv->gt_lock initialization after use. Do the initialization much earlier with other spin lock initializations. Reported-by: Sedat Dilek Signed-off-by: Jani Nikula Tested-by: Sedat Dilek Cc: stable@vger.kernel.org (since the regressing patch is also cc: stable) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/intel_pm.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 67ec54f67afe..b1520682cb23 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1498,6 +1498,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); spin_lock_init(&dev_priv->rps.lock); + spin_lock_init(&dev_priv->gt_lock); spin_lock_init(&dev_priv->backlight.lock); mutex_init(&dev_priv->dpio_lock); mutex_init(&dev_priv->rps.hw_lock); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6a347f54d39f..51a2a60f5bfc 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5497,8 +5497,6 @@ void intel_gt_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - spin_lock_init(&dev_priv->gt_lock); - if (IS_VALLEYVIEW(dev)) { dev_priv->gt.force_wake_get = vlv_force_wake_get; dev_priv->gt.force_wake_put = vlv_force_wake_put; -- cgit v1.2.3-58-ga151 From 203a86613fb3bf2767335659513fa98563a3eb71 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Wed, 24 Jul 2013 10:27:13 -0700 Subject: xhci: Avoid NULL pointer deref when host dies. When the host controller fails to respond to an Enable Slot command, and the host fails to respond to the register write to abort the command ring, the xHCI driver will assume the host is dead, and call usb_hc_died(). The USB device's slot_id is still set to zero, and the pointer stored at xhci->devs[0] will always be NULL. The call to xhci_check_args in xhci_free_dev should have caught the NULL virt_dev pointer. However, xhci_free_dev is designed to free the xhci_virt_device structures, even if the host is dead, so that we don't leak kernel memory. xhci_free_dev checks the return value from the generic xhci_check_args function. If the return value is -ENODEV, it carries on trying to free the virtual device. The issue is that xhci_check_args looks at the host controller state before it looks at the xhci_virt_device pointer. It will return -ENIVAL because the host is dead, and xhci_free_dev will ignore the return value, and happily dereference the NULL xhci_virt_device pointer. The fix is to make sure that xhci_check_args checks the xhci_virt_device pointer before it checks the host state. See https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1203453 for further details. This patch doesn't solve the underlying issue, but will ensure we don't see any more NULL pointer dereferences because of the issue. This patch should be backported to kernels as old as 3.1, that contain the commit 7bd89b4017f46a9b92853940fd9771319acb578a "xhci: Don't submit commands or URBs to halted hosts." Signed-off-by: Sarah Sharp Reported-by: Vincent Thiele Cc: stable@vger.kernel.org --- drivers/usb/host/xhci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 2c49f00260ca..98aac742a2c1 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1181,9 +1181,6 @@ static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev, } xhci = hcd_to_xhci(hcd); - if (xhci->xhc_state & XHCI_STATE_HALTED) - return -ENODEV; - if (check_virt_dev) { if (!udev->slot_id || !xhci->devs[udev->slot_id]) { printk(KERN_DEBUG "xHCI %s called with unaddressed " @@ -1199,6 +1196,9 @@ static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev, } } + if (xhci->xhc_state & XHCI_STATE_HALTED) + return -ENODEV; + return 1; } -- cgit v1.2.3-58-ga151 From d5c82feb5cf8026cd4af048330fdcd46e861c686 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Tue, 23 Jul 2013 11:58:20 -0700 Subject: usb: xhci: Mark two functions __maybe_unused Resolves the following build warnings: drivers/usb/host/xhci.c:332:13: warning: 'xhci_msix_sync_irqs' defined but not used [-Wunused-function] drivers/usb/host/xhci.c:3901:12: warning: 'xhci_change_max_exit_latency' defined but not used [-Wunused-function] These functions are not always used, and since they're marked static they will produce build warnings: - xhci_msix_sync_irqs is only used with CONFIG_PCI. - xhci_change_max_exit_latency is a little more complicated with dependencies on CONFIG_PM and CONFIG_PM_RUNTIME. Instead of building a bigger maze of ifdefs in this code, I've just marked both with __maybe_unused. Signed-off-by: Olof Johansson Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 98aac742a2c1..6bbf511645eb 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -329,7 +329,7 @@ static void xhci_cleanup_msix(struct xhci_hcd *xhci) return; } -static void xhci_msix_sync_irqs(struct xhci_hcd *xhci) +static void __maybe_unused xhci_msix_sync_irqs(struct xhci_hcd *xhci) { int i; @@ -3898,7 +3898,7 @@ int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1) * Issue an Evaluate Context command to change the Maximum Exit Latency in the * slot context. If that succeeds, store the new MEL in the xhci_virt_device. */ -static int xhci_change_max_exit_latency(struct xhci_hcd *xhci, +static int __maybe_unused xhci_change_max_exit_latency(struct xhci_hcd *xhci, struct usb_device *udev, u16 max_exit_latency) { struct xhci_virt_device *virt_dev; -- cgit v1.2.3-58-ga151 From c4d949b76f2c714e0da20c53252e586e8127ac79 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 23 Jul 2013 15:22:59 -0700 Subject: usb: fix build warning in pci-quirks.h when CONFIG_PCI is not enabled Fix warning when CONFIG_PCI is not enabled (from commit 296365781903226a3fb8758901eaeec09d2798e4). drivers/usb/host/pci-quirks.h: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default] Signed-off-by: Randy Dunlap Reported-by: Geert Uytterhoeven Cc: Moiz Sonasath Signed-off-by: Sarah Sharp --- drivers/usb/host/pci-quirks.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h index 4b8a2092432f..978c849f9c9a 100644 --- a/drivers/usb/host/pci-quirks.h +++ b/drivers/usb/host/pci-quirks.h @@ -13,6 +13,7 @@ void usb_enable_xhci_ports(struct pci_dev *xhci_pdev); void usb_disable_xhci_ports(struct pci_dev *xhci_pdev); void sb800_prefetch(struct device *dev, int on); #else +struct pci_dev; static inline void usb_amd_quirk_pll_disable(void) {} static inline void usb_amd_quirk_pll_enable(void) {} static inline void usb_amd_dev_put(void) {} -- cgit v1.2.3-58-ga151 From 07f3cb7c28bf3f4dd80bfb136cf45810c46ac474 Mon Sep 17 00:00:00 2001 From: George Cherian Date: Mon, 1 Jul 2013 10:59:12 +0530 Subject: usb: host: xhci: Enable XHCI_SPURIOUS_SUCCESS for all controllers with xhci 1.0 Xhci controllers with hci_version > 0.96 gives spurious success events on short packet completion. During webcam capture the "ERROR Transfer event TRB DMA ptr not part of current TD" was observed. The same application works fine with synopsis controllers hci_version 0.96. The same issue is seen with Intel Pantherpoint xhci controller. So enabling this quirk in xhci_gen_setup if controller verion is greater than 0.96. For xhci-pci move the quirk to much generic place xhci_gen_setup. Note from Sarah: The xHCI 1.0 spec changed how hardware handles short packets. The HW will notify SW of the TRB where the short packet occurred, and it will also give a successful status for the last TRB in a TD (the one with the IOC flag set). On the second successful status, that warning will be triggered in the driver. Software is now supposed to not assume the TD is not completed until it gets that last successful status. That means we have a slight race condition, although it should have little practical impact. This patch papers over that issue. It's on my long-term to-do list to fix this race condition, but it is a much more involved patch that will probably be too big for stable. This patch is needed for stable to avoid serious log spam. This patch should be backported to kernels as old as 3.0, that contain the commit ad808333d8201d53075a11bc8dd83b81f3d68f0b "Intel xhci: Ignore spurious successful event." The patch will have to be modified for kernels older than 3.2, since that kernel added the xhci_gen_setup function for xhci platform devices. The correct conflict resolution for kernels older than 3.2 is to set XHCI_SPURIOUS_SUCCESS in xhci_pci_quirks for all xHCI 1.0 hosts. Signed-off-by: George Cherian Signed-off-by: Sarah Sharp Cc: stable@vger.kernel.org --- drivers/usb/host/xhci-pci.c | 1 - drivers/usb/host/xhci.c | 7 +++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index cc24e39b97d5..f00cb203faea 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -93,7 +93,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) } if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) { - xhci->quirks |= XHCI_SPURIOUS_SUCCESS; xhci->quirks |= XHCI_EP_LIMIT_QUIRK; xhci->limit_active_eps = 64; xhci->quirks |= XHCI_SW_BW_CHECKING; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6bbf511645eb..41eb4fc33453 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4892,6 +4892,13 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) get_quirks(dev, xhci); + /* In xhci controllers which follow xhci 1.0 spec gives a spurious + * success event after a short transfer. This quirk will ignore such + * spurious event. + */ + if (xhci->hci_version > 0x96) + xhci->quirks |= XHCI_SPURIOUS_SUCCESS; + /* Make sure the HC is halted. */ retval = xhci_halt(xhci); if (retval) -- cgit v1.2.3-58-ga151 From d66eaf9f89502971fddcb0de550b01fa6f409d83 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sun, 21 Jul 2013 15:36:19 +0200 Subject: xhci: fix null pointer dereference on ring_doorbell_for_active_rings in some cases where device is attched to xhci port and do not responding, for example ath9k_htc with stalled firmware, kernel will crash on ring_doorbell_for_active_rings. This patch check if pointer exist before it is used. This patch should be backported to kernels as old as 2.6.35, that contain the commit e9df17eb1408cfafa3d1844bfc7f22c7237b31b8 "USB: xhci: Correct assumptions about number of rings per endpoint" Signed-off-by: Oleksij Rempel Signed-off-by: Sarah Sharp Cc: stable@vger.kernel.org --- drivers/usb/host/xhci-ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 1e57eafa6910..5b08cd85f8e7 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -434,7 +434,7 @@ static void ring_doorbell_for_active_rings(struct xhci_hcd *xhci, /* A ring has pending URBs if its TD list is not empty */ if (!(ep->ep_state & EP_HAS_STREAMS)) { - if (!(list_empty(&ep->ring->td_list))) + if (ep->ring && !(list_empty(&ep->ring->td_list))) xhci_ring_ep_doorbell(xhci, slot_id, ep_index, 0); return; } -- cgit v1.2.3-58-ga151 From db6c2c69c27f2c4fd1e2a1c6b0b1119b1e885f8a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 23 Jul 2013 01:43:20 +0200 Subject: pinctrl: fix a memleak when freeing maps We forgot to free the node itself when free:ing a map. Reported-by: xulinuxkernel Reviewed-by: Stephen Warren Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 5b272bfd261d..2a00239661b3 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1193,6 +1193,7 @@ void pinctrl_unregister_map(struct pinctrl_map const *map) list_for_each_entry(maps_node, &pinctrl_maps, node) { if (maps_node->maps == map) { list_del(&maps_node->node); + kfree(maps_node); mutex_unlock(&pinctrl_maps_mutex); return; } -- cgit v1.2.3-58-ga151 From 3d1a69e726406ab662ab88fa30a3a05ed404334d Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Sat, 29 Jun 2013 15:33:35 +0200 Subject: usb: serial: option: blacklist ONDA MT689DC QMI interface Prevent the option driver from binding itself to the QMI/WWAN interface, making it unusable by the proper driver. Signed-off-by: enrico Mioso Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 43093850045b..085da5f210ec 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -819,7 +819,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0017, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0018, 0xff, 0xff, 0xff) }, - { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0020, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0021, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, -- cgit v1.2.3-58-ga151 From 4cf76df06ecc852633ed927d91e01c83c33bc331 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 10 Jul 2013 12:25:02 -0500 Subject: usb: serial: option: add Olivetti Olicard 200 Speaks AT on interfaces 5 (command & PPP) and 3 (secondary), other interface protocols are unknown. Signed-off-by: Dan Williams Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 085da5f210ec..083b3e902894 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -341,6 +341,7 @@ static void option_instat_callback(struct urb *urb); #define OLIVETTI_VENDOR_ID 0x0b3c #define OLIVETTI_PRODUCT_OLICARD100 0xc000 #define OLIVETTI_PRODUCT_OLICARD145 0xc003 +#define OLIVETTI_PRODUCT_OLICARD200 0xc005 /* Celot products */ #define CELOT_VENDOR_ID 0x211f @@ -1259,6 +1260,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) }, + { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200) }, { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */ { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */ { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/ -- cgit v1.2.3-58-ga151 From 878c69aae986ae97084458c0183a8c0a059865b1 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Sat, 13 Jul 2013 18:54:14 +0200 Subject: usb: serial: option.c: remove ONDA MT825UP product ID fromdriver Some (very few) early devices like mine, where not exposting a proper CDC descriptor. This was fixed with an immediate firmware update from the vendor, and pre-installed on newer devices. So actual devices can be driven by cdc_acm.c + cdc_ether.c. Signed-off-by: Enrico Mioso Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 083b3e902894..1cf6f125f5f0 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -347,12 +347,6 @@ static void option_instat_callback(struct urb *urb); #define CELOT_VENDOR_ID 0x211f #define CELOT_PRODUCT_CT680M 0x6801 -/* ONDA Communication vendor id */ -#define ONDA_VENDOR_ID 0x1ee8 - -/* ONDA MT825UP HSDPA 14.2 modem */ -#define ONDA_MT825UP 0x000b - /* Samsung products */ #define SAMSUNG_VENDOR_ID 0x04e8 #define SAMSUNG_PRODUCT_GT_B3730 0x6889 @@ -1262,7 +1256,6 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200) }, { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */ - { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */ { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/ { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) }, -- cgit v1.2.3-58-ga151 From 5f8a2e68b679b41cc8e9b642f2f5aa45dd678641 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 1 Jul 2013 14:03:33 +0200 Subject: USB: mos7840: fix memory leak in open Allocated urbs and buffers were never freed on errors in open. Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 0a818b238508..603fb70dde80 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -905,20 +905,20 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) status = mos7840_get_reg_sync(port, mos7840_port->SpRegOffset, &Data); if (status < 0) { dev_dbg(&port->dev, "Reading Spreg failed\n"); - return -1; + goto err; } Data |= 0x80; status = mos7840_set_reg_sync(port, mos7840_port->SpRegOffset, Data); if (status < 0) { dev_dbg(&port->dev, "writing Spreg failed\n"); - return -1; + goto err; } Data &= ~0x80; status = mos7840_set_reg_sync(port, mos7840_port->SpRegOffset, Data); if (status < 0) { dev_dbg(&port->dev, "writing Spreg failed\n"); - return -1; + goto err; } /* End of block to be checked */ @@ -927,7 +927,7 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) &Data); if (status < 0) { dev_dbg(&port->dev, "Reading Controlreg failed\n"); - return -1; + goto err; } Data |= 0x08; /* Driver done bit */ Data |= 0x20; /* rx_disable */ @@ -935,7 +935,7 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) mos7840_port->ControlRegOffset, Data); if (status < 0) { dev_dbg(&port->dev, "writing Controlreg failed\n"); - return -1; + goto err; } /* do register settings here */ /* Set all regs to the device default values. */ @@ -946,21 +946,21 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) status = mos7840_set_uart_reg(port, INTERRUPT_ENABLE_REGISTER, Data); if (status < 0) { dev_dbg(&port->dev, "disabling interrupts failed\n"); - return -1; + goto err; } /* Set FIFO_CONTROL_REGISTER to the default value */ Data = 0x00; status = mos7840_set_uart_reg(port, FIFO_CONTROL_REGISTER, Data); if (status < 0) { dev_dbg(&port->dev, "Writing FIFO_CONTROL_REGISTER failed\n"); - return -1; + goto err; } Data = 0xcf; status = mos7840_set_uart_reg(port, FIFO_CONTROL_REGISTER, Data); if (status < 0) { dev_dbg(&port->dev, "Writing FIFO_CONTROL_REGISTER failed\n"); - return -1; + goto err; } Data = 0x03; @@ -1103,6 +1103,15 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) /* mos7840_change_port_settings(mos7840_port,old_termios); */ return 0; +err: + for (j = 0; j < NUM_URBS; ++j) { + urb = mos7840_port->write_urb_pool[j]; + if (!urb) + continue; + kfree(urb->transfer_buffer); + usb_free_urb(urb); + } + return status; } /***************************************************************************** -- cgit v1.2.3-58-ga151 From 172d934c92089e9183165f7024eba9508cb9ebce Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Thu, 25 Jul 2013 09:13:18 +0200 Subject: usb/gadget: free opts struct on error recovery Fix memory leaks introduced in commits: 40d133d7f542616cf9538508a372306e626a16e9 usb: gadget: f_ncm: convert to new function interface with backward compatibility fee562a6450b7806f1fbbe1469a67b5395b5c10a usb: gadget: f_ecm: convert to new function interface with backward compatibility fcbdf12ebef73a6069e2a1aada1e546fb578a4aa usb: gadget: f_phonet: convert to new function interface with backward compatibility b29002a157940752dfed2c488b2011f63f007d71 usb: gadget: f_eem: convert to new function interface with backward compatibility 8cedba7c73af1369599b1111639cfeb66fe13aaa usb: gadget: f_subset: convert to new function interface with backward compatibility f466c6353819326873fa48a02c6f2d7c903240d6 usb: gadget: f_rndis: convert to new function interface with backward compatibility Signed-off-by: Andrzej Pietrasiewicz Signed-off-by: Kyungmin Park Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/f_ecm.c | 7 +++++-- drivers/usb/gadget/f_eem.c | 7 +++++-- drivers/usb/gadget/f_ncm.c | 7 +++++-- drivers/usb/gadget/f_phonet.c | 7 +++++-- drivers/usb/gadget/f_rndis.c | 7 +++++-- drivers/usb/gadget/f_subset.c | 7 +++++-- 6 files changed, 30 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/gadget/f_ecm.c b/drivers/usb/gadget/f_ecm.c index 5d3561ea1c15..edab45da3741 100644 --- a/drivers/usb/gadget/f_ecm.c +++ b/drivers/usb/gadget/f_ecm.c @@ -959,8 +959,11 @@ static struct usb_function_instance *ecm_alloc_inst(void) mutex_init(&opts->lock); opts->func_inst.free_func_inst = ecm_free_inst; opts->net = gether_setup_default(); - if (IS_ERR(opts->net)) - return ERR_PTR(PTR_ERR(opts->net)); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } config_group_init_type_name(&opts->func_inst.group, "", &ecm_func_type); diff --git a/drivers/usb/gadget/f_eem.c b/drivers/usb/gadget/f_eem.c index 90ee8022e8d8..d00392d879db 100644 --- a/drivers/usb/gadget/f_eem.c +++ b/drivers/usb/gadget/f_eem.c @@ -593,8 +593,11 @@ static struct usb_function_instance *eem_alloc_inst(void) mutex_init(&opts->lock); opts->func_inst.free_func_inst = eem_free_inst; opts->net = gether_setup_default(); - if (IS_ERR(opts->net)) - return ERR_CAST(opts->net); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } config_group_init_type_name(&opts->func_inst.group, "", &eem_func_type); diff --git a/drivers/usb/gadget/f_ncm.c b/drivers/usb/gadget/f_ncm.c index 952177f7eb9b..1c28fe13328a 100644 --- a/drivers/usb/gadget/f_ncm.c +++ b/drivers/usb/gadget/f_ncm.c @@ -1350,8 +1350,11 @@ static struct usb_function_instance *ncm_alloc_inst(void) mutex_init(&opts->lock); opts->func_inst.free_func_inst = ncm_free_inst; opts->net = gether_setup_default(); - if (IS_ERR(opts->net)) - return ERR_PTR(PTR_ERR(opts->net)); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } config_group_init_type_name(&opts->func_inst.group, "", &ncm_func_type); diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c index 7944fb0efe3b..1bf26e9f38cd 100644 --- a/drivers/usb/gadget/f_phonet.c +++ b/drivers/usb/gadget/f_phonet.c @@ -656,8 +656,11 @@ static struct usb_function_instance *phonet_alloc_inst(void) opts->func_inst.free_func_inst = phonet_free_inst; opts->net = gphonet_setup_default(); - if (IS_ERR(opts->net)) - return ERR_PTR(PTR_ERR(opts->net)); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } config_group_init_type_name(&opts->func_inst.group, "", &phonet_func_type); diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c index 191df35ae69d..717ed7f95639 100644 --- a/drivers/usb/gadget/f_rndis.c +++ b/drivers/usb/gadget/f_rndis.c @@ -963,8 +963,11 @@ static struct usb_function_instance *rndis_alloc_inst(void) mutex_init(&opts->lock); opts->func_inst.free_func_inst = rndis_free_inst; opts->net = gether_setup_default(); - if (IS_ERR(opts->net)) - return ERR_CAST(opts->net); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } config_group_init_type_name(&opts->func_inst.group, "", &rndis_func_type); diff --git a/drivers/usb/gadget/f_subset.c b/drivers/usb/gadget/f_subset.c index 5601e1d96c4f..7c8674fa7e80 100644 --- a/drivers/usb/gadget/f_subset.c +++ b/drivers/usb/gadget/f_subset.c @@ -505,8 +505,11 @@ static struct usb_function_instance *geth_alloc_inst(void) mutex_init(&opts->lock); opts->func_inst.free_func_inst = geth_free_inst; opts->net = gether_setup_default(); - if (IS_ERR(opts->net)) - return ERR_CAST(opts->net); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } config_group_init_type_name(&opts->func_inst.group, "", &gether_func_type); -- cgit v1.2.3-58-ga151 From 2c7b871b9102c497ba8f972aa5d38532f05b654d Mon Sep 17 00:00:00 2001 From: William Gulland Date: Thu, 27 Jun 2013 16:10:20 -0700 Subject: usb: Clear both buffers when clearing a control transfer TT buffer. Control transfers have both IN and OUT (or SETUP) packets, so when clearing TT buffers for a control transfer it's necessary to send two HUB_CLEAR_TT_BUFFER requests to the hub. Signed-off-by: William Gulland Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers') diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index c857471519e3..4a8a1d68002c 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -668,6 +668,15 @@ resubmit: static inline int hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt) { + /* Need to clear both directions for control ep */ + if (((devinfo >> 11) & USB_ENDPOINT_XFERTYPE_MASK) == + USB_ENDPOINT_XFER_CONTROL) { + int status = usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), + HUB_CLEAR_TT_BUFFER, USB_RT_PORT, + devinfo ^ 0x8000, tt, NULL, 0, 1000); + if (status) + return status; + } return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo, tt, NULL, 0, 1000); -- cgit v1.2.3-58-ga151 From e3c736fe47289dc6f577c8b20cc146bd7e69aff0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 25 Jul 2013 18:27:45 -0400 Subject: drm/radeon/dpm: fix a typo in the rv6xx mclk setup Need to set high for the last two entries. Looks like a copy and paste typo. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/rv6xx_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c index 65e33f387341..b1c2a62df950 100644 --- a/drivers/gpu/drm/radeon/rv6xx_dpm.c +++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c @@ -819,7 +819,7 @@ static void rv6xx_program_memory_timing_parameters(struct radeon_device *rdev) POWERMODE1(calculate_memory_refresh_rate(rdev, pi->hw.sclks[R600_POWER_LEVEL_MEDIUM])) | POWERMODE2(calculate_memory_refresh_rate(rdev, - pi->hw.sclks[R600_POWER_LEVEL_MEDIUM])) | + pi->hw.sclks[R600_POWER_LEVEL_HIGH])) | POWERMODE3(calculate_memory_refresh_rate(rdev, pi->hw.sclks[R600_POWER_LEVEL_HIGH]))); WREG32(ARB_RFSH_RATE, arb_refresh_rate); -- cgit v1.2.3-58-ga151 From 2333a003a83ae8b257ac4bc1bb297c897c1ebb90 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 25 Jul 2013 18:29:14 -0400 Subject: drm/radeon/dpm: fix displaygap programming on rv6xx Need to use the driver state rather than the register state since the displays may not be enabled when the power state is programmed. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/rv6xx_dpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c index b1c2a62df950..dde402340f22 100644 --- a/drivers/gpu/drm/radeon/rv6xx_dpm.c +++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c @@ -1182,10 +1182,10 @@ static void rv6xx_program_display_gap(struct radeon_device *rdev) u32 tmp = RREG32(CG_DISPLAY_GAP_CNTL); tmp &= ~(DISP1_GAP_MCHG_MASK | DISP2_GAP_MCHG_MASK); - if (RREG32(AVIVO_D1CRTC_CONTROL) & AVIVO_CRTC_EN) { + if (rdev->pm.dpm.new_active_crtcs & 1) { tmp |= DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK); tmp |= DISP2_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE); - } else if (RREG32(AVIVO_D2CRTC_CONTROL) & AVIVO_CRTC_EN) { + } else if (rdev->pm.dpm.new_active_crtcs & 2) { tmp |= DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE); tmp |= DISP2_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK); } else { -- cgit v1.2.3-58-ga151 From 515c0967205f2e6d0ca1602ce0de65f9aec1d215 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 24 Jul 2013 08:42:27 -0700 Subject: mfd: max8925: fix dt code for backlight The device-tree enablement for max8925 has several problems, but besides the bindings being wrong (and not having seen review) there's also some bad coding practices on how to fill in the platform_data from device tree. I came across this since it causes a warning when compiling mmp2_defconfig, and instead of doing the minimal fix to silence the warning, I restructured the code a bit. This silences the warning: drivers/video/backlight/max8925_bl.c: In function 'max8925_backlight_probe': drivers/video/backlight/max8925_bl.c:177:3: warning: statement with no effect [-Wunused-value] Note that the bindings themselves need to be revisited too, but that will affect more than just the backlight driver and is best done separately; this just fixes the bad code for the backlight driver. Acked-by: Jingoo Han Signed-off-by: Olof Johansson --- drivers/video/backlight/max8925_bl.c | 41 ++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/video/backlight/max8925_bl.c b/drivers/video/backlight/max8925_bl.c index 5ca11b066b7e..886e797f75f9 100644 --- a/drivers/video/backlight/max8925_bl.c +++ b/drivers/video/backlight/max8925_bl.c @@ -101,33 +101,37 @@ static const struct backlight_ops max8925_backlight_ops = { .get_brightness = max8925_backlight_get_brightness, }; -#ifdef CONFIG_OF -static int max8925_backlight_dt_init(struct platform_device *pdev, - struct max8925_backlight_pdata *pdata) +static void max8925_backlight_dt_init(struct platform_device *pdev) { struct device_node *nproot = pdev->dev.parent->of_node, *np; - int dual_string; + struct max8925_backlight_pdata *pdata; + u32 val; + + if (!nproot || !IS_ENABLED(CONFIG_OF)) + return; + + pdata = devm_kzalloc(&pdev->dev, + sizeof(struct max8925_backlight_pdata), + GFP_KERNEL); + if (!pdata) + return; - if (!nproot) - return -ENODEV; np = of_find_node_by_name(nproot, "backlight"); if (!np) { dev_err(&pdev->dev, "failed to find backlight node\n"); - return -ENODEV; + return; } - of_property_read_u32(np, "maxim,max8925-dual-string", &dual_string); - pdata->dual_string = dual_string; - return 0; + if (!of_property_read_u32(np, "maxim,max8925-dual-string", &val)) + pdata->dual_string = val; + + pdev->dev.platform_data = pdata; } -#else -#define max8925_backlight_dt_init(x, y) (-1) -#endif static int max8925_backlight_probe(struct platform_device *pdev) { struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent); - struct max8925_backlight_pdata *pdata = pdev->dev.platform_data; + struct max8925_backlight_pdata *pdata; struct max8925_backlight_data *data; struct backlight_device *bl; struct backlight_properties props; @@ -170,13 +174,10 @@ static int max8925_backlight_probe(struct platform_device *pdev) platform_set_drvdata(pdev, bl); value = 0; - if (pdev->dev.parent->of_node && !pdata) { - pdata = devm_kzalloc(&pdev->dev, - sizeof(struct max8925_backlight_pdata), - GFP_KERNEL); - max8925_backlight_dt_init(pdev, pdata); - } + if (!pdev->dev.platform_data) + max8925_backlight_dt_init(pdev); + pdata = pdev->dev.platform_data; if (pdata) { if (pdata->lxw_scl) value |= (1 << 7); -- cgit v1.2.3-58-ga151 From f4f85a8c94752300ad2c4f7a3475a3d3b8ef72ca Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 25 Jul 2013 20:07:25 -0400 Subject: drm/radeon/dpm: implement force performance levels for rv6xx Allows you to limit the selected power levels via sysfs. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_asic.c | 1 + drivers/gpu/drm/radeon/radeon_asic.h | 2 ++ drivers/gpu/drm/radeon/rv6xx_dpm.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 78bec1a58ed1..f8f8b3113ddd 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1161,6 +1161,7 @@ static struct radeon_asic rv6xx_asic = { .get_mclk = &rv6xx_dpm_get_mclk, .print_power_state = &rv6xx_dpm_print_power_state, .debugfs_print_current_performance_level = &rv6xx_dpm_debugfs_print_current_performance_level, + .force_performance_level = &rv6xx_dpm_force_performance_level, }, .pflip = { .pre_page_flip = &rs600_pre_page_flip, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index ca1895709908..902479fa737f 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -421,6 +421,8 @@ void rv6xx_dpm_print_power_state(struct radeon_device *rdev, struct radeon_ps *ps); void rv6xx_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, struct seq_file *m); +int rv6xx_dpm_force_performance_level(struct radeon_device *rdev, + enum radeon_dpm_forced_level level); /* rs780 dpm */ int rs780_dpm_init(struct radeon_device *rdev); int rs780_dpm_enable(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c index dde402340f22..363018c60412 100644 --- a/drivers/gpu/drm/radeon/rv6xx_dpm.c +++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c @@ -1670,6 +1670,8 @@ int rv6xx_dpm_set_power_state(struct radeon_device *rdev) struct radeon_ps *old_ps = rdev->pm.dpm.current_ps; int ret; + pi->restricted_levels = 0; + rv6xx_set_uvd_clock_before_set_eng_clock(rdev, new_ps, old_ps); rv6xx_clear_vc(rdev); @@ -1756,6 +1758,8 @@ int rv6xx_dpm_set_power_state(struct radeon_device *rdev) rv6xx_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); + rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO; + return 0; } @@ -2085,3 +2089,34 @@ u32 rv6xx_dpm_get_mclk(struct radeon_device *rdev, bool low) else return requested_state->high.mclk; } + +int rv6xx_dpm_force_performance_level(struct radeon_device *rdev, + enum radeon_dpm_forced_level level) +{ + struct rv6xx_power_info *pi = rv6xx_get_pi(rdev); + + if (level == RADEON_DPM_FORCED_LEVEL_HIGH) { + pi->restricted_levels = 3; + } else if (level == RADEON_DPM_FORCED_LEVEL_LOW) { + pi->restricted_levels = 2; + } else { + pi->restricted_levels = 0; + } + + rv6xx_clear_vc(rdev); + r600_power_level_enable(rdev, R600_POWER_LEVEL_LOW, true); + r600_set_at(rdev, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); + r600_wait_for_power_level(rdev, R600_POWER_LEVEL_LOW); + r600_power_level_enable(rdev, R600_POWER_LEVEL_HIGH, false); + r600_power_level_enable(rdev, R600_POWER_LEVEL_MEDIUM, false); + rv6xx_enable_medium(rdev); + rv6xx_enable_high(rdev); + if (pi->restricted_levels == 3) + r600_power_level_enable(rdev, R600_POWER_LEVEL_LOW, false); + rv6xx_program_vc(rdev); + rv6xx_program_at(rdev); + + rdev->pm.dpm.forced_level = level; + + return 0; +} -- cgit v1.2.3-58-ga151 From f5d9b7f0f93c6a7d40750b8b5528a1e0f0c678fb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 25 Jul 2013 21:46:21 -0400 Subject: drm/radeon/dpm: fix r600_enable_sclk_control() Actually program the correct register to enable engine clock scaling control. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600_dpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index b88f54b134ab..e5c860f4ccbe 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -278,9 +278,9 @@ bool r600_dynamicpm_enabled(struct radeon_device *rdev) void r600_enable_sclk_control(struct radeon_device *rdev, bool enable) { if (enable) - WREG32_P(GENERAL_PWRMGT, 0, ~SCLK_PWRMGT_OFF); + WREG32_P(SCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_OFF); else - WREG32_P(GENERAL_PWRMGT, SCLK_PWRMGT_OFF, ~SCLK_PWRMGT_OFF); + WREG32_P(SCLK_PWRMGT_CNTL, SCLK_PWRMGT_OFF, ~SCLK_PWRMGT_OFF); } void r600_enable_mclk_control(struct radeon_device *rdev, bool enable) -- cgit v1.2.3-58-ga151 From 867974fc09f93bdd7f98d46ac3733934486bbf4a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 26 Jul 2013 08:57:56 -0400 Subject: ahci_imx: depend on CONFIG_MFD_SYSCON ahci_imx makes use of regmap but the dependency wasn't specified in Kconfig leading build failures if CONFIG_AHCI_IMX is enabled but CONFIG_MFD_SYSCON is not. Add the Kconfig dependency. Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell --- drivers/ata/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index a4af0a2a089c..4e737728aee2 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -99,7 +99,7 @@ config SATA_AHCI_PLATFORM config AHCI_IMX tristate "Freescale i.MX AHCI SATA support" - depends on SATA_AHCI_PLATFORM + depends on SATA_AHCI_PLATFORM && MFD_SYSCON help This option enables support for the Freescale i.MX SoC's onboard AHCI SATA. -- cgit v1.2.3-58-ga151 From 8e5c2b776ae4c35f54547c017e0a943429f5748a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Jul 2013 21:43:39 +0200 Subject: Revert "ACPI / video / i915: No ACPI backlight if firmware expects Windows 8" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We attempted to address a regression introduced by commit a57f7f9 (ACPICA: Add Windows8/Server2012 string for _OSI method.) after which ACPI video backlight support doesn't work on a number of systems, because the relevant AML methods in the ACPI tables in their BIOSes become useless after the BIOS has been told that the OS is compatible with Windows 8. That problem is tracked by the bug entry at: https://bugzilla.kernel.org/show_bug.cgi?id=51231 Commit 8c5bd7a (ACPI / video / i915: No ACPI backlight if firmware expects Windows 8) introduced for this purpose essentially prevented the ACPI backlight support from being used if the BIOS had been told that the OS was compatible with Windows 8 and the i915 driver was loaded, in which case the backlight would always be handled by i915. Unfortunately, however, that turned out to cause problems with backlight to appear on multiple systems with symptoms indicating that i915 was unable to control the backlight on those systems as expected. For this reason, revert commit 8c5bd7a, but leave the function acpi_video_backlight_quirks() introduced by it, because another commit on top of it uses that function. References: https://lkml.org/lkml/2013/7/21/119 References: https://lkml.org/lkml/2013/7/22/261 References: https://lkml.org/lkml/2013/7/23/429 References: https://lkml.org/lkml/2013/7/23/459 References: https://lkml.org/lkml/2013/7/23/81 References: https://lkml.org/lkml/2013/7/24/27 Reported-and-tested-by: James Hogan Reported-and-tested-by: Kamal Mostafa Reported-and-tested-by: Jörg Otte Reported-and-tested-by: Steven Newbury Reported-by: Martin Steigerwald Reported-by: Kalle Valo Tested-by: Joerg Platte Signed-off-by: Rafael J. Wysocki --- drivers/acpi/internal.h | 2 -- drivers/acpi/video.c | 67 +++++------------------------------------ drivers/acpi/video_detect.c | 15 +-------- drivers/gpu/drm/i915/i915_dma.c | 2 +- include/acpi/video.h | 11 +------ include/linux/acpi.h | 1 - 6 files changed, 11 insertions(+), 87 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 227aca77ee1e..5da44e81dd4d 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -169,10 +169,8 @@ int acpi_create_platform_device(struct acpi_device *adev, -------------------------------------------------------------------------- */ #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) bool acpi_video_backlight_quirks(void); -bool acpi_video_verify_backlight_support(void); #else static inline bool acpi_video_backlight_quirks(void) { return false; } -static inline bool acpi_video_verify_backlight_support(void) { return false; } #endif #endif /* _ACPI_INTERNAL_H_ */ diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 6dd237e79b4f..0ec434d2586d 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -911,7 +911,7 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device) if (acpi_video_init_brightness(device)) return; - if (acpi_video_verify_backlight_support()) { + if (acpi_video_backlight_support()) { struct backlight_properties props; struct pci_dev *pdev; acpi_handle acpi_parent; @@ -1366,8 +1366,8 @@ acpi_video_switch_brightness(struct acpi_video_device *device, int event) unsigned long long level_current, level_next; int result = -EINVAL; - /* no warning message if acpi_backlight=vendor or a quirk is used */ - if (!acpi_video_verify_backlight_support()) + /* no warning message if acpi_backlight=vendor is used */ + if (!acpi_video_backlight_support()) return 0; if (!device->brightness) @@ -1875,46 +1875,6 @@ static int acpi_video_bus_remove(struct acpi_device *device) return 0; } -static acpi_status video_unregister_backlight(acpi_handle handle, u32 lvl, - void *context, void **rv) -{ - struct acpi_device *acpi_dev; - struct acpi_video_bus *video; - struct acpi_video_device *dev, *next; - - if (acpi_bus_get_device(handle, &acpi_dev)) - return AE_OK; - - if (acpi_match_device_ids(acpi_dev, video_device_ids)) - return AE_OK; - - video = acpi_driver_data(acpi_dev); - if (!video) - return AE_OK; - - acpi_video_bus_stop_devices(video); - mutex_lock(&video->device_list_lock); - list_for_each_entry_safe(dev, next, &video->video_device_list, entry) { - if (dev->backlight) { - backlight_device_unregister(dev->backlight); - dev->backlight = NULL; - kfree(dev->brightness->levels); - kfree(dev->brightness); - } - if (dev->cooling_dev) { - sysfs_remove_link(&dev->dev->dev.kobj, - "thermal_cooling"); - sysfs_remove_link(&dev->cooling_dev->device.kobj, - "device"); - thermal_cooling_device_unregister(dev->cooling_dev); - dev->cooling_dev = NULL; - } - } - mutex_unlock(&video->device_list_lock); - acpi_video_bus_start_devices(video); - return AE_OK; -} - static int __init is_i740(struct pci_dev *dev) { if (dev->device == 0x00D1) @@ -1946,25 +1906,14 @@ static int __init intel_opregion_present(void) return opregion; } -int __acpi_video_register(bool backlight_quirks) +int acpi_video_register(void) { - bool no_backlight; - int result; - - no_backlight = backlight_quirks ? acpi_video_backlight_quirks() : false; - + int result = 0; if (register_count) { /* - * If acpi_video_register() has been called already, don't try - * to register acpi_video_bus, but unregister backlight devices - * if no backlight support is requested. + * if the function of acpi_video_register is already called, + * don't register the acpi_vide_bus again and return no error. */ - if (no_backlight) - acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, - video_unregister_backlight, - NULL, NULL, NULL); - return 0; } @@ -1980,7 +1929,7 @@ int __acpi_video_register(bool backlight_quirks) return 0; } -EXPORT_SYMBOL(__acpi_video_register); +EXPORT_SYMBOL(acpi_video_register); void acpi_video_unregister(void) { diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 826e52def080..c3397748ba46 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -238,12 +238,7 @@ static void acpi_video_caps_check(void) bool acpi_video_backlight_quirks(void) { - if (acpi_gbl_osi_data >= ACPI_OSI_WIN_8) { - acpi_video_caps_check(); - acpi_video_support |= ACPI_VIDEO_SKIP_BACKLIGHT; - return true; - } - return false; + return acpi_gbl_osi_data >= ACPI_OSI_WIN_8; } EXPORT_SYMBOL(acpi_video_backlight_quirks); @@ -291,14 +286,6 @@ int acpi_video_backlight_support(void) } EXPORT_SYMBOL(acpi_video_backlight_support); -/* For the ACPI video driver use only. */ -bool acpi_video_verify_backlight_support(void) -{ - return (acpi_video_support & ACPI_VIDEO_SKIP_BACKLIGHT) ? - false : acpi_video_backlight_support(); -} -EXPORT_SYMBOL(acpi_video_verify_backlight_support); - /* * Use acpi_backlight=vendor/video to force that backlight switching * is processed by vendor specific acpi drivers or video.ko driver. diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index cf188ab7051a..adb319b53ecd 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1648,7 +1648,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (INTEL_INFO(dev)->num_pipes) { /* Must be done after probing outputs */ intel_opregion_init(dev); - acpi_video_register_with_quirks(); + acpi_video_register(); } if (IS_GEN5(dev)) diff --git a/include/acpi/video.h b/include/acpi/video.h index b26dc4fb7ba8..61109f2609fc 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -17,21 +17,12 @@ struct acpi_device; #define ACPI_VIDEO_DISPLAY_LEGACY_TV 0x0200 #if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE) -extern int __acpi_video_register(bool backlight_quirks); -static inline int acpi_video_register(void) -{ - return __acpi_video_register(false); -} -static inline int acpi_video_register_with_quirks(void) -{ - return __acpi_video_register(true); -} +extern int acpi_video_register(void); extern void acpi_video_unregister(void); extern int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, void **edid); #else static inline int acpi_video_register(void) { return 0; } -static inline int acpi_video_register_with_quirks(void) { return 0; } static inline void acpi_video_unregister(void) { return; } static inline int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, void **edid) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6ad72f92469c..353ba256f368 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -191,7 +191,6 @@ extern bool wmi_has_guid(const char *guid); #define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO 0x0200 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR 0x0400 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO 0x0800 -#define ACPI_VIDEO_SKIP_BACKLIGHT 0x1000 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) -- cgit v1.2.3-58-ga151