/* * Copyright (C) 2008 Oracle. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License v2 as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. * * Based on jffs2 zlib code: * Copyright © 2001-2007 Red Hat, Inc. * Created by David Woodhouse <dwmw2@infradead.org> */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/zlib.h> #include <linux/zutil.h> #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/err.h> #include <linux/sched.h> #include <linux/pagemap.h> #include <linux/bio.h> #include "compression.h" /* Plan: call deflate() with avail_in == *sourcelen, avail_out = *dstlen - 12 and flush == Z_FINISH. If it doesn't manage to finish, call it again with avail_in == 0 and avail_out set to the remaining 12 bytes for it to clean up. Q: Is 12 bytes sufficient? */ #define STREAM_END_SPACE 12 struct workspace { z_stream inf_strm; z_stream def_strm; char *buf; struct list_head list; }; static LIST_HEAD(idle_workspace); static DEFINE_SPINLOCK(workspace_lock); static unsigned long num_workspace; static atomic_t alloc_workspace = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); /* * this finds an available zlib workspace or allocates a new one * NULL or an ERR_PTR is returned if things go bad. */ static struct workspace *find_zlib_workspace(void) { struct workspace *workspace; int ret; int cpus = num_online_cpus(); again: spin_lock(&workspace_lock); if (!list_empty(&idle_workspace)) { workspace = list_entry(idle_workspace.next, struct workspace, list); list_del(&workspace->list); num_workspace--; spin_unlock(&workspace_lock); return workspace; } spin_unlock(&workspace_lock); if (atomic_read(&alloc_workspace) > cpus) { DEFINE_WAIT(wait); prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); if (atomic_read(&alloc_workspace) > cpus) schedule(); finish_wait(&workspace_wait, &wait); goto again; } atomic_inc(&alloc_workspace); workspace = kzalloc(sizeof(*workspace), GFP_NOFS); if (!workspace) { ret = -ENOMEM; goto fail; } workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); if (!workspace->def_strm.workspace) { ret = -ENOMEM; goto fail; } workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); if (!workspace->inf_strm.workspace) { ret = -ENOMEM; goto fail_inflate; } workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); if (!workspace->buf) { ret = -ENOMEM; goto fail_kmalloc; } return workspace; fail_kmalloc: vfree(workspace->inf_strm.workspace); fail_inflate: vfree(workspace->def_strm.workspace); fail: kfree(workspace); atomic_dec(&alloc_workspace); wake_up(&workspace_wait); return ERR_PTR(ret); } /* * put a workspace struct back on the list or free it if we have enough * idle ones sitting around */ static int free_workspace(struct workspace *workspace) { spin_lock(&workspace_lock); if (num_workspace < num_online_cpus()) { list_add_tail(&workspace->list, &idle_workspace); num_workspace++; spin_unlock(&workspace_lock); if (waitqueue_active(&workspace_wait)) wake_up(&workspace_wait); return 0; } spin_unlock(&workspace_lock); vfree(workspace->def_strm.workspace); vfree(workspace->inf_strm.workspace); kfree(workspace->buf); kfree(workspace); atomic_dec(&alloc_workspace); if (waitqueue_active(&workspace_wait)) wake_up(&workspace_wait); return 0; } /* * cleanup function for module exit */ static void free_workspaces(void) { struct workspace *workspace; while (!list_empty(&idle_workspace)) { workspace = list_entry(idle_workspace.next, struct workspace, list); list_del(&workspace->list); vfree(workspace->def_strm.workspace); vfree(workspace->inf_strm.workspace); kfree(workspace->buf); kfree(workspace); atomic_dec(&alloc_workspace); } } /* * given an address space and start/len, compress the bytes. * * pages are allocated to hold the compressed result and stored * in 'pages' * * out_pages is used to return the number of pages allocated. There * may be pages allocated even if we return an error * * total_in is used to return the number of bytes actually read. It * may be smaller then len if we had to exit early because we * ran out of room in the pages array or because we cross the * max_out threshold. * * total_out is used to return the total number of compressed bytes * * max_out tells us the max number of bytes that we're allowed to * stuff into pages */ int btrfs_zlib_compress_pages(struct address_space *mapping, u64 start, unsigned long len, struct page **pages, unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, unsigned long *total_out, unsigned long max_out) { int ret; struct workspace *workspace; char *data_in; char *cpage_out; int nr_pages = 0; struct page *in_page = NULL; struct page *out_page = NULL; int out_written = 0; int in_read = 0; unsigned long bytes_left; *out_pages = 0; *total_out = 0; *total_in = 0; workspace = find_zlib_workspace(); if (IS_ERR(workspace)) return -1; if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { printk(KERN_WARNING "deflateInit failed\n"); ret = -1; goto out; } workspace->def_strm.total_in = 0; workspace->def_strm.total_out = 0; in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); data_in = kmap(in_page); out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); cpage_out = kmap(out_page); pages[0] = out_page; nr_pages = 1; workspace->def_strm.next_in = data_in; workspace->def_strm.next_out = cpage_out; workspace->def_strm.avail_out = PAGE_CACHE_SIZE; workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); out_written = 0; in_read = 0; while (workspace->def_strm.total_in < len) { ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); if (ret != Z_OK) { printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", ret); zlib_deflateEnd(&workspace->def_strm); ret = -1; goto out; } /* we're making it bigger, give up */ if (workspace->def_strm.total_in > 8192 && workspace->def_strm.total_in < workspace->def_strm.total_out) { ret = -1; goto out; } /* we need another page for writing out. Test this * before the total_in so we will pull in a new page for * the stream end if required */ if (workspace->def_strm.avail_out == 0) { kunmap(out_page); if (nr_pages == nr_dest_pages) { out_page = NULL; ret = -1; goto out; } out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); cpage_out = kmap(out_page); pages[nr_pages] = out_page; nr_pages++; workspace->def_strm.avail_out = PAGE_CACHE_SIZE; workspace->def_strm.next_out = cpage_out; } /* we're all done */ if (workspace->def_strm.total_in >= len) break; /* we've read in a full page, get a new one */ if (workspace->def_strm.avail_in == 0) { if (workspace->def_strm.total_out > max_out) break; bytes_left = len - workspace->def_strm.total_in; kunmap(in_page); page_cache_release(in_page); start += PAGE_CACHE_SIZE; in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); data_in = kmap(in_page); workspace->def_strm.avail_in = min(bytes_left, PAGE_CACHE_SIZE); workspace->def_strm.next_in = data_in; } } workspace->def_strm.avail_in = 0; ret = zlib_deflate(&workspace->def_strm, Z_FINISH); zlib_deflateEnd(&workspace->def_strm); if (ret != Z_STREAM_END) { ret = -1; goto out; } if (workspace->def_strm.total_out >= workspace->def_strm.total_in) { ret = -1; goto out; } ret = 0; *total_out = workspace->def_strm.total_out; *total_in = workspace->def_strm.total_in; out: *out_pages = nr_pages; if (out_page) kunmap(out_page); if (in_page) { kunmap(in_page); page_cache_release(in_page); } free_workspace(workspace); return ret; } /* * pages_in is an array of pages with compressed data. * * disk_start is the starting logical offset of this array in the file * * bvec is a bio_vec of pages from the file that we want to decompress into * * vcnt is the count of pages in the biovec * * srclen is the number of bytes in pages_in * * The basic idea is that we have a bio that was created by readpages. * The pages in the bio are for the uncompressed data, and they may not * be contiguous. They all correspond to the range of bytes covered by * the compressed extent. */ int btrfs_zlib_decompress_biovec(struct page **pages_in, u64 disk_start, struct bio_vec *bvec, int vcnt, size_t srclen) { int ret = 0; int wbits = MAX_WBITS; struct workspace *workspace; char *data_in; size_t total_out = 0; unsigned long page_bytes_left; unsigned long page_in_index = 0; unsigned long page_out_index = 0; struct page *page_out; unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE; unsigned long buf_start; unsigned long buf_offset; unsigned long bytes; unsigned long working_bytes; unsigned long pg_offset; unsigned long start_byte; unsigned long current_buf_start; char *kaddr; workspace = find_zlib_workspace(); if (IS_ERR(workspace)) return -ENOMEM; data_in = kmap(pages_in[page_in_index]); workspace->inf_strm.next_in = data_in; workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE); workspace->inf_strm.total_in = 0; workspace->inf_strm.total_out = 0; workspace->inf_strm.next_out = workspace->buf; workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; page_out = bvec[page_out_index].bv_page; page_bytes_left = PAGE_CACHE_SIZE; pg_offset = 0; /* If it's deflate, and it's got no preset dictionary, then we can tell zlib to skip the adler32 check. */ if (srclen > 2 && !(data_in[1] & PRESET_DICT) && ((data_in[0] & 0x0f) == Z_DEFLATED) && !(((data_in[0]<<8) + data_in[1]) % 31)) { wbits = -((data_in[0] >> 4) + 8); workspace->inf_strm.next_in += 2; workspace->inf_strm.avail_in -= 2; } if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { printk(KERN_WARNING "inflateInit failed\n"); ret = -1; goto out; } while (workspace->inf_strm.total_in < srclen) { ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); if (ret != Z_OK && ret != Z_STREAM_END) break; /* * buf start is the byte offset we're of the start of * our workspace buffer */ buf_start = total_out; /* total_out is the last byte of the workspace buffer */ total_out = workspace->inf_strm.total_out; working_bytes = total_out - buf_start; /* * start byte is the first byte of the page we're currently * copying into relative to the start of the compressed data. */ start_byte = page_offset(page_out) - disk_start; if (working_bytes == 0) { /* we didn't make progress in this inflate * call, we're done */ if (ret != Z_STREAM_END) ret = -1; break; } /* we haven't yet hit data corresponding to this page */ if (total_out <= start_byte) goto next; /* * the start of the data we care about is offset into * the middle of our working buffer */ if (total_out > start_byte && buf_start < start_byte) { buf_offset = start_byte - buf_start; working_bytes -= buf_offset; } else { buf_offset = 0; } current_buf_start = buf_start; /* copy bytes from the working buffer into the pages */ while (working_bytes > 0) { bytes = min(PAGE_CACHE_SIZE - pg_offset, PAGE_CACHE_SIZE - buf_offset); bytes = min(bytes, working_bytes); kaddr = kmap_atomic(page_out, KM_USER0); memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); kunmap_atomic(kaddr, KM_USER0); flush_dcache_page(page_out); pg_offset += bytes; page_bytes_left -= bytes; buf_offset += bytes; working_bytes -= bytes; current_buf_start += bytes; /* check if we need to pick another page */ if (page_bytes_left == 0) { page_out_index++; if (page_out_index >= vcnt) { ret = 0; goto done; } page_out = bvec[page_out_index].bv_page; pg_offset = 0; page_bytes_left = PAGE_CACHE_SIZE; start_byte = page_offset(page_out) - disk_start; /* * make sure our new page is covered by this * working buffer */ if (total_out <= start_byte) goto next; /* the next page in the biovec might not * be adjacent to the last page, but it * might still be found inside this working * buffer. bump our offset pointer */ if (total_out > start_byte && current_buf_start < start_byte) { buf_offset = start_byte - buf_start; working_bytes = total_out - start_byte; current_buf_start = buf_start + buf_offset; } } } next: workspace->inf_strm.next_out = workspace->buf; workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; if (workspace->inf_strm.avail_in == 0) { unsigned long tmp; kunmap(pages_in[page_in_index]); page_in_index++; if (page_in_index >= total_pages_in) { data_in = NULL; break; } data_in = kmap(pages_in[page_in_index]); workspace->inf_strm.next_in = data_in; tmp = srclen - workspace->inf_strm.total_in; workspace->inf_strm.avail_in = min(tmp, PAGE_CACHE_SIZE); } } if (ret != Z_STREAM_END) ret = -1; else ret = 0; done: zlib_inflateEnd(&workspace->inf_strm); if (data_in) kunmap(pages_in[page_in_index]); out: free_workspace(workspace); return ret; } /* * a less complex decompression routine. Our compressed data fits in a * single page, and we want to read a single page out of it. * start_byte tells us the offset into the compressed data we're interested in */ int btrfs_zlib_decompress(unsigned char *data_in, struct page *dest_page, unsigned long start_byte, size_t srclen, size_t destlen) { int ret = 0; int wbits = MAX_WBITS; struct workspace *workspace; unsigned long bytes_left = destlen; unsigned long total_out = 0; char *kaddr; if (destlen > PAGE_CACHE_SIZE) return -ENOMEM; workspace = find_zlib_workspace(); if (IS_ERR(workspace)) return -ENOMEM; workspace->inf_strm.next_in = data_in; workspace->inf_strm.avail_in = srclen; workspace->inf_strm.total_in = 0; workspace->inf_strm.next_out = workspace->buf; workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; workspace->inf_strm.total_out = 0; /* If it's deflate, and it's got no preset dictionary, then we can tell zlib to skip the adler32 check. */ if (srclen > 2 && !(data_in[1] & PRESET_DICT) && ((data_in[0] & 0x0f) == Z_DEFLATED) && !(((data_in[0]<<8) + data_in[1]) % 31)) { wbits = -((data_in[0] >> 4) + 8); workspace->inf_strm.next_in += 2; workspace->inf_strm.avail_in -= 2; } if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { printk(KERN_WARNING "inflateInit failed\n"); ret = -1; goto out; } while (bytes_left > 0) { unsigned long buf_start; unsigned long buf_offset; unsigned long bytes; unsigned long pg_offset = 0; ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); if (ret != Z_OK && ret != Z_STREAM_END) break; buf_start = total_out; total_out = workspace->inf_strm.total_out; if (total_out == buf_start) { ret = -1; break; } if (total_out <= start_byte) goto next; if (total_out > start_byte && buf_start < start_byte) buf_offset = start_byte - buf_start; else buf_offset = 0; bytes = min(PAGE_CACHE_SIZE - pg_offset, PAGE_CACHE_SIZE - buf_offset); bytes = min(bytes, bytes_left); kaddr = kmap_atomic(dest_page, KM_USER0); memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); kunmap_atomic(kaddr, KM_USER0); pg_offset += bytes; bytes_left -= bytes; next: workspace->inf_strm.next_out = workspace->buf; workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; } if (ret != Z_STREAM_END && bytes_left != 0) ret = -1; else ret = 0; zlib_inflateEnd(&workspace->inf_strm); out: free_workspace(workspace); return ret; } void btrfs_zlib_exit(void) { free_workspaces(); }