From d208383d640727b70cd6689bc17e67e9b5ebf4ff Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 18 Nov 2013 02:31:36 +0000 Subject: [PATCH] Squashfs: add multi-threaded decompression using percpu variable Add a multi-threaded decompression implementation which uses percpu variables. Using percpu variables has advantages and disadvantages over implementations which do not use percpu variables. Advantages: * the nature of percpu variables ensures decompression is load-balanced across the multiple cores. * simplicity. Disadvantages: it limits decompression to one thread per core. Signed-off-by: Phillip Lougher --- fs/squashfs/Kconfig | 57 ++++++++++---- fs/squashfs/Makefile | 10 +-- fs/squashfs/decompressor_multi_percpu.c | 98 +++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 20 deletions(-) create mode 100644 fs/squashfs/decompressor_multi_percpu.c diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 1c6d340fc61..159bd6676dc 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -25,6 +25,50 @@ config SQUASHFS If unsure, say N. +choice + prompt "Decompressor parallelisation options" + depends on SQUASHFS + help + Squashfs now supports three parallelisation options for + decompression. Each one exhibits various trade-offs between + decompression performance and CPU and memory usage. + + If in doubt, select "Single threaded compression" + +config SQUASHFS_DECOMP_SINGLE + bool "Single threaded compression" + help + Traditionally Squashfs has used single-threaded decompression. + Only one block (data or metadata) can be decompressed at any + one time. This limits CPU and memory usage to a minimum. + +config SQUASHFS_DECOMP_MULTI + bool "Use multiple decompressors for parallel I/O" + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + If you have a parallel I/O workload and your system has enough memory, + using this option may improve overall I/O performance. + + This decompressor implementation uses up to two parallel + decompressors per core. It dynamically allocates decompressors + on a demand basis. + +config SQUASHFS_DECOMP_MULTI_PERCPU + bool "Use percpu multiple decompressors for parallel I/O" + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + This decompressor implementation uses a maximum of one + decompressor per core. It uses percpu variables to ensure + decompression is load-balanced across the cores. + +endchoice + config SQUASHFS_XATTR bool "Squashfs XATTR support" depends on SQUASHFS @@ -63,19 +107,6 @@ config SQUASHFS_LZO If unsure, say N. -config SQUASHFS_MULTI_DECOMPRESSOR - bool "Use multiple decompressors for handling parallel I/O" - depends on SQUASHFS - help - By default Squashfs uses a single decompressor but it gives - poor performance on parallel I/O workloads when using multiple CPU - machines due to waiting on decompressor availability. - - If you have a parallel I/O workload and your system has enough memory, - using this option may improve overall I/O performance. - - If unsure, say N. - config SQUASHFS_XZ bool "Include support for XZ compressed file systems" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index dfebc3b12d6..5833b96ee69 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,14 +5,10 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o decompressor.o - +squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o - -ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR - squashfs-y += decompressor_multi.o -else - squashfs-y += decompressor_single.o -endif diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c new file mode 100644 index 00000000000..0e7b679bc4a --- /dev/null +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements multi-threaded decompression using percpu + * variables, one thread per cpu core. + */ + +struct squashfs_stream { + void *stream; +}; + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + struct squashfs_stream __percpu *percpu; + int err, cpu; + + percpu = alloc_percpu(struct squashfs_stream); + if (percpu == NULL) + return ERR_PTR(-ENOMEM); + + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + stream->stream = msblk->decompressor->init(msblk, comp_opts); + if (IS_ERR(stream->stream)) { + err = PTR_ERR(stream->stream); + goto out; + } + } + + kfree(comp_opts); + return (__force void *) percpu; + +out: + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + if (!IS_ERR_OR_NULL(stream->stream)) + msblk->decompressor->free(stream->stream); + } + free_percpu(percpu); + return ERR_PTR(err); +} + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; + struct squashfs_stream *stream; + int cpu; + + if (msblk->stream) { + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + msblk->decompressor->free(stream->stream); + } + free_percpu(percpu); + } +} + +int squashfs_decompress(struct squashfs_sb_info *msblk, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) +{ + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; + struct squashfs_stream *stream = get_cpu_ptr(percpu); + int res = msblk->decompressor->decompress(msblk, stream->stream, buffer, + bh, b, offset, length, srclength, pages); + put_cpu_ptr(stream); + + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + + return res; +} + +int squashfs_max_decompressors(void) +{ + return num_possible_cpus(); +} -- 2.41.0