diff --git a/drivers/staging/erofs/Kconfig b/drivers/staging/erofs/Kconfig
index 663b755bf2fb..74b0aaa7114c 100644
--- a/drivers/staging/erofs/Kconfig
+++ b/drivers/staging/erofs/Kconfig
@@ -1,19 +1,20 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0-only
 config EROFS_FS
 	tristate "EROFS filesystem support"
 	depends on BLOCK
+	select LIBCRC32C
 	help
-	  EROFS(Enhanced Read-Only File System) is a lightweight
+	  EROFS (Enhanced Read-Only File System) is a lightweight
 	  read-only file system with modern designs (eg. page-sized
 	  blocks, inline xattrs/data, etc.) for scenarios which need
-	  high-performance read-only requirements, eg. firmwares in
-	  mobile phone or LIVECDs.
+	  high-performance read-only requirements, e.g. Android OS
+	  for mobile phones and LIVECDs.
 
-	  It also provides VLE compression support, focusing on
-	  random read improvements, keeping relatively lower
-	  compression ratios, which is useful for high-performance
-	  devices with limited memory and ROM space.
+	  It also provides fixed-sized output compression support,
+	  which improves storage density and keeps relatively higher
+	  compression ratios, and is more useful to achieve high
+	  performance for embedded devices with limited memory.
 
 	  If unsure, say N.
 
@@ -21,8 +22,9 @@ config EROFS_FS_DEBUG
 	bool "EROFS debugging feature"
 	depends on EROFS_FS
 	help
-	  Print EROFS debugging messages and enable more BUG_ONs
-	  which check the filesystem consistency aggressively.
+	  Print debugging messages and enable more BUG_ONs which check
+	  filesystem consistency and find potential issues aggressively,
+	  which can be used for Android eng builds, for example.
 
 	  For daily use, say N.
 
@@ -54,6 +56,7 @@ config EROFS_FS_POSIX_ACL
 config EROFS_FS_SECURITY
 	bool "EROFS Security Labels"
 	depends on EROFS_FS_XATTR
+	default y
 	help
 	  Security labels provide an access control facility to support Linux
 	  Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
@@ -63,29 +66,15 @@ config EROFS_FS_SECURITY
 
 	  If you are not using a security module, say N.
 
-config EROFS_FS_USE_VM_MAP_RAM
-	bool "EROFS VM_MAP_RAM Support"
-	depends on EROFS_FS
-	help
-	  use vm_map_ram/vm_unmap_ram instead of vmap/vunmap.
-
-	  If you don't know what these are, say N.
-
-config EROFS_FAULT_INJECTION
-	bool "EROFS fault injection facility"
-	depends on EROFS_FS
-	help
-	  Test EROFS to inject faults such as ENOMEM, EIO, and so on.
-	  If unsure, say N.
-
 config EROFS_FS_ZIP
-	bool "EROFS Data Compresssion Support"
+	bool "EROFS Data Compression Support"
 	depends on EROFS_FS
+	select LZ4_DECOMPRESS
+	default y
 	help
-	  Currently we support VLE Compression only.
-	  Play at your own risk.
+	  Enable fixed-sized output compression for EROFS.
 
-	  If you don't want to use compression feature, say N.
+	  If you don't want to enable the compression feature, say N.
 
 config EROFS_FS_CLUSTER_PAGE_LIMIT
 	int "EROFS Cluster Pages Hard Limit"
@@ -93,49 +82,11 @@ config EROFS_FS_CLUSTER_PAGE_LIMIT
 	range 1 256
 	default "1"
 	help
-	  Indicates VLE compressed pages hard limit of a
-	  compressed cluster.
-
-	  For example, if files of a image are compressed
-	  into 8k-unit, the hard limit should not be less
-	  than 2. Otherwise, the image cannot be mounted
-	  correctly on this kernel.
-
-choice
-	prompt "EROFS VLE Data Decompression mode"
-	depends on EROFS_FS_ZIP
-	default EROFS_FS_ZIP_CACHE_BIPOLAR
-	help
-	  EROFS supports three options for VLE decompression.
-	  "In-place Decompression Only" consumes the minimum memory
-	  with lowest random read.
-
-	  "Bipolar Cached Decompression" consumes the maximum memory
-	  with highest random read.
-
-	  If unsure, select "Bipolar Cached Decompression"
-
-config EROFS_FS_ZIP_NO_CACHE
-	bool "In-place Decompression Only"
-	help
-	  Read compressed data into page cache and do in-place
-	  decompression directly.
-
-config EROFS_FS_ZIP_CACHE_UNIPOLAR
-	bool "Unipolar Cached Decompression"
-	help
-	  For each request, it caches the last compressed page
-	  for further reading.
-	  It still decompresses in place for the rest compressed pages.
-
-config EROFS_FS_ZIP_CACHE_BIPOLAR
-	bool "Bipolar Cached Decompression"
-	help
-	  For each request, it caches the both end compressed pages
-	  for further reading.
-	  It still decompresses in place for the rest compressed pages.
-
-	  Recommended for performance priority.
+	  Indicates the maximum # of pages of a compressed
+	  physical cluster.
 
-endchoice
+	  For example, if files in an image were compressed
+	  into 8k units, the hard limit should not be configured
+	  to less than 2. Otherwise, the image will be refused
+	  to mount on this kernel.
diff --git a/drivers/staging/erofs/Makefile b/drivers/staging/erofs/Makefile
index 9a766eb7ed75..46f2aa4ba46c 100644
--- a/drivers/staging/erofs/Makefile
+++ b/drivers/staging/erofs/Makefile
@@ -1,13 +1,11 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0-only
 
-EROFS_VERSION = "1.0pre1"
+EROFS_VERSION = "1.0"
 
-ccflags-y += -Wall -DEROFS_VERSION=\"$(EROFS_VERSION)\"
+ccflags-y += -DEROFS_VERSION=\"$(EROFS_VERSION)\"
 
 obj-$(CONFIG_EROFS_FS) += erofs.o
-# staging requirement: to be self-contained in its own directory
-ccflags-y += -I$(src)/include
 erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
 erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o unzip_lz4.o unzip_vle_lz4.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
diff --git a/drivers/staging/erofs/compress.h b/drivers/staging/erofs/compress.h
new file mode 100644
index 000000000000..e9b1aa0afe0e
--- /dev/null
+++ b/drivers/staging/erofs/compress.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 HUAWEI, Inc.
+ *             https://www.huawei.com/
+ */
+#ifndef __EROFS_FS_COMPRESS_H
+#define __EROFS_FS_COMPRESS_H
+
+#include "internal.h"
+
+enum {
+	Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
+	Z_EROFS_COMPRESSION_RUNTIME_MAX
+};
+
+struct z_erofs_decompress_req {
+	struct super_block *sb;
+	struct page **in, **out;
+
+	unsigned short pageofs_out;
+	unsigned int inputsize, outputsize;
+
+	/* indicate the algorithm to be used for decompression */
+	unsigned int alg;
+	bool inplace_io, partial_decoding;
+};
+
+/*
+ * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
+ * used to mark temporarily allocated pages from other
+ * file/cached pages and NULL mapping pages.
+ */ +#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D) + +/* check if a page is marked as staging */ +static inline bool z_erofs_page_is_staging(struct page *page) +{ + return page->mapping == Z_EROFS_MAPPING_STAGING; +} + +static inline bool z_erofs_put_stagingpage(struct list_head *pagepool, + struct page *page) +{ + if (!z_erofs_page_is_staging(page)) + return false; + + /* staging pages should not be used by others at the same time */ + if (page_ref_count(page) > 1) + put_page(page); + else + list_add(&page->lru, pagepool); + return true; +} + +int z_erofs_decompress(struct z_erofs_decompress_req *rq, + struct list_head *pagepool); + +#endif diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c index 894e60ecebe2..4f305b68497e 100644 --- a/drivers/staging/erofs/data.c +++ b/drivers/staging/erofs/data.c @@ -1,21 +1,14 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only /* - * linux/drivers/staging/erofs/data.c - * * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. + * https://www.huawei.com/ */ #include "internal.h" #include #include -static inline void read_endio(struct bio *bio) +static void erofs_readendio(struct bio *bio) { int i; struct bio_vec *bvec; @@ -27,7 +20,7 @@ static inline void read_endio(struct bio *bio) /* page is already locked */ DBG_BUGON(PageUptodate(page)); - if (unlikely(err)) + if (err) SetPageError(page); else SetPageUptodate(page); @@ -38,70 +31,35 @@ static inline void read_endio(struct bio *bio) bio_put(bio); } -/* prio -- true is used for dir */ -struct page *erofs_get_meta_page(struct super_block *sb, - erofs_blk_t blkaddr, bool prio) +struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr) { - struct inode *bd_inode = sb->s_bdev->bd_inode; - struct address_space *mapping = bd_inode->i_mapping; + struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping; struct page *page; -repeat: - page = find_or_create_page(mapping, blkaddr, - /* - * Prefer looping in the allocator rather than here, - * at least that code knows what it's doing. - */ - mapping_gfp_constraint(mapping, ~__GFP_FS) | __GFP_NOFAIL); - - BUG_ON(!page || !PageLocked(page)); - - if (!PageUptodate(page)) { - struct bio *bio; - int err; - - bio = prepare_bio(sb, blkaddr, 1, read_endio); - err = bio_add_page(bio, page, PAGE_SIZE, 0); - BUG_ON(err != PAGE_SIZE); - - __submit_bio(bio, REQ_OP_READ, - REQ_META | (prio ? REQ_PRIO : 0)); - + page = read_cache_page_gfp(mapping, blkaddr, + mapping_gfp_constraint(mapping, ~__GFP_FS)); + /* should already be PageUptodate */ + if (!IS_ERR(page)) lock_page(page); - - /* the page has been truncated by others? 
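/*
 * [Editor's illustration -- not part of this patch] z_erofs_page_is_staging()
 * above keys off a sentinel pointer parked in page->mapping.  A minimal
 * self-contained sketch of the same tagging pattern; 'struct pg' and the
 * names below are hypothetical stand-ins, only the 0x5A110C8D value comes
 * from the patch.
 */
#include <stdbool.h>

#define SKETCH_STAGING_MAPPING	((void *)0x5A110C8D)	/* 'sallocated' */

struct pg {
	void *mapping;		/* owner marker, like page->mapping */
};

static bool sketch_page_is_staging(const struct pg *p)
{
	/* a staging page belongs to no address_space, only to the marker */
	return p->mapping == SKETCH_STAGING_MAPPING;
}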
*/ - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - put_page(page); - goto repeat; - } - - /* more likely a read error */ - if (unlikely(!PageUptodate(page))) { - unlock_page(page); - put_page(page); - - page = ERR_PTR(-EIO); - } - } return page; } static int erofs_map_blocks_flatmode(struct inode *inode, - struct erofs_map_blocks *map, - int flags) + struct erofs_map_blocks *map, + int flags) { int err = 0; erofs_blk_t nblocks, lastblk; u64 offset = map->m_la; - struct erofs_vnode *vi = EROFS_V(inode); + struct erofs_inode *vi = EROFS_I(inode); + bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); trace_erofs_map_blocks_flatmode_enter(inode, map, flags); nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE); - lastblk = nblocks - is_inode_layout_inline(inode); + lastblk = nblocks - tailendpacking; - if (unlikely(offset >= inode->i_size)) { + if (offset >= inode->i_size) { /* leave out-of-bound access unmapped */ map->m_flags = 0; map->m_plen = 0; @@ -114,7 +72,7 @@ static int erofs_map_blocks_flatmode(struct inode *inode, if (offset < blknr_to_addr(lastblk)) { map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la; map->m_plen = blknr_to_addr(lastblk) - offset; - } else if (is_inode_layout_inline(inode)) { + } else if (tailendpacking) { /* 2 - inode inline B: inode, [xattrs], inline last blk... */ struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); @@ -122,17 +80,21 @@ static int erofs_map_blocks_flatmode(struct inode *inode, vi->xattr_isize + erofs_blkoff(map->m_la); map->m_plen = inode->i_size - offset; - /* inline data should locate in one meta block */ + /* inline data should be located in one meta block */ if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) { + erofs_err(inode->i_sb, + "inline data cross block boundary @ nid %llu", + vi->nid); DBG_BUGON(1); - err = -EIO; + err = -EFSCORRUPTED; goto err_out; } map->m_flags |= EROFS_MAP_META; } else { - errln("internal error @ nid: %llu (size %llu), m_la 0x%llx", - vi->nid, inode->i_size, map->m_la); + erofs_err(inode->i_sb, + "internal error @ nid: %llu (size %llu), m_la 0x%llx", + vi->nid, inode->i_size, map->m_la); DBG_BUGON(1); err = -EIO; goto err_out; @@ -146,56 +108,30 @@ err_out: return err; } -#ifdef CONFIG_EROFS_FS_ZIP -extern int z_erofs_map_blocks_iter(struct inode *, - struct erofs_map_blocks *, struct page **, int); -#endif - -int erofs_map_blocks_iter(struct inode *inode, - struct erofs_map_blocks *map, - struct page **mpage_ret, int flags) -{ - /* by default, reading raw data never use erofs_map_blocks_iter */ - if (unlikely(!is_inode_layout_compression(inode))) { - if (*mpage_ret != NULL) - put_page(*mpage_ret); - *mpage_ret = NULL; - - return erofs_map_blocks(inode, map, flags); - } - -#ifdef CONFIG_EROFS_FS_ZIP - return z_erofs_map_blocks_iter(inode, map, mpage_ret, flags); -#else - /* data compression is not available */ - return -ENOTSUPP; -#endif -} - int erofs_map_blocks(struct inode *inode, - struct erofs_map_blocks *map, int flags) + struct erofs_map_blocks *map, int flags) { - if (unlikely(is_inode_layout_compression(inode))) { - struct page *mpage = NULL; - int err; + if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) { + int err = z_erofs_map_blocks_iter(inode, map, flags); - err = erofs_map_blocks_iter(inode, map, &mpage, flags); - if (mpage != NULL) - put_page(mpage); + if (map->mpage) { + put_page(map->mpage); + map->mpage = NULL; + } return err; } return erofs_map_blocks_flatmode(inode, map, flags); } -static inline struct bio *erofs_read_raw_page( - struct bio 
*bio, - struct address_space *mapping, - struct page *page, - erofs_off_t *last_block, - unsigned nblocks, - bool ra) +static inline struct bio *erofs_read_raw_page(struct bio *bio, + struct address_space *mapping, + struct page *page, + erofs_off_t *last_block, + unsigned int nblocks, + bool ra) { - struct inode *inode = mapping->host; + struct inode *const inode = mapping->host; + struct super_block *const sb = inode->i_sb; erofs_off_t current_block = (erofs_off_t)page->index; int err; @@ -206,34 +142,28 @@ static inline struct bio *erofs_read_raw_page( goto has_updated; } - if (cleancache_get_page(page) == 0) { - err = 0; - SetPageUptodate(page); - goto has_updated; - } - /* note that for readpage case, bio also equals to NULL */ - if (bio != NULL && - /* not continuous */ - *last_block + 1 != current_block) { + if (bio && + /* not continuous */ + *last_block + 1 != current_block) { submit_bio_retry: - __submit_bio(bio, REQ_OP_READ, 0); + submit_bio(bio); bio = NULL; } - if (bio == NULL) { + if (!bio) { struct erofs_map_blocks map = { .m_la = blknr_to_addr(current_block), }; erofs_blk_t blknr; - unsigned blkoff; + unsigned int blkoff; err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); - if (unlikely(err)) + if (err) goto err_out; /* zero out the holed page */ - if (unlikely(!(map.m_flags & EROFS_MAP_MAPPED))) { + if (!(map.m_flags & EROFS_MAP_MAPPED)) { zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); @@ -254,7 +184,7 @@ submit_bio_retry: DBG_BUGON(map.m_plen > PAGE_SIZE); - ipage = erofs_get_meta_page(inode->i_sb, blknr, 0); + ipage = erofs_get_meta_page(inode->i_sb, blknr); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); @@ -287,7 +217,13 @@ submit_bio_retry: if (nblocks > BIO_MAX_PAGES) nblocks = BIO_MAX_PAGES; - bio = prepare_bio(inode->i_sb, blknr, nblocks, read_endio); + bio = bio_alloc(GFP_NOIO, nblocks); + + bio->bi_end_io = erofs_readendio; + bio_set_dev(bio, sb->s_bdev); + bio->bi_iter.bi_sector = (sector_t)blknr << + LOG_SECTORS_PER_BLOCK; + bio->bi_opf = REQ_OP_READ | (ra ? REQ_RAHEAD : 0); } err = bio_add_page(bio, page, PAGE_SIZE, 0); @@ -298,7 +234,7 @@ submit_bio_retry: *last_block = current_block; /* shift in advance in case of it followed by too many gaps */ - if (unlikely(bio->bi_vcnt >= bio->bi_max_vecs)) { + if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) { /* err should reassign to 0 after submitting */ err = 0; goto submit_bio_out; @@ -316,11 +252,10 @@ has_updated: unlock_page(page); /* if updated manually, continuous pages has a gap */ - if (bio != NULL) + if (bio) submit_bio_out: - __submit_bio(bio, REQ_OP_READ, 0); - - return unlikely(err) ? ERR_PTR(err) : NULL; + submit_bio(bio); + return err ? 
ERR_PTR(err) : NULL; } /* @@ -335,7 +270,7 @@ static int erofs_raw_access_readpage(struct file *file, struct page *page) trace_erofs_readpage(page, true); bio = erofs_read_raw_page(NULL, page->mapping, - page, &last_block, 1, false); + page, &last_block, 1, false); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -345,8 +280,9 @@ static int erofs_raw_access_readpage(struct file *file, struct page *page) } static int erofs_raw_access_readpages(struct file *filp, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) + struct address_space *mapping, + struct list_head *pages, + unsigned int nr_pages) { erofs_off_t last_block; struct bio *bio = NULL; @@ -363,13 +299,13 @@ static int erofs_raw_access_readpages(struct file *filp, if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) { bio = erofs_read_raw_page(bio, mapping, page, - &last_block, nr_pages, true); + &last_block, nr_pages, true); /* all the page errors are ignored when readahead */ if (IS_ERR(bio)) { pr_err("%s, readahead error at page %lu of nid %llu\n", - __func__, page->index, - EROFS_V(mapping->host)->nid); + __func__, page->index, + EROFS_I(mapping->host)->nid); bio = NULL; } @@ -381,8 +317,28 @@ static int erofs_raw_access_readpages(struct file *filp, DBG_BUGON(!list_empty(pages)); /* the rare case (end in gaps) */ - if (unlikely(bio != NULL)) - __submit_bio(bio, REQ_OP_READ, 0); + if (bio) + submit_bio(bio); + return 0; +} + +static sector_t erofs_bmap(struct address_space *mapping, sector_t block) +{ + struct inode *inode = mapping->host; + struct erofs_map_blocks map = { + .m_la = blknr_to_addr(block), + }; + + if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) { + erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE; + + if (block >> LOG_SECTORS_PER_BLOCK >= blks) + return 0; + } + + if (!erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW)) + return erofs_blknr(map.m_pa); + return 0; } @@ -390,5 +346,5 @@ static int erofs_raw_access_readpages(struct file *filp, const struct address_space_operations erofs_raw_access_aops = { .readpage = erofs_raw_access_readpage, .readpages = erofs_raw_access_readpages, + .bmap = erofs_bmap, }; - diff --git a/drivers/staging/erofs/decompressor.c b/drivers/staging/erofs/decompressor.c new file mode 100644 index 000000000000..7822ab4a8262 --- /dev/null +++ b/drivers/staging/erofs/decompressor.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2019 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#include "compress.h" +#include +#include + +#ifndef LZ4_DISTANCE_MAX /* history window size */ +#define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ +#endif + +#define LZ4_MAX_DISTANCE_PAGES (DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE) + 1) +#ifndef LZ4_DECOMPRESS_INPLACE_MARGIN +#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32) +#endif + +struct z_erofs_decompressor { + /* + * if destpages have sparsed pages, fill them with bounce pages. + * it also check whether destpages indicate continuous physical memory. 
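/*
 * [Editor's illustration -- not part of this patch] prepare_destpages()
 * above reports whether the destination pages happen to form one linear
 * range, so decompression can write straight through without vmapping.
 * A user-space sketch of that contiguity walk over plain buffers;
 * 4096 stands in for PAGE_SIZE and all names are hypothetical.
 */
#include <stdbool.h>
#include <stddef.h>

static bool sketch_bufs_are_contiguous(unsigned char *const *pages,
				       size_t nr)
{
	size_t i;

	for (i = 1; i < nr; ++i)
		if (pages[i] != pages[i - 1] + 4096)
			return false;	/* gap found: a mapping is needed */
	return true;			/* one linear range */
}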
+ */ + int (*prepare_destpages)(struct z_erofs_decompress_req *rq, + struct list_head *pagepool); + int (*decompress)(struct z_erofs_decompress_req *rq, u8 *out); + char *name; +}; + +static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq, + struct list_head *pagepool) +{ + const unsigned int nr = + PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; + struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL }; + unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES, + BITS_PER_LONG)] = { 0 }; + void *kaddr = NULL; + unsigned int i, j, top; + + top = 0; + for (i = j = 0; i < nr; ++i, ++j) { + struct page *const page = rq->out[i]; + struct page *victim; + + if (j >= LZ4_MAX_DISTANCE_PAGES) + j = 0; + + /* 'valid' bounced can only be tested after a complete round */ + if (test_bit(j, bounced)) { + DBG_BUGON(i < LZ4_MAX_DISTANCE_PAGES); + DBG_BUGON(top >= LZ4_MAX_DISTANCE_PAGES); + availables[top++] = rq->out[i - LZ4_MAX_DISTANCE_PAGES]; + } + + if (page) { + __clear_bit(j, bounced); + if (kaddr) { + if (kaddr + PAGE_SIZE == page_address(page)) + kaddr += PAGE_SIZE; + else + kaddr = NULL; + } else if (!i) { + kaddr = page_address(page); + } + continue; + } + kaddr = NULL; + __set_bit(j, bounced); + + if (top) { + victim = availables[--top]; + get_page(victim); + } else { + victim = erofs_allocpage(pagepool, GFP_KERNEL); + if (!victim) + return -ENOMEM; + victim->mapping = Z_EROFS_MAPPING_STAGING; + } + rq->out[i] = victim; + } + return kaddr ? 1 : 0; +} + +static void *generic_copy_inplace_data(struct z_erofs_decompress_req *rq, + u8 *src, unsigned int pageofs_in) +{ + /* + * if in-place decompression is ongoing, those decompressed + * pages should be copied in order to avoid being overlapped. + */ + struct page **in = rq->in; + u8 *const tmp = erofs_get_pcpubuf(0); + u8 *tmpp = tmp; + unsigned int inlen = rq->inputsize - pageofs_in; + unsigned int count = min_t(uint, inlen, PAGE_SIZE - pageofs_in); + + while (tmpp < tmp + inlen) { + if (!src) + src = kmap_atomic(*in); + memcpy(tmpp, src + pageofs_in, count); + kunmap_atomic(src); + src = NULL; + tmpp += count; + pageofs_in = 0; + count = PAGE_SIZE; + ++in; + } + return tmp; +} + +static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out) +{ + unsigned int inputmargin, inlen; + u8 *src; + bool copied, support_0padding; + int ret; + + if (rq->inputsize > PAGE_SIZE) + return -EOPNOTSUPP; + + src = kmap_atomic(*rq->in); + inputmargin = 0; + support_0padding = false; + + /* decompression inplace is only safe when 0padding is enabled */ + if (EROFS_SB(rq->sb)->feature_incompat & + EROFS_FEATURE_INCOMPAT_LZ4_0PADDING) { + support_0padding = true; + + while (!src[inputmargin & ~PAGE_MASK]) + if (!(++inputmargin & ~PAGE_MASK)) + break; + + if (inputmargin >= rq->inputsize) { + kunmap_atomic(src); + return -EIO; + } + } + + copied = false; + inlen = rq->inputsize - inputmargin; + if (rq->inplace_io) { + const uint oend = (rq->pageofs_out + + rq->outputsize) & ~PAGE_MASK; + const uint nr = PAGE_ALIGN(rq->pageofs_out + + rq->outputsize) >> PAGE_SHIFT; + + if (rq->partial_decoding || !support_0padding || + rq->out[nr - 1] != rq->in[0] || + rq->inputsize - oend < + LZ4_DECOMPRESS_INPLACE_MARGIN(inlen)) { + src = generic_copy_inplace_data(rq, src, inputmargin); + inputmargin = 0; + copied = true; + } + } + + /* legacy format could compress extra data in a pcluster. 
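/*
 * [Editor's illustration -- not part of this patch] The 0padding probe in
 * z_erofs_lz4_decompress() above skips leading zero bytes to find where
 * the compressed stream really starts inside the block.  Simplified here
 * to one linear buffer (the kernel version walks it with PAGE_MASK
 * wraparound); names are hypothetical.
 */
static unsigned int sketch_lz4_inputmargin(const unsigned char *blk,
					   unsigned int blksz)
{
	unsigned int margin = 0;

	/* stop at the first non-zero byte or the end of the block */
	while (margin < blksz && !blk[margin])
		++margin;
	return margin;	/* == blksz means a bogus, all-zero block */
}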
*/ + if (rq->partial_decoding || !support_0padding) + ret = LZ4_decompress_safe_partial(src + inputmargin, out, + inlen, rq->outputsize, + rq->outputsize); + else + ret = LZ4_decompress_safe(src + inputmargin, out, + inlen, rq->outputsize); + + if (ret != rq->outputsize) { + erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]", + ret, inlen, inputmargin, rq->outputsize); + + WARN_ON(1); + print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET, + 16, 1, src + inputmargin, inlen, true); + print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET, + 16, 1, out, rq->outputsize, true); + + if (ret >= 0) + memset(out + ret, 0, rq->outputsize - ret); + ret = -EIO; + } + + if (copied) + erofs_put_pcpubuf(src); + else + kunmap_atomic(src); + return ret; +} + +static struct z_erofs_decompressor decompressors[] = { + [Z_EROFS_COMPRESSION_SHIFTED] = { + .name = "shifted" + }, + [Z_EROFS_COMPRESSION_LZ4] = { + .prepare_destpages = z_erofs_lz4_prepare_destpages, + .decompress = z_erofs_lz4_decompress, + .name = "lz4" + }, +}; + +static void copy_from_pcpubuf(struct page **out, const char *dst, + unsigned short pageofs_out, + unsigned int outputsize) +{ + const char *end = dst + outputsize; + const unsigned int righthalf = PAGE_SIZE - pageofs_out; + const char *cur = dst - pageofs_out; + + while (cur < end) { + struct page *const page = *out++; + + if (page) { + char *buf = kmap_atomic(page); + + if (cur >= dst) { + memcpy(buf, cur, min_t(uint, PAGE_SIZE, + end - cur)); + } else { + memcpy(buf + pageofs_out, cur + pageofs_out, + min_t(uint, righthalf, end - cur)); + } + kunmap_atomic(buf); + } + cur += PAGE_SIZE; + } +} + +static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq, + struct list_head *pagepool) +{ + const unsigned int nrpages_out = + PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; + const struct z_erofs_decompressor *alg = decompressors + rq->alg; + unsigned int dst_maptype; + void *dst; + int ret, i; + + if (nrpages_out == 1 && !rq->inplace_io) { + DBG_BUGON(!*rq->out); + dst = kmap_atomic(*rq->out); + dst_maptype = 0; + goto dstmap_out; + } + + /* + * For the case of small output size (especially much less + * than PAGE_SIZE), memcpy the decompressed data rather than + * compressed data is preferred. 
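/*
 * [Editor's illustration -- not part of this patch] copy_from_pcpubuf()
 * above scatters one linear buffer into page-sized destinations, where
 * only the first page is written from pageofs onwards ('righthalf'
 * bytes) and every later page starts at offset 0.  Stand-alone sketch
 * over plain buffers; 4096 stands in for PAGE_SIZE, names are mine.
 */
#include <string.h>

static void sketch_scatter_to_pages(unsigned char **pages,
				    const unsigned char *src, size_t len,
				    size_t pageofs)
{
	size_t copied = 0;

	while (copied < len) {
		size_t space = 4096 - pageofs;	/* room left in this page */
		size_t n = len - copied < space ? len - copied : space;

		memcpy(*pages++ + pageofs, src + copied, n);
		copied += n;
		pageofs = 0;	/* subsequent pages start at offset 0 */
	}
}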
+ */ + if (rq->outputsize <= PAGE_SIZE * 7 / 8) { + dst = erofs_get_pcpubuf(0); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + rq->inplace_io = false; + ret = alg->decompress(rq, dst); + if (!ret) + copy_from_pcpubuf(rq->out, dst, rq->pageofs_out, + rq->outputsize); + + erofs_put_pcpubuf(dst); + return ret; + } + + ret = alg->prepare_destpages(rq, pagepool); + if (ret < 0) { + return ret; + } else if (ret) { + dst = page_address(*rq->out); + dst_maptype = 1; + goto dstmap_out; + } + + i = 0; + while (1) { + dst = vm_map_ram(rq->out, nrpages_out, -1, PAGE_KERNEL); + + /* retry two more times (totally 3 times) */ + if (dst || ++i >= 3) + break; + vm_unmap_aliases(); + } + + if (!dst) + return -ENOMEM; + + dst_maptype = 2; + +dstmap_out: + ret = alg->decompress(rq, dst + rq->pageofs_out); + + if (!dst_maptype) + kunmap_atomic(dst); + else if (dst_maptype == 2) + vm_unmap_ram(dst, nrpages_out); + return ret; +} + +static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq, + struct list_head *pagepool) +{ + const unsigned int nrpages_out = + PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; + const unsigned int righthalf = PAGE_SIZE - rq->pageofs_out; + unsigned char *src, *dst; + + if (nrpages_out > 2) { + DBG_BUGON(1); + return -EIO; + } + + if (rq->out[0] == *rq->in) { + DBG_BUGON(nrpages_out != 1); + return 0; + } + + src = kmap_atomic(*rq->in); + if (rq->out[0]) { + dst = kmap_atomic(rq->out[0]); + memcpy(dst + rq->pageofs_out, src, righthalf); + kunmap_atomic(dst); + } + + if (nrpages_out == 2) { + DBG_BUGON(!rq->out[1]); + if (rq->out[1] == *rq->in) { + memmove(src, src + righthalf, rq->pageofs_out); + } else { + dst = kmap_atomic(rq->out[1]); + memcpy(dst, src + righthalf, rq->pageofs_out); + kunmap_atomic(dst); + } + } + kunmap_atomic(src); + return 0; +} + +int z_erofs_decompress(struct z_erofs_decompress_req *rq, + struct list_head *pagepool) +{ + if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED) + return z_erofs_shifted_transform(rq, pagepool); + return z_erofs_decompress_generic(rq, pagepool); +} diff --git a/drivers/staging/erofs/dir.c b/drivers/staging/erofs/dir.c index fe6683effd05..722b653c7a77 100644 --- a/drivers/staging/erofs/dir.c +++ b/drivers/staging/erofs/dir.c @@ -1,14 +1,7 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only /* - * linux/drivers/staging/erofs/dir.c - * * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. 
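/*
 * [Editor's illustration -- not part of this patch] The generic path
 * above picks one of three output mappings: kmap a single page, bounce
 * through a per-CPU buffer for small outputs (<= 7/8 of a page), or
 * vm_map_ram() the whole page vector.  A sketch of that decision with
 * hypothetical names; 4096 stands in for PAGE_SIZE.
 */
enum sketch_map { SKETCH_KMAP_ONE, SKETCH_PCPU_BOUNCE, SKETCH_VMAP };

static enum sketch_map sketch_pick_mapping(unsigned int nrpages_out,
					   int inplace_io,
					   unsigned int outputsize)
{
	if (nrpages_out == 1 && !inplace_io)
		return SKETCH_KMAP_ONE;		/* map the only page */
	if (outputsize <= 4096 * 7 / 8)
		return SKETCH_PCPU_BOUNCE;	/* memcpy beats vmap here */
	return SKETCH_VMAP;			/* large: map the vector */
}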
+ * https://www.huawei.com/ */ #include "internal.h" @@ -33,19 +26,18 @@ static void debug_one_dentry(unsigned char d_type, const char *de_name, memcpy(dbg_namebuf, de_name, de_namelen); dbg_namebuf[de_namelen] = '\0'; - debugln("found dirent %s de_len %u d_type %d", dbg_namebuf, - de_namelen, d_type); + erofs_dbg("found dirent %s de_len %u d_type %d", dbg_namebuf, + de_namelen, d_type); #endif } -static int erofs_fill_dentries(struct dir_context *ctx, - void *dentry_blk, unsigned *ofs, - unsigned nameoff, unsigned maxsize) +static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, + void *dentry_blk, unsigned int *ofs, + unsigned int nameoff, unsigned int maxsize) { - struct erofs_dirent *de = dentry_blk; + struct erofs_dirent *de = dentry_blk + *ofs; const struct erofs_dirent *end = dentry_blk + nameoff; - de = dentry_blk + *ofs; while (de < end) { const char *de_name; unsigned int de_namelen; @@ -66,16 +58,18 @@ static int erofs_fill_dentries(struct dir_context *ctx, de_namelen = le16_to_cpu(de[1].nameoff) - nameoff; /* a corrupted entry is found */ - if (unlikely(nameoff + de_namelen > maxsize || - de_namelen > EROFS_NAME_LEN)) { + if (nameoff + de_namelen > maxsize || + de_namelen > EROFS_NAME_LEN) { + erofs_err(dir->i_sb, "bogus dirent @ nid %llu", + EROFS_I(dir)->nid); DBG_BUGON(1); - return -EIO; + return -EFSCORRUPTED; } debug_one_dentry(d_type, de_name, de_namelen); if (!dir_emit(ctx, de_name, de_namelen, - le64_to_cpu(de->nid), d_type)) - /* stoped by some reason */ + le64_to_cpu(de->nid), d_type)) + /* stopped by some reason */ return 1; ++de; *ofs += sizeof(struct erofs_dirent); @@ -89,62 +83,63 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct inode *dir = file_inode(f); struct address_space *mapping = dir->i_mapping; const size_t dirsize = i_size_read(dir); - unsigned i = ctx->pos / EROFS_BLKSIZ; - unsigned ofs = ctx->pos % EROFS_BLKSIZ; + unsigned int i = ctx->pos / EROFS_BLKSIZ; + unsigned int ofs = ctx->pos % EROFS_BLKSIZ; int err = 0; bool initial = true; while (ctx->pos < dirsize) { struct page *dentry_page; struct erofs_dirent *de; - unsigned nameoff, maxsize; + unsigned int nameoff, maxsize; dentry_page = read_mapping_page(mapping, i, NULL); if (dentry_page == ERR_PTR(-ENOMEM)) { err = -ENOMEM; break; } else if (IS_ERR(dentry_page)) { - errln("fail to readdir of logical block %u of nid %llu", - i, EROFS_V(dir)->nid); - err = PTR_ERR(dentry_page); + erofs_err(dir->i_sb, + "fail to readdir of logical block %u of nid %llu", + i, EROFS_I(dir)->nid); + err = -EFSCORRUPTED; break; } - lock_page(dentry_page); de = (struct erofs_dirent *)kmap(dentry_page); nameoff = le16_to_cpu(de->nameoff); - if (unlikely(nameoff < sizeof(struct erofs_dirent) || - nameoff >= PAGE_SIZE)) { - errln("%s, invalid de[0].nameoff %u", - __func__, nameoff); - - err = -EIO; + if (nameoff < sizeof(struct erofs_dirent) || + nameoff >= PAGE_SIZE) { + erofs_err(dir->i_sb, + "invalid de[0].nameoff %u @ nid %llu", + nameoff, EROFS_I(dir)->nid); + err = -EFSCORRUPTED; goto skip_this; } - maxsize = min_t(unsigned, dirsize - ctx->pos + ofs, PAGE_SIZE); + maxsize = min_t(unsigned int, + dirsize - ctx->pos + ofs, PAGE_SIZE); /* search dirents at the arbitrary position */ - if (unlikely(initial)) { + if (initial) { initial = false; ofs = roundup(ofs, sizeof(struct erofs_dirent)); - if (unlikely(ofs >= nameoff)) + if (ofs >= nameoff) goto skip_this; } - err = erofs_fill_dentries(ctx, de, &ofs, nameoff, maxsize); + err = erofs_fill_dentries(dir, ctx, de, &ofs, + nameoff, 
maxsize); skip_this: kunmap(dentry_page); - unlock_page(dentry_page); put_page(dentry_page); ctx->pos = blknr_to_addr(i) + ofs; - if (unlikely(err)) + if (err) break; ++i; ofs = 0; @@ -155,6 +150,5 @@ skip_this: const struct file_operations erofs_dir_fops = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = erofs_readdir, + .iterate_shared = erofs_readdir, }; - diff --git a/drivers/staging/erofs/erofs_fs.h b/drivers/staging/erofs/erofs_fs.h index d8838ce66941..011c3e281f7f 100644 --- a/drivers/staging/erofs/erofs_fs.h +++ b/drivers/staging/erofs/erofs_fs.h @@ -1,115 +1,121 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Apache-2.0 - * - * linux/drivers/staging/erofs/erofs_fs.h +/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */ +/* + * EROFS (Enhanced ROM File System) on-disk format definition * * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang - * - * This file is dual-licensed; you may select either the GNU General Public - * License version 2 or Apache License, Version 2.0. See the file COPYING - * in the main directory of the Linux distribution for more details. + * https://www.huawei.com/ */ #ifndef __EROFS_FS_H #define __EROFS_FS_H -/* Enhanced(Extended) ROM File System */ -#define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2 +#define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2 #define EROFS_SUPER_OFFSET 1024 +#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 + /* - * Any bits that aren't in EROFS_ALL_REQUIREMENTS should be - * incompatible with this kernel version. + * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should + * be incompatible with this kernel version. */ -#define EROFS_ALL_REQUIREMENTS 0 +#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001 +#define EROFS_ALL_FEATURE_INCOMPAT EROFS_FEATURE_INCOMPAT_LZ4_0PADDING +/* 128-byte erofs on-disk super block */ struct erofs_super_block { -/* 0 */__le32 magic; /* in the little endian */ -/* 4 */__le32 checksum; /* crc32c(super_block) */ -/* 8 */__le32 features; /* (aka. feature_compat) */ -/* 12 */__u8 blkszbits; /* support block_size == PAGE_SIZE only */ -/* 13 */__u8 reserved; - -/* 14 */__le16 root_nid; -/* 16 */__le64 inos; /* total valid ino # (== f_files - f_favail) */ - -/* 24 */__le64 build_time; /* inode v1 time derivation */ -/* 32 */__le32 build_time_nsec; -/* 36 */__le32 blocks; /* used for statfs */ -/* 40 */__le32 meta_blkaddr; -/* 44 */__le32 xattr_blkaddr; -/* 48 */__u8 uuid[16]; /* 128-bit uuid for volume */ -/* 64 */__u8 volume_name[16]; /* volume name */ -/* 80 */__le32 requirements; /* (aka. 
feature_incompat) */ - -/* 84 */__u8 reserved2[44]; -} __packed; /* 128 bytes */ - -#define __EROFS_BIT(_prefix, _cur, _pre) enum { \ - _prefix ## _cur ## _BIT = _prefix ## _pre ## _BIT + \ - _prefix ## _pre ## _BITS } + __le32 magic; /* file system magic number */ + __le32 checksum; /* crc32c(super_block) */ + __le32 feature_compat; + __u8 blkszbits; /* support block_size == PAGE_SIZE only */ + __u8 reserved; + + __le16 root_nid; /* nid of root directory */ + __le64 inos; /* total valid ino # (== f_files - f_favail) */ + + __le64 build_time; /* inode v1 time derivation */ + __le32 build_time_nsec; /* inode v1 time derivation in nano scale */ + __le32 blocks; /* used for statfs */ + __le32 meta_blkaddr; /* start block address of metadata area */ + __le32 xattr_blkaddr; /* start block address of shared xattr area */ + __u8 uuid[16]; /* 128-bit uuid for volume */ + __u8 volume_name[16]; /* volume name */ + __le32 feature_incompat; + __u8 reserved2[44]; +}; /* - * erofs inode data mapping: + * erofs inode datalayout (i_format in on-disk inode): * 0 - inode plain without inline data A: * inode, [xattrs], ... | ... | no-holed data - * 1 - inode VLE compression B: + * 1 - inode VLE compression B (legacy): * inode, [xattrs], extents ... | ... * 2 - inode plain with inline data C: * inode, [xattrs], last_inline_data, ... | ... | no-holed data - * 3~7 - reserved + * 3 - inode compression D: + * inode, [xattrs], map_header, extents ... | ... + * 4~7 - reserved */ enum { - EROFS_INODE_LAYOUT_PLAIN, - EROFS_INODE_LAYOUT_COMPRESSION, - EROFS_INODE_LAYOUT_INLINE, - EROFS_INODE_LAYOUT_MAX + EROFS_INODE_FLAT_PLAIN = 0, + EROFS_INODE_FLAT_COMPRESSION_LEGACY = 1, + EROFS_INODE_FLAT_INLINE = 2, + EROFS_INODE_FLAT_COMPRESSION = 3, + EROFS_INODE_DATALAYOUT_MAX }; + +static inline bool erofs_inode_is_data_compressed(unsigned int datamode) +{ + return datamode == EROFS_INODE_FLAT_COMPRESSION || + datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY; +} + +/* bit definitions of inode i_advise */ #define EROFS_I_VERSION_BITS 1 -#define EROFS_I_DATA_MAPPING_BITS 3 +#define EROFS_I_DATALAYOUT_BITS 3 #define EROFS_I_VERSION_BIT 0 -__EROFS_BIT(EROFS_I_, DATA_MAPPING, VERSION); +#define EROFS_I_DATALAYOUT_BIT 1 #define EROFS_I_ALL \ - ((1 << (EROFS_I_DATA_MAPPING_BIT + EROFS_I_DATA_MAPPING_BITS)) - 1) + ((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1) -struct erofs_inode_v1 { -/* 0 */__le16 i_advise; +/* 32-byte reduced form of an ondisk inode */ +struct erofs_inode_compact { + __le16 i_format; /* inode format hints */ /* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ -/* 2 */__le16 i_xattr_icount; -/* 4 */__le16 i_mode; -/* 6 */__le16 i_nlink; -/* 8 */__le32 i_size; -/* 12 */__le32 i_reserved; -/* 16 */union { + __le16 i_xattr_icount; + __le16 i_mode; + __le16 i_nlink; + __le32 i_size; + __le32 i_reserved; + union { /* file total compressed blocks for data mapping 1 */ __le32 compressed_blocks; __le32 raw_blkaddr; /* for device files, used to indicate old/new device # */ __le32 rdev; - } i_u __packed; -/* 20 */__le32 i_ino; /* only used for 32-bit stat compatibility */ -/* 24 */__le16 i_uid; -/* 26 */__le16 i_gid; -/* 28 */__le32 i_checksum; -} __packed; + } i_u; + __le32 i_ino; /* only used for 32-bit stat compatibility */ + __le16 i_uid; + __le16 i_gid; + __le32 i_reserved2; +}; /* 32 bytes on-disk inode */ -#define EROFS_INODE_LAYOUT_V1 0 +#define EROFS_INODE_LAYOUT_COMPACT 0 /* 64 bytes on-disk inode */ -#define EROFS_INODE_LAYOUT_V2 1 +#define EROFS_INODE_LAYOUT_EXTENDED 1 -struct 
erofs_inode_v2 { - __le16 i_advise; +/* 64-byte complete form of an ondisk inode */ +struct erofs_inode_extended { + __le16 i_format; /* inode format hints */ - /* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ +/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ __le16 i_xattr_icount; __le16 i_mode; - __le16 i_reserved; /* 8 bytes */ - __le64 i_size; /* 16 bytes */ + __le16 i_reserved; + __le64 i_size; union { /* file total compressed blocks for data mapping 1 */ __le32 compressed_blocks; @@ -117,19 +123,18 @@ struct erofs_inode_v2 { /* for device files, used to indicate old/new device # */ __le32 rdev; - } i_u __packed; + } i_u; /* only used for 32-bit stat compatibility */ - __le32 i_ino; /* 24 bytes */ + __le32 i_ino; __le32 i_uid; __le32 i_gid; - __le64 i_ctime; /* 32 bytes */ + __le64 i_ctime; __le32 i_ctime_nsec; __le32 i_nlink; - __u8 i_reserved2[12]; - __le32 i_checksum; /* 64 bytes */ -} __packed; + __u8 i_reserved2[16]; +}; #define EROFS_MAX_SHARED_XATTRS (128) /* h_shared_count between 129 ... 255 are special # */ @@ -147,11 +152,11 @@ struct erofs_inode_v2 { * for read-only fs, no need to introduce h_refcount */ struct erofs_xattr_ibody_header { - __le32 h_checksum; + __le32 h_reserved; __u8 h_shared_count; - __u8 h_reserved[7]; + __u8 h_reserved2[7]; __le32 h_shared_xattrs[0]; /* shared xattr id array */ -} __packed; +}; /* Name indexes */ #define EROFS_XATTR_INDEX_USER 1 @@ -168,27 +173,61 @@ struct erofs_xattr_entry { __le16 e_value_size; /* size of attribute value */ /* followed by e_name and e_value */ char e_name[0]; /* attribute name */ -} __packed; +}; -#define ondisk_xattr_ibody_size(count) ({\ - u32 __count = le16_to_cpu(count); \ - ((__count) == 0) ? 0 : \ - sizeof(struct erofs_xattr_ibody_header) + \ - sizeof(__u32) * ((__count) - 1); }) +static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount) +{ + if (!i_xattr_icount) + return 0; + + return sizeof(struct erofs_xattr_ibody_header) + + sizeof(__u32) * (le16_to_cpu(i_xattr_icount) - 1); +} #define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry)) -#define EROFS_XATTR_ENTRY_SIZE(entry) EROFS_XATTR_ALIGN( \ - sizeof(struct erofs_xattr_entry) + \ - (entry)->e_name_len + le16_to_cpu((entry)->e_value_size)) - -/* have to be aligned with 8 bytes on disk */ -struct erofs_extent_header { - __le32 eh_checksum; - __le32 eh_reserved[3]; -} __packed; + +static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e) +{ + return EROFS_XATTR_ALIGN(sizeof(struct erofs_xattr_entry) + + e->e_name_len + le16_to_cpu(e->e_value_size)); +} + +/* available compression algorithm types (for h_algorithmtype) */ +enum { + Z_EROFS_COMPRESSION_LZ4 = 0, + Z_EROFS_COMPRESSION_MAX +}; /* - * Z_EROFS Variable-sized Logical Extent cluster type: + * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on) + * e.g. for 4k logical cluster size, 4B if compacted 2B is off; + * (4B) + 2B + (4B) if compacted 2B is on. + */ +#define Z_EROFS_ADVISE_COMPACTED_2B_BIT 0 + +#define Z_EROFS_ADVISE_COMPACTED_2B (1 << Z_EROFS_ADVISE_COMPACTED_2B_BIT) + +struct z_erofs_map_header { + __le32 h_reserved1; + __le16 h_advise; + /* + * bit 0-3 : algorithm type of head 1 (logical cluster type 01); + * bit 4-7 : algorithm type of head 2 (logical cluster type 11). + */ + __u8 h_algorithmtype; + /* + * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096; + * bit 3-4 : (physical - logical) cluster bits of head 1: + * For example, if logical clustersize = 4096, 1 for 8192. 
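/*
 * [Editor's illustration -- not part of this patch] The xattr sizing
 * above, worked through: i_xattr_icount counts 4-byte units where the
 * first unit stands for the whole 12-byte ibody header (the "1 header +
 * n-1 * 4 bytes" comment above), and each entry is rounded up to the
 * 4-byte entry alignment.  Stand-alone equivalents with the struct
 * sizes folded in as constants:
 */
static unsigned int sketch_xattr_ibody_size(unsigned int icount)
{
	return icount ? 12 + 4 * (icount - 1) : 0;	/* bytes */
}

static unsigned int sketch_xattr_entry_size(unsigned int name_len,
					    unsigned int value_size)
{
	unsigned int sz = 4 + name_len + value_size;	/* 4-byte header */

	return (sz + 3) & ~3u;	/* EROFS_XATTR_ALIGN: round up to 4 */
}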
+ * bit 5-7 : (physical - logical) cluster bits of head 2. + */ + __u8 h_clusterbits; +}; + +#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8 + +/* + * Fixed-sized output compression ondisk Logical Extent cluster type: * 0 - literal (uncompressed) cluster * 1 - compressed cluster (for the head logical cluster) * 2 - compressed cluster (for the other logical clusters) @@ -212,6 +251,14 @@ struct erofs_extent_header { * di_u.delta[1] = distance to its corresponding tail cluster * (di_advise could be 0, 1 or 2) */ +enum { + Z_EROFS_VLE_CLUSTER_TYPE_PLAIN = 0, + Z_EROFS_VLE_CLUSTER_TYPE_HEAD = 1, + Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD = 2, + Z_EROFS_VLE_CLUSTER_TYPE_RESERVED = 3, + Z_EROFS_VLE_CLUSTER_TYPE_MAX +}; + #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0 @@ -230,18 +277,19 @@ struct z_erofs_vle_decompressed_index { * [1] - pointing to the tail cluster */ __le16 delta[2]; - } di_u __packed; /* 8 bytes */ -} __packed; + } di_u; +}; -#define Z_EROFS_VLE_EXTENT_ALIGN(size) round_up(size, \ - sizeof(struct z_erofs_vle_decompressed_index)) +#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \ + (round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \ + sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING) /* dirent sorts in alphabet order, thus we can do binary search */ struct erofs_dirent { - __le64 nid; /* 0, node number */ - __le16 nameoff; /* 8, start offset of file name */ - __u8 file_type; /* 10, file type */ - __u8 reserved; /* 11, reserved */ + __le64 nid; /* node number */ + __le16 nameoff; /* start offset of file name */ + __u8 file_type; /* file type */ + __u8 reserved; /* reserved */ } __packed; /* file types used in inode_info->flags */ @@ -263,14 +311,16 @@ enum { static inline void erofs_check_ondisk_layout_definitions(void) { BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128); - BUILD_BUG_ON(sizeof(struct erofs_inode_v1) != 32); - BUILD_BUG_ON(sizeof(struct erofs_inode_v2) != 64); + BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32); + BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64); BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12); BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4); - BUILD_BUG_ON(sizeof(struct erofs_extent_header) != 16); + BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8); BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8); BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12); + + BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) < + Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1); } #endif - diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c index 02398c7eb4a4..5f9c6fb79ac0 100644 --- a/drivers/staging/erofs/inode.c +++ b/drivers/staging/erofs/inode.c @@ -1,14 +1,7 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only /* - * linux/drivers/staging/erofs/inode.c - * * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. + * https://www.huawei.com/ */ #include "xattr.h" @@ -19,56 +12,59 @@ * the inode payload page if it's an extended inode) in order to fill * inline data if possible. 
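/*
 * [Editor's illustration -- not part of this patch] How the packed
 * fields of struct z_erofs_map_header above decode, following the bit
 * layout comments verbatim; function and variable names are mine.
 */
struct sketch_maphdr {
	unsigned char h_algorithmtype;
	unsigned char h_clusterbits;
};

static void sketch_decode_maphdr(const struct sketch_maphdr *h,
				 unsigned int *alg_head1,
				 unsigned int *alg_head2,
				 unsigned int *lclusterbits)
{
	*alg_head1 = h->h_algorithmtype & 0x0f;		/* bits 0-3 */
	*alg_head2 = h->h_algorithmtype >> 4;		/* bits 4-7 */
	/* bits 0-2 store (logical cluster bits - 12): 0 means 4096 */
	*lclusterbits = 12 + (h->h_clusterbits & 7);
}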
*/ -static struct page *read_inode(struct inode *inode, unsigned int *ofs) +static struct page *erofs_read_inode(struct inode *inode, + unsigned int *ofs) { struct super_block *sb = inode->i_sb; struct erofs_sb_info *sbi = EROFS_SB(sb); - struct erofs_vnode *vi = EROFS_V(inode); + struct erofs_inode *vi = EROFS_I(inode); const erofs_off_t inode_loc = iloc(sbi, vi->nid); - erofs_blk_t blkaddr; + + erofs_blk_t blkaddr, nblks = 0; struct page *page; - struct erofs_inode_v1 *v1; - struct erofs_inode_v2 *v2, *copied = NULL; + struct erofs_inode_compact *dic; + struct erofs_inode_extended *die, *copied = NULL; unsigned int ifmt; int err; blkaddr = erofs_blknr(inode_loc); *ofs = erofs_blkoff(inode_loc); - debugln("%s, reading inode nid %llu at %u of blkaddr %u", - __func__, vi->nid, *ofs, blkaddr); + erofs_dbg("%s, reading inode nid %llu at %u of blkaddr %u", + __func__, vi->nid, *ofs, blkaddr); - page = erofs_get_meta_page(sb, blkaddr, false); + page = erofs_get_meta_page(sb, blkaddr); if (IS_ERR(page)) { - errln("failed to get inode (nid: %llu) page, err %ld", - vi->nid, PTR_ERR(page)); + erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld", + vi->nid, PTR_ERR(page)); return page; } - v1 = page_address(page) + *ofs; - ifmt = le16_to_cpu(v1->i_advise); + dic = page_address(page) + *ofs; + ifmt = le16_to_cpu(dic->i_format); if (ifmt & ~EROFS_I_ALL) { - errln("unsupported i_format %u of nid %llu", ifmt, vi->nid); + erofs_err(inode->i_sb, "unsupported i_format %u of nid %llu", + ifmt, vi->nid); err = -EOPNOTSUPP; goto err_out; } - vi->data_mapping_mode = __inode_data_mapping(ifmt); - if (unlikely(vi->data_mapping_mode >= EROFS_INODE_LAYOUT_MAX)) { - errln("unknown data mapping mode %u of nid %llu", - vi->data_mapping_mode, vi->nid); + vi->datalayout = erofs_inode_datalayout(ifmt); + if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) { + erofs_err(inode->i_sb, "unsupported datalayout %u of nid %llu", + vi->datalayout, vi->nid); err = -EOPNOTSUPP; goto err_out; } - switch (__inode_version(ifmt)) { - case EROFS_INODE_LAYOUT_V2: - vi->inode_isize = sizeof(struct erofs_inode_v2); + switch (erofs_inode_version(ifmt)) { + case EROFS_INODE_LAYOUT_EXTENDED: + vi->inode_isize = sizeof(struct erofs_inode_extended); /* check if the inode acrosses page boundary */ if (*ofs + vi->inode_isize <= PAGE_SIZE) { *ofs += vi->inode_isize; - v2 = (struct erofs_inode_v2 *)v1; + die = (struct erofs_inode_extended *)dic; } else { const unsigned int gotten = PAGE_SIZE - *ofs; @@ -77,78 +73,98 @@ static struct page *read_inode(struct inode *inode, unsigned int *ofs) err = -ENOMEM; goto err_out; } - memcpy(copied, v1, gotten); + memcpy(copied, dic, gotten); unlock_page(page); put_page(page); - page = erofs_get_meta_page(sb, blkaddr + 1, false); + page = erofs_get_meta_page(sb, blkaddr + 1); if (IS_ERR(page)) { - errln("failed to get inode payload page (nid: %llu), err %ld", - vi->nid, PTR_ERR(page)); + erofs_err(sb, "failed to get inode payload page (nid: %llu), err %ld", + vi->nid, PTR_ERR(page)); kfree(copied); return page; } *ofs = vi->inode_isize - gotten; memcpy((u8 *)copied + gotten, page_address(page), *ofs); - v2 = copied; + die = copied; } - vi->xattr_isize = ondisk_xattr_ibody_size(v2->i_xattr_icount); - - inode->i_mode = le16_to_cpu(v2->i_mode); - if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) { - vi->raw_blkaddr = le32_to_cpu(v2->i_u.raw_blkaddr); - } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + vi->xattr_isize = 
erofs_xattr_ibody_size(die->i_xattr_icount); + + inode->i_mode = le16_to_cpu(die->i_mode); + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + vi->raw_blkaddr = le32_to_cpu(die->i_u.raw_blkaddr); + break; + case S_IFCHR: + case S_IFBLK: inode->i_rdev = - new_decode_dev(le32_to_cpu(v2->i_u.rdev)); - } else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { + new_decode_dev(le32_to_cpu(die->i_u.rdev)); + break; + case S_IFIFO: + case S_IFSOCK: inode->i_rdev = 0; - } else { + break; + default: goto bogusimode; } - - i_uid_write(inode, le32_to_cpu(v2->i_uid)); - i_gid_write(inode, le32_to_cpu(v2->i_gid)); - set_nlink(inode, le32_to_cpu(v2->i_nlink)); + i_uid_write(inode, le32_to_cpu(die->i_uid)); + i_gid_write(inode, le32_to_cpu(die->i_gid)); + set_nlink(inode, le32_to_cpu(die->i_nlink)); /* extended inode has its own timestamp */ - inode->i_ctime.tv_sec = le64_to_cpu(v2->i_ctime); - inode->i_ctime.tv_nsec = le32_to_cpu(v2->i_ctime_nsec); + inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime); + inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec); + + inode->i_size = le64_to_cpu(die->i_size); + + /* total blocks for compressed files */ + if (erofs_inode_is_data_compressed(vi->datalayout)) + nblks = le32_to_cpu(die->i_u.compressed_blocks); - inode->i_size = le64_to_cpu(v2->i_size); kfree(copied); break; - case EROFS_INODE_LAYOUT_V1: - vi->inode_isize = sizeof(struct erofs_inode_v1); + case EROFS_INODE_LAYOUT_COMPACT: + vi->inode_isize = sizeof(struct erofs_inode_compact); *ofs += vi->inode_isize; - vi->xattr_isize = ondisk_xattr_ibody_size(v1->i_xattr_icount); - - inode->i_mode = le16_to_cpu(v1->i_mode); - if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) { - vi->raw_blkaddr = le32_to_cpu(v1->i_u.raw_blkaddr); - } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount); + + inode->i_mode = le16_to_cpu(dic->i_mode); + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + vi->raw_blkaddr = le32_to_cpu(dic->i_u.raw_blkaddr); + break; + case S_IFCHR: + case S_IFBLK: inode->i_rdev = - new_decode_dev(le32_to_cpu(v1->i_u.rdev)); - } else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { + new_decode_dev(le32_to_cpu(dic->i_u.rdev)); + break; + case S_IFIFO: + case S_IFSOCK: inode->i_rdev = 0; - } else { + break; + default: goto bogusimode; } - - i_uid_write(inode, le16_to_cpu(v1->i_uid)); - i_gid_write(inode, le16_to_cpu(v1->i_gid)); - set_nlink(inode, le16_to_cpu(v1->i_nlink)); + i_uid_write(inode, le16_to_cpu(dic->i_uid)); + i_gid_write(inode, le16_to_cpu(dic->i_gid)); + set_nlink(inode, le16_to_cpu(dic->i_nlink)); /* use build time for compact inodes */ inode->i_ctime.tv_sec = sbi->build_time; inode->i_ctime.tv_nsec = sbi->build_time_nsec; - inode->i_size = le32_to_cpu(v1->i_size); + inode->i_size = le32_to_cpu(dic->i_size); + if (erofs_inode_is_data_compressed(vi->datalayout)) + nblks = le32_to_cpu(dic->i_u.compressed_blocks); break; default: - errln("unsupported on-disk inode version %u of nid %llu", - __inode_version(ifmt), vi->nid); + erofs_err(inode->i_sb, + "unsupported on-disk inode version %u of nid %llu", + erofs_inode_version(ifmt), vi->nid); err = -EOPNOTSUPP; goto err_out; } @@ -158,12 +174,17 @@ static struct page *read_inode(struct inode *inode, unsigned int *ofs) inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec; inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec; - /* measure inode.i_blocks as the generic 
filesystem */ - inode->i_blocks = ((inode->i_size - 1) >> 9) + 1; + if (!nblks) + /* measure inode.i_blocks as generic filesystems */ + inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9; + else + inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK; return page; + bogusimode: - errln("bogus i_mode (%o) @ nid %llu", inode->i_mode, vi->nid); - err = -EIO; + erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu", + inode->i_mode, vi->nid); + err = -EFSCORRUPTED; err_out: DBG_BUGON(1); kfree(copied); @@ -172,56 +193,45 @@ err_out: return ERR_PTR(err); } -/* - * try_lock can be required since locking order is: - * file data(fs_inode) - * meta(bd_inode) - * but the majority of the callers is "iget", - * in that case we are pretty sure no deadlock since - * no data operations exist. However I tend to - * try_lock since it takes no much overhead and - * will success immediately. - */ -static int fill_inline_data(struct inode *inode, void *data, unsigned m_pofs) +static int erofs_fill_symlink(struct inode *inode, void *data, + unsigned int m_pofs) { - struct erofs_vnode *vi = EROFS_V(inode); - struct erofs_sb_info *sbi = EROFS_I_SB(inode); - int mode = vi->data_mapping_mode; - - DBG_BUGON(mode >= EROFS_INODE_LAYOUT_MAX); + struct erofs_inode *vi = EROFS_I(inode); + char *lnk; - /* should be inode inline C */ - if (mode != EROFS_INODE_LAYOUT_INLINE) + /* if it cannot be handled with fast symlink scheme */ + if (vi->datalayout != EROFS_INODE_FLAT_INLINE || + inode->i_size >= PAGE_SIZE) { + inode->i_op = &erofs_symlink_iops; return 0; + } - /* fast symlink (following ext4) */ - if (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) { - char *lnk = erofs_kmalloc(sbi, inode->i_size + 1, GFP_KERNEL); - - if (unlikely(lnk == NULL)) - return -ENOMEM; - - m_pofs += vi->xattr_isize; - - /* inline symlink data shouldn't across page boundary as well */ - if (unlikely(m_pofs + inode->i_size > PAGE_SIZE)) { - DBG_BUGON(1); - kfree(lnk); - return -EIO; - } + lnk = kmalloc(inode->i_size + 1, GFP_KERNEL); + if (!lnk) + return -ENOMEM; + + m_pofs += vi->xattr_isize; + /* inline symlink data shouldn't cross page boundary as well */ + if (m_pofs + inode->i_size > PAGE_SIZE) { + kfree(lnk); + erofs_err(inode->i_sb, + "inline data cross block boundary @ nid %llu", + vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } - /* get in-page inline data */ - memcpy(lnk, data + m_pofs, inode->i_size); - lnk[inode->i_size] = '\0'; + memcpy(lnk, data + m_pofs, inode->i_size); + lnk[inode->i_size] = '\0'; - inode->i_link = lnk; - set_inode_fast_symlink(inode); - } - return -EAGAIN; + inode->i_link = lnk; + inode->i_op = &erofs_fast_symlink_iops; + return 0; } -static int fill_inode(struct inode *inode, int isdir) +static int erofs_fill_inode(struct inode *inode, int isdir) { + struct erofs_inode *vi = EROFS_I(inode); struct page *page; unsigned int ofs; int err = 0; @@ -229,59 +239,43 @@ static int fill_inode(struct inode *inode, int isdir) trace_erofs_fill_inode(inode, isdir); /* read inode base data from disk */ - page = read_inode(inode, &ofs); - if (IS_ERR(page)) { + page = erofs_read_inode(inode, &ofs); + if (IS_ERR(page)) return PTR_ERR(page); - } else { - /* setup the new inode */ - if (S_ISREG(inode->i_mode)) { -#ifdef CONFIG_EROFS_FS_XATTR - inode->i_op = &erofs_generic_xattr_iops; -#endif - inode->i_fop = &generic_ro_fops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = -#ifdef CONFIG_EROFS_FS_XATTR - &erofs_dir_xattr_iops; -#else - &erofs_dir_iops; -#endif - inode->i_fop = &erofs_dir_fops; - } else if 
(S_ISLNK(inode->i_mode)) { - /* by default, page_get_link is used for symlink */ - inode->i_op = -#ifdef CONFIG_EROFS_FS_XATTR - &erofs_symlink_xattr_iops, -#else - &page_symlink_inode_operations; -#endif - inode_nohighmem(inode); - } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || - S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { -#ifdef CONFIG_EROFS_FS_XATTR - inode->i_op = &erofs_special_inode_operations; -#endif - init_special_inode(inode, inode->i_mode, inode->i_rdev); - } else { - err = -EIO; - goto out_unlock; - } - if (is_inode_layout_compression(inode)) { -#ifdef CONFIG_EROFS_FS_ZIP - inode->i_mapping->a_ops = - &z_erofs_vle_normalaccess_aops; -#else - err = -ENOTSUPP; -#endif + /* setup the new inode */ + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + inode->i_op = &erofs_generic_iops; + inode->i_fop = &generic_ro_fops; + break; + case S_IFDIR: + inode->i_op = &erofs_dir_iops; + inode->i_fop = &erofs_dir_fops; + break; + case S_IFLNK: + err = erofs_fill_symlink(inode, page_address(page), ofs); + if (err) goto out_unlock; - } - - inode->i_mapping->a_ops = &erofs_raw_access_aops; + inode_nohighmem(inode); + break; + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + inode->i_op = &erofs_generic_iops; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + goto out_unlock; + default: + err = -EFSCORRUPTED; + goto out_unlock; + } - /* fill last page if inline data is available */ - fill_inline_data(inode, page_address(page), ofs); + if (erofs_inode_is_data_compressed(vi->datalayout)) { + err = z_erofs_fill_inode(inode); + goto out_unlock; } + inode->i_mapping->a_ops = &erofs_raw_access_aops; out_unlock: unlock_page(page); @@ -289,21 +283,51 @@ out_unlock: return err; } +/* + * erofs nid is 64bits, but i_ino is 'unsigned long', therefore + * we should do more for 32-bit platform to find the right inode. 
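/*
 * [Editor's illustration -- not part of this patch] Why the test/set
 * actors above exist: on 32-bit kernels i_ino cannot hold a 64-bit nid,
 * so iget5_locked() keys on the full nid while i_ino only caches a
 * derived hash.  Sketch with a hypothetical truncating hash (the real
 * erofs_inode_hash() is not shown in this hunk):
 */
typedef unsigned long long sketch_nid_t;

static unsigned long sketch_inode_hash(sketch_nid_t nid)
{
	return (unsigned long)nid;	/* may truncate on 32-bit */
}

static int sketch_ilookup_test(sketch_nid_t stored_nid, const void *opaque)
{
	/* always compare full 64-bit nids, never the truncated hash */
	return stored_nid == *(const sketch_nid_t *)opaque;
}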
+ */ +static int erofs_ilookup_test_actor(struct inode *inode, void *opaque) +{ + const erofs_nid_t nid = *(erofs_nid_t *)opaque; + + return EROFS_I(inode)->nid == nid; +} + +static int erofs_iget_set_actor(struct inode *inode, void *opaque) +{ + const erofs_nid_t nid = *(erofs_nid_t *)opaque; + + inode->i_ino = erofs_inode_hash(nid); + return 0; +} + +static inline struct inode *erofs_iget_locked(struct super_block *sb, + erofs_nid_t nid) +{ + const unsigned long hashval = erofs_inode_hash(nid); + + return iget5_locked(sb, hashval, erofs_ilookup_test_actor, + erofs_iget_set_actor, &nid); +} + struct inode *erofs_iget(struct super_block *sb, - erofs_nid_t nid, bool isdir) + erofs_nid_t nid, + bool isdir) { - struct inode *inode = iget_locked(sb, nid); + struct inode *inode = erofs_iget_locked(sb, nid); - if (unlikely(inode == NULL)) + if (!inode) return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { int err; - struct erofs_vnode *vi = EROFS_V(inode); + struct erofs_inode *vi = EROFS_I(inode); + vi->nid = nid; - err = fill_inode(inode, isdir); - if (likely(!err)) + err = erofs_fill_inode(inode, isdir); + if (!err) unlock_new_inode(inode); else { iget_failed(inode); @@ -313,29 +337,38 @@ struct inode *erofs_iget(struct super_block *sb, return inode; } -#ifdef CONFIG_EROFS_FS_XATTR -const struct inode_operations erofs_generic_xattr_iops = { - .listxattr = erofs_listxattr, -}; -#endif +int erofs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + struct inode *const inode = d_inode(path->dentry); -#ifdef CONFIG_EROFS_FS_XATTR -const struct inode_operations erofs_symlink_xattr_iops = { - .get_link = page_get_link, + if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) + stat->attributes |= STATX_ATTR_COMPRESSED; + + stat->attributes |= STATX_ATTR_IMMUTABLE; + stat->attributes_mask |= (STATX_ATTR_COMPRESSED | + STATX_ATTR_IMMUTABLE); + + generic_fillattr(inode, stat); + return 0; +} + +const struct inode_operations erofs_generic_iops = { + .getattr = erofs_getattr, .listxattr = erofs_listxattr, + .get_acl = erofs_get_acl, }; -#endif -const struct inode_operations erofs_special_inode_operations = { -#ifdef CONFIG_EROFS_FS_XATTR +const struct inode_operations erofs_symlink_iops = { + .get_link = page_get_link, + .getattr = erofs_getattr, .listxattr = erofs_listxattr, -#endif + .get_acl = erofs_get_acl, }; -#ifdef CONFIG_EROFS_FS_XATTR -const struct inode_operations erofs_fast_symlink_xattr_iops = { +const struct inode_operations erofs_fast_symlink_iops = { .get_link = simple_get_link, + .getattr = erofs_getattr, .listxattr = erofs_listxattr, + .get_acl = erofs_get_acl, }; -#endif - diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h index 8ce37091db20..5ee2368a18d5 100644 --- a/drivers/staging/erofs/internal.h +++ b/drivers/staging/erofs/internal.h @@ -1,17 +1,10 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * linux/drivers/staging/erofs/internal.h - * +/* SPDX-License-Identifier: GPL-2.0-only */ +/* * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. 
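/*
 * [Editor's illustration -- not part of this patch] The attributes set
 * in erofs_getattr() above surface to user space through statx(2).  A
 * sketch of reading them back, assuming a glibc that exposes statx();
 * the path argument is hypothetical.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

static void sketch_show_statx_attrs(const char *path)
{
	struct statx stx;

	if (statx(AT_FDCWD, path, 0, STATX_BASIC_STATS, &stx) < 0)
		return;
	printf("compressed=%d immutable=%d\n",
	       !!(stx.stx_attributes & STATX_ATTR_COMPRESSED),
	       !!(stx.stx_attributes & STATX_ATTR_IMMUTABLE));
}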
+ * https://www.huawei.com/ */ -#ifndef __INTERNAL_H -#define __INTERNAL_H +#ifndef __EROFS_INTERNAL_H +#define __EROFS_INTERNAL_H #include #include @@ -19,7 +12,7 @@ #include #include #include -#include +#include #include #include #include "erofs_fs.h" @@ -28,58 +21,50 @@ #undef pr_fmt #define pr_fmt(fmt) "erofs: " fmt -#define errln(x, ...) pr_err(x "\n", ##__VA_ARGS__) -#define infoln(x, ...) pr_info(x "\n", ##__VA_ARGS__) +__printf(3, 4) void _erofs_err(struct super_block *sb, + const char *function, const char *fmt, ...); +#define erofs_err(sb, fmt, ...) \ + _erofs_err(sb, __func__, fmt "\n", ##__VA_ARGS__) +__printf(3, 4) void _erofs_info(struct super_block *sb, + const char *function, const char *fmt, ...); +#define erofs_info(sb, fmt, ...) \ + _erofs_info(sb, __func__, fmt "\n", ##__VA_ARGS__) #ifdef CONFIG_EROFS_FS_DEBUG -#define debugln(x, ...) pr_debug(x "\n", ##__VA_ARGS__) - -#define dbg_might_sleep might_sleep +#define erofs_dbg(x, ...) pr_debug(x "\n", ##__VA_ARGS__) #define DBG_BUGON BUG_ON #else -#define debugln(x, ...) ((void)0) - -#define dbg_might_sleep() ((void)0) +#define erofs_dbg(x, ...) ((void)0) #define DBG_BUGON(x) ((void)(x)) -#endif - -#ifdef CONFIG_EROFS_FAULT_INJECTION -enum { - FAULT_KMALLOC, - FAULT_MAX, -}; - -extern char *erofs_fault_name[FAULT_MAX]; -#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) - -struct erofs_fault_info { - atomic_t inject_ops; - unsigned int inject_rate; - unsigned int inject_type; -}; -#endif - -#ifdef CONFIG_EROFS_FS_ZIP_CACHE_BIPOLAR -#define EROFS_FS_ZIP_CACHE_LVL (2) -#elif defined(EROFS_FS_ZIP_CACHE_UNIPOLAR) -#define EROFS_FS_ZIP_CACHE_LVL (1) -#else -#define EROFS_FS_ZIP_CACHE_LVL (0) -#endif - -#if (!defined(EROFS_FS_HAS_MANAGED_CACHE) && (EROFS_FS_ZIP_CACHE_LVL > 0)) -#define EROFS_FS_HAS_MANAGED_CACHE -#endif +#endif /* !CONFIG_EROFS_FS_DEBUG */ /* EROFS_SUPER_MAGIC_V1 to represent the whole file system */ #define EROFS_SUPER_MAGIC EROFS_SUPER_MAGIC_V1 typedef u64 erofs_nid_t; +typedef u64 erofs_off_t; +/* data type for filesystem-wide blocks number */ +typedef u32 erofs_blk_t; struct erofs_sb_info { +#ifdef CONFIG_EROFS_FS_ZIP /* list for all registered superblocks, mainly for shrinker */ struct list_head list; struct mutex umount_mutex; + /* the dedicated workstation for compression */ + struct radix_tree_root workstn_tree; + + /* threshold for decompression synchronously */ + unsigned int max_sync_decompress_pages; + + unsigned int shrinker_run_no; + + /* current strategy of how to use managed cache */ + unsigned char cache_strategy; + + /* pseudo inode to manage cached pages */ + struct inode *managed_cache; +#endif /* CONFIG_EROFS_FS_ZIP */ u32 blocks; u32 meta_blkaddr; #ifdef CONFIG_EROFS_FS_XATTR @@ -88,18 +73,6 @@ struct erofs_sb_info { /* inode slot unit size in bit shift */ unsigned char islotbits; -#ifdef CONFIG_EROFS_FS_ZIP - /* cluster size in bit shift */ - unsigned char clusterbits; - - /* the dedicated workstation for compression */ - struct radix_tree_root workstn_tree; - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - struct inode *managed_cache; -#endif - -#endif u32 build_time_nsec; u64 build_time; @@ -111,69 +84,31 @@ struct erofs_sb_info { u8 uuid[16]; /* 128-bit uuid for volume */ u8 volume_name[16]; /* volume name */ - u32 requirements; - - char *dev_name; + u32 feature_compat; + u32 feature_incompat; unsigned int mount_opt; - unsigned int shrinker_run_no; - -#ifdef CONFIG_EROFS_FAULT_INJECTION - struct erofs_fault_info fault_info; /* For fault injection */ -#endif }; -#ifdef 
CONFIG_EROFS_FAULT_INJECTION -#define erofs_show_injection_info(type) \ - infoln("inject %s in %s of %pS", erofs_fault_name[type], \ - __func__, __builtin_return_address(0)) - -static inline bool time_to_inject(struct erofs_sb_info *sbi, int type) -{ - struct erofs_fault_info *ffi = &sbi->fault_info; - - if (!ffi->inject_rate) - return false; - - if (!IS_FAULT_SET(ffi, type)) - return false; - - atomic_inc(&ffi->inject_ops); - if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { - atomic_set(&ffi->inject_ops, 0); - return true; - } - return false; -} -#endif - -static inline void *erofs_kmalloc(struct erofs_sb_info *sbi, - size_t size, gfp_t flags) -{ -#ifdef CONFIG_EROFS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_KMALLOC)) { - erofs_show_injection_info(FAULT_KMALLOC); - return NULL; - } -#endif - return kmalloc(size, flags); -} - #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) #define EROFS_I_SB(inode) ((struct erofs_sb_info *)(inode)->i_sb->s_fs_info) /* Mount flags set via mount options or defaults */ #define EROFS_MOUNT_XATTR_USER 0x00000010 #define EROFS_MOUNT_POSIX_ACL 0x00000020 -#define EROFS_MOUNT_FAULT_INJECTION 0x00000040 #define clear_opt(sbi, option) ((sbi)->mount_opt &= ~EROFS_MOUNT_##option) #define set_opt(sbi, option) ((sbi)->mount_opt |= EROFS_MOUNT_##option) #define test_opt(sbi, option) ((sbi)->mount_opt & EROFS_MOUNT_##option) #ifdef CONFIG_EROFS_FS_ZIP -#define erofs_workstn_lock(sbi) xa_lock(&(sbi)->workstn_tree) -#define erofs_workstn_unlock(sbi) xa_unlock(&(sbi)->workstn_tree) +enum { + EROFS_ZIP_CACHE_DISABLED, + EROFS_ZIP_CACHE_READAHEAD, + EROFS_ZIP_CACHE_READAROUND +}; + +#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL) /* basic unit of the workstation of a super_block */ struct erofs_workgroup { @@ -184,8 +119,6 @@ struct erofs_workgroup { atomic_t refcount; }; -#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL) - #if defined(CONFIG_SMP) static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp, int val) @@ -242,54 +175,14 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) DBG_BUGON(v == EROFS_LOCKED_MAGIC); return v; } -#endif - -static inline bool erofs_workgroup_get(struct erofs_workgroup *grp, int *ocnt) -{ - int o; - -repeat: - o = erofs_wait_on_workgroup_freezed(grp); - - if (unlikely(o <= 0)) - return -1; - - if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o)) - goto repeat; - - *ocnt = o; - return 0; -} - -#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount) -#define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount) +#endif /* !CONFIG_SMP */ -extern int erofs_workgroup_put(struct erofs_workgroup *grp); - -extern struct erofs_workgroup *erofs_find_workgroup( - struct super_block *sb, pgoff_t index, bool *tag); - -extern int erofs_register_workgroup(struct super_block *sb, - struct erofs_workgroup *grp, bool tag); - -extern unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, - unsigned long nr_shrink, bool cleanup); - -static inline void erofs_workstation_cleanup_all(struct super_block *sb) -{ - erofs_shrink_workstation(EROFS_SB(sb), ~0UL, true); -} - -#ifdef EROFS_FS_HAS_MANAGED_CACHE -#define EROFS_UNALLOCATED_CACHED_PAGE ((void *)0x5F0EF00D) - -extern int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, - struct erofs_workgroup *egrp); -extern int erofs_try_to_free_cached_page(struct address_space *mapping, - struct page *page); -#endif - -#endif +/* hard limit of pages per compressed cluster */ +#define 
Z_EROFS_CLUSTER_MAX_PAGES (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT) +#define EROFS_PCPUBUF_NR_PAGES Z_EROFS_CLUSTER_MAX_PAGES +#else +#define EROFS_PCPUBUF_NR_PAGES 0 +#endif /* !CONFIG_EROFS_FS_ZIP */ /* we strictly follow PAGE_SIZE and no buffer head yet */ #define LOG_BLOCK_SIZE PAGE_SHIFT @@ -308,19 +201,6 @@ extern int erofs_try_to_free_cached_page(struct address_space *mapping, #define ROOT_NID(sb) ((sb)->root_nid) -#ifdef CONFIG_EROFS_FS_ZIP -/* hard limit of pages per compressed cluster */ -#define Z_EROFS_CLUSTER_MAX_PAGES (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT) - -/* page count of a compressed cluster */ -#define erofs_clusterpages(sbi) ((1 << (sbi)->clusterbits) / PAGE_SIZE) -#endif - -typedef u64 erofs_off_t; - -/* data type for filesystem-wide blocks number */ -typedef u32 erofs_blk_t; - #define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ) #define erofs_blkoff(addr) ((addr) % EROFS_BLKSIZ) #define blknr_to_addr(nr) ((erofs_off_t)(nr) * EROFS_BLKSIZ) @@ -331,75 +211,74 @@ static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid) } /* atomic flag definitions */ -#define EROFS_V_EA_INITED_BIT 0 +#define EROFS_I_EA_INITED_BIT 0 +#define EROFS_I_Z_INITED_BIT 1 /* bitlock definitions (arranged in reverse order) */ -#define EROFS_V_BL_XATTR_BIT (BITS_PER_LONG - 1) +#define EROFS_I_BL_XATTR_BIT (BITS_PER_LONG - 1) +#define EROFS_I_BL_Z_BIT (BITS_PER_LONG - 2) -struct erofs_vnode { +struct erofs_inode { erofs_nid_t nid; /* atomic flags (including bitlocks) */ unsigned long flags; - unsigned char data_mapping_mode; - /* inline size in bytes */ + unsigned char datalayout; unsigned char inode_isize; unsigned short xattr_isize; - unsigned xattr_shared_count; - unsigned *xattr_shared_xattrs; - - erofs_blk_t raw_blkaddr; + unsigned int xattr_shared_count; + unsigned int *xattr_shared_xattrs; + union { + erofs_blk_t raw_blkaddr; +#ifdef CONFIG_EROFS_FS_ZIP + struct { + unsigned short z_advise; + unsigned char z_algorithmtype[2]; + unsigned char z_logical_clusterbits; + unsigned char z_physical_clusterbits[2]; + }; +#endif /* CONFIG_EROFS_FS_ZIP */ + }; /* the corresponding vfs inode */ struct inode vfs_inode; }; -#define EROFS_V(ptr) \ - container_of(ptr, struct erofs_vnode, vfs_inode) - -#define __inode_advise(x, bit, bits) \ - (((x) >> (bit)) & ((1 << (bits)) - 1)) - -#define __inode_version(advise) \ - __inode_advise(advise, EROFS_I_VERSION_BIT, \ - EROFS_I_VERSION_BITS) - -#define __inode_data_mapping(advise) \ - __inode_advise(advise, EROFS_I_DATA_MAPPING_BIT,\ - EROFS_I_DATA_MAPPING_BITS) +#define EROFS_I(ptr) \ + container_of(ptr, struct erofs_inode, vfs_inode) -static inline unsigned long inode_datablocks(struct inode *inode) +static inline unsigned long erofs_inode_datablocks(struct inode *inode) { /* since i_size cannot be changed */ return DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ); } -static inline bool is_inode_layout_plain(struct inode *inode) +static inline unsigned int erofs_bitrange(unsigned int value, unsigned int bit, + unsigned int bits) { - return EROFS_V(inode)->data_mapping_mode == EROFS_INODE_LAYOUT_PLAIN; + + return (value >> bit) & ((1 << bits) - 1); } -static inline bool is_inode_layout_compression(struct inode *inode) + +static inline unsigned int erofs_inode_version(unsigned int value) { - return EROFS_V(inode)->data_mapping_mode == - EROFS_INODE_LAYOUT_COMPRESSION; + return erofs_bitrange(value, EROFS_I_VERSION_BIT, + EROFS_I_VERSION_BITS); } -static inline bool is_inode_layout_inline(struct inode *inode) +static inline unsigned int 
erofs_inode_datalayout(unsigned int value) { - return EROFS_V(inode)->data_mapping_mode == EROFS_INODE_LAYOUT_INLINE; + return erofs_bitrange(value, EROFS_I_DATALAYOUT_BIT, + EROFS_I_DATALAYOUT_BITS); } extern const struct super_operations erofs_sops; -extern const struct inode_operations erofs_dir_iops; -extern const struct file_operations erofs_dir_fops; extern const struct address_space_operations erofs_raw_access_aops; -#ifdef CONFIG_EROFS_FS_ZIP -extern const struct address_space_operations z_erofs_vle_normalaccess_aops; -#endif +extern const struct address_space_operations z_erofs_aops; /* * Logical to physical block mapping, used by erofs_map_blocks() @@ -428,6 +307,7 @@ extern const struct address_space_operations z_erofs_vle_normalaccess_aops; */ enum { BH_Zipped = BH_PrivateStart, + BH_FullMapped, }; /* Has a disk mapping */ @@ -436,150 +316,117 @@ enum { #define EROFS_MAP_META (1 << BH_Meta) /* The extent has been compressed */ #define EROFS_MAP_ZIPPED (1 << BH_Zipped) +/* The length of extent is full */ +#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped) struct erofs_map_blocks { erofs_off_t m_pa, m_la; u64 m_plen, m_llen; unsigned int m_flags; + + struct page *mpage; }; /* Flags used by erofs_map_blocks() */ #define EROFS_GET_BLOCKS_RAW 0x0001 -/* data.c */ -static inline struct bio *prepare_bio( - struct super_block *sb, - erofs_blk_t blkaddr, unsigned nr_pages, - bio_end_io_t endio) -{ - gfp_t gfp = GFP_NOIO; - struct bio *bio = bio_alloc(gfp, nr_pages); - - if (unlikely(bio == NULL) && - (current->flags & PF_MEMALLOC)) { - do { - nr_pages /= 2; - if (unlikely(!nr_pages)) { - bio = bio_alloc(gfp | __GFP_NOFAIL, 1); - BUG_ON(bio == NULL); - break; - } - bio = bio_alloc(gfp, nr_pages); - } while (bio == NULL); - } - - bio->bi_end_io = endio; - bio_set_dev(bio, sb->s_bdev); - bio->bi_iter.bi_sector = blkaddr << LOG_SECTORS_PER_BLOCK; - return bio; -} - -static inline void __submit_bio(struct bio *bio, unsigned op, unsigned op_flags) +/* zmap.c */ +#ifdef CONFIG_EROFS_FS_ZIP +int z_erofs_fill_inode(struct inode *inode); +int z_erofs_map_blocks_iter(struct inode *inode, + struct erofs_map_blocks *map, + int flags); +#else +static inline int z_erofs_fill_inode(struct inode *inode) { return -EOPNOTSUPP; } +static inline int z_erofs_map_blocks_iter(struct inode *inode, + struct erofs_map_blocks *map, + int flags) { - bio_set_op_attrs(bio, op, op_flags); - submit_bio(bio); + return -EOPNOTSUPP; } +#endif /* !CONFIG_EROFS_FS_ZIP */ -extern struct page *erofs_get_meta_page(struct super_block *sb, - erofs_blk_t blkaddr, bool prio); -extern int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int); -extern int erofs_map_blocks_iter(struct inode *, struct erofs_map_blocks *, - struct page **, int); - -struct erofs_map_blocks_iter { - struct erofs_map_blocks map; - struct page *mpage; -}; +/* data.c */ +struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr); +int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int); -static inline struct page * -erofs_get_inline_page(struct inode *inode, - erofs_blk_t blkaddr) +/* inode.c */ +static inline unsigned long erofs_inode_hash(erofs_nid_t nid) { - return erofs_get_meta_page(inode->i_sb, - blkaddr, S_ISDIR(inode->i_mode)); +#if BITS_PER_LONG == 32 + return (nid >> 32) ^ (nid & 0xffffffff); +#else + return nid; +#endif } -/* inode.c */ -extern struct inode *erofs_iget(struct super_block *sb, - erofs_nid_t nid, bool dir); +extern const struct inode_operations erofs_generic_iops; +extern const struct 
inode_operations erofs_symlink_iops; +extern const struct inode_operations erofs_fast_symlink_iops; + +struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir); +int erofs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags); + +/* namei.c */ +extern const struct inode_operations erofs_dir_iops; -/* dir.c */ int erofs_namei(struct inode *dir, struct qstr *name, - erofs_nid_t *nid, unsigned *d_type); + erofs_nid_t *nid, unsigned int *d_type); -/* xattr.c */ -#ifdef CONFIG_EROFS_FS_XATTR -extern const struct xattr_handler *erofs_xattr_handlers[]; -#endif +/* dir.c */ +extern const struct file_operations erofs_dir_fops; -/* symlink */ -#ifdef CONFIG_EROFS_FS_XATTR -extern const struct inode_operations erofs_symlink_xattr_iops; -extern const struct inode_operations erofs_fast_symlink_xattr_iops; -extern const struct inode_operations erofs_special_inode_operations; -#endif +/* utils.c / zdata.c */ +struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp); -static inline void set_inode_fast_symlink(struct inode *inode) -{ -#ifdef CONFIG_EROFS_FS_XATTR - inode->i_op = &erofs_fast_symlink_xattr_iops; +#if (EROFS_PCPUBUF_NR_PAGES > 0) +void *erofs_get_pcpubuf(unsigned int pagenr); +#define erofs_put_pcpubuf(buf) do { \ + (void)&(buf); \ + preempt_enable(); \ +} while (0) #else - inode->i_op = &simple_symlink_inode_operations; -#endif -} - -static inline bool is_inode_fast_symlink(struct inode *inode) +static inline void *erofs_get_pcpubuf(unsigned int pagenr) { -#ifdef CONFIG_EROFS_FS_XATTR - return inode->i_op == &erofs_fast_symlink_xattr_iops; -#else - return inode->i_op == &simple_symlink_inode_operations; -#endif + return ERR_PTR(-EOPNOTSUPP); } -static inline void *erofs_vmap(struct page **pages, unsigned int count) -{ -#ifdef CONFIG_EROFS_FS_USE_VM_MAP_RAM - int i = 0; - - while (1) { - void *addr = vm_map_ram(pages, count, -1, PAGE_KERNEL); - /* retry two more times (totally 3 times) */ - if (addr != NULL || ++i >= 3) - return addr; - vm_unmap_aliases(); - } - return NULL; -#else - return vmap(pages, count, VM_MAP, PAGE_KERNEL); +#define erofs_put_pcpubuf(buf) do {} while (0) #endif -} -static inline void erofs_vunmap(const void *mem, unsigned int count) -{ -#ifdef CONFIG_EROFS_FS_USE_VM_MAP_RAM - vm_unmap_ram(mem, count); +#ifdef CONFIG_EROFS_FS_ZIP +int erofs_workgroup_put(struct erofs_workgroup *grp); +struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, + pgoff_t index); +int erofs_register_workgroup(struct super_block *sb, + struct erofs_workgroup *grp); +void erofs_workgroup_free_rcu(struct erofs_workgroup *grp); +void erofs_shrinker_register(struct super_block *sb); +void erofs_shrinker_unregister(struct super_block *sb); +int __init erofs_init_shrinker(void); +void erofs_exit_shrinker(void); +int __init z_erofs_init_zip_subsystem(void); +void z_erofs_exit_zip_subsystem(void); +int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, + struct erofs_workgroup *egrp); +int erofs_try_to_free_cached_page(struct address_space *mapping, + struct page *page); #else - vunmap(mem); -#endif -} +static inline void erofs_shrinker_register(struct super_block *sb) {} +static inline void erofs_shrinker_unregister(struct super_block *sb) {} +static inline int erofs_init_shrinker(void) { return 0; } +static inline void erofs_exit_shrinker(void) {} +static inline int z_erofs_init_zip_subsystem(void) { return 0; } +static inline void z_erofs_exit_zip_subsystem(void) {} +#endif /* !CONFIG_EROFS_FS_ZIP */ -/* 
utils.c */ -extern struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp); - -extern void erofs_register_super(struct super_block *sb); -extern void erofs_unregister_super(struct super_block *sb); - -extern unsigned long erofs_shrink_count(struct shrinker *shrink, - struct shrink_control *sc); -extern unsigned long erofs_shrink_scan(struct shrinker *shrink, - struct shrink_control *sc); +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #ifndef lru_to_page #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) #endif -#endif - +#endif /* __EROFS_INTERNAL_H */ diff --git a/drivers/staging/erofs/lz4defs.h b/drivers/staging/erofs/lz4defs.h deleted file mode 100644 index 00a0b58a0871..000000000000 --- a/drivers/staging/erofs/lz4defs.h +++ /dev/null @@ -1,227 +0,0 @@ -#ifndef __LZ4DEFS_H__ -#define __LZ4DEFS_H__ - -/* - * lz4defs.h -- common and architecture specific defines for the kernel usage - - * LZ4 - Fast LZ compression algorithm - * Copyright (C) 2011-2016, Yann Collet. - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
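/*
 * A little-endian userspace sketch (not part of this patch) of the
 * core trick in the lz4defs.h match finder deleted below: XOR two
 * machine words, and the lowest set bit of the difference gives the
 * number of leading bytes that matched.  __builtin_ctzll() stands in
 * for the __ffs()-based LZ4_NbCommonBytes(); as in LZ4_count(), the
 * diff must be nonzero before the call.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static unsigned int nb_common_bytes_le(uint64_t diff)
{
	/* first differing bit, divided by 8, is the first differing byte */
	return (unsigned int)(__builtin_ctzll(diff) >> 3);
}

int main(void)
{
	uint64_t wa, wb;

	memcpy(&wa, "abcdefgh", 8);
	memcpy(&wb, "abcdEFGH", 8);
	/* prints 4: the first four bytes match */
	printf("%u\n", nb_common_bytes_le(wa ^ wb));
	return 0;
}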
- * You can contact the author at : - * - LZ4 homepage : http://www.lz4.org - * - LZ4 source repository : https://github.com/lz4/lz4 - * - * Changed for kernel usage by: - * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> - */ - -#include -#include /* memset, memcpy */ - -#define FORCE_INLINE __always_inline - -/*-************************************ - * Basic Types - **************************************/ -#include - -typedef uint8_t BYTE; -typedef uint16_t U16; -typedef uint32_t U32; -typedef int32_t S32; -typedef uint64_t U64; -typedef uintptr_t uptrval; - -/*-************************************ - * Architecture specifics - **************************************/ -#if defined(CONFIG_64BIT) -#define LZ4_ARCH64 1 -#else -#define LZ4_ARCH64 0 -#endif - -#if defined(__LITTLE_ENDIAN) -#define LZ4_LITTLE_ENDIAN 1 -#else -#define LZ4_LITTLE_ENDIAN 0 -#endif - -/*-************************************ - * Constants - **************************************/ -#define MINMATCH 4 - -#define WILDCOPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT (WILDCOPYLENGTH + MINMATCH) - -/* Increase this value ==> compression run slower on incompressible data */ -#define LZ4_SKIPTRIGGER 6 - -#define HASH_UNIT sizeof(size_t) - -#define KB (1 << 10) -#define MB (1 << 20) -#define GB (1U << 30) - -#define MAXD_LOG 16 -#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) -#define STEPSIZE sizeof(size_t) - -#define ML_BITS 4 -#define ML_MASK ((1U << ML_BITS) - 1) -#define RUN_BITS (8 - ML_BITS) -#define RUN_MASK ((1U << RUN_BITS) - 1) - -/*-************************************ - * Reading and writing into memory - **************************************/ -static FORCE_INLINE U16 LZ4_read16(const void *ptr) -{ - return get_unaligned((const U16 *)ptr); -} - -static FORCE_INLINE U32 LZ4_read32(const void *ptr) -{ - return get_unaligned((const U32 *)ptr); -} - -static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr) -{ - return get_unaligned((const size_t *)ptr); -} - -static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value) -{ - put_unaligned(value, (U16 *)memPtr); -} - -static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value) -{ - put_unaligned(value, (U32 *)memPtr); -} - -static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr) -{ - return get_unaligned_le16(memPtr); -} - -static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value) -{ - return put_unaligned_le16(value, memPtr); -} - -static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) -{ -#if LZ4_ARCH64 - U64 a = get_unaligned((const U64 *)src); - - put_unaligned(a, (U64 *)dst); -#else - U32 a = get_unaligned((const U32 *)src); - U32 b = get_unaligned((const U32 *)src + 1); - - put_unaligned(a, (U32 *)dst); - put_unaligned(b, (U32 *)dst + 1); -#endif -} - -/* - * customized variant of memcpy, - * which can overwrite up to 7 bytes beyond dstEnd - */ -static FORCE_INLINE void LZ4_wildCopy(void *dstPtr, - const void *srcPtr, void *dstEnd) -{ - BYTE *d = (BYTE *)dstPtr; - const BYTE *s = (const BYTE *)srcPtr; - BYTE *const e = (BYTE *)dstEnd; - - do { - LZ4_copy8(d, s); - d += 8; - s += 8; - } while (d < e); -} - -static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val) -{ -#if LZ4_LITTLE_ENDIAN - return __ffs(val) >> 3; -#else - return (BITS_PER_LONG - 1 - __fls(val)) >> 3; -#endif -} - -static FORCE_INLINE unsigned int LZ4_count( - const BYTE *pIn, - const BYTE *pMatch, - const BYTE *pInLimit) -{ - const BYTE *const pStart = pIn; - - while (likely(pIn < pInLimit - (STEPSIZE - 1))) { - size_t const diff = LZ4_read_ARCH(pMatch) ^ 
LZ4_read_ARCH(pIn); - - if (!diff) { - pIn += STEPSIZE; - pMatch += STEPSIZE; - continue; - } - - pIn += LZ4_NbCommonBytes(diff); - - return (unsigned int)(pIn - pStart); - } - -#if LZ4_ARCH64 - if ((pIn < (pInLimit - 3)) - && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { - pIn += 4; - pMatch += 4; - } -#endif - - if ((pIn < (pInLimit - 1)) - && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { - pIn += 2; - pMatch += 2; - } - - if ((pIn < pInLimit) && (*pMatch == *pIn)) - pIn++; - - return (unsigned int)(pIn - pStart); -} - -typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; -typedef enum { byPtr, byU32, byU16 } tableType_t; - -typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; -typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; - -typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; -typedef enum { full = 0, partial = 1 } earlyEnd_directive; - -#endif diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c index 023f64fa2c87..c5c0dd9d9033 100644 --- a/drivers/staging/erofs/namei.c +++ b/drivers/staging/erofs/namei.c @@ -1,16 +1,8 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only /* - * linux/drivers/staging/erofs/namei.c - * * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. + * https://www.huawei.com/ */ -#include "internal.h" #include "xattr.h" #include @@ -21,9 +13,9 @@ struct erofs_qstr { }; /* based on the end of qn is accurate and it must have the trailing '\0' */ -static inline int dirnamecmp(const struct erofs_qstr *qn, - const struct erofs_qstr *qd, - unsigned int *matched) +static inline int erofs_dirnamecmp(const struct erofs_qstr *qn, + const struct erofs_qstr *qd, + unsigned int *matched) { unsigned int i = *matched; @@ -71,16 +63,16 @@ static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name, unsigned int matched = min(startprfx, endprfx); struct erofs_qstr dname = { .name = data + nameoff, - .end = unlikely(mid >= ndirents - 1) ? + .end = mid >= ndirents - 1 ? 
data + dirblksize : data + nameoff_from_disk(de[mid + 1].nameoff, dirblksize) }; /* string comparison without already matched prefix */ - int ret = dirnamecmp(name, &dname, &matched); + int ret = erofs_dirnamecmp(name, &dname, &matched); - if (unlikely(!ret)) { + if (!ret) { return de + mid; } else if (ret > 0) { head = mid + 1; @@ -105,7 +97,7 @@ static struct page *find_target_block_classic(struct inode *dir, startprfx = endprfx = 0; head = 0; - back = inode_datablocks(dir) - 1; + back = erofs_inode_datablocks(dir) - 1; while (head <= back) { const int mid = head + (back - head) / 2; @@ -120,11 +112,14 @@ static struct page *find_target_block_classic(struct inode *dir, unsigned int matched; struct erofs_qstr dname; - if (unlikely(!ndirents)) { - DBG_BUGON(1); + if (!ndirents) { kunmap_atomic(de); put_page(page); - page = ERR_PTR(-EIO); + erofs_err(dir->i_sb, + "corrupted dir block %d @ nid %llu", + mid, EROFS_I(dir)->nid); + DBG_BUGON(1); + page = ERR_PTR(-EFSCORRUPTED); goto out; } @@ -139,17 +134,17 @@ static struct page *find_target_block_classic(struct inode *dir, EROFS_BLKSIZ); /* string comparison without already matched prefix */ - diff = dirnamecmp(name, &dname, &matched); + diff = erofs_dirnamecmp(name, &dname, &matched); kunmap_atomic(de); - if (unlikely(!diff)) { + if (!diff) { *_ndirents = 0; goto out; } else if (diff > 0) { head = mid + 1; startprfx = matched; - if (likely(!IS_ERR(candidate))) + if (!IS_ERR(candidate)) put_page(candidate); candidate = page; *_ndirents = ndirents; @@ -179,7 +174,7 @@ int erofs_namei(struct inode *dir, struct erofs_dirent *de; struct erofs_qstr qn; - if (unlikely(!dir->i_size)) + if (!dir->i_size) return -ENOENT; qn.name = name->name; @@ -188,7 +183,7 @@ int erofs_namei(struct inode *dir, ndirents = 0; page = find_target_block_classic(dir, &qn, &ndirents); - if (unlikely(IS_ERR(page))) + if (IS_ERR(page)) return PTR_ERR(page); data = kmap_atomic(page); @@ -198,7 +193,7 @@ int erofs_namei(struct inode *dir, else de = (struct erofs_dirent *)data; - if (likely(!IS_ERR(de))) { + if (!IS_ERR(de)) { *nid = le64_to_cpu(de->nid); *d_type = de->file_type; } @@ -211,11 +206,12 @@ int erofs_namei(struct inode *dir, /* NOTE: i_mutex is already held by vfs */ static struct dentry *erofs_lookup(struct inode *dir, - struct dentry *dentry, unsigned int flags) + struct dentry *dentry, + unsigned int flags) { int err; erofs_nid_t nid; - unsigned d_type; + unsigned int d_type; struct inode *inode; DBG_BUGON(!d_really_is_negative(dentry)); @@ -225,7 +221,7 @@ static struct dentry *erofs_lookup(struct inode *dir, trace_erofs_lookup(dir, dentry, flags); /* file name exceeds fs limit */ - if (unlikely(dentry->d_name.len > EROFS_NAME_LEN)) + if (dentry->d_name.len > EROFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); /* false uninitialized warnings on gcc 4.8.x */ @@ -234,29 +230,19 @@ static struct dentry *erofs_lookup(struct inode *dir, if (err == -ENOENT) { /* negative dentry */ inode = NULL; - goto negative_out; - } else if (unlikely(err)) - return ERR_PTR(err); - - debugln("%s, %s (nid %llu) found, d_type %u", __func__, - dentry->d_name.name, nid, d_type); - - inode = erofs_iget(dir->i_sb, nid, d_type == EROFS_FT_DIR); - if (IS_ERR(inode)) - return ERR_CAST(inode); - -negative_out: + } else if (err) { + inode = ERR_PTR(err); + } else { + erofs_dbg("%s, %s (nid %llu) found, d_type %u", __func__, + dentry->d_name.name, nid, d_type); + inode = erofs_iget(dir->i_sb, nid, d_type == EROFS_FT_DIR); + } return d_splice_alias(inode, dentry); } const struct inode_operations 
erofs_dir_iops = { .lookup = erofs_lookup, -}; - -const struct inode_operations erofs_dir_xattr_iops = { - .lookup = erofs_lookup, -#ifdef CONFIG_EROFS_FS_XATTR + .getattr = erofs_getattr, .listxattr = erofs_listxattr, -#endif + .get_acl = erofs_get_acl, }; - diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c index b49ebdf6ebda..13977976c719 100644 --- a/drivers/staging/erofs/super.c +++ b/drivers/staging/erofs/super.c @@ -1,188 +1,248 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only /* - * linux/drivers/staging/erofs/super.c - * * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. + * https://www.huawei.com/ */ #include #include #include #include #include -#include "internal.h" +#include +#include "xattr.h" #define CREATE_TRACE_POINTS #include static struct kmem_cache *erofs_inode_cachep __read_mostly; -static void init_once(void *ptr) +void _erofs_err(struct super_block *sb, const char *function, + const char *fmt, ...) { - struct erofs_vnode *vi = ptr; + struct va_format vaf; + va_list args; - inode_init_once(&vi->vfs_inode); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("(device %s): %s: %pV", sb->s_id, function, &vaf); + va_end(args); } -static int erofs_init_inode_cache(void) +void _erofs_info(struct super_block *sb, const char *function, + const char *fmt, ...) { - erofs_inode_cachep = kmem_cache_create("erofs_inode", - sizeof(struct erofs_vnode), 0, - SLAB_RECLAIM_ACCOUNT, init_once); + struct va_format vaf; + va_list args; + + va_start(args, fmt); - return erofs_inode_cachep != NULL ? 0 : -ENOMEM; + vaf.fmt = fmt; + vaf.va = &args; + + pr_info("(device %s): %pV", sb->s_id, &vaf); + va_end(args); } -static void erofs_exit_inode_cache(void) +static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata) { - kmem_cache_destroy(erofs_inode_cachep); + struct erofs_super_block *dsb; + u32 expected_crc, crc; + + dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET, + EROFS_BLKSIZ - EROFS_SUPER_OFFSET, GFP_KERNEL); + if (!dsb) + return -ENOMEM; + + expected_crc = le32_to_cpu(dsb->checksum); + dsb->checksum = 0; + /* to allow for x86 boot sectors and other oddities. */ + crc = crc32c(~0, dsb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET); + kfree(dsb); + + if (crc != expected_crc) { + erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected", + crc, expected_crc); + return -EBADMSG; + } + return 0; } -static struct inode *alloc_inode(struct super_block *sb) +static void erofs_inode_init_once(void *ptr) { - struct erofs_vnode *vi = + struct erofs_inode *vi = ptr; + + inode_init_once(&vi->vfs_inode); +} + +static struct inode *erofs_alloc_inode(struct super_block *sb) +{ + struct erofs_inode *vi = kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL); - if (vi == NULL) + if (!vi) return NULL; /* zero out everything except vfs_inode */ - memset(vi, 0, offsetof(struct erofs_vnode, vfs_inode)); + memset(vi, 0, offsetof(struct erofs_inode, vfs_inode)); return &vi->vfs_inode; } static void i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - struct erofs_vnode *vi = EROFS_V(inode); + struct erofs_inode *vi = EROFS_I(inode); - /* be careful RCU symlink path (see ext4_inode_info->i_data)! 
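/*
 * A reference sketch (not part of this patch) of the superblock
 * checksum scheme that erofs_superblock_csum_verify() above
 * implements: read out the stored 32-bit value, clear the field, and
 * recompute crc32c over the block starting at the superblock offset
 * with an all-ones seed.  crc32c_ref() is a slow bitwise stand-in;
 * the exact seeding/finalization of the in-kernel crc32c() is
 * abstracted away here, so treat this as shape, not a byte-for-byte
 * reimplementation.
 */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

static uint32_t crc32c_ref(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)	/* reflected poly */
			crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1u));
	}
	return crc;
}

/* returns 0 when the recomputed checksum matches the stored one */
static int verify_block_csum(uint8_t *blk, size_t len, size_t csum_off)
{
	uint32_t expected;

	/* stored little-endian, like dsb->checksum */
	expected = (uint32_t)blk[csum_off] |
		   (uint32_t)blk[csum_off + 1] << 8 |
		   (uint32_t)blk[csum_off + 2] << 16 |
		   (uint32_t)blk[csum_off + 3] << 24;
	blk[csum_off] = blk[csum_off + 1] = 0;
	blk[csum_off + 2] = blk[csum_off + 3] = 0;
	return crc32c_ref(~0u, blk, len) == expected ? 0 : -1;
}

int main(void)
{
	uint8_t blk[64] = { 1, 2, 3, 4 };	/* csum field (off 8) is 0 */
	uint32_t c = crc32c_ref(~0u, blk, sizeof(blk));

	/* truncating byte stores build the little-endian field */
	blk[8] = c; blk[9] = c >> 8; blk[10] = c >> 16; blk[11] = c >> 24;
	/* prints 0: the round trip verifies */
	printf("%d\n", verify_block_csum(blk, sizeof(blk), 8));
	return 0;
}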
*/ - if (is_inode_fast_symlink(inode)) + /* be careful of RCU symlink path */ + if (inode->i_op == &erofs_fast_symlink_iops) kfree(inode->i_link); - kfree(vi->xattr_shared_xattrs); kmem_cache_free(erofs_inode_cachep, vi); } -static void destroy_inode(struct inode *inode) +static void erofs_destroy_inode(struct inode *inode) { call_rcu(&inode->i_rcu, i_callback); } static bool check_layout_compatibility(struct super_block *sb, - struct erofs_super_block *layout) + struct erofs_super_block *dsb) { - const unsigned int requirements = le32_to_cpu(layout->requirements); + const unsigned int feature = le32_to_cpu(dsb->feature_incompat); - EROFS_SB(sb)->requirements = requirements; + EROFS_SB(sb)->feature_incompat = feature; /* check if current kernel meets all mandatory requirements */ - if (requirements & (~EROFS_ALL_REQUIREMENTS)) { - errln("unidentified requirements %x, please upgrade kernel version", - requirements & ~EROFS_ALL_REQUIREMENTS); + if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) { + erofs_err(sb, + "unidentified incompatible feature %x, please upgrade kernel version", + feature & ~EROFS_ALL_FEATURE_INCOMPAT); return false; } return true; } -static int superblock_read(struct super_block *sb) +static int erofs_read_superblock(struct super_block *sb) { struct erofs_sb_info *sbi; - struct buffer_head *bh; - struct erofs_super_block *layout; - unsigned blkszbits; + struct page *page; + struct erofs_super_block *dsb; + unsigned int blkszbits; + void *data; int ret; - bh = sb_bread(sb, 0); - - if (bh == NULL) { - errln("cannot read erofs superblock"); - return -EIO; + page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL); + if (IS_ERR(page)) { + erofs_err(sb, "cannot read erofs superblock"); + return PTR_ERR(page); } sbi = EROFS_SB(sb); - layout = (struct erofs_super_block *)((u8 *)bh->b_data - + EROFS_SUPER_OFFSET); + + data = kmap(page); + dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); ret = -EINVAL; - if (le32_to_cpu(layout->magic) != EROFS_SUPER_MAGIC_V1) { - errln("cannot find valid erofs superblock"); + if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) { + erofs_err(sb, "cannot find valid erofs superblock"); goto out; } - blkszbits = layout->blkszbits; + sbi->feature_compat = le32_to_cpu(dsb->feature_compat); + if (sbi->feature_compat & EROFS_FEATURE_COMPAT_SB_CHKSUM) { + ret = erofs_superblock_csum_verify(sb, data); + if (ret) + goto out; + } + + ret = -EINVAL; + blkszbits = dsb->blkszbits; /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */ - if (unlikely(blkszbits != LOG_BLOCK_SIZE)) { - errln("blksize %u isn't supported on this platform", - 1 << blkszbits); + if (blkszbits != LOG_BLOCK_SIZE) { + erofs_err(sb, "blkszbits %u isn't supported on this platform", + blkszbits); goto out; } - if (!check_layout_compatibility(sb, layout)) + if (!check_layout_compatibility(sb, dsb)) goto out; - sbi->blocks = le32_to_cpu(layout->blocks); - sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr); + sbi->blocks = le32_to_cpu(dsb->blocks); + sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); #ifdef CONFIG_EROFS_FS_XATTR - sbi->xattr_blkaddr = le32_to_cpu(layout->xattr_blkaddr); + sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); #endif - sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1; -#ifdef CONFIG_EROFS_FS_ZIP - sbi->clusterbits = 12; - - if (1 << (sbi->clusterbits - 12) > Z_EROFS_CLUSTER_MAX_PAGES) - errln("clusterbits %u is not supported on this kernel", - sbi->clusterbits); -#endif - - sbi->root_nid = le16_to_cpu(layout->root_nid); - 
sbi->inos = le64_to_cpu(layout->inos); + sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); + sbi->root_nid = le16_to_cpu(dsb->root_nid); + sbi->inos = le64_to_cpu(dsb->inos); - sbi->build_time = le64_to_cpu(layout->build_time); - sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec); + sbi->build_time = le64_to_cpu(dsb->build_time); + sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec); - memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid)); - memcpy(sbi->volume_name, layout->volume_name, - sizeof(layout->volume_name)); + memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid)); + ret = strscpy(sbi->volume_name, dsb->volume_name, + sizeof(dsb->volume_name)); + if (ret < 0) { /* -E2BIG */ + erofs_err(sb, "bad volume name without NIL terminator"); + ret = -EFSCORRUPTED; + goto out; + } ret = 0; out: - brelse(bh); + kunmap(page); + put_page(page); return ret; } -#ifdef CONFIG_EROFS_FAULT_INJECTION -char *erofs_fault_name[FAULT_MAX] = { - [FAULT_KMALLOC] = "kmalloc", -}; - -static void erofs_build_fault_attr(struct erofs_sb_info *sbi, - unsigned int rate) +#ifdef CONFIG_EROFS_FS_ZIP +static int erofs_build_cache_strategy(struct super_block *sb, + substring_t *args) { - struct erofs_fault_info *ffi = &sbi->fault_info; + struct erofs_sb_info *sbi = EROFS_SB(sb); + const char *cs = match_strdup(args); + int err = 0; - if (rate) { - atomic_set(&ffi->inject_ops, 0); - ffi->inject_rate = rate; - ffi->inject_type = (1 << FAULT_MAX) - 1; + if (!cs) { + erofs_err(sb, "Not enough memory to store cache strategy"); + return -ENOMEM; + } + + if (!strcmp(cs, "disabled")) { + sbi->cache_strategy = EROFS_ZIP_CACHE_DISABLED; + } else if (!strcmp(cs, "readahead")) { + sbi->cache_strategy = EROFS_ZIP_CACHE_READAHEAD; + } else if (!strcmp(cs, "readaround")) { + sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND; } else { - memset(ffi, 0, sizeof(struct erofs_fault_info)); + erofs_err(sb, "Unrecognized cache strategy \"%s\"", cs); + err = -EINVAL; } + kfree(cs); + return err; +} +#else +static int erofs_build_cache_strategy(struct super_block *sb, + substring_t *args) +{ + erofs_info(sb, "EROFS compression is disabled, so cache strategy is ignored"); + return 0; } #endif -static void default_options(struct erofs_sb_info *sbi) +/* set up default EROFS parameters */ +static void erofs_default_options(struct erofs_sb_info *sbi) { +#ifdef CONFIG_EROFS_FS_ZIP + sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND; + sbi->max_sync_decompress_pages = 3; +#endif #ifdef CONFIG_EROFS_FS_XATTR set_opt(sbi, XATTR_USER); #endif - #ifdef CONFIG_EROFS_FS_POSIX_ACL set_opt(sbi, POSIX_ACL); #endif @@ -193,7 +253,7 @@ enum { Opt_nouser_xattr, Opt_acl, Opt_noacl, - Opt_fault_injection, + Opt_cache_strategy, Opt_err }; @@ -202,20 +262,20 @@ static match_table_t erofs_tokens = { {Opt_nouser_xattr, "nouser_xattr"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, - {Opt_fault_injection, "fault_injection=%u"}, + {Opt_cache_strategy, "cache_strategy=%s"}, {Opt_err, NULL} }; -static int parse_options(struct super_block *sb, char *options) +static int erofs_parse_options(struct super_block *sb, char *options) { substring_t args[MAX_OPT_ARGS]; char *p; - int arg = 0; + int err; if (!options) return 0; - while ((p = strsep(&options, ",")) != NULL) { + while ((p = strsep(&options, ","))) { int token; if (!*p) @@ -234,10 +294,10 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_user_xattr: - infoln("user_xattr options not supported"); + erofs_info(sb, "user_xattr options not supported"); break; case 
Opt_nouser_xattr: - infoln("nouser_xattr options not supported"); + erofs_info(sb, "nouser_xattr options not supported"); break; #endif #ifdef CONFIG_EROFS_FS_POSIX_ACL @@ -249,36 +309,29 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_acl: - infoln("acl options not supported"); + erofs_info(sb, "acl options not supported"); break; case Opt_noacl: - infoln("noacl options not supported"); + erofs_info(sb, "noacl options not supported"); break; #endif - case Opt_fault_injection: - if (args->from && match_int(args, &arg)) - return -EINVAL; -#ifdef CONFIG_EROFS_FAULT_INJECTION - erofs_build_fault_attr(EROFS_SB(sb), arg); - set_opt(EROFS_SB(sb), FAULT_INJECTION); -#else - infoln("FAULT_INJECTION was not selected"); -#endif + case Opt_cache_strategy: + err = erofs_build_cache_strategy(sb, args); + if (err) + return err; break; default: - errln("Unrecognized mount option \"%s\" " - "or missing value", p); + erofs_err(sb, "Unrecognized mount option \"%s\" or missing value", p); return -EINVAL; } } return 0; } -#ifdef EROFS_FS_HAS_MANAGED_CACHE - +#ifdef CONFIG_EROFS_FS_ZIP static const struct address_space_operations managed_cache_aops; -static int managed_cache_releasepage(struct page *page, gfp_t gfp_mask) +static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask) { int ret = 1; /* 0 - busy */ struct address_space *const mapping = page->mapping; @@ -292,8 +345,9 @@ static int managed_cache_releasepage(struct page *page, gfp_t gfp_mask) return ret; } -static void managed_cache_invalidatepage(struct page *page, - unsigned int offset, unsigned int length) +static void erofs_managed_cache_invalidatepage(struct page *page, + unsigned int offset, + unsigned int length) { const unsigned int stop = length + offset; @@ -303,227 +357,147 @@ static void managed_cache_invalidatepage(struct page *page, DBG_BUGON(stop > PAGE_SIZE || stop < length); if (offset == 0 && stop == PAGE_SIZE) - while (!managed_cache_releasepage(page, GFP_NOFS)) + while (!erofs_managed_cache_releasepage(page, GFP_NOFS)) cond_resched(); } static const struct address_space_operations managed_cache_aops = { - .releasepage = managed_cache_releasepage, - .invalidatepage = managed_cache_invalidatepage, + .releasepage = erofs_managed_cache_releasepage, + .invalidatepage = erofs_managed_cache_invalidatepage, }; -static struct inode *erofs_init_managed_cache(struct super_block *sb) +static int erofs_init_managed_cache(struct super_block *sb) { - struct inode *inode = new_inode(sb); + struct erofs_sb_info *const sbi = EROFS_SB(sb); + struct inode *const inode = new_inode(sb); - if (unlikely(inode == NULL)) - return ERR_PTR(-ENOMEM); + if (!inode) + return -ENOMEM; set_nlink(inode, 1); inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &managed_cache_aops; mapping_set_gfp_mask(inode->i_mapping, - GFP_NOFS | __GFP_HIGHMEM | - __GFP_MOVABLE | __GFP_NOFAIL); - return inode; + GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE); + sbi->managed_cache = inode; + return 0; } - +#else +static int erofs_init_managed_cache(struct super_block *sb) { return 0; } #endif -static int erofs_read_super(struct super_block *sb, - const char *dev_name, void *data, int silent) +static int erofs_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; struct erofs_sb_info *sbi; - int err = -EINVAL; + int err; - infoln("read_super, device -> %s", dev_name); - infoln("options -> %s", (char *)data); + sb->s_magic = EROFS_SUPER_MAGIC; - if (unlikely(!sb_set_blocksize(sb, EROFS_BLKSIZ))) { - 
errln("failed to set erofs blksize"); - goto err; + if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) { + erofs_err(sb, "failed to set erofs blksize"); + return -EINVAL; } - sbi = kzalloc(sizeof(struct erofs_sb_info), GFP_KERNEL); - if (unlikely(sbi == NULL)) { - err = -ENOMEM; - goto err; - } - sb->s_fs_info = sbi; + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; - err = superblock_read(sb); + sb->s_fs_info = sbi; + err = erofs_read_superblock(sb); if (err) - goto err_sbread; + return err; - sb->s_magic = EROFS_SUPER_MAGIC; sb->s_flags |= SB_RDONLY | SB_NOATIME; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_time_gran = 1; sb->s_op = &erofs_sops; - -#ifdef CONFIG_EROFS_FS_XATTR sb->s_xattr = erofs_xattr_handlers; -#endif /* set erofs default mount options */ - default_options(sbi); + erofs_default_options(sbi); - err = parse_options(sb, data); + err = erofs_parse_options(sb, data); if (err) - goto err_parseopt; + return err; - if (!silent) - infoln("root inode @ nid %llu", ROOT_NID(sbi)); + if (test_opt(sbi, POSIX_ACL)) + sb->s_flags |= SB_POSIXACL; + else + sb->s_flags &= ~SB_POSIXACL; #ifdef CONFIG_EROFS_FS_ZIP INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC); #endif -#ifdef EROFS_FS_HAS_MANAGED_CACHE - sbi->managed_cache = erofs_init_managed_cache(sb); - if (IS_ERR(sbi->managed_cache)) { - err = PTR_ERR(sbi->managed_cache); - goto err_init_managed_cache; - } -#endif - /* get the root inode */ inode = erofs_iget(sb, ROOT_NID(sbi), true); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto err_iget; - } + if (IS_ERR(inode)) + return PTR_ERR(inode); if (!S_ISDIR(inode->i_mode)) { - errln("rootino(nid %llu) is not a directory(i_mode %o)", - ROOT_NID(sbi), inode->i_mode); - err = -EINVAL; - goto err_isdir; + erofs_err(sb, "rootino(nid %llu) is not a directory(i_mode %o)", + ROOT_NID(sbi), inode->i_mode); + iput(inode); + return -EINVAL; } sb->s_root = d_make_root(inode); - if (sb->s_root == NULL) { - err = -ENOMEM; - goto err_makeroot; - } - - /* save the device name to sbi */ - sbi->dev_name = __getname(); - if (sbi->dev_name == NULL) { - err = -ENOMEM; - goto err_devname; - } - - snprintf(sbi->dev_name, PATH_MAX, "%s", dev_name); - sbi->dev_name[PATH_MAX - 1] = '\0'; + if (!sb->s_root) + return -ENOMEM; - erofs_register_super(sb); + erofs_shrinker_register(sb); + /* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */ + err = erofs_init_managed_cache(sb); + if (err) + return err; - if (!silent) - infoln("mounted on %s with opts: %s.", dev_name, - (char *)data); + erofs_info(sb, "mounted with opts: %s, root inode @ nid %llu.", + (char *)data, ROOT_NID(sbi)); return 0; - /* - * please add a label for each exit point and use - * the following name convention, thus new features - * can be integrated easily without renaming labels. - */ -err_devname: - dput(sb->s_root); -err_makeroot: -err_isdir: - if (sb->s_root == NULL) - iput(inode); -err_iget: -#ifdef EROFS_FS_HAS_MANAGED_CACHE - iput(sbi->managed_cache); -err_init_managed_cache: -#endif -err_parseopt: -err_sbread: - sb->s_fs_info = NULL; - kfree(sbi); -err: - return err; +} + +static struct dentry *erofs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, erofs_fill_super); } /* * could be triggered after deactivate_locked_super() * is called, thus including umount and failed to initialize. 
*/ -static void erofs_put_super(struct super_block *sb) +static void erofs_kill_sb(struct super_block *sb) { - struct erofs_sb_info *sbi = EROFS_SB(sb); - - /* for cases which are failed in "read_super" */ - if (sbi == NULL) - return; + struct erofs_sb_info *sbi; WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC); - infoln("unmounted for %s", sbi->dev_name); - __putname(sbi->dev_name); - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - iput(sbi->managed_cache); -#endif - - mutex_lock(&sbi->umount_mutex); - -#ifdef CONFIG_EROFS_FS_ZIP - erofs_workstation_cleanup_all(sb); -#endif - - erofs_unregister_super(sb); - mutex_unlock(&sbi->umount_mutex); + kill_block_super(sb); + sbi = EROFS_SB(sb); + if (!sbi) + return; kfree(sbi); sb->s_fs_info = NULL; } - -struct erofs_mount_private { - const char *dev_name; - char *options; -}; - -/* support mount_bdev() with options */ -static int erofs_fill_super(struct super_block *sb, - void *_priv, int silent) -{ - struct erofs_mount_private *priv = _priv; - - return erofs_read_super(sb, priv->dev_name, - priv->options, silent); -} - -static struct dentry *erofs_mount( - struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +/* called when ->s_root is non-NULL */ +static void erofs_put_super(struct super_block *sb) { - struct erofs_mount_private priv = { - .dev_name = dev_name, - .options = data - }; + struct erofs_sb_info *const sbi = EROFS_SB(sb); - return mount_bdev(fs_type, flags, dev_name, - &priv, erofs_fill_super); -} + DBG_BUGON(!sbi); -static void erofs_kill_sb(struct super_block *sb) -{ - kill_block_super(sb); + erofs_shrinker_unregister(sb); +#ifdef CONFIG_EROFS_FS_ZIP + iput(sbi->managed_cache); + sbi->managed_cache = NULL; +#endif } -static struct shrinker erofs_shrinker_info = { - .scan_objects = erofs_shrink_scan, - .count_objects = erofs_shrink_count, - .seeks = DEFAULT_SEEKS, -}; - static struct file_system_type erofs_fs_type = { .owner = THIS_MODULE, .name = "erofs", @@ -533,47 +507,41 @@ static struct file_system_type erofs_fs_type = { }; MODULE_ALIAS_FS("erofs"); -#ifdef CONFIG_EROFS_FS_ZIP -extern int z_erofs_init_zip_subsystem(void); -extern void z_erofs_exit_zip_subsystem(void); -#endif - static int __init erofs_module_init(void) { int err; erofs_check_ondisk_layout_definitions(); - infoln("initializing erofs " EROFS_VERSION); - err = erofs_init_inode_cache(); - if (err) + erofs_inode_cachep = kmem_cache_create("erofs_inode", + sizeof(struct erofs_inode), 0, + SLAB_RECLAIM_ACCOUNT, + erofs_inode_init_once); + if (!erofs_inode_cachep) { + err = -ENOMEM; goto icache_err; + } - err = register_shrinker(&erofs_shrinker_info); + err = erofs_init_shrinker(); if (err) goto shrinker_err; -#ifdef CONFIG_EROFS_FS_ZIP err = z_erofs_init_zip_subsystem(); if (err) goto zip_err; -#endif err = register_filesystem(&erofs_fs_type); if (err) goto fs_err; - infoln("successfully to initialize erofs"); return 0; fs_err: -#ifdef CONFIG_EROFS_FS_ZIP z_erofs_exit_zip_subsystem(); zip_err: -#endif - unregister_shrinker(&erofs_shrinker_info); + erofs_exit_shrinker(); shrinker_err: - erofs_exit_inode_cache(); + kmem_cache_destroy(erofs_inode_cachep); icache_err: return err; } @@ -581,12 +549,12 @@ icache_err: static void __exit erofs_module_exit(void) { unregister_filesystem(&erofs_fs_type); -#ifdef CONFIG_EROFS_FS_ZIP z_erofs_exit_zip_subsystem(); -#endif - unregister_shrinker(&erofs_shrinker_info); - erofs_exit_inode_cache(); - infoln("successfully finalize erofs"); + erofs_exit_shrinker(); + + /* Ensure all RCU free inodes are safe before cache is 
destroyed. */ + rcu_barrier(); + kmem_cache_destroy(erofs_inode_cachep); } /* get filesystem statistics */ @@ -627,26 +595,45 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root) else seq_puts(seq, ",noacl"); #endif -#ifdef CONFIG_EROFS_FAULT_INJECTION - if (test_opt(sbi, FAULT_INJECTION)) - seq_printf(seq, ",fault_injection=%u", - sbi->fault_info.inject_rate); +#ifdef CONFIG_EROFS_FS_ZIP + if (sbi->cache_strategy == EROFS_ZIP_CACHE_DISABLED) { + seq_puts(seq, ",cache_strategy=disabled"); + } else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAHEAD) { + seq_puts(seq, ",cache_strategy=readahead"); + } else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAROUND) { + seq_puts(seq, ",cache_strategy=readaround"); + } #endif return 0; } static int erofs_remount(struct super_block *sb, int *flags, char *data) { + struct erofs_sb_info *sbi = EROFS_SB(sb); + unsigned int org_mnt_opt = sbi->mount_opt; + int err; + DBG_BUGON(!sb_rdonly(sb)); + err = erofs_parse_options(sb, data); + if (err) + goto out; + + if (test_opt(sbi, POSIX_ACL)) + sb->s_flags |= SB_POSIXACL; + else + sb->s_flags &= ~SB_POSIXACL; *flags |= SB_RDONLY; return 0; +out: + sbi->mount_opt = org_mnt_opt; + return err; } const struct super_operations erofs_sops = { .put_super = erofs_put_super, - .alloc_inode = alloc_inode, - .destroy_inode = destroy_inode, + .alloc_inode = erofs_alloc_inode, + .destroy_inode = erofs_destroy_inode, .statfs = erofs_statfs, .show_options = erofs_show_options, .remount_fs = erofs_remount, @@ -656,6 +643,5 @@ module_init(erofs_module_init); module_exit(erofs_module_exit); MODULE_DESCRIPTION("Enhanced ROM File System"); -MODULE_AUTHOR("Gao Xiang, Yu Chao, Miao Xie, CONSUMER BG, HUAWEI Inc."); +MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc."); MODULE_LICENSE("GPL"); - diff --git a/drivers/staging/erofs/include/linux/tagptr.h b/drivers/staging/erofs/tagptr.h similarity index 93% rename from drivers/staging/erofs/include/linux/tagptr.h rename to drivers/staging/erofs/tagptr.h index ccd106dbd48e..64ceb7270b5c 100644 --- a/drivers/staging/erofs/include/linux/tagptr.h +++ b/drivers/staging/erofs/tagptr.h @@ -1,11 +1,9 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Tagged pointer implementation - * - * Copyright (C) 2018 Gao Xiang +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * A tagged pointer implementation */ -#ifndef _LINUX_TAGPTR_H -#define _LINUX_TAGPTR_H +#ifndef __EROFS_FS_TAGPTR_H +#define __EROFS_FS_TAGPTR_H #include #include @@ -106,5 +104,4 @@ tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); }) ptptr->v &= ~tags; \ *ptptr; }) -#endif - +#endif /* __EROFS_FS_TAGPTR_H */ diff --git a/drivers/staging/erofs/unzip_lz4.c b/drivers/staging/erofs/unzip_lz4.c deleted file mode 100644 index b1ea23f66c4e..000000000000 --- a/drivers/staging/erofs/unzip_lz4.c +++ /dev/null @@ -1,251 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause -/* - * linux/drivers/staging/erofs/unzip_lz4.c - * - * Copyright (C) 2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang - * - * Original code taken from 'linux/lib/lz4/lz4_decompress.c' - */ - -/* - * LZ4 - Fast LZ compression algorithm - * Copyright (C) 2011 - 2016, Yann Collet. 
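/*
 * A design aside (not part of this patch): the seq_puts() chain in
 * erofs_show_options() above maps the cache-strategy enum back to its
 * option string; the same mapping can be table-driven so the parser
 * and the printer share one source of names.  cache_names[] is
 * hypothetical, not something the patch defines.
 */
#include <stdio.h>

enum { CACHE_DISABLED, CACHE_READAHEAD, CACHE_READAROUND };

static const char *const cache_names[] = {
	[CACHE_DISABLED]   = "disabled",
	[CACHE_READAHEAD]  = "readahead",
	[CACHE_READAROUND] = "readaround",
};

int main(void)
{
	int strategy = CACHE_READAROUND;

	printf(",cache_strategy=%s\n", cache_names[strategy]);
	return 0;
}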
- * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php) - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * You can contact the author at : - * - LZ4 homepage : http://www.lz4.org - * - LZ4 source repository : https://github.com/lz4/lz4 - * - * Changed for kernel usage by: - * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> - */ -#include "internal.h" -#include -#include "lz4defs.h" - -/* - * no public solution to solve our requirement yet. - * see: - * https://groups.google.com/forum/#!topic/lz4c/_3kkz5N6n00 - */ -static FORCE_INLINE int customized_lz4_decompress_safe_partial( - const void * const source, - void * const dest, - int inputSize, - int outputSize) -{ - /* Local Variables */ - const BYTE *ip = (const BYTE *) source; - const BYTE * const iend = ip + inputSize; - - BYTE *op = (BYTE *) dest; - BYTE * const oend = op + outputSize; - BYTE *cpy; - - static const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; - static const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; - - /* Empty output buffer */ - if (unlikely(outputSize == 0)) - return ((inputSize == 1) && (*ip == 0)) ? 
0 : -1; - - /* Main Loop : decode sequences */ - while (1) { - size_t length; - const BYTE *match; - size_t offset; - - /* get literal length */ - unsigned int const token = *ip++; - - length = token>>ML_BITS; - - if (length == RUN_MASK) { - unsigned int s; - - do { - s = *ip++; - length += s; - } while ((ip < iend - RUN_MASK) & (s == 255)); - - if (unlikely((size_t)(op + length) < (size_t)(op))) { - /* overflow detection */ - goto _output_error; - } - if (unlikely((size_t)(ip + length) < (size_t)(ip))) { - /* overflow detection */ - goto _output_error; - } - } - - /* copy literals */ - cpy = op + length; - if ((cpy > oend - WILDCOPYLENGTH) || - (ip + length > iend - (2 + 1 + LASTLITERALS))) { - if (cpy > oend) { - memcpy(op, ip, length = oend - op); - op += length; - break; - } - - if (unlikely(ip + length > iend)) { - /* - * Error : - * read attempt beyond - * end of input buffer - */ - goto _output_error; - } - - memcpy(op, ip, length); - ip += length; - op += length; - - if (ip > iend - 2) - break; - /* Necessarily EOF, due to parsing restrictions */ - /* break; */ - } else { - LZ4_wildCopy(op, ip, cpy); - ip += length; - op = cpy; - } - - /* get offset */ - offset = LZ4_readLE16(ip); - ip += 2; - match = op - offset; - - if (unlikely(match < (const BYTE *)dest)) { - /* Error : offset outside buffers */ - goto _output_error; - } - - /* get matchlength */ - length = token & ML_MASK; - if (length == ML_MASK) { - unsigned int s; - - do { - s = *ip++; - - if (ip > iend - LASTLITERALS) - goto _output_error; - - length += s; - } while (s == 255); - - if (unlikely((size_t)(op + length) < (size_t)op)) { - /* overflow detection */ - goto _output_error; - } - } - - length += MINMATCH; - - /* copy match within block */ - cpy = op + length; - - if (unlikely(cpy >= oend - WILDCOPYLENGTH)) { - if (cpy >= oend) { - while (op < oend) - *op++ = *match++; - break; - } - goto __match; - } - - /* costs ~1%; silence an msan warning when offset == 0 */ - LZ4_write32(op, (U32)offset); - - if (unlikely(offset < 8)) { - const int dec64 = dec64table[offset]; - - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[offset]; - memcpy(op + 4, match, 4); - match -= dec64; - } else { - LZ4_copy8(op, match); - match += 8; - } - - op += 8; - - if (unlikely(cpy > oend - 12)) { - BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1); - - if (op < oCopyLimit) { - LZ4_wildCopy(op, match, oCopyLimit); - match += oCopyLimit - op; - op = oCopyLimit; - } -__match: - while (op < cpy) - *op++ = *match++; - } else { - LZ4_copy8(op, match); - - if (length > 16) - LZ4_wildCopy(op + 8, match + 8, cpy); - } - - op = cpy; /* correction */ - } - DBG_BUGON((void *)ip - source > inputSize); - DBG_BUGON((void *)op - dest > outputSize); - - /* Nb of output bytes decoded */ - return (int) ((void *)op - dest); - - /* Overflow error detected */ -_output_error: - return -ERANGE; -} - -int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen) -{ - int ret = customized_lz4_decompress_safe_partial(in, - out, inlen, outlen); - - if (ret >= 0) - return ret; - - /* - * LZ4_decompress_safe will return an error code - * (< 0) if decompression failed - */ - errln("%s, failed to decompress, in[%p, %zu] outlen[%p, %zu]", - __func__, in, inlen, out, outlen); - WARN_ON(1); - print_hex_dump(KERN_DEBUG, "raw data [in]: ", DUMP_PREFIX_OFFSET, - 16, 1, in, inlen, true); - print_hex_dump(KERN_DEBUG, "raw data [out]: ", DUMP_PREFIX_OFFSET, - 16, 1, out, outlen, true); - return -EIO; -} - diff --git 
a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
deleted file mode 100644
index 0f1558c6747e..000000000000
--- a/drivers/staging/erofs/unzip_vle.c
+++ /dev/null
@@ -1,1695 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * linux/drivers/staging/erofs/unzip_vle.c
- *
- * Copyright (C) 2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file COPYING in the main directory of the Linux
- * distribution for more details.
- */
-#include "unzip_vle.h"
-#include <linux/prefetch.h>
-
-static struct workqueue_struct *z_erofs_workqueue __read_mostly;
-static struct kmem_cache *z_erofs_workgroup_cachep __read_mostly;
-
-void z_erofs_exit_zip_subsystem(void)
-{
-	destroy_workqueue(z_erofs_workqueue);
-	kmem_cache_destroy(z_erofs_workgroup_cachep);
-}
-
-static inline int init_unzip_workqueue(void)
-{
-	const unsigned onlinecpus = num_possible_cpus();
-
-	/*
-	 * we don't need too many threads, limiting threads
-	 * could improve scheduling performance.
-	 */
-	z_erofs_workqueue = alloc_workqueue("erofs_unzipd",
-		WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
-		onlinecpus + onlinecpus / 4);
-
-	return z_erofs_workqueue != NULL ? 0 : -ENOMEM;
-}
-
-int z_erofs_init_zip_subsystem(void)
-{
-	z_erofs_workgroup_cachep =
-		kmem_cache_create("erofs_compress",
-			Z_EROFS_WORKGROUP_SIZE, 0,
-			SLAB_RECLAIM_ACCOUNT, NULL);
-
-	if (z_erofs_workgroup_cachep != NULL) {
-		if (!init_unzip_workqueue())
-			return 0;
-
-		kmem_cache_destroy(z_erofs_workgroup_cachep);
-	}
-	return -ENOMEM;
-}
-
-enum z_erofs_vle_work_role {
-	Z_EROFS_VLE_WORK_SECONDARY,
-	Z_EROFS_VLE_WORK_PRIMARY,
-	/*
-	 * The current work was the tail of an exist chain, and the previous
-	 * processed chained works are all decided to be hooked up to it.
-	 * A new chain should be created for the remaining unprocessed works,
-	 * therefore different from Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
-	 * the next work cannot reuse the whole page in the following scenario:
-	 *  ________________________________________________________________
-	 * |      tail (partial) page     |       head (partial) page       |
-	 * |  (belongs to the next work)  |  (belongs to the current work)  |
-	 * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
-	 */
-	Z_EROFS_VLE_WORK_PRIMARY_HOOKED,
-	/*
-	 * The current work has been linked with the processed chained works,
-	 * and could be also linked with the potential remaining works, which
-	 * means if the processing page is the tail partial page of the work,
-	 * the current work can safely use the whole page (since the next work
-	 * is under control) for in-place decompression, as illustrated below:
-	 *  ________________________________________________________________
-	 * |  tail (partial) page  |          head (partial) page           |
-	 * | (of the current work) |         (of the previous work)         |
-	 * |  PRIMARY_FOLLOWED or  |                                        |
-	 * |_____PRIMARY_HOOKED____|____________PRIMARY_FOLLOWED____________|
-	 *
-	 * [  (*) the above page can be used for the current work itself.  ]
-	 */
-	Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
-	Z_EROFS_VLE_WORK_MAX
-};
-
-struct z_erofs_vle_work_builder {
-	enum z_erofs_vle_work_role role;
-	/*
-	 * 'hosted = false' means that the current workgroup doesn't belong to
-	 * the owned chained workgroups. In the other words, it is none of our
-	 * business to submit this workgroup.
- */ - bool hosted; - - struct z_erofs_vle_workgroup *grp; - struct z_erofs_vle_work *work; - struct z_erofs_pagevec_ctor vector; - - /* pages used for reading the compressed data */ - struct page **compressed_pages; - unsigned compressed_deficit; -}; - -#define VLE_WORK_BUILDER_INIT() \ - { .work = NULL, .role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED } - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - -static bool grab_managed_cache_pages(struct address_space *mapping, - erofs_blk_t start, - struct page **compressed_pages, - int clusterblks, - bool reserve_allocation) -{ - bool noio = true; - unsigned int i; - - /* TODO: optimize by introducing find_get_pages_range */ - for (i = 0; i < clusterblks; ++i) { - struct page *page, *found; - - if (READ_ONCE(compressed_pages[i]) != NULL) - continue; - - page = found = find_get_page(mapping, start + i); - if (found == NULL) { - noio = false; - if (!reserve_allocation) - continue; - page = EROFS_UNALLOCATED_CACHED_PAGE; - } - - if (NULL == cmpxchg(compressed_pages + i, NULL, page)) - continue; - - if (found != NULL) - put_page(found); - } - return noio; -} - -/* called by erofs_shrinker to get rid of all compressed_pages */ -int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, - struct erofs_workgroup *egrp) -{ - struct z_erofs_vle_workgroup *const grp = - container_of(egrp, struct z_erofs_vle_workgroup, obj); - struct address_space *const mapping = sbi->managed_cache->i_mapping; - const int clusterpages = erofs_clusterpages(sbi); - int i; - - /* - * refcount of workgroup is now freezed as 1, - * therefore no need to worry about available decompression users. - */ - for (i = 0; i < clusterpages; ++i) { - struct page *page = grp->compressed_pages[i]; - - if (page == NULL || page->mapping != mapping) - continue; - - /* block other users from reclaiming or migrating the page */ - if (!trylock_page(page)) - return -EBUSY; - - /* barrier is implied in the following 'unlock_page' */ - WRITE_ONCE(grp->compressed_pages[i], NULL); - - set_page_private(page, 0); - ClearPagePrivate(page); - - unlock_page(page); - put_page(page); - } - return 0; -} - -int erofs_try_to_free_cached_page(struct address_space *mapping, - struct page *page) -{ - struct erofs_sb_info *const sbi = EROFS_SB(mapping->host->i_sb); - const unsigned int clusterpages = erofs_clusterpages(sbi); - - struct z_erofs_vle_workgroup *grp; - int ret = 0; /* 0 - busy */ - - /* prevent the workgroup from being freed */ - rcu_read_lock(); - grp = (void *)page_private(page); - - if (erofs_workgroup_try_to_freeze(&grp->obj, 1)) { - unsigned int i; - - for (i = 0; i < clusterpages; ++i) { - if (grp->compressed_pages[i] == page) { - WRITE_ONCE(grp->compressed_pages[i], NULL); - ret = 1; - break; - } - } - erofs_workgroup_unfreeze(&grp->obj, 1); - } - rcu_read_unlock(); - - if (ret) { - ClearPagePrivate(page); - put_page(page); - } - return ret; -} -#endif - -/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */ -static inline bool try_to_reuse_as_compressed_page( - struct z_erofs_vle_work_builder *b, - struct page *page) -{ - while (b->compressed_deficit) { - --b->compressed_deficit; - if (NULL == cmpxchg(b->compressed_pages++, NULL, page)) - return true; - } - - return false; -} - -/* callers must be with work->lock held */ -static int z_erofs_vle_work_add_page( - struct z_erofs_vle_work_builder *builder, - struct page *page, - enum z_erofs_page_type type) -{ - int ret; - bool occupied; - - /* give priority for the compressed data storage */ - if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY && - type == 
Z_EROFS_PAGE_TYPE_EXCLUSIVE && - try_to_reuse_as_compressed_page(builder, page)) - return 0; - - ret = z_erofs_pagevec_ctor_enqueue(&builder->vector, - page, type, &occupied); - builder->work->vcnt += (unsigned)ret; - - return ret ? 0 : -EAGAIN; -} - -static enum z_erofs_vle_work_role -try_to_claim_workgroup(struct z_erofs_vle_workgroup *grp, - z_erofs_vle_owned_workgrp_t *owned_head, - bool *hosted) -{ - DBG_BUGON(*hosted == true); - - /* let's claim these following types of workgroup */ -retry: - if (grp->next == Z_EROFS_VLE_WORKGRP_NIL) { - /* type 1, nil workgroup */ - if (Z_EROFS_VLE_WORKGRP_NIL != cmpxchg(&grp->next, - Z_EROFS_VLE_WORKGRP_NIL, *owned_head)) - goto retry; - - *owned_head = grp; - *hosted = true; - /* lucky, I am the followee :) */ - return Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED; - - } else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) { - /* - * type 2, link to the end of a existing open chain, - * be careful that its submission itself is governed - * by the original owned chain. - */ - if (Z_EROFS_VLE_WORKGRP_TAIL != cmpxchg(&grp->next, - Z_EROFS_VLE_WORKGRP_TAIL, *owned_head)) - goto retry; - *owned_head = Z_EROFS_VLE_WORKGRP_TAIL; - return Z_EROFS_VLE_WORK_PRIMARY_HOOKED; - } - - return Z_EROFS_VLE_WORK_PRIMARY; /* :( better luck next time */ -} - -static struct z_erofs_vle_work * -z_erofs_vle_work_lookup(struct super_block *sb, - pgoff_t idx, unsigned pageofs, - struct z_erofs_vle_workgroup **grp_ret, - enum z_erofs_vle_work_role *role, - z_erofs_vle_owned_workgrp_t *owned_head, - bool *hosted) -{ - bool tag, primary; - struct erofs_workgroup *egrp; - struct z_erofs_vle_workgroup *grp; - struct z_erofs_vle_work *work; - - egrp = erofs_find_workgroup(sb, idx, &tag); - if (egrp == NULL) { - *grp_ret = NULL; - return NULL; - } - - *grp_ret = grp = container_of(egrp, - struct z_erofs_vle_workgroup, obj); - - work = z_erofs_vle_grab_work(grp, pageofs); - /* if multiref is disabled, `primary' is always true */ - primary = true; - - if (work->pageofs != pageofs) { - DBG_BUGON(1); - erofs_workgroup_put(egrp); - return ERR_PTR(-EIO); - } - - /* - * lock must be taken first to avoid grp->next == NIL between - * claiming workgroup and adding pages: - * grp->next != NIL - * grp->next = NIL - * mutex_unlock_all - * mutex_lock(&work->lock) - * add all pages to pagevec - * - * [correct locking case 1]: - * mutex_lock(grp->work[a]) - * ... - * mutex_lock(grp->work[b]) mutex_lock(grp->work[c]) - * ... *role = SECONDARY - * add all pages to pagevec - * ... - * mutex_unlock(grp->work[c]) - * mutex_lock(grp->work[c]) - * ... - * grp->next = NIL - * mutex_unlock_all - * - * [correct locking case 2]: - * mutex_lock(grp->work[b]) - * ... - * mutex_lock(grp->work[a]) - * ... - * mutex_lock(grp->work[c]) - * ... - * grp->next = NIL - * mutex_unlock_all - * mutex_lock(grp->work[a]) - * *role = PRIMARY_OWNER - * add all pages to pagevec - * ... 
- */ - mutex_lock(&work->lock); - - *hosted = false; - if (!primary) - *role = Z_EROFS_VLE_WORK_SECONDARY; - else /* claim the workgroup if possible */ - *role = try_to_claim_workgroup(grp, owned_head, hosted); - return work; -} - -static struct z_erofs_vle_work * -z_erofs_vle_work_register(struct super_block *sb, - struct z_erofs_vle_workgroup **grp_ret, - struct erofs_map_blocks *map, - pgoff_t index, unsigned pageofs, - enum z_erofs_vle_work_role *role, - z_erofs_vle_owned_workgrp_t *owned_head, - bool *hosted) -{ - bool newgrp = false; - struct z_erofs_vle_workgroup *grp = *grp_ret; - struct z_erofs_vle_work *work; - - /* if multiref is disabled, grp should never be nullptr */ - if (unlikely(grp)) { - DBG_BUGON(1); - return ERR_PTR(-EINVAL); - } - - /* no available workgroup, let's allocate one */ - grp = kmem_cache_zalloc(z_erofs_workgroup_cachep, GFP_NOFS); - if (unlikely(grp == NULL)) - return ERR_PTR(-ENOMEM); - - grp->obj.index = index; - grp->llen = map->m_llen; - - z_erofs_vle_set_workgrp_fmt(grp, - (map->m_flags & EROFS_MAP_ZIPPED) ? - Z_EROFS_VLE_WORKGRP_FMT_LZ4 : - Z_EROFS_VLE_WORKGRP_FMT_PLAIN); - atomic_set(&grp->obj.refcount, 1); - - /* new workgrps have been claimed as type 1 */ - WRITE_ONCE(grp->next, *owned_head); - /* primary and followed work for all new workgrps */ - *role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED; - /* it should be submitted by ourselves */ - *hosted = true; - - newgrp = true; - work = z_erofs_vle_grab_primary_work(grp); - work->pageofs = pageofs; - - mutex_init(&work->lock); - - if (newgrp) { - int err = erofs_register_workgroup(sb, &grp->obj, 0); - - if (err) { - kmem_cache_free(z_erofs_workgroup_cachep, grp); - return ERR_PTR(-EAGAIN); - } - } - - *owned_head = *grp_ret = grp; - - mutex_lock(&work->lock); - return work; -} - -static inline void __update_workgrp_llen(struct z_erofs_vle_workgroup *grp, - unsigned int llen) -{ - while (1) { - unsigned int orig_llen = grp->llen; - - if (orig_llen >= llen || orig_llen == - cmpxchg(&grp->llen, orig_llen, llen)) - break; - } -} - -#define builder_is_hooked(builder) \ - ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_HOOKED) - -#define builder_is_followed(builder) \ - ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED) - -static int z_erofs_vle_work_iter_begin(struct z_erofs_vle_work_builder *builder, - struct super_block *sb, - struct erofs_map_blocks *map, - z_erofs_vle_owned_workgrp_t *owned_head) -{ - const unsigned clusterpages = erofs_clusterpages(EROFS_SB(sb)); - const erofs_blk_t index = erofs_blknr(map->m_pa); - const unsigned pageofs = map->m_la & ~PAGE_MASK; - struct z_erofs_vle_workgroup *grp; - struct z_erofs_vle_work *work; - - DBG_BUGON(builder->work != NULL); - - /* must be Z_EROFS_WORK_TAIL or the next chained work */ - DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_NIL); - DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); - - DBG_BUGON(erofs_blkoff(map->m_pa)); - -repeat: - work = z_erofs_vle_work_lookup(sb, index, - pageofs, &grp, &builder->role, owned_head, &builder->hosted); - if (work != NULL) { - __update_workgrp_llen(grp, map->m_llen); - goto got_it; - } - - work = z_erofs_vle_work_register(sb, &grp, map, index, pageofs, - &builder->role, owned_head, &builder->hosted); - - if (unlikely(work == ERR_PTR(-EAGAIN))) - goto repeat; - - if (unlikely(IS_ERR(work))) - return PTR_ERR(work); -got_it: - z_erofs_pagevec_ctor_init(&builder->vector, - Z_EROFS_VLE_INLINE_PAGEVECS, work->pagevec, work->vcnt); - - if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY) { - /* enable possibly in-place 
decompression */ - builder->compressed_pages = grp->compressed_pages; - builder->compressed_deficit = clusterpages; - } else { - builder->compressed_pages = NULL; - builder->compressed_deficit = 0; - } - - builder->grp = grp; - builder->work = work; - return 0; -} - -/* - * keep in mind that no referenced workgroups will be freed - * only after a RCU grace period, so rcu_read_lock() could - * prevent a workgroup from being freed. - */ -static void z_erofs_rcu_callback(struct rcu_head *head) -{ - struct z_erofs_vle_work *work = container_of(head, - struct z_erofs_vle_work, rcu); - struct z_erofs_vle_workgroup *grp = - z_erofs_vle_work_workgroup(work, true); - - kmem_cache_free(z_erofs_workgroup_cachep, grp); -} - -void erofs_workgroup_free_rcu(struct erofs_workgroup *grp) -{ - struct z_erofs_vle_workgroup *const vgrp = container_of(grp, - struct z_erofs_vle_workgroup, obj); - struct z_erofs_vle_work *const work = &vgrp->work; - - call_rcu(&work->rcu, z_erofs_rcu_callback); -} - -static void __z_erofs_vle_work_release(struct z_erofs_vle_workgroup *grp, - struct z_erofs_vle_work *work __maybe_unused) -{ - erofs_workgroup_put(&grp->obj); -} - -void z_erofs_vle_work_release(struct z_erofs_vle_work *work) -{ - struct z_erofs_vle_workgroup *grp = - z_erofs_vle_work_workgroup(work, true); - - __z_erofs_vle_work_release(grp, work); -} - -static inline bool -z_erofs_vle_work_iter_end(struct z_erofs_vle_work_builder *builder) -{ - struct z_erofs_vle_work *work = builder->work; - - if (work == NULL) - return false; - - z_erofs_pagevec_ctor_exit(&builder->vector, false); - mutex_unlock(&work->lock); - - /* - * if all pending pages are added, don't hold work reference - * any longer if the current work isn't hosted by ourselves. - */ - if (!builder->hosted) - __z_erofs_vle_work_release(builder->grp, work); - - builder->work = NULL; - builder->grp = NULL; - return true; -} - -static inline struct page *__stagingpage_alloc(struct list_head *pagepool, - gfp_t gfp) -{ - struct page *page = erofs_allocpage(pagepool, gfp); - - if (unlikely(page == NULL)) - return NULL; - - page->mapping = Z_EROFS_MAPPING_STAGING; - return page; -} - -struct z_erofs_vle_frontend { - struct inode *const inode; - - struct z_erofs_vle_work_builder builder; - struct erofs_map_blocks_iter m_iter; - - z_erofs_vle_owned_workgrp_t owned_head; - - bool initial; -#if (EROFS_FS_ZIP_CACHE_LVL >= 2) - erofs_off_t cachedzone_la; -#endif -}; - -#define VLE_FRONTEND_INIT(__i) { \ - .inode = __i, \ - .m_iter = { \ - { .m_llen = 0, .m_plen = 0 }, \ - .mpage = NULL \ - }, \ - .builder = VLE_WORK_BUILDER_INIT(), \ - .owned_head = Z_EROFS_VLE_WORKGRP_TAIL, \ - .initial = true, } - -static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe, - struct page *page, - struct list_head *page_pool) -{ - struct super_block *const sb = fe->inode->i_sb; - struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb); - struct erofs_map_blocks_iter *const m = &fe->m_iter; - struct erofs_map_blocks *const map = &m->map; - struct z_erofs_vle_work_builder *const builder = &fe->builder; - const loff_t offset = page_offset(page); - - bool tight = builder_is_hooked(builder); - struct z_erofs_vle_work *work = builder->work; - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - struct address_space *const mngda = sbi->managed_cache->i_mapping; - struct z_erofs_vle_workgroup *grp; - bool noio_outoforder; -#endif - - enum z_erofs_page_type page_type; - unsigned cur, end, spiltted, index; - int err = 0; - - /* register locked file pages as online pages in pack */ - 
z_erofs_onlinepage_init(page); - - spiltted = 0; - end = PAGE_SIZE; -repeat: - cur = end - 1; - - /* lucky, within the range of the current map_blocks */ - if (offset + cur >= map->m_la && - offset + cur < map->m_la + map->m_llen) { - /* didn't get a valid unzip work previously (very rare) */ - if (!builder->work) - goto restart_now; - goto hitted; - } - - /* go ahead the next map_blocks */ - debugln("%s: [out-of-range] pos %llu", __func__, offset + cur); - - if (z_erofs_vle_work_iter_end(builder)) - fe->initial = false; - - map->m_la = offset + cur; - map->m_llen = 0; - err = erofs_map_blocks_iter(fe->inode, map, &m->mpage, 0); - if (unlikely(err)) - goto err_out; - -restart_now: - if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) - goto hitted; - - DBG_BUGON(map->m_plen != 1 << sbi->clusterbits); - DBG_BUGON(erofs_blkoff(map->m_pa)); - - err = z_erofs_vle_work_iter_begin(builder, sb, map, &fe->owned_head); - if (unlikely(err)) - goto err_out; - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - grp = fe->builder.grp; - - /* let's do out-of-order decompression for noio */ - noio_outoforder = grab_managed_cache_pages(mngda, - erofs_blknr(map->m_pa), - grp->compressed_pages, erofs_blknr(map->m_plen), - /* compressed page caching selection strategy */ - fe->initial | (EROFS_FS_ZIP_CACHE_LVL >= 2 ? - map->m_la < fe->cachedzone_la : 0)); - - if (noio_outoforder && builder_is_followed(builder)) - builder->role = Z_EROFS_VLE_WORK_PRIMARY; -#endif - - tight &= builder_is_hooked(builder); - work = builder->work; -hitted: - cur = end - min_t(unsigned, offset + end - map->m_la, end); - if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) { - zero_user_segment(page, cur, end); - goto next_part; - } - - /* let's derive page type */ - page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD : - (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE : - (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE : - Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED)); - - if (cur) - tight &= builder_is_followed(builder); - -retry: - err = z_erofs_vle_work_add_page(builder, page, page_type); - /* should allocate an additional staging page for pagevec */ - if (err == -EAGAIN) { - struct page *const newpage = - __stagingpage_alloc(page_pool, GFP_NOFS); - - err = z_erofs_vle_work_add_page(builder, - newpage, Z_EROFS_PAGE_TYPE_EXCLUSIVE); - if (likely(!err)) - goto retry; - } - - if (unlikely(err)) - goto err_out; - - index = page->index - map->m_la / PAGE_SIZE; - - /* FIXME! avoid the last relundant fixup & endio */ - z_erofs_onlinepage_fixup(page, index, true); - - /* bump up the number of spiltted parts of a page */ - ++spiltted; - /* also update nr_pages */ - work->nr_pages = max_t(pgoff_t, work->nr_pages, index + 1); -next_part: - /* can be used for verification */ - map->m_llen = offset + cur - map->m_la; - - end = cur; - if (end > 0) - goto repeat; - -out: - /* FIXME! 
avoid the last relundant fixup & endio */ - z_erofs_onlinepage_endio(page); - - debugln("%s, finish page: %pK spiltted: %u map->m_llen %llu", - __func__, page, spiltted, map->m_llen); - return err; - - /* if some error occurred while processing this page */ -err_out: - SetPageError(page); - goto out; -} - -static void z_erofs_vle_unzip_kickoff(void *ptr, int bios) -{ - tagptr1_t t = tagptr_init(tagptr1_t, ptr); - struct z_erofs_vle_unzip_io *io = tagptr_unfold_ptr(t); - bool background = tagptr_unfold_tags(t); - - if (!background) { - unsigned long flags; - - spin_lock_irqsave(&io->u.wait.lock, flags); - if (!atomic_add_return(bios, &io->pending_bios)) - wake_up_locked(&io->u.wait); - spin_unlock_irqrestore(&io->u.wait.lock, flags); - return; - } - - if (!atomic_add_return(bios, &io->pending_bios)) - queue_work(z_erofs_workqueue, &io->u.work); -} - -static inline void z_erofs_vle_read_endio(struct bio *bio) -{ - const blk_status_t err = bio->bi_status; - unsigned i; - struct bio_vec *bvec; -#ifdef EROFS_FS_HAS_MANAGED_CACHE - struct address_space *mngda = NULL; -#endif - - bio_for_each_segment_all(bvec, bio, i) { - struct page *page = bvec->bv_page; - bool cachemngd = false; - - DBG_BUGON(PageUptodate(page)); - DBG_BUGON(!page->mapping); - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - if (unlikely(mngda == NULL && !z_erofs_is_stagingpage(page))) { - struct inode *const inode = page->mapping->host; - struct super_block *const sb = inode->i_sb; - - mngda = EROFS_SB(sb)->managed_cache->i_mapping; - } - - /* - * If mngda has not gotten, it equals NULL, - * however, page->mapping never be NULL if working properly. - */ - cachemngd = (page->mapping == mngda); -#endif - - if (unlikely(err)) - SetPageError(page); - else if (cachemngd) - SetPageUptodate(page); - - if (cachemngd) - unlock_page(page); - } - - z_erofs_vle_unzip_kickoff(bio->bi_private, -1); - bio_put(bio); -} - -static struct page *z_pagemap_global[Z_EROFS_VLE_VMAP_GLOBAL_PAGES]; -static DEFINE_MUTEX(z_pagemap_global_lock); - -static int z_erofs_vle_unzip(struct super_block *sb, - struct z_erofs_vle_workgroup *grp, - struct list_head *page_pool) -{ - struct erofs_sb_info *const sbi = EROFS_SB(sb); -#ifdef EROFS_FS_HAS_MANAGED_CACHE - struct address_space *const mngda = sbi->managed_cache->i_mapping; -#endif - const unsigned clusterpages = erofs_clusterpages(sbi); - - struct z_erofs_pagevec_ctor ctor; - unsigned int nr_pages; - unsigned int sparsemem_pages = 0; - struct page *pages_onstack[Z_EROFS_VLE_VMAP_ONSTACK_PAGES]; - struct page **pages, **compressed_pages, *page; - unsigned i, llen; - - enum z_erofs_page_type page_type; - bool overlapped; - struct z_erofs_vle_work *work; - void *vout; - int err; - - might_sleep(); - work = z_erofs_vle_grab_primary_work(grp); - DBG_BUGON(!READ_ONCE(work->nr_pages)); - - mutex_lock(&work->lock); - nr_pages = work->nr_pages; - - if (likely(nr_pages <= Z_EROFS_VLE_VMAP_ONSTACK_PAGES)) - pages = pages_onstack; - else if (nr_pages <= Z_EROFS_VLE_VMAP_GLOBAL_PAGES && - mutex_trylock(&z_pagemap_global_lock)) - pages = z_pagemap_global; - else { -repeat: - pages = kvmalloc_array(nr_pages, - sizeof(struct page *), GFP_KERNEL); - - /* fallback to global pagemap for the lowmem scenario */ - if (unlikely(pages == NULL)) { - if (nr_pages > Z_EROFS_VLE_VMAP_GLOBAL_PAGES) - goto repeat; - else { - mutex_lock(&z_pagemap_global_lock); - pages = z_pagemap_global; - } - } - } - - for (i = 0; i < nr_pages; ++i) - pages[i] = NULL; - - err = 0; - z_erofs_pagevec_ctor_init(&ctor, - Z_EROFS_VLE_INLINE_PAGEVECS, work->pagevec, 
0); - - for (i = 0; i < work->vcnt; ++i) { - unsigned pagenr; - - page = z_erofs_pagevec_ctor_dequeue(&ctor, &page_type); - - /* all pages in pagevec ought to be valid */ - DBG_BUGON(page == NULL); - DBG_BUGON(page->mapping == NULL); - - if (z_erofs_gather_if_stagingpage(page_pool, page)) - continue; - - if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD) - pagenr = 0; - else - pagenr = z_erofs_onlinepage_index(page); - - DBG_BUGON(pagenr >= nr_pages); - - /* - * currently EROFS doesn't support multiref(dedup), - * so here erroring out one multiref page. - */ - if (pages[pagenr]) { - DBG_BUGON(1); - SetPageError(pages[pagenr]); - z_erofs_onlinepage_endio(pages[pagenr]); - err = -EIO; - } - pages[pagenr] = page; - } - sparsemem_pages = i; - - z_erofs_pagevec_ctor_exit(&ctor, true); - - overlapped = false; - compressed_pages = grp->compressed_pages; - - for (i = 0; i < clusterpages; ++i) { - unsigned pagenr; - - page = compressed_pages[i]; - - /* all compressed pages ought to be valid */ - DBG_BUGON(page == NULL); - DBG_BUGON(page->mapping == NULL); - - if (!z_erofs_is_stagingpage(page)) { -#ifdef EROFS_FS_HAS_MANAGED_CACHE - if (page->mapping == mngda) { - if (unlikely(!PageUptodate(page))) - err = -EIO; - continue; - } -#endif - - /* - * only if non-head page can be selected - * for inplace decompression - */ - pagenr = z_erofs_onlinepage_index(page); - - DBG_BUGON(pagenr >= nr_pages); - if (pages[pagenr]) { - DBG_BUGON(1); - SetPageError(pages[pagenr]); - z_erofs_onlinepage_endio(pages[pagenr]); - err = -EIO; - } - ++sparsemem_pages; - pages[pagenr] = page; - - overlapped = true; - } - - /* PG_error needs checking for inplaced and staging pages */ - if (unlikely(PageError(page))) { - DBG_BUGON(PageUptodate(page)); - err = -EIO; - } - } - - if (unlikely(err)) - goto out; - - llen = (nr_pages << PAGE_SHIFT) - work->pageofs; - - if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN) { - err = z_erofs_vle_plain_copy(compressed_pages, clusterpages, - pages, nr_pages, work->pageofs); - goto out; - } - - if (llen > grp->llen) - llen = grp->llen; - - err = z_erofs_vle_unzip_fast_percpu(compressed_pages, clusterpages, - pages, llen, work->pageofs); - if (err != -ENOTSUPP) - goto out; - - if (sparsemem_pages >= nr_pages) - goto skip_allocpage; - - for (i = 0; i < nr_pages; ++i) { - if (pages[i] != NULL) - continue; - - pages[i] = __stagingpage_alloc(page_pool, GFP_NOFS); - } - -skip_allocpage: - vout = erofs_vmap(pages, nr_pages); - if (!vout) { - err = -ENOMEM; - goto out; - } - - err = z_erofs_vle_unzip_vmap(compressed_pages, - clusterpages, vout, llen, work->pageofs, overlapped); - - erofs_vunmap(vout, nr_pages); - -out: - /* must handle all compressed pages before endding pages */ - for (i = 0; i < clusterpages; ++i) { - page = compressed_pages[i]; - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - if (page->mapping == mngda) - continue; -#endif - /* recycle all individual staging pages */ - (void)z_erofs_gather_if_stagingpage(page_pool, page); - - WRITE_ONCE(compressed_pages[i], NULL); - } - - for (i = 0; i < nr_pages; ++i) { - page = pages[i]; - if (!page) - continue; - - DBG_BUGON(page->mapping == NULL); - - /* recycle all individual staging pages */ - if (z_erofs_gather_if_stagingpage(page_pool, page)) - continue; - - if (unlikely(err < 0)) - SetPageError(page); - - z_erofs_onlinepage_endio(page); - } - - if (pages == z_pagemap_global) - mutex_unlock(&z_pagemap_global_lock); - else if (unlikely(pages != pages_onstack)) - kvfree(pages); - - work->nr_pages = 0; - work->vcnt = 0; - - /* all work 
locks MUST be taken before the following line */ - - WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_NIL); - - /* all work locks SHOULD be released right now */ - mutex_unlock(&work->lock); - - z_erofs_vle_work_release(work); - return err; -} - -static void z_erofs_vle_unzip_all(struct super_block *sb, - struct z_erofs_vle_unzip_io *io, - struct list_head *page_pool) -{ - z_erofs_vle_owned_workgrp_t owned = io->head; - - while (owned != Z_EROFS_VLE_WORKGRP_TAIL_CLOSED) { - struct z_erofs_vle_workgroup *grp; - - /* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */ - DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_TAIL); - - /* no possible that 'owned' equals NULL */ - DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_NIL); - - grp = owned; - owned = READ_ONCE(grp->next); - - z_erofs_vle_unzip(sb, grp, page_pool); - } -} - -static void z_erofs_vle_unzip_wq(struct work_struct *work) -{ - struct z_erofs_vle_unzip_io_sb *iosb = container_of(work, - struct z_erofs_vle_unzip_io_sb, io.u.work); - LIST_HEAD(page_pool); - - DBG_BUGON(iosb->io.head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); - z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &page_pool); - - put_pages_list(&page_pool); - kvfree(iosb); -} - -static inline struct z_erofs_vle_unzip_io * -prepare_io_handler(struct super_block *sb, - struct z_erofs_vle_unzip_io *io, - bool background) -{ - struct z_erofs_vle_unzip_io_sb *iosb; - - if (!background) { - /* waitqueue available for foreground io */ - BUG_ON(io == NULL); - - init_waitqueue_head(&io->u.wait); - atomic_set(&io->pending_bios, 0); - goto out; - } - - if (io != NULL) - BUG(); - else { - /* allocate extra io descriptor for background io */ - iosb = kvzalloc(sizeof(struct z_erofs_vle_unzip_io_sb), - GFP_KERNEL | __GFP_NOFAIL); - BUG_ON(iosb == NULL); - - io = &iosb->io; - } - - iosb->sb = sb; - INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq); -out: - io->head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED; - return io; -} - -#ifdef EROFS_FS_HAS_MANAGED_CACHE -/* true - unlocked (noio), false - locked (need submit io) */ -static inline bool recover_managed_page(struct z_erofs_vle_workgroup *grp, - struct page *page) -{ - wait_on_page_locked(page); - if (PagePrivate(page) && PageUptodate(page)) - return true; - - lock_page(page); - ClearPageError(page); - - if (unlikely(!PagePrivate(page))) { - set_page_private(page, (unsigned long)grp); - SetPagePrivate(page); - } - if (unlikely(PageUptodate(page))) { - unlock_page(page); - return true; - } - return false; -} - -#define __FSIO_1 1 -#else -#define __FSIO_1 0 -#endif - -static bool z_erofs_vle_submit_all(struct super_block *sb, - z_erofs_vle_owned_workgrp_t owned_head, - struct list_head *pagepool, - struct z_erofs_vle_unzip_io *fg_io, - bool force_fg) -{ - struct erofs_sb_info *const sbi = EROFS_SB(sb); - const unsigned clusterpages = erofs_clusterpages(sbi); - const gfp_t gfp = GFP_NOFS; -#ifdef EROFS_FS_HAS_MANAGED_CACHE - struct address_space *const mngda = sbi->managed_cache->i_mapping; - struct z_erofs_vle_workgroup *lstgrp_noio = NULL, *lstgrp_io = NULL; -#endif - struct z_erofs_vle_unzip_io *ios[1 + __FSIO_1]; - struct bio *bio; - tagptr1_t bi_private; - /* since bio will be NULL, no need to initialize last_index */ - pgoff_t uninitialized_var(last_index); - bool force_submit = false; - unsigned nr_bios; - - if (unlikely(owned_head == Z_EROFS_VLE_WORKGRP_TAIL)) - return false; - - /* - * force_fg == 1, (io, fg_io[0]) no io, (io, fg_io[1]) need submit io - * force_fg == 0, (io, fg_io[0]) no io; (io[1], bg_io) need submit io - */ -#ifdef EROFS_FS_HAS_MANAGED_CACHE - ios[0] = 
prepare_io_handler(sb, fg_io + 0, false); -#endif - - if (force_fg) { - ios[__FSIO_1] = prepare_io_handler(sb, fg_io + __FSIO_1, false); - bi_private = tagptr_fold(tagptr1_t, ios[__FSIO_1], 0); - } else { - ios[__FSIO_1] = prepare_io_handler(sb, NULL, true); - bi_private = tagptr_fold(tagptr1_t, ios[__FSIO_1], 1); - } - - nr_bios = 0; - force_submit = false; - bio = NULL; - - /* by default, all need io submission */ - ios[__FSIO_1]->head = owned_head; - - do { - struct z_erofs_vle_workgroup *grp; - struct page **compressed_pages, *oldpage, *page; - pgoff_t first_index; - unsigned i = 0; -#ifdef EROFS_FS_HAS_MANAGED_CACHE - unsigned int noio = 0; - bool cachemngd; -#endif - int err; - - /* no possible 'owned_head' equals the following */ - DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); - DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_NIL); - - grp = owned_head; - - /* close the main owned chain at first */ - owned_head = cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL, - Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); - - first_index = grp->obj.index; - compressed_pages = grp->compressed_pages; - - force_submit |= (first_index != last_index + 1); -repeat: - /* fulfill all compressed pages */ - oldpage = page = READ_ONCE(compressed_pages[i]); - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - cachemngd = false; - - if (page == EROFS_UNALLOCATED_CACHED_PAGE) { - cachemngd = true; - goto do_allocpage; - } else if (page != NULL) { - if (page->mapping != mngda) - BUG_ON(PageUptodate(page)); - else if (recover_managed_page(grp, page)) { - /* page is uptodate, skip io submission */ - force_submit = true; - ++noio; - goto skippage; - } - } else { -do_allocpage: -#else - if (page != NULL) - BUG_ON(PageUptodate(page)); - else { -#endif - page = __stagingpage_alloc(pagepool, gfp); - - if (oldpage != cmpxchg(compressed_pages + i, - oldpage, page)) { - list_add(&page->lru, pagepool); - goto repeat; -#ifdef EROFS_FS_HAS_MANAGED_CACHE - } else if (cachemngd && !add_to_page_cache_lru(page, - mngda, first_index + i, gfp)) { - set_page_private(page, (unsigned long)grp); - SetPagePrivate(page); -#endif - } - } - - if (bio != NULL && force_submit) { -submit_bio_retry: - __submit_bio(bio, REQ_OP_READ, 0); - bio = NULL; - } - - if (bio == NULL) { - bio = prepare_bio(sb, first_index + i, - BIO_MAX_PAGES, z_erofs_vle_read_endio); - bio->bi_private = tagptr_cast_ptr(bi_private); - - ++nr_bios; - } - - err = bio_add_page(bio, page, PAGE_SIZE, 0); - if (err < PAGE_SIZE) - goto submit_bio_retry; - - force_submit = false; - last_index = first_index + i; -#ifdef EROFS_FS_HAS_MANAGED_CACHE -skippage: -#endif - if (++i < clusterpages) - goto repeat; - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - if (noio < clusterpages) { - lstgrp_io = grp; - } else { - z_erofs_vle_owned_workgrp_t iogrp_next = - owned_head == Z_EROFS_VLE_WORKGRP_TAIL ? 
- Z_EROFS_VLE_WORKGRP_TAIL_CLOSED : - owned_head; - - if (lstgrp_io == NULL) - ios[1]->head = iogrp_next; - else - WRITE_ONCE(lstgrp_io->next, iogrp_next); - - if (lstgrp_noio == NULL) - ios[0]->head = grp; - else - WRITE_ONCE(lstgrp_noio->next, grp); - - lstgrp_noio = grp; - } -#endif - } while (owned_head != Z_EROFS_VLE_WORKGRP_TAIL); - - if (bio != NULL) - __submit_bio(bio, REQ_OP_READ, 0); - -#ifndef EROFS_FS_HAS_MANAGED_CACHE - BUG_ON(!nr_bios); -#else - if (lstgrp_noio != NULL) - WRITE_ONCE(lstgrp_noio->next, Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); - - if (!force_fg && !nr_bios) { - kvfree(container_of(ios[1], - struct z_erofs_vle_unzip_io_sb, io)); - return true; - } -#endif - - z_erofs_vle_unzip_kickoff(tagptr_cast_ptr(bi_private), nr_bios); - return true; -} - -static void z_erofs_submit_and_unzip(struct z_erofs_vle_frontend *f, - struct list_head *pagepool, - bool force_fg) -{ - struct super_block *sb = f->inode->i_sb; - struct z_erofs_vle_unzip_io io[1 + __FSIO_1]; - - if (!z_erofs_vle_submit_all(sb, f->owned_head, pagepool, io, force_fg)) - return; - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - z_erofs_vle_unzip_all(sb, &io[0], pagepool); -#endif - if (!force_fg) - return; - - /* wait until all bios are completed */ - wait_event(io[__FSIO_1].u.wait, - !atomic_read(&io[__FSIO_1].pending_bios)); - - /* let's synchronous decompression */ - z_erofs_vle_unzip_all(sb, &io[__FSIO_1], pagepool); -} - -static int z_erofs_vle_normalaccess_readpage(struct file *file, - struct page *page) -{ - struct inode *const inode = page->mapping->host; - struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode); - int err; - LIST_HEAD(pagepool); - -#if (EROFS_FS_ZIP_CACHE_LVL >= 2) - f.cachedzone_la = page->index << PAGE_SHIFT; -#endif - err = z_erofs_do_read_page(&f, page, &pagepool); - (void)z_erofs_vle_work_iter_end(&f.builder); - - /* if some compressed cluster ready, need submit them anyway */ - z_erofs_submit_and_unzip(&f, &pagepool, true); - - if (err) - errln("%s, failed to read, err [%d]", __func__, err); - - if (f.m_iter.mpage != NULL) - put_page(f.m_iter.mpage); - - /* clean up the remaining free pages */ - put_pages_list(&pagepool); - return err; -} - -static inline int __z_erofs_vle_normalaccess_readpages( - struct file *filp, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, bool sync) -{ - struct inode *const inode = mapping->host; - - struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode); - gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); - struct page *head = NULL; - LIST_HEAD(pagepool); - -#if (EROFS_FS_ZIP_CACHE_LVL >= 2) - f.cachedzone_la = lru_to_page(pages)->index << PAGE_SHIFT; -#endif - for (; nr_pages; --nr_pages) { - struct page *page = lru_to_page(pages); - - prefetchw(&page->flags); - list_del(&page->lru); - - if (add_to_page_cache_lru(page, mapping, page->index, gfp)) { - list_add(&page->lru, &pagepool); - continue; - } - - set_page_private(page, (unsigned long)head); - head = page; - } - - while (head != NULL) { - struct page *page = head; - int err; - - /* traversal in reverse order */ - head = (void *)page_private(page); - - err = z_erofs_do_read_page(&f, page, &pagepool); - if (err) { - struct erofs_vnode *vi = EROFS_V(inode); - - errln("%s, readahead error at page %lu of nid %llu", - __func__, page->index, vi->nid); - } - - put_page(page); - } - - (void)z_erofs_vle_work_iter_end(&f.builder); - - z_erofs_submit_and_unzip(&f, &pagepool, sync); - - if (f.m_iter.mpage != NULL) - put_page(f.m_iter.mpage); - - /* clean up the remaining free pages 
*/ - put_pages_list(&pagepool); - return 0; -} - -static int z_erofs_vle_normalaccess_readpages( - struct file *filp, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - return __z_erofs_vle_normalaccess_readpages(filp, - mapping, pages, nr_pages, - nr_pages < 4 /* sync */); -} - -const struct address_space_operations z_erofs_vle_normalaccess_aops = { - .readpage = z_erofs_vle_normalaccess_readpage, - .readpages = z_erofs_vle_normalaccess_readpages, -}; - -#define __vle_cluster_advise(x, bit, bits) \ - ((le16_to_cpu(x) >> (bit)) & ((1 << (bits)) - 1)) - -#define __vle_cluster_type(advise) __vle_cluster_advise(advise, \ - Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT, Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - -enum { - Z_EROFS_VLE_CLUSTER_TYPE_PLAIN, - Z_EROFS_VLE_CLUSTER_TYPE_HEAD, - Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD, - Z_EROFS_VLE_CLUSTER_TYPE_RESERVED, - Z_EROFS_VLE_CLUSTER_TYPE_MAX -}; - -#define vle_cluster_type(di) \ - __vle_cluster_type((di)->di_advise) - -static inline unsigned -vle_compressed_index_clusterofs(unsigned clustersize, - struct z_erofs_vle_decompressed_index *di) -{ - debugln("%s, vle=%pK, advise=%x (type %u), clusterofs=%x blkaddr=%x", - __func__, di, di->di_advise, vle_cluster_type(di), - di->di_clusterofs, di->di_u.blkaddr); - - switch (vle_cluster_type(di)) { - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: - break; - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: - return di->di_clusterofs; - default: - BUG_ON(1); - } - return clustersize; -} - -static inline erofs_blk_t -vle_extent_blkaddr(struct inode *inode, pgoff_t index) -{ - struct erofs_sb_info *sbi = EROFS_I_SB(inode); - struct erofs_vnode *vi = EROFS_V(inode); - - unsigned ofs = Z_EROFS_VLE_EXTENT_ALIGN(vi->inode_isize + - vi->xattr_isize) + sizeof(struct erofs_extent_header) + - index * sizeof(struct z_erofs_vle_decompressed_index); - - return erofs_blknr(iloc(sbi, vi->nid) + ofs); -} - -static inline unsigned int -vle_extent_blkoff(struct inode *inode, pgoff_t index) -{ - struct erofs_sb_info *sbi = EROFS_I_SB(inode); - struct erofs_vnode *vi = EROFS_V(inode); - - unsigned ofs = Z_EROFS_VLE_EXTENT_ALIGN(vi->inode_isize + - vi->xattr_isize) + sizeof(struct erofs_extent_header) + - index * sizeof(struct z_erofs_vle_decompressed_index); - - return erofs_blkoff(iloc(sbi, vi->nid) + ofs); -} - -/* - * Variable-sized Logical Extent (Fixed Physical Cluster) Compression Mode - * --- - * VLE compression mode attempts to compress a number of logical data into - * a physical cluster with a fixed size. - * VLE compression mode uses "struct z_erofs_vle_decompressed_index". 
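The deleted comment above is the only prose description of the legacy VLE index, so a compact userspace model of the lookup it describes may help. Everything below (toy_vle_index, resolve_head(), the sample table) is invented for illustration and is not the driver's API; the sketch assumes a NONHEAD entry's delta0 counts logical clusters back toward its HEAD entry, mirroring the backward walk that vle_get_logical_extent_head() performs right after this comment:

#include <stdio.h>

enum { TOY_PLAIN, TOY_HEAD, TOY_NONHEAD };

struct toy_vle_index {
	int type;
	unsigned int delta0;		/* TOY_NONHEAD: lcns back toward the HEAD */
	unsigned int clusterofs;	/* TOY_HEAD/TOY_PLAIN: offset inside the cluster */
	unsigned int blkaddr;		/* TOY_HEAD/TOY_PLAIN: physical block address */
};

/* walk back until a HEAD (or PLAIN) entry is reached */
static const struct toy_vle_index *
resolve_head(const struct toy_vle_index *idx, unsigned int lcn,
	     unsigned int *head_lcn)
{
	while (idx[lcn].type == TOY_NONHEAD)
		lcn -= idx[lcn].delta0;
	*head_lcn = lcn;
	return &idx[lcn];
}

int main(void)
{
	/* one extent: a HEAD at lcn 0 followed by three NONHEAD entries */
	static const struct toy_vle_index idx[] = {
		{ TOY_HEAD,    0, 100, 42 },
		{ TOY_NONHEAD, 1,   0,  0 },
		{ TOY_NONHEAD, 2,   0,  0 },
		{ TOY_NONHEAD, 3,   0,  0 },
	};
	unsigned int head;
	const struct toy_vle_index *h = resolve_head(idx, 3, &head);

	printf("lcn 3 -> head lcn %u, blkaddr %u, clusterofs %u\n",
	       head, h->blkaddr, h->clusterofs);
	return 0;
}

The real walk runs against on-disk index blocks rather than an in-memory array, so the driver also re-maps the metadata page whenever the lookback crosses a block boundary, and a PLAIN head additionally toggles EROFS_MAP_ZIPPED, as the switch statement below shows.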
- */ -static erofs_off_t vle_get_logical_extent_head( - struct inode *inode, - struct page **page_iter, - void **kaddr_iter, - unsigned lcn, /* logical cluster number */ - erofs_blk_t *pcn, - unsigned *flags) -{ - /* for extent meta */ - struct page *page = *page_iter; - erofs_blk_t blkaddr = vle_extent_blkaddr(inode, lcn); - struct z_erofs_vle_decompressed_index *di; - unsigned long long ofs; - const unsigned int clusterbits = EROFS_SB(inode->i_sb)->clusterbits; - const unsigned int clustersize = 1 << clusterbits; - unsigned int delta0; - - if (page->index != blkaddr) { - kunmap_atomic(*kaddr_iter); - unlock_page(page); - put_page(page); - - *page_iter = page = erofs_get_meta_page(inode->i_sb, - blkaddr, false); - *kaddr_iter = kmap_atomic(page); - } - - di = *kaddr_iter + vle_extent_blkoff(inode, lcn); - switch (vle_cluster_type(di)) { - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: - delta0 = le16_to_cpu(di->di_u.delta[0]); - DBG_BUGON(!delta0); - DBG_BUGON(lcn < delta0); - - ofs = vle_get_logical_extent_head(inode, - page_iter, kaddr_iter, - lcn - delta0, pcn, flags); - break; - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - *flags ^= EROFS_MAP_ZIPPED; - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: - /* clustersize should be a power of two */ - ofs = ((unsigned long long)lcn << clusterbits) + - (le16_to_cpu(di->di_clusterofs) & (clustersize - 1)); - *pcn = le32_to_cpu(di->di_u.blkaddr); - break; - default: - BUG_ON(1); - } - return ofs; -} - -int z_erofs_map_blocks_iter(struct inode *inode, - struct erofs_map_blocks *map, - struct page **mpage_ret, int flags) -{ - /* logicial extent (start, end) offset */ - unsigned long long ofs, end; - struct z_erofs_vle_decompressed_index *di; - erofs_blk_t e_blkaddr, pcn; - unsigned lcn, logical_cluster_ofs, cluster_type; - u32 ofs_rem; - struct page *mpage = *mpage_ret; - void *kaddr; - bool initial; - const unsigned int clusterbits = EROFS_SB(inode->i_sb)->clusterbits; - const unsigned int clustersize = 1 << clusterbits; - int err = 0; - - /* if both m_(l,p)len are 0, regularize l_lblk, l_lofs, etc... 
*/ - initial = !map->m_llen; - - /* when trying to read beyond EOF, leave it unmapped */ - if (unlikely(map->m_la >= inode->i_size)) { - BUG_ON(!initial); - map->m_llen = map->m_la + 1 - inode->i_size; - map->m_la = inode->i_size - 1; - map->m_flags = 0; - goto out; - } - - debugln("%s, m_la %llu m_llen %llu --- start", __func__, - map->m_la, map->m_llen); - - ofs = map->m_la + map->m_llen; - - /* clustersize should be power of two */ - lcn = ofs >> clusterbits; - ofs_rem = ofs & (clustersize - 1); - - e_blkaddr = vle_extent_blkaddr(inode, lcn); - - if (mpage == NULL || mpage->index != e_blkaddr) { - if (mpage != NULL) - put_page(mpage); - - mpage = erofs_get_meta_page(inode->i_sb, e_blkaddr, false); - *mpage_ret = mpage; - } else { - lock_page(mpage); - DBG_BUGON(!PageUptodate(mpage)); - } - - kaddr = kmap_atomic(mpage); - di = kaddr + vle_extent_blkoff(inode, lcn); - - debugln("%s, lcn %u e_blkaddr %u e_blkoff %u", __func__, lcn, - e_blkaddr, vle_extent_blkoff(inode, lcn)); - - logical_cluster_ofs = vle_compressed_index_clusterofs(clustersize, di); - if (!initial) { - /* [walking mode] 'map' has been already initialized */ - map->m_llen += logical_cluster_ofs; - goto unmap_out; - } - - /* by default, compressed */ - map->m_flags |= EROFS_MAP_ZIPPED; - - end = (u64)(lcn + 1) * clustersize; - - cluster_type = vle_cluster_type(di); - - switch (cluster_type) { - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - if (ofs_rem >= logical_cluster_ofs) - map->m_flags ^= EROFS_MAP_ZIPPED; - /* fallthrough */ - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: - if (ofs_rem == logical_cluster_ofs) { - pcn = le32_to_cpu(di->di_u.blkaddr); - goto exact_hitted; - } - - if (ofs_rem > logical_cluster_ofs) { - ofs = lcn * clustersize | logical_cluster_ofs; - pcn = le32_to_cpu(di->di_u.blkaddr); - break; - } - - /* logical cluster number should be >= 1 */ - if (unlikely(!lcn)) { - errln("invalid logical cluster 0 at nid %llu", - EROFS_V(inode)->nid); - err = -EIO; - goto unmap_out; - } - end = (lcn-- * clustersize) | logical_cluster_ofs; - /* fallthrough */ - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: - /* get the correspoinding first chunk */ - ofs = vle_get_logical_extent_head(inode, mpage_ret, - &kaddr, lcn, &pcn, &map->m_flags); - mpage = *mpage_ret; - break; - default: - errln("unknown cluster type %u at offset %llu of nid %llu", - cluster_type, ofs, EROFS_V(inode)->nid); - err = -EIO; - goto unmap_out; - } - - map->m_la = ofs; -exact_hitted: - map->m_llen = end - ofs; - map->m_plen = clustersize; - map->m_pa = blknr_to_addr(pcn); - map->m_flags |= EROFS_MAP_MAPPED; -unmap_out: - kunmap_atomic(kaddr); - unlock_page(mpage); -out: - debugln("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o", - __func__, map->m_la, map->m_pa, - map->m_llen, map->m_plen, map->m_flags); - - /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */ - DBG_BUGON(err < 0); - return err; -} - diff --git a/drivers/staging/erofs/unzip_vle_lz4.c b/drivers/staging/erofs/unzip_vle_lz4.c deleted file mode 100644 index 3a7428317f0a..000000000000 --- a/drivers/staging/erofs/unzip_vle_lz4.c +++ /dev/null @@ -1,210 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/drivers/staging/erofs/unzip_vle_lz4.c - * - * Copyright (C) 2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. 
- */ -#include "unzip_vle.h" - -#if Z_EROFS_CLUSTER_MAX_PAGES > Z_EROFS_VLE_INLINE_PAGEVECS -#define EROFS_PERCPU_NR_PAGES Z_EROFS_CLUSTER_MAX_PAGES -#else -#define EROFS_PERCPU_NR_PAGES Z_EROFS_VLE_INLINE_PAGEVECS -#endif - -static struct { - char data[PAGE_SIZE * EROFS_PERCPU_NR_PAGES]; -} erofs_pcpubuf[NR_CPUS]; - -int z_erofs_vle_plain_copy(struct page **compressed_pages, - unsigned clusterpages, - struct page **pages, - unsigned nr_pages, - unsigned short pageofs) -{ - unsigned i, j; - void *src = NULL; - const unsigned righthalf = PAGE_SIZE - pageofs; - char *percpu_data; - bool mirrored[Z_EROFS_CLUSTER_MAX_PAGES] = { 0 }; - - preempt_disable(); - percpu_data = erofs_pcpubuf[smp_processor_id()].data; - - j = 0; - for (i = 0; i < nr_pages; j = i++) { - struct page *page = pages[i]; - void *dst; - - if (page == NULL) { - if (src != NULL) { - if (!mirrored[j]) - kunmap_atomic(src); - src = NULL; - } - continue; - } - - dst = kmap_atomic(page); - - for (; j < clusterpages; ++j) { - if (compressed_pages[j] != page) - continue; - - DBG_BUGON(mirrored[j]); - memcpy(percpu_data + j * PAGE_SIZE, dst, PAGE_SIZE); - mirrored[j] = true; - break; - } - - if (i) { - if (src == NULL) - src = mirrored[i-1] ? - percpu_data + (i-1) * PAGE_SIZE : - kmap_atomic(compressed_pages[i-1]); - - memcpy(dst, src + righthalf, pageofs); - - if (!mirrored[i-1]) - kunmap_atomic(src); - - if (unlikely(i >= clusterpages)) { - kunmap_atomic(dst); - break; - } - } - - if (!righthalf) - src = NULL; - else { - src = mirrored[i] ? percpu_data + i * PAGE_SIZE : - kmap_atomic(compressed_pages[i]); - - memcpy(dst + pageofs, src, righthalf); - } - - kunmap_atomic(dst); - } - - if (src != NULL && !mirrored[j]) - kunmap_atomic(src); - - preempt_enable(); - return 0; -} - -extern int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen); - -int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages, - unsigned clusterpages, - struct page **pages, - unsigned outlen, - unsigned short pageofs) -{ - void *vin, *vout; - unsigned nr_pages, i, j; - int ret; - - if (outlen + pageofs > EROFS_PERCPU_NR_PAGES * PAGE_SIZE) - return -ENOTSUPP; - - nr_pages = DIV_ROUND_UP(outlen + pageofs, PAGE_SIZE); - - if (clusterpages == 1) { - vin = kmap_atomic(compressed_pages[0]); - } else { - vin = erofs_vmap(compressed_pages, clusterpages); - if (!vin) - return -ENOMEM; - } - - preempt_disable(); - vout = erofs_pcpubuf[smp_processor_id()].data; - - ret = z_erofs_unzip_lz4(vin, vout + pageofs, - clusterpages * PAGE_SIZE, outlen); - - if (ret < 0) - goto out; - ret = 0; - - for (i = 0; i < nr_pages; ++i) { - j = min((unsigned)PAGE_SIZE - pageofs, outlen); - - if (pages[i] != NULL) { - if (clusterpages == 1 && - pages[i] == compressed_pages[0]) { - memcpy(vin + pageofs, vout + pageofs, j); - } else { - void *dst = kmap_atomic(pages[i]); - - memcpy(dst + pageofs, vout + pageofs, j); - kunmap_atomic(dst); - } - } - vout += PAGE_SIZE; - outlen -= j; - pageofs = 0; - } - -out: - preempt_enable(); - - if (clusterpages == 1) - kunmap_atomic(vin); - else - erofs_vunmap(vin, clusterpages); - - return ret; -} - -int z_erofs_vle_unzip_vmap(struct page **compressed_pages, - unsigned clusterpages, - void *vout, - unsigned llen, - unsigned short pageofs, - bool overlapped) -{ - void *vin; - unsigned i; - int ret; - - if (overlapped) { - preempt_disable(); - vin = erofs_pcpubuf[smp_processor_id()].data; - - for (i = 0; i < clusterpages; ++i) { - void *t = kmap_atomic(compressed_pages[i]); - - memcpy(vin + PAGE_SIZE *i, t, PAGE_SIZE); - 
kunmap_atomic(t);
-		}
-	} else if (clusterpages == 1)
-		vin = kmap_atomic(compressed_pages[0]);
-	else {
-		vin = erofs_vmap(compressed_pages, clusterpages);
-	}
-
-	ret = z_erofs_unzip_lz4(vin, vout + pageofs,
-		clusterpages * PAGE_SIZE, llen);
-	if (ret > 0)
-		ret = 0;
-
-	if (!overlapped) {
-		if (clusterpages == 1)
-			kunmap_atomic(vin);
-		else {
-			erofs_vunmap(vin, clusterpages);
-		}
-	} else
-		preempt_enable();
-
-	return ret;
-}
-
diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
index 4de9c39535eb..aab4a607f028 100644
--- a/drivers/staging/erofs/utils.c
+++ b/drivers/staging/erofs/utils.c
@@ -1,16 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- * linux/drivers/staging/erofs/utils.c
- *
  * Copyright (C) 2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file COPYING in the main directory of the Linux
- * distribution for more details.
+ *             https://www.huawei.com/
  */
-
 #include "internal.h"
 #include <linux/pagevec.h>
 
@@ -20,43 +12,67 @@ struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
 
 	if (!list_empty(pool)) {
 		page = lru_to_page(pool);
+		DBG_BUGON(page_ref_count(page) != 1);
 		list_del(&page->lru);
 	} else {
-		page = alloc_pages(gfp | __GFP_NOFAIL, 0);
+		page = alloc_page(gfp);
 	}
 	return page;
 }
 
+#if (EROFS_PCPUBUF_NR_PAGES > 0)
+static struct {
+	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
+} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];
+
+void *erofs_get_pcpubuf(unsigned int pagenr)
+{
+	preempt_disable();
+	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
+}
+#endif
+
+#ifdef CONFIG_EROFS_FS_ZIP
 /* global shrink count (for all mounted EROFS instances) */
 static atomic_long_t erofs_global_shrink_cnt;
 
-#ifdef CONFIG_EROFS_FS_ZIP
+#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
+#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)
 
-/* radix_tree and the future XArray both don't use tagptr_t yet */
-struct erofs_workgroup *erofs_find_workgroup(
-	struct super_block *sb, pgoff_t index, bool *tag)
+static int erofs_workgroup_get(struct erofs_workgroup *grp)
+{
+	int o;
+
+repeat:
+	o = erofs_wait_on_workgroup_freezed(grp);
+	if (o <= 0)
+		return -1;
+
+	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
+		goto repeat;
+
+	/* decrease refcount paired by erofs_workgroup_put */
+	if (o == 1)
+		atomic_long_dec(&erofs_global_shrink_cnt);
+	return 0;
+}
+
+struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
+					     pgoff_t index)
 {
 	struct erofs_sb_info *sbi = EROFS_SB(sb);
 	struct erofs_workgroup *grp;
-	int oldcount;
 
 repeat:
 	rcu_read_lock();
 	grp = radix_tree_lookup(&sbi->workstn_tree, index);
-	if (grp != NULL) {
-		*tag = radix_tree_exceptional_entry(grp);
-		grp = (void *)((unsigned long)grp &
-			~RADIX_TREE_EXCEPTIONAL_ENTRY);
-
-		if (erofs_workgroup_get(grp, &oldcount)) {
+	if (grp) {
+		if (erofs_workgroup_get(grp)) {
 			/* prefer to relax rcu read side */
 			rcu_read_unlock();
 			goto repeat;
 		}
 
-		/* decrease refcount added by erofs_workgroup_put */
-		if (unlikely(oldcount == 1))
-			atomic_long_dec(&erofs_global_shrink_cnt);
 		DBG_BUGON(index != grp->index);
 	}
 	rcu_read_unlock();
@@ -64,14 +80,13 @@ repeat:
 }
 
 int erofs_register_workgroup(struct super_block *sb,
-			     struct erofs_workgroup *grp,
-			     bool tag)
+			     struct erofs_workgroup *grp)
 {
 	struct erofs_sb_info *sbi;
 	int err;
 
 	/* grp shouldn't be broken or used before */
-	if (unlikely(atomic_read(&grp->refcount) != 1))
{ + if (atomic_read(&grp->refcount) != 1) { DBG_BUGON(1); return -EINVAL; } @@ -81,35 +96,28 @@ int erofs_register_workgroup(struct super_block *sb, return err; sbi = EROFS_SB(sb); - erofs_workstn_lock(sbi); - - if (tag) - grp = (void *)((unsigned long)grp | - 1UL << RADIX_TREE_EXCEPTIONAL_SHIFT); + xa_lock(&sbi->workstn_tree); /* * Bump up reference count before making this workgroup * visible to other users in order to avoid potential UAF - * without serialized by erofs_workstn_lock. + * without serialized by workstn_lock. */ __erofs_workgroup_get(grp); - err = radix_tree_insert(&sbi->workstn_tree, - grp->index, grp); - if (unlikely(err)) + err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp); + if (err) /* * it's safe to decrease since the workgroup isn't visible * and refcount >= 2 (cannot be freezed). */ __erofs_workgroup_put(grp); - erofs_workstn_unlock(sbi); + xa_unlock(&sbi->workstn_tree); radix_tree_preload_end(); return err; } -extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp); - static void __erofs_workgroup_free(struct erofs_workgroup *grp) { atomic_long_dec(&erofs_global_shrink_cnt); @@ -127,33 +135,22 @@ int erofs_workgroup_put(struct erofs_workgroup *grp) return count; } -#ifdef EROFS_FS_HAS_MANAGED_CACHE -/* for cache-managed case, customized reclaim paths exist */ -static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp) +static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, + struct erofs_workgroup *grp) { - erofs_workgroup_unfreeze(grp, 0); - __erofs_workgroup_free(grp); -} - -bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, - struct erofs_workgroup *grp, - bool cleanup) -{ - void *entry; - /* - * for managed cache enabled, the refcount of workgroups - * themselves could be < 0 (freezed). So there is no guarantee - * that all refcount > 0 if managed cache is enabled. + * If managed cache is on, refcount of workgroups + * themselves could be < 0 (freezed). In other words, + * there is no guarantee that all refcounts > 0. */ if (!erofs_workgroup_try_to_freeze(grp, 1)) return false; /* - * note that all cached pages should be unlinked - * before delete it from the radix tree. - * Otherwise some cached pages of an orphan old workgroup - * could be still linked after the new one is available. + * Note that all cached pages should be unattached + * before deleted from the radix tree. Otherwise some + * cached pages could be still attached to the orphan + * old workgroup when the new one is available in the tree. */ if (erofs_try_to_free_all_cached_pages(sbi, grp)) { erofs_workgroup_unfreeze(grp, 1); @@ -161,87 +158,52 @@ bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, } /* - * it is impossible to fail after the workgroup is freezed, + * It's impossible to fail after the workgroup is freezed, * however in order to avoid some race conditions, add a * DBG_BUGON to observe this in advance. */ - entry = radix_tree_delete(&sbi->workstn_tree, grp->index); - DBG_BUGON((void *)((unsigned long)entry & - ~RADIX_TREE_EXCEPTIONAL_ENTRY) != grp); + DBG_BUGON(radix_tree_delete(&sbi->workstn_tree, grp->index) != grp); - /* - * if managed cache is enable, the last refcount - * should indicate the related workstation. 
- */ - erofs_workgroup_unfreeze_final(grp); - return true; -} - -#else -/* for nocache case, no customized reclaim path at all */ -bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, - struct erofs_workgroup *grp, - bool cleanup) -{ - int cnt = atomic_read(&grp->refcount); - void *entry; - - DBG_BUGON(cnt <= 0); - DBG_BUGON(cleanup && cnt != 1); - - if (cnt > 1) - return false; - - entry = radix_tree_delete(&sbi->workstn_tree, grp->index); - DBG_BUGON((void *)((unsigned long)entry & - ~RADIX_TREE_EXCEPTIONAL_ENTRY) != grp); - - /* (rarely) could be grabbed again when freeing */ - erofs_workgroup_put(grp); + /* last refcount should be connected with its managed pslot. */ + erofs_workgroup_unfreeze(grp, 0); + __erofs_workgroup_free(grp); return true; } -#endif - -unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, - unsigned long nr_shrink, - bool cleanup) +static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, + unsigned long nr_shrink) { pgoff_t first_index = 0; void *batch[PAGEVEC_SIZE]; - unsigned freed = 0; + unsigned int freed = 0; int i, found; repeat: - erofs_workstn_lock(sbi); + xa_lock(&sbi->workstn_tree); found = radix_tree_gang_lookup(&sbi->workstn_tree, - batch, first_index, PAGEVEC_SIZE); + batch, first_index, PAGEVEC_SIZE); for (i = 0; i < found; ++i) { - struct erofs_workgroup *grp = (void *) - ((unsigned long)batch[i] & - ~RADIX_TREE_EXCEPTIONAL_ENTRY); + struct erofs_workgroup *grp = batch[i]; first_index = grp->index + 1; /* try to shrink each valid workgroup */ - if (!erofs_try_to_release_workgroup(sbi, grp, cleanup)) + if (!erofs_try_to_release_workgroup(sbi, grp)) continue; ++freed; - if (unlikely(!--nr_shrink)) + if (!--nr_shrink) break; } - erofs_workstn_unlock(sbi); + xa_unlock(&sbi->workstn_tree); if (i && nr_shrink) goto repeat; return freed; } -#endif - /* protected by 'erofs_sb_list_lock' */ static unsigned int shrinker_run_no; @@ -249,7 +211,7 @@ static unsigned int shrinker_run_no; static DEFINE_SPINLOCK(erofs_sb_list_lock); static LIST_HEAD(erofs_sb_list); -void erofs_register_super(struct super_block *sb) +void erofs_shrinker_register(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); @@ -260,21 +222,28 @@ void erofs_register_super(struct super_block *sb) spin_unlock(&erofs_sb_list_lock); } -void erofs_unregister_super(struct super_block *sb) +void erofs_shrinker_unregister(struct super_block *sb) { + struct erofs_sb_info *const sbi = EROFS_SB(sb); + + mutex_lock(&sbi->umount_mutex); + /* clean up all remaining workgroups in memory */ + erofs_shrink_workstation(sbi, ~0UL); + spin_lock(&erofs_sb_list_lock); - list_del(&EROFS_SB(sb)->list); + list_del(&sbi->list); spin_unlock(&erofs_sb_list_lock); + mutex_unlock(&sbi->umount_mutex); } -unsigned long erofs_shrink_count(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long erofs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) { return atomic_long_read(&erofs_global_shrink_cnt); } -unsigned long erofs_shrink_scan(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long erofs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct erofs_sb_info *sbi; struct list_head *p; @@ -284,9 +253,9 @@ unsigned long erofs_shrink_scan(struct shrinker *shrink, unsigned long freed = 0; spin_lock(&erofs_sb_list_lock); - do + do { run_no = ++shrinker_run_no; - while (run_no == 0); + } while (run_no == 0); /* Iterate over all mounted superblocks and try to shrink them */ p = erofs_sb_list.next; 
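One detail of erofs_shrink_scan() deserves a note: erofs_sb_list_lock is dropped around the actual shrink work, so each scan stamps every superblock it visits with a per-scan run number to avoid shrinking the same instance twice in one pass, and the do/while above skips 0 so a wrapped counter can never collide with the "never visited" initial value. A minimal userspace sketch of that pattern, with invented names (toy_sbi, toy_shrink_scan) and all locking and list rotation omitted:

#include <stdio.h>

struct toy_sbi {
	struct toy_sbi *next;
	unsigned int run_no;	/* last scan that visited this instance */
	unsigned long objects;	/* reclaimable objects it holds */
};

static unsigned int shrinker_run_no;

static unsigned long toy_shrink_scan(struct toy_sbi *list, unsigned long nr)
{
	unsigned long freed = 0;
	unsigned int run_no;
	struct toy_sbi *p;

	/* 0 means "never visited", so skip it when the counter wraps */
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	for (p = list; p && freed < nr; p = p->next) {
		unsigned long n;

		if (p->run_no == run_no)
			continue;	/* already visited in this pass */
		p->run_no = run_no;

		/* stand-in for erofs_shrink_workstation(sbi, nr - freed) */
		n = p->objects < nr - freed ? p->objects : nr - freed;
		p->objects -= n;
		freed += n;
	}
	return freed;
}

int main(void)
{
	struct toy_sbi b = { NULL, 0, 30 };
	struct toy_sbi a = { &b, 0, 10 };

	printf("freed %lu objects\n", toy_shrink_scan(&a, 25));
	return 0;
}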
@@ -308,9 +277,7 @@ unsigned long erofs_shrink_scan(struct shrinker *shrink, spin_unlock(&erofs_sb_list_lock); sbi->shrinker_run_no = run_no; -#ifdef CONFIG_EROFS_FS_ZIP - freed += erofs_shrink_workstation(sbi, nr - freed, false); -#endif + freed += erofs_shrink_workstation(sbi, nr - freed); spin_lock(&erofs_sb_list_lock); /* Get the next list element before we move this one */ @@ -330,3 +297,19 @@ unsigned long erofs_shrink_scan(struct shrinker *shrink, return freed; } +static struct shrinker erofs_shrinker_info = { + .scan_objects = erofs_shrink_scan, + .count_objects = erofs_shrink_count, + .seeks = DEFAULT_SEEKS, +}; + +int __init erofs_init_shrinker(void) +{ + return register_shrinker(&erofs_shrinker_info); +} + +void erofs_exit_shrinker(void) +{ + unregister_shrinker(&erofs_shrinker_info); +} +#endif /* !CONFIG_EROFS_FS_ZIP */ diff --git a/drivers/staging/erofs/xattr.c b/drivers/staging/erofs/xattr.c index d48687ca2199..2b9bb3a7a5f2 100644 --- a/drivers/staging/erofs/xattr.c +++ b/drivers/staging/erofs/xattr.c @@ -1,14 +1,7 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only /* - * linux/drivers/staging/erofs/xattr.c - * * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. + * https://www.huawei.com/ */ #include <linux/security.h> #include "xattr.h" @@ -19,13 +12,13 @@ struct xattr_iter { void *kaddr; erofs_blk_t blkaddr; - unsigned ofs; + unsigned int ofs; }; static inline void xattr_iter_end(struct xattr_iter *it, bool atomic) { /* the only user of kunmap() is 'init_inode_xattrs' */ - if (unlikely(!atomic)) + if (!atomic) kunmap(it->page); else kunmap_atomic(it->kaddr); @@ -44,23 +37,30 @@ static int init_inode_xattrs(struct inode *inode) { - struct erofs_vnode *const vi = EROFS_V(inode); + struct erofs_inode *const vi = EROFS_I(inode); struct xattr_iter it; - unsigned i; + unsigned int i; struct erofs_xattr_ibody_header *ih; + struct super_block *sb; struct erofs_sb_info *sbi; bool atomic_map; int ret = 0; /* the most case is that xattrs of this inode are initialized. */ - if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags)) + if (test_bit(EROFS_I_EA_INITED_BIT, &vi->flags)) { + /* + * paired with smp_mb() at the end of the function to ensure + * fields will only be observed after the bit is set. + */ + smp_mb(); return 0; + } - if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_XATTR_BIT, TASK_KILLABLE)) + if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_XATTR_BIT, TASK_KILLABLE)) return -ERESTARTSYS; /* someone has initialized xattrs for us? */ - if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags)) + if (test_bit(EROFS_I_EA_INITED_BIT, &vi->flags)) goto out_unlock; /* @@ -72,25 +72,29 @@ static int init_inode_xattrs(struct inode *inode) * undefined right now (maybe use later with some new sb feature).
*/ if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) { - errln("xattr_isize %d of nid %llu is not supported yet", - vi->xattr_isize, vi->nid); - ret = -ENOTSUPP; + erofs_err(inode->i_sb, + "xattr_isize %d of nid %llu is not supported yet", + vi->xattr_isize, vi->nid); + ret = -EOPNOTSUPP; goto out_unlock; } else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) { - if (unlikely(vi->xattr_isize)) { + if (vi->xattr_isize) { + erofs_err(inode->i_sb, + "bogus xattr ibody @ nid %llu", vi->nid); DBG_BUGON(1); - ret = -EIO; + ret = -EFSCORRUPTED; goto out_unlock; /* xattr ondisk layout error */ } ret = -ENOATTR; goto out_unlock; } - sbi = EROFS_I_SB(inode); + sb = inode->i_sb; + sbi = EROFS_SB(sb); it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize); it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize); - it.page = erofs_get_inline_page(inode, it.blkaddr); + it.page = erofs_get_meta_page(sb, it.blkaddr); if (IS_ERR(it.page)) { ret = PTR_ERR(it.page); goto out_unlock; @@ -115,13 +119,12 @@ static int init_inode_xattrs(struct inode *inode) it.ofs += sizeof(struct erofs_xattr_ibody_header); for (i = 0; i < vi->xattr_shared_count; ++i) { - if (unlikely(it.ofs >= EROFS_BLKSIZ)) { + if (it.ofs >= EROFS_BLKSIZ) { /* cannot be unaligned */ - BUG_ON(it.ofs != EROFS_BLKSIZ); + DBG_BUGON(it.ofs != EROFS_BLKSIZ); xattr_iter_end(&it, atomic_map); - it.page = erofs_get_meta_page(inode->i_sb, - ++it.blkaddr, S_ISDIR(inode->i_mode)); + it.page = erofs_get_meta_page(sb, ++it.blkaddr); if (IS_ERR(it.page)) { kfree(vi->xattr_shared_xattrs); vi->xattr_shared_xattrs = NULL; @@ -139,18 +142,29 @@ static int init_inode_xattrs(struct inode *inode) } xattr_iter_end(&it, atomic_map); - set_bit(EROFS_V_EA_INITED_BIT, &vi->flags); + /* paired with smp_mb() at the beginning of the function. 
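The fast path above is the standard init-once scheme: test the INITED bit locklessly, fall back to the bit lock, re-check, initialize, then set the bit. The barrier pairing is what makes the lockless read safe; in C11 terms it is an acquire/release pair, as in this userspace analogue (once_state and ensure_inited() are illustrative):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    struct once_state {
            atomic_bool inited;
            pthread_mutex_t lock;   /* plays the role of the BL_XATTR bit lock */
            int payload;            /* fields published by 'inited' */
    };

    static struct once_state s = { .lock = PTHREAD_MUTEX_INITIALIZER };

    static void ensure_inited(void)
    {
            /* acquire pairs with the release store below, so 'payload'
             * is fully visible whenever 'inited' reads true */
            if (atomic_load_explicit(&s.inited, memory_order_acquire))
                    return;

            pthread_mutex_lock(&s.lock);
            if (!atomic_load_explicit(&s.inited, memory_order_relaxed)) {
                    s.payload = 42;         /* ... the real init work ... */
                    atomic_store_explicit(&s.inited, true,
                                          memory_order_release);
            }
            pthread_mutex_unlock(&s.lock);
    }

The kernel code expresses the same guarantee with smp_mb() around test_bit()/set_bit() rather than a single release store, but the ordering contract is identical.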
*/ + smp_mb(); + set_bit(EROFS_I_EA_INITED_BIT, &vi->flags); out_unlock: - clear_and_wake_up_bit(EROFS_V_BL_XATTR_BIT, &vi->flags); + clear_and_wake_up_bit(EROFS_I_BL_XATTR_BIT, &vi->flags); return ret; } +/* + * the general idea for these return values is + * if 0 is returned, go on processing the current xattr; + * 1 (> 0) is returned, skip this round to process the next xattr; + * -err (< 0) is returned, an error (maybe ENOXATTR) occurred + * and need to be handled + */ struct xattr_iter_handlers { - int (*entry)(struct xattr_iter *, struct erofs_xattr_entry *); - int (*name)(struct xattr_iter *, unsigned, char *, unsigned); - int (*alloc_buffer)(struct xattr_iter *, unsigned); - void (*value)(struct xattr_iter *, unsigned, char *, unsigned); + int (*entry)(struct xattr_iter *_it, struct erofs_xattr_entry *entry); + int (*name)(struct xattr_iter *_it, unsigned int processed, char *buf, + unsigned int len); + int (*alloc_buffer)(struct xattr_iter *_it, unsigned int value_sz); + void (*value)(struct xattr_iter *_it, unsigned int processed, char *buf, + unsigned int len); }; static inline int xattr_iter_fixup(struct xattr_iter *it) @@ -161,7 +175,8 @@ static inline int xattr_iter_fixup(struct xattr_iter *it) xattr_iter_end(it, true); it->blkaddr += erofs_blknr(it->ofs); - it->page = erofs_get_meta_page(it->sb, it->blkaddr, false); + + it->page = erofs_get_meta_page(it->sb, it->blkaddr); if (IS_ERR(it->page)) { int err = PTR_ERR(it->page); @@ -175,15 +190,15 @@ static inline int xattr_iter_fixup(struct xattr_iter *it) } static int inline_xattr_iter_begin(struct xattr_iter *it, - struct inode *inode) + struct inode *inode) { - struct erofs_vnode *const vi = EROFS_V(inode); + struct erofs_inode *const vi = EROFS_I(inode); struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb); - unsigned xattr_header_sz, inline_xattr_ofs; + unsigned int xattr_header_sz, inline_xattr_ofs; xattr_header_sz = inlinexattr_header_size(inode); - if (unlikely(xattr_header_sz >= vi->xattr_isize)) { - BUG_ON(xattr_header_sz > vi->xattr_isize); + if (xattr_header_sz >= vi->xattr_isize) { + DBG_BUGON(xattr_header_sz > vi->xattr_isize); return -ENOATTR; } @@ -192,7 +207,7 @@ static int inline_xattr_iter_begin(struct xattr_iter *it, it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs); it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs); - it->page = erofs_get_inline_page(inode, it->blkaddr); + it->page = erofs_get_meta_page(inode->i_sb, it->blkaddr); if (IS_ERR(it->page)) return PTR_ERR(it->page); @@ -200,11 +215,16 @@ static int inline_xattr_iter_begin(struct xattr_iter *it, return vi->xattr_isize - xattr_header_sz; } +/* + * Regardless of success or failure, `xattr_foreach' will end up with + * `ofs' pointing to the next xattr item rather than an arbitrary position. + */ static int xattr_foreach(struct xattr_iter *it, - const struct xattr_iter_handlers *op, unsigned int *tlimit) + const struct xattr_iter_handlers *op, + unsigned int *tlimit) { struct erofs_xattr_entry entry; - unsigned value_sz, processed, slice; + unsigned int value_sz, processed, slice; int err; /* 0. 
fixup blkaddr, ofs, ipage */ @@ -218,10 +238,14 @@ static int xattr_foreach(struct xattr_iter *it, * therefore entry should be in the page */ entry = *(struct erofs_xattr_entry *)(it->kaddr + it->ofs); - if (tlimit != NULL) { - unsigned entry_sz = EROFS_XATTR_ENTRY_SIZE(&entry); + if (tlimit) { + unsigned int entry_sz = erofs_xattr_entry_size(&entry); - BUG_ON(*tlimit < entry_sz); + /* xattr on-disk corruption: xattr entry beyond xattr_isize */ + if (*tlimit < entry_sz) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } *tlimit -= entry_sz; } @@ -240,7 +264,7 @@ static int xattr_foreach(struct xattr_iter *it, while (processed < entry.e_name_len) { if (it->ofs >= EROFS_BLKSIZ) { - BUG_ON(it->ofs > EROFS_BLKSIZ); + DBG_BUGON(it->ofs > EROFS_BLKSIZ); err = xattr_iter_fixup(it); if (err) @@ -248,8 +272,8 @@ static int xattr_foreach(struct xattr_iter *it, it->ofs = 0; } - slice = min_t(unsigned, PAGE_SIZE - it->ofs, - entry.e_name_len - processed); + slice = min_t(unsigned int, PAGE_SIZE - it->ofs, + entry.e_name_len - processed); /* handle name */ err = op->name(it, processed, it->kaddr + it->ofs, slice); @@ -265,7 +289,7 @@ static int xattr_foreach(struct xattr_iter *it, /* 3. handle xattr value */ processed = 0; - if (op->alloc_buffer != NULL) { + if (op->alloc_buffer) { err = op->alloc_buffer(it, value_sz); if (err) { it->ofs += value_sz; @@ -275,7 +299,7 @@ static int xattr_foreach(struct xattr_iter *it, while (processed < value_sz) { if (it->ofs >= EROFS_BLKSIZ) { - BUG_ON(it->ofs > EROFS_BLKSIZ); + DBG_BUGON(it->ofs > EROFS_BLKSIZ); err = xattr_iter_fixup(it); if (err) @@ -283,17 +307,17 @@ static int xattr_foreach(struct xattr_iter *it, it->ofs = 0; } - slice = min_t(unsigned, PAGE_SIZE - it->ofs, - value_sz - processed); + slice = min_t(unsigned int, PAGE_SIZE - it->ofs, + value_sz - processed); op->value(it, processed, it->kaddr + it->ofs, slice); it->ofs += slice; processed += slice; } out: - /* we assume that ofs is aligned with 4 bytes */ + /* xattrs should be 4-byte aligned (on-disk constraint) */ it->ofs = EROFS_XATTR_ALIGN(it->ofs); - return err; + return err < 0 ? err : 0; } struct getxattr_iter { @@ -305,7 +329,7 @@ struct getxattr_iter { }; static int xattr_entrymatch(struct xattr_iter *_it, - struct erofs_xattr_entry *entry) + struct erofs_xattr_entry *entry) { struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); @@ -314,7 +338,7 @@ static int xattr_entrymatch(struct xattr_iter *_it, } static int xattr_namematch(struct xattr_iter *_it, - unsigned processed, char *buf, unsigned len) + unsigned int processed, char *buf, unsigned int len) { struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); @@ -322,17 +346,18 @@ static int xattr_namematch(struct xattr_iter *_it, } static int xattr_checkbuffer(struct xattr_iter *_it, - unsigned value_sz) + unsigned int value_sz) { struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); int err = it->buffer_size < value_sz ? -ERANGE : 0; it->buffer_size = value_sz; - return it->buffer == NULL ? 1 : err; + return !it->buffer ? 
1 : err; } static void xattr_copyvalue(struct xattr_iter *_it, - unsigned processed, char *buf, unsigned len) + unsigned int processed, + char *buf, unsigned int len) { struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); @@ -349,7 +374,7 @@ static const struct xattr_iter_handlers find_xattr_handlers = { static int inline_getxattr(struct inode *inode, struct getxattr_iter *it) { int ret; - unsigned remaining; + unsigned int remaining; ret = inline_xattr_iter_begin(&it->it, inode); if (ret < 0) @@ -358,22 +383,20 @@ static int inline_getxattr(struct inode *inode, struct getxattr_iter *it) remaining = ret; while (remaining) { ret = xattr_foreach(&it->it, &find_xattr_handlers, &remaining); - if (ret >= 0) - break; - - if (ret != -ENOATTR) /* -ENOMEM, -EIO, etc. */ + if (ret != -ENOATTR) break; } xattr_iter_end_final(&it->it); - return ret < 0 ? ret : it->buffer_size; + return ret ? ret : it->buffer_size; } static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) { - struct erofs_vnode *const vi = EROFS_V(inode); - struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb); - unsigned i; + struct erofs_inode *const vi = EROFS_I(inode); + struct super_block *const sb = inode->i_sb; + struct erofs_sb_info *const sbi = EROFS_SB(sb); + unsigned int i; int ret = -ENOATTR; for (i = 0; i < vi->xattr_shared_count; ++i) { @@ -386,8 +409,7 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) if (i) xattr_iter_end(&it->it, true); - it->it.page = erofs_get_meta_page(inode->i_sb, - blkaddr, false); + it->it.page = erofs_get_meta_page(sb, blkaddr); if (IS_ERR(it->it.page)) return PTR_ERR(it->it.page); @@ -396,16 +418,13 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) } ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL); - if (ret >= 0) - break; - - if (ret != -ENOATTR) /* -ENOMEM, -EIO, etc. */ + if (ret != -ENOATTR) break; } if (vi->xattr_shared_count) xattr_iter_end_final(&it->it); - return ret < 0 ? ret : it->buffer_size; + return ret ? 
ret : it->buffer_size; } static bool erofs_xattr_user_list(struct dentry *dentry) @@ -419,13 +438,13 @@ static bool erofs_xattr_trusted_list(struct dentry *dentry) } int erofs_getxattr(struct inode *inode, int index, - const char *name, - void *buffer, size_t buffer_size) + const char *name, + void *buffer, size_t buffer_size) { int ret; struct getxattr_iter it; - if (unlikely(name == NULL)) + if (!name) return -EINVAL; ret = init_inode_xattrs(inode); @@ -450,8 +469,8 @@ int erofs_getxattr(struct inode *inode, int index, } static int erofs_xattr_generic_get(const struct xattr_handler *handler, - struct dentry *unused, struct inode *inode, - const char *name, void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { struct erofs_sb_info *const sbi = EROFS_I_SB(inode); @@ -461,8 +480,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler, return -EOPNOTSUPP; break; case EROFS_XATTR_INDEX_TRUSTED: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; break; case EROFS_XATTR_INDEX_SECURITY: break; @@ -517,24 +534,23 @@ struct listxattr_iter { }; static int xattr_entrylist(struct xattr_iter *_it, - struct erofs_xattr_entry *entry) + struct erofs_xattr_entry *entry) { struct listxattr_iter *it = container_of(_it, struct listxattr_iter, it); - unsigned prefix_len; + unsigned int prefix_len; const char *prefix; const struct xattr_handler *h = erofs_xattr_handler(entry->e_name_index); - if (h == NULL || (h->list != NULL && !h->list(it->dentry))) + if (!h || (h->list && !h->list(it->dentry))) return 1; - /* Note that at least one of 'prefix' and 'name' should be non-NULL */ - prefix = h->prefix != NULL ? h->prefix : h->name; + prefix = xattr_prefix(h); prefix_len = strlen(prefix); - if (it->buffer == NULL) { + if (!it->buffer) { it->buffer_ofs += prefix_len + entry->e_name_len + 1; return 1; } @@ -549,7 +565,7 @@ static int xattr_entrylist(struct xattr_iter *_it, } static int xattr_namelist(struct xattr_iter *_it, - unsigned processed, char *buf, unsigned len) + unsigned int processed, char *buf, unsigned int len) { struct listxattr_iter *it = container_of(_it, struct listxattr_iter, it); @@ -560,7 +576,7 @@ static int xattr_namelist(struct xattr_iter *_it, } static int xattr_skipvalue(struct xattr_iter *_it, - unsigned value_sz) + unsigned int value_sz) { struct listxattr_iter *it = container_of(_it, struct listxattr_iter, it); @@ -579,7 +595,7 @@ static const struct xattr_iter_handlers list_xattr_handlers = { static int inline_listxattr(struct listxattr_iter *it) { int ret; - unsigned remaining; + unsigned int remaining; ret = inline_xattr_iter_begin(&it->it, d_inode(it->dentry)); if (ret < 0) @@ -588,19 +604,20 @@ static int inline_listxattr(struct listxattr_iter *it) remaining = ret; while (remaining) { ret = xattr_foreach(&it->it, &list_xattr_handlers, &remaining); - if (ret < 0) + if (ret) break; } xattr_iter_end_final(&it->it); - return ret < 0 ? ret : it->buffer_ofs; + return ret ? 
ret : it->buffer_ofs; } static int shared_listxattr(struct listxattr_iter *it) { struct inode *const inode = d_inode(it->dentry); - struct erofs_vnode *const vi = EROFS_V(inode); - struct erofs_sb_info *const sbi = EROFS_I_SB(inode); - unsigned i; + struct erofs_inode *const vi = EROFS_I(inode); + struct super_block *const sb = inode->i_sb; + struct erofs_sb_info *const sbi = EROFS_SB(sb); + unsigned int i; int ret = 0; for (i = 0; i < vi->xattr_shared_count; ++i) { @@ -612,8 +629,7 @@ static int shared_listxattr(struct listxattr_iter *it) if (i) xattr_iter_end(&it->it, true); - it->it.page = erofs_get_meta_page(inode->i_sb, - blkaddr, false); + it->it.page = erofs_get_meta_page(sb, blkaddr); if (IS_ERR(it->it.page)) return PTR_ERR(it->it.page); @@ -622,17 +638,17 @@ static int shared_listxattr(struct listxattr_iter *it) } ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL); - if (ret < 0) + if (ret) break; } if (vi->xattr_shared_count) xattr_iter_end_final(&it->it); - return ret < 0 ? ret : it->buffer_ofs; + return ret ? ret : it->buffer_ofs; } ssize_t erofs_listxattr(struct dentry *dentry, - char *buffer, size_t buffer_size) + char *buffer, size_t buffer_size) { int ret; struct listxattr_iter it; @@ -656,3 +672,39 @@ ssize_t erofs_listxattr(struct dentry *dentry, return shared_listxattr(&it); } +#ifdef CONFIG_EROFS_FS_POSIX_ACL +struct posix_acl *erofs_get_acl(struct inode *inode, int type) +{ + struct posix_acl *acl; + int prefix, rc; + char *value = NULL; + + switch (type) { + case ACL_TYPE_ACCESS: + prefix = EROFS_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + prefix = EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + default: + return ERR_PTR(-EINVAL); + } + + rc = erofs_getxattr(inode, prefix, "", NULL, 0); + if (rc > 0) { + value = kmalloc(rc, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + rc = erofs_getxattr(inode, prefix, "", value, rc); + } + + if (rc == -ENOATTR) + acl = NULL; + else if (rc < 0) + acl = ERR_PTR(rc); + else + acl = posix_acl_from_xattr(&init_user_ns, value, rc); + kfree(value); + return acl; +} +#endif diff --git a/drivers/staging/erofs/xattr.h b/drivers/staging/erofs/xattr.h index 0c7379282fc5..366dcb400525 100644 --- a/drivers/staging/erofs/xattr.h +++ b/drivers/staging/erofs/xattr.h @@ -1,14 +1,7 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * linux/drivers/staging/erofs/xattr.h - * +/* SPDX-License-Identifier: GPL-2.0-only */ +/* * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. 
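erofs_get_acl() above uses the common two-call xattr protocol: probe with a NULL buffer to learn the value size, allocate, then fetch. The same protocol exists at the syscall boundary, so the flow can be exercised from userspace as well (read_xattr() is a hypothetical helper around Linux getxattr(2)):

    #include <stdlib.h>
    #include <sys/types.h>
    #include <sys/xattr.h>

    /* Fetch an xattr of unknown size; returns a malloc'd buffer or NULL. */
    static void *read_xattr(const char *path, const char *name, ssize_t *len)
    {
            ssize_t sz = getxattr(path, name, NULL, 0);     /* size probe */
            void *buf;

            if (sz < 0)
                    return NULL;
            buf = malloc(sz);
            if (!buf)
                    return NULL;
            *len = getxattr(path, name, buf, sz);           /* real fetch */
            if (*len < 0) {
                    free(buf);
                    return NULL;
            }
            return buf;
    }

On a writable filesystem the value can grow between the two calls (hence the usual -ERANGE retry loops); on read-only EROFS the pair is naturally stable.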
+ * https://www.huawei.com/ */ #ifndef __EROFS_XATTR_H #define __EROFS_XATTR_H @@ -20,14 +13,14 @@ /* Attribute not found */ #define ENOATTR ENODATA -static inline unsigned inlinexattr_header_size(struct inode *inode) +static inline unsigned int inlinexattr_header_size(struct inode *inode) { - return sizeof(struct erofs_xattr_ibody_header) - + sizeof(u32) * EROFS_V(inode)->xattr_shared_count; + return sizeof(struct erofs_xattr_ibody_header) + + sizeof(u32) * EROFS_I(inode)->xattr_shared_count; } -static inline erofs_blk_t -xattrblock_addr(struct erofs_sb_info *sbi, unsigned xattr_id) +static inline erofs_blk_t xattrblock_addr(struct erofs_sb_info *sbi, + unsigned int xattr_id) { #ifdef CONFIG_EROFS_FS_XATTR return sbi->xattr_blkaddr + @@ -37,56 +30,59 @@ xattrblock_addr(struct erofs_sb_info *sbi, unsigned xattr_id) #endif } -static inline unsigned -xattrblock_offset(struct erofs_sb_info *sbi, unsigned xattr_id) +static inline unsigned int xattrblock_offset(struct erofs_sb_info *sbi, + unsigned int xattr_id) { return (xattr_id * sizeof(__u32)) % EROFS_BLKSIZ; } +#ifdef CONFIG_EROFS_FS_XATTR extern const struct xattr_handler erofs_xattr_user_handler; extern const struct xattr_handler erofs_xattr_trusted_handler; #ifdef CONFIG_EROFS_FS_SECURITY extern const struct xattr_handler erofs_xattr_security_handler; #endif -static inline const struct xattr_handler *erofs_xattr_handler(unsigned index) +static inline const struct xattr_handler *erofs_xattr_handler(unsigned int idx) { -static const struct xattr_handler *xattr_handler_map[] = { - [EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler, + static const struct xattr_handler *xattr_handler_map[] = { + [EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler, #ifdef CONFIG_EROFS_FS_POSIX_ACL - [EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler, - [EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = - &posix_acl_default_xattr_handler, + [EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] = + &posix_acl_access_xattr_handler, + [EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = + &posix_acl_default_xattr_handler, #endif - [EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler, + [EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler, #ifdef CONFIG_EROFS_FS_SECURITY - [EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler, + [EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler, #endif -}; - return index && index < ARRAY_SIZE(xattr_handler_map) ? - xattr_handler_map[index] : NULL; -} + }; -#ifdef CONFIG_EROFS_FS_XATTR + return idx && idx < ARRAY_SIZE(xattr_handler_map) ? 
+ xattr_handler_map[idx] : NULL; +} -#ifdef CONFIG_EROFS_FS_XATTR -extern const struct inode_operations erofs_generic_xattr_iops; -extern const struct inode_operations erofs_dir_xattr_iops; +extern const struct xattr_handler *erofs_xattr_handlers[]; int erofs_getxattr(struct inode *, int, const char *, void *, size_t); ssize_t erofs_listxattr(struct dentry *, char *, size_t); #else -static int __maybe_unused erofs_getxattr(struct inode *inode, int index, - const char *name, - void *buffer, size_t buffer_size) +static inline int erofs_getxattr(struct inode *inode, int index, + const char *name, void *buffer, + size_t buffer_size) { - return -ENOTSUPP; + return -EOPNOTSUPP; } -static ssize_t __maybe_unused erofs_listxattr(struct dentry *dentry, - char *buffer, size_t buffer_size) -{ - return -ENOTSUPP; -} +#define erofs_listxattr (NULL) +#define erofs_xattr_handlers (NULL) +#endif /* !CONFIG_EROFS_FS_XATTR */ + +#ifdef CONFIG_EROFS_FS_POSIX_ACL +struct posix_acl *erofs_get_acl(struct inode *inode, int type); +#else +#define erofs_get_acl (NULL) #endif #endif diff --git a/drivers/staging/erofs/zdata.c b/drivers/staging/erofs/zdata.c new file mode 100644 index 000000000000..37c3e28b1e91 --- /dev/null +++ b/drivers/staging/erofs/zdata.c @@ -0,0 +1,1370 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#include "zdata.h" +#include "compress.h" +#include <linux/prefetch.h> + +#include <trace/events/erofs.h> + +/* + * a compressed_pages[] placeholder in order to avoid + * being filled with file pages for in-place decompression. + */ +#define PAGE_UNALLOCATED ((void *)0x5F0E4B1D) + +/* how to allocate cached pages for a pcluster */ +enum z_erofs_cache_alloctype { + DONTALLOC, /* don't allocate any cached pages */ + DELAYEDALLOC, /* delayed allocation (at the time of submitting io) */ +}; + +/* + * tagged pointer with 1-bit tag for all compressed pages + * tag 0 - the page is just found with an extra page reference + */ +typedef tagptr1_t compressed_page_t; + +#define tag_compressed_page_justfound(page) \ + tagptr_fold(compressed_page_t, page, 1) + +static struct workqueue_struct *z_erofs_workqueue __read_mostly; +static struct kmem_cache *pcluster_cachep __read_mostly; + +void z_erofs_exit_zip_subsystem(void) +{ + destroy_workqueue(z_erofs_workqueue); + kmem_cache_destroy(pcluster_cachep); +} + +static inline int z_erofs_init_workqueue(void) +{ + const unsigned int onlinecpus = num_possible_cpus(); + + /* + * no need to spawn too many threads, since limiting the thread count + * can minimize scheduling overhead; perhaps per-CPU threads would be + * even better? + */ + z_erofs_workqueue = alloc_workqueue("erofs_unzipd", + WQ_UNBOUND | WQ_HIGHPRI, + onlinecpus + onlinecpus / 4); + return z_erofs_workqueue ?
0 : -ENOMEM; +} + +static void z_erofs_pcluster_init_once(void *ptr) +{ + struct z_erofs_pcluster *pcl = ptr; + struct z_erofs_collection *cl = z_erofs_primarycollection(pcl); + unsigned int i; + + mutex_init(&cl->lock); + cl->nr_pages = 0; + cl->vcnt = 0; + for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i) + pcl->compressed_pages[i] = NULL; +} + +static void z_erofs_pcluster_init_always(struct z_erofs_pcluster *pcl) +{ + struct z_erofs_collection *cl = z_erofs_primarycollection(pcl); + + atomic_set(&pcl->obj.refcount, 1); + + DBG_BUGON(cl->nr_pages); + DBG_BUGON(cl->vcnt); +} + +int __init z_erofs_init_zip_subsystem(void) +{ + pcluster_cachep = kmem_cache_create("erofs_compress", + Z_EROFS_WORKGROUP_SIZE, 0, + SLAB_RECLAIM_ACCOUNT, + z_erofs_pcluster_init_once); + if (pcluster_cachep) { + if (!z_erofs_init_workqueue()) + return 0; + + kmem_cache_destroy(pcluster_cachep); + } + return -ENOMEM; +} + +enum z_erofs_collectmode { + COLLECT_SECONDARY, + COLLECT_PRIMARY, + /* + * The current collection was the tail of an exist chain, in addition + * that the previous processed chained collections are all decided to + * be hooked up to it. + * A new chain will be created for the remaining collections which are + * not processed yet, therefore different from COLLECT_PRIMARY_FOLLOWED, + * the next collection cannot reuse the whole page safely in + * the following scenario: + * ________________________________________________________________ + * | tail (partial) page | head (partial) page | + * | (belongs to the next cl) | (belongs to the current cl) | + * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________| + */ + COLLECT_PRIMARY_HOOKED, + COLLECT_PRIMARY_FOLLOWED_NOINPLACE, + /* + * The current collection has been linked with the owned chain, and + * could also be linked with the remaining collections, which means + * if the processing page is the tail page of the collection, thus + * the current collection can safely use the whole page (since + * the previous collection is under control) for in-place I/O, as + * illustrated below: + * ________________________________________________________________ + * | tail (partial) page | head (partial) page | + * | (of the current cl) | (of the previous collection) | + * | PRIMARY_FOLLOWED or | | + * |_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________| + * + * [ (*) the above page can be used as inplace I/O. 
] + */ + COLLECT_PRIMARY_FOLLOWED, +}; + +struct z_erofs_collector { + struct z_erofs_pagevec_ctor vector; + + struct z_erofs_pcluster *pcl, *tailpcl; + struct z_erofs_collection *cl; + struct page **compressedpages; + z_erofs_next_pcluster_t owned_head; + + enum z_erofs_collectmode mode; +}; + +struct z_erofs_decompress_frontend { + struct inode *const inode; + + struct z_erofs_collector clt; + struct erofs_map_blocks map; + + bool readahead; + /* used for applying cache strategy on the fly */ + bool backmost; + erofs_off_t headoffset; +}; + +#define COLLECTOR_INIT() { \ + .owned_head = Z_EROFS_PCLUSTER_TAIL, \ + .mode = COLLECT_PRIMARY_FOLLOWED } + +#define DECOMPRESS_FRONTEND_INIT(__i) { \ + .inode = __i, .clt = COLLECTOR_INIT(), \ + .backmost = true, } + +static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES]; +static DEFINE_MUTEX(z_pagemap_global_lock); + +static void preload_compressed_pages(struct z_erofs_collector *clt, + struct address_space *mc, + enum z_erofs_cache_alloctype type) +{ + const struct z_erofs_pcluster *pcl = clt->pcl; + const unsigned int clusterpages = BIT(pcl->clusterbits); + struct page **pages = clt->compressedpages; + pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages); + bool standalone = true; + + if (clt->mode < COLLECT_PRIMARY_FOLLOWED) + return; + + for (; pages < pcl->compressed_pages + clusterpages; ++pages) { + struct page *page; + compressed_page_t t; + + /* the compressed page was loaded before */ + if (READ_ONCE(*pages)) + continue; + + page = find_get_page(mc, index); + + if (page) { + t = tag_compressed_page_justfound(page); + } else if (type == DELAYEDALLOC) { + t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED); + } else { /* DONTALLOC */ + if (standalone) + clt->compressedpages = pages; + standalone = false; + continue; + } + + if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t))) + continue; + + if (page) + put_page(page); + } + + if (standalone) /* downgrade to PRIMARY_FOLLOWED_NOINPLACE */ + clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; +} + +/* called by erofs_shrinker to get rid of all compressed_pages */ +int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, + struct erofs_workgroup *grp) +{ + struct z_erofs_pcluster *const pcl = + container_of(grp, struct z_erofs_pcluster, obj); + struct address_space *const mapping = MNGD_MAPPING(sbi); + const unsigned int clusterpages = BIT(pcl->clusterbits); + int i; + + /* + * refcount of workgroup is now freezed as 1, + * therefore no need to worry about available decompression users. 
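preload_compressed_pages() above fills each compressed_pages[] slot with a cmpxchg so that the first writer wins and a losing writer simply drops its own page reference. The slot-claim rule in isolation, assuming C11 atomics (slots[], preload() and put_ref() are illustrative):

    #include <stdatomic.h>
    #include <stddef.h>

    #define NR_SLOTS 4

    static void *_Atomic slots[NR_SLOTS];   /* compressed_pages[] stand-in */

    extern void put_ref(void *page);        /* drop our extra reference */

    static void preload(void *pages[NR_SLOTS])
    {
            for (size_t i = 0; i < NR_SLOTS; i++) {
                    void *expected = NULL;

                    /* already populated by someone else: keep theirs */
                    if (atomic_load_explicit(&slots[i], memory_order_relaxed))
                            continue;
                    if (!atomic_compare_exchange_strong(&slots[i], &expected,
                                                        pages[i]))
                            put_ref(pages[i]);      /* lost the race */
            }
    }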
+ */ + for (i = 0; i < clusterpages; ++i) { + struct page *page = pcl->compressed_pages[i]; + + if (!page) + continue; + + /* block other users from reclaiming or migrating the page */ + if (!trylock_page(page)) + return -EBUSY; + + if (page->mapping != mapping) + continue; + + /* barrier is implied in the following 'unlock_page' */ + WRITE_ONCE(pcl->compressed_pages[i], NULL); + set_page_private(page, 0); + ClearPagePrivate(page); + + unlock_page(page); + put_page(page); + } + return 0; +} + +int erofs_try_to_free_cached_page(struct address_space *mapping, + struct page *page) +{ + struct z_erofs_pcluster *const pcl = (void *)page_private(page); + const unsigned int clusterpages = BIT(pcl->clusterbits); + int ret = 0; /* 0 - busy */ + + if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) { + unsigned int i; + + for (i = 0; i < clusterpages; ++i) { + if (pcl->compressed_pages[i] == page) { + WRITE_ONCE(pcl->compressed_pages[i], NULL); + ret = 1; + break; + } + } + erofs_workgroup_unfreeze(&pcl->obj, 1); + + if (ret) { + ClearPagePrivate(page); + put_page(page); + } + } + return ret; +} + +/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */ +static inline bool z_erofs_try_inplace_io(struct z_erofs_collector *clt, + struct page *page) +{ + struct z_erofs_pcluster *const pcl = clt->pcl; + const unsigned int clusterpages = BIT(pcl->clusterbits); + + while (clt->compressedpages < pcl->compressed_pages + clusterpages) { + if (!cmpxchg(clt->compressedpages++, NULL, page)) + return true; + } + return false; +} + +/* callers must be with collection lock held */ +static int z_erofs_attach_page(struct z_erofs_collector *clt, + struct page *page, + enum z_erofs_page_type type) +{ + int ret; + bool occupied; + + /* give priority for inplaceio */ + if (clt->mode >= COLLECT_PRIMARY && + type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && + z_erofs_try_inplace_io(clt, page)) + return 0; + + ret = z_erofs_pagevec_enqueue(&clt->vector, + page, type, &occupied); + clt->cl->vcnt += (unsigned int)ret; + + return ret ? 0 : -EAGAIN; +} + +static enum z_erofs_collectmode +try_to_claim_pcluster(struct z_erofs_pcluster *pcl, + z_erofs_next_pcluster_t *owned_head) +{ + /* let's claim these following types of pclusters */ +retry: + if (pcl->next == Z_EROFS_PCLUSTER_NIL) { + /* type 1, nil pcluster */ + if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL, + *owned_head) != Z_EROFS_PCLUSTER_NIL) + goto retry; + + *owned_head = &pcl->next; + /* lucky, I am the followee :) */ + return COLLECT_PRIMARY_FOLLOWED; + } else if (pcl->next == Z_EROFS_PCLUSTER_TAIL) { + /* + * type 2, link to the end of a existing open chain, + * be careful that its submission itself is governed + * by the original owned chain. 
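try_to_claim_pcluster() above is the classic cmpxchg retry shape: read the link, try to swing it, and retry or downgrade when another thread wins. With the NIL/TAIL sentinel states stripped away, the retry skeleton is just a lock-free list push, assuming C11 atomics (chain_push() is illustrative):

    #include <stdatomic.h>

    struct node {
            struct node *next;
    };

    static struct node *_Atomic chain_head;

    /* On cmpxchg failure, 'old' is refreshed with the current head and
     * the loop retries: the same conflict-retry shape as the code above. */
    static void chain_push(struct node *n)
    {
            struct node *old = atomic_load_explicit(&chain_head,
                                                    memory_order_relaxed);
            do {
                    n->next = old;
            } while (!atomic_compare_exchange_weak(&chain_head, &old, n));
    }

The erofs version differs in that a lost race is not always retried: a pcluster already claimed into someone else's chain is left alone and the caller falls back to plain COLLECT_PRIMARY mode.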
+ */ + if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, + *owned_head) != Z_EROFS_PCLUSTER_TAIL) + goto retry; + *owned_head = Z_EROFS_PCLUSTER_TAIL; + return COLLECT_PRIMARY_HOOKED; + } + return COLLECT_PRIMARY; /* :( better luck next time */ +} + +static int z_erofs_lookup_collection(struct z_erofs_collector *clt, + struct inode *inode, + struct erofs_map_blocks *map) +{ + struct erofs_workgroup *grp; + struct z_erofs_pcluster *pcl; + struct z_erofs_collection *cl; + unsigned int length; + + grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT); + if (!grp) + return -ENOENT; + + pcl = container_of(grp, struct z_erofs_pcluster, obj); + if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) { + DBG_BUGON(1); + erofs_workgroup_put(grp); + return -EFSCORRUPTED; + } + + cl = z_erofs_primarycollection(pcl); + if (cl->pageofs != (map->m_la & ~PAGE_MASK)) { + DBG_BUGON(1); + erofs_workgroup_put(grp); + return -EFSCORRUPTED; + } + + length = READ_ONCE(pcl->length); + if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) { + if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) { + DBG_BUGON(1); + erofs_workgroup_put(grp); + return -EFSCORRUPTED; + } + } else { + unsigned int llen = map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT; + + if (map->m_flags & EROFS_MAP_FULL_MAPPED) + llen |= Z_EROFS_PCLUSTER_FULL_LENGTH; + + while (llen > length && + length != cmpxchg_relaxed(&pcl->length, length, llen)) { + cpu_relax(); + length = READ_ONCE(pcl->length); + } + } + mutex_lock(&cl->lock); + /* used to check tail merging loop due to corrupted images */ + if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) + clt->tailpcl = pcl; + clt->mode = try_to_claim_pcluster(pcl, &clt->owned_head); + /* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */ + if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) + clt->tailpcl = NULL; + clt->pcl = pcl; + clt->cl = cl; + return 0; +} + +static int z_erofs_register_collection(struct z_erofs_collector *clt, + struct inode *inode, + struct erofs_map_blocks *map) +{ + struct z_erofs_pcluster *pcl; + struct z_erofs_collection *cl; + int err; + + /* no available workgroup, let's allocate one */ + pcl = kmem_cache_alloc(pcluster_cachep, GFP_NOFS); + if (!pcl) + return -ENOMEM; + + z_erofs_pcluster_init_always(pcl); + pcl->obj.index = map->m_pa >> PAGE_SHIFT; + + pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) | + (map->m_flags & EROFS_MAP_FULL_MAPPED ? + Z_EROFS_PCLUSTER_FULL_LENGTH : 0); + + if (map->m_flags & EROFS_MAP_ZIPPED) + pcl->algorithmformat = Z_EROFS_COMPRESSION_LZ4; + else + pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; + + pcl->clusterbits = EROFS_I(inode)->z_physical_clusterbits[0]; + pcl->clusterbits -= PAGE_SHIFT; + + /* new pclusters should be claimed as type 1, primary and followed */ + pcl->next = clt->owned_head; + clt->mode = COLLECT_PRIMARY_FOLLOWED; + + cl = z_erofs_primarycollection(pcl); + cl->pageofs = map->m_la & ~PAGE_MASK; + + /* + * lock all primary followed works before visible to others + * and mutex_trylock *never* fails for a new pcluster. 
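The pcl->length update in z_erofs_lookup_collection() above is an atomic monotonic maximum: the cmpxchg loop may only ever raise the stored value, no matter how many threads race. The same loop as a standalone C11 sketch (atomic_max() is a made-up helper name):

    #include <stdatomic.h>

    /* Raise *len to at least 'llen' without ever lowering it. */
    static void atomic_max(atomic_uint *len, unsigned int llen)
    {
            unsigned int cur = atomic_load_explicit(len, memory_order_relaxed);

            /* a failed cmpxchg refreshes 'cur', so the bound is re-checked
             * against the latest value before every retry */
            while (llen > cur &&
                   !atomic_compare_exchange_weak(len, &cur, llen))
                    ;
    }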
+ */ + mutex_trylock(&cl->lock); + + err = erofs_register_workgroup(inode->i_sb, &pcl->obj); + if (err) { + mutex_unlock(&cl->lock); + kmem_cache_free(pcluster_cachep, pcl); + return -EAGAIN; + } + /* used to check tail merging loop due to corrupted images */ + if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) + clt->tailpcl = pcl; + clt->owned_head = &pcl->next; + clt->pcl = pcl; + clt->cl = cl; + return 0; +} + +static int z_erofs_collector_begin(struct z_erofs_collector *clt, + struct inode *inode, + struct erofs_map_blocks *map) +{ + int ret; + + DBG_BUGON(clt->cl); + + /* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */ + DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL); + DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); + + if (!PAGE_ALIGNED(map->m_pa)) { + DBG_BUGON(1); + return -EINVAL; + } + +repeat: + ret = z_erofs_lookup_collection(clt, inode, map); + if (ret == -ENOENT) { + ret = z_erofs_register_collection(clt, inode, map); + + /* someone registered at the same time, give another try */ + if (ret == -EAGAIN) { + cond_resched(); + goto repeat; + } + } + + if (ret) + return ret; + + z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS, + clt->cl->pagevec, clt->cl->vcnt); + + clt->compressedpages = clt->pcl->compressed_pages; + if (clt->mode <= COLLECT_PRIMARY) /* cannot do in-place I/O */ + clt->compressedpages += Z_EROFS_CLUSTER_MAX_PAGES; + return 0; +} + +/* + * keep in mind that no referenced pclusters will be freed + * only after a RCU grace period. + */ +static void z_erofs_rcu_callback(struct rcu_head *head) +{ + struct z_erofs_collection *const cl = + container_of(head, struct z_erofs_collection, rcu); + + kmem_cache_free(pcluster_cachep, + container_of(cl, struct z_erofs_pcluster, + primary_collection)); +} + +void erofs_workgroup_free_rcu(struct erofs_workgroup *grp) +{ + struct z_erofs_pcluster *const pcl = + container_of(grp, struct z_erofs_pcluster, obj); + struct z_erofs_collection *const cl = z_erofs_primarycollection(pcl); + + call_rcu(&cl->rcu, z_erofs_rcu_callback); +} + +static void z_erofs_collection_put(struct z_erofs_collection *cl) +{ + struct z_erofs_pcluster *const pcl = + container_of(cl, struct z_erofs_pcluster, primary_collection); + + erofs_workgroup_put(&pcl->obj); +} + +static bool z_erofs_collector_end(struct z_erofs_collector *clt) +{ + struct z_erofs_collection *cl = clt->cl; + + if (!cl) + return false; + + z_erofs_pagevec_ctor_exit(&clt->vector, false); + mutex_unlock(&cl->lock); + + /* + * if all pending pages are added, don't hold its reference + * any longer if the pcluster isn't hosted by ourselves. 
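z_erofs_rcu_callback()/erofs_workgroup_free_rcu() above defer the real kmem_cache_free() until an RCU grace period has elapsed, and recover the pcluster from its embedded rcu_head via container_of(). The embedding trick itself needs nothing but offsetof(); a freestanding model without the RCU machinery (the *_model names are invented for illustration):

    #include <stddef.h>
    #include <stdlib.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct rcu_head_model {
            void (*func)(struct rcu_head_model *head);
    };

    struct pcluster_model {
            int payload;
            struct rcu_head_model rcu;      /* embedded callback handle */
    };

    /* The callback only sees the embedded member; container_of() walks
     * back to the enclosing object, as z_erofs_rcu_callback() does. */
    static void free_pcluster_cb(struct rcu_head_model *head)
    {
            struct pcluster_model *pcl =
                    container_of(head, struct pcluster_model, rcu);

            free(pcl);      /* safe only once no reader can reach it */
    }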
+ */ + if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE) + z_erofs_collection_put(cl); + + clt->cl = NULL; + return true; +} + +static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe, + unsigned int cachestrategy, + erofs_off_t la) +{ + if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED) + return false; + + if (fe->backmost) + return true; + + return cachestrategy >= EROFS_ZIP_CACHE_READAROUND && + la < fe->headoffset; +} + +static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, + struct page *page) +{ + struct inode *const inode = fe->inode; + struct erofs_sb_info *const sbi = EROFS_I_SB(inode); + struct erofs_map_blocks *const map = &fe->map; + struct z_erofs_collector *const clt = &fe->clt; + const loff_t offset = page_offset(page); + bool tight = true; + + enum z_erofs_cache_alloctype cache_strategy; + enum z_erofs_page_type page_type; + unsigned int cur, end, spiltted, index; + int err = 0; + + /* register locked file pages as online pages in pack */ + z_erofs_onlinepage_init(page); + + spiltted = 0; + end = PAGE_SIZE; +repeat: + cur = end - 1; + + /* lucky, within the range of the current map_blocks */ + if (offset + cur >= map->m_la && + offset + cur < map->m_la + map->m_llen) { + /* didn't get a valid collection previously (very rare) */ + if (!clt->cl) + goto restart_now; + goto hitted; + } + + /* go ahead the next map_blocks */ + erofs_dbg("%s: [out-of-range] pos %llu", __func__, offset + cur); + + if (z_erofs_collector_end(clt)) + fe->backmost = false; + + map->m_la = offset + cur; + map->m_llen = 0; + err = z_erofs_map_blocks_iter(inode, map, 0); + if (err) + goto err_out; + +restart_now: + if (!(map->m_flags & EROFS_MAP_MAPPED)) + goto hitted; + + err = z_erofs_collector_begin(clt, inode, map); + if (err) + goto err_out; + + /* preload all compressed pages (maybe downgrade role if necessary) */ + if (should_alloc_managed_pages(fe, sbi->cache_strategy, map->m_la)) + cache_strategy = DELAYEDALLOC; + else + cache_strategy = DONTALLOC; + + preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy); + +hitted: + /* + * Ensure the current partial page belongs to this submit chain rather + * than other concurrent submit chains or the noio(bypass) chain since + * those chains are handled asynchronously thus the page cannot be used + * for inplace I/O or pagevec (should be processed in strict order.) + */ + tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED && + clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); + + cur = end - min_t(unsigned int, offset + end - map->m_la, end); + if (!(map->m_flags & EROFS_MAP_MAPPED)) { + zero_user_segment(page, cur, end); + goto next_part; + } + + /* let's derive page type */ + page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD : + (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE : + (tight ? 
Z_EROFS_PAGE_TYPE_EXCLUSIVE : + Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED)); + + if (cur) + tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED); + +retry: + err = z_erofs_attach_page(clt, page, page_type); + /* should allocate an additional staging page for pagevec */ + if (err == -EAGAIN) { + struct page *const newpage = + alloc_page(GFP_NOFS | __GFP_NOFAIL); + + newpage->mapping = Z_EROFS_MAPPING_STAGING; + err = z_erofs_attach_page(clt, newpage, + Z_EROFS_PAGE_TYPE_EXCLUSIVE); + if (!err) + goto retry; + } + + if (err) + goto err_out; + + index = page->index - (map->m_la >> PAGE_SHIFT); + + z_erofs_onlinepage_fixup(page, index, true); + + /* bump up the number of spiltted parts of a page */ + ++spiltted; + /* also update nr_pages */ + clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1); +next_part: + /* can be used for verification */ + map->m_llen = offset + cur - map->m_la; + + end = cur; + if (end > 0) + goto repeat; + +out: + z_erofs_onlinepage_endio(page); + + erofs_dbg("%s, finish page: %pK spiltted: %u map->m_llen %llu", + __func__, page, spiltted, map->m_llen); + return err; + + /* if some error occurred while processing this page */ +err_out: + SetPageError(page); + goto out; +} + +static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, + bool sync, int bios) +{ + /* wake up the caller thread for sync decompression */ + if (sync) { + unsigned long flags; + + spin_lock_irqsave(&io->u.wait.lock, flags); + if (!atomic_add_return(bios, &io->pending_bios)) + wake_up_locked(&io->u.wait); + spin_unlock_irqrestore(&io->u.wait.lock, flags); + return; + } + + if (!atomic_add_return(bios, &io->pending_bios)) + queue_work(z_erofs_workqueue, &io->u.work); +} + +static void z_erofs_decompressqueue_endio(struct bio *bio) +{ + tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private); + struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t); + blk_status_t err = bio->bi_status; + struct bio_vec *bvec; + unsigned int i; + + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; + + DBG_BUGON(PageUptodate(page)); + DBG_BUGON(!page->mapping); + + if (err) + SetPageError(page); + + if (erofs_page_is_managed(EROFS_SB(q->sb), page)) { + if (!err) + SetPageUptodate(page); + unlock_page(page); + } + } + z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1); + bio_put(bio); +} + +static int z_erofs_decompress_pcluster(struct super_block *sb, + struct z_erofs_pcluster *pcl, + struct list_head *pagepool) +{ + struct erofs_sb_info *const sbi = EROFS_SB(sb); + const unsigned int clusterpages = BIT(pcl->clusterbits); + struct z_erofs_pagevec_ctor ctor; + unsigned int i, outputsize, llen, nr_pages; + struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES]; + struct page **pages, **compressed_pages, *page; + + enum z_erofs_page_type page_type; + bool overlapped, partial; + struct z_erofs_collection *cl; + int err; + + might_sleep(); + cl = z_erofs_primarycollection(pcl); + DBG_BUGON(!READ_ONCE(cl->nr_pages)); + + mutex_lock(&cl->lock); + nr_pages = cl->nr_pages; + + if (nr_pages <= Z_EROFS_VMAP_ONSTACK_PAGES) { + pages = pages_onstack; + } else if (nr_pages <= Z_EROFS_VMAP_GLOBAL_PAGES && + mutex_trylock(&z_pagemap_global_lock)) { + pages = z_pagemap_global; + } else { + gfp_t gfp_flags = GFP_KERNEL; + + if (nr_pages > Z_EROFS_VMAP_GLOBAL_PAGES) + gfp_flags |= __GFP_NOFAIL; + + pages = kvmalloc_array(nr_pages, sizeof(struct page *), + gfp_flags); + + /* fallback to global pagemap for the lowmem scenario */ + if (!pages) { + mutex_lock(&z_pagemap_global_lock); + 
pages = z_pagemap_global; + } + } + + for (i = 0; i < nr_pages; ++i) + pages[i] = NULL; + + err = 0; + z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS, + cl->pagevec, 0); + + for (i = 0; i < cl->vcnt; ++i) { + unsigned int pagenr; + + page = z_erofs_pagevec_dequeue(&ctor, &page_type); + + /* all pages in pagevec ought to be valid */ + DBG_BUGON(!page); + DBG_BUGON(!page->mapping); + + if (z_erofs_put_stagingpage(pagepool, page)) + continue; + + if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD) + pagenr = 0; + else + pagenr = z_erofs_onlinepage_index(page); + + DBG_BUGON(pagenr >= nr_pages); + + /* + * currently EROFS doesn't support multiref(dedup), + * so here erroring out one multiref page. + */ + if (pages[pagenr]) { + DBG_BUGON(1); + SetPageError(pages[pagenr]); + z_erofs_onlinepage_endio(pages[pagenr]); + err = -EFSCORRUPTED; + } + pages[pagenr] = page; + } + z_erofs_pagevec_ctor_exit(&ctor, true); + + overlapped = false; + compressed_pages = pcl->compressed_pages; + + for (i = 0; i < clusterpages; ++i) { + unsigned int pagenr; + + page = compressed_pages[i]; + + /* all compressed pages ought to be valid */ + DBG_BUGON(!page); + DBG_BUGON(!page->mapping); + + if (!z_erofs_page_is_staging(page)) { + if (erofs_page_is_managed(sbi, page)) { + if (!PageUptodate(page)) + err = -EIO; + continue; + } + + /* + * only if non-head page can be selected + * for inplace decompression + */ + pagenr = z_erofs_onlinepage_index(page); + + DBG_BUGON(pagenr >= nr_pages); + if (pages[pagenr]) { + DBG_BUGON(1); + SetPageError(pages[pagenr]); + z_erofs_onlinepage_endio(pages[pagenr]); + err = -EFSCORRUPTED; + } + pages[pagenr] = page; + + overlapped = true; + } + + /* PG_error needs checking for inplaced and staging pages */ + if (PageError(page)) { + DBG_BUGON(PageUptodate(page)); + err = -EIO; + } + } + + if (err) + goto out; + + llen = pcl->length >> Z_EROFS_PCLUSTER_LENGTH_BIT; + if (nr_pages << PAGE_SHIFT >= cl->pageofs + llen) { + outputsize = llen; + partial = !(pcl->length & Z_EROFS_PCLUSTER_FULL_LENGTH); + } else { + outputsize = (nr_pages << PAGE_SHIFT) - cl->pageofs; + partial = true; + } + + err = z_erofs_decompress(&(struct z_erofs_decompress_req) { + .sb = sb, + .in = compressed_pages, + .out = pages, + .pageofs_out = cl->pageofs, + .inputsize = PAGE_SIZE, + .outputsize = outputsize, + .alg = pcl->algorithmformat, + .inplace_io = overlapped, + .partial_decoding = partial + }, pagepool); + +out: + /* must handle all compressed pages before endding pages */ + for (i = 0; i < clusterpages; ++i) { + page = compressed_pages[i]; + + if (erofs_page_is_managed(sbi, page)) + continue; + + /* recycle all individual staging pages */ + (void)z_erofs_put_stagingpage(pagepool, page); + + WRITE_ONCE(compressed_pages[i], NULL); + } + + for (i = 0; i < nr_pages; ++i) { + page = pages[i]; + if (!page) + continue; + + DBG_BUGON(!page->mapping); + + /* recycle all individual staging pages */ + if (z_erofs_put_stagingpage(pagepool, page)) + continue; + + if (err < 0) + SetPageError(page); + + z_erofs_onlinepage_endio(page); + } + + if (pages == z_pagemap_global) + mutex_unlock(&z_pagemap_global_lock); + else if (pages != pages_onstack) + kvfree(pages); + + cl->nr_pages = 0; + cl->vcnt = 0; + + /* all cl locks MUST be taken before the following line */ + WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL); + + /* all cl locks SHOULD be released right now */ + mutex_unlock(&cl->lock); + + z_erofs_collection_put(cl); + return err; +} + +static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io, + 
struct list_head *pagepool) +{ + z_erofs_next_pcluster_t owned = io->head; + + while (owned != Z_EROFS_PCLUSTER_TAIL_CLOSED) { + struct z_erofs_pcluster *pcl; + + /* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */ + DBG_BUGON(owned == Z_EROFS_PCLUSTER_TAIL); + + /* no possible that 'owned' equals NULL */ + DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL); + + pcl = container_of(owned, struct z_erofs_pcluster, next); + owned = READ_ONCE(pcl->next); + + z_erofs_decompress_pcluster(io->sb, pcl, pagepool); + } +} + +static void z_erofs_decompressqueue_work(struct work_struct *work) +{ + struct z_erofs_decompressqueue *bgq = + container_of(work, struct z_erofs_decompressqueue, u.work); + LIST_HEAD(pagepool); + + DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED); + z_erofs_decompress_queue(bgq, &pagepool); + + put_pages_list(&pagepool); + kvfree(bgq); +} + +static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, + unsigned int nr, + struct list_head *pagepool, + struct address_space *mc, + gfp_t gfp) +{ + const pgoff_t index = pcl->obj.index; + bool tocache = false; + + struct address_space *mapping; + struct page *oldpage, *page; + + compressed_page_t t; + int justfound; + +repeat: + page = READ_ONCE(pcl->compressed_pages[nr]); + oldpage = page; + + if (!page) + goto out_allocpage; + + /* + * the cached page has not been allocated and + * an placeholder is out there, prepare it now. + */ + if (page == PAGE_UNALLOCATED) { + tocache = true; + goto out_allocpage; + } + + /* process the target tagged pointer */ + t = tagptr_init(compressed_page_t, page); + justfound = tagptr_unfold_tags(t); + page = tagptr_unfold_ptr(t); + + mapping = READ_ONCE(page->mapping); + + /* + * unmanaged (file) pages are all locked solidly, + * therefore it is impossible for `mapping' to be NULL. + */ + if (mapping && mapping != mc) + /* ought to be unmanaged pages */ + goto out; + + lock_page(page); + + /* only true if page reclaim goes wrong, should never happen */ + DBG_BUGON(justfound && PagePrivate(page)); + + /* the page is still in manage cache */ + if (page->mapping == mc) { + WRITE_ONCE(pcl->compressed_pages[nr], page); + + ClearPageError(page); + if (!PagePrivate(page)) { + /* + * impossible to be !PagePrivate(page) for + * the current restriction as well if + * the page is already in compressed_pages[]. + */ + DBG_BUGON(!justfound); + + justfound = 0; + set_page_private(page, (unsigned long)pcl); + SetPagePrivate(page); + } + + /* no need to submit io if it is already up-to-date */ + if (PageUptodate(page)) { + unlock_page(page); + page = NULL; + } + goto out; + } + + /* + * the managed page has been truncated, it's unsafe to + * reuse this one, let's allocate a new cache-managed page. 
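z_erofs_decompress_queue() above drains a closed chain by loading ->next before handing each pcluster off, because decompression may recycle the node (its ->next is reset to NIL). The walk reduces to the following shape (TAIL_CLOSED mirrors the sentinel used above; process() is a stand-in):

    struct pcl {
            struct pcl *next;
            /* ... payload ... */
    };

    #define TAIL_CLOSED ((struct pcl *)0x5F0EDEAD)  /* sentinel, as above */

    extern void process(struct pcl *p);     /* may recycle 'p' entirely */

    static void run_queue(struct pcl *head)
    {
            while (head != TAIL_CLOSED) {
                    struct pcl *next = head->next;  /* read before process() */

                    process(head);
                    head = next;
            }
    }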
+ */ + DBG_BUGON(page->mapping); + DBG_BUGON(!justfound); + + tocache = true; + unlock_page(page); + put_page(page); +out_allocpage: + page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL); + if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) { + /* non-LRU / non-movable temporary page is needed */ + page->mapping = Z_EROFS_MAPPING_STAGING; + tocache = false; + } + + if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) { + if (tocache) { + /* since it added to managed cache successfully */ + unlock_page(page); + put_page(page); + } else { + list_add(&page->lru, pagepool); + } + cond_resched(); + goto repeat; + } + + if (tocache) { + set_page_private(page, (unsigned long)pcl); + SetPagePrivate(page); + } +out: /* the only exit (for tracing and debugging) */ + return page; +} + +static struct z_erofs_decompressqueue * +jobqueue_init(struct super_block *sb, + struct z_erofs_decompressqueue *fgq, bool *fg) +{ + struct z_erofs_decompressqueue *q; + + if (fg && !*fg) { + q = kvzalloc(sizeof(*q), GFP_KERNEL | __GFP_NOWARN); + if (!q) { + *fg = true; + goto fg_out; + } + INIT_WORK(&q->u.work, z_erofs_decompressqueue_work); + } else { +fg_out: + q = fgq; + init_waitqueue_head(&fgq->u.wait); + atomic_set(&fgq->pending_bios, 0); + } + q->sb = sb; + q->head = Z_EROFS_PCLUSTER_TAIL_CLOSED; + return q; +} + +/* define decompression jobqueue types */ +enum { + JQ_BYPASS, + JQ_SUBMIT, + NR_JOBQUEUES, +}; + +static void *jobqueueset_init(struct super_block *sb, + struct z_erofs_decompressqueue *q[], + struct z_erofs_decompressqueue *fgq, bool *fg) +{ + /* + * if managed cache is enabled, bypass jobqueue is needed, + * no need to read from device for all pclusters in this queue. + */ + q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL); + q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, fg); + + return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], *fg)); +} + +static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, + z_erofs_next_pcluster_t qtail[], + z_erofs_next_pcluster_t owned_head) +{ + z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT]; + z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS]; + + DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); + if (owned_head == Z_EROFS_PCLUSTER_TAIL) + owned_head = Z_EROFS_PCLUSTER_TAIL_CLOSED; + + WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL_CLOSED); + + WRITE_ONCE(*submit_qtail, owned_head); + WRITE_ONCE(*bypass_qtail, &pcl->next); + + qtail[JQ_BYPASS] = &pcl->next; +} + +static void z_erofs_submit_queue(struct super_block *sb, + struct z_erofs_decompress_frontend *f, + struct list_head *pagepool, + struct z_erofs_decompressqueue *fgq, + bool *force_fg) +{ + struct erofs_sb_info *const sbi = EROFS_SB(sb); + z_erofs_next_pcluster_t qtail[NR_JOBQUEUES]; + struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; + void *bi_private; + z_erofs_next_pcluster_t owned_head = f->clt.owned_head; + /* since bio will be NULL, no need to initialize last_index */ + pgoff_t last_index; + unsigned int nr_bios = 0; + struct bio *bio = NULL; + + bi_private = jobqueueset_init(sb, q, fgq, force_fg); + qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head; + qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head; + + /* by default, all need io submission */ + q[JQ_SUBMIT]->head = owned_head; + + do { + struct z_erofs_pcluster *pcl; + pgoff_t cur, end; + unsigned int i = 0; + bool bypass = true; + + /* no possible 'owned_head' equals the following */ + DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); + DBG_BUGON(owned_head == 
Z_EROFS_PCLUSTER_NIL); + + pcl = container_of(owned_head, struct z_erofs_pcluster, next); + + cur = pcl->obj.index; + end = cur + BIT(pcl->clusterbits); + + /* close the main owned chain at first */ + owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, + Z_EROFS_PCLUSTER_TAIL_CLOSED); + + do { + struct page *page; + + page = pickup_page_for_submission(pcl, i++, pagepool, + MNGD_MAPPING(sbi), + GFP_NOFS); + if (!page) + continue; + + if (bio && cur != last_index + 1) { +submit_bio_retry: + submit_bio(bio); + bio = NULL; + } + + if (!bio) { + bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); + + bio->bi_end_io = z_erofs_decompressqueue_endio; + bio_set_dev(bio, sb->s_bdev); + bio->bi_iter.bi_sector = (sector_t)cur << + LOG_SECTORS_PER_BLOCK; + bio->bi_private = bi_private; + bio->bi_opf = REQ_OP_READ; + if (f->readahead) + bio->bi_opf |= REQ_RAHEAD; + ++nr_bios; + } + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) + goto submit_bio_retry; + + last_index = cur; + bypass = false; + } while (++cur < end); + + if (!bypass) + qtail[JQ_SUBMIT] = &pcl->next; + else + move_to_bypass_jobqueue(pcl, qtail, owned_head); + } while (owned_head != Z_EROFS_PCLUSTER_TAIL); + + if (bio) + submit_bio(bio); + + /* + * although background is preferred, no one is pending for submission. + * don't issue workqueue for decompression but drop it directly instead. + */ + if (!*force_fg && !nr_bios) { + kvfree(q[JQ_SUBMIT]); + return; + } + z_erofs_decompress_kickoff(q[JQ_SUBMIT], *force_fg, nr_bios); +} + +static void z_erofs_runqueue(struct super_block *sb, + struct z_erofs_decompress_frontend *f, + struct list_head *pagepool, bool force_fg) +{ + struct z_erofs_decompressqueue io[NR_JOBQUEUES]; + + if (f->clt.owned_head == Z_EROFS_PCLUSTER_TAIL) + return; + z_erofs_submit_queue(sb, f, pagepool, io, &force_fg); + + /* handle bypass queue (no i/o pclusters) immediately */ + z_erofs_decompress_queue(&io[JQ_BYPASS], pagepool); + + if (!force_fg) + return; + + /* wait until all bios are completed */ + io_wait_event(io[JQ_SUBMIT].u.wait, + !atomic_read(&io[JQ_SUBMIT].pending_bios)); + + /* handle synchronous decompress queue in the caller context */ + z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool); +} + +static int z_erofs_readpage(struct file *file, struct page *page) +{ + struct inode *const inode = page->mapping->host; + struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); + int err; + LIST_HEAD(pagepool); + + trace_erofs_readpage(page, false); + + f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT; + + err = z_erofs_do_read_page(&f, page); + (void)z_erofs_collector_end(&f.clt); + + /* if some compressed cluster ready, need submit them anyway */ + z_erofs_runqueue(inode->i_sb, &f, &pagepool, true); + + if (err) + erofs_err(inode->i_sb, "failed to read, err [%d]", err); + + if (f.map.mpage) + put_page(f.map.mpage); + + /* clean up the remaining free pages */ + put_pages_list(&pagepool); + return err; +} + +static int z_erofs_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned int nr_pages) +{ + struct inode *const inode = mapping->host; + struct erofs_sb_info *const sbi = EROFS_I_SB(inode); + + bool sync = (nr_pages <= sbi->max_sync_decompress_pages); + struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); + gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); + struct page *head = NULL; + LIST_HEAD(pagepool); + + trace_erofs_readpages(mapping->host, lru_to_page(pages), + nr_pages, false); + + f.headoffset = 
+
+	for (; nr_pages; --nr_pages) {
+		struct page *page = lru_to_page(pages);
+
+		prefetchw(&page->flags);
+		list_del(&page->lru);
+
+		/*
+		 * a pure asynchronous readahead is indicated if a
+		 * PG_readahead-marked page is hit first. Let's also do
+		 * asynchronous decompression for this case.
+		 */
+		sync &= !(PageReadahead(page) && !head);
+
+		if (add_to_page_cache_lru(page, mapping, page->index, gfp)) {
+			list_add(&page->lru, &pagepool);
+			continue;
+		}
+
+		set_page_private(page, (unsigned long)head);
+		head = page;
+	}
+
+	while (head) {
+		struct page *page = head;
+		int err;
+
+		/* traversal in reverse order */
+		head = (void *)page_private(page);
+
+		err = z_erofs_do_read_page(&f, page);
+		if (err)
+			erofs_err(inode->i_sb,
+				  "readahead error at page %lu @ nid %llu",
+				  page->index, EROFS_I(inode)->nid);
+		put_page(page);
+	}
+
+	(void)z_erofs_collector_end(&f.clt);
+
+	z_erofs_runqueue(inode->i_sb, &f, &pagepool, sync);
+
+	if (f.map.mpage)
+		put_page(f.map.mpage);
+
+	/* clean up the remaining free pages */
+	put_pages_list(&pagepool);
+	return 0;
+}
+
+const struct address_space_operations z_erofs_aops = {
+	.readpage = z_erofs_readpage,
+	.readpages = z_erofs_readpages,
+};
diff --git a/drivers/staging/erofs/zdata.h b/drivers/staging/erofs/zdata.h
new file mode 100644
index 000000000000..68da309d5ad7
--- /dev/null
+++ b/drivers/staging/erofs/zdata.h
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018 HUAWEI, Inc.
+ *             https://www.huawei.com/
+ */
+#ifndef __EROFS_FS_ZDATA_H
+#define __EROFS_FS_ZDATA_H
+
+#include "internal.h"
+#include "zpvec.h"
+
+#define Z_EROFS_NR_INLINE_PAGEVECS	3
+
+/*
+ * Structure fields follow one of the following exclusion rules.
+ *
+ * I: Modifiable by initialization/destruction paths and read-only
+ *    for everyone else;
+ *
+ * L: Field should be protected by pageset lock;
+ *
+ * A: Field should be accessed / updated in atomic for parallelized code.
+ */
+struct z_erofs_collection {
+	struct mutex lock;
+
+	/* I: page offset of start position of decompression */
+	unsigned short pageofs;
+
+	/* L: maximum relative page index in pagevec[] */
+	unsigned short nr_pages;
+
+	/* L: total number of pages in pagevec[] */
+	unsigned int vcnt;
+
+	union {
+		/* L: inline a certain number of pagevecs for bootstrap */
+		erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS];
+
+		/* I: can be used to free the pcluster by RCU. */
+		struct rcu_head rcu;
+	};
+};
+
+#define Z_EROFS_PCLUSTER_FULL_LENGTH	0x00000001
+#define Z_EROFS_PCLUSTER_LENGTH_BIT	1
+
+/*
+ * leave a type here in case another tagged pointer
+ * is introduced later.
+ */
+typedef void *z_erofs_next_pcluster_t;
+
+struct z_erofs_pcluster {
+	struct erofs_workgroup obj;
+	struct z_erofs_collection primary_collection;
+
+	/* A: point to next chained pcluster or TAILs */
+	z_erofs_next_pcluster_t next;
+
+	/* A: compressed pages (including multi-usage pages) */
+	struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES];
+
+	/* A: lower limit of decompressed length and if full length or not */
+	unsigned int length;
+
+	/* I: compression algorithm format */
+	unsigned char algorithmformat;
+	/* I: bit shift of physical cluster size */
+	unsigned char clusterbits;
+};
+
+#define z_erofs_primarycollection(pcluster) (&(pcluster)->primary_collection)
+
+/* let's avoid valid 32-bit kernel addresses */
+
+/* the chained workgroup hasn't submitted I/O (still open) */
+#define Z_EROFS_PCLUSTER_TAIL		((void *)0x5F0ECAFE)
+/* the chained workgroup has already submitted I/O */
+#define Z_EROFS_PCLUSTER_TAIL_CLOSED	((void *)0x5F0EDEAD)
+
+#define Z_EROFS_PCLUSTER_NIL		(NULL)
+
+#define Z_EROFS_WORKGROUP_SIZE		sizeof(struct z_erofs_pcluster)
+
+struct z_erofs_decompressqueue {
+	struct super_block *sb;
+	atomic_t pending_bios;
+	z_erofs_next_pcluster_t head;
+
+	union {
+		wait_queue_head_t wait;
+		struct work_struct work;
+	} u;
+};
+
+#define MNGD_MAPPING(sbi)	((sbi)->managed_cache->i_mapping)
+static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
+					 struct page *page)
+{
+	return page->mapping == MNGD_MAPPING(sbi);
+}
+
+#define Z_EROFS_ONLINEPAGE_COUNT_BITS	2
+#define Z_EROFS_ONLINEPAGE_COUNT_MASK	((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
+#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT	(Z_EROFS_ONLINEPAGE_COUNT_BITS)
+
+/*
+ * waiters (aka. ongoing_packs): # of parts still pending before
+ *                               the page can be unlocked
+ * sub-index: 0 - for a partial page, >= 1 - full page sub-index
+ */
+typedef atomic_t z_erofs_onlinepage_t;
+
+/* type punning */
+union z_erofs_onlinepage_converter {
+	z_erofs_onlinepage_t *o;
+	unsigned long *v;
+};
+
+static inline unsigned int z_erofs_onlinepage_index(struct page *page)
+{
+	union z_erofs_onlinepage_converter u;
+
+	DBG_BUGON(!PagePrivate(page));
+	u.v = &page_private(page);
+
+	return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
+}
+
+static inline void z_erofs_onlinepage_init(struct page *page)
+{
+	union {
+		z_erofs_onlinepage_t o;
+		unsigned long v;
+	/* keep from being unlocked in advance */
+	} u = { .o = ATOMIC_INIT(1) };
+
+	set_page_private(page, u.v);
+	smp_wmb();
+	SetPagePrivate(page);
+}
+
+static inline void z_erofs_onlinepage_fixup(struct page *page,
+					    uintptr_t index, bool down)
+{
+	union z_erofs_onlinepage_converter u = { .v = &page_private(page) };
+	int orig, orig_index, val;
+
+repeat:
+	orig = atomic_read(u.o);
+	orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
+	if (orig_index) {
+		if (!index)
+			return;
+
+		DBG_BUGON(orig_index != index);
+	}
+
+	val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
+		((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
+	if (atomic_cmpxchg(u.o, orig, val) != orig)
+		goto repeat;
+}
+
+static inline void z_erofs_onlinepage_endio(struct page *page)
+{
+	union z_erofs_onlinepage_converter u;
+	unsigned int v;
+
+	DBG_BUGON(!PagePrivate(page));
+	u.v = &page_private(page);
+
+	v = atomic_dec_return(u.o);
+	if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
+		ClearPagePrivate(page);
+		if (!PageError(page))
+			SetPageUptodate(page);
+		unlock_page(page);
+	}
+	erofs_dbg("%s, page %p value %x", __func__, page, atomic_read(u.o));
+}
+
+#define Z_EROFS_VMAP_ONSTACK_PAGES	\
+	min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
+#define Z_EROFS_VMAP_GLOBAL_PAGES	2048
+
+#endif
+
diff --git a/drivers/staging/erofs/zmap.c b/drivers/staging/erofs/zmap.c
new file mode 100644
index 000000000000..a13f69cf4026
--- /dev/null
+++ b/drivers/staging/erofs/zmap.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2018-2019 HUAWEI, Inc.
+ *             https://www.huawei.com/
+ */
+#include "internal.h"
+#include <asm/unaligned.h>
+#include <trace/events/erofs.h>
+
+int z_erofs_fill_inode(struct inode *inode)
+{
+	struct erofs_inode *const vi = EROFS_I(inode);
+
+	if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
+		vi->z_advise = 0;
+		vi->z_algorithmtype[0] = 0;
+		vi->z_algorithmtype[1] = 0;
+		vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
+		vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits;
+		vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits;
+		set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
+	}
+
+	inode->i_mapping->a_ops = &z_erofs_aops;
+	return 0;
+}
+
+static int z_erofs_fill_inode_lazy(struct inode *inode)
+{
+	struct erofs_inode *const vi = EROFS_I(inode);
+	struct super_block *const sb = inode->i_sb;
+	int err;
+	erofs_off_t pos;
+	struct page *page;
+	void *kaddr;
+	struct z_erofs_map_header *h;
+
+	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
+		/*
+		 * paired with smp_mb() at the end of the function to ensure
+		 * fields will only be observed after the bit is set.
+		 */
+		smp_mb();
+		return 0;
+	}
+
+	if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
+		return -ERESTARTSYS;
+
+	err = 0;
+	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
+		goto out_unlock;
+
+	DBG_BUGON(vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
+
+	pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
+		    vi->xattr_isize, 8);
+	page = erofs_get_meta_page(sb, erofs_blknr(pos));
+	if (IS_ERR(page)) {
+		err = PTR_ERR(page);
+		goto out_unlock;
+	}
+
+	kaddr = kmap_atomic(page);
+
+	h = kaddr + erofs_blkoff(pos);
+	vi->z_advise = le16_to_cpu(h->h_advise);
+	vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
+	vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
+
+	if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) {
+		erofs_err(sb, "unknown compression format %u for nid %llu, please upgrade kernel",
+			  vi->z_algorithmtype[0], vi->nid);
+		err = -EOPNOTSUPP;
+		goto unmap_done;
+	}
+
+	vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
+	vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits +
+					((h->h_clusterbits >> 3) & 3);
+
+	if (vi->z_physical_clusterbits[0] != LOG_BLOCK_SIZE) {
+		erofs_err(sb, "unsupported physical clusterbits %u for nid %llu, please upgrade kernel",
+			  vi->z_physical_clusterbits[0], vi->nid);
+		err = -EOPNOTSUPP;
+		goto unmap_done;
+	}
+
+	vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits +
+					((h->h_clusterbits >> 5) & 7);
+	/* paired with smp_mb() at the beginning of the function */
+	smp_mb();
+	set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
+unmap_done:
+	kunmap_atomic(kaddr);
+	unlock_page(page);
+	put_page(page);
+out_unlock:
+	clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
+	return err;
+}
+
+struct z_erofs_maprecorder {
+	struct inode *inode;
+	struct erofs_map_blocks *map;
+	void *kaddr;
+
+	unsigned long lcn;
+	/* compression extent information gathered */
+	u8  type;
+	u16 clusterofs;
+	u16 delta[2];
+	erofs_blk_t pblk;
+};
+
+static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
+				  erofs_blk_t eblk)
+{
+	struct super_block *const sb = m->inode->i_sb;
+	struct erofs_map_blocks *const map = m->map;
+	struct page *mpage = map->mpage;
+
+	if (mpage) {
+		if (mpage->index == eblk) {
+			if (!m->kaddr)
+				m->kaddr = kmap_atomic(mpage);
+			return 0;
+		}
+
+		if (m->kaddr) {
+			kunmap_atomic(m->kaddr);
+			m->kaddr = NULL;
+		}
+		put_page(mpage);
+	}
+
+	mpage = erofs_get_meta_page(sb, eblk);
+	if (IS_ERR(mpage)) {
+		map->mpage = NULL;
+		return PTR_ERR(mpage);
+	}
+	m->kaddr = kmap_atomic(mpage);
+	unlock_page(mpage);
+	map->mpage = mpage;
+	return 0;
+}
+
+static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+					 unsigned long lcn)
+{
+	struct inode *const inode = m->inode;
+	struct erofs_inode *const vi = EROFS_I(inode);
+	const erofs_off_t ibase = iloc(EROFS_I_SB(inode), vi->nid);
+	const erofs_off_t pos =
+		Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize +
+					       vi->xattr_isize) +
+		lcn * sizeof(struct z_erofs_vle_decompressed_index);
+	struct z_erofs_vle_decompressed_index *di;
+	unsigned int advise, type;
+	int err;
+
+	err = z_erofs_reload_indexes(m, erofs_blknr(pos));
+	if (err)
+		return err;
+
+	m->lcn = lcn;
+	di = m->kaddr + erofs_blkoff(pos);
+
+	advise = le16_to_cpu(di->di_advise);
+	type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) &
+		((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1);
+	switch (type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+		m->clusterofs = 1 << vi->z_logical_clusterbits;
+		m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
+		m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
+		break;
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		m->clusterofs = le16_to_cpu(di->di_clusterofs);
+		m->pblk = le32_to_cpu(di->di_u.blkaddr);
+		break;
+	default:
+		DBG_BUGON(1);
+		return -EOPNOTSUPP;
+	}
+	m->type = type;
+	return 0;
+}
+
+static unsigned int decode_compactedbits(unsigned int lobits,
+					 unsigned int lomask,
+					 u8 *in, unsigned int pos, u8 *type)
+{
+	const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
+	const unsigned int lo = v & lomask;
+
+	*type = (v >> lobits) & 3;
+	return lo;
+}
+
+static int unpack_compacted_index(struct z_erofs_maprecorder *m,
+				  unsigned int amortizedshift,
+				  unsigned int eofs)
+{
+	struct erofs_inode *const vi = EROFS_I(m->inode);
+	const unsigned int lclusterbits = vi->z_logical_clusterbits;
+	const unsigned int lomask = (1 << lclusterbits) - 1;
+	unsigned int vcnt, base, lo, encodebits, nblk;
+	int i;
+	u8 *in, type;
+
+	if (1 << amortizedshift == 4)
+		vcnt = 2;
+	else if (1 << amortizedshift == 2 && lclusterbits == 12)
+		vcnt = 16;
+	else
+		return -EOPNOTSUPP;
+
+	encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
+	base = round_down(eofs, vcnt << amortizedshift);
+	in = m->kaddr + base;
+
+	i = (eofs - base) >> amortizedshift;
+
+	lo = decode_compactedbits(lclusterbits, lomask,
+				  in, encodebits * i, &type);
+	m->type = type;
+	if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+		m->clusterofs = 1 << lclusterbits;
+		if (i + 1 != vcnt) {
+			m->delta[0] = lo;
+			return 0;
+		}
+		/*
+		 * the last lcluster in the pack is special: its lo field
+		 * holds delta[1] rather than delta[0]; hence, derive
+		 * delta[0] from the previous lcluster instead.
+		 */
+		lo = decode_compactedbits(lclusterbits, lomask,
+					  in, encodebits * (i - 1), &type);
+		if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+			lo = 0;
+		m->delta[0] = lo + 1;
+		return 0;
+	}
+	m->clusterofs = lo;
+	m->delta[0] = 0;
+	/* figure out the block address (pblk) for HEAD lclusters */
+	nblk = 1;
+	while (i > 0) {
+		--i;
+		lo = decode_compactedbits(lclusterbits, lomask,
+					  in, encodebits * i, &type);
+		if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+			i -= lo;
+
+		if (i >= 0)
+			++nblk;
+	}
+	in += (vcnt << amortizedshift) - sizeof(__le32);
+	m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
+	return 0;
+}
+
+static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+					    unsigned long lcn)
+{
+	struct inode *const inode = m->inode;
+	struct erofs_inode *const vi = EROFS_I(inode);
+	const unsigned int lclusterbits = vi->z_logical_clusterbits;
+	const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) +
+					vi->inode_isize + vi->xattr_isize, 8) +
+		sizeof(struct z_erofs_map_header);
+	const unsigned int totalidx = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
+	unsigned int compacted_4b_initial, compacted_2b;
+	unsigned int amortizedshift;
+	erofs_off_t pos;
+	int err;
+
+	if (lclusterbits != 12)
+		return -EOPNOTSUPP;
+
+	if (lcn >= totalidx)
+		return -EINVAL;
+
+	m->lcn = lcn;
+	/* align to the 32-byte boundary used by compacted_2b packs */
+	compacted_4b_initial = (32 - ebase % 32) / 4;
+	if (compacted_4b_initial == 32 / 4)
+		compacted_4b_initial = 0;
+
+	if (vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B)
+		compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
+	else
+		compacted_2b = 0;
+
+	pos = ebase;
+	if (lcn < compacted_4b_initial) {
+		amortizedshift = 2;
+		goto out;
+	}
+	pos += compacted_4b_initial * 4;
+	lcn -= compacted_4b_initial;
+
+	if (lcn < compacted_2b) {
+		amortizedshift = 1;
+		goto out;
+	}
+	pos += compacted_2b * 2;
+	lcn -= compacted_2b;
+	amortizedshift = 2;
+out:
+	pos += lcn * (1 << amortizedshift);
+	err = z_erofs_reload_indexes(m, erofs_blknr(pos));
+	if (err)
+		return err;
+	return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos));
+}
+
+static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+					  unsigned int lcn)
+{
+	const unsigned int datamode = EROFS_I(m->inode)->datalayout;
+
+	if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
+		return legacy_load_cluster_from_disk(m, lcn);
+
+	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
+		return compacted_load_cluster_from_disk(m, lcn);
+
+	return -EINVAL;
+}
+
+static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
+				   unsigned int lookback_distance)
+{
+	struct erofs_inode *const vi = EROFS_I(m->inode);
+	struct erofs_map_blocks *const map = m->map;
+	const unsigned int lclusterbits = vi->z_logical_clusterbits;
+	unsigned long lcn = m->lcn;
+	int err;
+
+	if (lcn < lookback_distance) {
+		erofs_err(m->inode->i_sb,
+			  "bogus lookback distance @ nid %llu", vi->nid);
+		DBG_BUGON(1);
+		return -EFSCORRUPTED;
+	}
+
+	/* load the extent head logical cluster if needed */
+	lcn -= lookback_distance;
+	err = z_erofs_load_cluster_from_disk(m, lcn);
+	if (err)
+		return err;
+
+	switch (m->type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+		if (!m->delta[0]) {
+			erofs_err(m->inode->i_sb,
+				  "invalid lookback distance 0 @ nid %llu",
+				  vi->nid);
+			DBG_BUGON(1);
+			return -EFSCORRUPTED;
+		}
+		return z_erofs_extent_lookback(m, m->delta[0]);
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+		map->m_flags &= ~EROFS_MAP_ZIPPED;
+		/* fallthrough */
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		map->m_la = (lcn << lclusterbits) | m->clusterofs;
+		break;
+	default:
+		erofs_err(m->inode->i_sb,
+			  "unknown type %u @ lcn %lu of nid %llu",
+			  m->type, lcn, vi->nid);
+		DBG_BUGON(1);
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+int z_erofs_map_blocks_iter(struct inode *inode,
+			    struct erofs_map_blocks *map,
+			    int flags)
+{
+	struct erofs_inode *const vi = EROFS_I(inode);
+	struct z_erofs_maprecorder m = {
+		.inode = inode,
+		.map = map,
+	};
+	int err = 0;
+	unsigned int lclusterbits, endoff;
+	unsigned long long ofs, end;
+
+	trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
+
+	/* when trying to read beyond EOF, leave it unmapped */
+	if (map->m_la >= inode->i_size) {
+		map->m_llen = map->m_la + 1 - inode->i_size;
+		map->m_la = inode->i_size;
+		map->m_flags = 0;
+		goto out;
+	}
+
+	err = z_erofs_fill_inode_lazy(inode);
+	if (err)
+		goto out;
+
+	lclusterbits = vi->z_logical_clusterbits;
+	ofs = map->m_la;
+	m.lcn = ofs >> lclusterbits;
+	endoff = ofs & ((1 << lclusterbits) - 1);
+
+	err = z_erofs_load_cluster_from_disk(&m, m.lcn);
+	if (err)
+		goto unmap_out;
+
+	map->m_flags = EROFS_MAP_ZIPPED;	/* by default, compressed */
+	end = (m.lcn + 1ULL) << lclusterbits;
+
+	switch (m.type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+		if (endoff >= m.clusterofs)
+			map->m_flags &= ~EROFS_MAP_ZIPPED;
+		/* fallthrough */
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		if (endoff >= m.clusterofs) {
+			map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
+			break;
+		}
+		/* m.lcn should be >= 1 if endoff < m.clusterofs */
+		if (!m.lcn) {
+			erofs_err(inode->i_sb,
+				  "invalid logical cluster 0 at nid %llu",
+				  vi->nid);
+			err = -EFSCORRUPTED;
+			goto unmap_out;
+		}
+		end = (m.lcn << lclusterbits) | m.clusterofs;
+		map->m_flags |= EROFS_MAP_FULL_MAPPED;
+		m.delta[0] = 1;
+		/* fallthrough */
+	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+		/* get the corresponding first chunk */
+		err = z_erofs_extent_lookback(&m, m.delta[0]);
+		if (err)
+			goto unmap_out;
+		break;
+	default:
+		erofs_err(inode->i_sb,
+			  "unknown type %u @ offset %llu of nid %llu",
+			  m.type, ofs, vi->nid);
+		err = -EOPNOTSUPP;
+		goto unmap_out;
+	}
+
+	map->m_llen = end - map->m_la;
+	map->m_plen = 1 << lclusterbits;
+	map->m_pa = blknr_to_addr(m.pblk);
+	map->m_flags |= EROFS_MAP_MAPPED;
+
+unmap_out:
+	if (m.kaddr)
+		kunmap_atomic(m.kaddr);
+
+out:
+	erofs_dbg("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
+		  __func__, map->m_la, map->m_pa,
+		  map->m_llen, map->m_plen, map->m_flags);
+
+	trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
+
+	/* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
+	DBG_BUGON(err < 0 && err != -ENOMEM);
+	return err;
+}
diff --git a/drivers/staging/erofs/unzip_pagevec.h b/drivers/staging/erofs/zpvec.h
similarity index 67%
rename from drivers/staging/erofs/unzip_pagevec.h
rename to drivers/staging/erofs/zpvec.h
index 23856ba2742d..bfc12ecbbe83 100644
--- a/drivers/staging/erofs/unzip_pagevec.h
+++ b/drivers/staging/erofs/zpvec.h
@@ -1,21 +1,14 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * linux/drivers/staging/erofs/unzip_pagevec.h
- *
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
  * Copyright (C) 2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file COPYING in the main directory of the Linux
- * distribution for more details.
+ *             https://www.huawei.com/
  */
-#ifndef __EROFS_UNZIP_PAGEVEC_H
-#define __EROFS_UNZIP_PAGEVEC_H
+#ifndef __EROFS_FS_ZPVEC_H
+#define __EROFS_FS_ZPVEC_H
 
-#include <linux/tagptr.h>
+#include "tagptr.h"
 
-/* page type in pagevec for unzip subsystem */
+/* page type in pagevec for decompress subsystem */
 enum z_erofs_page_type {
 	/* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */
 	Z_EROFS_PAGE_TYPE_EXCLUSIVE,
@@ -43,7 +36,7 @@ struct z_erofs_pagevec_ctor {
 static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
 					     bool atomic)
 {
-	if (ctor->curr == NULL)
+	if (!ctor->curr)
 		return;
 
 	if (atomic)
@@ -54,25 +47,22 @@ static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
 
 static inline struct page *
 z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor,
-			       unsigned nr)
+			       unsigned int nr)
 {
-	unsigned index;
+	unsigned int index;
 
 	/* keep away from occupied pages */
-	if (ctor->next != NULL)
+	if (ctor->next)
 		return ctor->next;
 
 	for (index = 0; index < nr; ++index) {
 		const erofs_vtptr_t t = ctor->pages[index];
-		const unsigned tags = tagptr_unfold_tags(t);
+		const unsigned int tags = tagptr_unfold_tags(t);
 
 		if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE)
 			return tagptr_unfold_ptr(t);
 	}
-
-	if (unlikely(nr >= ctor->nr))
-		BUG();
-
+	DBG_BUGON(nr >= ctor->nr);
 	return NULL;
 }
 
@@ -94,8 +84,9 @@ z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor,
 }
 
 static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
-					     unsigned nr,
-					     erofs_vtptr_t *pages, unsigned i)
+					     unsigned int nr,
+					     erofs_vtptr_t *pages,
+					     unsigned int i)
 {
 	ctor->nr = nr;
 	ctor->curr = ctor->next = NULL;
@@ -109,23 +100,21 @@ static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
 			z_erofs_pagevec_ctor_pagedown(ctor, false);
 		}
 	}
-
 	ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i);
 	ctor->index = i;
 }
 
-static inline bool
-z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor,
-			     struct page *page,
-			     enum z_erofs_page_type type,
-			     bool *occupied)
+static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
+					   struct page *page,
+					   enum z_erofs_page_type type,
+					   bool *occupied)
 {
 	*occupied = false;
-	if (unlikely(ctor->next == NULL && type))
+	if (!ctor->next && type)
 		if (ctor->index + 1 == ctor->nr)
 			return false;
 
-	if (unlikely(ctor->index >= ctor->nr))
+	if (ctor->index >= ctor->nr)
 		z_erofs_pagevec_ctor_pagedown(ctor, false);
 
 	/* exclusive page type must be 0 */
@@ -137,19 +126,17 @@ z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor,
 		ctor->next = page;
 		*occupied = true;
 	}
-
-	ctor->pages[ctor->index++] =
-		tagptr_fold(erofs_vtptr_t, page, type);
+	ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, page, type);
 	return true;
 }
 
 static inline struct page *
-z_erofs_pagevec_ctor_dequeue(struct z_erofs_pagevec_ctor *ctor,
-			     enum z_erofs_page_type *type)
+z_erofs_pagevec_dequeue(struct z_erofs_pagevec_ctor *ctor,
+			enum z_erofs_page_type *type)
 {
 	erofs_vtptr_t t;
 
-	if (unlikely(ctor->index >= ctor->nr)) {
+	if (ctor->index >= ctor->nr) {
 		DBG_BUGON(!ctor->next);
 		z_erofs_pagevec_ctor_pagedown(ctor, true);
 	}
@@ -162,11 +149,7 @@ z_erofs_pagevec_ctor_dequeue(struct z_erofs_pagevec_ctor *ctor,
 	if (*type == (uintptr_t)ctor->next)
 		ctor->next = tagptr_unfold_ptr(t);
 
-	ctor->pages[ctor->index++] =
-		tagptr_fold(erofs_vtptr_t, NULL, 0);
-
+	ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, NULL, 0);
 	return tagptr_unfold_ptr(t);
 }
-
 #endif
-
diff --git a/drivers/staging/erofs/include/trace/events/erofs.h b/include/trace/events/erofs.h
similarity index 85%
rename from drivers/staging/erofs/include/trace/events/erofs.h
rename to include/trace/events/erofs.h
index 5aead93a762f..27f5caa6299a 100644
--- a/drivers/staging/erofs/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM erofs
 
@@ -6,6 +6,9 @@
 #define _TRACE_EROFS_H
 
 #include <linux/tracepoint.h>
+#include <linux/fs.h>
+
+struct erofs_map_blocks;
 
 #define show_dev(dev)		MAJOR(dev), MINOR(dev)
 #define show_dev_nid(entry)	show_dev(entry->dev), entry->nid
 
@@ -38,7 +41,7 @@ TRACE_EVENT(erofs_lookup,
 
 	TP_fast_assign(
 		__entry->dev	= dir->i_sb->s_dev;
-		__entry->nid	= EROFS_V(dir)->nid;
+		__entry->nid	= EROFS_I(dir)->nid;
 		__entry->name	= dentry->d_name.name;
 		__entry->flags	= flags;
 	),
 
@@ -63,7 +66,7 @@ TRACE_EVENT(erofs_fill_inode,
 
 	TP_fast_assign(
 		__entry->dev		= inode->i_sb->s_dev;
-		__entry->nid		= EROFS_V(inode)->nid;
+		__entry->nid		= EROFS_I(inode)->nid;
 		__entry->blkaddr	= erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid));
 		__entry->ofs		= erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid));
 		__entry->isdir		= isdir;
 
@@ -92,7 +95,7 @@ TRACE_EVENT(erofs_readpage,
 
 	TP_fast_assign(
 		__entry->dev	= page->mapping->host->i_sb->s_dev;
-		__entry->nid	= EROFS_V(page->mapping->host)->nid;
+		__entry->nid	= EROFS_I(page->mapping->host)->nid;
 		__entry->dir	= S_ISDIR(page->mapping->host->i_mode);
 		__entry->index	= page->index;
 		__entry->uptodate = PageUptodate(page);
 
@@ -125,7 +128,7 @@ TRACE_EVENT(erofs_readpages,
 
 	TP_fast_assign(
 		__entry->dev	= inode->i_sb->s_dev;
-		__entry->nid	= EROFS_V(inode)->nid;
+		__entry->nid	= EROFS_I(inode)->nid;
 		__entry->start	= page->index;
 		__entry->nrpage	= nrpage;
 		__entry->raw	= raw;
 
@@ -154,7 +157,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
 
 	TP_fast_assign(
 		__entry->dev	= inode->i_sb->s_dev;
-		__entry->nid	= EROFS_V(inode)->nid;
+		__entry->nid	= EROFS_I(inode)->nid;
 		__entry->la	= map->m_la;
 		__entry->llen	= map->m_llen;
 		__entry->flags	= flags;
 
@@ -162,7 +165,8 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
 
 	TP_printk("dev = (%d,%d), nid = %llu, la %llu llen %llu flags %s",
 		  show_dev_nid(__entry),
-		  __entry->la, __entry->llen, show_map_flags(__entry->flags))
+		  __entry->la, __entry->llen,
+		  __entry->flags ? show_map_flags(__entry->flags) : "NULL")
 );
 
 DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter,
@@ -172,6 +176,13 @@ DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter,
 	TP_ARGS(inode, map, flags)
 );
 
+DEFINE_EVENT(erofs__map_blocks_enter, z_erofs_map_blocks_iter_enter,
+	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
+		 unsigned int flags),
+
+	TP_ARGS(inode, map, flags)
+);
+
 DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
 	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
 		 unsigned int flags, int ret),
 
@@ -192,7 +203,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
 
 	TP_fast_assign(
 		__entry->dev	= inode->i_sb->s_dev;
-		__entry->nid	= EROFS_V(inode)->nid;
+		__entry->nid	= EROFS_I(inode)->nid;
 		__entry->flags	= flags;
 		__entry->la	= map->m_la;
 		__entry->pa	= map->m_pa;
 
@@ -204,7 +215,8 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
 
 	TP_printk("dev = (%d,%d), nid = %llu, flags %s "
 		  "la %llu pa %llu llen %llu plen %llu mflags %s ret %d",
-		  show_dev_nid(__entry), show_map_flags(__entry->flags),
+		  show_dev_nid(__entry),
+		  __entry->flags ? show_map_flags(__entry->flags) : "NULL",
 		  __entry->la, __entry->pa, __entry->llen, __entry->plen,
 		  show_mflags(__entry->mflags), __entry->ret)
 );
 
@@ -216,6 +228,13 @@ DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit,
 	TP_ARGS(inode, map, flags, ret)
 );
 
+DEFINE_EVENT(erofs__map_blocks_exit, z_erofs_map_blocks_iter_exit,
+	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
+		 unsigned int flags, int ret),
+
+	TP_ARGS(inode, map, flags, ret)
+);
+
 TRACE_EVENT(erofs_destroy_inode,
 
 	TP_PROTO(struct inode *inode),
 
@@ -228,7 +247,7 @@ TRACE_EVENT(erofs_destroy_inode,
 
 	TP_fast_assign(
 		__entry->dev	= inode->i_sb->s_dev;
-		__entry->nid	= EROFS_V(inode)->nid;
+		__entry->nid	= EROFS_I(inode)->nid;
 	),
 
 	TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry))
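
A note on the compacted index format handled by unpack_compacted_index() in zmap.c above: each lcluster index packs lclusterbits of "lo" data plus 2 type bits, and vcnt indexes amortize into one pack whose trailing __le32 holds the starting block address. The following is a minimal userspace sketch, not part of the patch: it assumes lclusterbits == 12 (so 14 encoded bits per lcluster in a 32-byte compacted_2b pack), a little-endian host so a plain memcpy can stand in for get_unaligned_le32(), and an all-zero pack made up purely for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static unsigned int decode_compactedbits(unsigned int lobits,
					 unsigned int lomask,
					 const uint8_t *in, unsigned int pos,
					 uint8_t *type)
{
	uint32_t v;

	memcpy(&v, in + pos / 8, sizeof(v));	/* unaligned LE32 load */
	v >>= pos & 7;				/* drop bits below 'pos' */
	*type = (v >> lobits) & 3;		/* 2 type bits above lo */
	return v & lomask;			/* low 'lobits' bits */
}

int main(void)
{
	const unsigned int lclusterbits = 12;
	const unsigned int lomask = (1 << lclusterbits) - 1;
	const unsigned int vcnt = 16, amortizedshift = 1;
	/* ((16 << 1) - 4) * 8 / 16 = 14 encoded bits per lcluster */
	const unsigned int encodebits =
		((vcnt << amortizedshift) - sizeof(uint32_t)) * 8 / vcnt;
	uint8_t pack[32] = { 0 };	/* one made-up compacted_2b pack */
	unsigned int i;

	for (i = 0; i < vcnt; ++i) {
		uint8_t type;
		unsigned int lo = decode_compactedbits(lclusterbits, lomask,
						       pack, encodebits * i,
						       &type);

		printf("lcluster %2u: type %u, lo %u\n", i, type, lo);
	}
	return 0;
}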
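Similarly, the z_erofs_onlinepage_* helpers in zdata.h above pack two fields into a single atomic word: the low Z_EROFS_ONLINEPAGE_COUNT_BITS bits count pending parts of the page and the upper bits hold its sub-index; the page is "unlocked" once the count drops to zero. Below is a hypothetical, simplified C11 model of that protocol — a plain atomic int stands in for page_private() and the sub-index consistency checks of z_erofs_onlinepage_fixup() are omitted.

#include <stdatomic.h>
#include <stdio.h>

#define COUNT_BITS	2
#define COUNT_MASK	((1 << COUNT_BITS) - 1)
#define INDEX_SHIFT	COUNT_BITS

static atomic_int onlinepage;	/* stands in for page_private(page) */

static void onlinepage_init(void)
{
	atomic_store(&onlinepage, 1);	/* keep from "unlocking" early */
}

static void onlinepage_fixup(unsigned int index, int down)
{
	int orig, val;

	do {	/* cmpxchg loop, as in z_erofs_onlinepage_fixup() */
		orig = atomic_load(&onlinepage);
		val = (int)(index << INDEX_SHIFT) |
		      ((orig & COUNT_MASK) + down);
	} while (!atomic_compare_exchange_weak(&onlinepage, &orig, val));
}

static void onlinepage_endio(void)
{
	int v = atomic_fetch_sub(&onlinepage, 1) - 1;

	if (!(v & COUNT_MASK))	/* last pending part finished */
		printf("page done: sub-index %u, unlock now\n",
		       (unsigned int)v >> INDEX_SHIFT);
}

int main(void)
{
	onlinepage_init();	/* count = 1 */
	onlinepage_fixup(5, 1);	/* sub-index 5, count = 2 */
	onlinepage_endio();	/* count = 1, still "locked" */
	onlinepage_endio();	/* count = 0 -> "unlock" */
	return 0;
}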
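Finally, the three-region arithmetic of compacted_load_cluster_from_disk() — 4-byte indexes up to the first 32-byte boundary, then 2-byte packs, then 4-byte indexes for the remainder — can be sanity-checked with a small standalone model. The ebase and totalidx values below are made up for illustration, not taken from a real image, and only the COMPACTED_2B case is modelled.

#include <stdio.h>

int main(void)
{
	unsigned long long ebase = 4100;	/* hypothetical index base */
	unsigned int totalidx = 100;		/* hypothetical # of lclusters */
	unsigned int compacted_4b_initial, compacted_2b, lcn;

	/* 4B indexes until the position is 32-byte aligned */
	compacted_4b_initial = (32 - ebase % 32) / 4;
	if (compacted_4b_initial == 32 / 4)
		compacted_4b_initial = 0;
	/* then 2B packs, 16 lclusters per 32-byte pack (rounddown to 16) */
	compacted_2b = (totalidx - compacted_4b_initial) / 16 * 16;

	for (lcn = 0; lcn < totalidx; ++lcn) {
		unsigned long long pos = ebase;
		unsigned int l = lcn, amortizedshift;

		if (l < compacted_4b_initial) {
			amortizedshift = 2;		/* 4B per index */
		} else {
			pos += compacted_4b_initial * 4;
			l -= compacted_4b_initial;
			if (l < compacted_2b) {
				amortizedshift = 1;	/* 2B per index */
			} else {
				pos += compacted_2b * 2;
				l -= compacted_2b;
				amortizedshift = 2;	/* 4B again */
			}
		}
		pos += (unsigned long long)l << amortizedshift;
		if (lcn < 3 || lcn == totalidx - 1)
			printf("lcn %2u -> pos %llu (%u-byte index)\n",
			       lcn, pos, 1U << amortizedshift);
	}
	return 0;
}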