From f99b25897a86fcfff9140396a97261ae65fed872 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 28 Oct 2008 13:21:44 -0400 Subject: ext4: Add support for non-native signed/unsigned htree hash algorithms The original ext3 hash algorithms assumed that variables of type char were signed, as God and K&R intended. Unfortunately, this assumption is not true on some architectures. Userspace support for marking filesystems with non-native signed/unsigned chars was added two years ago, but the kernel-side support was never added (until now). Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/ext4/namei.c') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 9fd2a5e1be4..315858db807 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -372,6 +372,8 @@ dx_probe(const struct qstr *d_name, struct inode *dir, goto fail; } hinfo->hash_version = root->info.hash_version; + if (hinfo->hash_version <= DX_HASH_TEA) + hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; if (d_name) ext4fs_dirhash(d_name->name, d_name->len, hinfo); @@ -641,6 +643,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, dir = dir_file->f_path.dentry->d_inode; if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; + if (hinfo.hash_version <= DX_HASH_TEA) + hinfo.hash_version += + EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, start_hash, start_minor_hash); @@ -1408,6 +1413,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, /* Initialize as for dx_probe */ hinfo.hash_version = root->info.hash_version; + if (hinfo.hash_version <= DX_HASH_TEA) + hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; ext4fs_dirhash(name, namelen, &hinfo); frame = frames; -- 
cgit v1.2.3 From 59e315b4c410b00a9acd0f24a00dbadbe81ce692 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 6 Dec 2008 16:58:39 -0500 Subject: ext3/4: Fix loop index in do_split() so it is signed This fixes a gcc warning but it doesn't appear able to result in a failure, since the primary way the loop is exited is the first conditional in the for loop, and at least for a consistent filesystem, the signed/unsigned should in practice never be exposed. Signed-off-by: Roel Kluin Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4/namei.c') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 315858db807..84a68ae623c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1171,9 +1171,9 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, u32 hash2; struct dx_map_entry *map; char *data1 = (*bh)->b_data, *data2; - unsigned split, move, size, i; + unsigned split, move, size; struct ext4_dir_entry_2 *de = NULL, *de2; - int err = 0; + int err = 0, i; bh2 = ext4_append (handle, dir, &newblock, &err); if (!(bh2)) { -- cgit v1.2.3 From 0390131ba84fd3f726f9e24fc4553828125700bb Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Wed, 7 Jan 2009 00:06:22 -0500 Subject: ext4: Allow ext4 to run without a journal A few weeks ago I posted a patch for discussion that allowed ext4 to run without a journal. Since that time I've integrated the excellent comments from Andreas and fixed several serious bugs. We're currently running with this patch and generating some performance numbers against both ext2 (with backported reservations code) and ext4 with and without a journal. It just so happens that running without a journal is slightly faster for most everything. 
We did iozone -T -t 4 -s 2g -r 256k -T -I -i0 -i1 -i2 which creates 4 threads, each of which creates and does reads and writes on a 2G file, with a buffer size of 256K, using O_DIRECT for all file opens to bypass the page cache. Results (columns: ext2 | ext4, default | ext4, no journal): initial writes: 13.0 MB/s | 15.4 MB/s | 15.7 MB/s; rewrites: 13.1 MB/s | 15.6 MB/s | 15.9 MB/s; reads: 15.2 MB/s | 16.9 MB/s | 17.2 MB/s; re-reads: 15.3 MB/s | 16.9 MB/s | 17.2 MB/s; random readers: 5.6 MB/s | 5.6 MB/s | 5.7 MB/s; random writers: 5.1 MB/s | 5.3 MB/s | 5.4 MB/s. So it seems that, so far, this was a useful exercise. Signed-off-by: Frank Mayhar Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 56 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 25 deletions(-) (limited to 'fs/ext4/namei.c') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 84a68ae623c..08873e938ab 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1233,10 +1233,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, de = de2; } dx_insert_block(frame, hash2 + continued, newblock); - err = ext4_journal_dirty_metadata(handle, bh2); + err = ext4_handle_dirty_metadata(handle, dir, bh2); if (err) goto journal_error; - err = ext4_journal_dirty_metadata(handle, frame->bh); + err = ext4_handle_dirty_metadata(handle, dir, frame->bh); if (err) goto journal_error; brelse(bh2); @@ -1340,8 +1340,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, ext4_update_dx_flag(dir); dir->i_version++; ext4_mark_inode_dirty(handle, dir); - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, dir, bh); if (err) ext4_std_error(dir->i_sb, err); brelse(bh); @@ -1581,7 +1581,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, dxtrace(dx_show_index("node", frames[1].entries)); dxtrace(dx_show_index("node", ((struct
dx_node *) bh2->b_data)->entries)); - err = ext4_journal_dirty_metadata(handle, bh2); + err = ext4_handle_dirty_metadata(handle, inode, bh2); if (err) goto journal_error; brelse (bh2); @@ -1607,7 +1607,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (err) goto journal_error; } - ext4_journal_dirty_metadata(handle, frames[0].bh); + ext4_handle_dirty_metadata(handle, inode, frames[0].bh); } de = do_split(handle, dir, &bh, frame, &hinfo, &err); if (!de) @@ -1653,8 +1653,8 @@ static int ext4_delete_entry(handle_t *handle, else de->inode = 0; dir->i_version++; - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, dir, bh); return 0; } i += ext4_rec_len_from_disk(de->rec_len); @@ -1732,7 +1732,7 @@ retry: return PTR_ERR(handle); if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode = ext4_new_inode (handle, dir, mode); err = PTR_ERR(inode); @@ -1766,7 +1766,7 @@ retry: return PTR_ERR(handle); if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode = ext4_new_inode(handle, dir, mode); err = PTR_ERR(inode); @@ -1802,7 +1802,7 @@ retry: return PTR_ERR(handle); if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode = ext4_new_inode(handle, dir, S_IFDIR | mode); err = PTR_ERR(inode); @@ -1831,8 +1831,8 @@ retry: strcpy(de->name, ".."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); inode->i_nlink = 2; - BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, dir_block); + BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, dir, dir_block); brelse(dir_block); ext4_mark_inode_dirty(handle, inode); err = ext4_add_entry(handle, dentry, inode); @@ -1944,6 +1944,9 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) struct ext4_iloc iloc; int err = 0, rc; + if 
(!ext4_handle_valid(handle)) + return 0; + lock_super(sb); if (!list_empty(&EXT4_I(inode)->i_orphan)) goto out_unlock; @@ -1972,7 +1975,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) /* Insert this inode at the head of the on-disk orphan list... */ NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh); rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; @@ -2010,6 +2013,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) struct ext4_iloc iloc; int err = 0; + if (!ext4_handle_valid(handle)) + return 0; + lock_super(inode->i_sb); if (list_empty(&ei->i_orphan)) { unlock_super(inode->i_sb); @@ -2028,7 +2034,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) * transaction handle with which to update the orphan list on * disk, but we still need to remove the inode from the linked * list in memory. 
*/ - if (!handle) + if (sbi->s_journal && !handle) goto out; err = ext4_reserve_inode_write(handle, inode, &iloc); @@ -2042,7 +2048,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) if (err) goto out_brelse; sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); - err = ext4_journal_dirty_metadata(handle, sbi->s_sbh); + err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh); } else { struct ext4_iloc iloc2; struct inode *i_prev = @@ -2093,7 +2099,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) goto end_rmdir; if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode = dentry->d_inode; @@ -2147,7 +2153,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) return PTR_ERR(handle); if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); retval = -ENOENT; bh = ext4_find_entry(dir, &dentry->d_name, &de); @@ -2204,7 +2210,7 @@ retry: return PTR_ERR(handle); if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); err = PTR_ERR(inode); @@ -2267,7 +2273,7 @@ retry: return PTR_ERR(handle); if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode->i_ctime = ext4_current_time(inode); ext4_inc_count(handle, inode); @@ -2316,7 +2322,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, return PTR_ERR(handle); if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de); /* @@ -2370,8 +2376,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, new_dir->i_ctime = new_dir->i_mtime = ext4_current_time(new_dir); ext4_mark_inode_dirty(handle, new_dir); - BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, new_bh); + BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, new_dir, 
new_bh); brelse(new_bh); new_bh = NULL; } @@ -2421,8 +2427,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, BUFFER_TRACE(dir_bh, "get_write_access"); ext4_journal_get_write_access(handle, dir_bh); PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); - BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, dir_bh); + BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, old_dir, dir_bh); ext4_dec_count(handle, old_dir); if (new_inode) { /* checked empty_dir above, can't have another parent, -- cgit v1.2.3 From 498e5f24158da7bf8fa48074a70e370e22844492 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Nov 2008 00:14:04 -0500 Subject: ext4: Change unsigned long to unsigned int Convert the unsigned longs that are most responsible for bloating the stack usage on 64-bit systems. Nearly every place in the ext3/4 code which uses "unsigned long" is probably a bug, since on 32-bit systems a ulong is 32 bits, which means we are wasting stack space on 64-bit systems.
Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'fs/ext4/namei.c') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 08873e938ab..183a09a8b14 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -811,7 +811,7 @@ static inline int ext4_match (int len, const char * const name, static inline int search_dirblock(struct buffer_head *bh, struct inode *dir, const struct qstr *d_name, - unsigned long offset, + unsigned int offset, struct ext4_dir_entry_2 ** res_dir) { struct ext4_dir_entry_2 * de; @@ -1048,11 +1048,11 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru bh = ext4_find_entry(dir, &dentry->d_name, &de); inode = NULL; if (bh) { - unsigned long ino = le32_to_cpu(de->inode); + __u32 ino = le32_to_cpu(de->inode); brelse(bh); if (!ext4_valid_inum(dir->i_sb, ino)) { ext4_error(dir->i_sb, "ext4_lookup", - "bad inode number: %lu", ino); + "bad inode number: %u", ino); return ERR_PTR(-EIO); } inode = ext4_iget(dir->i_sb, ino); @@ -1065,7 +1065,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru struct dentry *ext4_get_parent(struct dentry *child) { - unsigned long ino; + __u32 ino; struct inode *inode; static const struct qstr dotdot = { .name = "..", @@ -1083,7 +1083,7 @@ struct dentry *ext4_get_parent(struct dentry *child) if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { ext4_error(child->d_inode->i_sb, "ext4_get_parent", - "bad inode number: %lu", ino); + "bad inode number: %u", ino); return ERR_PTR(-EIO); } @@ -1271,7 +1271,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, struct inode *dir = dentry->d_parent->d_inode; const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; - unsigned long offset = 0; + unsigned int offset = 0; unsigned short reclen; int nlen, rlen, err; char *top; @@ -1444,7 +1444,6 @@ static int ext4_add_entry(handle_t *handle, struct 
dentry *dentry, struct inode *inode) { struct inode *dir = dentry->d_parent->d_inode; - unsigned long offset; struct buffer_head *bh; struct ext4_dir_entry_2 *de; struct super_block *sb; @@ -1466,7 +1465,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, ext4_mark_inode_dirty(handle, dir); } blocks = dir->i_size >> sb->s_blocksize_bits; - for (block = 0, offset = 0; block < blocks; block++) { + for (block = 0; block < blocks; block++) { bh = ext4_bread(handle, dir, block, 0, &retval); if(!bh) return retval; @@ -1861,7 +1860,7 @@ out_stop: */ static int empty_dir(struct inode *inode) { - unsigned long offset; + unsigned int offset; struct buffer_head *bh; struct ext4_dir_entry_2 *de, *de1; struct super_block *sb; @@ -1906,7 +1905,7 @@ static int empty_dir(struct inode *inode) if (err) ext4_error(sb, __func__, "error %d reading directory" - " #%lu offset %lu", + " #%lu offset %u", err, inode->i_ino, offset); offset += sb->s_blocksize; continue; @@ -2009,7 +2008,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) struct list_head *prev; struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_sb_info *sbi; - unsigned long ino_next; + __u32 ino_next; struct ext4_iloc iloc; int err = 0; @@ -2042,7 +2041,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) goto out_err; if (prev == &sbi->s_orphan) { - jbd_debug(4, "superblock will point to %lu\n", ino_next); + jbd_debug(4, "superblock will point to %u\n", ino_next); BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) @@ -2054,7 +2053,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) struct inode *i_prev = &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode; - jbd_debug(4, "orphan inode %lu will point to %lu\n", + jbd_debug(4, "orphan inode %lu will point to %u\n", i_prev->i_ino, ino_next); err = ext4_reserve_inode_write(handle, i_prev, &iloc2); if (err) -- cgit v1.2.3