Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
+2.0.23:
+ - Massive internal locking changes to mft record locking. Fixes
+ various race conditions and deadlocks.
+ - Fix ntfs over loopback for compressed files by adding an
+ optimization barrier. (gcc was screwing up otherwise ?)
+ Thanks go to Christoph Hellwig for pointing these two out:
+ - Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs().
+ - Fix ntfs_free() for ia64 and parisc.
2.0.22:
- Small internal cleanups.
2.0.21:
- Find and fix bugs.
- Enable NFS exporting of NTFS.
+2.0.23 - Major bug fixes (races, deadlocks, non-i386 architectures).
+
+ - Massive internal locking changes to mft record locking. Fixes lock
+ recursion and replaces the mrec_lock read/write semaphore with a
+ mutex. Also removes the now superfluous mft_count. This fixes several
+ race conditions and deadlocks, especially in the future write code.
+ - Fix ntfs over loopback for compressed files by adding an
+ optimization barrier. (gcc was screwing up otherwise ?)
+ - Miscellaneous cleanups all over the code and a fix or two in error
+ handling code paths.
+ Thanks go to Christoph Hellwig for pointing out the following two:
+ - Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs().
+ - Fix ntfs_free() for ia64 and parisc by checking for VMALLOC_END, too.
+
2.0.22 - Cleanups, mainly to ntfs_readdir(), and use C99 initializers.
- Change fs/ntfs/dir.c::ntfs_readdir() to only read/write ->f_pos once
ntfs-objs := aops.o attrib.o compress.o debug.o dir.o file.o inode.o mft.o \
mst.o namei.o super.o sysctl.o time.o unistr.o upcase.o
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.0.22\"
+EXTRA_CFLAGS = -DNTFS_VERSION=\"2.0.23\"
ifeq ($(CONFIG_NTFS_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
if (!NInoMstProtected(ni)) {
if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page);
- unlock_page(page);
- return;
} else {
char *addr;
unsigned int i, recs, nr_err;
* for it to be read in before we can do the copy.
*
* Return 0 on success and -errno on error.
+ *
+ * WARNING: Do not make this function static! It is used by mft.c!
*/
int ntfs_readpage(struct file *file, struct page *page)
{
else
base_ni = ni->_INE(base_ntfs_ino);
- /* Map, pin and lock the mft record for reading. */
- mrec = map_mft_record(READ, base_ni);
+ /* Map, pin and lock the mft record. */
+ mrec = map_mft_record(base_ni);
if (unlikely(IS_ERR(mrec))) {
err = PTR_ERR(mrec);
goto err_out;
put_unm_err_out:
put_attr_search_ctx(ctx);
unm_err_out:
- unmap_mft_record(READ, base_ni);
+ unmap_mft_record(base_ni);
err_out:
unlock_page(page);
return err;
else
base_ni = ni->_INE(base_ntfs_ino);
- mrec = map_mft_record(READ, base_ni);
+ mrec = map_mft_record(base_ni);
if (IS_ERR(mrec))
return PTR_ERR(mrec);
ctx = get_attr_search_ctx(base_ni, mrec);
put_attr_search_ctx(ctx);
err_out:
- unmap_mft_record(READ, base_ni);
+ unmap_mft_record(base_ni);
return err;
}
return;
} /* Attribute list. */
if (ctx->ntfs_ino != ctx->base_ntfs_ino)
- unmap_mft_record(READ, ctx->ntfs_ino);
+ unmap_mft_record(ctx->ntfs_ino);
init_attr_search_ctx(ctx, ctx->base_ntfs_ino, ctx->base_mrec);
return;
}
void put_attr_search_ctx(attr_search_context *ctx)
{
if (ctx->base_ntfs_ino && ctx->ntfs_ino != ctx->base_ntfs_ino)
- unmap_mft_record(READ, ctx->ntfs_ino);
+ unmap_mft_record(ctx->ntfs_ino);
kmem_cache_free(ntfs_attr_ctx_cache, ctx);
return;
}
if (buffer_uptodate(tbh))
continue;
wait_on_buffer(tbh);
- if (unlikely(!buffer_uptodate(tbh)))
- goto read_err;
+ /*
+ * We need an optimization barrier here, otherwise we start
+ * hitting the below fixup code when accessing a loopback
+ * mounted ntfs partition. This indicates either there is a
+ * race condition in the loop driver or, more likely, gcc
+ * overoptimises the code without the barrier and it doesn't
+ * do the Right Thing(TM).
+ */
+ barrier();
+ if (unlikely(!buffer_uptodate(tbh))) {
+ ntfs_warning(vol->sb, "Buffer is unlocked but not "
+ "uptodate! Unplugging the disk queue "
+ "and rescheduling.");
+ get_bh(tbh);
+ blk_run_queues();
+ schedule();
+ put_bh(tbh);
+ if (unlikely(!buffer_uptodate(tbh)))
+ goto read_err;
+ ntfs_warning(vol->sb, "Buffer is now uptodate. Good.");
+ }
}
/*
u8 *index_end;
u64 mref;
attr_search_context *ctx;
- int err = 0, rc;
+ int err, rc;
VCN vcn, old_vcn;
struct address_space *ia_mapping;
struct page *page;
ntfs_name *name = NULL;
/* Get hold of the mft record for the directory. */
- m = map_mft_record(READ, dir_ni);
- if (IS_ERR(m))
- goto map_err_out;
-
+ m = map_mft_record(dir_ni);
+ if (unlikely(IS_ERR(m))) {
+ ntfs_error(sb, "map_mft_record() failed with error code %ld.",
+ -PTR_ERR(m));
+ return ERR_MREF(PTR_ERR(m));
+ }
ctx = get_attr_search_ctx(dir_ni, m);
- if (!ctx) {
+ if (unlikely(!ctx)) {
err = -ENOMEM;
- goto unm_err_out;
+ goto err_out;
}
-
/* Find the index root attribute in the mft record. */
if (!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0,
ctx)) {
ntfs_error(sb, "Index root attribute missing in directory "
"inode 0x%lx.", dir_ni->mft_no);
err = -EIO;
- goto put_unm_err_out;
+ goto err_out;
}
/* Get to the index root value (it's been verified in read_inode). */
ir = (INDEX_ROOT*)((u8*)ctx->attr +
GFP_NOFS);
if (!name) {
err = -ENOMEM;
- goto put_unm_err_out;
+ goto err_out;
}
}
name->mref = le64_to_cpu(
}
mref = le64_to_cpu(ie->_IIF(indexed_file));
put_attr_search_ctx(ctx);
- unmap_mft_record(READ, dir_ni);
+ unmap_mft_record(dir_ni);
return mref;
}
/*
name = kmalloc(name_size, GFP_NOFS);
if (!name) {
err = -ENOMEM;
- goto put_unm_err_out;
+ goto err_out;
}
name->mref = le64_to_cpu(ie->_IIF(indexed_file));
name->type = type;
if (!(ie->_IEH(flags) & INDEX_ENTRY_NODE)) {
if (name) {
put_attr_search_ctx(ctx);
- unmap_mft_record(READ, dir_ni);
+ unmap_mft_record(dir_ni);
return name->mref;
}
ntfs_debug("Entry not found.");
err = -ENOENT;
- goto put_unm_err_out;
+ goto err_out;
} /* Child node present, descend into it. */
/* Consistency check: Verify that an index allocation exists. */
if (!NInoIndexAllocPresent(dir_ni)) {
"requires one. Directory inode 0x%lx is "
"corrupt or driver bug.", dir_ni->mft_no);
err = -EIO;
- goto put_unm_err_out;
+ goto err_out;
}
/* Get the starting vcn of the index_block holding the child node. */
vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->_IEH(length)) - 8);
ia_mapping = VFS_I(dir_ni)->i_mapping;
+ /*
+ * We are done with the index root and the mft record. Release them,
+ * otherwise we deadlock with ntfs_map_page().
+ */
+ put_attr_search_ctx(ctx);
+ unmap_mft_record(dir_ni);
+ m = NULL;
+ ctx = NULL;
descend_into_child_node:
/*
* Convert vcn to index into the index allocation attribute in units
if (IS_ERR(page)) {
ntfs_error(sb, "Failed to map directory index page, error %ld.",
-PTR_ERR(page));
- goto put_unm_err_out;
+ err = PTR_ERR(page);
+ goto err_out;
}
kaddr = (u8*)page_address(page);
fast_descend_into_child_node:
ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
"inode 0x%lx or driver bug.", dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
ntfs_error(sb, "Actual VCN (0x%Lx) of index buffer is "
(long long)sle64_to_cpu(ia->index_block_vcn),
(long long)vcn, dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
dir_ni->_IDM(index_block_size)) {
le32_to_cpu(ia->index.allocated_size) + 0x18,
dir_ni->_IDM(index_block_size));
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
index_end = (u8*)ia + dir_ni->_IDM(index_block_size);
if (index_end > kaddr + PAGE_CACHE_SIZE) {
"Cannot access! This is probably a bug in the "
"driver.", (long long)vcn, dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
if (index_end > (u8*)ia + dir_ni->_IDM(index_block_size)) {
"inode 0x%lx exceeds maximum size.",
(long long)vcn, dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
/* The first index entry. */
ie = (INDEX_ENTRY*)((u8*)&ia->index +
"directory inode 0x%lx.",
dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
/*
* The last entry cannot contain a name. It can however contain
GFP_NOFS);
if (!name) {
err = -ENOMEM;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
}
name->mref = le64_to_cpu(
}
mref = le64_to_cpu(ie->_IIF(indexed_file));
ntfs_unmap_page(page);
- put_attr_search_ctx(ctx);
- unmap_mft_record(READ, dir_ni);
return mref;
}
/*
name = kmalloc(name_size, GFP_NOFS);
if (!name) {
err = -ENOMEM;
- goto put_unm_err_out;
+ goto unm_err_out;
}
name->mref = le64_to_cpu(ie->_IIF(indexed_file));
name->type = type;
"a leaf node in directory inode 0x%lx.",
dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
/* Child node present, descend into it. */
old_vcn = vcn;
ntfs_error(sb, "Negative child node vcn in directory inode "
"0x%lx.", dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
/*
* No child node present, return -ENOENT, unless we have got a matching
*/
if (name) {
ntfs_unmap_page(page);
- put_attr_search_ctx(ctx);
- unmap_mft_record(READ, dir_ni);
return name->mref;
}
ntfs_debug("Entry not found.");
err = -ENOENT;
-unm_unm_err_out:
- ntfs_unmap_page(page);
-put_unm_err_out:
- put_attr_search_ctx(ctx);
unm_err_out:
- unmap_mft_record(READ, dir_ni);
+ ntfs_unmap_page(page);
+err_out:
+ if (ctx)
+ put_attr_search_ctx(ctx);
+ if (m)
+ unmap_mft_record(dir_ni);
if (name) {
kfree(name);
*res = NULL;
}
return ERR_MREF(err);
-map_err_out:
- ntfs_error(sb, "map_mft_record(READ) failed with error code %ld.",
- -PTR_ERR(m));
- return ERR_MREF(PTR_ERR(m));
dir_err_out:
ntfs_error(sb, "Corrupt directory. Aborting lookup.");
err = -EIO;
- goto put_unm_err_out;
+ goto err_out;
}
#if 0
u8 *index_end;
u64 mref;
attr_search_context *ctx;
- int err = 0, rc;
+ int err, rc;
IGNORE_CASE_BOOL ic;
VCN vcn, old_vcn;
struct address_space *ia_mapping;
u8 *kaddr;
/* Get hold of the mft record for the directory. */
- m = map_mft_record(READ, dir_ni);
- if (IS_ERR(m))
- goto map_err_out;
-
+ m = map_mft_record(dir_ni);
+ if (IS_ERR(m)) {
+ ntfs_error(sb, "map_mft_record() failed with error code %ld.",
+ -PTR_ERR(m));
+ return ERR_MREF(PTR_ERR(m));
+ }
ctx = get_attr_search_ctx(dir_ni, m);
if (!ctx) {
err = -ENOMEM;
- goto unm_err_out;
+ goto err_out;
}
-
/* Find the index root attribute in the mft record. */
if (!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0,
ctx)) {
ntfs_error(sb, "Index root attribute missing in directory "
"inode 0x%lx.", dir_ni->mft_no);
err = -EIO;
- goto put_unm_err_out;
+ goto err_out;
}
/* Get to the index root value (it's been verified in read_inode). */
ir = (INDEX_ROOT*)((u8*)ctx->attr +
found_it:
mref = le64_to_cpu(ie->_IIF(indexed_file));
put_attr_search_ctx(ctx);
- unmap_mft_record(READ, dir_ni);
+ unmap_mft_record(dir_ni);
return mref;
}
/*
if (!(ie->_IEH(flags) & INDEX_ENTRY_NODE)) {
/* No child node, return -ENOENT. */
err = -ENOENT;
- goto put_unm_err_out;
+ goto err_out;
} /* Child node present, descend into it. */
/* Consistency check: Verify that an index allocation exists. */
if (!NInoIndexAllocPresent(dir_ni)) {
"requires one. Directory inode 0x%lx is "
"corrupt or driver bug.", dir_ni->mft_no);
err = -EIO;
- goto put_unm_err_out;
+ goto err_out;
}
/* Get the starting vcn of the index_block holding the child node. */
vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->_IEH(length)) - 8);
ia_mapping = VFS_I(dir_ni)->i_mapping;
+ /*
+ * We are done with the index root and the mft record. Release them,
+ * otherwise we deadlock with ntfs_map_page().
+ */
+ put_attr_search_ctx(ctx);
+ unmap_mft_record(dir_ni);
+ m = NULL;
+ ctx = NULL;
descend_into_child_node:
/*
* Convert vcn to index into the index allocation attribute in units
if (IS_ERR(page)) {
ntfs_error(sb, "Failed to map directory index page, error %ld.",
-PTR_ERR(page));
- goto put_unm_err_out;
+ err = PTR_ERR(page);
+ goto err_out;
}
kaddr = (u8*)page_address(page);
fast_descend_into_child_node:
ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
"inode 0x%lx or driver bug.", dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
ntfs_error(sb, "Actual VCN (0x%Lx) of index buffer is "
(long long)sle64_to_cpu(ia->index_block_vcn),
(long long)vcn, dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
dir_ni->_IDM(index_block_size)) {
le32_to_cpu(ia->index.allocated_size) + 0x18,
dir_ni->_IDM(index_block_size));
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
index_end = (u8*)ia + dir_ni->_IDM(index_block_size);
if (index_end > kaddr + PAGE_CACHE_SIZE) {
"Cannot access! This is probably a bug in the "
"driver.", (long long)vcn, dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
if (index_end > (u8*)ia + dir_ni->_IDM(index_block_size)) {
"inode 0x%lx exceeds maximum size.",
(long long)vcn, dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
/* The first index entry. */
ie = (INDEX_ENTRY*)((u8*)&ia->index +
"directory inode 0x%lx.",
dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
/*
* The last entry cannot contain a name. It can however contain
found_it2:
mref = le64_to_cpu(ie->_IIF(indexed_file));
ntfs_unmap_page(page);
- put_attr_search_ctx(ctx);
- unmap_mft_record(READ, dir_ni);
return mref;
}
/*
"a leaf node in directory inode 0x%lx.",
dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
/* Child node present, descend into it. */
old_vcn = vcn;
ntfs_error(sb, "Negative child node vcn in directory inode "
"0x%lx.", dir_ni->mft_no);
err = -EIO;
- goto unm_unm_err_out;
+ goto unm_err_out;
}
/* No child node, return -ENOENT. */
ntfs_debug("Entry not found.");
err = -ENOENT;
-unm_unm_err_out:
- ntfs_unmap_page(page);
-put_unm_err_out:
- put_attr_search_ctx(ctx);
unm_err_out:
- unmap_mft_record(READ, dir_ni);
+ ntfs_unmap_page(page);
+err_out:
+ if (ctx)
+ put_attr_search_ctx(ctx);
+ if (m)
+ unmap_mft_record(dir_ni);
return ERR_MREF(err);
-map_err_out:
- ntfs_error(sb, "map_mft_record(READ) failed with error code %ld.",
- -PTR_ERR(m));
- return ERR_MREF(PTR_ERR(m));
dir_err_out:
ntfs_error(sb, "Corrupt directory. Aborting lookup.");
err = -EIO;
- goto put_unm_err_out;
+ goto err_out;
}
#endif
goto done;
fpos++;
}
-
- /* Get hold of the mft record for the directory. */
- m = map_mft_record(READ, ndir);
- if (unlikely(IS_ERR(m))) {
- err = PTR_ERR(m);
- m = NULL;
- ctx = NULL;
- goto err_out;
- }
-
- ctx = get_attr_search_ctx(ndir, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
-
+ m = NULL;
+ ctx = NULL;
/*
* Allocate a buffer to store the current name being processed
* converted to format determined by current NLS.
/* Are we jumping straight into the index allocation attribute? */
if (fpos >= vol->mft_record_size)
goto skip_index_root;
+ /* Get hold of the mft record for the directory. */
+ m = map_mft_record(ndir);
+ if (unlikely(IS_ERR(m))) {
+ err = PTR_ERR(m);
+ m = NULL;
+ goto err_out;
+ }
+ ctx = get_attr_search_ctx(ndir, m);
+ if (unlikely(!ctx)) {
+ err = -ENOMEM;
+ goto err_out;
+ }
/* Get the offset into the index root attribute. */
ir_pos = (s64)fpos;
/* Find the index root attribute in the mft record. */
/* Submit the name to the filldir callback. */
rc = ntfs_filldir(vol, &fpos, ndir, INDEX_TYPE_ROOT, ir, ie,
name, dirent, filldir);
- if (rc)
+ if (rc) {
+ put_attr_search_ctx(ctx);
+ unmap_mft_record(ndir);
goto abort;
+ }
}
+ /*
+ * We are done with the index root and the mft record for that matter.
+ * We need to release it, otherwise we deadlock on ntfs_attr_iget()
+ * and/or ntfs_read_page().
+ */
+ put_attr_search_ctx(ctx);
+ unmap_mft_record(ndir);
+ m = NULL;
+ ctx = NULL;
/* If there is no index allocation attribute we are finished. */
if (!NInoIndexAllocPresent(ndir))
goto EOD;
}
/* Get the starting bit position in the current bitmap page. */
cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1);
- bmp_pos &= ~((PAGE_CACHE_SIZE * 8) - 1);
+ bmp_pos &= ~(u64)((PAGE_CACHE_SIZE * 8) - 1);
get_next_bmp_page:
ntfs_debug("Reading bitmap with page index 0x%Lx, bit ofs 0x%Lx",
(long long)bmp_pos >> (3 + PAGE_CACHE_SHIFT),
/* We are finished, set fpos to EOD. */
fpos = vdir->i_size + vol->mft_record_size;
abort:
- put_attr_search_ctx(ctx);
- unmap_mft_record(READ, ndir);
kfree(name);
done:
#ifdef DEBUG
if (ctx)
put_attr_search_ctx(ctx);
if (m)
- unmap_mft_record(READ, ndir);
+ unmap_mft_record(ndir);
if (!err)
err = -EIO;
ntfs_debug("Failed. Returning error code %i.", -err);
ntfs_inode *ni = NTFS_I(inode);
ntfs_debug("Entering.");
- BUG_ON(atomic_read(&ni->mft_count) || !atomic_dec_and_test(&ni->count));
+ BUG_ON(ni->page || !atomic_dec_and_test(&ni->count));
kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
}
void ntfs_destroy_extent_inode(ntfs_inode *ni)
{
ntfs_debug("Entering.");
- BUG_ON(atomic_read(&ni->mft_count) || !atomic_dec_and_test(&ni->count));
+ BUG_ON(ni->page || !atomic_dec_and_test(&ni->count));
kmem_cache_free(ntfs_inode_cache, ni);
}
atomic_set(&ni->count, 1);
ni->vol = NTFS_SB(sb);
init_run_list(&ni->run_list);
- init_rwsem(&ni->mrec_lock);
- atomic_set(&ni->mft_count, 0);
+ init_MUTEX(&ni->mrec_lock);
ni->page = NULL;
ni->page_ofs = 0;
ni->attr_list_size = 0;
ntfs_init_big_inode(vi);
ni = NTFS_I(vi);
- m = map_mft_record(READ, ni);
+ m = map_mft_record(ni);
if (IS_ERR(m)) {
err = PTR_ERR(m);
goto err_out;
/* No index allocation. */
vi->i_size = ni->initialized_size =
ni->allocated_size = 0;
+ /* We are done with the mft record, so we release it. */
+ put_attr_search_ctx(ctx);
+ unmap_mft_record(ni);
+ m = NULL;
+ ctx = NULL;
goto skip_large_dir_stuff;
} /* LARGE_INDEX: Index allocation present. Setup state. */
NInoSetIndexAllocPresent(ni);
ctx->attr->_ANR(initialized_size));
ni->allocated_size = sle64_to_cpu(
ctx->attr->_ANR(allocated_size));
-
+ /*
+ * We are done with the mft record, so we release it. Otherwise
+ * we would deadlock in ntfs_attr_iget().
+ */
+ put_attr_search_ctx(ctx);
+ unmap_mft_record(ni);
+ m = NULL;
+ ctx = NULL;
/* Get the index bitmap attribute inode. */
bvi = ntfs_attr_iget(vi, AT_BITMAP, I30, 4);
if (unlikely(IS_ERR(bvi))) {
bvi->i_size << 3, vi->i_size);
goto unm_err_out;
}
-
skip_large_dir_stuff:
/* Everyone gets read and scan permissions. */
vi->i_mode |= S_IRUGO | S_IXUGO;
le32_to_cpu(ctx->attr->_ARA(value_length));
}
no_data_attr_special_case:
+ /* We are done with the mft record, so we release it. */
+ put_attr_search_ctx(ctx);
+ unmap_mft_record(ni);
+ m = NULL;
+ ctx = NULL;
/* Everyone gets all permissions. */
vi->i_mode |= S_IRWXUGO;
/* If read-only, no one gets write permissions. */
else
vi->i_blocks = ni->_ICF(compressed_size) >> 9;
- put_attr_search_ctx(ctx);
- unmap_mft_record(READ, ni);
-
ntfs_debug("Done.");
return 0;
err = -EIO;
if (ctx)
put_attr_search_ctx(ctx);
- unmap_mft_record(READ, ni);
+ if (m)
+ unmap_mft_record(ni);
err_out:
ntfs_error(vi->i_sb, "Failed with error code %i. Marking inode 0x%lx "
"as bad.", -err, vi->i_ino);
/* Set inode type to zero but preserve permissions. */
vi->i_mode = base_vi->i_mode & ~S_IFMT;
- m = map_mft_record(READ, base_ni);
+ m = map_mft_record(base_ni);
if (IS_ERR(m)) {
err = PTR_ERR(m);
goto err_out;
ni->nr_extents = -1;
put_attr_search_ctx(ctx);
- unmap_mft_record(READ, base_ni);
+ unmap_mft_record(base_ni);
ntfs_debug("Done.");
return 0;
err = -EIO;
if (ctx)
put_attr_search_ctx(ctx);
- unmap_mft_record(READ, base_ni);
+ unmap_mft_record(base_ni);
err_out:
ntfs_error(vi->i_sb, "Failed with error code %i while reading "
"attribute inode (mft_no 0x%lx, type 0x%x, name_len "
/* Need this to sanity check attribute list references to $MFT. */
ni->seq_no = le16_to_cpu(m->sequence_number);
- /* Provides readpage() and sync_page() for map_mft_record(READ). */
+ /* Provides readpage() and sync_page() for map_mft_record(). */
vi->i_mapping->a_ops = &ntfs_mft_aops;
ctx = get_attr_search_ctx(ni, m);
}
}
/* Synchronize with ntfs_commit_inode(). */
- down_write(&ni->mrec_lock);
- up_write(&ni->mrec_lock);
+ down(&ni->mrec_lock);
+ up(&ni->mrec_lock);
if (NInoDirty(ni)) {
ntfs_error(ni->vol->sb, "Failed to commit dirty inode "
"asynchronously.");
* The following fields are only valid for real inodes and extent
* inodes.
*/
- struct rw_semaphore mrec_lock; /* Lock for serializing access to the
+ struct semaphore mrec_lock; /* Lock for serializing access to the
mft record belonging to this inode. */
- atomic_t mft_count; /* Mapping reference count for book keeping. */
struct page *page; /* The page containing the mft record of the
inode. This should only be touched by the
(un)map_mft_record*() functions. */
#include <linux/vmalloc.h>
#include <linux/slab.h>
-/**
- * vmalloc_nofs - allocate any pages but don't allow calls into fs layer
- * @size: number of bytes to allocate
- *
- * Allocate any pages but don't allow calls into fs layer. Return allocated
- * memory or NULL if insufficient memory.
- */
-static inline void *vmalloc_nofs(unsigned long size)
-{
- if (likely(size >> PAGE_SHIFT < num_physpages))
- return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
- return NULL;
-}
-
/**
* ntfs_malloc_nofs - allocate memory in multiples of pages
* @size number of bytes to allocate
static inline void ntfs_free(void *addr)
{
- if (likely((unsigned long)addr < VMALLOC_START)) {
+ if (likely(((unsigned long)addr < VMALLOC_START) ||
+ ((unsigned long)addr >= VMALLOC_END ))) {
return kfree(addr);
/* return free_page((unsigned long)addr); */
}
* mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
*
* Copyright (c) 2001,2002 Anton Altaparmakov.
- * Copyright (C) 2002 Richard Russon.
+ * Copyright (c) 2002 Richard Russon.
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
if (mft_rec)
m = mft_rec;
else {
- m = map_mft_record(WRITE, ni);
+ m = map_mft_record(ni);
if (IS_ERR(m))
return PTR_ERR(m);
}
__format_mft_record(m, ni->vol->mft_record_size, ni->mft_no);
- if (!mft_rec)
- unmap_mft_record(WRITE, ni);
+ if (!mft_rec) {
+ // FIXME: Need to set the mft record dirty!
+ unmap_mft_record(ni);
+ }
return 0;
}
struct page *page;
unsigned long index, ofs, end_index;
- BUG_ON(atomic_read(&ni->mft_count) || ni->page);
+ BUG_ON(ni->page);
/*
* The index into the page cache and the offset within the page cache
* page of the wanted mft record. FIXME: We need to check for
end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT;
/* If the wanted index is out of bounds the mft record doesn't exist. */
- if (index >= end_index) {
+ if (unlikely(index >= end_index)) {
if (index > end_index || (mft_vi->i_size & ~PAGE_CACHE_MASK) <
ofs + vol->mft_record_size) {
page = ERR_PTR(-ENOENT);
- goto up_err_out;
+ goto err_out;
}
}
/* Read, map, and pin the page. */
page = ntfs_map_page(mft_vi->i_mapping, index);
- if (!IS_ERR(page)) {
- /* Pin the mft record mapping in the ntfs_inode. */
- atomic_inc(&ni->mft_count);
-
- /* Setup the references in the ntfs_inode. */
+ if (likely(!IS_ERR(page))) {
ni->page = page;
ni->page_ofs = ofs;
-
return page_address(page) + ofs;
}
-up_err_out:
- /* Just in case... */
+err_out:
ni->page = NULL;
ni->page_ofs = 0;
-
ntfs_error(vol->sb, "Failed with error code %lu.", -PTR_ERR(page));
return (void*)page;
}
-/**
- * unmap_mft_record_page - unmap the page in which a specific mft record resides
- * @ni: ntfs inode whose mft record page to unmap
- *
- * This unmaps the page in which the mft record of the ntfs inode @ni is
- * situated and returns. This is a NOOP if highmem is not configured.
- *
- * The unmap happens via ntfs_unmap_page() which in turn decrements the use
- * count on the page thus releasing it from the pinned state.
- *
- * We do not actually unmap the page from memory of course, as that will be
- * done by the page cache code itself when memory pressure increases or
- * whatever.
- */
-static inline void unmap_mft_record_page(ntfs_inode *ni)
-{
- BUG_ON(atomic_read(&ni->mft_count) || !ni->page);
- // TODO: If dirty, blah...
- ntfs_unmap_page(ni->page);
- ni->page = NULL;
- ni->page_ofs = 0;
- return;
-}
-
/**
* map_mft_record - map, pin and lock an mft record
- * @rw: map for read (rw = READ) or write (rw = WRITE)
* @ni: ntfs inode whose MFT record to map
*
- * First, take the mrec_lock semaphore for reading or writing, depending on
- * the value or @rw. We might now be sleeping, while waiting for the semaphore
- * if it was already locked by someone else.
+ * First, take the mrec_lock semaphore. We might now be sleeping, while waiting
+ * for the semaphore if it was already locked by someone else.
*
- * Then increment the map reference count and return the mft. If this is the
- * first invocation, the page of the record is first mapped using
- * map_mft_record_page().
+ * The page of the record is first mapped using map_mft_record_page() before
+ * being returned to the caller.
*
* This in turn uses ntfs_map_page() to get the page containing the wanted mft
* record (it in turn calls read_cache_page() which reads it in from disk if
* locking problem then is them locking the page while we are accessing it.
*
* So that code will end up having to own the mrec_lock of all mft
- * records/inodes present in the page before I/O can proceed. Grr. In that
- * case we wouldn't need need to bother with PG_locked and PG_uptodate as
- * nobody will be accessing anything without owning the mrec_lock semaphore.
- * But we do need to use them because of the read_cache_page() invokation and
- * the code becomes so much simpler this way that it is well worth it.
+ * records/inodes present in the page before I/O can proceed. In that case we
+ * wouldn't need to bother with PG_locked and PG_uptodate as nobody will be
+ * accessing anything without owning the mrec_lock semaphore. But we do need
+ * to use them because of the read_cache_page() invocation and the code becomes
+ * so much simpler this way that it is well worth it.
*
* The mft record is now ours and we return a pointer to it. You need to check
* the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return
* A: No, the inode ones mean we want to change the mft record, not we want to
* write it out.
*/
-MFT_RECORD *map_mft_record(const int rw, ntfs_inode *ni)
+MFT_RECORD *map_mft_record(ntfs_inode *ni)
{
MFT_RECORD *m;
- ntfs_debug("Entering for mft_no 0x%lx, mapping for %s.", ni->mft_no,
- rw == READ ? "READ" : "WRITE");
+ ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no);
/* Make sure the ntfs inode doesn't go away. */
atomic_inc(&ni->count);
/* Serialize access to this mft record. */
- if (rw == READ)
- down_read(&ni->mrec_lock);
- else
- down_write(&ni->mrec_lock);
-
- /* If already mapped, bump reference count and return the mft record. */
- if (atomic_read(&ni->mft_count)) {
- BUG_ON(!ni->page);
- atomic_inc(&ni->mft_count);
- return page_address(ni->page) + ni->page_ofs;
- }
+ down(&ni->mrec_lock);
- /* Wasn't mapped. Map it now and return it if all was ok. */
m = map_mft_record_page(ni);
- if (!IS_ERR(m))
+ if (likely(!IS_ERR(m)))
return m;
- /* Mapping failed. Release the mft record lock. */
- if (rw == READ)
- up_read(&ni->mrec_lock);
- else
- up_write(&ni->mrec_lock);
-
- ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m));
-
- /* Release the ntfs inode and return the error code. */
+ up(&ni->mrec_lock);
atomic_dec(&ni->count);
+ ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m));
return m;
}
/**
- * unmap_mft_record - release a mapped mft record
- * @rw: unmap from read (@rw = READ) or write (@rw = WRITE)
- * @ni: ntfs inode whose MFT record to unmap
- *
- * First, decrement the mapping count and when it reaches zero unmap the mft
- * record.
+ * unmap_mft_record_page - unmap the page in which a specific mft record resides
+ * @ni: ntfs inode whose mft record page to unmap
*
- * Second, release the mrec_lock semaphore.
+ * This unmaps the page in which the mft record of the ntfs inode @ni is
+ * situated and returns. This is a NOOP if highmem is not configured.
*
- * The mft record is now released for others to get hold of.
+ * The unmap happens via ntfs_unmap_page() which in turn decrements the use
+ * count on the page thus releasing it from the pinned state.
*
- * Finally, release the ntfs inode by decreasing the ntfs inode reference count.
+ * We do not actually unmap the page from memory of course, as that will be
+ * done by the page cache code itself when memory pressure increases or
+ * whatever.
+ */
+static inline void unmap_mft_record_page(ntfs_inode *ni)
+{
+ BUG_ON(!ni->page);
+
+ // TODO: If dirty, blah...
+ ntfs_unmap_page(ni->page);
+ ni->page = NULL;
+ ni->page_ofs = 0;
+ return;
+}
+
+/**
+ * unmap_mft_record - release a mapped mft record
+ * @ni: ntfs inode whose MFT record to unmap
*
- * NOTE: If caller had the mft record mapped for write and has modified it, it
- * is imperative to set the mft record dirty BEFORE calling unmap_mft_record().
+ * We release the page mapping and the mrec_lock mutex which unmaps the mft
+ * record and releases it for others to get hold of. We also release the ntfs
+ * inode by decrementing the ntfs inode reference count.
*
- * NOTE: This has to be done both for 'normal' mft records, and for extent mft
- * records.
+ * NOTE: If caller has modified the mft record, it is imperative to set the mft
+ * record dirty BEFORE calling unmap_mft_record().
*/
-void unmap_mft_record(const int rw, ntfs_inode *ni)
+void unmap_mft_record(ntfs_inode *ni)
{
struct page *page = ni->page;
- BUG_ON(!atomic_read(&ni->mft_count) || !page);
-
- ntfs_debug("Entering for mft_no 0x%lx, unmapping from %s.", ni->mft_no,
- rw == READ ? "READ" : "WRITE");
+ BUG_ON(!page);
- /* Only release the actual page mapping if this is the last one. */
- if (atomic_dec_and_test(&ni->mft_count))
- unmap_mft_record_page(ni);
+ ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no);
- /* Release the semaphore. */
- if (rw == READ)
- up_read(&ni->mrec_lock);
- else
- up_write(&ni->mrec_lock);
-
- /* Release the ntfs inode. */
+ unmap_mft_record_page(ni);
+ up(&ni->mrec_lock);
atomic_dec(&ni->count);
-
/*
* If pure ntfs_inode, i.e. no vfs inode attached, we leave it to
* ntfs_clear_extent_inode() in the extent inode case, and to the
*
* On successful return, @ntfs_ino contains a pointer to the ntfs_inode
* structure of the mapped extent inode.
- *
- * Note, we always map for READ. We consider this lock as irrelevant because
- * the base inode will be write locked in all cases when we want to write to
- * an extent inode which already gurantees that there is no-one else accessing
- * the extent inode.
*/
MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
ntfs_inode **ntfs_ino)
break;
}
}
- if (ni) {
+ if (likely(ni != NULL)) {
up(&base_ni->extent_lock);
atomic_dec(&base_ni->count);
/* We found the record; just have to map and return it. */
- m = map_mft_record(READ, ni);
- /* Map mft record increments this on success. */
+ m = map_mft_record(ni);
+ /* map_mft_record() has incremented this on success. */
atomic_dec(&ni->count);
- if (!IS_ERR(m)) {
+ if (likely(!IS_ERR(m))) {
/* Verify the sequence number. */
- if (le16_to_cpu(m->sequence_number) == seq_no) {
+ if (likely(le16_to_cpu(m->sequence_number) == seq_no)) {
ntfs_debug("Done 1.");
*ntfs_ino = ni;
return m;
}
- unmap_mft_record(READ, ni);
+ unmap_mft_record(ni);
ntfs_error(base_ni->vol->sb, "Found stale extent mft "
"reference! Corrupt file system. "
"Run chkdsk.");
}
/* Record wasn't there. Get a new ntfs inode and initialize it. */
ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no);
- if (!ni) {
+ if (unlikely(!ni)) {
up(&base_ni->extent_lock);
atomic_dec(&base_ni->count);
return ERR_PTR(-ENOMEM);
ni->nr_extents = -1;
ni->_INE(base_ntfs_ino) = base_ni;
/* Now map the record. */
- m = map_mft_record(READ, ni);
- if (IS_ERR(m)) {
+ m = map_mft_record(ni);
+ if (unlikely(IS_ERR(m))) {
up(&base_ni->extent_lock);
atomic_dec(&base_ni->count);
ntfs_clear_extent_inode(ni);
goto map_err_out;
}
/* Verify the sequence number. */
- if (le16_to_cpu(m->sequence_number) != seq_no) {
+ if (unlikely(le16_to_cpu(m->sequence_number) != seq_no)) {
ntfs_error(base_ni->vol->sb, "Found stale extent mft "
"reference! Corrupt file system. Run chkdsk.");
destroy_ni = TRUE;
int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
tmp = (ntfs_inode **)kmalloc(new_size, GFP_NOFS);
- if (!tmp) {
+ if (unlikely(!tmp)) {
ntfs_error(base_ni->vol->sb, "Failed to allocate "
"internal buffer.");
destroy_ni = TRUE;
*ntfs_ino = ni;
return m;
unm_err_out:
- unmap_mft_record(READ, ni);
+ unmap_mft_record(ni);
up(&base_ni->extent_lock);
atomic_dec(&base_ni->count);
/*
//extern int format_mft_record2(struct super_block *vfs_sb,
// const unsigned long inum, MFT_RECORD *m);
-extern MFT_RECORD *map_mft_record(const int rw, ntfs_inode *ni);
-extern void unmap_mft_record(const int rw, ntfs_inode *ni);
+extern MFT_RECORD *map_mft_record(ntfs_inode *ni);
+extern void unmap_mft_record(ntfs_inode *ni);
extern MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
ntfs_inode **ntfs_ino);
static inline void unmap_extent_mft_record(ntfs_inode *ni)
{
- unmap_mft_record(READ, ni);
+ unmap_mft_record(ni);
return;
}
handle_name:
{
struct dentry *real_dent;
+ MFT_RECORD *m;
attr_search_context *ctx;
ntfs_inode *ni = NTFS_I(dent_inode);
int err;
name->len * 3 + 1);
kfree(name);
} else /* if (name->type == FILE_NAME_DOS) */ { /* Case 3. */
- MFT_RECORD *m;
FILE_NAME_ATTR *fn;
kfree(name);
/* Find the WIN32 name corresponding to the matched DOS name. */
ni = NTFS_I(dent_inode);
- m = map_mft_record(READ, ni);
+ m = map_mft_record(ni);
if (IS_ERR(m)) {
err = PTR_ERR(m);
- goto name_err_out;
+ m = NULL;
+ ctx = NULL;
+ goto err_out;
}
ctx = get_attr_search_ctx(ni, m);
if (!ctx) {
err = -ENOMEM;
- goto unm_err_out;
+ goto err_out;
}
do {
ATTR_RECORD *a;
"namespace counterpart to DOS "
"file name. Run chkdsk.");
err = -EIO;
- goto put_unm_err_out;
+ goto err_out;
}
/* Consistency checks. */
a = ctx->attr;
if (a->non_resident || a->flags)
- goto eio_put_unm_err_out;
+ goto eio_err_out;
val_len = le32_to_cpu(a->_ARA(value_length));
if (le16_to_cpu(a->_ARA(value_offset)) + val_len >
le32_to_cpu(a->length))
- goto eio_put_unm_err_out;
+ goto eio_err_out;
fn = (FILE_NAME_ATTR*)((u8*)ctx->attr + le16_to_cpu(
ctx->attr->_ARA(value_offset)));
if ((u32)(fn->file_name_length * sizeof(uchar_t) +
sizeof(FILE_NAME_ATTR)) > val_len)
- goto eio_put_unm_err_out;
+ goto eio_err_out;
} while (fn->file_name_type != FILE_NAME_WIN32);
/* Convert the found WIN32 name to current NLS code page. */
fn->file_name_length * 3 + 1);
put_attr_search_ctx(ctx);
- unmap_mft_record(READ, ni);
+ unmap_mft_record(ni);
}
+ m = NULL;
+ ctx = NULL;
/* Check if a conversion error occurred. */
if ((signed)nls_name.len < 0) {
err = (signed)nls_name.len;
- goto name_err_out;
+ goto err_out;
}
nls_name.hash = full_name_hash(nls_name.name, nls_name.len);
kfree(nls_name.name);
if (!real_dent) {
err = -ENOMEM;
- goto name_err_out;
+ goto err_out;
}
d_add(real_dent, dent_inode);
return real_dent;
d_instantiate(real_dent, dent_inode);
return real_dent;
-eio_put_unm_err_out:
+eio_err_out:
ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk.");
err = -EIO;
-put_unm_err_out:
- put_attr_search_ctx(ctx);
-unm_err_out:
- unmap_mft_record(READ, ni);
-name_err_out:
+err_out:
+ if (ctx)
+ put_attr_search_ctx(ctx);
+ if (m)
+ unmap_mft_record(ni);
iput(dent_inode);
return ERR_PTR(err);
}
ntfs_error(sb, "Failed to load $Volume.");
goto iput_lcnbmp_err_out;
}
- m = map_mft_record(READ, NTFS_I(vol->vol_ino));
+ m = map_mft_record(NTFS_I(vol->vol_ino));
if (IS_ERR(m)) {
iput_volume_failed:
iput(vol->vol_ino);
err_put_vol:
put_attr_search_ctx(ctx);
get_ctx_vol_failed:
- unmap_mft_record(READ, NTFS_I(vol->vol_ino));
+ unmap_mft_record(NTFS_I(vol->vol_ino));
goto iput_volume_failed;
}
vi = (VOLUME_INFORMATION*)((char*)ctx->attr +
vol->major_ver = vi->major_ver;
vol->minor_ver = vi->minor_ver;
put_attr_search_ctx(ctx);
- unmap_mft_record(READ, NTFS_I(vol->vol_ino));
+ unmap_mft_record(NTFS_I(vol->vol_ino));
printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver,
vol->minor_ver);
/*