Discussion:
[PATCH 4/6] audit: Embed key into chunk
Jan Kara
2018-06-28 16:40:12 UTC
Currently the chunk hash key (which is in fact a pointer to the inode) is
derived as chunk->mark.connector->obj. It is tricky to make this dereference
reliable for hash table lookups done only under RCU, as the mark can get
detached from the connector and the connector can get freed independently
of the running lookup. Thus there is a possible use after free / NULL ptr
dereference issue:

CPU1                                    CPU2
untag_chunk()
  ...
                                        audit_tree_lookup()
                                          list_for_each_entry_rcu(p, list, hash) {
  list_del_rcu(&chunk->hash);
  fsnotify_destroy_mark(entry);
  fsnotify_put_mark(entry)
                                          chunk_to_key(p)
                                            if (!chunk->mark.connector)
  ...
  hlist_del_init_rcu(&mark->obj_list);
  if (hlist_empty(&conn->list)) {
    inode = fsnotify_detach_connector_from_object(conn);
  mark->connector = NULL;
  ...
  frees connector from workqueue
                                            chunk->mark.connector->obj

This race is probably impossible to hit in practice as the race window
on CPU1 is very narrow and CPU2 has a lot of code to execute. Still, it's
better to have this fixed. Since the inode the chunk is attached to is
constant during the chunk's lifetime, it is easy to cache the key in the
chunk itself and thus avoid these issues.

Signed-off-by: Jan Kara <***@suse.cz>
---
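In a nutshell (condensed from the diff below, not a complete patch): the key
is computed once when the chunk is set up, and the RCU lookup then compares
only the cached value without ever touching the mark or its connector:

    struct audit_chunk {
            struct list_head hash;
            unsigned long key;      /* inode_to_key(inode), constant for
                                       the chunk's lifetime */
            /* ... as before ... */
    };

    /* RCU-side lookup: */
    list_for_each_entry_rcu(p, list, hash) {
            if (p->key == key) {    /* no mark/connector dereference */
                    atomic_long_inc(&p->refs);
                    return p;
            }
    }
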
kernel/audit_tree.c | 26 +++++++-------------------
1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 7f9df8c66227..89ad8857a578 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -24,6 +24,7 @@ struct audit_tree {

struct audit_chunk {
struct list_head hash;
+ unsigned long key;
struct fsnotify_mark mark;
struct list_head trees; /* with root here */
int dead;
@@ -172,21 +173,6 @@ static unsigned long inode_to_key(const struct inode *inode)
return (unsigned long)&inode->i_fsnotify_marks;
}

-/*
- * Function to return search key in our hash from chunk. Key 0 is special and
- * should never be present in the hash.
- */
-static unsigned long chunk_to_key(struct audit_chunk *chunk)
-{
- /*
- * We have a reference to the mark so it should be attached to a
- * connector.
- */
- if (WARN_ON_ONCE(!chunk->mark.connector))
- return 0;
- return (unsigned long)chunk->mark.connector->obj;
-}
-
static inline struct list_head *chunk_hash(unsigned long key)
{
unsigned long n = key / L1_CACHE_BYTES;
@@ -196,12 +182,11 @@ static inline struct list_head *chunk_hash(unsigned long key)
/* hash_lock & entry->group->mark_mutex is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
- unsigned long key = chunk_to_key(chunk);
struct list_head *list;

if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
return;
- list = chunk_hash(key);
+ list = chunk_hash(chunk->key);
list_add_rcu(&chunk->hash, list);
}

@@ -213,7 +198,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
struct audit_chunk *p;

list_for_each_entry_rcu(p, list, hash) {
- if (chunk_to_key(p) == key) {
+ if (p->key == key) {
atomic_long_inc(&p->refs);
return p;
}
@@ -295,6 +280,7 @@ static void untag_chunk(struct node *p)

chunk->dead = 1;
spin_lock(&hash_lock);
+ new->key = chunk->key;
list_replace_init(&chunk->trees, &new->trees);
if (owner->root == chunk) {
list_del_init(&owner->same_root);
@@ -380,6 +366,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
+ chunk->key = inode_to_key(inode);
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
@@ -456,6 +443,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
fsnotify_put_mark(old_entry);
return 0;
}
+ chunk->key = old->key;
list_replace_init(&old->trees, &chunk->trees);
for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
struct audit_tree *s = old->owners[n].owner;
@@ -652,7 +640,7 @@ void audit_trim_trees(void)
/* this could be NULL if the watch is dying else where... */
node->index |= 1U<<31;
if (iterate_mounts(compare_root,
- (void *)chunk_to_key(chunk),
+ (void *)(chunk->key),
root_mnt))
node->index &= ~(1U<<31);
}
--
2.16.4
Jan Kara
2018-06-28 16:40:09 UTC
Currently, audit_tree code uses mark->lock to protect against detaching
of mark from an inode. In most places it however also uses
mark->group->mark_mutex (as we need to atomically replace attached
marks) and this provides protection against mark detaching as well. So
just replace protection with mark->lock from audit tree code and replace
it with mark->group->mark_mutex protection in all the places. It
simplifies the code and gets rid of some ugly catches like calling
fsnotify_add_mark_locked() with mark->lock held (which cannot sleep only
because we hold a reference to another mark attached to the same inode).

Signed-off-by: Jan Kara <***@suse.cz>
---
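Schematically the change at each site looks like this (an illustration of the
locking only, not taken verbatim from the diff):

    /* Before: spinlock nested inside the mutex. */
    mutex_lock(&entry->group->mark_mutex);
    spin_lock(&entry->lock);
    /* ... manipulate chunk / hash table ... */
    spin_unlock(&entry->lock);
    mutex_unlock(&entry->group->mark_mutex);

    /* After: mark_mutex alone keeps the mark attached. */
    mutex_lock(&entry->group->mark_mutex);
    /* ... manipulate chunk / hash table ... */
    mutex_unlock(&entry->group->mark_mutex);
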
kernel/audit_tree.c | 24 ++++--------------------
1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 02feef939560..1c82eb6674c4 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -193,7 +193,7 @@ static inline struct list_head *chunk_hash(unsigned long key)
return chunk_hash_heads + n % HASH_SIZE;
}

-/* hash_lock & entry->lock is held by caller */
+/* hash_lock & entry->group->mark_mutex is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
unsigned long key = chunk_to_key(chunk);
@@ -256,13 +256,11 @@ static void untag_chunk(struct node *p)
new = alloc_chunk(size);

mutex_lock(&entry->group->mark_mutex);
- spin_lock(&entry->lock);
/*
* mark_mutex protects mark from getting detached and thus also from
* mark->connector->obj getting NULL.
*/
if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
- spin_unlock(&entry->lock);
mutex_unlock(&entry->group->mark_mutex);
if (new)
fsnotify_put_mark(&new->mark);
@@ -280,7 +278,6 @@ static void untag_chunk(struct node *p)
list_del_init(&p->list);
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
- spin_unlock(&entry->lock);
mutex_unlock(&entry->group->mark_mutex);
fsnotify_destroy_mark(entry, audit_tree_group);
goto out;
@@ -323,7 +320,6 @@ static void untag_chunk(struct node *p)
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
spin_unlock(&hash_lock);
- spin_unlock(&entry->lock);
mutex_unlock(&entry->group->mark_mutex);
fsnotify_destroy_mark(entry, audit_tree_group);
fsnotify_put_mark(&new->mark); /* drop initial reference */
@@ -340,7 +336,6 @@ static void untag_chunk(struct node *p)
p->owner = NULL;
put_tree(owner);
spin_unlock(&hash_lock);
- spin_unlock(&entry->lock);
mutex_unlock(&entry->group->mark_mutex);
out:
fsnotify_put_mark(entry);
@@ -360,12 +355,12 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOSPC;
}

- spin_lock(&entry->lock);
+ mutex_lock(&entry->group->mark_mutex);
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
- spin_unlock(&entry->lock);
+ mutex_unlock(&entry->group->mark_mutex);
fsnotify_destroy_mark(entry, audit_tree_group);
fsnotify_put_mark(entry);
return 0;
@@ -380,7 +375,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
}
insert_hash(chunk);
spin_unlock(&hash_lock);
- spin_unlock(&entry->lock);
+ mutex_unlock(&entry->group->mark_mutex);
fsnotify_put_mark(entry); /* drop initial reference */
return 0;
}
@@ -421,14 +416,12 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk_entry = &chunk->mark;

mutex_lock(&old_entry->group->mark_mutex);
- spin_lock(&old_entry->lock);
/*
* mark_mutex protects mark from getting detached and thus also from
* mark->connector->obj getting NULL.
*/
if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
/* old_entry is being shot, lets just lie */
- spin_unlock(&old_entry->lock);
mutex_unlock(&old_entry->group->mark_mutex);
fsnotify_put_mark(old_entry);
fsnotify_put_mark(&chunk->mark);
@@ -437,23 +430,16 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)

if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- spin_unlock(&old_entry->lock);
mutex_unlock(&old_entry->group->mark_mutex);
fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
return -ENOSPC;
}

- /* even though we hold old_entry->lock, this is safe since chunk_entry->lock could NEVER have been grabbed before */
- spin_lock(&chunk_entry->lock);
spin_lock(&hash_lock);
-
- /* we now hold old_entry->lock, chunk_entry->lock, and hash_lock */
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
- spin_unlock(&chunk_entry->lock);
- spin_unlock(&old_entry->lock);
mutex_unlock(&old_entry->group->mark_mutex);

fsnotify_destroy_mark(chunk_entry, audit_tree_group);
@@ -485,8 +471,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
spin_unlock(&hash_lock);
- spin_unlock(&chunk_entry->lock);
- spin_unlock(&old_entry->lock);
mutex_unlock(&old_entry->group->mark_mutex);
fsnotify_destroy_mark(old_entry, audit_tree_group);
fsnotify_put_mark(chunk_entry); /* drop initial reference */
--
2.16.4
Amir Goldstein
2018-06-29 11:31:57 UTC
Post by Jan Kara
Currently, audit_tree code uses mark->lock to protect against detaching
of mark from an inode. In most places it however also uses
mark->group->mark_mutex (as we need to atomically replace attached
marks) and this provides protection against mark detaching as well. So
just replace protection with mark->lock from audit tree code and replace
typo.....^ s/replace/remove

Otherwise looks sane.

Thanks,
Amir.
Jan Kara
2018-07-03 14:07:08 UTC
Post by Amir Goldstein
Post by Jan Kara
Currently, audit_tree code uses mark->lock to protect against detaching
of mark from an inode. In most places it however also uses
mark->group->mark_mutex (as we need to atomically replace attached
marks) and this provides protection against mark detaching as well. So
just replace protection with mark->lock from audit tree code and replace
typo.....^ s/replace/remove
Thanks, fixed.

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Jan Kara
2018-06-28 16:40:10 UTC
When an inode is tagged with a tree, tag_chunk() checks whether there is an
audit_tree_group mark attached to the inode and adds one if not. However,
nothing prevents another tag_chunk() from adding the mark between our check
and our attempt to add the fsnotify mark, resulting in an error from
fsnotify_add_mark() and consequently an ENOSPC error from tag_chunk().

Fix the problem by holding mark_mutex over the whole check-insert code
sequence.

Signed-off-by: Jan Kara <***@suse.cz>
---
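The resulting shape of tag_chunk() (condensed from the diff below): the lookup
and the mark insertion now happen in one critical section, so a concurrent
tag_chunk() can no longer add the mark in between:

    mutex_lock(&audit_tree_group->mark_mutex);
    old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
    if (!old_entry)
            return create_chunk(inode, tree); /* called locked, releases
                                                 mark_mutex */
    /* ... replace the found chunk, still under mark_mutex ... */
    mutex_unlock(&audit_tree_group->mark_mutex);
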
kernel/audit_tree.c | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 1c82eb6674c4..de8d344d91b1 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -342,25 +342,29 @@ static void untag_chunk(struct node *p)
spin_lock(&hash_lock);
}

+/* Call with group->mark_mutex held, releases it */
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
struct fsnotify_mark *entry;
struct audit_chunk *chunk = alloc_chunk(1);
- if (!chunk)
+
+ if (!chunk) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
return -ENOMEM;
+ }

entry = &chunk->mark;
- if (fsnotify_add_inode_mark(entry, inode, 0)) {
+ if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry);
return -ENOSPC;
}

- mutex_lock(&entry->group->mark_mutex);
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_destroy_mark(entry, audit_tree_group);
fsnotify_put_mark(entry);
return 0;
@@ -375,7 +379,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
}
insert_hash(chunk);
spin_unlock(&hash_lock);
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry); /* drop initial reference */
return 0;
}
@@ -389,6 +393,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
struct node *p;
int n;

+ mutex_lock(&audit_tree_group->mark_mutex);
old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
audit_tree_group);
if (!old_entry)
@@ -401,6 +406,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(old_entry);
return 0;
}
@@ -409,20 +415,20 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)

chunk = alloc_chunk(old->count + 1);
if (!chunk) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(old_entry);
return -ENOMEM;
}

chunk_entry = &chunk->mark;

- mutex_lock(&old_entry->group->mark_mutex);
/*
* mark_mutex protects mark from getting detached and thus also from
* mark->connector->obj getting NULL.
*/
if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
/* old_entry is being shot, lets just lie */
- mutex_unlock(&old_entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(old_entry);
fsnotify_put_mark(&chunk->mark);
return -ENOENT;
@@ -430,7 +436,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)

if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- mutex_unlock(&old_entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
return -ENOSPC;
@@ -440,7 +446,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
- mutex_unlock(&old_entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);

fsnotify_destroy_mark(chunk_entry, audit_tree_group);

@@ -471,7 +477,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
spin_unlock(&hash_lock);
- mutex_unlock(&old_entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_destroy_mark(old_entry, audit_tree_group);
fsnotify_put_mark(chunk_entry); /* drop initial reference */
fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
--
2.16.4
Amir Goldstein
2018-06-29 11:42:57 UTC
Post by Jan Kara
When an inode is tagged with a tree, tag_chunk() checks whether there is an
audit_tree_group mark attached to the inode and adds one if not. However,
nothing prevents another tag_chunk() from adding the mark between our check
and our attempt to add the fsnotify mark, resulting in an error from
fsnotify_add_mark() and consequently an ENOSPC error from tag_chunk().
Fix the problem by holding mark_mutex over the whole check-insert code
sequence.
---
Never dived into the audit_tree code, but this looks sane.

Thanks,
Amir.
Jan Kara
2018-06-28 16:40:13 UTC
Currently, the audit tree code does not make sure that when a chunk is
inserted into the hash table, it is fully initialized. So in theory (in
practice only on DEC Alpha) a user of the RCU lookup could see an
uninitialized structure in the hash table and crash. Add appropriate barriers
between initialization of the structure and its insertion into the hash table.

Signed-off-by: Jan Kara <***@suse.cz>
---
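The pattern in isolation (condensed from the hunks below): every store that
initializes the chunk must be ordered before the list insertion on the writer
side, while the reader's data dependency through READ_ONCE() orders the
corresponding loads:

    /* Writer, under hash_lock: initialize first, then publish. */
    chunk->key = inode_to_key(inode);
    /* ... initialize the rest of *chunk ... */
    smp_wmb();                              /* order init before insertion */
    list_add_rcu(&chunk->hash, list);

    /* Reader, under rcu_read_lock(): */
    list_for_each_entry_rcu(p, list, hash) {
            if (READ_ONCE(p->key) == key) { /* data dependency barrier */
                    atomic_long_inc(&p->refs);
                    return p;
            }
    }
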
kernel/audit_tree.c | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 89ad8857a578..46cc6d046c75 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -198,7 +198,11 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
struct audit_chunk *p;

list_for_each_entry_rcu(p, list, hash) {
- if (p->key == key) {
+ /*
+ * We use a data dependency barrier in READ_ONCE() to make sure
+ * the chunk we see is fully initialized.
+ */
+ if (READ_ONCE(p->key) == key) {
atomic_long_inc(&p->refs);
return p;
}
@@ -303,9 +307,15 @@ static void untag_chunk(struct node *p)
list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
}

- list_replace_rcu(&chunk->hash, &new->hash);
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&chunk->hash, &new->hash);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
@@ -367,6 +377,12 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
chunk->key = inode_to_key(inode);
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
@@ -458,7 +474,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
p->owner = tree;
get_tree(tree);
list_add(&p->list, &tree->chunks);
- list_replace_rcu(&old->hash, &chunk->hash);
list_for_each_entry(owner, &chunk->trees, same_root)
owner->root = chunk;
old->dead = 1;
@@ -466,6 +481,13 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&old->hash, &chunk->hash);
spin_unlock(&hash_lock);
fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
--
2.16.4
Amir Goldstein
2018-06-29 13:02:10 UTC
Post by Jan Kara
Currently, the audit tree code does not make sure that when a chunk is
inserted into the hash table, it is fully initialized. So in theory (in
practice only on DEC Alpha) a user of the RCU lookup could see an
uninitialized structure in the hash table and crash. Add appropriate barriers
between initialization of the structure and its insertion into the hash table.
---
kernel/audit_tree.c | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 89ad8857a578..46cc6d046c75 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -198,7 +198,11 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
struct audit_chunk *p;
list_for_each_entry_rcu(p, list, hash) {
- if (p->key == key) {
+ /*
+ * We use a data dependency barrier in READ_ONCE() to make sure
+ * the chunk we see is fully initialized.
+ */
+ if (READ_ONCE(p->key) == key) {
atomic_long_inc(&p->refs);
return p;
}
@@ -303,9 +307,15 @@ static void untag_chunk(struct node *p)
list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
}
- list_replace_rcu(&chunk->hash, &new->hash);
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&chunk->hash, &new->hash);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
@@ -367,6 +377,12 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
chunk->key = inode_to_key(inode);
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
@@ -458,7 +474,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
p->owner = tree;
get_tree(tree);
list_add(&p->list, &tree->chunks);
- list_replace_rcu(&old->hash, &chunk->hash);
list_for_each_entry(owner, &chunk->trees, same_root)
owner->root = chunk;
old->dead = 1;
@@ -466,6 +481,13 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&old->hash, &chunk->hash);
IMO, now that list_replace_rcu() is no longer a one-liner (including the wmb
and the comment above) it would be cleaner to have a helper
update_hash(old, chunk) right next to insert_hash(), and for the same reason
the smp_wmb() with its comment should go into the insert_hash() helper.

Thanks,
Amir.
Jan Kara
2018-07-03 15:31:09 UTC
Post by Amir Goldstein
Post by Jan Kara
Currently, the audit tree code does not make sure that when a chunk is
inserted into the hash table, it is fully initialized. So in theory (in
practice only on DEC Alpha) a user of the RCU lookup could see an
uninitialized structure in the hash table and crash. Add appropriate barriers
between initialization of the structure and its insertion into the hash table.
...
Post by Amir Goldstein
Post by Jan Kara
@@ -466,6 +481,13 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&old->hash, &chunk->hash);
IMO, now that list_replace_rcu() is no longer a one-liner (including the wmb
and the comment above) it would be cleaner to have a helper
update_hash(old, chunk) right next to insert_hash(), and for the same reason
the smp_wmb() with its comment should go into the insert_hash() helper.
I was thinking about this as well when writing the code. What I disliked
about hiding smp_wmb() in a helper function is that it then becomes much
less obvious that you need a good reason to add anything after the
smp_wmb(), as RCU readers need not see such a write. However, with some
commenting, I guess it should be obvious enough. I'll do that as a separate
cleanup patch though.
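
For reference, a possible shape for such helpers (purely hypothetical: the
names follow Amir's suggestion, details like the FSNOTIFY_MARK_FLAG_ATTACHED
check are omitted, and this is not the actual cleanup patch):

    /*
     * The smp_wmb() pairs with the data dependency barrier in READ_ONCE()
     * in audit_tree_lookup(). Nothing that must be visible to RCU readers
     * may be initialized after this point.
     */
    static void insert_hash(struct audit_chunk *chunk)
    {
            struct list_head *list = chunk_hash(chunk->key);

            smp_wmb();      /* publish only fully initialized chunks */
            list_add_rcu(&chunk->hash, list);
    }

    static void update_hash(struct audit_chunk *old, struct audit_chunk *new)
    {
            smp_wmb();      /* publish only fully initialized chunks */
            list_replace_rcu(&old->hash, &new->hash);
    }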

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Jan Kara
2018-06-28 16:40:11 UTC
Audit tree code is replacing marks attached to inodes in a non-atomic way.
Thus fsnotify_find_mark() in tag_chunk() may find a mark that belongs to a
chunk that is no longer valid and will soon be destroyed. Tags added to
such a chunk will simply be lost.

Fix the problem by making sure the old mark is marked as going away (through
fsnotify_detach_mark()) before dropping mark_mutex, and thus atomically
w.r.t. tag_chunk(). Note that this does not fix the problem completely: if
tag_chunk() finds a mark that is going away, it fails with -ENOENT. But at
least the failure is not silent, and currently there's no way to search for
another fsnotify mark attached to the inode. We'll fix this problem in a
later patch.

Signed-off-by: Jan Kara <***@suse.cz>
---
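The change boils down to splitting fsnotify_destroy_mark() into its two halves
so that the detach happens while mark_mutex is still held (an illustration,
condensed from the hunks below):

    /* Before: the mark stays findable until after we drop mark_mutex. */
    mutex_unlock(&entry->group->mark_mutex);
    fsnotify_destroy_mark(entry, audit_tree_group);

    /* After: the mark becomes unfindable atomically w.r.t. tag_chunk(). */
    fsnotify_detach_mark(entry);            /* still under mark_mutex */
    mutex_unlock(&entry->group->mark_mutex);
    fsnotify_free_mark(entry);              /* may sleep, outside the mutex */
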
kernel/audit_tree.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index de8d344d91b1..7f9df8c66227 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -278,8 +278,9 @@ static void untag_chunk(struct node *p)
list_del_init(&p->list);
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
+ fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
- fsnotify_destroy_mark(entry, audit_tree_group);
+ fsnotify_free_mark(entry);
goto out;
}

@@ -320,8 +321,9 @@ static void untag_chunk(struct node *p)
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
spin_unlock(&hash_lock);
+ fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
- fsnotify_destroy_mark(entry, audit_tree_group);
+ fsnotify_free_mark(entry);
fsnotify_put_mark(&new->mark); /* drop initial reference */
goto out;

@@ -364,8 +366,9 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
+ fsnotify_detach_mark(entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_destroy_mark(entry, audit_tree_group);
+ fsnotify_free_mark(entry);
fsnotify_put_mark(entry);
return 0;
}
@@ -446,10 +449,9 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
+ fsnotify_detach_mark(chunk_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
-
- fsnotify_destroy_mark(chunk_entry, audit_tree_group);
-
+ fsnotify_free_mark(chunk_entry);
fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
return 0;
@@ -477,8 +479,9 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
spin_unlock(&hash_lock);
+ fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_destroy_mark(old_entry, audit_tree_group);
+ fsnotify_free_mark(old_entry);
fsnotify_put_mark(chunk_entry); /* drop initial reference */
fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
return 0;
--
2.16.4
Amir Goldstein
2018-06-29 12:05:07 UTC
Post by Jan Kara
Audit tree code is replacing marks attached to inodes in a non-atomic way.
Thus fsnotify_find_mark() in tag_chunk() may find a mark that belongs to a
chunk that is no longer valid and will soon be destroyed. Tags added to
such a chunk will simply be lost.
Fix the problem by making sure the old mark is marked as going away (through
fsnotify_detach_mark()) before dropping mark_mutex, and thus atomically
w.r.t. tag_chunk(). Note that this does not fix the problem completely: if
tag_chunk() finds a mark that is going away, it fails with -ENOENT. But at
least the failure is not silent, and currently there's no way to search for
another fsnotify mark attached to the inode. We'll fix this problem in a
later patch.
---
This one too looks sane.
Without knowing anything about audit_watch, there seems to be
an fsnotify_destroy_mark() after unlock of audit_filter_mutex, so it
may require a similar fix.

Thanks,
Amir.
Jan Kara
2018-07-03 14:21:29 UTC
Post by Amir Goldstein
Post by Jan Kara
Audit tree code is replacing marks attached to inodes in a non-atomic way.
Thus fsnotify_find_mark() in tag_chunk() may find a mark that belongs to a
chunk that is no longer valid and will soon be destroyed. Tags added to
such a chunk will simply be lost.
Fix the problem by making sure the old mark is marked as going away (through
fsnotify_detach_mark()) before dropping mark_mutex, and thus atomically
w.r.t. tag_chunk(). Note that this does not fix the problem completely: if
tag_chunk() finds a mark that is going away, it fails with -ENOENT. But at
least the failure is not silent, and currently there's no way to search for
another fsnotify mark attached to the inode. We'll fix this problem in a
later patch.
---
This one too looks sane.
Without knowing anything about audit_watch, there seems to be
an fsnotify_destroy_mark() after unlock of audit_filter_mutex, so it
may require a similar fix.
Where? I don't see any call to fsnotify_destroy_mark() left after this
patch...

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Amir Goldstein
2018-07-03 17:42:26 UTC
Post by Jan Kara
Post by Amir Goldstein
Post by Jan Kara
Audit tree code is replacing marks attached to inodes in a non-atomic way.
Thus fsnotify_find_mark() in tag_chunk() may find a mark that belongs to a
chunk that is no longer valid and will soon be destroyed. Tags added to
such a chunk will simply be lost.
Fix the problem by making sure the old mark is marked as going away (through
fsnotify_detach_mark()) before dropping mark_mutex, and thus atomically
w.r.t. tag_chunk(). Note that this does not fix the problem completely: if
tag_chunk() finds a mark that is going away, it fails with -ENOENT. But at
least the failure is not silent, and currently there's no way to search for
another fsnotify mark attached to the inode. We'll fix this problem in a
later patch.
---
This one too looks sane.
Without knowing anything about audit_watch, there seems to be
an fsnotify_destroy_mark() after unlock of audit_filter_mutex, so it
may require a similar fix.
Where? I don't see any call to fsnotify_destroy_mark() left after this
patch...
Not directly related to this cleanup, but looking in other audit modules,
fsnotify_destroy_mark() in audit_remove_parent_watches() is called
outside audit_filter_mutex and audit_find_parent() in audit_add_watch()
is called inside audit_filter_mutex, so I was wondering if there was a
similar race window in that code. I didn't spend enough time looking
at audit_watch.c to understand what is going on in there.

Thanks,
Amir.
Jan Kara
2018-07-04 08:28:40 UTC
Post by Amir Goldstein
Post by Jan Kara
Post by Amir Goldstein
Post by Jan Kara
Audit tree code is replacing marks attached to inodes in a non-atomic way.
Thus fsnotify_find_mark() in tag_chunk() may find a mark that belongs to a
chunk that is no longer valid and will soon be destroyed. Tags added to
such a chunk will simply be lost.
Fix the problem by making sure the old mark is marked as going away (through
fsnotify_detach_mark()) before dropping mark_mutex, and thus atomically
w.r.t. tag_chunk(). Note that this does not fix the problem completely: if
tag_chunk() finds a mark that is going away, it fails with -ENOENT. But at
least the failure is not silent, and currently there's no way to search for
another fsnotify mark attached to the inode. We'll fix this problem in a
later patch.
---
This one too looks sane.
Without knowing anything about audit_watch, there seems to be
an fsnotify_destroy_mark() after unlock of audit_filter_mutex, so it
may require a similar fix.
Where? I don't see any call to fsnotify_destroy_mark() left after this
patch...
Not directly related to this cleanup, but looking in other audit modules,
fsnotify_destroy_mark() in audit_remove_parent_watches() is called
outside audit_filter_mutex and audit_find_parent() in audit_add_watch()
is called inside audit_filter_mutex, so I was wondering if there was a
similar race window in that code. I didn't spend enough time looking
at audit_watch.c to understand what is going on in there.
Oh, those are completely different fsnotify marks :) They belong to
audit_watch_group (while these belong to audit_tree_group) and these patches have
no ambition to change anything there.

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Jan Kara
2018-06-28 16:40:14 UTC
Audit tree code currently embeds the fsnotify mark in audit_chunk. As the
chunk attached to an inode is replaced when a new tag is added / removed, we
also need to remove the old fsnotify mark and add a new one on each such
occasion. This is cumbersome and makes the locking rules somewhat difficult
to follow.

Fix these problems by allocating the fsnotify mark independently and keeping
it alive for as long as some chunk is attached to the inode. Also add
documentation about the locking rules so that things are easier to follow.

Signed-off-by: Jan Kara <***@suse.cz>
---
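Since this is a lot of change at once, here is the core of the new arrangement
in isolation (condensed from the diff below; the comments paraphrase the
locking rules documented there):

    /* The mark now lives outside the chunk and survives chunk replacement. */
    struct audit_tree_mark {
            struct fsnotify_mark fsn_mark;  /* stable while inode is tagged */
            struct audit_chunk *chunk;      /* hash_lock + mark_mutex */
    };

    /*
     * Re-pointing the mark at a new chunk replaces the old remove-mark /
     * add-mark dance on every tagging / untagging.
     */
    static void replace_mark_chunk(struct fsnotify_mark *entry,
                                   struct audit_chunk *chunk)
    {
            struct audit_chunk *old;

            assert_spin_locked(&hash_lock);
            old = AUDIT_M(entry)->chunk;
            AUDIT_M(entry)->chunk = chunk;
            if (chunk)
                    chunk->mark = entry;
            if (old)
                    old->mark = NULL;
    }
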
kernel/audit_tree.c | 209 ++++++++++++++++++++++++++++++++--------------------
1 file changed, 129 insertions(+), 80 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 46cc6d046c75..045878d1c060 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -25,9 +25,8 @@ struct audit_tree {
struct audit_chunk {
struct list_head hash;
unsigned long key;
- struct fsnotify_mark mark;
+ struct fsnotify_mark *mark;
struct list_head trees; /* with root here */
- int dead;
int count;
atomic_long_t refs;
struct rcu_head head;
@@ -38,13 +37,25 @@ struct audit_chunk {
} owners[];
};

+struct audit_tree_mark {
+ struct fsnotify_mark fsn_mark;
+ struct audit_chunk *chunk;
+};
+
static LIST_HEAD(tree_list);
static LIST_HEAD(prune_list);
static struct task_struct *prune_thread;

/*
- * One struct chunk is attached to each inode of interest.
- * We replace struct chunk on tagging/untagging.
+ * One struct chunk is attached to each inode of interest through
+ * audit_tree_mark (fsnotify mark). We replace struct chunk on tagging /
+ * untagging; the mark is stable as long as there is a chunk attached. The
+ * association between mark and chunk is protected by hash_lock and
+ * audit_tree_group->mark_mutex. Thus as long as we hold
+ * audit_tree_group->mark_mutex and check that the mark is alive by
+ * FSNOTIFY_MARK_FLAG_ATTACHED flag check, we are sure the mark points to
+ * the current chunk.
+ *
* Rules have pointer to struct audit_tree.
* Rules have struct list_head rlist forming a list of rules over
* the same tree.
@@ -63,8 +74,12 @@ static struct task_struct *prune_thread;
* tree is refcounted; one reference for "some rules on rules_list refer to
* it", one for each chunk with pointer to it.
*
- * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
- * of watch contributes 1 to .refs).
+ * chunk is refcounted by embedded .refs. Mark associated with the chunk holds
+ * one chunk reference. This reference is dropped either when a mark is going
+ * to be freed (corresponding inode goes away) or when chunk attached to the
+ * mark gets replaced. This reference must be dropped using
+ * audit_mark_put_chunk() to make sure the reference is dropped only after RCU
+ * grace period as it protects RCU readers of the hash table.
*
* node.index allows to get from node.list to containing chunk.
* MSB of that sucker is stolen to mark taggings that we might have to
@@ -73,6 +88,7 @@ static struct task_struct *prune_thread;
*/

static struct fsnotify_group *audit_tree_group;
+static struct kmem_cache *audit_tree_mark_cachep __read_mostly;

static struct audit_tree *alloc_tree(const char *s)
{
@@ -126,16 +142,14 @@ void audit_put_chunk(struct audit_chunk *chunk)
free_chunk(chunk);
}

-static void __put_chunk(struct rcu_head *rcu)
+static inline struct audit_tree_mark *AUDIT_M(struct fsnotify_mark *entry)
{
- struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
- audit_put_chunk(chunk);
+ return container_of(entry, struct audit_tree_mark, fsn_mark);
}

static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
- struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
- call_rcu(&chunk->head, __put_chunk);
+ kmem_cache_free(audit_tree_mark_cachep, entry);
}

static struct audit_chunk *alloc_chunk(int count)
@@ -157,8 +171,6 @@ static struct audit_chunk *alloc_chunk(int count)
INIT_LIST_HEAD(&chunk->owners[i].list);
chunk->owners[i].index = i;
}
- fsnotify_init_mark(&chunk->mark, audit_tree_group);
- chunk->mark.mask = FS_IN_IGNORED;
return chunk;
}

@@ -184,8 +196,6 @@ static void insert_hash(struct audit_chunk *chunk)
{
struct list_head *list;

- if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
- return;
list = chunk_hash(chunk->key);
list_add_rcu(&chunk->hash, list);
}
@@ -228,15 +238,49 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}

+static void __put_chunk(struct rcu_head *rcu)
+{
+ struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
+ audit_put_chunk(chunk);
+}
+
+/*
+ * Drop reference to the chunk that was held by the mark. This is the reference
+ * that gets dropped after we've removed the chunk from the hash table and we
+ * use it to make sure chunk cannot be freed before RCU grace period expires.
+ */
+static void audit_mark_put_chunk(struct audit_chunk *chunk)
+{
+ call_rcu(&chunk->head, __put_chunk);
+}
+
+static void replace_mark_chunk(struct fsnotify_mark *entry,
+ struct audit_chunk *chunk)
+{
+ struct audit_chunk *old;
+
+ assert_spin_locked(&hash_lock);
+ old = AUDIT_M(entry)->chunk;
+ AUDIT_M(entry)->chunk = chunk;
+ if (chunk)
+ chunk->mark = entry;
+ if (old)
+ old->mark = NULL;
+}
+
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
- struct fsnotify_mark *entry = &chunk->mark;
+ struct fsnotify_mark *entry = chunk->mark;
struct audit_chunk *new = NULL;
struct audit_tree *owner;
int size = chunk->count - 1;
int i, j;

+ /* Racing with audit_tree_freeing_mark()? */
+ if (!entry)
+ return;
+
fsnotify_get_mark(entry);

spin_unlock(&hash_lock);
@@ -246,29 +290,31 @@ static void untag_chunk(struct node *p)

mutex_lock(&entry->group->mark_mutex);
/*
- * mark_mutex protects mark from getting detached and thus also from
- * mark->connector->obj getting NULL.
+ * mark_mutex stabilizes the chunk attached to the mark so we can check
+ * whether it didn't change while we dropped hash_lock.
*/
- if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
+ if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
+ AUDIT_M(entry)->chunk != chunk) {
mutex_unlock(&entry->group->mark_mutex);
if (new)
- fsnotify_put_mark(&new->mark);
+ kfree(new);
goto out;
}

owner = p->owner;

if (!size) {
- chunk->dead = 1;
spin_lock(&hash_lock);
list_del_init(&chunk->trees);
if (owner->root == chunk)
owner->root = NULL;
list_del_init(&p->list);
list_del_rcu(&chunk->hash);
+ replace_mark_chunk(entry, NULL);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
+ audit_mark_put_chunk(chunk);
fsnotify_free_mark(entry);
goto out;
}
@@ -276,15 +322,9 @@ static void untag_chunk(struct node *p)
if (!new)
goto Fallback;

- if (fsnotify_add_mark_locked(&new->mark, entry->connector->obj,
- FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- fsnotify_put_mark(&new->mark);
- goto Fallback;
- }
-
- chunk->dead = 1;
spin_lock(&hash_lock);
new->key = chunk->key;
+ replace_mark_chunk(entry, new);
list_replace_init(&chunk->trees, &new->trees);
if (owner->root == chunk) {
list_del_init(&owner->same_root);
@@ -317,10 +357,8 @@ static void untag_chunk(struct node *p)
smp_wmb();
list_replace_rcu(&chunk->hash, &new->hash);
spin_unlock(&hash_lock);
- fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
- fsnotify_free_mark(entry);
- fsnotify_put_mark(&new->mark); /* drop initial reference */
+ audit_mark_put_chunk(chunk);
goto out;

Fallback:
@@ -340,6 +378,18 @@ static void untag_chunk(struct node *p)
spin_lock(&hash_lock);
}

+static struct fsnotify_mark *alloc_fsnotify_mark(void)
+{
+ struct audit_tree_mark *mark;
+
+ mark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL);
+ if (!mark)
+ return NULL;
+ fsnotify_init_mark(&mark->fsn_mark, audit_tree_group);
+ mark->fsn_mark.mask = FS_IN_IGNORED;
+ return &mark->fsn_mark;
+}
+
/* Call with group->mark_mutex held, releases it */
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
@@ -351,23 +401,31 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOMEM;
}

- entry = &chunk->mark;
+ entry = alloc_fsnotify_mark();
+ if (!entry) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
+ kfree(chunk);
+ return -ENOMEM;
+ }
+
if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry);
+ kfree(chunk);
return -ENOSPC;
}

spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
- chunk->dead = 1;
fsnotify_detach_mark(entry);
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_free_mark(entry);
fsnotify_put_mark(entry);
+ kfree(chunk);
return 0;
}
+ replace_mark_chunk(entry, chunk);
chunk->owners[0].index = (1U << 31);
chunk->owners[0].owner = tree;
get_tree(tree);
@@ -386,34 +444,42 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry); /* drop initial reference */
+ /*
+ * Drop our initial reference. When the mark we point to is getting freed,
+ * we get notified through the ->freeing_mark callback and clean up the
+ * chunk pointing to this mark.
+ */
+ fsnotify_put_mark(entry);
return 0;
}

/* the first tagged inode becomes root of tree */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
- struct fsnotify_mark *old_entry, *chunk_entry;
+ struct fsnotify_mark *entry;
struct audit_tree *owner;
struct audit_chunk *chunk, *old;
struct node *p;
int n;

mutex_lock(&audit_tree_group->mark_mutex);
- old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
- audit_tree_group);
- if (!old_entry)
+ entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
+ if (!entry)
return create_chunk(inode, tree);

- old = container_of(old_entry, struct audit_chunk, mark);
-
+ /*
+ * The found mark is guaranteed to be attached, and mark_mutex protects it
+ * from getting detached, thus making sure there is a chunk attached to
+ * the mark.
+ */
/* are we already there? */
spin_lock(&hash_lock);
+ old = AUDIT_M(entry)->chunk;
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(old_entry);
+ fsnotify_put_mark(entry);
return 0;
}
}
@@ -422,44 +488,20 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk = alloc_chunk(old->count + 1);
if (!chunk) {
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(old_entry);
+ fsnotify_put_mark(entry);
return -ENOMEM;
}

- chunk_entry = &chunk->mark;
-
- /*
- * mark_mutex protects mark from getting detached and thus also from
- * mark->connector->obj getting NULL.
- */
- if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
- /* old_entry is being shot, lets just lie */
- mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(old_entry);
- fsnotify_put_mark(&chunk->mark);
- return -ENOENT;
- }
-
- if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
- FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(chunk_entry);
- fsnotify_put_mark(old_entry);
- return -ENOSPC;
- }
-
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
- chunk->dead = 1;
- fsnotify_detach_mark(chunk_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_free_mark(chunk_entry);
- fsnotify_put_mark(chunk_entry);
- fsnotify_put_mark(old_entry);
+ fsnotify_put_mark(entry);
+ kfree(chunk);
return 0;
}
chunk->key = old->key;
+ replace_mark_chunk(entry, chunk);
list_replace_init(&old->trees, &chunk->trees);
for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
struct audit_tree *s = old->owners[n].owner;
@@ -476,7 +518,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&p->list, &tree->chunks);
list_for_each_entry(owner, &chunk->trees, same_root)
owner->root = chunk;
- old->dead = 1;
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
@@ -489,11 +530,10 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
smp_wmb();
list_replace_rcu(&old->hash, &chunk->hash);
spin_unlock(&hash_lock);
- fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_free_mark(old_entry);
- fsnotify_put_mark(chunk_entry); /* drop initial reference */
- fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
+ fsnotify_put_mark(entry); /* pair to fsnotify_find mark_entry */
+ audit_mark_put_chunk(old);
+
return 0;
}

@@ -960,10 +1000,6 @@ static void evict_chunk(struct audit_chunk *chunk)
int need_prune = 0;
int n;

- if (chunk->dead)
- return;
-
- chunk->dead = 1;
mutex_lock(&audit_filter_mutex);
spin_lock(&hash_lock);
while (!list_empty(&chunk->trees)) {
@@ -1002,9 +1038,20 @@ static int audit_tree_handle_event(struct fsnotify_group *group,

static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
{
- struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
+ struct audit_chunk *chunk;

- evict_chunk(chunk);
+ mutex_lock(&entry->group->mark_mutex);
+ spin_lock(&hash_lock);
+ chunk = AUDIT_M(entry)->chunk;
+ replace_mark_chunk(entry, NULL);
+ spin_unlock(&hash_lock);
+ if (chunk) {
+ mutex_unlock(&entry->group->mark_mutex);
+ evict_chunk(chunk);
+ audit_mark_put_chunk(chunk);
+ } else {
+ mutex_unlock(&entry->group->mark_mutex);
+ }

/*
* We are guaranteed to have at least one reference to the mark from
@@ -1023,6 +1070,8 @@ static int __init audit_tree_init(void)
{
int i;

+ audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);
+
audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
if (IS_ERR(audit_tree_group))
audit_panic("cannot initialize fsnotify group for rectree watches");
--
2.16.4
Amir Goldstein
2018-06-29 13:20:12 UTC
Post by Jan Kara
Audit tree code currently embeds the fsnotify mark in audit_chunk. As the
chunk attached to an inode is replaced when a new tag is added / removed, we
also need to remove the old fsnotify mark and add a new one on each such
occasion. This is cumbersome and makes the locking rules somewhat difficult
to follow.
Fix these problems by allocating the fsnotify mark independently and keeping
it alive for as long as some chunk is attached to the inode. Also add
documentation about the locking rules so that things are easier to follow.
---
Wow! this patch is a lot to take in at once.
I wonder if it would be possible to split it into:
- make the mark not embedded and take a chunk reference
- replace_mark_chunk() and getting rid of the cumbersome code

Or any other simplification that would help me survive this review.

In the meanwhile, just one nit below...

[...]
Post by Jan Kara
+ mutex_lock(&entry->group->mark_mutex);
+ spin_lock(&hash_lock);
+ chunk = AUDIT_M(entry)->chunk;
+ replace_mark_chunk(entry, NULL);
+ spin_unlock(&hash_lock);
+ if (chunk) {
+ mutex_unlock(&entry->group->mark_mutex);
+ evict_chunk(chunk);
+ audit_mark_put_chunk(chunk);
+ } else {
+ mutex_unlock(&entry->group->mark_mutex);
+ }
else case seems like a leftover or something.

Thanks,
Amir.
Jan Kara
2018-07-04 12:34:08 UTC
Post by Amir Goldstein
Post by Jan Kara
Audit tree code currently embeds the fsnotify mark in audit_chunk. As the
chunk attached to an inode is replaced when a new tag is added / removed, we
also need to remove the old fsnotify mark and add a new one on each such
occasion. This is cumbersome and makes the locking rules somewhat difficult
to follow.
Fix these problems by allocating the fsnotify mark independently and keeping
it alive for as long as some chunk is attached to the inode. Also add
documentation about the locking rules so that things are easier to follow.
---
Wow! this patch is a lot to take in at once.
- make the mark not embedded and take a chunk reference
- replace_mark_chunk() and getting rid of the cumbersome code
OK, I'll try something.
Post by Amir Goldstein
Or any other simplification that would help me survive this review.
In the meanwhile, just one nit below...
[...]
Post by Jan Kara
+ mutex_lock(&entry->group->mark_mutex);
+ spin_lock(&hash_lock);
+ chunk = AUDIT_M(entry)->chunk;
+ replace_mark_chunk(entry, NULL);
+ spin_unlock(&hash_lock);
+ if (chunk) {
+ mutex_unlock(&entry->group->mark_mutex);
+ evict_chunk(chunk);
+ audit_mark_put_chunk(chunk);
+ } else {
+ mutex_unlock(&entry->group->mark_mutex);
+ }
else case seems like a leftover or something.
Fixed. Thanks.
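
The fix just hoists the unlock out of the branch, roughly like this (sketch,
the final version may differ in details):

    mutex_lock(&entry->group->mark_mutex);
    spin_lock(&hash_lock);
    chunk = AUDIT_M(entry)->chunk;
    replace_mark_chunk(entry, NULL);
    spin_unlock(&hash_lock);
    mutex_unlock(&entry->group->mark_mutex);
    if (chunk) {
            evict_chunk(chunk);
            audit_mark_put_chunk(chunk);
    }

evict_chunk() was already called after dropping mark_mutex in the chunk branch,
so unlocking before it is safe.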

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Amir Goldstein
2018-06-29 11:44:54 UTC
Hello,
this series addresses the problems I have identified when trying to understand
how exactly kernel/audit_tree.c is using the generic fsnotify framework. I hope
I have understood all the interactions right but careful review is certainly
welcome (CCing Al as he was the one implementing this code originally).
The patches have been tested by a stress test I have written which mounts &
unmounts filesystems in the directory tree while adding and removing audit
rules for this tree in parallel and accessing the tree to generate events.
Still some real-world testing would be welcome.
This sort of stress test sounds really useful for fanotify/inotify as well.
Do you plan to upstream that stress test?

Thanks!
Amir.
Paul Moore
2018-06-29 18:01:44 UTC
Post by Amir Goldstein
Hello,
this series addresses the problems I have identified when trying to understand
how exactly kernel/audit_tree.c is using the generic fsnotify framework. I hope
I have understood all the interactions right but careful review is certainly
welcome (CCing Al as he was the one implementing this code originally).
The patches have been tested by a stress test I have written which mounts &
unmounts filesystems in the directory tree while adding and removing audit
rules for this tree in parallel and accessing the tree to generate events.
Still some real-world testing would be welcome.
This sort of stress test sounds really useful for fanotify/inotify as well.
Do you plan to upstream that stress test?
Agreed.

I would be interested in having something like this in the
audit-testsuite so that we can include it in our regular regression
testing.

* https://github.com/linux-audit/audit-testsuite
--
paul moore
www.paul-moore.com
Jan Kara
2018-07-03 14:14:17 UTC
Post by Paul Moore
Post by Amir Goldstein
Hello,
this series addresses the problems I have identified when trying to understand
how exactly kernel/audit_tree.c is using the generic fsnotify framework. I hope
I have understood all the interactions right but careful review is certainly
welcome (CCing Al as he was the one implementing this code originally).
The patches have been tested by a stress test I have written which mounts &
unmounts filesystems in the directory tree while adding and removing audit
rules for this tree in parallel and accessing the tree to generate events.
Still some real-world testing would be welcome.
This sort of stress test sounds really useful for fanotify/inotify as well.
Do you plan to upstream that stress test?
Agreed.
I would be interested in having something like this in the
audit-testsuite so that we can include it in our regular regression
testing.
* https://github.com/linux-audit/audit-testsuite
OK, I'll look into integrating the test script into audit testsuite.
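
In rough outline the test does something like the following (just a sketch;
the actual script differs in details, the path, rule key and runtime here are
made up, and it needs root):

    #include <signal.h>
    #include <stdlib.h>
    #include <sys/types.h>
    #include <unistd.h>

    #define TREE "/tmp/audit-stress"

    /* Fork a child that alternates between two shell commands forever. */
    static pid_t churn(const char *a, const char *b)
    {
            pid_t pid = fork();

            if (pid == 0)
                    for (;;) {
                            system(a);
                            system(b);
                    }
            return pid;
    }

    int main(void)
    {
            pid_t kids[3];
            int i;

            system("mkdir -p " TREE "/mnt");
            /* mount & unmount filesystems inside the watched tree */
            kids[0] = churn("mount -t tmpfs none " TREE "/mnt",
                            "umount " TREE "/mnt 2>/dev/null");
            /* add and remove an audit tree rule for the same tree */
            kids[1] = churn("auditctl -a always,exit -F dir=" TREE " -F key=stress",
                            "auditctl -d always,exit -F dir=" TREE " -F key=stress");
            /* access the tree to generate events */
            kids[2] = churn("touch " TREE "/mnt/f 2>/dev/null",
                            "ls " TREE " >/dev/null");
            sleep(60);
            for (i = 0; i < 3; i++)
                    kill(kids[i], SIGKILL);
            return 0;
    }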

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Paul Moore
2018-07-03 17:03:38 UTC
Post by Jan Kara
Post by Paul Moore
Post by Amir Goldstein
Hello,
this series addresses the problems I have identified when trying to understand
how exactly kernel/audit_tree.c is using the generic fsnotify framework. I hope
I have understood all the interactions right but careful review is certainly
welcome (CCing Al as he was the one implementing this code originally).
The patches have been tested by a stress test I have written which mounts &
unmounts filesystems in the directory tree while adding and removing audit
rules for this tree in parallel and accessing the tree to generate events.
Still some real-world testing would be welcome.
This sort of stress test sounds really useful for fanotify/inotify as well.
Do you plan to upstream that stress test?
Agreed.
I would be interested in having something like this in the
audit-testsuite so that we can include it in our regular regression
testing.
* https://github.com/linux-audit/audit-testsuite
OK, I'll look into integrating the test script into audit testsuite.
Great, thank you.

Even if you don't get around to it, posting it somewhere could still
be helpful, e.g. I could use it to hammer on your patches too.
Speaking of which, thank you very much for doing this work; I know how
painful the audit code can be and I suspect this wasn't easy. I see
you've already got some feedback from Amir (thank you Amir!) and I'm
working my way through them too, but some vacation time is going to
make progress a bit slow.
--
paul moore
www.paul-moore.com
Amir Goldstein
2018-06-29 12:53:20 UTC
Post by Jan Kara
Currently the chunk hash key (which is in fact a pointer to the inode) is
derived as chunk->mark.connector->obj. It is tricky to make this dereference
reliable for hash table lookups done only under RCU, as the mark can get
detached from the connector and the connector can get freed independently
of the running lookup. Thus there is a possible use after free / NULL ptr
dereference issue:
CPU1                                    CPU2
untag_chunk()
  ...
                                        audit_tree_lookup()
                                          list_for_each_entry_rcu(p, list, hash) {
  list_del_rcu(&chunk->hash);
  fsnotify_destroy_mark(entry);
  fsnotify_put_mark(entry)
                                          chunk_to_key(p)
                                            if (!chunk->mark.connector)
  ...
  hlist_del_init_rcu(&mark->obj_list);
  if (hlist_empty(&conn->list)) {
    inode = fsnotify_detach_connector_from_object(conn);
  mark->connector = NULL;
  ...
  frees connector from workqueue
                                            chunk->mark.connector->obj
This race is probably impossible to hit in practice as the race window
on CPU1 is very narrow and CPU2 has a lot of code to execute. Still, it's
better to have this fixed. Since the inode the chunk is attached to is
constant during the chunk's lifetime, it is easy to cache the key in the
chunk itself and thus avoid these issues.
---
kernel/audit_tree.c | 26 +++++++-------------------
1 file changed, 7 insertions(+), 19 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 7f9df8c66227..89ad8857a578 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -24,6 +24,7 @@ struct audit_tree {
struct audit_chunk {
struct list_head hash;
+ unsigned long key;
struct fsnotify_mark mark;
struct list_head trees; /* with root here */
int dead;
@@ -172,21 +173,6 @@ static unsigned long inode_to_key(const struct inode *inode)
return (unsigned long)&inode->i_fsnotify_marks;
}
-/*
- * Function to return search key in our hash from chunk. Key 0 is special and
- * should never be present in the hash.
- */
-static unsigned long chunk_to_key(struct audit_chunk *chunk)
-{
- /*
- * We have a reference to the mark so it should be attached to a
- * connector.
- */
- if (WARN_ON_ONCE(!chunk->mark.connector))
- return 0;
- return (unsigned long)chunk->mark.connector->obj;
-}
-
static inline struct list_head *chunk_hash(unsigned long key)
{
unsigned long n = key / L1_CACHE_BYTES;
@@ -196,12 +182,11 @@ static inline struct list_head *chunk_hash(unsigned long key)
/* hash_lock & entry->group->mark_mutex is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
- unsigned long key = chunk_to_key(chunk);
struct list_head *list;
Suggest adding a check (WARN_ON_ONCE(!chunk->key))
because it's quite hard to assert by code review alone that no execution
path was missed where chunk->key should have been set.

Thanks,
Amir.
Jan Kara
2018-07-03 14:25:42 UTC
Post by Amir Goldstein
Post by Jan Kara
Currently the chunk hash key (which is in fact a pointer to the inode) is
derived as chunk->mark.connector->obj. It is tricky to make this dereference
reliable for hash table lookups done only under RCU, as the mark can get
detached from the connector and the connector can get freed independently
of the running lookup. Thus there is a possible use after free / NULL ptr
dereference issue:
CPU1                                    CPU2
untag_chunk()
  ...
                                        audit_tree_lookup()
                                          list_for_each_entry_rcu(p, list, hash) {
  list_del_rcu(&chunk->hash);
  fsnotify_destroy_mark(entry);
  fsnotify_put_mark(entry)
                                          chunk_to_key(p)
                                            if (!chunk->mark.connector)
  ...
  hlist_del_init_rcu(&mark->obj_list);
  if (hlist_empty(&conn->list)) {
    inode = fsnotify_detach_connector_from_object(conn);
  mark->connector = NULL;
  ...
  frees connector from workqueue
                                            chunk->mark.connector->obj
This race is probably impossible to hit in practice as the race window
on CPU1 is very narrow and CPU2 has a lot of code to execute. Still, it's
better to have this fixed. Since the inode the chunk is attached to is
constant during the chunk's lifetime, it is easy to cache the key in the
chunk itself and thus avoid these issues.
---
kernel/audit_tree.c | 26 +++++++-------------------
1 file changed, 7 insertions(+), 19 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 7f9df8c66227..89ad8857a578 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -24,6 +24,7 @@ struct audit_tree {
struct audit_chunk {
struct list_head hash;
+ unsigned long key;
struct fsnotify_mark mark;
struct list_head trees; /* with root here */
int dead;
@@ -172,21 +173,6 @@ static unsigned long inode_to_key(const struct inode *inode)
return (unsigned long)&inode->i_fsnotify_marks;
}
-/*
- * Function to return search key in our hash from chunk. Key 0 is special and
- * should never be present in the hash.
- */
-static unsigned long chunk_to_key(struct audit_chunk *chunk)
-{
- /*
- * We have a reference to the mark so it should be attached to a
- * connector.
- */
- if (WARN_ON_ONCE(!chunk->mark.connector))
- return 0;
- return (unsigned long)chunk->mark.connector->obj;
-}
-
static inline struct list_head *chunk_hash(unsigned long key)
{
unsigned long n = key / L1_CACHE_BYTES;
@@ -196,12 +182,11 @@ static inline struct list_head *chunk_hash(unsigned long key)
/* hash_lock & entry->group->mark_mutex is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
- unsigned long key = chunk_to_key(chunk);
struct list_head *list;
Suggest adding a check (WARN_ON_ONCE(!chunk->key)), because it's quite
hard to assert by code review alone that no execution path was missed
where chunk->key should have been set.
OK, added. FWIW, create_chunk() is the only place that uses insert_hash(),
and it clearly sets the key. But it doesn't hurt to check here for
future-proofing.

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Dan Carpenter
2018-07-02 06:05:49 UTC
Permalink
Hi Jan,

url: https://github.com/0day-ci/linux/commits/Jan-Kara/audit-Fix-various-races-when-tagging-and-untagging-mounts/20180629-043337

smatch warnings:
kernel/audit_tree.c:484 tag_chunk() warn: inconsistent returns 'mutex:&audit_tree_group->mark_mutex'.
Locked on: line 400
Unlocked on: line 411

# https://github.com/0day-ci/linux/commit/86c9c9a738e409c85891519c17d94043b7f434d5
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout 86c9c9a738e409c85891519c17d94043b7f434d5
vim +484 kernel/audit_tree.c

74c3cbe33 Al Viro 2007-07-22 386
74c3cbe33 Al Viro 2007-07-22 387 /* the first tagged inode becomes root of tree */
74c3cbe33 Al Viro 2007-07-22 388 static int tag_chunk(struct inode *inode, struct audit_tree *tree)
74c3cbe33 Al Viro 2007-07-22 389 {
e61ce8673 Eric Paris 2009-12-17 390 struct fsnotify_mark *old_entry, *chunk_entry;
74c3cbe33 Al Viro 2007-07-22 391 struct audit_tree *owner;
74c3cbe33 Al Viro 2007-07-22 392 struct audit_chunk *chunk, *old;
74c3cbe33 Al Viro 2007-07-22 393 struct node *p;
74c3cbe33 Al Viro 2007-07-22 394 int n;
74c3cbe33 Al Viro 2007-07-22 395
86c9c9a73 Jan Kara 2018-06-28 396 mutex_lock(&audit_tree_group->mark_mutex);
b1362edfe Jan Kara 2016-12-21 397 old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
b1362edfe Jan Kara 2016-12-21 398 audit_tree_group);
28a3a7eb3 Eric Paris 2009-12-17 399 if (!old_entry)
74c3cbe33 Al Viro 2007-07-22 400 return create_chunk(inode, tree);
^^^^^^^^^^^^^^^^^^^^^^^^^
Should we drop the lock before this return?

74c3cbe33 Al Viro 2007-07-22 401
28a3a7eb3 Eric Paris 2009-12-17 402 old = container_of(old_entry, struct audit_chunk, mark);
74c3cbe33 Al Viro 2007-07-22 403
74c3cbe33 Al Viro 2007-07-22 404 /* are we already there? */
74c3cbe33 Al Viro 2007-07-22 405 spin_lock(&hash_lock);
74c3cbe33 Al Viro 2007-07-22 406 for (n = 0; n < old->count; n++) {
74c3cbe33 Al Viro 2007-07-22 407 if (old->owners[n].owner == tree) {
74c3cbe33 Al Viro 2007-07-22 408 spin_unlock(&hash_lock);
86c9c9a73 Jan Kara 2018-06-28 409 mutex_unlock(&audit_tree_group->mark_mutex);
28a3a7eb3 Eric Paris 2009-12-17 410 fsnotify_put_mark(old_entry);
74c3cbe33 Al Viro 2007-07-22 411 return 0;
74c3cbe33 Al Viro 2007-07-22 412 }
74c3cbe33 Al Viro 2007-07-22 413 }
74c3cbe33 Al Viro 2007-07-22 414 spin_unlock(&hash_lock);
74c3cbe33 Al Viro 2007-07-22 415
74c3cbe33 Al Viro 2007-07-22 416 chunk = alloc_chunk(old->count + 1);
b4c30aad3 Al Viro 2009-12-19 417 if (!chunk) {
86c9c9a73 Jan Kara 2018-06-28 418 mutex_unlock(&audit_tree_group->mark_mutex);
28a3a7eb3 Eric Paris 2009-12-17 419 fsnotify_put_mark(old_entry);
74c3cbe33 Al Viro 2007-07-22 420 return -ENOMEM;
b4c30aad3 Al Viro 2009-12-19 421 }
74c3cbe33 Al Viro 2007-07-22 422
28a3a7eb3 Eric Paris 2009-12-17 423 chunk_entry = &chunk->mark;
28a3a7eb3 Eric Paris 2009-12-17 424
6b3f05d24 Jan Kara 2016-12-21 425 /*
6b3f05d24 Jan Kara 2016-12-21 426 * mark_mutex protects mark from getting detached and thus also from
36f10f55f Amir Goldstein 2018-06-23 427 * mark->connector->obj getting NULL.
6b3f05d24 Jan Kara 2016-12-21 428 */
43471d15d Jan Kara 2017-04-03 429 if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
28a3a7eb3 Eric Paris 2009-12-17 430 /* old_entry is being shot, lets just lie */
86c9c9a73 Jan Kara 2018-06-28 431 mutex_unlock(&audit_tree_group->mark_mutex);
28a3a7eb3 Eric Paris 2009-12-17 432 fsnotify_put_mark(old_entry);
7b1293234 Jan Kara 2016-12-21 433 fsnotify_put_mark(&chunk->mark);
28a3a7eb3 Eric Paris 2009-12-17 434 return -ENOENT;
28a3a7eb3 Eric Paris 2009-12-17 435 }
28a3a7eb3 Eric Paris 2009-12-17 436
36f10f55f Amir Goldstein 2018-06-23 437 if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
36f10f55f Amir Goldstein 2018-06-23 438 FSNOTIFY_OBJ_TYPE_INODE, 1)) {
86c9c9a73 Jan Kara 2018-06-28 439 mutex_unlock(&audit_tree_group->mark_mutex);
0fe33aae0 Miklos Szeredi 2012-08-15 440 fsnotify_put_mark(chunk_entry);
28a3a7eb3 Eric Paris 2009-12-17 441 fsnotify_put_mark(old_entry);
74c3cbe33 Al Viro 2007-07-22 442 return -ENOSPC;
74c3cbe33 Al Viro 2007-07-22 443 }
28a3a7eb3 Eric Paris 2009-12-17 444
74c3cbe33 Al Viro 2007-07-22 445 spin_lock(&hash_lock);
74c3cbe33 Al Viro 2007-07-22 446 if (tree->goner) {
74c3cbe33 Al Viro 2007-07-22 447 spin_unlock(&hash_lock);
74c3cbe33 Al Viro 2007-07-22 448 chunk->dead = 1;
86c9c9a73 Jan Kara 2018-06-28 449 mutex_unlock(&audit_tree_group->mark_mutex);
28a3a7eb3 Eric Paris 2009-12-17 450
e2a29943e Lino Sanfilippo 2011-06-14 451 fsnotify_destroy_mark(chunk_entry, audit_tree_group);
28a3a7eb3 Eric Paris 2009-12-17 452
28a3a7eb3 Eric Paris 2009-12-17 453 fsnotify_put_mark(chunk_entry);
28a3a7eb3 Eric Paris 2009-12-17 454 fsnotify_put_mark(old_entry);
74c3cbe33 Al Viro 2007-07-22 455 return 0;
74c3cbe33 Al Viro 2007-07-22 456 }
74c3cbe33 Al Viro 2007-07-22 457 list_replace_init(&old->trees, &chunk->trees);
74c3cbe33 Al Viro 2007-07-22 458 for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
74c3cbe33 Al Viro 2007-07-22 459 struct audit_tree *s = old->owners[n].owner;
74c3cbe33 Al Viro 2007-07-22 460 p->owner = s;
74c3cbe33 Al Viro 2007-07-22 461 p->index = old->owners[n].index;
74c3cbe33 Al Viro 2007-07-22 462 if (!s) /* result of fallback in untag */
74c3cbe33 Al Viro 2007-07-22 463 continue;
74c3cbe33 Al Viro 2007-07-22 464 get_tree(s);
74c3cbe33 Al Viro 2007-07-22 465 list_replace_init(&old->owners[n].list, &p->list);
74c3cbe33 Al Viro 2007-07-22 466 }
74c3cbe33 Al Viro 2007-07-22 467 p->index = (chunk->count - 1) | (1U<<31);
74c3cbe33 Al Viro 2007-07-22 468 p->owner = tree;
74c3cbe33 Al Viro 2007-07-22 469 get_tree(tree);
74c3cbe33 Al Viro 2007-07-22 470 list_add(&p->list, &tree->chunks);
74c3cbe33 Al Viro 2007-07-22 471 list_replace_rcu(&old->hash, &chunk->hash);
74c3cbe33 Al Viro 2007-07-22 472 list_for_each_entry(owner, &chunk->trees, same_root)
74c3cbe33 Al Viro 2007-07-22 473 owner->root = chunk;
74c3cbe33 Al Viro 2007-07-22 474 old->dead = 1;
74c3cbe33 Al Viro 2007-07-22 475 if (!tree->root) {
74c3cbe33 Al Viro 2007-07-22 476 tree->root = chunk;
74c3cbe33 Al Viro 2007-07-22 477 list_add(&tree->same_root, &chunk->trees);
74c3cbe33 Al Viro 2007-07-22 478 }
74c3cbe33 Al Viro 2007-07-22 479 spin_unlock(&hash_lock);
86c9c9a73 Jan Kara 2018-06-28 480 mutex_unlock(&audit_tree_group->mark_mutex);
e2a29943e Lino Sanfilippo 2011-06-14 481 fsnotify_destroy_mark(old_entry, audit_tree_group);
b3e8692b4 Miklos Szeredi 2012-08-15 482 fsnotify_put_mark(chunk_entry); /* drop initial reference */
28a3a7eb3 Eric Paris 2009-12-17 483 fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
74c3cbe33 Al Viro 2007-07-22 @484 return 0;
74c3cbe33 Al Viro 2007-07-22 485 }
74c3cbe33 Al Viro 2007-07-22 486

:::::: The code at line 484 was first introduced by commit
:::::: 74c3cbe33bc077ac1159cadfea608b501e100344 [PATCH] audit: watching subtrees

:::::: TO: Al Viro <***@zeniv.linux.org.uk>
:::::: CC: Al Viro <***@zeniv.linux.org.uk>

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
Jan Kara
2018-07-03 14:18:49 UTC
Permalink
Hi Dan!
Post by Dan Carpenter
url: https://github.com/0day-ci/linux/commits/Jan-Kara/audit-Fix-various-races-when-tagging-and-untagging-mounts/20180629-043337
kernel/audit_tree.c:484 tag_chunk() warn: inconsistent returns 'mutex:&audit_tree_group->mark_mutex'.
Locked on: line 400
Unlocked on: line 411
# https://github.com/0day-ci/linux/commit/86c9c9a738e409c85891519c17d94043b7f434d5
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout 86c9c9a738e409c85891519c17d94043b7f434d5
vim +484 kernel/audit_tree.c
74c3cbe33 Al Viro 2007-07-22 386
74c3cbe33 Al Viro 2007-07-22 387 /* the first tagged inode becomes root of tree */
74c3cbe33 Al Viro 2007-07-22 388 static int tag_chunk(struct inode *inode, struct audit_tree *tree)
74c3cbe33 Al Viro 2007-07-22 389 {
e61ce8673 Eric Paris 2009-12-17 390 struct fsnotify_mark *old_entry, *chunk_entry;
74c3cbe33 Al Viro 2007-07-22 391 struct audit_tree *owner;
74c3cbe33 Al Viro 2007-07-22 392 struct audit_chunk *chunk, *old;
74c3cbe33 Al Viro 2007-07-22 393 struct node *p;
74c3cbe33 Al Viro 2007-07-22 394 int n;
74c3cbe33 Al Viro 2007-07-22 395
86c9c9a73 Jan Kara 2018-06-28 396 mutex_lock(&audit_tree_group->mark_mutex);
b1362edfe Jan Kara 2016-12-21 397 old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
b1362edfe Jan Kara 2016-12-21 398 audit_tree_group);
28a3a7eb3 Eric Paris 2009-12-17 399 if (!old_entry)
74c3cbe33 Al Viro 2007-07-22 400 return create_chunk(inode, tree);
^^^^^^^^^^^^^^^^^^^^^^^^^
Should we drop the lock before this return?
No, because create_chunk() drops &audit_tree_group->mark_mutex in all
cases. It's a bit ugly to have a function that is entered with the
mutex held and releases it itself, but in this case it's somewhat
difficult to avoid...
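
Schematically, the pairing looks like this (a simplified sketch with
the details elided, not the exact upstream code):

	/* Entered with audit_tree_group->mark_mutex held; drops it on
	 * every return path. That is why the early
	 * "return create_chunk(inode, tree);" in tag_chunk() does not
	 * leak the lock, even though smatch cannot see that. */
	static int create_chunk(struct inode *inode, struct audit_tree *tree)
	{
		struct audit_chunk *chunk = alloc_chunk(1);

		if (!chunk) {
			mutex_unlock(&audit_tree_group->mark_mutex);
			return -ENOMEM;
		}
		/* ... attach the fsnotify mark, set chunk->key, insert
		 * into the hash ... */
		mutex_unlock(&audit_tree_group->mark_mutex);
		return 0;
	}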

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR