Discussion:
[PATCH 05/14] audit: Make hash table insertion safe against concurrent lookups
Add Reply
Jan Kara
2018-10-17 10:14:56 UTC
Reply
Permalink
Currently, the audit tree code does not make sure that when a chunk is
inserted into the hash table, it is fully initialized. So in theory a
user of RCU lookup could see uninitialized structure in the hash table
and crash. Add appropriate barriers between initialization of the
structure and its insertion into hash table.

Reviewed-by: Richard Guy Briggs <***@redhat.com>
Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 32 +++++++++++++++++++++++++++++---
1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index bac5dd90c629..307749d6773c 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -186,6 +186,12 @@ static void insert_hash(struct audit_chunk *chunk)

if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
return;
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
WARN_ON_ONCE(!chunk->key);
list = chunk_hash(chunk->key);
list_add_rcu(&chunk->hash, list);
@@ -199,7 +205,11 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
struct audit_chunk *p;

list_for_each_entry_rcu(p, list, hash) {
- if (p->key == key) {
+ /*
+ * We use a data dependency barrier in READ_ONCE() to make sure
+ * the chunk we see is fully initialized.
+ */
+ if (READ_ONCE(p->key) == key) {
atomic_long_inc(&p->refs);
return p;
}
@@ -304,9 +314,15 @@ static void untag_chunk(struct node *p)
list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
}

- list_replace_rcu(&chunk->hash, &new->hash);
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&chunk->hash, &new->hash);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
@@ -368,6 +384,10 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
chunk->key = inode_to_key(inode);
+ /*
+ * Inserting into the hash table has to go last as once we do that RCU
+ * readers can see the chunk.
+ */
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
@@ -459,7 +479,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
p->owner = tree;
get_tree(tree);
list_add(&p->list, &tree->chunks);
- list_replace_rcu(&old->hash, &chunk->hash);
list_for_each_entry(owner, &chunk->trees, same_root)
owner->root = chunk;
old->dead = 1;
@@ -467,6 +486,13 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&old->hash, &chunk->hash);
spin_unlock(&hash_lock);
fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
--
2.16.4
Jan Kara
2018-10-17 10:14:58 UTC
Reply
Permalink
The audit_tree_group->mark_mutex is held all the time while we create
the fsnotify mark, add it to the inode, and insert chunk into the hash.
Hence mark cannot get detached during this time and so the check whether
the mark is attached in insert_hash() is pointless.

Reviewed-by: Richard Guy Briggs <***@redhat.com>
Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 2 --
1 file changed, 2 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index d8f6cfa0005b..d150514ff15e 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -184,8 +184,6 @@ static void insert_hash(struct audit_chunk *chunk)
{
struct list_head *list;

- if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
- return;
/*
* Make sure chunk is fully initialized before making it visible in the
* hash. Pairs with a data dependency barrier in READ_ONCE() in
--
2.16.4
Jan Kara
2018-10-17 10:14:59 UTC
Reply
Permalink
Provide a helper function audit_mark_put_chunk() for dropping mark's
reference (which has to happen only after RCU grace period expires).
Currently that happens only from a single place but in later patches we
introduce more callers.

Reviewed-by: Richard Guy Briggs <***@redhat.com>
Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index d150514ff15e..35c031ebcc12 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -132,10 +132,20 @@ static void __put_chunk(struct rcu_head *rcu)
audit_put_chunk(chunk);
}

+/*
+ * Drop reference to the chunk that was held by the mark. This is the reference
+ * that gets dropped after we've removed the chunk from the hash table and we
+ * use it to make sure chunk cannot be freed before RCU grace period expires.
+ */
+static void audit_mark_put_chunk(struct audit_chunk *chunk)
+{
+ call_rcu(&chunk->head, __put_chunk);
+}
+
static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
- call_rcu(&chunk->head, __put_chunk);
+ audit_mark_put_chunk(chunk);
}

static struct audit_chunk *alloc_chunk(int count)
--
2.16.4
Jan Kara
2018-10-17 10:14:53 UTC
Reply
Permalink
When an inode is tagged with a tree, tag_chunk() checks whether there is
audit_tree_group mark attached to the inode and adds one if not. However
nothing protects another tag_chunk() to add the mark between we've
checked and try to add the fsnotify mark thus resulting in an error from
fsnotify_add_mark() and consequently an ENOSPC error from tag_chunk().

Fix the problem by holding mark_mutex over the whole check-insert code
sequence.

Reviewed-by: Richard Guy Briggs <***@redhat.com>
Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 1b55b1026a36..8a74b468b666 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -342,25 +342,29 @@ static void untag_chunk(struct node *p)
spin_lock(&hash_lock);
}

+/* Call with group->mark_mutex held, releases it */
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
struct fsnotify_mark *entry;
struct audit_chunk *chunk = alloc_chunk(1);
- if (!chunk)
+
+ if (!chunk) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
return -ENOMEM;
+ }

entry = &chunk->mark;
- if (fsnotify_add_inode_mark(entry, inode, 0)) {
+ if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry);
return -ENOSPC;
}

- mutex_lock(&entry->group->mark_mutex);
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_destroy_mark(entry, audit_tree_group);
fsnotify_put_mark(entry);
return 0;
@@ -375,7 +379,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
}
insert_hash(chunk);
spin_unlock(&hash_lock);
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry); /* drop initial reference */
return 0;
}
@@ -389,6 +393,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
struct node *p;
int n;

+ mutex_lock(&audit_tree_group->mark_mutex);
old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
audit_tree_group);
if (!old_entry)
@@ -401,6 +406,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(old_entry);
return 0;
}
@@ -409,20 +415,20 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)

chunk = alloc_chunk(old->count + 1);
if (!chunk) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(old_entry);
return -ENOMEM;
}

chunk_entry = &chunk->mark;

- mutex_lock(&old_entry->group->mark_mutex);
/*
* mark_mutex protects mark from getting detached and thus also from
* mark->connector->obj getting NULL.
*/
if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
/* old_entry is being shot, lets just lie */
- mutex_unlock(&old_entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(old_entry);
fsnotify_put_mark(&chunk->mark);
return -ENOENT;
@@ -430,7 +436,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)

if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- mutex_unlock(&old_entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
return -ENOSPC;
@@ -440,7 +446,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
- mutex_unlock(&old_entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);

fsnotify_destroy_mark(chunk_entry, audit_tree_group);

@@ -471,7 +477,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
spin_unlock(&hash_lock);
- mutex_unlock(&old_entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_destroy_mark(old_entry, audit_tree_group);
fsnotify_put_mark(chunk_entry); /* drop initial reference */
fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
--
2.16.4
Jan Kara
2018-10-17 10:14:52 UTC
Reply
Permalink
Currently, audit_tree code uses mark->lock to protect against detaching
of mark from an inode. In most places it however also uses
mark->group->mark_mutex (as we need to atomically replace attached
marks) and this provides protection against mark detaching as well. So
just remove protection with mark->lock from audit tree code and replace
it with mark->group->mark_mutex protection in all the places. It
simplifies the code and gets rid of some ugly catches like calling
fsnotify_add_mark_locked() with mark->lock held (which cannot sleep only
because we hold a reference to another mark attached to the same inode).

Reviewed-by: Richard Guy Briggs <***@redhat.com>
Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 24 ++++--------------------
1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ea43181cde4a..1b55b1026a36 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -193,7 +193,7 @@ static inline struct list_head *chunk_hash(unsigned long key)
return chunk_hash_heads + n % HASH_SIZE;
}

-/* hash_lock & entry->lock is held by caller */
+/* hash_lock & entry->group->mark_mutex is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
unsigned long key = chunk_to_key(chunk);
@@ -256,13 +256,11 @@ static void untag_chunk(struct node *p)
new = alloc_chunk(size);

mutex_lock(&entry->group->mark_mutex);
- spin_lock(&entry->lock);
/*
* mark_mutex protects mark from getting detached and thus also from
* mark->connector->obj getting NULL.
*/
if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
- spin_unlock(&entry->lock);
mutex_unlock(&entry->group->mark_mutex);
if (new)
fsnotify_put_mark(&new->mark);
@@ -280,7 +278,6 @@ static void untag_chunk(struct node *p)
list_del_init(&p->list);
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
- spin_unlock(&entry->lock);
mutex_unlock(&entry->group->mark_mutex);
fsnotify_destroy_mark(entry, audit_tree_group);
goto out;
@@ -323,7 +320,6 @@ static void untag_chunk(struct node *p)
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
spin_unlock(&hash_lock);
- spin_unlock(&entry->lock);
mutex_unlock(&entry->group->mark_mutex);
fsnotify_destroy_mark(entry, audit_tree_group);
fsnotify_put_mark(&new->mark); /* drop initial reference */
@@ -340,7 +336,6 @@ static void untag_chunk(struct node *p)
p->owner = NULL;
put_tree(owner);
spin_unlock(&hash_lock);
- spin_unlock(&entry->lock);
mutex_unlock(&entry->group->mark_mutex);
out:
fsnotify_put_mark(entry);
@@ -360,12 +355,12 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOSPC;
}

- spin_lock(&entry->lock);
+ mutex_lock(&entry->group->mark_mutex);
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
- spin_unlock(&entry->lock);
+ mutex_unlock(&entry->group->mark_mutex);
fsnotify_destroy_mark(entry, audit_tree_group);
fsnotify_put_mark(entry);
return 0;
@@ -380,7 +375,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
}
insert_hash(chunk);
spin_unlock(&hash_lock);
- spin_unlock(&entry->lock);
+ mutex_unlock(&entry->group->mark_mutex);
fsnotify_put_mark(entry); /* drop initial reference */
return 0;
}
@@ -421,14 +416,12 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk_entry = &chunk->mark;

mutex_lock(&old_entry->group->mark_mutex);
- spin_lock(&old_entry->lock);
/*
* mark_mutex protects mark from getting detached and thus also from
* mark->connector->obj getting NULL.
*/
if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
/* old_entry is being shot, lets just lie */
- spin_unlock(&old_entry->lock);
mutex_unlock(&old_entry->group->mark_mutex);
fsnotify_put_mark(old_entry);
fsnotify_put_mark(&chunk->mark);
@@ -437,23 +430,16 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)

if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- spin_unlock(&old_entry->lock);
mutex_unlock(&old_entry->group->mark_mutex);
fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
return -ENOSPC;
}

- /* even though we hold old_entry->lock, this is safe since chunk_entry->lock could NEVER have been grabbed before */
- spin_lock(&chunk_entry->lock);
spin_lock(&hash_lock);
-
- /* we now hold old_entry->lock, chunk_entry->lock, and hash_lock */
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
- spin_unlock(&chunk_entry->lock);
- spin_unlock(&old_entry->lock);
mutex_unlock(&old_entry->group->mark_mutex);

fsnotify_destroy_mark(chunk_entry, audit_tree_group);
@@ -485,8 +471,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
spin_unlock(&hash_lock);
- spin_unlock(&chunk_entry->lock);
- spin_unlock(&old_entry->lock);
mutex_unlock(&old_entry->group->mark_mutex);
fsnotify_destroy_mark(old_entry, audit_tree_group);
fsnotify_put_mark(chunk_entry); /* drop initial reference */
--
2.16.4
Jan Kara
2018-10-17 10:14:55 UTC
Reply
Permalink
Currently chunk hash key (which is in fact pointer to the inode) is
derived as chunk->mark.conn->obj. It is tricky to make this dereference
reliable for hash table lookups only under RCU as mark can get detached
from the connector and connector gets freed independently of the
running lookup. Thus there is a possible use after free / NULL ptr
dereference issue:

CPU1 CPU2
untag_chunk()
...
audit_tree_lookup()
list_for_each_entry_rcu(p, list, hash) {
list_del_rcu(&chunk->hash);
fsnotify_destroy_mark(entry);
fsnotify_put_mark(entry)
chunk_to_key(p)
if (!chunk->mark.connector)
...
hlist_del_init_rcu(&mark->obj_list);
if (hlist_empty(&conn->list)) {
inode = fsnotify_detach_connector_from_object(conn);
mark->connector = NULL;
...
frees connector from workqueue
chunk->mark.connector->obj

This race is probably impossible to hit in practice as the race window
on CPU1 is very narrow and CPU2 has a lot of code to execute. Still it's
better to have this fixed. Since the inode the chunk is attached to is
constant during chunk's lifetime it is easy to cache the key in the
chunk itself and thus avoid these issues.

Reviewed-by: Richard Guy Briggs <***@redhat.com>
Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 27 ++++++++-------------------
1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index c194dbd53753..bac5dd90c629 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -24,6 +24,7 @@ struct audit_tree {

struct audit_chunk {
struct list_head hash;
+ unsigned long key;
struct fsnotify_mark mark;
struct list_head trees; /* with root here */
int dead;
@@ -172,21 +173,6 @@ static unsigned long inode_to_key(const struct inode *inode)
return (unsigned long)&inode->i_fsnotify_marks;
}

-/*
- * Function to return search key in our hash from chunk. Key 0 is special and
- * should never be present in the hash.
- */
-static unsigned long chunk_to_key(struct audit_chunk *chunk)
-{
- /*
- * We have a reference to the mark so it should be attached to a
- * connector.
- */
- if (WARN_ON_ONCE(!chunk->mark.connector))
- return 0;
- return (unsigned long)chunk->mark.connector->obj;
-}
-
static inline struct list_head *chunk_hash(unsigned long key)
{
unsigned long n = key / L1_CACHE_BYTES;
@@ -196,12 +182,12 @@ static inline struct list_head *chunk_hash(unsigned long key)
/* hash_lock & entry->group->mark_mutex is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
- unsigned long key = chunk_to_key(chunk);
struct list_head *list;

if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
return;
- list = chunk_hash(key);
+ WARN_ON_ONCE(!chunk->key);
+ list = chunk_hash(chunk->key);
list_add_rcu(&chunk->hash, list);
}

@@ -213,7 +199,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
struct audit_chunk *p;

list_for_each_entry_rcu(p, list, hash) {
- if (chunk_to_key(p) == key) {
+ if (p->key == key) {
atomic_long_inc(&p->refs);
return p;
}
@@ -295,6 +281,7 @@ static void untag_chunk(struct node *p)

chunk->dead = 1;
spin_lock(&hash_lock);
+ new->key = chunk->key;
list_replace_init(&chunk->trees, &new->trees);
if (owner->root == chunk) {
list_del_init(&owner->same_root);
@@ -380,6 +367,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
+ chunk->key = inode_to_key(inode);
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
@@ -456,6 +444,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
fsnotify_put_mark(old_entry);
return 0;
}
+ chunk->key = old->key;
list_replace_init(&old->trees, &chunk->trees);
for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
struct audit_tree *s = old->owners[n].owner;
@@ -654,7 +643,7 @@ void audit_trim_trees(void)
/* this could be NULL if the watch is dying else where... */
node->index |= 1U<<31;
if (iterate_mounts(compare_root,
- (void *)chunk_to_key(chunk),
+ (void *)(chunk->key),
root_mnt))
node->index &= ~(1U<<31);
}
--
2.16.4
Jan Kara
2018-10-17 10:15:05 UTC
Reply
Permalink
Variables pointing to fsnotify_mark are sometimes called 'entry' and
sometimes 'mark'. Use 'mark' in all places.

Reviewed-by: Richard Guy Briggs <***@redhat.com>
Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 79 +++++++++++++++++++++++++++--------------------------
1 file changed, 40 insertions(+), 39 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 451d1b744e82..624f22dd4d35 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -158,9 +158,9 @@ static void audit_mark_put_chunk(struct audit_chunk *chunk)
call_rcu(&chunk->head, __put_chunk);
}

-static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *entry)
+static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *mark)
{
- return container_of(entry, struct audit_tree_mark, mark);
+ return container_of(mark, struct audit_tree_mark, mark);
}

static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
@@ -168,9 +168,9 @@ static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
return audit_mark(mark)->chunk;
}

-static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
+static void audit_tree_destroy_watch(struct fsnotify_mark *mark)
{
- kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
+ kmem_cache_free(audit_tree_mark_cachep, audit_mark(mark));
}

static struct fsnotify_mark *alloc_mark(void)
@@ -224,7 +224,7 @@ static inline struct list_head *chunk_hash(unsigned long key)
return chunk_hash_heads + n % HASH_SIZE;
}

-/* hash_lock & entry->group->mark_mutex is held by caller */
+/* hash_lock & mark->group->mark_mutex is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
struct list_head *list;
@@ -278,16 +278,16 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}

-static void replace_mark_chunk(struct fsnotify_mark *entry,
+static void replace_mark_chunk(struct fsnotify_mark *mark,
struct audit_chunk *chunk)
{
struct audit_chunk *old;

assert_spin_locked(&hash_lock);
- old = mark_chunk(entry);
- audit_mark(entry)->chunk = chunk;
+ old = mark_chunk(mark);
+ audit_mark(mark)->chunk = chunk;
if (chunk)
- chunk->mark = entry;
+ chunk->mark = mark;
if (old)
old->mark = NULL;
}
@@ -348,7 +348,7 @@ static int chunk_count_trees(struct audit_chunk *chunk)
return ret;
}

-static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
+static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark)
{
struct audit_chunk *new;
int size;
@@ -358,8 +358,8 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
* mark_mutex stabilizes chunk attached to the mark so we can check
* whether it didn't change while we've dropped hash_lock.
*/
- if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
- mark_chunk(entry) != chunk) {
+ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
+ mark_chunk(mark) != chunk) {
mutex_unlock(&audit_tree_group->mark_mutex);
return;
}
@@ -369,12 +369,12 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
spin_lock(&hash_lock);
list_del_init(&chunk->trees);
list_del_rcu(&chunk->hash);
- replace_mark_chunk(entry, NULL);
+ replace_mark_chunk(mark, NULL);
spin_unlock(&hash_lock);
- fsnotify_detach_mark(entry);
+ fsnotify_detach_mark(mark);
mutex_unlock(&audit_tree_group->mark_mutex);
audit_mark_put_chunk(chunk);
- fsnotify_free_mark(entry);
+ fsnotify_free_mark(mark);
return;
}

@@ -400,7 +400,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
/* Call with group->mark_mutex held, releases it */
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
- struct fsnotify_mark *entry;
+ struct fsnotify_mark *mark;
struct audit_chunk *chunk = alloc_chunk(1);

if (!chunk) {
@@ -408,16 +408,16 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOMEM;
}

- entry = alloc_mark();
- if (!entry) {
+ mark = alloc_mark();
+ if (!mark) {
mutex_unlock(&audit_tree_group->mark_mutex);
kfree(chunk);
return -ENOMEM;
}

- if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
+ if (fsnotify_add_inode_mark_locked(mark, inode, 0)) {
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(mark);
kfree(chunk);
return -ENOSPC;
}
@@ -425,14 +425,14 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
- fsnotify_detach_mark(entry);
+ fsnotify_detach_mark(mark);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_free_mark(entry);
- fsnotify_put_mark(entry);
+ fsnotify_free_mark(mark);
+ fsnotify_put_mark(mark);
kfree(chunk);
return 0;
}
- replace_mark_chunk(entry, chunk);
+ replace_mark_chunk(mark, chunk);
chunk->owners[0].index = (1U << 31);
chunk->owners[0].owner = tree;
get_tree(tree);
@@ -454,21 +454,21 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
* we get notification through ->freeing_mark callback and cleanup
* chunk pointing to this mark.
*/
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(mark);
return 0;
}

/* the first tagged inode becomes root of tree */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
- struct fsnotify_mark *entry;
+ struct fsnotify_mark *mark;
struct audit_chunk *chunk, *old;
struct node *p;
int n;

mutex_lock(&audit_tree_group->mark_mutex);
- entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
- if (!entry)
+ mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
+ if (!mark)
return create_chunk(inode, tree);

/*
@@ -478,12 +478,12 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
*/
/* are we already there? */
spin_lock(&hash_lock);
- old = mark_chunk(entry);
+ old = mark_chunk(mark);
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(mark);
return 0;
}
}
@@ -492,7 +492,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk = alloc_chunk(old->count + 1);
if (!chunk) {
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(mark);
return -ENOMEM;
}

@@ -500,7 +500,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) {
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(mark);
kfree(chunk);
return 0;
}
@@ -520,7 +520,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
replace_chunk(chunk, old);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry); /* pair to fsnotify_find mark_entry */
+ fsnotify_put_mark(mark); /* pair to fsnotify_find_mark */
audit_mark_put_chunk(old);

return 0;
@@ -1046,16 +1046,17 @@ static int audit_tree_handle_event(struct fsnotify_group *group,
return 0;
}

-static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
+static void audit_tree_freeing_mark(struct fsnotify_mark *mark,
+ struct fsnotify_group *group)
{
struct audit_chunk *chunk;

- mutex_lock(&entry->group->mark_mutex);
+ mutex_lock(&mark->group->mark_mutex);
spin_lock(&hash_lock);
- chunk = mark_chunk(entry);
- replace_mark_chunk(entry, NULL);
+ chunk = mark_chunk(mark);
+ replace_mark_chunk(mark, NULL);
spin_unlock(&hash_lock);
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&mark->group->mark_mutex);
if (chunk) {
evict_chunk(chunk);
audit_mark_put_chunk(chunk);
@@ -1065,7 +1066,7 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify
* We are guaranteed to have at least one reference to the mark from
* either the inode or the caller of fsnotify_destroy_mark().
*/
- BUG_ON(refcount_read(&entry->refcnt) < 1);
+ BUG_ON(refcount_read(&mark->refcnt) < 1);
}

static const struct fsnotify_ops audit_tree_ops = {
--
2.16.4
Jan Kara
2018-10-17 10:15:04 UTC
Reply
Permalink
Audit tree code currently associates new fsnotify mark with each new
chunk. As chunk attached to an inode is replaced when new tag is added /
removed, we also need to remove old fsnotify mark and add a new one on
such occasion. This is cumbersome and makes locking rules somewhat
difficult to follow.

Fix these problems by allocating fsnotify mark independently of chunk
and keeping it all the time while there is some chunk attached to an
inode. Also add documentation about the locking rules so that things are
easier to follow.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 160 +++++++++++++++++++++++++++-------------------------
1 file changed, 83 insertions(+), 77 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 5deb4e1ed648..451d1b744e82 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -27,7 +27,6 @@ struct audit_chunk {
unsigned long key;
struct fsnotify_mark *mark;
struct list_head trees; /* with root here */
- int dead;
int count;
atomic_long_t refs;
struct rcu_head head;
@@ -48,8 +47,15 @@ static LIST_HEAD(prune_list);
static struct task_struct *prune_thread;

/*
- * One struct chunk is attached to each inode of interest.
- * We replace struct chunk on tagging/untagging.
+ * One struct chunk is attached to each inode of interest through
+ * audit_tree_mark (fsnotify mark). We replace struct chunk on tagging /
+ * untagging, the mark is stable as long as there is chunk attached. The
+ * association between mark and chunk is protected by hash_lock and
+ * audit_tree_group->mark_mutex. Thus as long as we hold
+ * audit_tree_group->mark_mutex and check that the mark is alive by
+ * FSNOTIFY_MARK_FLAG_ATTACHED flag check, we are sure the mark points to
+ * the current chunk.
+ *
* Rules have pointer to struct audit_tree.
* Rules have struct list_head rlist forming a list of rules over
* the same tree.
@@ -68,8 +74,12 @@ static struct task_struct *prune_thread;
* tree is refcounted; one reference for "some rules on rules_list refer to
* it", one for each chunk with pointer to it.
*
- * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
- * of watch contributes 1 to .refs).
+ * chunk is refcounted by embedded .refs. Mark associated with the chunk holds
+ * one chunk reference. This reference is dropped either when a mark is going
+ * to be freed (corresponding inode goes away) or when chunk attached to the
+ * mark gets replaced. This reference must be dropped using
+ * audit_mark_put_chunk() to make sure the reference is dropped only after RCU
+ * grace period as it protects RCU readers of the hash table.
*
* node.index allows to get from node.list to containing chunk.
* MSB of that sucker is stolen to mark taggings that we might have to
@@ -160,8 +170,6 @@ static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)

static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
- struct audit_chunk *chunk = mark_chunk(entry);
- audit_mark_put_chunk(chunk);
kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
}

@@ -188,13 +196,6 @@ static struct audit_chunk *alloc_chunk(int count)
if (!chunk)
return NULL;

- chunk->mark = alloc_mark();
- if (!chunk->mark) {
- kfree(chunk);
- return NULL;
- }
- audit_mark(chunk->mark)->chunk = chunk;
-
INIT_LIST_HEAD(&chunk->hash);
INIT_LIST_HEAD(&chunk->trees);
chunk->count = count;
@@ -277,6 +278,20 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}

+static void replace_mark_chunk(struct fsnotify_mark *entry,
+ struct audit_chunk *chunk)
+{
+ struct audit_chunk *old;
+
+ assert_spin_locked(&hash_lock);
+ old = mark_chunk(entry);
+ audit_mark(entry)->chunk = chunk;
+ if (chunk)
+ chunk->mark = entry;
+ if (old)
+ old->mark = NULL;
+}
+
static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
{
struct audit_tree *owner;
@@ -299,6 +314,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
get_tree(owner);
list_replace_init(&old->owners[j].list, &new->owners[i].list);
}
+ replace_mark_chunk(old->mark, new);
/*
* Make sure chunk is fully initialized before making it visible in the
* hash. Pairs with a data dependency barrier in READ_ONCE() in
@@ -339,23 +355,25 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)

mutex_lock(&audit_tree_group->mark_mutex);
/*
- * mark_mutex protects mark from getting detached and thus also from
- * mark->connector->obj getting NULL.
+ * mark_mutex stabilizes chunk attached to the mark so we can check
+ * whether it didn't change while we've dropped hash_lock.
*/
- if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
+ if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
+ mark_chunk(entry) != chunk) {
mutex_unlock(&audit_tree_group->mark_mutex);
return;
}

size = chunk_count_trees(chunk);
if (!size) {
- chunk->dead = 1;
spin_lock(&hash_lock);
list_del_init(&chunk->trees);
list_del_rcu(&chunk->hash);
+ replace_mark_chunk(entry, NULL);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&audit_tree_group->mark_mutex);
+ audit_mark_put_chunk(chunk);
fsnotify_free_mark(entry);
return;
}
@@ -364,13 +382,6 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
if (!new)
goto out_mutex;

- if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
- FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- fsnotify_put_mark(new->mark);
- goto out_mutex;
- }
-
- chunk->dead = 1;
spin_lock(&hash_lock);
/*
* This has to go last when updating chunk as once replace_chunk() is
@@ -378,10 +389,8 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
*/
replace_chunk(new, chunk);
spin_unlock(&hash_lock);
- fsnotify_detach_mark(entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_free_mark(entry);
- fsnotify_put_mark(new->mark); /* drop initial reference */
+ audit_mark_put_chunk(chunk);
return;

out_mutex:
@@ -399,23 +408,31 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOMEM;
}

- entry = chunk->mark;
+ entry = alloc_mark();
+ if (!entry) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
+ kfree(chunk);
+ return -ENOMEM;
+ }
+
if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry);
+ kfree(chunk);
return -ENOSPC;
}

spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
- chunk->dead = 1;
fsnotify_detach_mark(entry);
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_free_mark(entry);
fsnotify_put_mark(entry);
+ kfree(chunk);
return 0;
}
+ replace_mark_chunk(entry, chunk);
chunk->owners[0].index = (1U << 31);
chunk->owners[0].owner = tree;
get_tree(tree);
@@ -432,33 +449,41 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry); /* drop initial reference */
+ /*
+ * Drop our initial reference. When mark we point to is getting freed,
+ * we get notification through ->freeing_mark callback and cleanup
+ * chunk pointing to this mark.
+ */
+ fsnotify_put_mark(entry);
return 0;
}

/* the first tagged inode becomes root of tree */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
- struct fsnotify_mark *old_entry, *chunk_entry;
+ struct fsnotify_mark *entry;
struct audit_chunk *chunk, *old;
struct node *p;
int n;

mutex_lock(&audit_tree_group->mark_mutex);
- old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
- audit_tree_group);
- if (!old_entry)
+ entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
+ if (!entry)
return create_chunk(inode, tree);

- old = mark_chunk(old_entry);
-
+ /*
+ * Found mark is guaranteed to be attached and mark_mutex protects mark
+ * from getting detached and thus it makes sure there is chunk attached
+ * to the mark.
+ */
/* are we already there? */
spin_lock(&hash_lock);
+ old = mark_chunk(entry);
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(old_entry);
+ fsnotify_put_mark(entry);
return 0;
}
}
@@ -467,41 +492,16 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk = alloc_chunk(old->count + 1);
if (!chunk) {
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(old_entry);
+ fsnotify_put_mark(entry);
return -ENOMEM;
}

- chunk_entry = chunk->mark;
-
- /*
- * mark_mutex protects mark from getting detached and thus also from
- * mark->connector->obj getting NULL.
- */
- if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
- /* old_entry is being shot, lets just lie */
- mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(old_entry);
- fsnotify_put_mark(chunk->mark);
- return -ENOENT;
- }
-
- if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
- FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(chunk_entry);
- fsnotify_put_mark(old_entry);
- return -ENOSPC;
- }
-
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
- chunk->dead = 1;
- fsnotify_detach_mark(chunk_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_free_mark(chunk_entry);
- fsnotify_put_mark(chunk_entry);
- fsnotify_put_mark(old_entry);
+ fsnotify_put_mark(entry);
+ kfree(chunk);
return 0;
}
p = &chunk->owners[chunk->count - 1];
@@ -509,7 +509,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
p->owner = tree;
get_tree(tree);
list_add(&p->list, &tree->chunks);
- old->dead = 1;
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
@@ -520,11 +519,10 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
*/
replace_chunk(chunk, old);
spin_unlock(&hash_lock);
- fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_free_mark(old_entry);
- fsnotify_put_mark(chunk_entry); /* drop initial reference */
- fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
+ fsnotify_put_mark(entry); /* pair to fsnotify_find mark_entry */
+ audit_mark_put_chunk(old);
+
return 0;
}

@@ -587,6 +585,9 @@ static void prune_tree_chunks(struct audit_tree *victim, bool tagged)
chunk = find_chunk(p);
mark = chunk->mark;
remove_chunk_node(chunk, p);
+ /* Racing with audit_tree_freeing_mark()? */
+ if (!mark)
+ continue;
fsnotify_get_mark(mark);
spin_unlock(&hash_lock);

@@ -1009,10 +1010,6 @@ static void evict_chunk(struct audit_chunk *chunk)
int need_prune = 0;
int n;

- if (chunk->dead)
- return;
-
- chunk->dead = 1;
mutex_lock(&audit_filter_mutex);
spin_lock(&hash_lock);
while (!list_empty(&chunk->trees)) {
@@ -1051,9 +1048,18 @@ static int audit_tree_handle_event(struct fsnotify_group *group,

static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
{
- struct audit_chunk *chunk = mark_chunk(entry);
+ struct audit_chunk *chunk;

- evict_chunk(chunk);
+ mutex_lock(&entry->group->mark_mutex);
+ spin_lock(&hash_lock);
+ chunk = mark_chunk(entry);
+ replace_mark_chunk(entry, NULL);
+ spin_unlock(&hash_lock);
+ mutex_unlock(&entry->group->mark_mutex);
+ if (chunk) {
+ evict_chunk(chunk);
+ audit_mark_put_chunk(chunk);
+ }

/*
* We are guaranteed to have at least one reference to the mark from
--
2.16.4
Richard Guy Briggs
2018-10-18 19:39:24 UTC
Reply
Permalink
Post by Jan Kara
Audit tree code currently associates new fsnotify mark with each new
chunk. As chunk attached to an inode is replaced when new tag is added /
removed, we also need to remove old fsnotify mark and add a new one on
such occasion. This is cumbersome and makes locking rules somewhat
difficult to follow.
Fix these problems by allocating fsnotify mark independently of chunk
and keeping it all the time while there is some chunk attached to an
inode. Also add documentation about the locking rules so that things are
easier to follow.
---
kernel/audit_tree.c | 160 +++++++++++++++++++++++++++-------------------------
1 file changed, 83 insertions(+), 77 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 5deb4e1ed648..451d1b744e82 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -27,7 +27,6 @@ struct audit_chunk {
unsigned long key;
struct fsnotify_mark *mark;
struct list_head trees; /* with root here */
- int dead;
int count;
atomic_long_t refs;
struct rcu_head head;
@@ -48,8 +47,15 @@ static LIST_HEAD(prune_list);
static struct task_struct *prune_thread;
/*
- * One struct chunk is attached to each inode of interest.
- * We replace struct chunk on tagging/untagging.
+ * One struct chunk is attached to each inode of interest through
+ * audit_tree_mark (fsnotify mark). We replace struct chunk on tagging /
+ * untagging, the mark is stable as long as there is chunk attached. The
+ * association between mark and chunk is protected by hash_lock and
+ * audit_tree_group->mark_mutex. Thus as long as we hold
+ * audit_tree_group->mark_mutex and check that the mark is alive by
+ * FSNOTIFY_MARK_FLAG_ATTACHED flag check, we are sure the mark points to
+ * the current chunk.
+ *
* Rules have pointer to struct audit_tree.
* Rules have struct list_head rlist forming a list of rules over
* the same tree.
@@ -68,8 +74,12 @@ static struct task_struct *prune_thread;
* tree is refcounted; one reference for "some rules on rules_list refer to
* it", one for each chunk with pointer to it.
*
- * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
- * of watch contributes 1 to .refs).
+ * chunk is refcounted by embedded .refs. Mark associated with the chunk holds
+ * one chunk reference. This reference is dropped either when a mark is going
+ * to be freed (corresponding inode goes away) or when chunk attached to the
+ * mark gets replaced. This reference must be dropped using
+ * audit_mark_put_chunk() to make sure the reference is dropped only after RCU
+ * grace period as it protects RCU readers of the hash table.
*
* node.index allows to get from node.list to containing chunk.
* MSB of that sucker is stolen to mark taggings that we might have to
@@ -160,8 +170,6 @@ static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
- struct audit_chunk *chunk = mark_chunk(entry);
- audit_mark_put_chunk(chunk);
kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
}
@@ -188,13 +196,6 @@ static struct audit_chunk *alloc_chunk(int count)
if (!chunk)
return NULL;
- chunk->mark = alloc_mark();
- if (!chunk->mark) {
- kfree(chunk);
- return NULL;
- }
- audit_mark(chunk->mark)->chunk = chunk;
-
INIT_LIST_HEAD(&chunk->hash);
INIT_LIST_HEAD(&chunk->trees);
chunk->count = count;
@@ -277,6 +278,20 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}
+static void replace_mark_chunk(struct fsnotify_mark *entry,
+ struct audit_chunk *chunk)
+{
+ struct audit_chunk *old;
+
+ assert_spin_locked(&hash_lock);
+ old = mark_chunk(entry);
+ audit_mark(entry)->chunk = chunk;
+ if (chunk)
+ chunk->mark = entry;
+ if (old)
+ old->mark = NULL;
+}
+
static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
{
struct audit_tree *owner;
@@ -299,6 +314,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
get_tree(owner);
list_replace_init(&old->owners[j].list, &new->owners[i].list);
}
+ replace_mark_chunk(old->mark, new);
/*
* Make sure chunk is fully initialized before making it visible in the
* hash. Pairs with a data dependency barrier in READ_ONCE() in
@@ -339,23 +355,25 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
mutex_lock(&audit_tree_group->mark_mutex);
/*
- * mark_mutex protects mark from getting detached and thus also from
- * mark->connector->obj getting NULL.
+ * mark_mutex stabilizes chunk attached to the mark so we can check
+ * whether it didn't change while we've dropped hash_lock.
*/
- if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
+ if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
+ mark_chunk(entry) != chunk) {
mutex_unlock(&audit_tree_group->mark_mutex);
return;
}
size = chunk_count_trees(chunk);
if (!size) {
- chunk->dead = 1;
spin_lock(&hash_lock);
list_del_init(&chunk->trees);
list_del_rcu(&chunk->hash);
+ replace_mark_chunk(entry, NULL);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&audit_tree_group->mark_mutex);
+ audit_mark_put_chunk(chunk);
fsnotify_free_mark(entry);
return;
}
@@ -364,13 +382,6 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
if (!new)
goto out_mutex;
- if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
- FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- fsnotify_put_mark(new->mark);
- goto out_mutex;
- }
-
- chunk->dead = 1;
spin_lock(&hash_lock);
/*
* This has to go last when updating chunk as once replace_chunk() is
@@ -378,10 +389,8 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
*/
replace_chunk(new, chunk);
spin_unlock(&hash_lock);
- fsnotify_detach_mark(entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_free_mark(entry);
- fsnotify_put_mark(new->mark); /* drop initial reference */
+ audit_mark_put_chunk(chunk);
return;
@@ -399,23 +408,31 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOMEM;
}
- entry = chunk->mark;
+ entry = alloc_mark();
+ if (!entry) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
+ kfree(chunk);
+ return -ENOMEM;
+ }
+
if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry);
+ kfree(chunk);
return -ENOSPC;
}
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
- chunk->dead = 1;
fsnotify_detach_mark(entry);
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_free_mark(entry);
fsnotify_put_mark(entry);
+ kfree(chunk);
return 0;
}
+ replace_mark_chunk(entry, chunk);
chunk->owners[0].index = (1U << 31);
chunk->owners[0].owner = tree;
get_tree(tree);
@@ -432,33 +449,41 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry); /* drop initial reference */
+ /*
+ * Drop our initial reference. When mark we point to is getting freed,
+ * we get notification through ->freeing_mark callback and cleanup
+ * chunk pointing to this mark.
+ */
+ fsnotify_put_mark(entry);
return 0;
}
/* the first tagged inode becomes root of tree */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
- struct fsnotify_mark *old_entry, *chunk_entry;
+ struct fsnotify_mark *entry;
struct audit_chunk *chunk, *old;
struct node *p;
int n;
mutex_lock(&audit_tree_group->mark_mutex);
- old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
- audit_tree_group);
- if (!old_entry)
+ entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
+ if (!entry)
return create_chunk(inode, tree);
- old = mark_chunk(old_entry);
-
+ /*
+ * Found mark is guaranteed to be attached and mark_mutex protects mark
+ * from getting detached and thus it makes sure there is chunk attached
+ * to the mark.
+ */
/* are we already there? */
spin_lock(&hash_lock);
+ old = mark_chunk(entry);
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(old_entry);
+ fsnotify_put_mark(entry);
return 0;
}
}
@@ -467,41 +492,16 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk = alloc_chunk(old->count + 1);
if (!chunk) {
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(old_entry);
+ fsnotify_put_mark(entry);
return -ENOMEM;
}
- chunk_entry = chunk->mark;
-
- /*
- * mark_mutex protects mark from getting detached and thus also from
- * mark->connector->obj getting NULL.
- */
- if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
- /* old_entry is being shot, lets just lie */
- mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(old_entry);
- fsnotify_put_mark(chunk->mark);
- return -ENOENT;
- }
-
- if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
- FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(chunk_entry);
- fsnotify_put_mark(old_entry);
- return -ENOSPC;
- }
-
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
- chunk->dead = 1;
- fsnotify_detach_mark(chunk_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_free_mark(chunk_entry);
- fsnotify_put_mark(chunk_entry);
- fsnotify_put_mark(old_entry);
+ fsnotify_put_mark(entry);
+ kfree(chunk);
return 0;
}
p = &chunk->owners[chunk->count - 1];
@@ -509,7 +509,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
p->owner = tree;
get_tree(tree);
list_add(&p->list, &tree->chunks);
- old->dead = 1;
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
@@ -520,11 +519,10 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
*/
replace_chunk(chunk, old);
spin_unlock(&hash_lock);
- fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_free_mark(old_entry);
- fsnotify_put_mark(chunk_entry); /* drop initial reference */
- fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
+ fsnotify_put_mark(entry); /* pair to fsnotify_find mark_entry */
+ audit_mark_put_chunk(old);
+
return 0;
}
@@ -587,6 +585,9 @@ static void prune_tree_chunks(struct audit_tree *victim, bool tagged)
chunk = find_chunk(p);
mark = chunk->mark;
remove_chunk_node(chunk, p);
+ /* Racing with audit_tree_freeing_mark()? */
+ if (!mark)
+ continue;
fsnotify_get_mark(mark);
spin_unlock(&hash_lock);
@@ -1009,10 +1010,6 @@ static void evict_chunk(struct audit_chunk *chunk)
int need_prune = 0;
int n;
- if (chunk->dead)
- return;
-
- chunk->dead = 1;
mutex_lock(&audit_filter_mutex);
spin_lock(&hash_lock);
while (!list_empty(&chunk->trees)) {
@@ -1051,9 +1048,18 @@ static int audit_tree_handle_event(struct fsnotify_group *group,
static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
{
- struct audit_chunk *chunk = mark_chunk(entry);
+ struct audit_chunk *chunk;
- evict_chunk(chunk);
+ mutex_lock(&entry->group->mark_mutex);
+ spin_lock(&hash_lock);
+ chunk = mark_chunk(entry);
+ replace_mark_chunk(entry, NULL);
+ spin_unlock(&hash_lock);
+ mutex_unlock(&entry->group->mark_mutex);
+ if (chunk) {
+ evict_chunk(chunk);
+ audit_mark_put_chunk(chunk);
+ }
/*
* We are guaranteed to have at least one reference to the mark from
--
2.16.4
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Jan Kara
2018-10-17 10:15:01 UTC
Reply
Permalink
When removing chunk from a tree, we do shrink the chunk. This can fail
for various reasons (due to races, ENOMEM, etc.) and in some cases we
just bail from untag_chunk() relying on someone else to cleanup.
Although this currently works, later we will need to add new failure
situation which would break. Also this simplifies the code and will
allow us to make locking around untag_chunk() less awkward.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 42 +++++++++++++++++-------------------------
1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index c98ab2d68a1c..ca2b6baff7aa 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -309,16 +309,28 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
list_replace_rcu(&old->hash, &new->hash);
}

+static void remove_chunk_node(struct audit_chunk *chunk, struct node *p)
+{
+ struct audit_tree *owner = p->owner;
+
+ if (owner->root == chunk) {
+ list_del_init(&owner->same_root);
+ owner->root = NULL;
+ }
+ list_del_init(&p->list);
+ p->owner = NULL;
+ put_tree(owner);
+}
+
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
struct fsnotify_mark *entry = chunk->mark;
struct audit_chunk *new = NULL;
- struct audit_tree *owner;
int size = chunk->count - 1;

+ remove_chunk_node(chunk, p);
fsnotify_get_mark(entry);
-
spin_unlock(&hash_lock);

if (size)
@@ -336,15 +348,10 @@ static void untag_chunk(struct node *p)
goto out;
}

- owner = p->owner;
-
if (!size) {
chunk->dead = 1;
spin_lock(&hash_lock);
list_del_init(&chunk->trees);
- if (owner->root == chunk)
- owner->root = NULL;
- list_del_init(&p->list);
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
@@ -354,21 +361,16 @@ static void untag_chunk(struct node *p)
}

if (!new)
- goto Fallback;
+ goto out_mutex;

if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
FSNOTIFY_OBJ_TYPE_INODE, 1)) {
fsnotify_put_mark(new->mark);
- goto Fallback;
+ goto out_mutex;
}

chunk->dead = 1;
spin_lock(&hash_lock);
- if (owner->root == chunk) {
- list_del_init(&owner->same_root);
- owner->root = NULL;
- }
- list_del_init(&p->list);
/*
* This has to go last when updating chunk as once replace_chunk() is
* called, new RCU readers can see the new chunk.
@@ -381,17 +383,7 @@ static void untag_chunk(struct node *p)
fsnotify_put_mark(new->mark); /* drop initial reference */
goto out;

-Fallback:
- // do the best we can
- spin_lock(&hash_lock);
- if (owner->root == chunk) {
- list_del_init(&owner->same_root);
- owner->root = NULL;
- }
- list_del_init(&p->list);
- p->owner = NULL;
- put_tree(owner);
- spin_unlock(&hash_lock);
+out_mutex:
mutex_unlock(&entry->group->mark_mutex);
out:
fsnotify_put_mark(entry);
--
2.16.4
Richard Guy Briggs
2018-10-18 19:29:38 UTC
Reply
Permalink
Post by Jan Kara
When removing chunk from a tree, we do shrink the chunk. This can fail
for various reasons (due to races, ENOMEM, etc.) and in some cases we
just bail from untag_chunk() relying on someone else to cleanup.
Although this currently works, later we will need to add new failure
situation which would break. Also this simplifies the code and will
allow us to make locking around untag_chunk() less awkward.
---
kernel/audit_tree.c | 42 +++++++++++++++++-------------------------
1 file changed, 17 insertions(+), 25 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index c98ab2d68a1c..ca2b6baff7aa 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -309,16 +309,28 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
list_replace_rcu(&old->hash, &new->hash);
}
+static void remove_chunk_node(struct audit_chunk *chunk, struct node *p)
+{
+ struct audit_tree *owner = p->owner;
+
+ if (owner->root == chunk) {
+ list_del_init(&owner->same_root);
+ owner->root = NULL;
+ }
+ list_del_init(&p->list);
+ p->owner = NULL;
+ put_tree(owner);
+}
+
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
struct fsnotify_mark *entry = chunk->mark;
struct audit_chunk *new = NULL;
- struct audit_tree *owner;
int size = chunk->count - 1;
+ remove_chunk_node(chunk, p);
fsnotify_get_mark(entry);
-
spin_unlock(&hash_lock);
if (size)
@@ -336,15 +348,10 @@ static void untag_chunk(struct node *p)
goto out;
}
- owner = p->owner;
-
if (!size) {
chunk->dead = 1;
spin_lock(&hash_lock);
list_del_init(&chunk->trees);
- if (owner->root == chunk)
- owner->root = NULL;
- list_del_init(&p->list);
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
@@ -354,21 +361,16 @@ static void untag_chunk(struct node *p)
}
if (!new)
- goto Fallback;
+ goto out_mutex;
if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
FSNOTIFY_OBJ_TYPE_INODE, 1)) {
fsnotify_put_mark(new->mark);
- goto Fallback;
+ goto out_mutex;
}
chunk->dead = 1;
spin_lock(&hash_lock);
- if (owner->root == chunk) {
- list_del_init(&owner->same_root);
- owner->root = NULL;
- }
- list_del_init(&p->list);
/*
* This has to go last when updating chunk as once replace_chunk() is
* called, new RCU readers can see the new chunk.
@@ -381,17 +383,7 @@ static void untag_chunk(struct node *p)
fsnotify_put_mark(new->mark); /* drop initial reference */
goto out;
- // do the best we can
- spin_lock(&hash_lock);
- if (owner->root == chunk) {
- list_del_init(&owner->same_root);
- owner->root = NULL;
- }
- list_del_init(&p->list);
- p->owner = NULL;
- put_tree(owner);
- spin_unlock(&hash_lock);
mutex_unlock(&entry->group->mark_mutex);
fsnotify_put_mark(entry);
--
2.16.4
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Jan Kara
2018-10-17 10:15:03 UTC
Reply
Permalink
untag_chunk() has to be called with hash_lock, it drops it and
reacquires it when returning. The unlocking of hash_lock is thus hidden
from the callers of untag_chunk() with is rather error prone. Reorganize
the code so that untag_chunk() is called without hash_lock, only with
mark reference preventing the chunk from going away.

Since this requires some more code in the caller of untag_chunk() to
assure forward progress, factor out loop pruning tree from all chunks
into a common helper function.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 75 ++++++++++++++++++++++++++++-------------------------
1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 145e8c92dd31..5deb4e1ed648 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -332,27 +332,19 @@ static int chunk_count_trees(struct audit_chunk *chunk)
return ret;
}

-static void untag_chunk(struct node *p)
+static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
{
- struct audit_chunk *chunk = find_chunk(p);
- struct fsnotify_mark *entry = chunk->mark;
- struct audit_chunk *new = NULL;
+ struct audit_chunk *new;
int size;

- remove_chunk_node(chunk, p);
- fsnotify_get_mark(entry);
- spin_unlock(&hash_lock);
-
- mutex_lock(&entry->group->mark_mutex);
+ mutex_lock(&audit_tree_group->mark_mutex);
/*
* mark_mutex protects mark from getting detached and thus also from
* mark->connector->obj getting NULL.
*/
if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
- mutex_unlock(&entry->group->mark_mutex);
- if (new)
- fsnotify_put_mark(new->mark);
- goto out;
+ mutex_unlock(&audit_tree_group->mark_mutex);
+ return;
}

size = chunk_count_trees(chunk);
@@ -363,9 +355,9 @@ static void untag_chunk(struct node *p)
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_free_mark(entry);
- goto out;
+ return;
}

new = alloc_chunk(size);
@@ -387,16 +379,13 @@ static void untag_chunk(struct node *p)
replace_chunk(new, chunk);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_free_mark(entry);
fsnotify_put_mark(new->mark); /* drop initial reference */
- goto out;
+ return;

out_mutex:
- mutex_unlock(&entry->group->mark_mutex);
-out:
- fsnotify_put_mark(entry);
- spin_lock(&hash_lock);
+ mutex_unlock(&audit_tree_group->mark_mutex);
}

/* Call with group->mark_mutex held, releases it */
@@ -579,22 +568,45 @@ static void kill_rules(struct audit_tree *tree)
}

/*
- * finish killing struct audit_tree
+ * Remove tree from chunks. If 'tagged' is set, remove tree only from tagged
+ * chunks. The function expects tagged chunks are all at the beginning of the
+ * chunks list.
*/
-static void prune_one(struct audit_tree *victim)
+static void prune_tree_chunks(struct audit_tree *victim, bool tagged)
{
spin_lock(&hash_lock);
while (!list_empty(&victim->chunks)) {
struct node *p;
+ struct audit_chunk *chunk;
+ struct fsnotify_mark *mark;
+
+ p = list_first_entry(&victim->chunks, struct node, list);
+ /* have we run out of marked? */
+ if (tagged && !(p->index & (1U<<31)))
+ break;
+ chunk = find_chunk(p);
+ mark = chunk->mark;
+ remove_chunk_node(chunk, p);
+ fsnotify_get_mark(mark);
+ spin_unlock(&hash_lock);

- p = list_entry(victim->chunks.next, struct node, list);
+ untag_chunk(chunk, mark);
+ fsnotify_put_mark(mark);

- untag_chunk(p);
+ spin_lock(&hash_lock);
}
spin_unlock(&hash_lock);
put_tree(victim);
}

+/*
+ * finish killing struct audit_tree
+ */
+static void prune_one(struct audit_tree *victim)
+{
+ prune_tree_chunks(victim, false);
+}
+
/* trim the uncommitted chunks from tree */

static void trim_marked(struct audit_tree *tree)
@@ -614,18 +626,11 @@ static void trim_marked(struct audit_tree *tree)
list_add(p, &tree->chunks);
}
}
+ spin_unlock(&hash_lock);

- while (!list_empty(&tree->chunks)) {
- struct node *node;
-
- node = list_entry(tree->chunks.next, struct node, list);
-
- /* have we run out of marked? */
- if (!(node->index & (1U<<31)))
- break;
+ prune_tree_chunks(tree, true);

- untag_chunk(node);
- }
+ spin_lock(&hash_lock);
if (!tree->root && !tree->goner) {
tree->goner = 1;
spin_unlock(&hash_lock);
--
2.16.4
Richard Guy Briggs
2018-10-18 12:27:40 UTC
Reply
Permalink
Post by Jan Kara
untag_chunk() has to be called with hash_lock, it drops it and
reacquires it when returning. The unlocking of hash_lock is thus hidden
from the callers of untag_chunk() with is rather error prone. Reorganize
the code so that untag_chunk() is called without hash_lock, only with
mark reference preventing the chunk from going away.
Since this requires some more code in the caller of untag_chunk() to
assure forward progress, factor out loop pruning tree from all chunks
into a common helper function.
---
kernel/audit_tree.c | 75 ++++++++++++++++++++++++++++-------------------------
1 file changed, 40 insertions(+), 35 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 145e8c92dd31..5deb4e1ed648 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -332,27 +332,19 @@ static int chunk_count_trees(struct audit_chunk *chunk)
return ret;
}
-static void untag_chunk(struct node *p)
+static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
{
- struct audit_chunk *chunk = find_chunk(p);
- struct fsnotify_mark *entry = chunk->mark;
- struct audit_chunk *new = NULL;
+ struct audit_chunk *new;
int size;
- remove_chunk_node(chunk, p);
- fsnotify_get_mark(entry);
- spin_unlock(&hash_lock);
-
- mutex_lock(&entry->group->mark_mutex);
+ mutex_lock(&audit_tree_group->mark_mutex);
/*
* mark_mutex protects mark from getting detached and thus also from
* mark->connector->obj getting NULL.
*/
if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
- mutex_unlock(&entry->group->mark_mutex);
- if (new)
- fsnotify_put_mark(new->mark);
- goto out;
+ mutex_unlock(&audit_tree_group->mark_mutex);
+ return;
This isn't an error, but more a question of style. Since the end of the
function has been simplified, this could just be "goto out_mutex", or
remove the one remaining "goto out_mutex" after the next patch and do
the same as here since other exits aren't so clean.
Post by Jan Kara
}
size = chunk_count_trees(chunk);
@@ -363,9 +355,9 @@ static void untag_chunk(struct node *p)
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_free_mark(entry);
- goto out;
+ return;
}
new = alloc_chunk(size);
@@ -387,16 +379,13 @@ static void untag_chunk(struct node *p)
replace_chunk(new, chunk);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_free_mark(entry);
fsnotify_put_mark(new->mark); /* drop initial reference */
- goto out;
+ return;
- mutex_unlock(&entry->group->mark_mutex);
- fsnotify_put_mark(entry);
- spin_lock(&hash_lock);
+ mutex_unlock(&audit_tree_group->mark_mutex);
}
/* Call with group->mark_mutex held, releases it */
@@ -579,22 +568,45 @@ static void kill_rules(struct audit_tree *tree)
}
/*
- * finish killing struct audit_tree
+ * Remove tree from chunks. If 'tagged' is set, remove tree only from tagged
+ * chunks. The function expects tagged chunks are all at the beginning of the
+ * chunks list.
*/
-static void prune_one(struct audit_tree *victim)
+static void prune_tree_chunks(struct audit_tree *victim, bool tagged)
{
spin_lock(&hash_lock);
while (!list_empty(&victim->chunks)) {
struct node *p;
+ struct audit_chunk *chunk;
+ struct fsnotify_mark *mark;
+
+ p = list_first_entry(&victim->chunks, struct node, list);
+ /* have we run out of marked? */
+ if (tagged && !(p->index & (1U<<31)))
+ break;
+ chunk = find_chunk(p);
+ mark = chunk->mark;
+ remove_chunk_node(chunk, p);
+ fsnotify_get_mark(mark);
+ spin_unlock(&hash_lock);
- p = list_entry(victim->chunks.next, struct node, list);
+ untag_chunk(chunk, mark);
+ fsnotify_put_mark(mark);
- untag_chunk(p);
+ spin_lock(&hash_lock);
}
spin_unlock(&hash_lock);
put_tree(victim);
}
+/*
+ * finish killing struct audit_tree
+ */
+static void prune_one(struct audit_tree *victim)
+{
+ prune_tree_chunks(victim, false);
+}
+
/* trim the uncommitted chunks from tree */
static void trim_marked(struct audit_tree *tree)
@@ -614,18 +626,11 @@ static void trim_marked(struct audit_tree *tree)
list_add(p, &tree->chunks);
}
}
+ spin_unlock(&hash_lock);
- while (!list_empty(&tree->chunks)) {
- struct node *node;
-
- node = list_entry(tree->chunks.next, struct node, list);
-
- /* have we run out of marked? */
- if (!(node->index & (1U<<31)))
- break;
+ prune_tree_chunks(tree, true);
- untag_chunk(node);
- }
+ spin_lock(&hash_lock);
if (!tree->root && !tree->goner) {
tree->goner = 1;
spin_unlock(&hash_lock);
--
2.16.4
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Richard Guy Briggs
2018-10-19 11:18:22 UTC
Reply
Permalink
Post by Richard Guy Briggs
Post by Jan Kara
if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
- mutex_unlock(&entry->group->mark_mutex);
- if (new)
- fsnotify_put_mark(new->mark);
- goto out;
+ mutex_unlock(&audit_tree_group->mark_mutex);
+ return;
This isn't an error, but more a question of style. Since the end of the
function has been simplified, this could just be "goto out_mutex", or
remove the one remaining "goto out_mutex" after the next patch and do
the same as here since other exits aren't so clean.
Good point, I've switch this to "goto out_mutex". The result is attached.
Can I add your Reviewed-by tag?
Yup.
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Jan Kara
2018-10-17 10:14:57 UTC
Reply
Permalink
Chunk replacement code is very similar for the cases where we grow or
shrink chunk. Factor the code out into a common helper function.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 86 +++++++++++++++++++++++++----------------------------
1 file changed, 40 insertions(+), 46 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 307749d6773c..d8f6cfa0005b 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -235,6 +235,38 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}

+static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
+ struct node *skip)
+{
+ struct audit_tree *owner;
+ int i, j;
+
+ new->key = old->key;
+ list_splice_init(&old->trees, &new->trees);
+ list_for_each_entry(owner, &new->trees, same_root)
+ owner->root = new;
+ for (i = j = 0; j < old->count; i++, j++) {
+ if (&old->owners[j] == skip) {
+ i--;
+ continue;
+ }
+ owner = old->owners[j].owner;
+ new->owners[i].owner = owner;
+ new->owners[i].index = old->owners[j].index - j + i;
+ if (!owner) /* result of earlier fallback */
+ continue;
+ get_tree(owner);
+ list_replace_init(&old->owners[j].list, &new->owners[i].list);
+ }
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&old->hash, &new->hash);
+}
+
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
@@ -242,7 +274,6 @@ static void untag_chunk(struct node *p)
struct audit_chunk *new = NULL;
struct audit_tree *owner;
int size = chunk->count - 1;
- int i, j;

fsnotify_get_mark(entry);

@@ -291,38 +322,16 @@ static void untag_chunk(struct node *p)

chunk->dead = 1;
spin_lock(&hash_lock);
- new->key = chunk->key;
- list_replace_init(&chunk->trees, &new->trees);
if (owner->root == chunk) {
list_del_init(&owner->same_root);
owner->root = NULL;
}
-
- for (i = j = 0; j <= size; i++, j++) {
- struct audit_tree *s;
- if (&chunk->owners[j] == p) {
- list_del_init(&p->list);
- i--;
- continue;
- }
- s = chunk->owners[j].owner;
- new->owners[i].owner = s;
- new->owners[i].index = chunk->owners[j].index - j + i;
- if (!s) /* result of earlier fallback */
- continue;
- get_tree(s);
- list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
- }
-
- list_for_each_entry(owner, &new->trees, same_root)
- owner->root = new;
+ list_del_init(&p->list);
/*
- * Make sure chunk is fully initialized before making it visible in the
- * hash. Pairs with a data dependency barrier in READ_ONCE() in
- * audit_tree_lookup().
+ * This has to go last when updating chunk as once replace_chunk() is
+ * called, new RCU readers can see the new chunk.
*/
- smp_wmb();
- list_replace_rcu(&chunk->hash, &new->hash);
+ replace_chunk(new, chunk, p);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
@@ -399,7 +408,6 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
struct fsnotify_mark *old_entry, *chunk_entry;
- struct audit_tree *owner;
struct audit_chunk *chunk, *old;
struct node *p;
int n;
@@ -464,35 +472,21 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
fsnotify_put_mark(old_entry);
return 0;
}
- chunk->key = old->key;
- list_replace_init(&old->trees, &chunk->trees);
- for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
- struct audit_tree *s = old->owners[n].owner;
- p->owner = s;
- p->index = old->owners[n].index;
- if (!s) /* result of fallback in untag */
- continue;
- get_tree(s);
- list_replace_init(&old->owners[n].list, &p->list);
- }
+ p = &chunk->owners[chunk->count - 1];
p->index = (chunk->count - 1) | (1U<<31);
p->owner = tree;
get_tree(tree);
list_add(&p->list, &tree->chunks);
- list_for_each_entry(owner, &chunk->trees, same_root)
- owner->root = chunk;
old->dead = 1;
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
/*
- * Make sure chunk is fully initialized before making it visible in the
- * hash. Pairs with a data dependency barrier in READ_ONCE() in
- * audit_tree_lookup().
+ * This has to go last when updating chunk as once replace_chunk() is
+ * called, new RCU readers can see the new chunk.
*/
- smp_wmb();
- list_replace_rcu(&old->hash, &chunk->hash);
+ replace_chunk(chunk, old, NULL);
spin_unlock(&hash_lock);
fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
--
2.16.4
Richard Guy Briggs
2018-10-18 19:27:01 UTC
Reply
Permalink
Post by Jan Kara
Chunk replacement code is very similar for the cases where we grow or
shrink chunk. Factor the code out into a common helper function.
Noting just the switch from list_replace_init() to list_splice_init().
Post by Jan Kara
---
kernel/audit_tree.c | 86 +++++++++++++++++++++++++----------------------------
1 file changed, 40 insertions(+), 46 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 307749d6773c..d8f6cfa0005b 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -235,6 +235,38 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}
+static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
+ struct node *skip)
+{
+ struct audit_tree *owner;
+ int i, j;
+
+ new->key = old->key;
+ list_splice_init(&old->trees, &new->trees);
+ list_for_each_entry(owner, &new->trees, same_root)
+ owner->root = new;
+ for (i = j = 0; j < old->count; i++, j++) {
+ if (&old->owners[j] == skip) {
+ i--;
+ continue;
+ }
+ owner = old->owners[j].owner;
+ new->owners[i].owner = owner;
+ new->owners[i].index = old->owners[j].index - j + i;
+ if (!owner) /* result of earlier fallback */
+ continue;
+ get_tree(owner);
+ list_replace_init(&old->owners[j].list, &new->owners[i].list);
+ }
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&old->hash, &new->hash);
+}
+
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
@@ -242,7 +274,6 @@ static void untag_chunk(struct node *p)
struct audit_chunk *new = NULL;
struct audit_tree *owner;
int size = chunk->count - 1;
- int i, j;
fsnotify_get_mark(entry);
@@ -291,38 +322,16 @@ static void untag_chunk(struct node *p)
chunk->dead = 1;
spin_lock(&hash_lock);
- new->key = chunk->key;
- list_replace_init(&chunk->trees, &new->trees);
if (owner->root == chunk) {
list_del_init(&owner->same_root);
owner->root = NULL;
}
-
- for (i = j = 0; j <= size; i++, j++) {
- struct audit_tree *s;
- if (&chunk->owners[j] == p) {
- list_del_init(&p->list);
- i--;
- continue;
- }
- s = chunk->owners[j].owner;
- new->owners[i].owner = s;
- new->owners[i].index = chunk->owners[j].index - j + i;
- if (!s) /* result of earlier fallback */
- continue;
- get_tree(s);
- list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
- }
-
- list_for_each_entry(owner, &new->trees, same_root)
- owner->root = new;
+ list_del_init(&p->list);
/*
- * Make sure chunk is fully initialized before making it visible in the
- * hash. Pairs with a data dependency barrier in READ_ONCE() in
- * audit_tree_lookup().
+ * This has to go last when updating chunk as once replace_chunk() is
+ * called, new RCU readers can see the new chunk.
*/
- smp_wmb();
- list_replace_rcu(&chunk->hash, &new->hash);
+ replace_chunk(new, chunk, p);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
@@ -399,7 +408,6 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
struct fsnotify_mark *old_entry, *chunk_entry;
- struct audit_tree *owner;
struct audit_chunk *chunk, *old;
struct node *p;
int n;
@@ -464,35 +472,21 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
fsnotify_put_mark(old_entry);
return 0;
}
- chunk->key = old->key;
- list_replace_init(&old->trees, &chunk->trees);
- for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
- struct audit_tree *s = old->owners[n].owner;
- p->owner = s;
- p->index = old->owners[n].index;
- if (!s) /* result of fallback in untag */
- continue;
- get_tree(s);
- list_replace_init(&old->owners[n].list, &p->list);
- }
+ p = &chunk->owners[chunk->count - 1];
p->index = (chunk->count - 1) | (1U<<31);
p->owner = tree;
get_tree(tree);
list_add(&p->list, &tree->chunks);
- list_for_each_entry(owner, &chunk->trees, same_root)
- owner->root = chunk;
old->dead = 1;
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
/*
- * Make sure chunk is fully initialized before making it visible in the
- * hash. Pairs with a data dependency barrier in READ_ONCE() in
- * audit_tree_lookup().
+ * This has to go last when updating chunk as once replace_chunk() is
+ * called, new RCU readers can see the new chunk.
*/
- smp_wmb();
- list_replace_rcu(&old->hash, &chunk->hash);
+ replace_chunk(chunk, old, NULL);
spin_unlock(&hash_lock);
fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
--
2.16.4
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-11-06 13:58:36 UTC
Reply
Permalink
Post by Richard Guy Briggs
Post by Jan Kara
Chunk replacement code is very similar for the cases where we grow or
shrink chunk. Factor the code out into a common helper function.
Noting just the switch from list_replace_init() to list_splice_init().
Yeah, I wasn't expecting to see that, maybe it will make sense as I
work through the rest of the patchset.

Jan, can you explain the reason behind the change? I'm a little
nervous that this is simply hiding a problem (e.g. the list_empty()
check in splice).
Post by Richard Guy Briggs
Post by Jan Kara
---
kernel/audit_tree.c | 86 +++++++++++++++++++++++++----------------------------
1 file changed, 40 insertions(+), 46 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 307749d6773c..d8f6cfa0005b 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -235,6 +235,38 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}
+static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
+ struct node *skip)
+{
+ struct audit_tree *owner;
+ int i, j;
+
+ new->key = old->key;
+ list_splice_init(&old->trees, &new->trees);
+ list_for_each_entry(owner, &new->trees, same_root)
+ owner->root = new;
+ for (i = j = 0; j < old->count; i++, j++) {
+ if (&old->owners[j] == skip) {
+ i--;
+ continue;
+ }
+ owner = old->owners[j].owner;
+ new->owners[i].owner = owner;
+ new->owners[i].index = old->owners[j].index - j + i;
+ if (!owner) /* result of earlier fallback */
+ continue;
+ get_tree(owner);
+ list_replace_init(&old->owners[j].list, &new->owners[i].list);
+ }
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&old->hash, &new->hash);
+}
+
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
@@ -242,7 +274,6 @@ static void untag_chunk(struct node *p)
struct audit_chunk *new = NULL;
struct audit_tree *owner;
int size = chunk->count - 1;
- int i, j;
fsnotify_get_mark(entry);
@@ -291,38 +322,16 @@ static void untag_chunk(struct node *p)
chunk->dead = 1;
spin_lock(&hash_lock);
- new->key = chunk->key;
- list_replace_init(&chunk->trees, &new->trees);
if (owner->root == chunk) {
list_del_init(&owner->same_root);
owner->root = NULL;
}
-
- for (i = j = 0; j <= size; i++, j++) {
- struct audit_tree *s;
- if (&chunk->owners[j] == p) {
- list_del_init(&p->list);
- i--;
- continue;
- }
- s = chunk->owners[j].owner;
- new->owners[i].owner = s;
- new->owners[i].index = chunk->owners[j].index - j + i;
- if (!s) /* result of earlier fallback */
- continue;
- get_tree(s);
- list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
- }
-
- list_for_each_entry(owner, &new->trees, same_root)
- owner->root = new;
+ list_del_init(&p->list);
/*
- * Make sure chunk is fully initialized before making it visible in the
- * hash. Pairs with a data dependency barrier in READ_ONCE() in
- * audit_tree_lookup().
+ * This has to go last when updating chunk as once replace_chunk() is
+ * called, new RCU readers can see the new chunk.
*/
- smp_wmb();
- list_replace_rcu(&chunk->hash, &new->hash);
+ replace_chunk(new, chunk, p);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
@@ -399,7 +408,6 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
struct fsnotify_mark *old_entry, *chunk_entry;
- struct audit_tree *owner;
struct audit_chunk *chunk, *old;
struct node *p;
int n;
@@ -464,35 +472,21 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
fsnotify_put_mark(old_entry);
return 0;
}
- chunk->key = old->key;
- list_replace_init(&old->trees, &chunk->trees);
- for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
- struct audit_tree *s = old->owners[n].owner;
- p->owner = s;
- p->index = old->owners[n].index;
- if (!s) /* result of fallback in untag */
- continue;
- get_tree(s);
- list_replace_init(&old->owners[n].list, &p->list);
- }
+ p = &chunk->owners[chunk->count - 1];
p->index = (chunk->count - 1) | (1U<<31);
p->owner = tree;
get_tree(tree);
list_add(&p->list, &tree->chunks);
- list_for_each_entry(owner, &chunk->trees, same_root)
- owner->root = chunk;
old->dead = 1;
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
/*
- * Make sure chunk is fully initialized before making it visible in the
- * hash. Pairs with a data dependency barrier in READ_ONCE() in
- * audit_tree_lookup().
+ * This has to go last when updating chunk as once replace_chunk() is
+ * called, new RCU readers can see the new chunk.
*/
- smp_wmb();
- list_replace_rcu(&old->hash, &chunk->hash);
+ replace_chunk(chunk, old, NULL);
spin_unlock(&hash_lock);
fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
--
2.16.4
- RGB
--
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
--
paul moore
www.paul-moore.com
Jan Kara
2018-11-07 09:55:12 UTC
Reply
Permalink
Post by Paul Moore
Post by Richard Guy Briggs
Post by Jan Kara
Chunk replacement code is very similar for the cases where we grow or
shrink chunk. Factor the code out into a common helper function.
Noting just the switch from list_replace_init() to list_splice_init().
Yeah, I wasn't expecting to see that, maybe it will make sense as I
work through the rest of the patchset.
Jan, can you explain the reason behind the change? I'm a little
nervous that this is simply hiding a problem (e.g. the list_empty()
check in splice).
The reason is very simple: replace_chunk() gets called from tag_chunk()
*after* we have possibly done:

if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}

So new->trees is possibly non-empty and we need to preserve its contents.
That's why we need list_splice() and not plain list_replace().

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Paul Moore
2018-11-09 14:45:16 UTC
Reply
Permalink
Post by Jan Kara
Post by Paul Moore
Post by Richard Guy Briggs
Post by Jan Kara
Chunk replacement code is very similar for the cases where we grow or
shrink chunk. Factor the code out into a common helper function.
Noting just the switch from list_replace_init() to list_splice_init().
Yeah, I wasn't expecting to see that, maybe it will make sense as I
work through the rest of the patchset.
Jan, can you explain the reason behind the change? I'm a little
nervous that this is simply hiding a problem (e.g. the list_empty()
check in splice).
The reason is very simple: replace_chunk() gets called from tag_chunk()
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
So new->trees is possibly non-empty and we need to preserve its contents.
That's why we need list_splice() and not plain list_replace().
After revisiting this a couple of time this week, I found my problem -
I had reversed list_splice_init() in my mind and was worried that if
the old->trees list was not empty we would end up not adding
new->trees. Sorry about the noise.

I've updated the audit/working-fsnotify_fixes with the latest patches,
including the revised 12/14 sent as an attachment, and I'm going to
test it over the weekend. If the kernel is still standing on Monday
morning I'll merge it into audit/next.

Thanks again.
--
paul moore
www.paul-moore.com
Paul Moore
2018-11-12 15:15:42 UTC
Reply
Permalink
Post by Paul Moore
Post by Jan Kara
Post by Paul Moore
Post by Richard Guy Briggs
Post by Jan Kara
Chunk replacement code is very similar for the cases where we grow or
shrink chunk. Factor the code out into a common helper function.
Noting just the switch from list_replace_init() to list_splice_init().
Yeah, I wasn't expecting to see that, maybe it will make sense as I
work through the rest of the patchset.
Jan, can you explain the reason behind the change? I'm a little
nervous that this is simply hiding a problem (e.g. the list_empty()
check in splice).
The reason is very simple: replace_chunk() gets called from tag_chunk()
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
So new->trees is possibly non-empty and we need to preserve its contents.
That's why we need list_splice() and not plain list_replace().
After revisiting this a couple of time this week, I found my problem -
I had reversed list_splice_init() in my mind and was worried that if
the old->trees list was not empty we would end up not adding
new->trees. Sorry about the noise.
I've updated the audit/working-fsnotify_fixes with the latest patches,
including the revised 12/14 sent as an attachment, and I'm going to
test it over the weekend. If the kernel is still standing on Monday
morning I'll merge it into audit/next.
Thanks again.
My test machine was still standing this morning so I went ahead and
merged all of the patches into audit/next - Thanks Jan.
--
paul moore
www.paul-moore.com
Jan Kara
2018-11-12 15:25:05 UTC
Reply
Permalink
Post by Paul Moore
Post by Paul Moore
Post by Jan Kara
Post by Paul Moore
Post by Richard Guy Briggs
Post by Jan Kara
Chunk replacement code is very similar for the cases where we grow or
shrink chunk. Factor the code out into a common helper function.
Noting just the switch from list_replace_init() to list_splice_init().
Yeah, I wasn't expecting to see that, maybe it will make sense as I
work through the rest of the patchset.
Jan, can you explain the reason behind the change? I'm a little
nervous that this is simply hiding a problem (e.g. the list_empty()
check in splice).
The reason is very simple: replace_chunk() gets called from tag_chunk()
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
So new->trees is possibly non-empty and we need to preserve its contents.
That's why we need list_splice() and not plain list_replace().
After revisiting this a couple of time this week, I found my problem -
I had reversed list_splice_init() in my mind and was worried that if
the old->trees list was not empty we would end up not adding
new->trees. Sorry about the noise.
I've updated the audit/working-fsnotify_fixes with the latest patches,
including the revised 12/14 sent as an attachment, and I'm going to
test it over the weekend. If the kernel is still standing on Monday
morning I'll merge it into audit/next.
Thanks again.
My test machine was still standing this morning so I went ahead and
merged all of the patches into audit/next - Thanks Jan.
Cool, thanks!

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Jan Kara
2018-10-17 10:14:54 UTC
Reply
Permalink
Audit tree code is replacing marks attached to inodes in non-atomic way.
Thus fsnotify_find_mark() in tag_chunk() may find a mark that belongs to
a chunk that is no longer valid one and will soon be destroyed. Tags
added to such chunk will be simply lost.

Fix the problem by making sure old mark is marked as going away (through
fsnotify_detach_mark()) before dropping mark_mutex and thus in an atomic
way wrt tag_chunk(). Note that this does not fix the problem completely
as if tag_chunk() finds a mark that is going away, it fails with
-ENOENT. But at least the failure is not silent and currently there's no
way to search for another fsnotify mark attached to the inode. We'll fix
this problem in later patch.

Reviewed-by: Richard Guy Briggs <***@redhat.com>
Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 8a74b468b666..c194dbd53753 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -278,8 +278,9 @@ static void untag_chunk(struct node *p)
list_del_init(&p->list);
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
+ fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
- fsnotify_destroy_mark(entry, audit_tree_group);
+ fsnotify_free_mark(entry);
goto out;
}

@@ -320,8 +321,9 @@ static void untag_chunk(struct node *p)
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
spin_unlock(&hash_lock);
+ fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
- fsnotify_destroy_mark(entry, audit_tree_group);
+ fsnotify_free_mark(entry);
fsnotify_put_mark(&new->mark); /* drop initial reference */
goto out;

@@ -364,8 +366,9 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
+ fsnotify_detach_mark(entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_destroy_mark(entry, audit_tree_group);
+ fsnotify_free_mark(entry);
fsnotify_put_mark(entry);
return 0;
}
@@ -446,10 +449,9 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
+ fsnotify_detach_mark(chunk_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
-
- fsnotify_destroy_mark(chunk_entry, audit_tree_group);
-
+ fsnotify_free_mark(chunk_entry);
fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
return 0;
@@ -477,8 +479,9 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
spin_unlock(&hash_lock);
+ fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_destroy_mark(old_entry, audit_tree_group);
+ fsnotify_free_mark(old_entry);
fsnotify_put_mark(chunk_entry); /* drop initial reference */
fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
return 0;
--
2.16.4
Jan Kara
2018-10-17 10:15:00 UTC
Reply
Permalink
Allocate fsnotify mark independently instead of embedding it inside
chunk. This will allow us to just replace chunk attached to mark when
growing / shrinking chunk instead of replacing mark attached to inode
which is a more complex operation.

Reviewed-by: Richard Guy Briggs <***@redhat.com>
Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 64 +++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 50 insertions(+), 14 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 35c031ebcc12..c98ab2d68a1c 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -25,7 +25,7 @@ struct audit_tree {
struct audit_chunk {
struct list_head hash;
unsigned long key;
- struct fsnotify_mark mark;
+ struct fsnotify_mark *mark;
struct list_head trees; /* with root here */
int dead;
int count;
@@ -38,6 +38,11 @@ struct audit_chunk {
} owners[];
};

+struct audit_tree_mark {
+ struct fsnotify_mark mark;
+ struct audit_chunk *chunk;
+};
+
static LIST_HEAD(tree_list);
static LIST_HEAD(prune_list);
static struct task_struct *prune_thread;
@@ -73,6 +78,7 @@ static struct task_struct *prune_thread;
*/

static struct fsnotify_group *audit_tree_group;
+static struct kmem_cache *audit_tree_mark_cachep __read_mostly;

static struct audit_tree *alloc_tree(const char *s)
{
@@ -142,10 +148,33 @@ static void audit_mark_put_chunk(struct audit_chunk *chunk)
call_rcu(&chunk->head, __put_chunk);
}

+static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *entry)
+{
+ return container_of(entry, struct audit_tree_mark, mark);
+}
+
+static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
+{
+ return audit_mark(mark)->chunk;
+}
+
static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
- struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
+ struct audit_chunk *chunk = mark_chunk(entry);
audit_mark_put_chunk(chunk);
+ kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
+}
+
+static struct fsnotify_mark *alloc_mark(void)
+{
+ struct audit_tree_mark *amark;
+
+ amark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL);
+ if (!amark)
+ return NULL;
+ fsnotify_init_mark(&amark->mark, audit_tree_group);
+ amark->mark.mask = FS_IN_IGNORED;
+ return &amark->mark;
}

static struct audit_chunk *alloc_chunk(int count)
@@ -159,6 +188,13 @@ static struct audit_chunk *alloc_chunk(int count)
if (!chunk)
return NULL;

+ chunk->mark = alloc_mark();
+ if (!chunk->mark) {
+ kfree(chunk);
+ return NULL;
+ }
+ audit_mark(chunk->mark)->chunk = chunk;
+
INIT_LIST_HEAD(&chunk->hash);
INIT_LIST_HEAD(&chunk->trees);
chunk->count = count;
@@ -167,8 +203,6 @@ static struct audit_chunk *alloc_chunk(int count)
INIT_LIST_HEAD(&chunk->owners[i].list);
chunk->owners[i].index = i;
}
- fsnotify_init_mark(&chunk->mark, audit_tree_group);
- chunk->mark.mask = FS_IN_IGNORED;
return chunk;
}

@@ -278,7 +312,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
- struct fsnotify_mark *entry = &chunk->mark;
+ struct fsnotify_mark *entry = chunk->mark;
struct audit_chunk *new = NULL;
struct audit_tree *owner;
int size = chunk->count - 1;
@@ -298,7 +332,7 @@ static void untag_chunk(struct node *p)
if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
mutex_unlock(&entry->group->mark_mutex);
if (new)
- fsnotify_put_mark(&new->mark);
+ fsnotify_put_mark(new->mark);
goto out;
}

@@ -322,9 +356,9 @@ static void untag_chunk(struct node *p)
if (!new)
goto Fallback;

- if (fsnotify_add_mark_locked(&new->mark, entry->connector->obj,
+ if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- fsnotify_put_mark(&new->mark);
+ fsnotify_put_mark(new->mark);
goto Fallback;
}

@@ -344,7 +378,7 @@ static void untag_chunk(struct node *p)
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
fsnotify_free_mark(entry);
- fsnotify_put_mark(&new->mark); /* drop initial reference */
+ fsnotify_put_mark(new->mark); /* drop initial reference */
goto out;

Fallback:
@@ -375,7 +409,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOMEM;
}

- entry = &chunk->mark;
+ entry = chunk->mark;
if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry);
@@ -426,7 +460,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (!old_entry)
return create_chunk(inode, tree);

- old = container_of(old_entry, struct audit_chunk, mark);
+ old = mark_chunk(old_entry);

/* are we already there? */
spin_lock(&hash_lock);
@@ -447,7 +481,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOMEM;
}

- chunk_entry = &chunk->mark;
+ chunk_entry = chunk->mark;

/*
* mark_mutex protects mark from getting detached and thus also from
@@ -457,7 +491,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
/* old_entry is being shot, lets just lie */
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(old_entry);
- fsnotify_put_mark(&chunk->mark);
+ fsnotify_put_mark(chunk->mark);
return -ENOENT;
}

@@ -1011,7 +1045,7 @@ static int audit_tree_handle_event(struct fsnotify_group *group,

static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
{
- struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
+ struct audit_chunk *chunk = mark_chunk(entry);

evict_chunk(chunk);

@@ -1032,6 +1066,8 @@ static int __init audit_tree_init(void)
{
int i;

+ audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);
+
audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
if (IS_ERR(audit_tree_group))
audit_panic("cannot initialize fsnotify group for rectree watches");
--
2.16.4
Jan Kara
2018-10-17 10:15:02 UTC
Reply
Permalink
When deleting chunk from a tree, drop all unused nodes in a chunk
instead of just the one used by the tree. This gets rid of possibly
lingering unused nodes (created due to fallback path in untag_chunk())
and also removes some special cases and will allow us to simplify
locking in untag_chunk().

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 27 ++++++++++++++++++---------
1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ca2b6baff7aa..145e8c92dd31 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -277,8 +277,7 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}

-static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
- struct node *skip)
+static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
{
struct audit_tree *owner;
int i, j;
@@ -288,7 +287,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
for (i = j = 0; j < old->count; i++, j++) {
- if (&old->owners[j] == skip) {
+ if (!old->owners[j].owner) {
i--;
continue;
}
@@ -322,20 +321,28 @@ static void remove_chunk_node(struct audit_chunk *chunk, struct node *p)
put_tree(owner);
}

+static int chunk_count_trees(struct audit_chunk *chunk)
+{
+ int i;
+ int ret = 0;
+
+ for (i = 0; i < chunk->count; i++)
+ if (chunk->owners[i].owner)
+ ret++;
+ return ret;
+}
+
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
struct fsnotify_mark *entry = chunk->mark;
struct audit_chunk *new = NULL;
- int size = chunk->count - 1;
+ int size;

remove_chunk_node(chunk, p);
fsnotify_get_mark(entry);
spin_unlock(&hash_lock);

- if (size)
- new = alloc_chunk(size);
-
mutex_lock(&entry->group->mark_mutex);
/*
* mark_mutex protects mark from getting detached and thus also from
@@ -348,6 +355,7 @@ static void untag_chunk(struct node *p)
goto out;
}

+ size = chunk_count_trees(chunk);
if (!size) {
chunk->dead = 1;
spin_lock(&hash_lock);
@@ -360,6 +368,7 @@ static void untag_chunk(struct node *p)
goto out;
}

+ new = alloc_chunk(size);
if (!new)
goto out_mutex;

@@ -375,7 +384,7 @@ static void untag_chunk(struct node *p)
* This has to go last when updating chunk as once replace_chunk() is
* called, new RCU readers can see the new chunk.
*/
- replace_chunk(new, chunk, p);
+ replace_chunk(new, chunk);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
@@ -520,7 +529,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
* This has to go last when updating chunk as once replace_chunk() is
* called, new RCU readers can see the new chunk.
*/
- replace_chunk(chunk, old, NULL);
+ replace_chunk(chunk, old);
spin_unlock(&hash_lock);
fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
--
2.16.4
Richard Guy Briggs
2018-10-18 19:32:02 UTC
Reply
Permalink
Post by Jan Kara
When deleting chunk from a tree, drop all unused nodes in a chunk
instead of just the one used by the tree. This gets rid of possibly
lingering unused nodes (created due to fallback path in untag_chunk())
and also removes some special cases and will allow us to simplify
locking in untag_chunk().
---
kernel/audit_tree.c | 27 ++++++++++++++++++---------
1 file changed, 18 insertions(+), 9 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ca2b6baff7aa..145e8c92dd31 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -277,8 +277,7 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}
-static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
- struct node *skip)
+static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
{
struct audit_tree *owner;
int i, j;
@@ -288,7 +287,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
for (i = j = 0; j < old->count; i++, j++) {
- if (&old->owners[j] == skip) {
+ if (!old->owners[j].owner) {
i--;
continue;
}
@@ -322,20 +321,28 @@ static void remove_chunk_node(struct audit_chunk *chunk, struct node *p)
put_tree(owner);
}
+static int chunk_count_trees(struct audit_chunk *chunk)
+{
+ int i;
+ int ret = 0;
+
+ for (i = 0; i < chunk->count; i++)
+ if (chunk->owners[i].owner)
+ ret++;
+ return ret;
+}
+
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
struct fsnotify_mark *entry = chunk->mark;
struct audit_chunk *new = NULL;
- int size = chunk->count - 1;
+ int size;
remove_chunk_node(chunk, p);
fsnotify_get_mark(entry);
spin_unlock(&hash_lock);
- if (size)
- new = alloc_chunk(size);
-
mutex_lock(&entry->group->mark_mutex);
/*
* mark_mutex protects mark from getting detached and thus also from
@@ -348,6 +355,7 @@ static void untag_chunk(struct node *p)
goto out;
}
+ size = chunk_count_trees(chunk);
if (!size) {
chunk->dead = 1;
spin_lock(&hash_lock);
@@ -360,6 +368,7 @@ static void untag_chunk(struct node *p)
goto out;
}
+ new = alloc_chunk(size);
if (!new)
goto out_mutex;
@@ -375,7 +384,7 @@ static void untag_chunk(struct node *p)
* This has to go last when updating chunk as once replace_chunk() is
* called, new RCU readers can see the new chunk.
*/
- replace_chunk(new, chunk, p);
+ replace_chunk(new, chunk);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
@@ -520,7 +529,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
* This has to go last when updating chunk as once replace_chunk() is
* called, new RCU readers can see the new chunk.
*/
- replace_chunk(chunk, old, NULL);
+ replace_chunk(chunk, old);
spin_unlock(&hash_lock);
fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
--
2.16.4
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-11-06 14:14:55 UTC
Reply
Permalink
Post by Jan Kara
When deleting chunk from a tree, drop all unused nodes in a chunk
instead of just the one used by the tree. This gets rid of possibly
lingering unused nodes (created due to fallback path in untag_chunk())
and also removes some special cases and will allow us to simplify
locking in untag_chunk().
---
kernel/audit_tree.c | 27 ++++++++++++++++++---------
1 file changed, 18 insertions(+), 9 deletions(-)
Hmmm, it looks like this is the patch which makes the list
replace->splice change okay, yes? If so, should this change be
squashed into the replace_chunk() patch?
Post by Jan Kara
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ca2b6baff7aa..145e8c92dd31 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -277,8 +277,7 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}
-static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
- struct node *skip)
+static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
{
struct audit_tree *owner;
int i, j;
@@ -288,7 +287,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
for (i = j = 0; j < old->count; i++, j++) {
- if (&old->owners[j] == skip) {
+ if (!old->owners[j].owner) {
i--;
continue;
}
@@ -322,20 +321,28 @@ static void remove_chunk_node(struct audit_chunk *chunk, struct node *p)
put_tree(owner);
}
+static int chunk_count_trees(struct audit_chunk *chunk)
+{
+ int i;
+ int ret = 0;
+
+ for (i = 0; i < chunk->count; i++)
+ if (chunk->owners[i].owner)
+ ret++;
+ return ret;
+}
+
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
struct fsnotify_mark *entry = chunk->mark;
struct audit_chunk *new = NULL;
- int size = chunk->count - 1;
+ int size;
remove_chunk_node(chunk, p);
fsnotify_get_mark(entry);
spin_unlock(&hash_lock);
- if (size)
- new = alloc_chunk(size);
-
mutex_lock(&entry->group->mark_mutex);
/*
* mark_mutex protects mark from getting detached and thus also from
@@ -348,6 +355,7 @@ static void untag_chunk(struct node *p)
goto out;
}
+ size = chunk_count_trees(chunk);
if (!size) {
chunk->dead = 1;
spin_lock(&hash_lock);
@@ -360,6 +368,7 @@ static void untag_chunk(struct node *p)
goto out;
}
+ new = alloc_chunk(size);
if (!new)
goto out_mutex;
@@ -375,7 +384,7 @@ static void untag_chunk(struct node *p)
* This has to go last when updating chunk as once replace_chunk() is
* called, new RCU readers can see the new chunk.
*/
- replace_chunk(new, chunk, p);
+ replace_chunk(new, chunk);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
@@ -520,7 +529,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
* This has to go last when updating chunk as once replace_chunk() is
* called, new RCU readers can see the new chunk.
*/
- replace_chunk(chunk, old, NULL);
+ replace_chunk(chunk, old);
spin_unlock(&hash_lock);
fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
--
2.16.4
--
paul moore
www.paul-moore.com
Jan Kara
2018-11-07 10:00:57 UTC
Reply
Permalink
Post by Paul Moore
Post by Jan Kara
When deleting chunk from a tree, drop all unused nodes in a chunk
instead of just the one used by the tree. This gets rid of possibly
lingering unused nodes (created due to fallback path in untag_chunk())
and also removes some special cases and will allow us to simplify
locking in untag_chunk().
---
kernel/audit_tree.c | 27 ++++++++++++++++++---------
1 file changed, 18 insertions(+), 9 deletions(-)
Hmmm, it looks like this is the patch which makes the list
replace->splice change okay, yes? If so, should this change be
squashed into the replace_chunk() patch?
No, this change is completely unrelated to that. This is really only about
making untag_chunk() cleanup less dependent on previous context so we can
simplify the code and locking.

Honza
Post by Paul Moore
Post by Jan Kara
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ca2b6baff7aa..145e8c92dd31 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -277,8 +277,7 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}
-static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
- struct node *skip)
+static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
{
struct audit_tree *owner;
int i, j;
@@ -288,7 +287,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
for (i = j = 0; j < old->count; i++, j++) {
- if (&old->owners[j] == skip) {
+ if (!old->owners[j].owner) {
i--;
continue;
}
@@ -322,20 +321,28 @@ static void remove_chunk_node(struct audit_chunk *chunk, struct node *p)
put_tree(owner);
}
+static int chunk_count_trees(struct audit_chunk *chunk)
+{
+ int i;
+ int ret = 0;
+
+ for (i = 0; i < chunk->count; i++)
+ if (chunk->owners[i].owner)
+ ret++;
+ return ret;
+}
+
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
struct fsnotify_mark *entry = chunk->mark;
struct audit_chunk *new = NULL;
- int size = chunk->count - 1;
+ int size;
remove_chunk_node(chunk, p);
fsnotify_get_mark(entry);
spin_unlock(&hash_lock);
- if (size)
- new = alloc_chunk(size);
-
mutex_lock(&entry->group->mark_mutex);
/*
* mark_mutex protects mark from getting detached and thus also from
@@ -348,6 +355,7 @@ static void untag_chunk(struct node *p)
goto out;
}
+ size = chunk_count_trees(chunk);
if (!size) {
chunk->dead = 1;
spin_lock(&hash_lock);
@@ -360,6 +368,7 @@ static void untag_chunk(struct node *p)
goto out;
}
+ new = alloc_chunk(size);
if (!new)
goto out_mutex;
@@ -375,7 +384,7 @@ static void untag_chunk(struct node *p)
* This has to go last when updating chunk as once replace_chunk() is
* called, new RCU readers can see the new chunk.
*/
- replace_chunk(new, chunk, p);
+ replace_chunk(new, chunk);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
@@ -520,7 +529,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
* This has to go last when updating chunk as once replace_chunk() is
* called, new RCU readers can see the new chunk.
*/
- replace_chunk(chunk, old, NULL);
+ replace_chunk(chunk, old);
spin_unlock(&hash_lock);
fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
--
2.16.4
--
paul moore
www.paul-moore.com
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Loading...