Discussion:
[PATCH 09/11] audit: Allocate fsnotify mark independently of chunk
Jan Kara
2018-09-04 16:06:29 UTC
Allocate fsnotify mark independently instead of embedding it inside
chunk. This will allow us to just replace chunk attached to mark when
growing / shrinking chunk instead of replacing mark attached to inode
which is a more complex operation.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 64 +++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 50 insertions(+), 14 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 0cd08b3581f1..481fdc190c2f 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -25,7 +25,7 @@ struct audit_tree {
struct audit_chunk {
struct list_head hash;
unsigned long key;
- struct fsnotify_mark mark;
+ struct fsnotify_mark *mark;
struct list_head trees; /* with root here */
int dead;
int count;
@@ -38,6 +38,11 @@ struct audit_chunk {
} owners[];
};

+struct audit_tree_mark {
+ struct fsnotify_mark mark;
+ struct audit_chunk *chunk;
+};
+
static LIST_HEAD(tree_list);
static LIST_HEAD(prune_list);
static struct task_struct *prune_thread;
@@ -73,6 +78,7 @@ static struct task_struct *prune_thread;
*/

static struct fsnotify_group *audit_tree_group;
+static struct kmem_cache *audit_tree_mark_cachep __read_mostly;

static struct audit_tree *alloc_tree(const char *s)
{
@@ -142,10 +148,33 @@ static void audit_mark_put_chunk(struct audit_chunk *chunk)
call_rcu(&chunk->head, __put_chunk);
}

+static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *entry)
+{
+ return container_of(entry, struct audit_tree_mark, mark);
+}
+
+static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
+{
+ return audit_mark(mark)->chunk;
+}
+
static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
- struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
+ struct audit_chunk *chunk = mark_chunk(entry);
audit_mark_put_chunk(chunk);
+ kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
+}
+
+static struct fsnotify_mark *alloc_mark(void)
+{
+ struct audit_tree_mark *mark;
+
+ mark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL);
+ if (!mark)
+ return NULL;
+ fsnotify_init_mark(&mark->mark, audit_tree_group);
+ mark->mark.mask = FS_IN_IGNORED;
+ return &mark->mark;
}

static struct audit_chunk *alloc_chunk(int count)
@@ -159,6 +188,13 @@ static struct audit_chunk *alloc_chunk(int count)
if (!chunk)
return NULL;

+ chunk->mark = alloc_mark();
+ if (!chunk->mark) {
+ kfree(chunk);
+ return NULL;
+ }
+ audit_mark(chunk->mark)->chunk = chunk;
+
INIT_LIST_HEAD(&chunk->hash);
INIT_LIST_HEAD(&chunk->trees);
chunk->count = count;
@@ -167,8 +203,6 @@ static struct audit_chunk *alloc_chunk(int count)
INIT_LIST_HEAD(&chunk->owners[i].list);
chunk->owners[i].index = i;
}
- fsnotify_init_mark(&chunk->mark, audit_tree_group);
- chunk->mark.mask = FS_IN_IGNORED;
return chunk;
}

@@ -278,7 +312,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
- struct fsnotify_mark *entry = &chunk->mark;
+ struct fsnotify_mark *entry = chunk->mark;
struct audit_chunk *new = NULL;
struct audit_tree *owner;
int size = chunk->count - 1;
@@ -298,7 +332,7 @@ static void untag_chunk(struct node *p)
if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
mutex_unlock(&entry->group->mark_mutex);
if (new)
- fsnotify_put_mark(&new->mark);
+ fsnotify_put_mark(new->mark);
goto out;
}

@@ -322,9 +356,9 @@ static void untag_chunk(struct node *p)
if (!new)
goto Fallback;

- if (fsnotify_add_mark_locked(&new->mark, entry->connector->obj,
+ if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- fsnotify_put_mark(&new->mark);
+ fsnotify_put_mark(new->mark);
goto Fallback;
}

@@ -344,7 +378,7 @@ static void untag_chunk(struct node *p)
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
fsnotify_free_mark(entry);
- fsnotify_put_mark(&new->mark); /* drop initial reference */
+ fsnotify_put_mark(new->mark); /* drop initial reference */
goto out;

Fallback:
@@ -375,7 +409,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOMEM;
}

- entry = &chunk->mark;
+ entry = chunk->mark;
if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry);
@@ -426,7 +460,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (!old_entry)
return create_chunk(inode, tree);

- old = container_of(old_entry, struct audit_chunk, mark);
+ old = mark_chunk(old_entry);

/* are we already there? */
spin_lock(&hash_lock);
@@ -447,7 +481,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOMEM;
}

- chunk_entry = &chunk->mark;
+ chunk_entry = chunk->mark;

/*
* mark_mutex protects mark from getting detached and thus also from
@@ -457,7 +491,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
/* old_entry is being shot, lets just lie */
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(old_entry);
- fsnotify_put_mark(&chunk->mark);
+ fsnotify_put_mark(chunk->mark);
return -ENOENT;
}

@@ -1011,7 +1045,7 @@ static int audit_tree_handle_event(struct fsnotify_group *group,

static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
{
- struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
+ struct audit_chunk *chunk = mark_chunk(entry);

evict_chunk(chunk);

@@ -1032,6 +1066,8 @@ static int __init audit_tree_init(void)
{
int i;

+ audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);
+
audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
if (IS_ERR(audit_tree_group))
audit_panic("cannot initialize fsnotify group for rectree watches");
--
2.16.4
Jan Kara
2018-09-04 16:06:24 UTC
Currently chunk hash key (which is in fact pointer to the inode) is
derived as chunk->mark.conn->obj. It is tricky to make this dereference
reliable for hash table lookups only under RCU as mark can get detached
from the connector and connector gets freed independently of the
running lookup. Thus there is a possible use after free / NULL ptr
dereference issue:

CPU1                                    CPU2
                                        untag_chunk()
                                          ...
audit_tree_lookup()
  list_for_each_entry_rcu(p, list, hash) {
                                          list_del_rcu(&chunk->hash);
                                          fsnotify_destroy_mark(entry);
                                          fsnotify_put_mark(entry)
    chunk_to_key(p)
      if (!chunk->mark.connector)
                                            ...
                                            hlist_del_init_rcu(&mark->obj_list);
                                            if (hlist_empty(&conn->list)) {
                                              inode = fsnotify_detach_connector_from_object(conn);
                                              mark->connector = NULL;
                                            ...
                                            frees connector from workqueue
      chunk->mark.connector->obj

This race is probably impossible to hit in practice as the race window
on CPU1 is very narrow and CPU2 has a lot of code to execute. Still it's
better to have this fixed. Since the inode the chunk is attached to is
constant during chunk's lifetime it is easy to cache the key in the
chunk itself and thus avoid these issues.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 27 ++++++++-------------------
1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index c194dbd53753..bac5dd90c629 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -24,6 +24,7 @@ struct audit_tree {

struct audit_chunk {
struct list_head hash;
+ unsigned long key;
struct fsnotify_mark mark;
struct list_head trees; /* with root here */
int dead;
@@ -172,21 +173,6 @@ static unsigned long inode_to_key(const struct inode *inode)
return (unsigned long)&inode->i_fsnotify_marks;
}

-/*
- * Function to return search key in our hash from chunk. Key 0 is special and
- * should never be present in the hash.
- */
-static unsigned long chunk_to_key(struct audit_chunk *chunk)
-{
- /*
- * We have a reference to the mark so it should be attached to a
- * connector.
- */
- if (WARN_ON_ONCE(!chunk->mark.connector))
- return 0;
- return (unsigned long)chunk->mark.connector->obj;
-}
-
static inline struct list_head *chunk_hash(unsigned long key)
{
unsigned long n = key / L1_CACHE_BYTES;
@@ -196,12 +182,12 @@ static inline struct list_head *chunk_hash(unsigned long key)
/* hash_lock & entry->group->mark_mutex is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
- unsigned long key = chunk_to_key(chunk);
struct list_head *list;

if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
return;
- list = chunk_hash(key);
+ WARN_ON_ONCE(!chunk->key);
+ list = chunk_hash(chunk->key);
list_add_rcu(&chunk->hash, list);
}

@@ -213,7 +199,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
struct audit_chunk *p;

list_for_each_entry_rcu(p, list, hash) {
- if (chunk_to_key(p) == key) {
+ if (p->key == key) {
atomic_long_inc(&p->refs);
return p;
}
@@ -295,6 +281,7 @@ static void untag_chunk(struct node *p)

chunk->dead = 1;
spin_lock(&hash_lock);
+ new->key = chunk->key;
list_replace_init(&chunk->trees, &new->trees);
if (owner->root == chunk) {
list_del_init(&owner->same_root);
@@ -380,6 +367,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
+ chunk->key = inode_to_key(inode);
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
@@ -456,6 +444,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
fsnotify_put_mark(old_entry);
return 0;
}
+ chunk->key = old->key;
list_replace_init(&old->trees, &chunk->trees);
for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
struct audit_tree *s = old->owners[n].owner;
@@ -654,7 +643,7 @@ void audit_trim_trees(void)
/* this could be NULL if the watch is dying else where... */
node->index |= 1U<<31;
if (iterate_mounts(compare_root,
- (void *)chunk_to_key(chunk),
+ (void *)(chunk->key),
root_mnt))
node->index &= ~(1U<<31);
}
--
2.16.4
Richard Guy Briggs
2018-09-13 20:06:40 UTC
Post by Jan Kara
Currently chunk hash key (which is in fact pointer to the inode) is
derived as chunk->mark.conn->obj. It is tricky to make this dereference
reliable for hash table lookups only under RCU as mark can get detached
from the connector and connector gets freed independently of the
running lookup. Thus there is a possible use after free / NULL ptr
dereference issue:

CPU1                                    CPU2
                                        untag_chunk()
                                          ...
audit_tree_lookup()
  list_for_each_entry_rcu(p, list, hash) {
                                          list_del_rcu(&chunk->hash);
                                          fsnotify_destroy_mark(entry);
                                          fsnotify_put_mark(entry)
    chunk_to_key(p)
      if (!chunk->mark.connector)
                                            ...
                                            hlist_del_init_rcu(&mark->obj_list);
                                            if (hlist_empty(&conn->list)) {
                                              inode = fsnotify_detach_connector_from_object(conn);
                                              mark->connector = NULL;
                                            ...
                                            frees connector from workqueue
      chunk->mark.connector->obj
This race is probably impossible to hit in practice as the race window
on CPU1 is very narrow and CPU2 has a lot of code to execute. Still it's
better to have this fixed. Since the inode the chunk is attached to is
constant during chunk's lifetime it is easy to cache the key in the
chunk itself and thus avoid these issues.
---
kernel/audit_tree.c | 27 ++++++++-------------------
1 file changed, 8 insertions(+), 19 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index c194dbd53753..bac5dd90c629 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -24,6 +24,7 @@ struct audit_tree {
struct audit_chunk {
struct list_head hash;
+ unsigned long key;
struct fsnotify_mark mark;
struct list_head trees; /* with root here */
int dead;
@@ -172,21 +173,6 @@ static unsigned long inode_to_key(const struct inode *inode)
return (unsigned long)&inode->i_fsnotify_marks;
}
-/*
- * Function to return search key in our hash from chunk. Key 0 is special and
- * should never be present in the hash.
- */
-static unsigned long chunk_to_key(struct audit_chunk *chunk)
-{
- /*
- * We have a reference to the mark so it should be attached to a
- * connector.
- */
- if (WARN_ON_ONCE(!chunk->mark.connector))
- return 0;
- return (unsigned long)chunk->mark.connector->obj;
-}
-
static inline struct list_head *chunk_hash(unsigned long key)
{
unsigned long n = key / L1_CACHE_BYTES;
@@ -196,12 +182,12 @@ static inline struct list_head *chunk_hash(unsigned long key)
/* hash_lock & entry->group->mark_mutex is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
- unsigned long key = chunk_to_key(chunk);
struct list_head *list;
if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
return;
- list = chunk_hash(key);
+ WARN_ON_ONCE(!chunk->key);
+ list = chunk_hash(chunk->key);
list_add_rcu(&chunk->hash, list);
}
@@ -213,7 +199,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
struct audit_chunk *p;
list_for_each_entry_rcu(p, list, hash) {
- if (chunk_to_key(p) == key) {
+ if (p->key == key) {
atomic_long_inc(&p->refs);
return p;
}
@@ -295,6 +281,7 @@ static void untag_chunk(struct node *p)
chunk->dead = 1;
spin_lock(&hash_lock);
+ new->key = chunk->key;
list_replace_init(&chunk->trees, &new->trees);
if (owner->root == chunk) {
list_del_init(&owner->same_root);
@@ -380,6 +367,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
+ chunk->key = inode_to_key(inode);
Is there a patch missing that somehow converts from chunk_to_key() to
inode_to_key() and from chunk->mark.connector->obj to
chunk->mark.connector->inode that I've missed?

Yes: commit 36f10f55ff1d ("fsnotify: let connector point to an abstract
object"), 2018-06-23, by <***@gmail.com>. I was looking at audit/next
rather than v4.19-rc1.
Post by Jan Kara
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
@@ -456,6 +444,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
fsnotify_put_mark(old_entry);
return 0;
}
+ chunk->key = old->key;
list_replace_init(&old->trees, &chunk->trees);
for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
struct audit_tree *s = old->owners[n].owner;
@@ -654,7 +643,7 @@ void audit_trim_trees(void)
/* this could be NULL if the watch is dying else where... */
node->index |= 1U<<31;
if (iterate_mounts(compare_root,
- (void *)chunk_to_key(chunk),
+ (void *)(chunk->key),
root_mnt))
node->index &= ~(1U<<31);
}
--
2.16.4
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Jan Kara
2018-09-04 16:06:26 UTC
Chunk replacement code is very similar for the cases where we grow or
shrink chunk. Factor the code out into a common helper function.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 86 +++++++++++++++++++++++++----------------------------
1 file changed, 40 insertions(+), 46 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 307749d6773c..6978a92f6fef 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -235,6 +235,38 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}

+static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
+ struct node *skip)
+{
+ struct audit_tree *owner;
+ int i, j;
+
+ new->key = old->key;
+ list_replace_init(&old->trees, &new->trees);
+ list_for_each_entry(owner, &new->trees, same_root)
+ owner->root = new;
+ for (i = j = 0; j < old->count; i++, j++) {
+ if (&old->owners[j] == skip) {
+ i--;
+ continue;
+ }
+ owner = old->owners[j].owner;
+ new->owners[i].owner = owner;
+ new->owners[i].index = old->owners[j].index - j + i;
+ if (!owner) /* result of earlier fallback */
+ continue;
+ get_tree(owner);
+ list_replace_init(&old->owners[j].list, &new->owners[i].list);
+ }
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&old->hash, &new->hash);
+}
+
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
@@ -242,7 +274,6 @@ static void untag_chunk(struct node *p)
struct audit_chunk *new = NULL;
struct audit_tree *owner;
int size = chunk->count - 1;
- int i, j;

fsnotify_get_mark(entry);

@@ -291,38 +322,16 @@ static void untag_chunk(struct node *p)

chunk->dead = 1;
spin_lock(&hash_lock);
- new->key = chunk->key;
- list_replace_init(&chunk->trees, &new->trees);
if (owner->root == chunk) {
list_del_init(&owner->same_root);
owner->root = NULL;
}
-
- for (i = j = 0; j <= size; i++, j++) {
- struct audit_tree *s;
- if (&chunk->owners[j] == p) {
- list_del_init(&p->list);
- i--;
- continue;
- }
- s = chunk->owners[j].owner;
- new->owners[i].owner = s;
- new->owners[i].index = chunk->owners[j].index - j + i;
- if (!s) /* result of earlier fallback */
- continue;
- get_tree(s);
- list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
- }
-
- list_for_each_entry(owner, &new->trees, same_root)
- owner->root = new;
+ list_del_init(&p->list);
/*
- * Make sure chunk is fully initialized before making it visible in the
- * hash. Pairs with a data dependency barrier in READ_ONCE() in
- * audit_tree_lookup().
+ * This has to go last when updating chunk as once replace_chunk() is
+ * called, new RCU readers can see the new chunk.
*/
- smp_wmb();
- list_replace_rcu(&chunk->hash, &new->hash);
+ replace_chunk(new, chunk, p);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
@@ -399,7 +408,6 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
struct fsnotify_mark *old_entry, *chunk_entry;
- struct audit_tree *owner;
struct audit_chunk *chunk, *old;
struct node *p;
int n;
@@ -464,35 +472,21 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
fsnotify_put_mark(old_entry);
return 0;
}
- chunk->key = old->key;
- list_replace_init(&old->trees, &chunk->trees);
- for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
- struct audit_tree *s = old->owners[n].owner;
- p->owner = s;
- p->index = old->owners[n].index;
- if (!s) /* result of fallback in untag */
- continue;
- get_tree(s);
- list_replace_init(&old->owners[n].list, &p->list);
- }
+ p = &chunk->owners[chunk->count - 1];
p->index = (chunk->count - 1) | (1U<<31);
p->owner = tree;
get_tree(tree);
list_add(&p->list, &tree->chunks);
- list_for_each_entry(owner, &chunk->trees, same_root)
- owner->root = chunk;
old->dead = 1;
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
/*
- * Make sure chunk is fully initialized before making it visible in the
- * hash. Pairs with a data dependency barrier in READ_ONCE() in
- * audit_tree_lookup().
+ * This has to go last when updating chunk as once replace_chunk() is
+ * called, new RCU readers can see the new chunk.
*/
- smp_wmb();
- list_replace_rcu(&old->hash, &chunk->hash);
+ replace_chunk(chunk, old, NULL);
spin_unlock(&hash_lock);
fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
--
2.16.4
Jan Kara
2018-09-04 16:06:21 UTC
Currently, audit_tree code uses mark->lock to protect against detaching
of mark from an inode. In most places it however also uses
mark->group->mark_mutex (as we need to atomically replace attached
marks) and this provides protection against mark detaching as well. So
just remove protection with mark->lock from audit tree code and replace
it with mark->group->mark_mutex protection in all the places. It
simplifies the code and gets rid of some ugly catches like calling
fsnotify_add_mark_locked() with mark->lock held (which cannot sleep only
because we hold a reference to another mark attached to the same inode).

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 24 ++++--------------------
1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ea43181cde4a..1b55b1026a36 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -193,7 +193,7 @@ static inline struct list_head *chunk_hash(unsigned long key)
return chunk_hash_heads + n % HASH_SIZE;
}

-/* hash_lock & entry->lock is held by caller */
+/* hash_lock & entry->group->mark_mutex is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
unsigned long key = chunk_to_key(chunk);
@@ -256,13 +256,11 @@ static void untag_chunk(struct node *p)
new = alloc_chunk(size);

mutex_lock(&entry->group->mark_mutex);
- spin_lock(&entry->lock);
/*
* mark_mutex protects mark from getting detached and thus also from
* mark->connector->obj getting NULL.
*/
if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
- spin_unlock(&entry->lock);
mutex_unlock(&entry->group->mark_mutex);
if (new)
fsnotify_put_mark(&new->mark);
@@ -280,7 +278,6 @@ static void untag_chunk(struct node *p)
list_del_init(&p->list);
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
- spin_unlock(&entry->lock);
mutex_unlock(&entry->group->mark_mutex);
fsnotify_destroy_mark(entry, audit_tree_group);
goto out;
@@ -323,7 +320,6 @@ static void untag_chunk(struct node *p)
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
spin_unlock(&hash_lock);
- spin_unlock(&entry->lock);
mutex_unlock(&entry->group->mark_mutex);
fsnotify_destroy_mark(entry, audit_tree_group);
fsnotify_put_mark(&new->mark); /* drop initial reference */
@@ -340,7 +336,6 @@ static void untag_chunk(struct node *p)
p->owner = NULL;
put_tree(owner);
spin_unlock(&hash_lock);
- spin_unlock(&entry->lock);
mutex_unlock(&entry->group->mark_mutex);
out:
fsnotify_put_mark(entry);
@@ -360,12 +355,12 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOSPC;
}

- spin_lock(&entry->lock);
+ mutex_lock(&entry->group->mark_mutex);
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
- spin_unlock(&entry->lock);
+ mutex_unlock(&entry->group->mark_mutex);
fsnotify_destroy_mark(entry, audit_tree_group);
fsnotify_put_mark(entry);
return 0;
@@ -380,7 +375,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
}
insert_hash(chunk);
spin_unlock(&hash_lock);
- spin_unlock(&entry->lock);
+ mutex_unlock(&entry->group->mark_mutex);
fsnotify_put_mark(entry); /* drop initial reference */
return 0;
}
@@ -421,14 +416,12 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk_entry = &chunk->mark;

mutex_lock(&old_entry->group->mark_mutex);
- spin_lock(&old_entry->lock);
/*
* mark_mutex protects mark from getting detached and thus also from
* mark->connector->obj getting NULL.
*/
if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
/* old_entry is being shot, lets just lie */
- spin_unlock(&old_entry->lock);
mutex_unlock(&old_entry->group->mark_mutex);
fsnotify_put_mark(old_entry);
fsnotify_put_mark(&chunk->mark);
@@ -437,23 +430,16 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)

if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- spin_unlock(&old_entry->lock);
mutex_unlock(&old_entry->group->mark_mutex);
fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
return -ENOSPC;
}

- /* even though we hold old_entry->lock, this is safe since chunk_entry->lock could NEVER have been grabbed before */
- spin_lock(&chunk_entry->lock);
spin_lock(&hash_lock);
-
- /* we now hold old_entry->lock, chunk_entry->lock, and hash_lock */
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
- spin_unlock(&chunk_entry->lock);
- spin_unlock(&old_entry->lock);
mutex_unlock(&old_entry->group->mark_mutex);

fsnotify_destroy_mark(chunk_entry, audit_tree_group);
@@ -485,8 +471,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
spin_unlock(&hash_lock);
- spin_unlock(&chunk_entry->lock);
- spin_unlock(&old_entry->lock);
mutex_unlock(&old_entry->group->mark_mutex);
fsnotify_destroy_mark(old_entry, audit_tree_group);
fsnotify_put_mark(chunk_entry); /* drop initial reference */
--
2.16.4
Jan Kara
2018-09-04 16:06:32 UTC
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.

Signed-off-by: Jan Kara <***@suse.cz>
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test

diff --git a/tests/stress_tree/Makefile b/tests/stress_tree/Makefile
new file mode 100644
index 000000000000..7ade09aad86f
--- /dev/null
+++ b/tests/stress_tree/Makefile
@@ -0,0 +1,8 @@
+TARGETS=$(patsubst %.c,%,$(wildcard *.c))
+
+LDLIBS += -lpthread
+
+all: $(TARGETS)
+clean:
+ rm -f $(TARGETS)
+
diff --git a/tests/stress_tree/test b/tests/stress_tree/test
new file mode 100755
index 000000000000..6215bec810d1
--- /dev/null
+++ b/tests/stress_tree/test
@@ -0,0 +1,171 @@
+#!/usr/bin/perl
+
+use strict;
+
+use Test;
+BEGIN { plan tests => 1 }
+
+use File::Temp qw/ tempdir tempfile /;
+
+###
+# functions
+
+sub key_gen {
+ my @chars = ( "A" .. "Z", "a" .. "z" );
+ my $key = "testsuite-" . time . "-";
+ $key .= $chars[ rand @chars ] for 1 .. 8;
+ return $key;
+}
+
+# Run stat on random files in subtrees to generate audit events
+sub run_stat {
+ my($dir,$dirs) = @_;
+ my $path;
+
+ while (1) {
+ $path = "$dir/mnt/mnt".int(rand($dirs))."/subdir".int(rand($dirs));
+ stat($path);
+ }
+}
+
+# Generate audit rules for subtrees. Do one rule per subtree. Because watch
+# recursively iterates child mounts and we mount $dir/leaf$i under various
+# subtrees, the inode corresponding to $dir/leaf$i gets tagged by different
+# trees.
+sub run_mark_audit {
+ my($dir,$dirs,$key) = @_;
+
+ while (1) {
+ for (my $i=0; $i < $dirs; $i++) {
+ system("auditctl -w $dir/mnt/mnt$i -p r -k $key");
+ }
+ system("auditctl -D -k $key >& /dev/null");
+ }
+}
+
+sub umount_all {
+ my($dir,$dirs,$ignore_fail) = @_;
+
+ for (my $i=0; $i < $dirs; $i++) {
+ while (system("umount $dir/leaf$i >& /dev/null") > 0 &&
+ $ignore_fail == 0) {
+ # Nothing - loop until umount succeeds
+ }
+ }
+ for (my $i=0; $i < $dirs; $i++) {
+ for (my $j=0; $j < $dirs; $j++) {
+ while (system("umount $dir/mnt/mnt$i/subdir$j >& /dev/null") > 0 &&
+ $ignore_fail == 0) {
+ # Nothing - loop until umount succeeds
+ }
+ }
+ while (system("umount $dir/mnt/mnt$i >& /dev/null") > 0 &&
+ $ignore_fail == 0) {
+ # Nothing - loop until umount succeeds
+ }
+ }
+}
+
+# Mount and unmount filesystems. We pick a random leaf mount so that sometimes
+# a leaf mount point root inode will gather more tags from different trees
+# and sometimes we will be quicker in unmounting all instances of a leaf and
+# thus exercise the inode eviction path
+sub run_mount {
+ my($dir,$dirs) = @_;
+
+ while (1) {
+ # We use tmpfs here and not just bind mounts of some dir so
+ # that the root inode gets evicted once all instances are
+ # unmounted.
+ for (my $i=0; $i < $dirs; $i++) {
+ system("mount -t tmpfs none $dir/leaf$i");
+ }
+ for (my $i=0; $i < $dirs; $i++) {
+ system("mount --bind $dir/dir$i $dir/mnt/mnt$i");
+ for (my $j=0; $j < $dirs; $j++) {
+ my $leaf="$dir/leaf".int(rand($dirs));
+ system("mount --bind $leaf $dir/mnt/mnt$i/subdir$j");
+ }
+ }
+ umount_all($dir, $dirs, 0);
+ }
+}
+
+
+###
+# setup
+
+# reset audit
+system("auditctl -D >& /dev/null");
+
+# create temp directory
+my $dir = tempdir( TEMPLATE => '/tmp/audit-testsuite-XXXX', CLEANUP => 1 );
+
+# create stdout/stderr sinks
+( my $fh_out, my $stdout ) = tempfile(
+ TEMPLATE => '/tmp/audit-testsuite-out-XXXX',
+ UNLINK => 1
+);
+( my $fh_err, my $stderr ) = tempfile(
+ TEMPLATE => '/tmp/audit-testsuite-err-XXXX',
+ UNLINK => 1
+);
+
+###
+# tests
+
+my $dirs = 4;
+
+# setup directory hierarchy
+for (my $i=0; $i < $dirs; $i++) {
+ mkdir $dir."/dir".$i;
+ for (my $j=0; $j < $dirs; $j++) {
+ mkdir $dir."/dir".$i."/subdir".$j;
+ }
+}
+mkdir "$dir/mnt";
+for (my $i=0; $i < $dirs; $i++) {
+ mkdir "$dir/mnt/mnt$i";
+ mkdir "$dir/leaf$i";
+}
+
+my $stat_pid = fork();
+
+if ($stat_pid == 0) {
+ run_stat($dir, $dirs);
+ # Never reached
+ exit;
+}
+
+my $mount_pid = fork();
+
+if ($mount_pid == 0) {
+ run_mount($dir, $dirs);
+ # Never reached
+ exit;
+}
+
+my $key = key_gen();
+
+my $audit_pid = fork();
+
+if ($audit_pid == 0) {
+ run_mark_audit($dir, $dirs, $key);
+ # Never reached
+ exit;
+}
+
+# Sleep for a minute to let stress test run...
+sleep(60);
+ok(1);
+
+###
+# cleanup
+
+kill('KILL', $stat_pid, $mount_pid, $audit_pid);
+# Wait for children to terminate
+waitpid($stat_pid, 0);
+waitpid($mount_pid, 0);
+waitpid($audit_pid, 0);
+system("auditctl -D >& /dev/null");
+umount_all($dir, $dirs, 1);
--
2.16.4
Richard Guy Briggs
2018-09-14 18:21:04 UTC
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
A couple of minor comments below, but otherwise looks reasonable to me.
Reviewed-by: Richard Guy Briggs <***@redhat.com>

I assume you've tested this with more than $dirs = 4 and sleep(60)?
Post by Jan Kara
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test
diff --git a/tests/stress_tree/Makefile b/tests/stress_tree/Makefile
new file mode 100644
index 000000000000..7ade09aad86f
--- /dev/null
+++ b/tests/stress_tree/Makefile
@@ -0,0 +1,8 @@
+TARGETS=$(patsubst %.c,%,$(wildcard *.c))
+
+LDLIBS += -lpthread
+
+all: $(TARGETS)
+clean:
+ rm -f $(TARGETS)
+
diff --git a/tests/stress_tree/test b/tests/stress_tree/test
new file mode 100755
index 000000000000..6215bec810d1
--- /dev/null
+++ b/tests/stress_tree/test
@@ -0,0 +1,171 @@
+#!/usr/bin/perl
+
+use strict;
+
+use Test;
+BEGIN { plan tests => 1 }
+
+use File::Temp qw/ tempdir tempfile /;
+
+###
+# functions
+
+sub key_gen {
+ my @chars = ( "A" .. "Z", "a" .. "z" );
+ my $key = "testsuite-" . time . "-";
+ $key .= $chars[ rand @chars ] for 1 .. 8;
+ return $key;
+}
+
+# Run stat on random files in subtrees to generate audit events
+sub run_stat {
+ my($dir,$dirs) = @_;
+ my $path;
+
+ while (1) {
+ $path = "$dir/mnt/mnt".int(rand($dirs))."/subdir".int(rand($dirs));
+ stat($path);
+ }
+}
+
+# Generate audit rules for subtrees. Do one rule per subtree. Because watch
+# recursively iterates child mounts and we mount $dir/leaf$i under various
+# subtrees, the inode corresponding to $dir/leaf$i gets tagged by different
+# trees.
+sub run_mark_audit {
+ my($dir,$dirs,$key) = @_;
+
+ while (1) {
+ for (my $i=0; $i < $dirs; $i++) {
+ system("auditctl -w $dir/mnt/mnt$i -p r -k $key");
+ }
+ system("auditctl -D -k $key >& /dev/null");
+ }
+}
+
+sub umount_all {
+ my($dir,$dirs,$ignore_fail) = @_;
+
+ for (my $i=0; $i < $dirs; $i++) {
+ while (system("umount $dir/leaf$i >& /dev/null") > 0 &&
+ $ignore_fail == 0) {
+ # Nothing - loop until umount succeeds
+ }
+ }
Shouldn't this set of tmpfs be unmounted after the bind mounts that
follow, in reverse order to the way they were mounted?
Post by Jan Kara
+ for (my $i=0; $i < $dirs; $i++) {
+ for (my $j=0; $j < $dirs; $j++) {
+ while (system("umount $dir/mnt/mnt$i/subdir$j >& /dev/null") > 0 &&
+ $ignore_fail == 0) {
+ # Nothing - loop until umount succeeds
+ }
+ }
+ while (system("umount $dir/mnt/mnt$i >& /dev/null") > 0 &&
+ $ignore_fail == 0) {
+ # Nothing - loop until umount succeeds
+ }
+ }
+}
+
+# Mount and unmount filesystems. We pick a random leaf mount so that sometimes
+# a leaf mount point root inode will gather more tags from different trees
+# and sometimes we will be quicker in unmounting all instances of a leaf and
+# thus exercise the inode eviction path
+sub run_mount {
+ my($dir,$dirs) = @_;
+
+ while (1) {
+ # We use tmpfs here and not just bind mounts of some dir so
+ # that the root inode gets evicted once all instances are
+ # unmounted.
+ for (my $i=0; $i < $dirs; $i++) {
+ system("mount -t tmpfs none $dir/leaf$i");
+ }
+ for (my $i=0; $i < $dirs; $i++) {
+ system("mount --bind $dir/dir$i $dir/mnt/mnt$i");
+ for (my $j=0; $j < $dirs; $j++) {
+ my $leaf="$dir/leaf".int(rand($dirs));
+ system("mount --bind $leaf $dir/mnt/mnt$i/subdir$j");
+ }
+ }
+ umount_all($dir, $dirs, 0);
+ }
+}
+
+
+###
+# setup
+
+# reset audit
+system("auditctl -D >& /dev/null");
+
+# create temp directory
+my $dir = tempdir( TEMPLATE => '/tmp/audit-testsuite-XXXX', CLEANUP => 1 );
+
+# create stdout/stderr sinks
+( my $fh_out, my $stdout ) = tempfile(
+ TEMPLATE => '/tmp/audit-testsuite-out-XXXX',
+ UNLINK => 1
+);
+( my $fh_err, my $stderr ) = tempfile(
+ TEMPLATE => '/tmp/audit-testsuite-err-XXXX',
+ UNLINK => 1
+);
+
+###
+# tests
+
+my $dirs = 4;
+
+# setup directory hierarchy
+for (my $i=0; $i < $dirs; $i++) {
+ mkdir $dir."/dir".$i;
+ for (my $j=0; $j < $dirs; $j++) {
+ mkdir $dir."/dir".$i."/subdir".$j;
+ }
+}
+mkdir "$dir/mnt";
+for (my $i=0; $i < $dirs; $i++) {
+ mkdir "$dir/mnt/mnt$i";
+ mkdir "$dir/leaf$i";
+}
+
+my $stat_pid = fork();
+
+if ($stat_pid == 0) {
+ run_stat($dir, $dirs);
+ # Never reached
+ exit;
+}
+
+my $mount_pid = fork();
+
+if ($mount_pid == 0) {
+ run_mount($dir, $dirs);
+ # Never reached
+ exit;
+}
+
+my $key = key_gen();
+
+my $audit_pid = fork();
+
+if ($audit_pid == 0) {
+ run_mark_audit($dir, $dirs, $key);
+ # Never reached
+ exit;
+}
+
+# Sleep for a minute to let stress test run...
+sleep(60);
+ok(1);
+
+###
+# cleanup
+
+kill('KILL', $stat_pid, $mount_pid, $audit_pid);
+# Wait for children to terminate
+waitpid($stat_pid, 0);
+waitpid($mount_pid, 0);
+waitpid($audit_pid, 0);
+system("auditctl -D >& /dev/null");
+umount_all($dir, $dirs, 1);
Should all the subdirectories in the temp directory be deleted, or are
they all cleaned up recursively by the tempdir command?

- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Jan Kara
2018-09-17 16:56:04 UTC
Post by Richard Guy Briggs
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
A couple of minor comments below, but otherwise looks reasonable to me.
I assume you've tested this with more than $dirs = 4 and sleep(60)?
I've tested it running for a longer time (a couple of hours) but I don't
think I've tested more directories. I can try that out.
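For reference, a minimal sketch (hypothetical STRESS_TREE_* environment
overrides, not part of the posted test) of how those two hardcoded knobs
could be made adjustable:

#!/usr/bin/perl
# Sketch only: the posted test hardcodes $dirs = 4 and sleep(60).
use strict;

my $dirs = $ENV{STRESS_TREE_DIRS} || 4;   # directories/mounts per level
my $secs = $ENV{STRESS_TREE_SECS} || 60;  # how long to let the stress run

print "would run with dirs=$dirs for ${secs}s\n";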
Post by Richard Guy Briggs
Post by Jan Kara
+sub umount_all {
+ my($dir,$dirs,$ignore_fail) = @_;
+
+ for (my $i=0; $i < $dirs; $i++) {
+ while (system("umount $dir/leaf$i >& /dev/null") > 0 &&
+ $ignore_fail == 0) {
+ # Nothing - loop until umount succeeds
+ }
+ }
Shouldn't this set of tmpfs be unmounted after the bind mounts that
follow, in reverse order to the way they were mounted?
The order does not really matter. Once a bind mount is created, it is an
independent entity from the original mount (well, except for possible mount
inheritance) so you can unmount them in arbitrary order.
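For illustration, a minimal sketch of that property (hypothetical /tmp/src
and /tmp/bnd paths, assumes root; not part of the patch or this thread):

#!/usr/bin/perl
# Sketch: a bind mount is an independent entity from its source mount,
# so the two can be unmounted in either order; the tmpfs itself is only
# released once its last mount instance goes away.
use strict;

mkdir "/tmp/src";   # hypothetical source mount point
mkdir "/tmp/bnd";   # hypothetical bind mount point

system("mount -t tmpfs none /tmp/src") == 0 or die "mount tmpfs failed";
system("mount --bind /tmp/src /tmp/bnd") == 0 or die "bind mount failed";

# Unmount the original tmpfs instance first...
system("umount /tmp/src") == 0 or die "umount of source failed";

# ...the bind mount is still fully usable afterwards.
open(my $fh, '>', '/tmp/bnd/file') or die "bind mount unusable: $!";
close($fh);

system("umount /tmp/bnd");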
Post by Richard Guy Briggs
Post by Jan Kara
+# create temp directory
+my $dir = tempdir( TEMPLATE => '/tmp/audit-testsuite-XXXX', CLEANUP => 1 );
+
+# create stdout/stderr sinks
+( my $fh_out, my $stdout ) = tempfile(
+ TEMPLATE => '/tmp/audit-testsuite-out-XXXX',
+ UNLINK => 1
+);
+( my $fh_err, my $stderr ) = tempfile(
+ TEMPLATE => '/tmp/audit-testsuite-err-XXXX',
+ UNLINK => 1
+);
+
+###
+# tests
+
+my $dirs = 4;
+
+# setup directory hierarchy
+for (my $i=0; $i < $dirs; $i++) {
+ mkdir $dir."/dir".$i;
+ for (my $j=0; $j < $dirs; $j++) {
+ mkdir $dir."/dir".$i."/subdir".$j;
+ }
+}
+mkdir "$dir/mnt";
+for (my $i=0; $i < $dirs; $i++) {
+ mkdir "$dir/mnt/mnt$i";
+ mkdir "$dir/leaf$i";
+}
+
+my $stat_pid = fork();
+
+if ($stat_pid == 0) {
+ run_stat($dir, $dirs);
+ # Never reached
+ exit;
+}
+
+my $mount_pid = fork();
+
+if ($mount_pid == 0) {
+ run_mount($dir, $dirs);
+ # Never reached
+ exit;
+}
+
+my $key = key_gen();
+
+my $audit_pid = fork();
+
+if ($audit_pid == 0) {
+ run_mark_audit($dir, $dirs, $key);
+ # Never reached
+ exit;
+}
+
+# Sleep for a minute to let stress test run...
+sleep(60);
+ok(1);
+
+###
+# cleanup
+
+kill('KILL', $stat_pid, $mount_pid, $audit_pid);
+# Wait for children to terminate
+waitpid($stat_pid, 0);
+waitpid($mount_pid, 0);
+waitpid($audit_pid, 0);
+system("auditctl -D >& /dev/null");
+umount_all($dir, $dirs, 1);
Should all the subdirectories in the temp directory be deleted, or are
they all cleaned up recursively by the tempdir command?
The CLEANUP / UNLINK parameters to tempdir() and tempfile() functions
should make sure everything is removed on exit (including possible
subdirs).
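For illustration, a minimal sketch of that behavior (hypothetical template
path, not taken from the testsuite):

#!/usr/bin/perl
# Sketch: CLEANUP => 1 makes File::Temp remove the temporary directory
# recursively at process exit, so subdirectories created after tempdir()
# returns need no explicit cleanup.
use strict;
use File::Temp qw/ tempdir /;

my $dir = tempdir( TEMPLATE => '/tmp/audit-demo-XXXX', CLEANUP => 1 );

mkdir "$dir/sub";          # nested content created later...
mkdir "$dir/sub/deeper";

print "created $dir\n";
# ...on exit the CLEANUP handler deletes $dir and everything under it.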

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Paul Moore
2018-10-05 21:06:22 UTC
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test
No commentary on the test itself, other than perhaps it should live
under test_manual/, but in running the tests in a loop today I am
reliably able to panic my test kernel after ~30m or so.

I'm using the kernel linked below which is Fedora Rawhide +
selinux/next + audit/next + audit/working-fsnotify_fixes; a link to
the patches added to the Rawhide kernel can be found in the list
archive linked below.

* https://groups.google.com/forum/#!topic/kernel-secnext/SFv0d-ij3z8
* https://copr.fedorainfracloud.org/coprs/pcmoore/kernel-secnext/build/805949

The initial panic dump is below:

[ 139.619065] list_del corruption. prev->next should be ffff985fa98d4100, but was ffff985fae91e370
[ 139.622504] ------------[ cut here ]------------
[ 139.623402] kernel BUG at lib/list_debug.c:53!
[ 139.624294] invalid opcode: 0000 [#1] SMP PTI
[ 139.625439] CPU: 1 PID: 3248 Comm: auditctl Not tainted 4.19.0-0.rc6.git2.1.1.secnext.fc30.x86_64 #1
[ 139.630761] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 139.631812] RIP: 0010:__list_del_entry_valid.cold.1+0x34/0x4c
[ 139.632853] Code: ce 34 ac e8 b8 f7 c0 ff 0f 0b 48 c7 c7 a8 cf 34 ac e8 aa f7 c0 ff 0f 0b 48 89 f2 48 89 fe 48 c7 c7 68 cf 34 ac e8 96 f7 c0 ff <0f> 0b 48 89 fe 48 c7 c7 30 cf 34 ac e8 85 f7 c0 ff 0f 0b 90 90 90
[ 139.636347] RSP: 0018:ffffb4890293fbb8 EFLAGS: 00010246
[ 139.637295] RAX: 0000000000000054 RBX: ffff985fae91e620 RCX: 0000000000000000
[ 139.638573] RDX: 0000000000000000 RSI: ffff985fb77d6be8 RDI: ffff985fb77d6be8
[ 139.639855] RBP: ffff985fa98d40c0 R08: 00046c1ac1fe8d00 R09: 0000000000000000
[ 139.641136] R10: 0000000000000000 R11: 0000000000000000 R12: ffff985fa98d4100
[ 139.642416] R13: 0000000000000000 R14: ffff985faf00fe20 R15: 00000000000003f4
[ 139.643699] FS: 00007f9898252b80(0000) GS:ffff985fb7600000(0000) knlGS:0000000000000000
[ 139.645199] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 139.646244] CR2: 0000561d333b8218 CR3: 000000023110a000 CR4: 00000000001406e0
[ 139.647533] Call Trace:
[ 139.647997] audit_remove_tree_rule+0xad/0x160
[ 139.648819] audit_del_rule+0x90/0x190
[ 139.649507] audit_rule_change+0x98e/0xbf0
[ 139.650259] audit_receive_msg+0x142/0x1160
[ 139.651030] ? netlink_deliver_tap+0x99/0x410
[ 139.651832] audit_receive+0x54/0xb0
[ 139.652495] netlink_unicast+0x181/0x210
[ 139.653211] netlink_sendmsg+0x218/0x3e0
[ 139.653936] sock_sendmsg+0x36/0x40
[ 139.654583] __sys_sendto+0xf1/0x160
[ 139.655244] ? syscall_trace_enter+0x1d3/0x330
[ 139.656055] ? trace_hardirqs_off_thunk+0x1a/0x1c
[ 139.656912] __x64_sys_sendto+0x24/0x30
[ 139.657615] do_syscall_64+0x60/0x1f0
[ 139.658285] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 139.659192] RIP: 0033:0x7f989835a7fb
[ 139.659847] Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f3 0f 1e fa 48 8d 05 25 6f 0c 00 41 89 ca 8b 00 85 c0 75 14 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 75 c3 0f 1f 40 00 41 57 4d 89 c7 41 56 41 89
[ 139.663170] RSP: 002b:00007ffc95ae6b48 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
[ 139.664531] RAX: ffffffffffffffda RBX: 0000000000000460 RCX: 00007f989835a7fb
[ 139.665809] RDX: 0000000000000460 RSI: 00007ffc95ae6b80 RDI: 0000000000000003
[ 139.667087] RBP: 0000000000000003 R08: 00007ffc95ae6b6c R09: 000000000000000c
[ 139.668370] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc95ae6b80
[ 139.669648] R13: 00007ffc95ae6b6c R14: 0000000000000002 R15: 000000000000044f
[ 139.670935] Modules linked in: ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp scsi_transport_srp rpcrdma ib_umad rdma_ucm ib_iser ib_ipoib rdma_cm iw_cm libiscsi scsi_transport_iscsi ib_cm mlx5_ib ib_uverbs ib_core crct10dif_pclmul crc32_pclmul ghash_clmulni_intel joydev virtio_balloon i2c_piix4 sunrpc drm_kms_helper ttm crc32c_intel mlx5_core drm virtio_console mlxfw serio_raw devlink virtio_blk virtio_net net_failover failover ata_generic pata_acpi qemu_fw_cfg
[ 139.678676] ---[ end trace be98f2acb1e536e4 ]---
--
paul moore
www.paul-moore.com
Jan Kara
2018-10-09 07:40:23 UTC
Post by Paul Moore
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test
No commentary on the test itself, other than perhaps it should live
under test_manual/, but in running the tests in a loop today I am
reliably able to panic my test kernel after ~30m or so.
Interesting. How do you run the test?
Post by Paul Moore
I'm using the kernel linked below which is Fedora Rawhide +
selinux/next + audit/next + audit/working-fsnotify_fixes; a link to
the patches added to the Rawhide kernel can be found in the list
archive linked below.
* https://groups.google.com/forum/#!topic/kernel-secnext/SFv0d-ij3z8
* https://copr.fedorainfracloud.org/coprs/pcmoore/kernel-secnext/build/805949
I can try to reproduce this but could you perhaps find out which of the
list manipulations in audit_remove_tree_rule() hit the corruption?

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Paul Moore
2018-10-10 06:43:46 UTC
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test
No commentary on the test itself, other than perhaps it should live
under test_manual/, but in running the tests in a loop today I am
reliably able to panic my test kernel after ~30m or so.
Interesting. How do you run the test?
Nothing fancy, just a simple bash loop:

# cd tests/stress_tree
# while ./test; do /bin/true; done
Post by Jan Kara
Post by Paul Moore
I'm using the kernel linked below which is Fedora Rawhide +
selinux/next + audit/next + audit/working-fsnotify_fixes; a link to
the patches added to the Rawhide kernel can be found in the list
archive linked below.
* https://groups.google.com/forum/#!topic/kernel-secnext/SFv0d-ij3z8
* https://copr.fedorainfracloud.org/coprs/pcmoore/kernel-secnext/build/805949
I can try to reproduce this but could you perhaps find out which of the
list manipulations in audit_remove_tree_rule() hit the corruption?
A quick dump of the audit_remove_tree_rule() function makes it look
like it is the innermost list_del_init() call.

Dump of assembler code for function audit_remove_tree_rule:
646 {
0xffffffff811ad900 <+0>: callq 0xffffffff81c01850 <__fentry__>

647 struct audit_tree *tree;

648 tree = rule->tree;
0xffffffff811ad905 <+5>: push %r14
0xffffffff811ad907 <+7>: xor %eax,%eax
0xffffffff811ad909 <+9>: push %r13
0xffffffff811ad90b <+11>: push %r12
0xffffffff811ad90d <+13>: push %rbp
0xffffffff811ad90e <+14>: push %rbx
0xffffffff811ad90f <+15>: mov 0x140(%rdi),%rbp

649 if (tree) {
0xffffffff811ad916 <+22>: test %rbp,%rbp
0xffffffff811ad919 <+25>: je 0xffffffff811ad990
<audit_remove_tree_rule+144>
0xffffffff811ad91b <+27>: mov %rdi,%rbx

650 spin_lock(&hash_lock);
651 list_del_init(&rule->rlist);
0xffffffff811ad92a <+42>: lea 0x150(%rbx),%r12

652 if (list_empty(&tree->rules) && !tree->goner) {
653 tree->root = NULL;
0xffffffff811ad999 <+153>: movq $0x0,0x8(%rbp)

654 list_del_init(&tree->same_root);
0xffffffff811ad9a1 <+161>: lea 0x40(%rbp),%r12

655 tree->goner = 1;
0xffffffff811ad9c8 <+200>: lea 0x30(%rbp),%r12
0xffffffff811ad9cc <+204>: movl $0x1,0x4(%rbp)

656 list_move(&tree->list, &prune_list);
657 rule->tree = NULL;
0xffffffff811ada21 <+289>: movq $0x0,0x140(%rbx)

658 spin_unlock(&hash_lock);
659 audit_schedule_prune();

660 return 1;
0xffffffff811ada44 <+324>: mov $0x1,%eax
0xffffffff811ada49 <+329>: pop %rbx
0xffffffff811ada4a <+330>: pop %rbp
0xffffffff811ada4b <+331>: pop %r12
0xffffffff811ada4d <+333>: pop %r13
0xffffffff811ada4f <+335>: pop %r14
0xffffffff811ada51 <+337>: retq
0xffffffff811ada52: data16 nopw %cs:0x0(%rax,%rax,1)
0xffffffff811ada5d: nopl (%rax)

661 }
662 rule->tree = NULL;
0xffffffff811ad974 <+116>: movq $0x0,0x140(%rbx)

663 spin_unlock(&hash_lock);
664 return 1;
0xffffffff811ad98b <+139>: mov $0x1,%eax
0xffffffff811ad990 <+144>: pop %rbx
0xffffffff811ad991 <+145>: pop %rbp
0xffffffff811ad992 <+146>: pop %r12
0xffffffff811ad994 <+148>: pop %r13
0xffffffff811ad996 <+150>: pop %r14
0xffffffff811ad998 <+152>: retq

665 }
666 return 0;
667 }
--
paul moore
www.paul-moore.com
Jan Kara
2018-10-11 11:39:37 UTC
Post by Paul Moore
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test
No commentary on the test itself, other than perhaps it should live
under test_manual/, but in running the tests in a loop today I am
reliably able to panic my test kernel after ~30m or so.
Interesting. How do you run the test?
# cd tests/stress_tree
# while ./test; do /bin/true; done
OK, I did succeed in reproducing some problems with my patches - once I was
able to trigger a livelock and the ensuing softlockup warning - this is
actually a problem introduced by my patches - and once a use-after-free
issue (not sure what that was, since after I added some debugging I
wasn't able to trigger it anymore). Anyway, I'll try more after fixing the
livelock. Do you want me to add fixes on top of my series or just fix up
the original series?

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Paul Moore
2018-10-11 23:03:53 UTC
Permalink
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test
No commentary on the test itself, other than perhaps it should live
under test_manual/, but in running the tests in a loop today I am
reliably able to panic my test kernel after ~30m or so.
Interesting. How do you run the test?
# cd tests/stress_tree
# while ./test; do /bin/true; done
OK, I did succeed in reproducing some problems with my patches - once I was
able to trigger a livelock and the ensuing softlockup warning - this is
actually a problem introduced by my patches - and once a use-after-free
issue (not sure what that was, since after I added some debugging I
wasn't able to trigger it anymore). Anyway, I'll try more after fixing the
livelock. Do you want me to add fixes on top of my series or just fix up
the original series?
Since these are pretty serious bugs, and I try to avoid merging known-broken patches which will go up to Linus, why don't you go ahead and respin the patchset with the new fixes included. You can also use the opportunity to squash in the rename patch and fix that mid-patchset compilation problem that I fixed up during the merge.

Thanks.

--
paul moore
www.paul-moore.com
Jan Kara
2018-10-15 10:04:03 UTC
Permalink
Post by Paul Moore
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test
No commentary on the test itself, other than perhaps it should live
under test_manual/, but in running the tests in a loop today I am
reliably able to panic my test kernel after ~30m or so.
Interesting. How do you run the test?
# cd tests/stress_tree
# while ./test; do /bin/true; done
OK, I did succeed in reproducing some problems with my patches - once I was
able to trigger a livelock and the ensuing softlockup warning - this is
actually a problem introduced by my patches - and once a use-after-free
issue (not sure what that was, since after I added some debugging I
wasn't able to trigger it anymore). Anyway, I'll try more after fixing the
livelock. Do you want me to add fixes on top of my series or just fix up
the original series?
Since these are pretty serious bugs, and I try to avoid merging
known-broken patches which will go up to Linus, why don't you go ahead
and respin the patchset with the new fixes included. You can also use
the opportunity to squash in the rename patch and fix that mid-patchset
compilation problem that I fixed up during the merge.
OK, I'm now testing a version with the softlockup fixed and some locking
around untag_chunk() simplified when I had to meddle with that anyway. I'll
see if I can hit further failures...

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Paul Moore
2018-10-15 15:39:51 UTC
Permalink
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test
No commentary on the test itself, other than perhaps it should live
under test_manual/, but in running the tests in a loop today I am
reliably able to panic my test kernel after ~30m or so.
Interesting. How do you run the test?
# cd tests/stress_tree
# while ./test; do /bin/true; done
OK, I did succeed in reproducing some problems with my patches - once I was
able to trigger a livelock and the ensuing softlockup warning - this is
actually a problem introduced by my patches - and once a use-after-free
issue (not sure what that was, since after I added some debugging I
wasn't able to trigger it anymore). Anyway, I'll try more after fixing the
livelock. Do you want me to add fixes on top of my series or just fix up
the original series?
Since these are pretty serious bugs, and I try to avoid merging
known-broken patches which will go up to Linus, why don't you go ahead
and respin the patchset with the new fixes included. You can also use
the opportunity to squash in the rename patch and fix that mid-patchset
compilation problem that I fixed up during the merge.
OK, I'm now testing a version with the softlockup fixed and some locking
around untag_chunk() simplified when I had to meddle with that anyway. I'll
see if I can hit further failures...
Thanks for the update, let me know how the testing goes ...
--
paul moore
www.paul-moore.com
Jan Kara
2018-10-17 10:09:52 UTC
Permalink
Post by Paul Moore
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test
No commentary on the test itself, other than perhaps it should live
under test_manual/, but in running the tests in a loop today I am
reliably able to panic my test kernel after ~30m or so.
Interesting. How do you run the test?
# cd tests/stress_tree
# while ./test; do /bin/true; done
OK, I did succeed in reproducing some problems with my patches - once I was
able to trigger a livelock and the ensuing softlockup warning - this is
actually a problem introduced by my patches - and once a use-after-free
issue (not sure what that was, since after I added some debugging I
wasn't able to trigger it anymore). Anyway, I'll try more after fixing the
livelock. Do you want me to add fixes on top of my series or just fix up
the original series?
Since these are pretty serious bugs, and I try to avoid merging
known-broken patches which will go up to Linus, why don't you go ahead
and respin the patchset with the new fixes included. You can also use
the opportunity to squash in the rename patch and fix that mid-patchset
compilation problem that I fixed up during the merge.
OK, I'm now testing a version with the softlockup fixed and some locking
around untag_chunk() simplified when I had to meddle with that anyway. I'll
see if I can hit further failures...
Thanks for the update, let me know how the testing goes ...
OK, yesterday I finally nailed down the list corruption. Testing has run
fine for 10 hours; after that it crashed due to an independent problem in
the fsnotify infrastructure. I've restarted the testing, but I think the
patches are good for another posting - will send in a minute.

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Paul Moore
2018-11-14 00:34:18 UTC
Permalink
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test
I'd like to get this into the audit-testsuite repo, but I think it
should live under test_manual/ instead of tests, is that okay with
you? If so, no need to resubmit, I can move the file during the
merge.
Post by Jan Kara
diff --git a/tests/stress_tree/Makefile b/tests/stress_tree/Makefile
new file mode 100644
index 000000000000..7ade09aad86f
--- /dev/null
+++ b/tests/stress_tree/Makefile
@@ -0,0 +1,8 @@
+TARGETS=$(patsubst %.c,%,$(wildcard *.c))
+
+LDLIBS += -lpthread
+
+all: $(TARGETS)
+ rm -f $(TARGETS)
+
diff --git a/tests/stress_tree/test b/tests/stress_tree/test
new file mode 100755
index 000000000000..6215bec810d1
--- /dev/null
+++ b/tests/stress_tree/test
@@ -0,0 +1,171 @@
+#!/usr/bin/perl
+
+use strict;
+
+use Test;
+BEGIN { plan tests => 1 }
+
+use File::Temp qw/ tempdir tempfile /;
+
+###
+# functions
+
+sub key_gen {
+ my $key = "testsuite-" . time . "-";
+ return $key;
+}
+
+# Run stat on random files in subtrees to generate audit events
+sub run_stat {
+ my $path;
+
+ while (1) {
+ $path = "$dir/mnt/mnt".int(rand($dirs))."/subdir".int(rand($dirs));
+ stat($path);
+ }
+}
+
+# Generate audit rules for subtrees. Do one rule per subtree. Because the
+# watch recursively iterates child mounts and we mount $dir/leaf$i under
+# various subtrees, the inode corresponding to $dir/leaf$i gets tagged by
+# different trees.
+sub run_mark_audit {
+
+ while (1) {
+ for (my $i=0; $i < $dirs; $i++) {
+ system("auditctl -w $dir/mnt/mnt$i -p r -k $key");
+ }
+ system("auditctl -D -k $key >& /dev/null");
+ }
+}
+
+sub umount_all {
+
+ for (my $i=0; $i < $dirs; $i++) {
+ while (system("umount $dir/leaf$i >& /dev/null") > 0 &&
+ $ignore_fail == 0) {
+ # Nothing - loop until umount succeeds
+ }
+ }
+ for (my $i=0; $i < $dirs; $i++) {
+ for (my $j=0; $j < $dirs; $j++) {
+ while (system("umount $dir/mnt/mnt$i/subdir$j >& /dev/null") > 0 &&
+ $ignore_fail == 0) {
+ # Nothing - loop until umount succeeds
+ }
+ }
+ while (system("umount $dir/mnt/mnt$i >& /dev/null") > 0 &&
+ $ignore_fail == 0) {
+ # Nothing - loop until umount succeeds
+ }
+ }
+}
+
+# Mount and unmount filesystems. We pick a random leaf mount so that
+# sometimes a leaf mount point's root inode will gather more tags from
+# different trees and sometimes we will be quicker in unmounting all
+# instances of a leaf and thus exercise the inode eviction path.
+sub run_mount {
+
+ while (1) {
+ # We use tmpfs here and not just bind mounts of some dir so
+ # that the root inode gets evicted once all instances are
+ # unmounted.
+ for (my $i=0; $i < $dirs; $i++) {
+ system("mount -t tmpfs none $dir/leaf$i");
+ }
+ for (my $i=0; $i < $dirs; $i++) {
+ system("mount --bind $dir/dir$i $dir/mnt/mnt$i");
+ for (my $j=0; $j < $dirs; $j++) {
+ my $leaf="$dir/leaf".int(rand($dirs));
+ system("mount --bind $leaf $dir/mnt/mnt$i/subdir$j");
+ }
+ }
+ umount_all($dir, $dirs, 0);
+ }
+}
+
+
+###
+# setup
+
+# reset audit
+system("auditctl -D >& /dev/null");
+
+# create temp directory
+my $dir = tempdir( TEMPLATE => '/tmp/audit-testsuite-XXXX', CLEANUP => 1 );
+
+# create stdout/stderr sinks
+( my $fh_out, my $stdout ) = tempfile(
+ TEMPLATE => '/tmp/audit-testsuite-out-XXXX',
+ UNLINK => 1
+);
+( my $fh_err, my $stderr ) = tempfile(
+ TEMPLATE => '/tmp/audit-testsuite-err-XXXX',
+ UNLINK => 1
+);
+
+###
+# tests
+
+my $dirs = 4;
+
+# setup directory hierarchy
+for (my $i=0; $i < $dirs; $i++) {
+ mkdir $dir."/dir".$i;
+ for (my $j=0; $j < $dirs; $j++) {
+ mkdir $dir."/dir".$i."/subdir".$j;
+ }
+}
+mkdir "$dir/mnt";
+for (my $i=0; $i < $dirs; $i++) {
+ mkdir "$dir/mnt/mnt$i";
+ mkdir "$dir/leaf$i";
+}
+
+my $stat_pid = fork();
+
+if ($stat_pid == 0) {
+ run_stat($dir, $dirs);
+ # Never reached
+ exit;
+}
+
+my $mount_pid = fork();
+
+if ($mount_pid == 0) {
+ run_mount($dir, $dirs);
+ # Never reached
+ exit;
+}
+
+my $key = key_gen();
+
+my $audit_pid = fork();
+
+if ($audit_pid == 0) {
+ run_mark_audit($dir, $dirs, $key);
+ # Never reached
+ exit;
+}
+
+# Sleep for a minute to let stress test run...
+sleep(60);
+ok(1);
+
+###
+# cleanup
+
+kill('KILL', $stat_pid, $mount_pid, $audit_pid);
+# Wait for children to terminate
+waitpid($stat_pid, 0);
+waitpid($mount_pid, 0);
+waitpid($audit_pid, 0);
+system("auditctl -D >& /dev/null");
+umount_all($dir, $dirs, 1);
--
2.16.4
--
paul moore
www.paul-moore.com
Jan Kara
2018-11-14 12:16:16 UTC
Permalink
Post by Paul Moore
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test
I'd like to get this into the audit-testsuite repo, but I think it
should live under test_manual/ instead of tests, is that okay with
you? If so, no need to resubmit, I can move the file during the
merge.
Sure, move it wherever you find it best.

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Paul Moore
2018-11-19 15:19:55 UTC
Permalink
Post by Jan Kara
Post by Paul Moore
Post by Jan Kara
Add stress test for stressing audit tree watches by adding and deleting
rules while events are generated and watched filesystems are mounted and
unmounted in parallel.
---
tests/stress_tree/Makefile | 8 +++
tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
create mode 100644 tests/stress_tree/Makefile
create mode 100755 tests/stress_tree/test
I'd like to get this into the audit-testsuite repo, but I think it
should live under test_manual/ instead of tests, is that okay with
you? If so, no need to resubmit, I can move the file during the
merge.
Sure, move it wherever you find it best.
Great, merged into the tests_manual directory, and fixed up some style
nits found via ./tools/check-syntax. Thanks again!
--
paul moore
www.paul-moore.com
Jan Kara
2018-09-04 16:06:23 UTC
Permalink
The audit tree code replaces marks attached to inodes in a non-atomic
way. Thus fsnotify_find_mark() in tag_chunk() may find a mark that
belongs to a chunk that is no longer valid and will soon be destroyed.
Tags added to such a chunk will simply be lost.

Fix the problem by making sure the old mark is flagged as going away
(through fsnotify_detach_mark()) before dropping mark_mutex, and thus
atomically with respect to tag_chunk(). Note that this does not fix the
problem completely: if tag_chunk() finds a mark that is going away, it
fails with -ENOENT. But at least the failure is not silent, and currently
there's no way to search for another fsnotify mark attached to the inode.
We'll fix this problem in a later patch.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 8a74b468b666..c194dbd53753 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -278,8 +278,9 @@ static void untag_chunk(struct node *p)
list_del_init(&p->list);
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
+ fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
- fsnotify_destroy_mark(entry, audit_tree_group);
+ fsnotify_free_mark(entry);
goto out;
}

@@ -320,8 +321,9 @@ static void untag_chunk(struct node *p)
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
spin_unlock(&hash_lock);
+ fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
- fsnotify_destroy_mark(entry, audit_tree_group);
+ fsnotify_free_mark(entry);
fsnotify_put_mark(&new->mark); /* drop initial reference */
goto out;

@@ -364,8 +366,9 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
+ fsnotify_detach_mark(entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_destroy_mark(entry, audit_tree_group);
+ fsnotify_free_mark(entry);
fsnotify_put_mark(entry);
return 0;
}
@@ -446,10 +449,9 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
+ fsnotify_detach_mark(chunk_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
-
- fsnotify_destroy_mark(chunk_entry, audit_tree_group);
-
+ fsnotify_free_mark(chunk_entry);
fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
return 0;
@@ -477,8 +479,9 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
spin_unlock(&hash_lock);
+ fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_destroy_mark(old_entry, audit_tree_group);
+ fsnotify_free_mark(old_entry);
fsnotify_put_mark(chunk_entry); /* drop initial reference */
fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
return 0;
--
2.16.4
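
A note on the locking pattern this patch establishes: the old mark is
detached while the group's mark_mutex is still held, and only the teardown
happens after the unlock. The userspace sketch below is illustrative only -
pthreads stand in for the kernel primitives and all names are made up; it
is not the kernel code itself:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct mark {
	bool attached;		/* mirrors FSNOTIFY_MARK_FLAG_ATTACHED */
};

static pthread_mutex_t mark_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct mark *inode_mark;	/* stands in for the inode's mark list */

/* What untag_chunk()/tag_chunk() now do when replacing a chunk's mark. */
static void detach_and_free(void)
{
	struct mark *old;

	pthread_mutex_lock(&mark_mutex);
	old = inode_mark;
	old->attached = false;	/* fsnotify_detach_mark() under mark_mutex */
	inode_mark = NULL;
	pthread_mutex_unlock(&mark_mutex);
	free(old);		/* fsnotify_free_mark(): slow path, no lock */
}

/* What tag_chunk() does: under the same mutex a half-dead mark is never
 * mistaken for a live one - it is either still attached or already gone. */
static struct mark *find_mark(void)
{
	struct mark *m;

	pthread_mutex_lock(&mark_mutex);
	m = inode_mark;
	if (m && !m->attached)
		m = NULL;	/* the caller fails with -ENOENT */
	pthread_mutex_unlock(&mark_mutex);
	return m;
}

int main(void)
{
	inode_mark = calloc(1, sizeof(*inode_mark));
	inode_mark->attached = true;
	detach_and_free();
	printf("mark after detach: %p\n", (void *)find_mark());
	return 0;
}

The essential point is that the ATTACHED flag flips inside the same critical
section that a concurrent tag_chunk() uses for its lookup, which closes the
lost-tag window; only the potentially sleeping teardown moves outside the lock.
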
Jan Kara
2018-09-04 16:06:25 UTC
Permalink
Currently, the audit tree code does not make sure that a chunk is fully
initialized before it is inserted into the hash table. So in theory an
RCU lookup could see an uninitialized structure in the hash table and
crash. Add appropriate barriers between the initialization of the
structure and its insertion into the hash table.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 32 +++++++++++++++++++++++++++++---
1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index bac5dd90c629..307749d6773c 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -186,6 +186,12 @@ static void insert_hash(struct audit_chunk *chunk)

if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
return;
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
WARN_ON_ONCE(!chunk->key);
list = chunk_hash(chunk->key);
list_add_rcu(&chunk->hash, list);
@@ -199,7 +205,11 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
struct audit_chunk *p;

list_for_each_entry_rcu(p, list, hash) {
- if (p->key == key) {
+ /*
+ * We use a data dependency barrier in READ_ONCE() to make sure
+ * the chunk we see is fully initialized.
+ */
+ if (READ_ONCE(p->key) == key) {
atomic_long_inc(&p->refs);
return p;
}
@@ -304,9 +314,15 @@ static void untag_chunk(struct node *p)
list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
}

- list_replace_rcu(&chunk->hash, &new->hash);
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&chunk->hash, &new->hash);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
@@ -368,6 +384,10 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
chunk->key = inode_to_key(inode);
+ /*
+ * Inserting into the hash table has to go last as once we do that RCU
+ * readers can see the chunk.
+ */
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
@@ -459,7 +479,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
p->owner = tree;
get_tree(tree);
list_add(&p->list, &tree->chunks);
- list_replace_rcu(&old->hash, &chunk->hash);
list_for_each_entry(owner, &chunk->trees, same_root)
owner->root = chunk;
old->dead = 1;
@@ -467,6 +486,13 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
+ /*
+ * Make sure chunk is fully initialized before making it visible in the
+ * hash. Pairs with a data dependency barrier in READ_ONCE() in
+ * audit_tree_lookup().
+ */
+ smp_wmb();
+ list_replace_rcu(&old->hash, &chunk->hash);
spin_unlock(&hash_lock);
fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
--
2.16.4
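
A note on the barrier pairing above: the writer must complete all stores to
the chunk before publishing its address, and the reader's dependent load of
the key must not be reordered before the load of the pointer. Below is a
minimal C11 userspace sketch of the same shape (illustrative only; in the
kernel the writer side is smp_wmb() before list_add_rcu()/list_replace_rcu()
and the reader side is the address dependency through READ_ONCE()):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct chunk {
	unsigned long key;
	int count;
};

static _Atomic(struct chunk *) hash_slot;	/* one-slot "hash table" */

/* Writer: fully initialize the chunk, then publish it. The release
 * store plays the role of smp_wmb() followed by the RCU list insert. */
static void insert_chunk(unsigned long key)
{
	struct chunk *c = malloc(sizeof(*c));

	c->key = key;
	c->count = 1;
	atomic_store_explicit(&hash_slot, c, memory_order_release);
}

/* Reader: the dependent load of c->key is ordered after the pointer
 * load, like READ_ONCE(p->key) in audit_tree_lookup(). */
static struct chunk *lookup_chunk(unsigned long key)
{
	struct chunk *c = atomic_load_explicit(&hash_slot,
					       memory_order_consume);

	if (c && c->key == key)
		return c;
	return NULL;
}

int main(void)
{
	insert_chunk(42);
	printf("found: %d\n", lookup_chunk(42) != NULL);
	return 0;
}

memory_order_consume is the closest C11 analogue of a data dependency
barrier; compilers typically promote it to acquire, which is strictly
stronger.
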
Jan Kara
2018-09-04 16:06:27 UTC
Permalink
The audit_tree_group->mark_mutex is held the whole time while we create
the fsnotify mark, add it to the inode, and insert the chunk into the
hash. Hence the mark cannot get detached during this time, so the check
in insert_hash() whether the mark is attached is pointless.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 2 --
1 file changed, 2 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 6978a92f6fef..af91b0d33478 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -184,8 +184,6 @@ static void insert_hash(struct audit_chunk *chunk)
{
struct list_head *list;

- if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
- return;
/*
* Make sure chunk is fully initialized before making it visible in the
* hash. Pairs with a data dependency barrier in READ_ONCE() in
--
2.16.4
Jan Kara
2018-09-04 16:06:30 UTC
Permalink
The audit tree code currently associates a new fsnotify mark with each
new chunk. As the chunk attached to an inode is replaced when a tag is
added or removed, we also need to remove the old fsnotify mark and add a
new one on each such occasion. This is cumbersome and makes the locking
rules somewhat difficult to follow.

Fix these problems by allocating the fsnotify mark independently of the
chunk and keeping it around as long as there is some chunk attached to
the inode. Also add documentation about the locking rules so that things
are easier to follow.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 164 +++++++++++++++++++++++++++-------------------------
1 file changed, 85 insertions(+), 79 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 481fdc190c2f..ef109000ed01 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -27,7 +27,6 @@ struct audit_chunk {
unsigned long key;
struct fsnotify_mark *mark;
struct list_head trees; /* with root here */
- int dead;
int count;
atomic_long_t refs;
struct rcu_head head;
@@ -48,8 +47,15 @@ static LIST_HEAD(prune_list);
static struct task_struct *prune_thread;

/*
- * One struct chunk is attached to each inode of interest.
- * We replace struct chunk on tagging/untagging.
+ * One struct chunk is attached to each inode of interest through
+ * audit_tree_mark (fsnotify mark). We replace struct chunk on tagging /
+ * untagging, the mark is stable as long as there is chunk attached. The
+ * association between mark and chunk is protected by hash_lock and
+ * audit_tree_group->mark_mutex. Thus as long as we hold
+ * audit_tree_group->mark_mutex and check that the mark is alive by
+ * FSNOTIFY_MARK_FLAG_ATTACHED flag check, we are sure the mark points to
+ * the current chunk.
+ *
* Rules have pointer to struct audit_tree.
* Rules have struct list_head rlist forming a list of rules over
* the same tree.
@@ -68,8 +74,12 @@ static struct task_struct *prune_thread;
* tree is refcounted; one reference for "some rules on rules_list refer to
* it", one for each chunk with pointer to it.
*
- * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
- * of watch contributes 1 to .refs).
+ * chunk is refcounted by embedded .refs. Mark associated with the chunk holds
+ * one chunk reference. This reference is dropped either when a mark is going
+ * to be freed (corresponding inode goes away) or when chunk attached to the
+ * mark gets replaced. This reference must be dropped using
+ * audit_mark_put_chunk() to make sure the reference is dropped only after RCU
+ * grace period as it protects RCU readers of the hash table.
*
* node.index allows to get from node.list to containing chunk.
* MSB of that sucker is stolen to mark taggings that we might have to
@@ -160,8 +170,6 @@ static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)

static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
- struct audit_chunk *chunk = mark_chunk(entry);
- audit_mark_put_chunk(chunk);
kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
}

@@ -188,13 +196,6 @@ static struct audit_chunk *alloc_chunk(int count)
if (!chunk)
return NULL;

- chunk->mark = alloc_mark();
- if (!chunk->mark) {
- kfree(chunk);
- return NULL;
- }
- audit_mark(chunk->mark)->chunk = chunk;
-
INIT_LIST_HEAD(&chunk->hash);
INIT_LIST_HEAD(&chunk->trees);
chunk->count = count;
@@ -277,6 +278,20 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}

+static void replace_mark_chunk(struct fsnotify_mark *entry,
+ struct audit_chunk *chunk)
+{
+ struct audit_chunk *old;
+
+ assert_spin_locked(&hash_lock);
+ old = mark_chunk(entry);
+ audit_mark(entry)->chunk = chunk;
+ if (chunk)
+ chunk->mark = entry;
+ if (old)
+ old->mark = NULL;
+}
+
static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
struct node *skip)
{
@@ -300,6 +315,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
get_tree(owner);
list_replace_init(&old->owners[j].list, &new->owners[i].list);
}
+ replace_mark_chunk(old->mark, new);
/*
* Make sure chunk is fully initialized before making it visible in the
* hash. Pairs with a data dependency barrier in READ_ONCE() in
@@ -317,6 +333,10 @@ static void untag_chunk(struct node *p)
struct audit_tree *owner;
int size = chunk->count - 1;

+ /* Racing with audit_tree_freeing_mark()? */
+ if (!entry)
+ return;
+
fsnotify_get_mark(entry);

spin_unlock(&hash_lock);
@@ -326,29 +346,30 @@ static void untag_chunk(struct node *p)

mutex_lock(&entry->group->mark_mutex);
/*
- * mark_mutex protects mark from getting detached and thus also from
- * mark->connector->obj getting NULL.
+ * mark_mutex stabilizes chunk attached to the mark so we can check
+ * whether it didn't change while we've dropped hash_lock.
*/
- if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
+ if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
+ mark_chunk(entry) != chunk) {
mutex_unlock(&entry->group->mark_mutex);
- if (new)
- fsnotify_put_mark(new->mark);
+ kfree(new);
goto out;
}

owner = p->owner;

if (!size) {
- chunk->dead = 1;
spin_lock(&hash_lock);
list_del_init(&chunk->trees);
if (owner->root == chunk)
owner->root = NULL;
list_del_init(&p->list);
list_del_rcu(&chunk->hash);
+ replace_mark_chunk(entry, NULL);
spin_unlock(&hash_lock);
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
+ audit_mark_put_chunk(chunk);
fsnotify_free_mark(entry);
goto out;
}
@@ -356,13 +377,6 @@ static void untag_chunk(struct node *p)
if (!new)
goto Fallback;

- if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
- FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- fsnotify_put_mark(new->mark);
- goto Fallback;
- }
-
- chunk->dead = 1;
spin_lock(&hash_lock);
if (owner->root == chunk) {
list_del_init(&owner->same_root);
@@ -375,10 +389,8 @@ static void untag_chunk(struct node *p)
*/
replace_chunk(new, chunk, p);
spin_unlock(&hash_lock);
- fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
- fsnotify_free_mark(entry);
- fsnotify_put_mark(new->mark); /* drop initial reference */
+ audit_mark_put_chunk(chunk);
goto out;

Fallback:
@@ -409,23 +421,31 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOMEM;
}

- entry = chunk->mark;
+ entry = alloc_mark();
+ if (!entry) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
+ kfree(chunk);
+ return -ENOMEM;
+ }
+
if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry);
+ kfree(chunk);
return -ENOSPC;
}

spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
- chunk->dead = 1;
fsnotify_detach_mark(entry);
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_free_mark(entry);
fsnotify_put_mark(entry);
+ kfree(chunk);
return 0;
}
+ replace_mark_chunk(entry, chunk);
chunk->owners[0].index = (1U << 31);
chunk->owners[0].owner = tree;
get_tree(tree);
@@ -442,33 +462,41 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry); /* drop initial reference */
+ /*
+ * Drop our initial reference. When mark we point to is getting freed,
+ * we get notification through ->freeing_mark callback and cleanup
+ * chunk pointing to this mark.
+ */
+ fsnotify_put_mark(entry);
return 0;
}

/* the first tagged inode becomes root of tree */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
- struct fsnotify_mark *old_entry, *chunk_entry;
+ struct fsnotify_mark *entry;
struct audit_chunk *chunk, *old;
struct node *p;
int n;

mutex_lock(&audit_tree_group->mark_mutex);
- old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
- audit_tree_group);
- if (!old_entry)
+ entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
+ if (!entry)
return create_chunk(inode, tree);

- old = mark_chunk(old_entry)->chunk;
-
+ /*
+ * Found mark is guaranteed to be attached and mark_mutex protects mark
+ * from getting detached and thus it makes sure there is chunk attached
+ * to the mark.
+ */
/* are we already there? */
spin_lock(&hash_lock);
+ old = mark_chunk(entry);
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(old_entry);
+ fsnotify_put_mark(entry);
return 0;
}
}
@@ -477,41 +505,16 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk = alloc_chunk(old->count + 1);
if (!chunk) {
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(old_entry);
+ fsnotify_put_mark(entry);
return -ENOMEM;
}

- chunk_entry = chunk->mark;
-
- /*
- * mark_mutex protects mark from getting detached and thus also from
- * mark->connector->obj getting NULL.
- */
- if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
- /* old_entry is being shot, lets just lie */
- mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(old_entry);
- fsnotify_put_mark(chunk->mark);
- return -ENOENT;
- }
-
- if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
- FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(chunk_entry);
- fsnotify_put_mark(old_entry);
- return -ENOSPC;
- }
-
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
- chunk->dead = 1;
- fsnotify_detach_mark(chunk_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_free_mark(chunk_entry);
- fsnotify_put_mark(chunk_entry);
- fsnotify_put_mark(old_entry);
+ fsnotify_put_mark(entry);
+ kfree(chunk);
return 0;
}
p = &chunk->owners[chunk->count - 1];
@@ -519,7 +522,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
p->owner = tree;
get_tree(tree);
list_add(&p->list, &tree->chunks);
- old->dead = 1;
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
@@ -530,11 +532,10 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
*/
replace_chunk(chunk, old, NULL);
spin_unlock(&hash_lock);
- fsnotify_detach_mark(old_entry);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_free_mark(old_entry);
- fsnotify_put_mark(chunk_entry); /* drop initial reference */
- fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
+ fsnotify_put_mark(entry); /* pair to fsnotify_find mark_entry */
+ audit_mark_put_chunk(old);
+
return 0;
}

@@ -1003,10 +1004,6 @@ static void evict_chunk(struct audit_chunk *chunk)
int need_prune = 0;
int n;

- if (chunk->dead)
- return;
-
- chunk->dead = 1;
mutex_lock(&audit_filter_mutex);
spin_lock(&hash_lock);
while (!list_empty(&chunk->trees)) {
@@ -1045,9 +1042,18 @@ static int audit_tree_handle_event(struct fsnotify_group *group,

static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
{
- struct audit_chunk *chunk = mark_chunk(entry);
+ struct audit_chunk *chunk;

- evict_chunk(chunk);
+ mutex_lock(&entry->group->mark_mutex);
+ spin_lock(&hash_lock);
+ chunk = mark_chunk(entry);
+ replace_mark_chunk(entry, NULL);
+ spin_unlock(&hash_lock);
+ mutex_unlock(&entry->group->mark_mutex);
+ if (chunk) {
+ evict_chunk(chunk);
+ audit_mark_put_chunk(chunk);
+ }

/*
* We are guaranteed to have at least one reference to the mark from
--
2.16.4
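
The core invariant of this patch - one long-lived mark per inode whose
chunk pointer is swapped in place, plus re-validation after relocking -
can be reduced to a small userspace model. All names below are hypothetical
and pthread mutexes stand in for mark_mutex and hash_lock; this is a sketch
of the shape, not the kernel code:

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct chunk;

struct tree_mark {
	bool attached;		/* FSNOTIFY_MARK_FLAG_ATTACHED */
	struct chunk *chunk;	/* hash_lock + mark_mutex protect this */
};

struct chunk {
	struct tree_mark *mark;	/* back-pointer, NULL once replaced */
};

static pthread_mutex_t mark_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Mirrors replace_mark_chunk(); callers hold hash_lock. */
static void replace_mark_chunk(struct tree_mark *mark, struct chunk *chunk)
{
	struct chunk *old = mark->chunk;

	mark->chunk = chunk;
	if (chunk)
		chunk->mark = mark;
	if (old)
		old->mark = NULL;
}

/* Mirrors the re-check in untag_chunk() after dropping and retaking the
 * locks: proceed only if the mark is still attached and still points to
 * the chunk we started from. */
static bool chunk_still_current(struct tree_mark *mark, struct chunk *chunk)
{
	bool ok;

	pthread_mutex_lock(&mark_mutex);
	ok = mark->attached && mark->chunk == chunk;
	pthread_mutex_unlock(&mark_mutex);
	return ok;
}

int main(void)
{
	struct chunk c1 = { 0 }, c2 = { 0 };
	struct tree_mark m = { .attached = true, .chunk = NULL };

	replace_mark_chunk(&m, &c1);	/* first chunk tagged */
	replace_mark_chunk(&m, &c2);	/* chunk replaced, mark stays */
	assert(chunk_still_current(&m, &c2));
	assert(!chunk_still_current(&m, &c1));	/* c1 is stale */
	assert(c1.mark == NULL);	/* back-pointer cleared */
	return 0;
}

The asymmetry is deliberate: the mark can outlive any particular chunk (it
stays until the inode drops it), while a chunk is only freed after the RCU
grace period that audit_mark_put_chunk() arranges.
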
Jan Kara
2018-09-04 16:06:28 UTC
Permalink
Provide a helper function audit_mark_put_chunk() for dropping the mark's
reference to the chunk (which has to happen only after an RCU grace
period expires). Currently that happens in only a single place, but later
patches introduce more callers.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index af91b0d33478..0cd08b3581f1 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -132,10 +132,20 @@ static void __put_chunk(struct rcu_head *rcu)
audit_put_chunk(chunk);
}

+/*
+ * Drop reference to the chunk that was held by the mark. This is the reference
+ * that gets dropped after we've removed the chunk from the hash table and we
+ * use it to make sure chunk cannot be freed before RCU grace period expires.
+ */
+static void audit_mark_put_chunk(struct audit_chunk *chunk)
+{
+ call_rcu(&chunk->head, __put_chunk);
+}
+
static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
- call_rcu(&chunk->head, __put_chunk);
+ audit_mark_put_chunk(chunk);
}

static struct audit_chunk *alloc_chunk(int count)
--
2.16.4
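
The helper encapsulates the classic RCU deferred-free shape: unhash first,
then free only after every reader that might still hold a pointer has
finished. A rough userspace approximation follows (illustrative only; the
write lock stands in for the grace period, and unlike call_rcu() this
variant blocks the caller the way synchronize_rcu() would):

#include <pthread.h>
#include <stdlib.h>

struct chunk {
	unsigned long key;
};

static pthread_rwlock_t grace = PTHREAD_RWLOCK_INITIALIZER;

/* Readers (hash table lookups) run under the read side. */
static unsigned long reader(struct chunk *c)
{
	unsigned long key;

	pthread_rwlock_rdlock(&grace);
	key = c->key;		/* safe: the writer cannot free c yet */
	pthread_rwlock_unlock(&grace);
	return key;
}

/* audit_mark_put_chunk() analogue: wait out all readers, then free.
 * (The kernel uses call_rcu(), which defers the free instead of
 * blocking here.) */
static void put_chunk(struct chunk *c)
{
	pthread_rwlock_wrlock(&grace);	/* "grace period" elapses */
	pthread_rwlock_unlock(&grace);
	free(c);
}

int main(void)
{
	struct chunk *c = malloc(sizeof(*c));

	c->key = 1;
	reader(c);
	put_chunk(c);
	return 0;
}
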
Jan Kara
2018-09-04 16:06:31 UTC
Permalink
Variables pointing to fsnotify_mark are sometimes called 'entry' and
sometimes 'mark'. Use 'mark' in all places.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 95 +++++++++++++++++++++++++++--------------------------
1 file changed, 48 insertions(+), 47 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ef109000ed01..9c53f7c37bdf 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -158,9 +158,9 @@ static void audit_mark_put_chunk(struct audit_chunk *chunk)
call_rcu(&chunk->head, __put_chunk);
}

-static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *entry)
+static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *mark)
{
- return container_of(entry, struct audit_tree_mark, mark);
+ return container_of(mark, struct audit_tree_mark, mark);
}

static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
@@ -168,9 +168,9 @@ static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
return audit_mark(mark)->chunk;
}

-static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
+static void audit_tree_destroy_watch(struct fsnotify_mark *mark)
{
- kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
+ kmem_cache_free(audit_tree_mark_cachep, audit_mark(mark));
}

static struct fsnotify_mark *alloc_mark(void)
@@ -224,7 +224,7 @@ static inline struct list_head *chunk_hash(unsigned long key)
return chunk_hash_heads + n % HASH_SIZE;
}

-/* hash_lock & entry->group->mark_mutex is held by caller */
+/* hash_lock & mark->group->mark_mutex is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
struct list_head *list;
@@ -278,16 +278,16 @@ static struct audit_chunk *find_chunk(struct node *p)
return container_of(p, struct audit_chunk, owners[0]);
}

-static void replace_mark_chunk(struct fsnotify_mark *entry,
+static void replace_mark_chunk(struct fsnotify_mark *mark,
struct audit_chunk *chunk)
{
struct audit_chunk *old;

assert_spin_locked(&hash_lock);
- old = mark_chunk(entry);
- audit_mark(entry)->chunk = chunk;
+ old = mark_chunk(mark);
+ audit_mark(mark)->chunk = chunk;
if (chunk)
- chunk->mark = entry;
+ chunk->mark = mark;
if (old)
old->mark = NULL;
}
@@ -328,30 +328,30 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
- struct fsnotify_mark *entry = chunk->mark;
+ struct fsnotify_mark *mark = chunk->mark;
struct audit_chunk *new = NULL;
struct audit_tree *owner;
int size = chunk->count - 1;

/* Racing with audit_tree_freeing_mark()? */
- if (!entry)
+ if (!mark)
return;

- fsnotify_get_mark(entry);
+ fsnotify_get_mark(mark);

spin_unlock(&hash_lock);

if (size)
new = alloc_chunk(size);

- mutex_lock(&entry->group->mark_mutex);
+ mutex_lock(&mark->group->mark_mutex);
/*
* mark_mutex stabilizes chunk attached to the mark so we can check
* whether it didn't change while we've dropped hash_lock.
*/
- if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
- mark_chunk(entry) != chunk) {
- mutex_unlock(&entry->group->mark_mutex);
+ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
+ mark_chunk(mark) != chunk) {
+ mutex_unlock(&mark->group->mark_mutex);
kfree(new);
goto out;
}
@@ -365,12 +365,12 @@ static void untag_chunk(struct node *p)
owner->root = NULL;
list_del_init(&p->list);
list_del_rcu(&chunk->hash);
- replace_mark_chunk(entry, NULL);
+ replace_mark_chunk(mark, NULL);
spin_unlock(&hash_lock);
- fsnotify_detach_mark(entry);
- mutex_unlock(&entry->group->mark_mutex);
+ fsnotify_detach_mark(mark);
+ mutex_unlock(&mark->group->mark_mutex);
audit_mark_put_chunk(chunk);
- fsnotify_free_mark(entry);
+ fsnotify_free_mark(mark);
goto out;
}

@@ -389,7 +389,7 @@ static void untag_chunk(struct node *p)
*/
replace_chunk(new, chunk, p);
spin_unlock(&hash_lock);
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&mark->group->mark_mutex);
audit_mark_put_chunk(chunk);
goto out;

@@ -404,16 +404,16 @@ static void untag_chunk(struct node *p)
p->owner = NULL;
put_tree(owner);
spin_unlock(&hash_lock);
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&mark->group->mark_mutex);
out:
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(mark);
spin_lock(&hash_lock);
}

/* Call with group->mark_mutex held, releases it */
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
- struct fsnotify_mark *entry;
+ struct fsnotify_mark *mark;
struct audit_chunk *chunk = alloc_chunk(1);

if (!chunk) {
@@ -421,16 +421,16 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOMEM;
}

- entry = alloc_mark();
- if (!entry) {
+ mark = alloc_mark();
+ if (!mark) {
mutex_unlock(&audit_tree_group->mark_mutex);
kfree(chunk);
return -ENOMEM;
}

- if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
+ if (fsnotify_add_inode_mark_locked(mark, inode, 0)) {
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(mark);
kfree(chunk);
return -ENOSPC;
}
@@ -438,14 +438,14 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
- fsnotify_detach_mark(entry);
+ fsnotify_detach_mark(mark);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_free_mark(entry);
- fsnotify_put_mark(entry);
+ fsnotify_free_mark(mark);
+ fsnotify_put_mark(mark);
kfree(chunk);
return 0;
}
- replace_mark_chunk(entry, chunk);
+ replace_mark_chunk(mark, chunk);
chunk->owners[0].index = (1U << 31);
chunk->owners[0].owner = tree;
get_tree(tree);
@@ -467,21 +467,21 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
* we get notification through ->freeing_mark callback and cleanup
* chunk pointing to this mark.
*/
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(mark);
return 0;
}

/* the first tagged inode becomes root of tree */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
- struct fsnotify_mark *entry;
+ struct fsnotify_mark *mark;
struct audit_chunk *chunk, *old;
struct node *p;
int n;

mutex_lock(&audit_tree_group->mark_mutex);
- entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
- if (!entry)
+ mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
+ if (!mark)
return create_chunk(inode, tree);

/*
@@ -491,12 +491,12 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
*/
/* are we already there? */
spin_lock(&hash_lock);
- old = mark_chunk(entry);
+ old = mark_chunk(mark);
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(mark);
return 0;
}
}
@@ -505,7 +505,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk = alloc_chunk(old->count + 1);
if (!chunk) {
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(mark);
return -ENOMEM;
}

@@ -513,7 +513,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) {
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(mark);
kfree(chunk);
return 0;
}
@@ -533,7 +533,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
replace_chunk(chunk, old, NULL);
spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex);
- fsnotify_put_mark(entry); /* pair to fsnotify_find mark_entry */
+ fsnotify_put_mark(mark); /* pair to fsnotify_find_mark */
audit_mark_put_chunk(old);

return 0;
@@ -1040,16 +1040,17 @@ static int audit_tree_handle_event(struct fsnotify_group *group,
return 0;
}

-static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
+static void audit_tree_freeing_mark(struct fsnotify_mark *mark,
+ struct fsnotify_group *group)
{
struct audit_chunk *chunk;

- mutex_lock(&entry->group->mark_mutex);
+ mutex_lock(&mark->group->mark_mutex);
spin_lock(&hash_lock);
- chunk = mark_chunk(entry);
- replace_mark_chunk(entry, NULL);
+ chunk = mark_chunk(mark);
+ replace_mark_chunk(mark, NULL);
spin_unlock(&hash_lock);
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&mark->group->mark_mutex);
if (chunk) {
evict_chunk(chunk);
audit_mark_put_chunk(chunk);
@@ -1059,7 +1060,7 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify
* We are guaranteed to have at least one reference to the mark from
* either the inode or the caller of fsnotify_destroy_mark().
*/
- BUG_ON(refcount_read(&entry->refcnt) < 1);
+ BUG_ON(refcount_read(&mark->refcnt) < 1);
}

static const struct fsnotify_ops audit_tree_ops = {
--
2.16.4
Richard Guy Briggs
2018-09-14 18:29:45 UTC
Permalink
Post by Jan Kara
Variables pointing to fsnotify_mark are sometimes called 'entry' and
sometimes 'mark'. Use 'mark' in all places.
Thank you, thank you, thank you! This would have been great at the
beginning of the patchset!
* either the inode or the caller of fsnotify_destroy_mark().
*/
- BUG_ON(refcount_read(&entry->refcnt) < 1);
+ BUG_ON(refcount_read(&mark->refcnt) < 1);
}
static const struct fsnotify_ops audit_tree_ops = {
--
2.16.4
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Jan Kara
2018-09-17 16:44:21 UTC
Permalink
Post by Richard Guy Briggs
Post by Jan Kara
Variables pointing to fsnotify_mark are sometimes called 'entry' and
sometimes 'mark'. Use 'mark' in all places.
Thank you, thank you, thank you! This would have been great at the
beginning of the patchset!
Yeah, I know but then I'd have to rewrite all the patches which I was too
lazy to do... At least it's improved for the future.

Honza
...
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Richard Guy Briggs
2018-09-17 18:13:40 UTC
Permalink
Post by Jan Kara
Post by Richard Guy Briggs
Post by Jan Kara
Variables pointing to fsnotify_mark are sometimes called 'entry' and
sometimes 'mark'. Use 'mark' in all places.
Thank you, thank you, thank you! This would have been great at the
beginning of the patchset!
Yeah, I know but then I'd have to rewrite all the patches which I was too
lazy to do... At least it's improved for the future.
The other one I kept tripping on was "old" in the *_chunk() functions. I
guess it should be obvious that it refers to a struct audit_chunk given
the function naming, but it would have been useful to call it
"old_chunk" (similarly "new").

I know I'm free to submit a patch. :-)
...
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Jan Kara
2018-09-04 16:06:22 UTC
Permalink
When an inode is tagged with a tree, tag_chunk() checks whether there is
an audit_tree_group mark attached to the inode and adds one if not. However,
nothing prevents another tag_chunk() from adding the mark between the check
and our attempt to add the fsnotify mark, which results in an error from
fsnotify_add_mark() and consequently an ENOSPC error from tag_chunk().

Fix the problem by holding mark_mutex over the whole check-insert code
sequence.

Signed-off-by: Jan Kara <***@suse.cz>
---
kernel/audit_tree.c | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 1b55b1026a36..8a74b468b666 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -342,25 +342,29 @@ static void untag_chunk(struct node *p)
spin_lock(&hash_lock);
}

+/* Call with group->mark_mutex held, releases it */
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
struct fsnotify_mark *entry;
struct audit_chunk *chunk = alloc_chunk(1);
- if (!chunk)
+
+ if (!chunk) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
return -ENOMEM;
+ }

entry = &chunk->mark;
- if (fsnotify_add_inode_mark(entry, inode, 0)) {
+ if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry);
return -ENOSPC;
}

- mutex_lock(&entry->group->mark_mutex);
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_destroy_mark(entry, audit_tree_group);
fsnotify_put_mark(entry);
return 0;
@@ -375,7 +379,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
}
insert_hash(chunk);
spin_unlock(&hash_lock);
- mutex_unlock(&entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry); /* drop initial reference */
return 0;
}
@@ -389,6 +393,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
struct node *p;
int n;

+ mutex_lock(&audit_tree_group->mark_mutex);
old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
audit_tree_group);
if (!old_entry)
@@ -401,6 +406,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(old_entry);
return 0;
}
@@ -409,20 +415,20 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)

chunk = alloc_chunk(old->count + 1);
if (!chunk) {
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(old_entry);
return -ENOMEM;
}

chunk_entry = &chunk->mark;

- mutex_lock(&old_entry->group->mark_mutex);
/*
* mark_mutex protects mark from getting detached and thus also from
* mark->connector->obj getting NULL.
*/
if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
/* old_entry is being shot, lets just lie */
- mutex_unlock(&old_entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(old_entry);
fsnotify_put_mark(&chunk->mark);
return -ENOENT;
@@ -430,7 +436,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)

if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- mutex_unlock(&old_entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
return -ENOSPC;
@@ -440,7 +446,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
- mutex_unlock(&old_entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);

fsnotify_destroy_mark(chunk_entry, audit_tree_group);

@@ -471,7 +477,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
list_add(&tree->same_root, &chunk->trees);
}
spin_unlock(&hash_lock);
- mutex_unlock(&old_entry->group->mark_mutex);
+ mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_destroy_mark(old_entry, audit_tree_group);
fsnotify_put_mark(chunk_entry); /* drop initial reference */
fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
--
2.16.4
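Condensed from the diff above, the essential shape of the fix is a classic
check-then-act race closed by widening the critical section. A minimal
sketch, with names taken from the patch and all error paths omitted
(illustrative only, not compilable on its own):

	/* Before: lookup and insertion are separate critical sections. */
	old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
				       audit_tree_group);
	if (!old_entry)
		return create_chunk(inode, tree);	/* another tag_chunk()
							 * may have added a mark
							 * in the meantime, so
							 * the add fails and we
							 * return -ENOSPC */

	/* After: mark_mutex is taken up front and held across the whole
	 * check-insert sequence; create_chunk() runs with it held and is
	 * responsible for releasing it. */
	mutex_lock(&audit_tree_group->mark_mutex);
	old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
				       audit_tree_group);
	if (!old_entry)
		return create_chunk(inode, tree);	/* releases mark_mutex */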
Richard Guy Briggs
2018-09-14 14:09:09 UTC
Permalink
Post by Jan Kara
Allocate fsnotify mark independently instead of embedding it inside
chunk. This will allow us to just replace chunk attached to mark when
growing / shrinking chunk instead of replacing mark attached to inode
which is a more complex operation.
---
kernel/audit_tree.c | 64 +++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 50 insertions(+), 14 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 0cd08b3581f1..481fdc190c2f 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -25,7 +25,7 @@ struct audit_tree {
struct audit_chunk {
struct list_head hash;
unsigned long key;
- struct fsnotify_mark mark;
+ struct fsnotify_mark *mark;
struct list_head trees; /* with root here */
int dead;
int count;
@@ -38,6 +38,11 @@ struct audit_chunk {
} owners[];
};
+struct audit_tree_mark {
+ struct fsnotify_mark mark;
+ struct audit_chunk *chunk;
+};
+
static LIST_HEAD(tree_list);
static LIST_HEAD(prune_list);
static struct task_struct *prune_thread;
@@ -73,6 +78,7 @@ static struct task_struct *prune_thread;
*/
static struct fsnotify_group *audit_tree_group;
+static struct kmem_cache *audit_tree_mark_cachep __read_mostly;
static struct audit_tree *alloc_tree(const char *s)
{
@@ -142,10 +148,33 @@ static void audit_mark_put_chunk(struct audit_chunk *chunk)
call_rcu(&chunk->head, __put_chunk);
}
+static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *entry)
+{
+ return container_of(entry, struct audit_tree_mark, mark);
+}
+
+static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
+{
+ return audit_mark(mark)->chunk;
+}
+
static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
- struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
+ struct audit_chunk *chunk = mark_chunk(entry);
audit_mark_put_chunk(chunk);
+ kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
+}
+
+static struct fsnotify_mark *alloc_mark(void)
+{
+ struct audit_tree_mark *mark;
Would it make sense to call this local variable "amark" to indicate it
isn't a struct fsnotify_mark, but in fact an audit helper variant?
Post by Jan Kara
+
+ mark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL);
+ if (!mark)
+ return NULL;
+ fsnotify_init_mark(&mark->mark, audit_tree_group);
+ mark->mark.mask = FS_IN_IGNORED;
+ return &mark->mark;
This is the only place in the patch where it is used to name a variable,
but I found the "mark->mark" construct here a bit confusing to follow.
Post by Jan Kara
}
static struct audit_chunk *alloc_chunk(int count)
@@ -159,6 +188,13 @@ static struct audit_chunk *alloc_chunk(int count)
if (!chunk)
return NULL;
+ chunk->mark = alloc_mark();
+ if (!chunk->mark) {
+ kfree(chunk);
+ return NULL;
+ }
+ audit_mark(chunk->mark)->chunk = chunk;
+
INIT_LIST_HEAD(&chunk->hash);
INIT_LIST_HEAD(&chunk->trees);
chunk->count = count;
@@ -167,8 +203,6 @@ static struct audit_chunk *alloc_chunk(int count)
INIT_LIST_HEAD(&chunk->owners[i].list);
chunk->owners[i].index = i;
}
- fsnotify_init_mark(&chunk->mark, audit_tree_group);
- chunk->mark.mask = FS_IN_IGNORED;
return chunk;
}
@@ -278,7 +312,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
- struct fsnotify_mark *entry = &chunk->mark;
+ struct fsnotify_mark *entry = chunk->mark;
struct audit_chunk *new = NULL;
struct audit_tree *owner;
int size = chunk->count - 1;
@@ -298,7 +332,7 @@ static void untag_chunk(struct node *p)
if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
mutex_unlock(&entry->group->mark_mutex);
if (new)
- fsnotify_put_mark(&new->mark);
+ fsnotify_put_mark(new->mark);
goto out;
}
@@ -322,9 +356,9 @@ static void untag_chunk(struct node *p)
if (!new)
goto Fallback;
- if (fsnotify_add_mark_locked(&new->mark, entry->connector->obj,
+ if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
FSNOTIFY_OBJ_TYPE_INODE, 1)) {
- fsnotify_put_mark(&new->mark);
+ fsnotify_put_mark(new->mark);
goto Fallback;
}
@@ -344,7 +378,7 @@ static void untag_chunk(struct node *p)
fsnotify_detach_mark(entry);
mutex_unlock(&entry->group->mark_mutex);
fsnotify_free_mark(entry);
- fsnotify_put_mark(&new->mark); /* drop initial reference */
+ fsnotify_put_mark(new->mark); /* drop initial reference */
goto out;
@@ -375,7 +409,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOMEM;
}
- entry = &chunk->mark;
+ entry = chunk->mark;
if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(entry);
@@ -426,7 +460,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (!old_entry)
return create_chunk(inode, tree);
- old = container_of(old_entry, struct audit_chunk, mark);
+ old = mark_chunk(old_entry)->chunk;
/* are we already there? */
spin_lock(&hash_lock);
@@ -447,7 +481,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
return -ENOMEM;
}
- chunk_entry = &chunk->mark;
+ chunk_entry = chunk->mark;
/*
* mark_mutex protects mark from getting detached and thus also from
@@ -457,7 +491,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
/* old_entry is being shot, lets just lie */
mutex_unlock(&audit_tree_group->mark_mutex);
fsnotify_put_mark(old_entry);
- fsnotify_put_mark(&chunk->mark);
+ fsnotify_put_mark(chunk->mark);
return -ENOENT;
}
@@ -1011,7 +1045,7 @@ static int audit_tree_handle_event(struct fsnotify_group *group,
static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
{
- struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
+ struct audit_chunk *chunk = mark_chunk(entry);
evict_chunk(chunk);
@@ -1032,6 +1066,8 @@ static int __init audit_tree_init(void)
{
int i;
+ audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);
+
audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
if (IS_ERR(audit_tree_group))
audit_panic("cannot initialize fsnotify group for rectree watches");
--
2.16.4
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
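For readers following the naming discussion above: a minimal sketch of
alloc_mark() with the local renamed to "amark" as Richard suggests, so the
double-up in "&mark->mark" and "mark->mark.mask" disappears. This is
illustrative of the suggested rename, not the code as posted:

	struct audit_tree_mark {
		struct fsnotify_mark mark;	/* embedded generic fsnotify mark */
		struct audit_chunk *chunk;	/* back-pointer to the owning chunk */
	};

	static struct fsnotify_mark *alloc_mark(void)
	{
		struct audit_tree_mark *amark;

		amark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL);
		if (!amark)
			return NULL;
		fsnotify_init_mark(&amark->mark, audit_tree_group);
		amark->mark.mask = FS_IN_IGNORED;
		return &amark->mark;	/* callers only ever see the generic mark */
	}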
Jan Kara
2018-09-17 16:46:23 UTC
Permalink
Post by Richard Guy Briggs
Post by Jan Kara
Allocate fsnotify mark independently instead of embedding it inside
chunk. This will allow us to just replace chunk attached to mark when
growing / shrinking chunk instead of replacing mark attached to inode
which is a more complex operation.
---
...
Post by Richard Guy Briggs
Post by Jan Kara
+static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
+{
+ return audit_mark(mark)->chunk;
+}
+
static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
- struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
+ struct audit_chunk *chunk = mark_chunk(entry);
audit_mark_put_chunk(chunk);
+ kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
+}
+
+static struct fsnotify_mark *alloc_mark(void)
+{
+ struct audit_tree_mark *mark;
Would it make sense to call this local variable "amark" to indicate it
isn't a struct fsnotify_mark, but in fact an audit helper variant?
Post by Jan Kara
+
+ mark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL);
+ if (!mark)
+ return NULL;
+ fsnotify_init_mark(&mark->mark, audit_tree_group);
+ mark->mark.mask = FS_IN_IGNORED;
+ return &mark->mark;
This is the only place in the patch where it is used to name a variable,
but I found the "mark->mark" construct here a bit confusing to follow.
Yeah, makes sense. I can do the change.

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Paul Moore
2018-10-03 22:11:02 UTC
Permalink
Post by Jan Kara
Post by Richard Guy Briggs
Post by Jan Kara
Allocate fsnotify mark independently instead of embedding it inside
chunk. This will allow us to just replace chunk attached to mark when
growing / shrinking chunk instead of replacing mark attached to inode
which is a more complex operation.
---
...
Post by Richard Guy Briggs
Post by Jan Kara
+static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
+{
+ return audit_mark(mark)->chunk;
+}
+
static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
- struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
+ struct audit_chunk *chunk = mark_chunk(entry);
audit_mark_put_chunk(chunk);
+ kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
+}
+
+static struct fsnotify_mark *alloc_mark(void)
+{
+ struct audit_tree_mark *mark;
Would it make sense to call this local variable "amark" to indicate it
isn't a struct fsnotify_mark, but in fact an audit helper variant?
Post by Jan Kara
+
+ mark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL);
+ if (!mark)
+ return NULL;
+ fsnotify_init_mark(&mark->mark, audit_tree_group);
+ mark->mark.mask = FS_IN_IGNORED;
+ return &mark->mark;
This is the only place in the patch where it is used to name a variable,
but I found the "mark->mark" construct here a bit confusing to follow.
Yeah, makes sense. I can do the change.
Unless you have to respin this patchset for some other reason I
wouldn't worry about it.

I've been working my way through the patchset this week (currently on
09/11) and I expect to finish it up today. Assuming everything looks
good, I'm going to merge this into a working branch, include it in my
weekly -rc test builds, and beat on it for a couple of weeks. If all
is good I'll merge it into audit/next after the upcoming merge window.

Thanks for your patience.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-09-14 19:13:28 UTC
Permalink
Hello,
Jan,
this is the third revision of the series that addresses problems I have
identified when trying to understand how exactly kernel/audit_tree.c uses
the generic fsnotify framework. I hope I have understood all the
interactions right, but careful review is certainly welcome.
I've tried to review it as carefully as I am able. As best I understand
it, this all looks reasonable and an improvement over the previous
state. Thanks for the hard work.

FWIW,
The patches have been tested by a stress test I have written which mounts &
unmounts filesystems in the directory tree while adding and removing audit
rules for this tree in parallel and accessing the tree to generate events.
Still, some real-world testing would be welcome.
* Fixed up mark freeing to use proper pointer as pointed out by Amir
* Changed some naming based on Paul's review
* Split the last patch to ease review
* Rewrite test script so that it can be included in audit testsuite
* Some cleanups and improvements suggested by Amir
Honza
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Jan Kara
2018-09-17 16:57:14 UTC
Permalink
Post by Richard Guy Briggs
Hello,
Jan,
this is the third revision of the series that addresses problems I have
identified when trying to understand how exactly kernel/audit_tree.c uses
the generic fsnotify framework. I hope I have understood all the
interactions right, but careful review is certainly welcome.
I've tried to review it as carefully as I am able. As best I understand
it, this all looks reasonable and an improvement over the previous
state. Thanks for the hard work.
FWIW,
Thanks for review! Paul should I send you updated patch 9 with that one
variable renamed or will you do that small change while merging the series?

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Paul Moore
2018-10-04 01:20:35 UTC
Permalink
Post by Jan Kara
...
Thanks for review! Paul should I send you updated patch 9 with that one
variable renamed or will you do that small change while merging the series?
Hi Jan,

Thanks again for these patches and your patience; some travel,
holidays, and a job change delayed my review. However, everything
looks okay to me (minus the one problem I noted in patch 09/11). I've
added the patches to audit/working-fsnotify_fixes and I'm going to
start stressing them as soon as I get a test kernel built with the
idea of merging them into audit/next as soon as the upcoming merge
window closes.

As far as the variable rename is concerned, that's not something I
would prefer to change during a merge, but if you or Richard wanted to
submit a renaming patch I would be okay with that in this case. If
you do submit the rename patch, please base it on top of this patchset
(or audit/working-fsnotify_fixes).

Thanks!
--
paul moore
www.paul-moore.com
Jan Kara
2018-10-04 06:59:39 UTC
Permalink
Post by Paul Moore
...
As far as the variable rename is concerned, that's not something I
would prefer to change during a merge, but if you or Richard wanted to
submit a renaming patch I would be okay with that in this case. If
you do submit the rename patch, please base it on top of this patchset
(or audit/working-fsnotify_fixes).
Great, thanks. I will send the rename patch in a moment.

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR
Paul Moore
2018-10-03 22:08:14 UTC
Permalink
Post by Jan Kara
Allocate fsnotify mark independently instead of embedding it inside
chunk. This will allow us to just replace chunk attached to mark when
growing / shrinking chunk instead of replacing mark attached to inode
which is a more complex operation.
---
kernel/audit_tree.c | 64 +++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 50 insertions(+), 14 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 0cd08b3581f1..481fdc190c2f 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -142,10 +148,33 @@ static void audit_mark_put_chunk(struct audit_chunk *chunk)
call_rcu(&chunk->head, __put_chunk);
}
+static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *entry)
+{
+ return container_of(entry, struct audit_tree_mark, mark);
+}
+
+static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
+{
+ return audit_mark(mark)->chunk;
+}
+
...
Post by Jan Kara
@@ -426,7 +460,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (!old_entry)
return create_chunk(inode, tree);
- old = container_of(old_entry, struct audit_chunk, mark);
+ old = mark_chunk(old_entry)->chunk;
I'm pretty sure you mean the following instead?

old = mark_chunk(old_entry);
Post by Jan Kara
/* are we already there? */
spin_lock(&hash_lock);
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-10-03 22:39:09 UTC
Permalink
Post by Paul Moore
Post by Jan Kara
...
@@ -142,10 +148,33 @@ static void audit_mark_put_chunk(struct audit_chunk *chunk)
call_rcu(&chunk->head, __put_chunk);
}
+static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *entry)
+{
+ return container_of(entry, struct audit_tree_mark, mark);
+}
+
+static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
+{
+ return audit_mark(mark)->chunk;
+}
+
...
Post by Jan Kara
@@ -426,7 +460,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (!old_entry)
return create_chunk(inode, tree);
- old = container_of(old_entry, struct audit_chunk, mark);
+ old = mark_chunk(old_entry)->chunk;
I'm pretty sure you mean the following instead?
old = mark_chunk(old_entry);
Yup, nice catch. This could have been
"old = audit_mark(old_entry)->chunk"
but the mark_chunk() helper avoids that. (The series still compiles
because this line gets fixed/replaced in the following patch.)

This is why "old" should be called "old_chunk" and "old_entry" should be
called "old_mark" (the latter rename does happen in the last patch).
Post by Paul Moore
Post by Jan Kara
/* are we already there? */
spin_lock(&hash_lock);
paul moore
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
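To spell out why the extra dereference cannot be right: mark_chunk()
already performs the ->chunk step, so its return value is the chunk
itself. A sketch of the type chain, using the helper definitions from the
patch:

	static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *mark)
	{
		return container_of(mark, struct audit_tree_mark, mark);
	}

	static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
	{
		return audit_mark(mark)->chunk;		/* ->chunk happens here */
	}

	/* So in tag_chunk(): */
	old = mark_chunk(old_entry);		/* correct: yields struct audit_chunk * */
	old = mark_chunk(old_entry)->chunk;	/* wrong: struct audit_chunk has no
						 * member named chunk */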
Jan Kara
2018-10-04 06:57:20 UTC
Permalink
Post by Paul Moore
...
Post by Jan Kara
@@ -426,7 +460,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
if (!old_entry)
return create_chunk(inode, tree);
- old = container_of(old_entry, struct audit_chunk, mark);
+ old = mark_chunk(old_entry)->chunk;
I'm pretty sure you mean the following instead?
old = mark_chunk(old_entry);
Right, it gets fixed up in a later patch but it would be good to fix it
here (so as not to break bisection).

Honza
--
Jan Kara <***@suse.com>
SUSE Labs, CR